├── .github └── ISSUE_TEMPLATE.md ├── .gitignore ├── DOCKER.md ├── Docker ├── latest.Dockerfile └── monitor.Dockerfile ├── HISTORY.md ├── LICENSE ├── MIGRATION_GUIDE.md ├── README.md ├── autoelective ├── __init__.py ├── _internal.py ├── captcha │ ├── __init__.py │ ├── classifier.py │ ├── feature.py │ ├── model │ │ ├── KNN.model.f5.l1.c1.bz2 │ │ ├── RandomForest.model.f2.c6.bz2 │ │ └── SVM.model.f3.l1.c9.xz │ └── processor.py ├── client.py ├── config.py ├── const.py ├── course.py ├── elective.py ├── exceptions.py ├── hook.py ├── iaaa.py ├── logger.py ├── loop.py ├── monitor.py ├── parser.py └── utils.py ├── config.sample.ini ├── course.gbk.sample.csv ├── course.utf-8.sample.csv ├── main.py └── requirements.txt /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Check List 2 | 3 | - [ ] 我已经阅读了 [Readme](https://github.com/zhongxinghong/PKUAutoElective/blob/master/README.md), [Migration Guide](https://github.com/zhongxinghong/PKUAutoElective/blob/master/MIGRATION_GUIDE.md), [Release History](https://github.com/zhongxinghong/PKUAutoElective/blob/master/HISTORY.md) ,但是并没有找到有用的信息 4 | - [ ] 我已经搜索了已有的 [Issues](https://github.com/zhongxinghong/PKUAutoElective/issues) ,但是没有找到相同的问题 5 | 6 | ### Version / Environment 7 | 8 | System information: [ ] \( Windows10 64bit, MacOS 10.13.6, Ubuntu 18.04.3 amd64, ... 
) 9 | Python version: [ ] \( run `python3 --version` ) 10 | AutoElective version: [ ] \( run `python3 main.py --version` ) 11 | 12 | ### Config 13 | 14 | 除了学号/密码外的其他配置 15 | 16 | ### Issue Description 17 | 18 | #### What 19 | 20 | 遇到的问题 21 | 22 | #### Console Log 23 | 24 | 必要的终端输出信息 25 | 26 | #### Reproduce 27 | 28 | 如有必要,提供复现的步骤 29 | 30 | 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | /test 3 | /log 4 | /cache 5 | /config 6 | *.bac 7 | 8 | config.ini 9 | course.gbk.csv 10 | course.utf-8.csv 11 | -------------------------------------------------------------------------------- /DOCKER.md: -------------------------------------------------------------------------------- 1 | # PKUAutoElective Docker Image 2 | 3 | 依赖于原项目v2.0.2(2019.09.09)版本。 4 | 5 | ## Tags 6 | 7 | 1. latest 8 | 2. monitor 9 | 10 | ## latest 11 | 12 | 包含python3,依赖库,以及项目源代码。 13 | 14 | ### 运行方法 15 | 16 | ``` bash 17 | docker run -d \ 18 | --name=pae \ 19 | -v /path/to/config/folder:/config \ 20 | yousiki/pkuautoelective:latest # 运行工具 21 | docker logs pae # 查看输出 22 | docker stop pae # 停止工具 23 | ``` 24 | 25 | ## monitor 26 | 27 | 额外包含Monitor运行依赖的库。 28 | 29 | ### 运行方法 30 | 31 | `config.ini`中的`host`值建议设为`0.0.0.0` 32 | 33 | ``` bash 34 | docker run -d \ 35 | --name=pae \ 36 | -p 7074:7074 \ 37 | -v /path/to/config/folder:/config \ 38 | yousiki/pkuautoelective:monitor # 运行工具 39 | docker logs pae # 查看输出 40 | docker stop pae # 停止工具 41 | ``` -------------------------------------------------------------------------------- /Docker/latest.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:slim 2 | 3 | LABEL maintainer="you.siki@outlook.com" 4 | 5 | RUN pip install --no-cache-dir \ 6 | -i https://pypi.tuna.tsinghua.edu.cn/simple \ 7 | lxml \ 8 | numpy \ 9 | Pillow \ 10 | sklearn \ 11 | requests \ 12 | simplejson 
13 | 14 | ADD . /workspace 15 | 16 | VOLUME [ "/config" ] 17 | 18 | WORKDIR /workspace 19 | 20 | CMD [ \ 21 | "python", \ 22 | "main.py", \ 23 | "--config=/config/config.ini", \ 24 | "--course-csv-gbk=/config/course.gbk.csv", \ 25 | "--course-csv-utf8=/config/course.utf-8.csv" ] -------------------------------------------------------------------------------- /Docker/monitor.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:slim 2 | 3 | LABEL maintainer="you.siki@outlook.com" 4 | 5 | RUN pip install --no-cache-dir \ 6 | -i https://pypi.tuna.tsinghua.edu.cn/simple \ 7 | lxml \ 8 | numpy \ 9 | Pillow \ 10 | sklearn \ 11 | requests \ 12 | simplejson 13 | 14 | RUN pip install --no-cache-dir \ 15 | -i https://pypi.tuna.tsinghua.edu.cn/simple \ 16 | flask \ 17 | werkzeug 18 | 19 | ADD . /workspace 20 | 21 | VOLUME [ "/config" ] 22 | 23 | WORKDIR /workspace 24 | 25 | CMD [ \ 26 | "python", \ 27 | "main.py", \ 28 | "--with-monitor", \ 29 | "--config=/config/config.ini", \ 30 | "--course-csv-gbk=/config/course.gbk.csv", \ 31 | "--course-csv-utf8=/config/course.utf-8.csv" ] -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | Release History 2 | =============== 3 | 4 | v2.1.1 (2019-09-13) 5 | ------------------- 6 | - 修复了 `OperationFailedError` 使用错误的父类派生而导致不能正常初始化的问题 7 | 8 | 9 | v2.1.0 (2019-09-13) 10 | ------------------- 11 | - 修复了 Windows 下自定义参数不生效的问题 12 | 13 | 14 | v2.0.9 (2019-09-12) 15 | ------------------- 16 | - 对 v2.0.8 版本的完善,现在删除了与 `signal` 相关的逻辑,统一了两种运行模式下主进程退出的方式,确保了 `Ctrl + C` 的信号和子进程内部发出的终止信号均能使主进程正常退出 17 | 18 | 19 | v2.0.8 (2019-09-11) 20 | ------------------- 21 | - 对 v2.0.6 版本的完善,该版本在不带 `--with-monitor` 运行的情况下,也可以正确地接收到来自 `Ctrl + C` 的终止命令 22 | 23 | 24 | v2.0.7 (2019-09-11) 25 | ------------------- 26 | - 为 monitor 添加了与错误捕获记录相关的路由 27 | 28 | 29 | v2.0.6 (2019-09-11) 30 | 
------------------- 31 | - 修复了在 Windows 下 `Ctrl + C` 无法退出程序的问题 32 | 33 | 34 | v2.0.5 (2019-09-11) 35 | ------------------- 36 | - 可以捕获 IAAA 登录时的密码错误和多次登录失败导致账号已被封禁的错误 37 | - 完善了对多进程/线程下进程死亡的处理,以确保主进程在遇到错误时可以完全退出 38 | - 现在 monitor 进程会在 loop 进程结束后自动退出 39 | 40 | 41 | v2.0.4 (2019-09-10) 42 | ------------------- 43 | - elective 客户端采用多会话机制 44 | 45 | 46 | v2.0.3 (2019-09-09) 47 | ------------------- 48 | - 可以捕获来自 IAAA 的错误 49 | - 丰富了部分错误的提示信息 50 | 51 | 52 | v2.0.2 (2019.09.09) 53 | ------------------- 54 | - 添加了对处于选课计划第一页之后的课程的支持 55 | 56 | 57 | v2.0.1 (2019.09.09) 58 | ------------------- 59 | - 代码重构,删减大量冗余设计 60 | - 新增监视器进程,开启后可以通过特定端口监听运行状态 61 | - 添加多账号支持,去除 cookies / token 本地共享的逻辑,并可以手动指定 config.ini / course.csv 文件的路径 62 | - 修复了在一些情况下会话无法保持的错误 63 | - 可以捕获几个新遇到的系统异常/错误提示 64 | - 美化了终端的输出格式 65 | 66 | 67 | v1.0.4 (2019.02.22) 68 | ------------------- 69 | - 修复了一处语法错误,位于 **main.py** 第 216-235 行的 `ignored.append` 处 70 | - 纠正了一些变量名的拼写错误 71 | - 可以捕获多选英语课引起的错误 72 | 73 | 74 | v1.0.3 (2019.02.20) 75 | ------------------- 76 | - 兼容了本科生辅双的登录界面,主修身份选课测试通过,辅双身份选课支持第一页 77 | - 可以捕获共享回话引起的系统异常 78 | - 可以捕获辅双登录无验证信息的系统异常 79 | 80 | 81 | v1.0.2 (2019.02.19) 82 | ------------------- 83 | - 研究生选课测试通过 84 | - 兼容了部分页面没有 `.//head/title` 标签的情况 85 | - 修改 `Course` 类的 `classNo` 属性为 int 类型,确保 `01` 与 `1` 为同班号 86 | - 主程序开始的第一个循环回合更改为首先主动登录一次,以免旧缓存导致无法切换账号 87 | - 重新登录时会率先删除客户端已有的 cookies ,修复了一次重新登录需要花费两回合的问题 88 | - 更改单一 `User-Agent` 为 `User-Agent` 池 89 | - 可以捕获课程互斥引起的错误提示 90 | 91 | 92 | v1.0.1 (2019.02.18) 93 | ------------------- 94 | - 上线版本,支持非辅双本科生选课 95 | 96 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Rabbit 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without 
limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MIGRATION_GUIDE.md: -------------------------------------------------------------------------------- 1 | Migration Guide 2 | ==================== 3 | 4 | v2.0.9 -> 2.1.0 5 | ------------------ 6 | 7 | #### Development Related 8 | - Windows 在创建多进程时只能采用 `spawn` 方法,子进程创建后并不直接共享父进程已经设置好的的用户配置项,因此还需要将用户配置项 `userInfo` 在进程中共享。但是 `userInfo` 直接影响着最基本的 `config.py` ,为了让用户自定义配置 `userInfo` 能够在子进程中被正确应用,`userInfo` 的更新至子线程 `_internal.py` 和 `config` 单例的第一次初始化必须早于任何依赖于 `config` 单例的方法的调用。 9 | - 因此,这一版中对包调用的逻辑进行了大幅度的修改,删减了大部分包的在导入时即创建 `_config` 全局变量的逻辑,改成将 `config` 变量在函数使用时才创建,并且将 `loop.py` 和 `monitor.py` 中的所有全局变量和全局函数声明为局部 10 | - 个人觉得这个改动很丑陋,但是由于我的开发经验有限,一时想不到其他的写法,如果你对这个问题有更好的解决方法,欢迎发 Issue ! 
11 | - 个人的一个改进想法是把多进程全部换成多线程,这样就不需要考虑资源共享的问题 12 | 13 | 14 | v2.0.6 -> v2.0.7 15 | ------------------ 16 | - monitor 进程中 `/loop` 路由细分为 `/main_loop` 和 `/login_loop` 两个路由 17 | - monitor 进程中 `/all` 路由添加了与错误捕获记录相关的键 18 | - monitor 进程中添加了 `/errors` 路由 19 | 20 | #### Development Related 21 | - 进程共享的 status 对象中的 `loop` 项细分为了 `main_loop` 和 `login_loop` 两个值,并添加了与错误捕获记录相关的键 22 | 23 | 24 | v2.0.4 -> v2.0.5 25 | ------------------ 26 | 27 | #### Development Related 28 | 29 | - 修改了错误类 `IAAANotSuccessError` 的全局错误码 30 | 31 | 32 | v2.0.3 -> v2.0.4 33 | ------------------ 34 | - `config.ini` 内添加了 elective 多会话相关的配置 35 | - `config.ini` 内删除了 `iaaa_relogin_interval` 字段 36 | 37 | #### Development Related 38 | 39 | - 为了应对选课网偶发的会话过期问题,为 elective 客户端引入了多会话机制,并删除了旧有的定时重登机制。具体见 README 中的 [运行流程](/README.md#运行流程) 小节 40 | 41 | 42 | v2.0.1 -> v2.0.2 43 | ------------------ 44 | - `config.ini` 内添加了 `client/supply_cancel_page` 值,以支持不处于选课计划第一页的课程 45 | 46 | 47 | v1.0.4 -> v2.0.1 48 | ------------------ 49 | - 新版对 `config.ini` 内的绝大多数配置项名称做了修改,需要用新提供 `config.sample.ini` 重新填写一遍配置 50 | - 添加了自定义 `config.ini` 和 `course.csv` 51 | - 添加了对 `Flask` 库的依赖,对于尚未安装该依赖的环境,还需额外运行 `pip3 install flask` 52 | 53 | #### For 'git pull' Comment 54 | 55 | 如果你使用了 `git` 命令更新该项目,在输入 `git pull origin master` 后,可能会报错 `error: Your local changes to the following files would be overwritten by merge:` ,这是因为新版删除了 `config.ini` 和 `course.*.csv` 文件,而改用 `config.sample.ini` 和 `course.*.sample.csv` 代替。只需要输入以下命令即可消除冲突: 56 | 57 | 在项目根目录下: 58 | ```console 59 | $ git checkout config.ini 60 | $ git checkout course.utf-8.csv 61 | $ git checkout course.gbk.csv 62 | ``` 63 | 64 | #### Development Related 65 | 66 | - 在 `BaseClient` 内添加了 `persist_cookies` 方法,会在 `hooks.py` 内被调用,以确保在一些特定的错误下仍然可以保持会话 67 | - 在 `elective.py` 的 `sso_login` 的请求头中添加了一个固定的无效 `Cookie` 以防止登录时报 `101` 状态码 68 | - 修改了 `IAAA` 的重新登录逻辑,由原来的遇到错误重登,变为每隔一段时间重登 69 | - 在 `loop.py` 中对 `elective.py` 的 `get_Validate` 方法的调用结果添加了一层错误捕获,以应对非 JSON 格式的响应体 70 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PKUAutoElective 2 | 3 | 北大选课网 **补退选** 阶段自动选课小工具 v2.1.1 (2019.09.13) 4 | 5 | 目前支持 `本科生(含辅双)` 和 `研究生` 选课 6 | 7 | 8 | ## 特点 9 | 10 | - 运行过程中不需要进行任何人为操作,且支持同时通过其他设备、IP 访问选课网 11 | - 利用机器学习模型自动识别验证码,具体参见我的项目 [PKUElectiveCaptcha](https://github.com/zhongxinghong/PKUElectiveCaptcha) ,识别率测试值为 **95.6%** 12 | - 具有较为完善的错误捕获机制,不容易在运行中意外退出 13 | - 可以选择性开启额外的监视器进程,之后可以通过端口监听当前的选课状况 14 | - 支持多进程下的多账号/多身份选课 15 | 16 | 17 | ## 安装 18 | 19 | 该项目至少需要 Python 3 (项目开发环境为 Python 3.6.6),可以从 [Python 官网](https://www.python.org/) 下载并安装 20 | 21 | 例如在 Debian-Linux 下运行: 22 | ```console 23 | $ apt-get install python3 24 | ``` 25 | 26 | 下载这个 repo 至本地。点击右上角的 `Clone or download` 即可下载 27 | 28 | 对于 git 命令行: 29 | ```console 30 | $ git clone https://github.com/zhongxinghong/PKUAutoElective.git 31 | ``` 32 | 33 | 安装依赖包 34 | ```console 35 | $ pip3 install requests lxml Pillow numpy sklearn flask 36 | ``` 37 | 38 | 可以改用清华 pip 源,加快下载速度 39 | ```console 40 | $ pip3 install requests lxml Pillow numpy sklearn flask -i https://pypi.tuna.tsinghua.edu.cn/simple 41 | ``` 42 | 43 | 可选依赖包 44 | ```console 45 | $ pip3 install simplejson 46 | ``` 47 | 48 | ## 基本用法 49 | 50 | 1. 复制 `config.sample.ini` 文件,并将所复制得的文件重命名为 `config.ini` 51 | 2. 根据系统类型选择合适的 `course.csv` ,同理复制 `course.*.sample.csv` 并将所得文件重命名为 `course.*.csv` ,即 `course.utf-8.csv/course.gbk.csv` ,以确保 csv 表格用软件打开后不会乱码 52 | - **Linux** 若使用 `utf-8` 编码,可以用 LibreOffice 以 `UTF-8` 编码打开,若使用 `gbk` 编码,可以用 LibreOffice 以 `GB-18030` 编码打开 53 | - **Windows** 使用 `gbk` 编码,可以用 MS Excel 打开 54 | - **MacOS** 若使用 `gbk` 编码,可以用 MS Excel 打开,若使用 `utf-8` 编码,可以用 numbers 打开 55 | 3. 将待选课程手动添加到选课网的 “选课计划” 中,并确保所选课程处在 **补退选页** 中 “选课计划” 列表的 **第 1 页** 。 56 | - 注:为了保证刷新速度,减小服务器压力,该项目不解析位于选课计划第 1 页之后的课程 57 | - 注:该项目不会事前校验待选课程的合理性,只会根据选课提交结果来判断是否提交成功,所以请自行 **确保填写的课程在有名额的时候可以被选上** ,以免浪费时间。部分常见错误可参看 [异常处理](#异常处理) 小节 58 | 4. 
将待选课程的 `课程名`, `班号`, `开课单位` 对应复制到 `course.csv` 中(本项目根据这三个字段唯一确定一个课程),每个课程占一行,高优先级的课程在上(即如果当前循环回合同时发现多个课程可选,则按照从上往下的优先级顺序依次提交选课请求) 59 | - 注:考虑到 csv 格式不区分数字和字符串,该项目允许将课号 `01` 以数字 `1` 的形式直接录入 60 | - 注:请确保每一行的所有字段都被填写,**信息填写不完整的行会被自动忽略,并且不会抛出异常** 61 | 5. 配置 `config.ini` 62 | - 修改 `coding/csv_coding` 项,使之与所用 `course.*.csv` 的编码匹配 63 | - 填写 IAAA 认证所用的学号和密码 64 | - 如果是双学位账号,则设置 `dual_degree` 项为 `true` ,同时设置双学位登录身份 `identity` ,只能填 `bzx`, `bfx` ,分别代表 `主修` 和 `辅双` ;对于非双学位账号,则设置 `dual_degree` 为 `false` ,此时登录身份项没有意义。注:以 **双学位账号的主学位身份** 进行选课仍然需要将 `dual_degree` 设为 `true` ,否则可能会遇到一直显示会话过期/尚未登录的情况。 65 | - 如果待选的课程不在选课计划的第一页,并且无法将第一页的其他课程删除,你可以通过修改 `supply_cancel_page` 来指定实际刷新第几页。注:该项目一个进程只能刷新一页的选课计划,如果你需要选的课处于选课计划的不同页,则需要为每个页面分别开一个进程,详见 [高级用法](#高级用法) 中的 [多账号设置](#多账号设置) 小节 66 | - 如有需要,可以修改刷新间隔项 `refresh_interval` 和 `random_deviation`,但 **不要将刷新间隔改得过短!** 67 | 6. 进入项目根目录,利用 `python3 main.py` 命令运行主程序,即可开始自动选课。 68 | 69 | 70 | ## 测试方法 71 | 72 | 如有需要,可以进行下面的部分测试,确保程序可以在 `你的补退选页` 中正常运行: 73 | 74 | - 可以通过向课程列表中添加如下几种课程,测试程序的反应: 75 | 76 | - 正常的可以直接补选上的课程 77 | - 已经选满的课程 78 | - 上课时间/考试时间冲突的课程 79 | - 相同课号的课程(其他院的相同课或同一门课的不同班) 80 | - 性质互斥的课程(例如:线代与高代) 81 | - 跨院系选课阶段开放的其他院专业课 82 | 83 | - 可以尝试一下超学分选课会出现什么情况 84 | 85 | #### 注意: 86 | 87 | - 之后手动退选的时候不要点错课噢 QvQ 88 | - 研究生不能修改选课计划,请慎重测试,不要随便添加其他课程,以免造成不必要的麻烦! 
89 | 90 | 91 | ## 高级用法 92 | 93 | 自 `v2.0.0` 起,可以在程序运行时指定命令行选项。通过 `python3 main.py --help` 查看帮助。 94 | ```console 95 | $ python3 main.py --help 96 | 97 | Usage: main.py [options] 98 | 99 | PKU Auto-Elective Tool v2.0.1 (2019.09.09) 100 | 101 | Options: 102 | --version show program's version number and exit 103 | -h, --help show this help message and exit 104 | --config=FILE custom config file encoded with utf8 105 | --course-csv-utf8=FILE 106 | custom course.csv file encoded with utf8 107 | --course-csv-gbk=FILE 108 | custom course.csv file encoded with gbk 109 | --with-monitor run the monitor process simultaneously 110 | 111 | ``` 112 | 113 | 通过指定命令行参数,可以开启以下的功能: 114 | 115 | ### 多账号设置 116 | 117 | 可以为每一个账号单独创建一个配置文件和一个课程列表,在不同的进程中以不同的配置文件运行该项目,以实现多账号同时刷课 118 | 119 | 假如为 Alice 和 Bob 同学创建了如下的文件,填写好了相应配置。假设它们与 `main.py` 处于同一目录下 120 | ```console 121 | $ ls 122 | 123 | config.alice.ini course.utf-8.alice.csv config.bob.ini course.gbk.bob.csv main.py 124 | 125 | ``` 126 | 127 | 接下来分别在两个终端中运行下面两个命令,即可实现多账号刷课 128 | ```console 129 | $ python3 main.py --config ./config.alice.ini --course-csv-utf8 ./course.utf-8.alice.csv 130 | $ python3 main.py --config ./config.bob.ini --course-csv-gbk ./course.gbk.bob.csv 131 | 132 | ``` 133 | 134 | 由于选课网存在会话数上限,开启多进程的时候还需要调整各进程的配置文件中的 `client/elective_client_pool_size` 项,合理分配各个进程的会话数。详见 [其他配置项](#其他配置项) 。同一 IP 下所有进程的会话总数不超过 5 。建议值: 单进程 4; 两进程 2+2; 三进程 1+1+2 ...... 135 | 136 | 137 | ### 开启监视器 138 | 139 | 假如你拥有一个可以连上 `elective.pku.edu.cn` 和 `iaaa.pku.edu.cn` 的服务器,你可以在服务器上运行这个项目,并开启监听进程,然后通过访问特定地址来查看当前的运行状态。具体的配置方法如下: 140 | 141 | 1. 在 `config.ini` 中修改需要绑定的 `host/port` 142 | 2. 在运行时指定 `--with-monitor` 参数,即 `python3 main.py --with-monitor` 143 | 3. 
请求相应的地址即可查看运行状态。例如按照默认设置,可以请求 `http://127.0.0.1:7074` 144 | 145 | 可以通过 nginx 进行反向代理,配置示例如下: 146 | ```nginx 147 | # filename: nginx.autoelective.conf 148 | # coding: utf-8 149 | 150 | server { 151 | 152 | listen 12345; 153 | server_name 10.123.124.125; 154 | charset UTF-8; 155 | 156 | location / { 157 | 158 | proxy_pass http://127.0.0.1:7074; 159 | } 160 | } 161 | 162 | ``` 163 | 164 | 在这个示例中,通过访问 `http://10.123.124.125:12345` 可以查看运行状态 165 | 166 | 该项目为这个监视器注册了如下路由: 167 | ``` 168 | GET / 同 /rules 169 | GET /all 完整的状态 170 | GET /current 当前候选的课程 171 | GET /errors 当前已捕获到的错误数 172 | GET /goals 输出原始的选课计划(直接从 course.csv 中读取到的课程) 173 | GET /ignored 已经被忽略的课程及相应原因(已选上/无法选) 174 | GET /login_loop login-loop 当前循环数 175 | GET /main_loop main-loop 当前循环数 176 | GET /rules 输出这个路由列表 177 | 178 | ``` 179 | 180 | 例如,请求 `http://10.123.124.125:12345/all` 可以查看完整的状态 181 | 182 | 183 | ## 项目架构与分析 184 | 185 | `autoelective/` 目录结构如下 186 | ```console 187 | $ tree autoelective/ 188 | autoelective/ 189 | ├── captcha 验证码相关 190 | │ ├── classifier.py 模型导入与分类器类 191 | │ ├── feature.py 与特征向量提取相关的函数 192 | │ ├── __init__.py 验证码识别结果的模型和验证码识别类 193 | │ ├── model 可用模型 194 | │ │ ├── KNN.model.f5.l1.c1.bz2 195 | │ │ ├── RandomForest.model.f2.c6.bz2 196 | │ │ └── SVM.model.f3.l1.c9.xz 197 | │ └── processor.py 验证码图像处理相关的函数 198 | ├── client.py 客户端的基类 199 | ├── config.py ini 配置文件的解析类及配置的模型声明 200 | ├── const.py 文件夹路径、URL 等常数 201 | ├── course.py 课程模型 202 | ├── elective.py 与 elective.pku.edu.cn 的接口通信的客户端类 203 | ├── exceptions.py 错误类 204 | ├── hook.py 对客户端请求结果进行校验的相关函数 205 | ├── iaaa.py 与 iaaa.pku.edu.cn 的接口通信的客户端类 206 | ├── __init__.py 207 | ├── _internal.py 内部工具函数 208 | ├── logger.py 日志类声明 209 | ├── loop.py 主循环进程的入口 210 | ├── monitor.py 监视器进程的入口 211 | ├── parser.py 网页解析相关的函数 212 | └── utils.py 通用工具函数 213 | 214 | ``` 215 | 216 | ## 运行流程 217 | 218 | #### loop 进程 219 | 220 | 基本的思路是轮询服务器。利用 `iaaa.py` 和 `elective.py` 中定义的客户端类与服务器进行交互,请求结果借助 `parser.py` 中定义的函数进行解析,然后通过 `hook.py` 中定义的函数对结果进行校验,如果遇到错误,则抛出 `exceptions.py` 
中定义的错误类,循环体外层可以捕获相应的错误。并判断应该退出还是进入下回合。 221 | 222 | 采用多 elective 客户端的机制,存在着可用的 elective 客户端池 `electivePool` 和需登录/重登的 elective 客户端池 `loginPool`,在 loop 进程内有 `login-loop` 和 `main-loop` 两个子线程。 223 | 224 | ##### login-loop 线程 225 | 226 | 该线程维护一个登录循环: 227 | 228 | 1. 监听 `loginPool` ,阻塞线程,直到出现需要登录的客户端 229 | 2. 就尝试对该客户端进行登录 230 | 3. 登录成功后将该客户端放入 `electivePool` ,如果登录失败,则持有该客户端进入下一回合 231 | 4. 结束循环,不管成功失败,等待 `login_loop_interval` 时间(可在 `config.ini` 中修改) 232 | 233 | ##### main-loop 线程 234 | 235 | 该线程负责轮询选课网及提交选课请求,运行流程如下: 236 | 237 | 1. 一次循环回合开始,打印候选课程的列表和已忽略课程的列表。 238 | 2. 从 `electivePool` 中获取一个客户端,如果 `electivePool` 为空则阻塞线程,如果客户端尚未登录,则立刻停止当前回合,跳至步骤 (8) 239 | 3. 获得补退选页的 HTML ,并解析 “选课计划” 列表和 “已选课程” 列表。 240 | 4. 校验 `course.csv` 所列课程的合理性(即必须出现在 “选课计划” 或 “已选课程” 中),随后结合上一步的结果筛选出当回合有选课名额的课程。 241 | 5. 如果发现存在可选的课程,则依次提交选课请求。在每次提交前先自动识别一张验证码。 242 | 6. 根据请求结果调整候选课程列表,并结束当次回合。 243 | 7. 将当前客户端放回 `electivePool` ,下回合会重新选择一个客户端 244 | 8. 当次循环回合结束后,等待一个带随机偏量的 `refresh_interval` 时间(可在 `config.ini` 中修改该值)。 245 | 246 | #### monitor 进程 247 | 248 | 在运行时指定 `--with-monitor` 参数,可以开启 `monitor` 进程。此时会在主进程中开启 `loop` 和 `monitor` 两个子进程,它们通过 `multiprocessing.Manager` 共享一部分资源(计划选课列表、已忽略课程列表等)。monitor 本质是一个 server 应用,它注册了可以用于查询共享资源状态的路由,此时通过访问 server 所绑定的地址,即可实现对 loop 状态的监听。 249 | 250 | 251 | ## DEBUG 相关 252 | 253 | 在 `config.ini` 中提供了如下的选项: 254 | 255 | - `client/debug_print_request` 如果你需要了解每个请求的细节,可以将该项设为 `true` ,会将与请求相关的一些重要信息打印到终端。如果你需要知道其他的请求信息,可以自行修改 `hook.py` 下的 `debug_print_request` 函数 256 | - `client/debug_dump_request` 会用 `pickle/gzip` 记录该请求的 `Response` 对象,如果发生未知的错误,仍然可以恢复出当时的请求。如有必要可以将该项设为 `True` 以开启该功能。关于未知错误,详见 [未知错误警告](#未知错误警告) 小节。日志会被记录在 `log/request/` 目录下,可以通过 `utils.py` 中的 `pickle_gzip_load` 函数重新导入 257 | 258 | 259 | ## 其他配置项 260 | 261 | - `client/iaaa_client_timeout` IAAA 客户端的最长请求超时 262 | - `client/elective_client_timeout` Elective 客户端的最长请求超时,考虑到选课网在网络阻塞的时候响应时间会很长,这个时间默认比 IAAA 的客户端要长 263 | - `client/elective_client_pool_size` Elective 客户端池的最大容量。注:根据观察,每个 IP 似乎只能总共同时持有 **5 个会话**,否则会遇到 elective 
登录时无限超时的问题。因此这个这个值不宜大于 5 (如果你还需要通过浏览器访问选课网,则不能大于 4)。 264 | - `client/login_loop_interval` IAAA 登录循环每两回合的时间间隔 265 | 266 | 267 | ## 异常处理 268 | 269 | 各种异常类定义参看 `exceptions.py` 。每个类下均有简短的文字说明。 270 | 271 | ### 系统异常 `SystemException` 272 | 273 | 对应于 `elective.pku.edu.cn` 的各种系统异常页,目前可识别: 274 | 275 | - **请不要用刷课机刷课:** 请求头未设置 `Referer` 字段,或者未事先提交验证码校验请求,就提交选课请求(比如在 Chrome 的开发者工具中,直接找到 “补选” 按钮在 DOM 中对应的链接地址并单击访问。 276 | - **Token无效:** token 失效 277 | - **尚未登录或者会话超时:** cookies 中的 session 信息过期 278 | - **不在操作时段:** 例如,在预选阶段试图打开补退选页 279 | - **索引错误:** 貌似是因为在其他客户端操作导致课程列表中的索引值变化 280 | - **验证码不正确:** 在补退选页填写了错误验证码后刷新页面 281 | - **无验证信息:** 辅双登录时可能出现,原因不明 282 | - **你与他人共享了回话,请退出浏览器重新登录:** 同一浏览器内登录了第二个人的账号,则原账号选课页会报此错误(由于共用 cookies) 283 | - **只有同意选课协议才可以继续选课!** 第一次选课时需要先同意选课协议 284 | 285 | ### 提示框反馈 `TipsException` 286 | 287 | 对应于 `补退选页` 各种提交操作(补选、退选等)后的提示框反馈,目前可识别: 288 | 289 | - **补选课程成功:** 成功选课后的提示 290 | - **您已经选过该课程了:** 已经选了相同课号的课程(可能是别的院开的相同课,也可能是同一门课的不同班) 291 | - **上课时间冲突:** 上课时间冲突 292 | - **考试时间冲突** 考试时间冲突 293 | - **超时操作,请重新登录:** 貌似是在 cookies 失效时提交选课请求(比如在退出登录或清空 `session.cookies` 的情况下,直接提交选课请求) 294 | - **该课程在补退选阶段开始后的约一周开放选课:** 跨院系选课阶段未开放时,试图选其他院的专业课 295 | - **您本学期所选课程的总学分已经超过规定学分上限:** 选课超学分 296 | - **选课操作失败,请稍后再试:** 未知的操作失败,貌似是因为请求过快 297 | - **只能选其一门:** 已选过与待选课程性质互斥的课程(例如:高代与线代) 298 | - **学校规定每学期只能修一门英语课:** 一学期试图选修多门英语课 299 | 300 | 301 | ## 说明与注意事项 302 | 303 | - 为了避免访问频率过快,每一个循环回合结束后,都会暂停一下,确保每两回合间保持适当的间隔,**这个时间间隔不可以改得过短** ,否则有可能对服务器造成压力!(据说校方选课网所在的服务器为单机) 304 | - 不要修改 `course.csv` 的文件编码、表头字段、文件格式,不要添加或删除列,不要在空列填写任何字符,否则可能会造成 csv 文件不能正常读取。 305 | - 该项目通过指定 I/O 相关函数的 `encoding` 参数为 `utf-8-sig` 来兼容带 BOM 头的 UTF-8 编码的文件,包括 `config.ini`, `course.csv` ,如果仍然存在问题,请不要使用 `记事本 NotePad` 进行文件编辑,应改用更加专业的编辑工具或者代码编辑器,例如 `NotePad ++`, `Sublime Text`, `VSCode`, `PyCharm` 等,对配置文件进行修改,并以 `无 BOM 的 UTF-8` 编码保存文件。 306 | - 该项目针对 `预选页` 和 `补退选页` 相关的接口进行设计,`elective.py` 内定义的接口请求方法,只在 **补退选** 阶段进行过测试,不能保证适用于其他阶段。 307 | - 该项目针对如下的情景设计:课在有空位的时候可以选,但是当前满人无法选上,需要长时间不断刷新页面。对于有名额但是网络拥堵的情况(比如到达某个特定的选课时间节点时),用该项目选课 
**不一定比手选快**,因为该项目在每次启动前会先登录一次 IAAA ,这个请求在网络堵塞的时候可能很难完成。如果你已经通过浏览器提前登入了选课网,那么手选可能是个更好的选择。 308 | 309 | 310 | ## 未知错误警告 311 | 312 | - 在 2019.02.22 下午 5:00 跨院系选课名额开放的时刻,有人使用该项目试图抢 `程设3班`,终端日志表明,程序运行时发现 `程设3班` 存在空位,并成功选上,但人工登录选课网后发现,实际选上了 `程设4班(英文班)` 。使用者并未打算选修英文班,且并未将 `程设4班` 加入到 `course.csv` 中,而仅仅将其添加到教学网 “选课计划” 中,在网页中与 `程设3班` 相隔一行。从本项目的代码逻辑上我可以断定,网页的解析部分是不会出错的,对应的提交选课链接一定是 `程设3班` 的链接。可惜没有用文件日志记录网页结构,当时的请求结果已无从考证。从这一极其奇怪的现象中我猜测,北大选课网的数据库或服务器有可能存在 **线程不安全** 的设计,也有可能在高并发时会偶发 **Race condition** 漏洞。因此,我在此 **强烈建议: (1) 不要把同班号、有空位,但是不想选的课放在选课计划内; (2) 不要在学校服务器遭遇突发流量的时候拥挤选课。** 否则很有可能遭遇 **未知错误!** 313 | 314 | 315 | ## 历史更新信息 316 | 317 | 见 [Release History](/HISTORY.md) 318 | 319 | 320 | ## 版本迁移指南 321 | 322 | 见 [Migration Guide](/MIGRATION_GUIDE.md) 323 | 324 | 325 | ## 责任须知 326 | 327 | - 本项目仅供参考学习,你可以修改和使用这个项目,但请自行承担由此造成的一切后果 328 | - 严禁在公共场合扩散这个项目,以免给你我都造成不必要的麻烦 329 | 330 | 331 | ## 证书 332 | 333 | - PKUElectiveCaptcha [MIT LICENSE](https://github.com/zhongxinghong/PKUElectiveCaptcha/blob/master/LICENSE) 334 | - PKUAutoElective [MIT LICENSE](https://github.com/zhongxinghong/PKUAutoElective/blob/master/LICENSE) -------------------------------------------------------------------------------- /autoelective/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # filename: __init__.py 4 | # modified: 2019-09-13 5 | 6 | __version__ = "2.1.1" 7 | __date__ = "2019.09.13" 8 | __author__ = "Rabbit" 9 | -------------------------------------------------------------------------------- /autoelective/_internal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # filename: _internal.py 4 | # modified: 2019-09-08 5 | 6 | __all__ = [ 7 | 8 | "mkdir", 9 | "abspath" 10 | 11 | "userInfo", 12 | ] 13 | 14 | import os 15 | 16 | userInfo = {} # shared the user's custom options 17 | 18 | 19 | def mkdir(path): 20 | if 
not os.path.exists(path): 21 | os.mkdir(path) 22 | 23 | def abspath(*paths): 24 | _BASE_DIR = os.path.dirname(__file__) 25 | return os.path.normpath(os.path.abspath(os.path.join(_BASE_DIR, *paths))) 26 | 27 | -------------------------------------------------------------------------------- /autoelective/captcha/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # filename: __init__.py 4 | # modified: 2019-09-08 5 | 6 | __all__ = ["CaptchaRecognizer"] 7 | 8 | import os 9 | from PIL import Image 10 | from .processor import denoise8, denoise24, crop 11 | from .classifier import KNN, SVM, RandomForest 12 | from ..const import CAPTCHA_CACHE_DIR 13 | from ..utils import Singleton, xMD5, xSHA1 14 | 15 | 16 | def _captcha_cache_file(*paths): 17 | return os.path.abspath(os.path.join(CAPTCHA_CACHE_DIR, *paths)) 18 | 19 | 20 | class CaptchaRecognitionResult(object): 21 | 22 | def __init__(self, code, segs, spans, cache): 23 | self.code = code 24 | self.segs = segs 25 | self.spans = spans 26 | self.cache = cache 27 | 28 | def __repr__(self): 29 | return '<%s: %r>' % ( 30 | self.__class__.__name__, 31 | self.code, 32 | ) 33 | 34 | def clear_cache(self): 35 | for file in self.cache: 36 | if os.path.exists(file): 37 | os.remove(file) 38 | 39 | 40 | class CaptchaRecognizer(object, metaclass=Singleton): 41 | 42 | def __init__(self): 43 | self._clf = SVM() 44 | 45 | def recognize(self, imgBytes): 46 | 47 | cache = [] 48 | imgHash = xMD5(imgBytes) 49 | 50 | rawImgCacheFile = _captcha_cache_file("%s.raw.jpg" % imgHash) 51 | with open(rawImgCacheFile, "wb") as fp: 52 | fp.write(imgBytes) 53 | 54 | cache.append(rawImgCacheFile) 55 | 56 | img = Image.open(rawImgCacheFile) 57 | img = img.convert("1") 58 | 59 | img = denoise8(img, repeat=1) 60 | img = denoise24(img, repeat=1) 61 | denoisedImgCacheFile = _captcha_cache_file("%s.denoised.jpg" % imgHash) 62 | img.save(denoisedImgCacheFile) 63 
| cache.append(denoisedImgCacheFile) 64 | 65 | segs, spans = crop(img) 66 | 67 | Xlist = [ self._clf.feature(segImg) for segImg in segs ] 68 | chars = self._clf.predict(Xlist) 69 | captcha = "".join(chars) 70 | 71 | for idx, (segImg, ch) in enumerate(zip(segs, chars)): 72 | segImgCacheFile = _captcha_cache_file("%s.seg%d.%s.jpg" % (imgHash, idx, ch)) 73 | segImg.save(segImgCacheFile) 74 | cache.append(segImgCacheFile) 75 | 76 | return CaptchaRecognitionResult(captcha, segs, spans, cache) 77 | -------------------------------------------------------------------------------- /autoelective/captcha/classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # filename: classifier.py 4 | # modified: 2019-09-08 5 | 6 | __all__ = ["KNN","SVM","RandomForest"] 7 | 8 | import os 9 | import re 10 | from sklearn.neighbors.classification import KNeighborsClassifier 11 | from sklearn.svm import SVC 12 | from sklearn.ensemble.forest import RandomForestClassifier 13 | from sklearn.externals import joblib 14 | from .feature import get_feature_extractor 15 | from ..const import MODEL_DIR 16 | from ..utils import Singleton 17 | 18 | 19 | _regexModelFilename = re.compile( 20 | pattern=( 21 | r'^(?P\S+)\.model\.' 22 | r'f(?P[1-5])\.' 
23 | r'(?:l(?P\d{1})\.)*' 24 | r'c(?P\d{1})' 25 | r'(?P\.z|\.gz|\.bz2|\.xz|\.lzma)$' 26 | ), 27 | flags=re.I, 28 | ) 29 | 30 | def _get_MODEL_FILES(): 31 | 32 | model_files = {} 33 | for file in os.listdir(MODEL_DIR): 34 | res = _regexModelFilename.match(file) 35 | if res is not None: 36 | filename = res.group() 37 | resDict = res.groupdict() 38 | alg = resDict.pop("alg") 39 | resDict["path"] = os.path.abspath(os.path.join(MODEL_DIR, filename)) 40 | model_files[alg] = resDict 41 | 42 | return model_files 43 | 44 | 45 | _MODEL_FILES = _get_MODEL_FILES() 46 | 47 | 48 | class BaseClassifier(object, metaclass=Singleton): 49 | 50 | ALG = "" 51 | 52 | def __init__(self): 53 | if self.__class__ is __class__: 54 | raise NotImplementedError 55 | clf, feature = self._load_model() 56 | self._clf = clf 57 | self.feature = feature 58 | 59 | def _load_model(self): 60 | alg = self.__class__.ALG 61 | detail = _MODEL_FILES.get(alg) 62 | path, fCode, lCode = map(detail.__getitem__, ("path","feature","level")) 63 | feature = get_feature_extractor(fCode, lCode) 64 | if path is None: 65 | raise FileNotFoundError("model %s.* is missing" % alg) 66 | return joblib.load(path), feature 67 | 68 | def predict(self, Xlist): 69 | return self._clf.predict(Xlist) 70 | 71 | 72 | class RandomForest(BaseClassifier): 73 | 74 | ALG = "RandomForest" 75 | 76 | 77 | class KNN(BaseClassifier): 78 | 79 | ALG = "KNN" 80 | 81 | 82 | class SVM(BaseClassifier): 83 | 84 | ALG = "SVM" -------------------------------------------------------------------------------- /autoelective/captcha/feature.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # filename: feature.py 4 | # modified: 2019-09-08 5 | 6 | __all__ = ["get_feature_extractor"] 7 | 8 | from functools import partial 9 | import numpy as np 10 | 11 | 12 | def _feature1(img): 13 | """ 遍历全部像素 """ 14 | ary = np.array(img.convert("1")) 15 | ary = 1 - ary # 反相 16 | 
return ary.flatten() 17 | 18 | 19 | def _feature2(img): 20 | """ feature2 降维 """ 21 | ary = np.array(img.convert("1")) 22 | ary = 1 - ary # 反相 23 | return np.concatenate([ary.sum(axis=0), ary.sum(axis=1)]) 24 | 25 | 26 | def _feature3(img, level): 27 | """ 考虑临近像素的遍历 """ 28 | ary = np.array(img.convert("1")) 29 | ary = 1 - ary # 反相 30 | l = level 31 | featureVector = [] 32 | for i in range(l, ary.shape[0]-l): 33 | for j in range(l, ary.shape[1]-l): 34 | i1, i2, j1, j2 = i-l, i+l+1, j-l, j+l+1 35 | featureVector.append(np.sum(ary[i1:i2, j1:j2])) # sum block 36 | return np.array(featureVector) 37 | 38 | 39 | def _feature4(img, level): 40 | """ feature3 降维 """ 41 | ary = _feature3(img, level) 42 | s = int(np.sqrt(ary.size)) 43 | assert s**2 == ary.size # 确保为方 44 | ary.resize((s,s)) 45 | return np.concatenate([ary.sum(axis=0), ary.sum(axis=1)]) 46 | 47 | 48 | def _feature5(img, level): 49 | """ feature3 改版,给接近中心的点增加权重 50 | 51 | weight 矩阵例如: 52 | array([[1, 1, 1, 1, 1], 53 | [1, 2, 2, 2, 1], 54 | [1, 2, 3, 2, 1], 55 | [1, 2, 2, 2, 1], 56 | [1, 1, 1, 1, 1]]) 57 | """ 58 | ary = np.array(img.convert("1")) 59 | ary = 1 - ary # 反相 60 | l = level 61 | s = size = 2 * l + 1 62 | weight = np.zeros(s**2, dtype=np.int).reshape((s,s)) 63 | for k in range(l+1): 64 | mask = np.array([ k <= i < s-k and k <= j < s-k for i in range(s) for j in range(s) ]).reshape((s,s)) 65 | weight[mask] += (k + 1)**2 # 等比数列 66 | featureVector = [] 67 | for i in range(l, ary.shape[0]-l): 68 | for j in range(l, ary.shape[1]-l): 69 | i1, i2, j1, j2 = i-l, i+l+1, j-l, j+l+1 70 | featureVector.append(np.sum(ary[i1:i2, j1:j2]*weight)) # sum block with weight 71 | return np.array(featureVector) 72 | 73 | 74 | 75 | _FEATURE_MAP = { 76 | 77 | "1": _feature1, 78 | "2": _feature2, 79 | "3": _feature3, 80 | "4": _feature4, 81 | "5": _feature5, 82 | } 83 | 84 | 85 | def get_feature_extractor(feature, level=""): 86 | feature = str(feature) 87 | if feature in ("1","2"): 88 | func = _FEATURE_MAP[feature] 89 | elif 
feature in ("3","4","5"): 90 | if level == "": 91 | raise ValueError("level must be given for feature %s" % feature) 92 | level = int(level) 93 | if level <= 0: 94 | raise ValueError("level must be a positive integer, not %s" % level) 95 | func = partial(_FEATURE_MAP[feature], level=level) 96 | return func 97 | -------------------------------------------------------------------------------- /autoelective/captcha/model/KNN.model.f5.l1.c1.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbwang2016/PKUAutoElective2/6d1aeac60a0073ab0d68754053fe1823ad866dee/autoelective/captcha/model/KNN.model.f5.l1.c1.bz2 -------------------------------------------------------------------------------- /autoelective/captcha/model/RandomForest.model.f2.c6.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbwang2016/PKUAutoElective2/6d1aeac60a0073ab0d68754053fe1823ad866dee/autoelective/captcha/model/RandomForest.model.f2.c6.bz2 -------------------------------------------------------------------------------- /autoelective/captcha/model/SVM.model.f3.l1.c9.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbwang2016/PKUAutoElective2/6d1aeac60a0073ab0d68754053fe1823ad866dee/autoelective/captcha/model/SVM.model.f3.l1.c9.xz -------------------------------------------------------------------------------- /autoelective/captcha/processor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # filename: processor.py 4 | # modified: 2019-09-08 5 | 6 | __all__ = [ 7 | 8 | "denoise8","denoise24","crop", 9 | 10 | "STEPS4","STEPS8","STEPS24", 11 | 12 | ] 13 | 14 | from PIL import Image 15 | 16 | 17 | _STEPS_LAYER_1 = ((1,1),(1,0),(1,-1),(0,1),(0,-1),(-1,1),(-1,0),(-1,-1)) 18 | _STEPS_LAYER_2 = 
def _denoise(img, steps, threshold, repeat):
    """Template for the denoise passes: whiten isolated dark pixels in place.

    For every non-white pixel, the neighbours reached through ``steps`` are
    inspected; a neighbour counts when it is white or when it lies outside
    the image.  If at least ``threshold`` neighbours count, the pixel is
    painted white.  Pixels are visited column by column and changes are
    visible to pixels visited later in the same pass.

    Args:
        img: mode "1" image exposing ``width``, ``height``, ``getpixel`` and
            ``putpixel``.
        steps: iterable of ``(row_offset, col_offset)`` neighbour offsets.
        threshold: minimum number of counted neighbours needed to whiten.
        repeat: number of full passes over the image.

    Returns:
        The same ``img`` object, modified in place.
    """
    _assert_image_mode_equals_to_1(img)

    width, height = img.width, img.height

    def _counts_as_white(col, row):
        # Positions outside the image are counted exactly like white pixels.
        if not (0 <= col < width and 0 <= row < height):
            return True
        return img.getpixel((col, row)) == _PX_WHITE

    for _ in range(repeat):
        for col in range(width):
            for row in range(height):
                if img.getpixel((col, row)) == _PX_WHITE:
                    continue  # already white, nothing to clean
                white_neighbours = sum(
                    1 for d_row, d_col in steps
                    if _counts_as_white(col + d_col, row + d_row)
                )
                if white_neighbours >= threshold:
                    img.putpixel((col, row), _PX_WHITE)

    return img
def _split_spans(spans):
    """Return exactly four ``(left, right)`` spans derived from 1-4 spans.

    * 1 span:  cut into four pieces of width ``(right - left) // 4``; the
      first two are anchored at the left edge and the last two at the right.
    * 2 spans: the wider span is replaced by three pieces.
    * 3 spans: the wider span is replaced by its two halves.
    * 4 spans: returned unchanged.

    The input list is never modified; a new list is always returned.

    Args:
        spans: sequence of 1 to 4 ``(left, right)`` pairs, in display order.

    Returns:
        list of four ``(left, right)`` tuples.

    Raises:
        AssertionError: if ``spans`` does not hold between 1 and 4 items.
    """
    assert 1 <= len(spans) <= 4, "unexpected number of captcha blocks: %s" % len(spans)

    # Work on a copy so the caller's list is left untouched in every branch
    # (the 2- and 3-span branches used to edit it in place).
    spans = list(spans)

    if len(spans) == 1:
        left, right = spans[0][0], spans[0][1]
        delta = (right - left) // 4
        return [
            (left, left + delta),
            (left + delta, left + delta * 2),
            (right - delta * 2, right - delta),
            (right - delta, right),
        ]

    if len(spans) == 4:
        return spans

    widest = max(spans, key=lambda span: span[1] - span[0])
    idx = spans.index(widest)
    left, right = widest[0], widest[1]

    if len(spans) == 2:
        delta = (right - left) // 3
        pieces = [
            (left, left + delta),
            (left + delta, right - delta),
            (right - delta, right),
        ]
    else:  # three spans: halve the widest one
        mid = sum(widest) // 2
        pieces = [(left, mid), (mid, right)]

    spans[idx:idx + 1] = pieces
    return spans
class BaseClient(object):
    """Abstract HTTP client built around a single ``requests`` session.

    Subclasses provide ``HEADERS``, which are installed as the session's
    default headers.  ``BaseClient`` itself cannot be instantiated.
    """

    HEADERS = {}

    def __init__(self, *args, **kwargs):
        """Create the session.

        Keyword Args:
            timeout: default timeout handed to every request that does not
                pass its own.  Falls back to ``DEFAULT_CLIENT_TIMEOUT``.

        Raises:
            NotImplementedError: when ``BaseClient`` is instantiated directly.
        """
        if self.__class__ is __class__:
            raise NotImplementedError

        if "timeout" in kwargs:
            self._timeout = kwargs["timeout"]
        else:
            # The fallback used to be DEFAULT_CONFIG_INI (the path of the
            # config file), which is not a usable timeout value.
            from .const import DEFAULT_CLIENT_TIMEOUT
            self._timeout = DEFAULT_CLIENT_TIMEOUT

        self._session = Session()
        self._session.headers.update(self.__class__.HEADERS)


    def _request(self, method, url,
            params=None, data=None, headers=None, cookies=None, files=None,
            auth=None, timeout=None, allow_redirects=True, proxies=None,
            hooks=None, stream=None, verify=None, cert=None, json=None):
        """Send a request through the session and return the response.

        Adapted from ``requests/sessions.py`` so that the prepared request
        carries a ``_client`` attribute pointing back at this client.  When
        ``timeout`` is falsy the client's default timeout is used.
        """
        req = Request(
            method=method.upper(),
            url=url,
            headers=headers,
            files=files,
            data=data or {},
            json=json,
            params=params or {},
            auth=auth,
            cookies=cookies,
            hooks=hooks,
        )
        prep = self._session.prepare_request(req)
        prep._client = self  # hold the reference to client

        proxies = proxies or {}

        settings = self._session.merge_environment_settings(
            prep.url, proxies, stream, verify, cert
        )

        # Send the request.
        send_kwargs = {
            'timeout': timeout or self._timeout,  # set default timeout
            'allow_redirects': allow_redirects,
        }
        send_kwargs.update(settings)
        resp = self._session.send(prep, **send_kwargs)

        return resp


    def _get(self, url, params=None, **kwargs):
        """Send a GET request; see :meth:`_request` for the keyword options."""
        return self._request('GET', url, params=params, **kwargs)

    def _post(self, url, data=None, json=None, **kwargs):
        """Send a POST request; see :meth:`_request` for the keyword options."""
        return self._request('POST', url, data=data, json=json, **kwargs)


    def persist_cookies(self, r):
        """Copy the cookies of response ``r`` (and its history) into the session.

        Adapted from ``Session.send()`` in ``requests/sessions.py``, which
        dispatches the response hooks first and only afterwards calls
        ``extract_cookies_to_jar``.

        In this project a hook raises when the response carries an error
        (typically SystemException / TipsException), so the rest of
        ``send()`` never runs.  What the client treats as an error is still a
        normal request for the server, which may have sent Set-Cookie; since
        the cookies were never extracted, the next request would hit an
        expired session.

        Call this after catching such an error to update the cookies by hand
        and keep the session alive.
        """
        if r.history:

            # If the hooks create history then we want those cookies too
            for resp in r.history:
                extract_cookies_to_jar(self._session.cookies, resp.request, resp.raw)

        extract_cookies_to_jar(self._session.cookies, r.request, r.raw)

    def clear_cookies(self):
        """Drop every cookie held by the session."""
        self._session.cookies.clear()
class BaseConfig(object):
    """Abstract read-only wrapper around an INI file.

    The file is parsed once at construction time with ``RawConfigParser``;
    the accessors simply delegate to the parser.
    """

    def __init__(self, config_file=None, allow_no_value=True):
        """Load ``config_file``.

        Raises:
            NotImplementedError: when ``BaseConfig`` is instantiated directly.
            FileNotFoundError: when the resolved path does not exist.
        """
        if self.__class__ is __class__:
            raise NotImplementedError
        absolute = os.path.abspath(config_file)
        path = os.path.normpath(absolute)
        if not os.path.exists(path):
            raise FileNotFoundError("config file was not found: %s" % path)
        parser = RawConfigParser(allow_no_value=allow_no_value)
        # The encoding has to be given explicitly.
        parser.read(path, encoding="utf-8-sig")
        self._config = parser

    def get(self, section, key):
        """Return the raw string value of ``key`` in ``section``."""
        return self._config.get(section, key)

    def getint(self, section, key):
        """Return the value of ``key`` in ``section`` as an int."""
        return self._config.getint(section, key)

    def getfloat(self, section, key):
        """Return the value of ``key`` in ``section`` as a float."""
        return self._config.getfloat(section, key)

    def getboolean(self, section, key):
        """Return the value of ``key`` in ``section`` as a bool."""
        return self._config.getboolean(section, key)
iaaaPassword(self): 67 | return self.get("user", "password") 68 | 69 | @property 70 | def isDualDegree(self): 71 | return self.getboolean("user", "dual_degree") 72 | 73 | @property 74 | def identity(self): 75 | return self.get("user", "identity") 76 | 77 | # [client] 78 | 79 | @property 80 | def supplyCancelPage(self): 81 | return self.getint("client", "supply_cancel_page") 82 | 83 | @property 84 | def refreshInterval(self): 85 | return self.getfloat("client", "refresh_interval") 86 | 87 | @property 88 | def refreshRandomDeviation(self): 89 | return self.getfloat("client", "random_deviation") 90 | 91 | @property 92 | def iaaaClientTimeout(self): 93 | return self.getfloat("client", "iaaa_client_timeout") 94 | 95 | @property 96 | def electiveClientTimeout(self): 97 | return self.getfloat("client", "elective_client_timeout") 98 | 99 | @property 100 | def electiveClientPoolSize(self): 101 | return self.getint("client", "elective_client_pool_size") 102 | 103 | @property 104 | def captchaRefreshInterval(self): 105 | return self.getfloat("client", "captcha_refresh_interval") 106 | 107 | @property 108 | def loginLoopInterval(self): 109 | return self.getfloat("client", "login_loop_interval") 110 | 111 | @property 112 | def isDebugPrintRequest(self): 113 | return self.getboolean("client", "debug_print_request") 114 | 115 | @property 116 | def isDebugDumpRequest(self): 117 | return self.getboolean("client", "debug_dump_request") 118 | 119 | # [monitor] 120 | 121 | @property 122 | def monitorHost(self): 123 | return self.get("monitor", "host") 124 | 125 | @property 126 | def monitorPort(self): 127 | return self.getint("monitor", "port") 128 | 129 | # MAKR: methods 130 | 131 | def check_csv_coding(self, coding): 132 | limited = self.__class__.ALLOWED_CSV_CODING 133 | if coding not in limited: 134 | raise ValueError("unsupported csv coding %s, csv coding must be in %s" % (coding, limited)) 135 | 136 | def check_identify(self, identity): 137 | limited = 
self.__class__.ALLOWED_IDENTIFY 138 | if identity not in limited: 139 | raise ValueError("unsupported identity %s for elective, identity must be in %s" % (identity, limited)) 140 | 141 | def check_supply_cancel_page(self, page): 142 | if page <= 0: 143 | raise ValueError("supply_cancel_page must be positive number, not %s" % page) 144 | 145 | def get_user_subpath(self): 146 | if self.isDualDegree: 147 | identity = self.identity 148 | self.check_identify(identity) 149 | 150 | if identity == "bfx": 151 | return "%s_%s" % (self.iaaaID, identity) 152 | 153 | return self.iaaaID 154 | -------------------------------------------------------------------------------- /autoelective/const.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # filename: const.py 4 | # modified: 2019-09-11 5 | 6 | __all__ = [ 7 | 8 | "SIGNAL_KILL_ALL_PROCESSES", 9 | "SIGNAL_KILL_ALL_THREADS", 10 | 11 | "BASE_DIR", 12 | "MODEL_DIR", 13 | "CACHE_DIR", 14 | "LOG_DIR", 15 | "ERROR_LOG_DIR", 16 | "CAPTCHA_CACHE_DIR", 17 | "REQUEST_LOG_DIR", 18 | 19 | "DEFAULT_COURSE_UTF8_CSV", 20 | "DEFAULT_COURSE_GBK_CSV", 21 | "DEFAULT_CONFIG_INI", 22 | 23 | "USER_AGENT", 24 | "DEFAULT_CLIENT_TIMEOUT", 25 | 26 | "IAAALinks", 27 | "ElectiveLinks", 28 | 29 | ] 30 | 31 | import random 32 | from ._internal import mkdir, abspath 33 | 34 | 35 | SIGNAL_KILL_ALL_PROCESSES = 1 36 | SIGNAL_KILL_ALL_THREADS = 2 37 | 38 | 39 | BASE_DIR = abspath("./") 40 | MODEL_DIR = abspath("./captcha/model/") 41 | CACHE_DIR = abspath("../cache/") 42 | CAPTCHA_CACHE_DIR = abspath("../cache/captcha/") 43 | LOG_DIR = abspath("../log/") 44 | ERROR_LOG_DIR = abspath("../log/error") 45 | REQUEST_LOG_DIR = abspath("../log/request/") 46 | 47 | DEFAULT_COURSE_UTF8_CSV = abspath("../course.utf-8.csv") 48 | DEFAULT_COURSE_GBK_CSV = abspath("../course.gbk.csv") 49 | DEFAULT_CONFIG_INI = abspath("../config.ini") 50 | 51 | 52 | mkdir(CACHE_DIR) 53 | 
mkdir(CAPTCHA_CACHE_DIR) 54 | mkdir(LOG_DIR) 55 | mkdir(ERROR_LOG_DIR) 56 | mkdir(REQUEST_LOG_DIR) 57 | 58 | 59 | # 警惕直接复制的 User-Agent 中可能存在的省略号(通常源自 Firefox 开发者工具),它可能会引发如下错误: 60 | # File "/usr/lib/python3.6/http/client.py", line 1212, in putheader 61 | # values[i] = one_value.encode('latin-1') 62 | # UnicodeEncodeError: 'latin-1' codec can't encode character '\u2026' in position 30: ordinal not in range(256) 63 | USER_AGENT = random.choice([ 64 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36", 65 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36", 66 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 OPR/58.0.3135.65", 67 | "Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0", 68 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0", 69 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36", 70 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0", 71 | "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:63.0) Gecko/20100101 Firefox/63.0", 72 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0", 73 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36", 74 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36", 75 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36", 76 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36", 77 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36", 78 | "Mozilla/5.0 (Macintosh; Intel 
class IAAALinks(object):
    """URL constants for the IAAA service.

    Attributes:
        Host:           host name of the IAAA server
        OauthHomePage:  ``oauth.jsp`` page under the IAAA base URL
        OauthLogin:     ``oauthlogin.do`` endpoint under the IAAA base URL
    """
    Host = "iaaa.pku.edu.cn"

    _BASE_URL = "https://iaaa.pku.edu.cn/iaaa"

    OauthHomePage = "%s/oauth.jsp" % _BASE_URL
    OauthLogin = "%s/oauthlogin.do" % _BASE_URL
class Course(object):
    """A course identified by its name, class number and school.

    Two courses compare equal (and hash equal) when those three fields match;
    ``status`` and ``href`` do not take part in the comparison.
    """

    def __init__(self, name, classNo, school, status=None, href=None):
        self.name = name
        # Normalise to int so that "01" and 1 denote the same class
        # (spreadsheet software turns 01 into 1).
        self.classNo = int(classNo)
        self.school = school
        self.status = status  # (limit, current): seat limit / seats taken
        self.href = href      # link used to elect the course

    def is_available(self):
        """Return True when the seat limit exceeds the seats already taken.

        Raises:
            AssertionError: if ``status`` has not been set.
        """
        assert self.status is not None
        limit, current = self.status
        return limit > current

    def to_simplified(self):
        """Return a copy carrying only name, class number and school."""
        return Course(self.name, self.classNo, self.school)

    def _key(self):
        # The fields that define a course's identity.
        return (self.name, self.classNo, self.school)

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            # Let Python try the reflected comparison; ``==`` still ends up
            # False for unrelated objects.
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        # Defining __eq__ alone makes a class unhashable; hash the same
        # fields that __eq__ compares so courses work in sets and dict keys.
        return hash(self._key())

    def __repr__(self):
        if self.status is not None:
            return "Course(%s, %s, %s, %d / %d)" % (
                self.name, self.classNo, self.school, *self.status)
        else:
            return "Course(%s, %s, %s)" % (
                self.name, self.classNo, self.school)
@property 68 | def captchaNeedsRefresh(self): 69 | return time.time() > self._captcha_expire_time 70 | 71 | def setCaptchaTime(self, interval): 72 | self._captcha_expire_time = time.time() + interval 73 | 74 | def sso_login(self, token, **kwargs): 75 | r = self._get( 76 | url=ElectiveLinks.SSOLogin, 77 | params={ 78 | "rand": str(random.random()), 79 | "token": token, 80 | }, 81 | # 必须要随便指定一个 Cookie 否则无法会报 101 status_code 82 | headers={ 83 | "Cookie": "JSESSIONID=RZ1Qd4rTThr3Y5yN2QL8PTRgnLVQhFz2NpJ17tyvNWnCGfGkpS7R!2042005199", 84 | }, 85 | hooks=_hooks_check_title, 86 | **kwargs, 87 | ) # 无 Referer 88 | return r 89 | 90 | def sso_login_dual_degree(self, sida, sttp, referer, **kwargs): 91 | assert len(sida) == 32 92 | assert sttp in ("bzx", "bfx") 93 | headers = kwargs.pop("headers", {}) 94 | headers["Referer"] = referer # 为之前登录的链接 95 | r = self._get( 96 | url=ElectiveLinks.SSOLoginDualDegree, 97 | params={ 98 | "sida": sida, 99 | "sttp": sttp, 100 | }, 101 | headers=headers, 102 | hooks=_hooks_check_title, 103 | **kwargs, 104 | ) 105 | return r 106 | 107 | def logout(self, **kwargs): 108 | headers = _get_headers_with_referer(kwargs) 109 | r = self._get( 110 | url=ElectiveLinks.Logout, 111 | headers=headers, 112 | hooks=_hooks_check_title, 113 | **kwargs, 114 | ) 115 | return r 116 | 117 | def get_HelpController(self, **kwargs): 118 | """ 帮助 """ 119 | r = self._get( 120 | url=ElectiveLinks.HelpController, 121 | hooks=_hooks_check_title, 122 | **kwargs, 123 | ) # 无 Referer 124 | return r 125 | 126 | def get_PlanController(self, **kwargs): 127 | """ 选课计划 """ 128 | headers = _get_headers_with_referer(kwargs) 129 | r = self._get( 130 | url=ElectiveLinks.ElectivePlanController, 131 | headers=headers, 132 | hooks=_hooks_check_title, 133 | **kwargs, 134 | ) 135 | return r 136 | 137 | def get_WorkController(self, **kwargs): 138 | """ 预选 """ 139 | headers = _get_headers_with_referer(kwargs) 140 | r = self._get( 141 | url=ElectiveLinks.ElectiveWorkController, 142 | 
headers=headers, 143 | hooks=_hooks_check_title, 144 | **kwargs, 145 | ) 146 | return r 147 | 148 | def get_ShowResults(self, **kwargs): 149 | """ 选课结果 """ 150 | headers = _get_headers_with_referer(kwargs) 151 | r = self._get( 152 | url=ElectiveLinks.ShowResults, 153 | headers=headers, 154 | hooks=_hooks_check_title, 155 | **kwargs, 156 | ) 157 | return r 158 | 159 | def get_SupplyCancel(self, **kwargs): 160 | """ 补退选 """ 161 | headers = _get_headers_with_referer(kwargs) 162 | r = self._get( 163 | url=ElectiveLinks.SupplyCancel, 164 | headers=headers, 165 | hooks=_hooks_check_title, 166 | **kwargs, 167 | ) 168 | return r 169 | 170 | def get_supplement(self, page=1, **kwargs): # 辅双第二页,通过输入数字 2 进行跳转 171 | assert page > 0 172 | headers = _get_headers_with_referer(kwargs, ElectiveLinks.SupplyCancel) 173 | r = self._get( 174 | url=ElectiveLinks.Supplement + "?netui_row=electResultLisGrid%3B0", 175 | params={ 176 | # "netui_row": "electResultLisGrid;0", # leave this field in url for duplicate key 'netui_row' 177 | "netui_row": "electableListGrid;%s" % ( (page - 1) * 50 ), 178 | "conflictCourse": "", 179 | }, 180 | headers=headers, 181 | hooks=_hooks_check_title, 182 | **kwargs, 183 | ) 184 | return r 185 | 186 | def get_SupplyOnly(self, **kwargs): 187 | """ 补选 """ 188 | headers = _get_headers_with_referer(kwargs) 189 | r = self._get( 190 | url=ElectiveLinks.SupplyOnly, 191 | headers=headers, 192 | hooks=_hooks_check_title, 193 | **kwargs, 194 | ) 195 | return r 196 | 197 | def get_DrawServlet(self, **kwargs): 198 | """ 获得验证码 """ 199 | headers = _get_headers_with_referer(kwargs, ElectiveLinks.SupplyCancel) 200 | r = self._get( 201 | url=ElectiveLinks.DrawServlet, 202 | params={ 203 | "Rand": str(random.random() * 10000), 204 | }, 205 | headers=headers, 206 | hooks=_hooks_check_status_code, 207 | **kwargs, 208 | ) 209 | return r 210 | 211 | def get_Validate(self, captcha, **kwargs): 212 | headers = _get_headers_with_referer(kwargs, ElectiveLinks.SupplyCancel) 213 | r = 
__all__ = [

    "AutoElectiveException",

    "UserInputException",
    "NotInCoursePlanException",

    "AutoElectiveClientException",

    "StatusCodeError",
    "ServerError",
    "OperationFailedError",

    "IAAAException",
    "IAAANotSuccessError",
    "IAAAIncorrectPasswordError",
    "IAAAForbiddenError",

    "ElectiveException",

    "SystemException",
    "CaughtCheatingError",
    "InvalidTokenError",
    "SessionExpiredError",
    "NotInOperationTimeError",
    "CourseIndexError",
    "CaptchaError",
    "NoAuthInfoError",
    "SharedSessionError",
    "NotAgreedToSelectionAgreement",  # was defined but not exported

    "TipsException",
    "ElectionSuccess",
    "ElectionRepeatedError",
    "TimeConflictError",
    "OperationTimeoutError",
    "ElectionPermissionError",
    "ElectionFailedError",
    "CreditsLimitedError",
    "MutuallyExclusiveCourseError",
    "MultiEnglishCourseError",
    "ExamTimeConflictError",

]


class AutoElectiveException(Exception):
    """ Abstract Exception for AutoElective """

class UserInputException(AutoElectiveException):
    """ Raised when the data supplied by the user is invalid """

class NotInCoursePlanException(UserInputException):
    """ A course listed in the csv file is not in the elective plan """


class AutoElectiveClientException(AutoElectiveException):
    """Base class of the errors raised while talking to a server.

    Every subclass carries a numeric ``code`` and a default description
    ``desc``.  The exception message is ``"[<code>] <msg>"`` where ``msg`` is
    the ``msg`` keyword argument, or ``desc`` when it is not given.

    Keyword Args:
        response: the response that triggered the error, stored on
            ``self.response`` (``None`` when absent).
        msg: text replacing the class description in the message.
    """

    code = -1
    desc = "AutoElectiveException"

    def __init__(self, *args, **kwargs):
        response = kwargs.pop("response", None)
        self.response = response
        msg = "[%d] %s" % (
            self.__class__.code,
            kwargs.pop("msg", self.__class__.desc)
        )
        super().__init__(msg, *args, **kwargs)


class StatusCodeError(AutoElectiveClientException):
    code = 101
    desc = "response.status_code != 200"

    def __init__(self, *args, **kwargs):
        r = kwargs.get("response")
        if r is not None and "msg" not in kwargs:
            # The base class already prefixes the code; describe the error
            # here instead of printing the code a second time.
            kwargs["msg"] = "%s. response status code: %s" % (self.__class__.desc, r.status_code)
        super().__init__(*args, **kwargs)

class ServerError(AutoElectiveClientException):
    code = 102
    desc = r"response.status_code in (500, 501, 502, 503)"

    def __init__(self, *args, **kwargs):
        r = kwargs.get("response")
        if r is not None and "msg" not in kwargs:
            kwargs["msg"] = "%s. response status_code: %s" % (self.__class__.desc, r.status_code)
        super().__init__(*args, **kwargs)

class OperationFailedError(AutoElectiveClientException):
    code = 103
    desc = r"some operations failed for unknown reasons"


class IAAAException(AutoElectiveClientException):
    code = 200
    desc = "IAAAException"


class IAAANotSuccessError(IAAAException):
    code = 210
    desc = "response.json()['success'] == False"

    def __init__(self, *args, **kwargs):
        r = kwargs.get("response")
        if r is not None and "msg" not in kwargs:
            kwargs["msg"] = "%s. response JSON: %s" % (self.__class__.desc, r.json())
        super().__init__(*args, **kwargs)

class IAAAIncorrectPasswordError(IAAANotSuccessError):
    code = 211
    desc = "User ID or Password is incorrect"

class IAAAForbiddenError(IAAANotSuccessError):
    code = 212
    desc = "You are FORBIDDEN. Please sign in after a half hour"


class ElectiveException(AutoElectiveClientException):
    code = 300
    desc = "ElectiveException"


class SystemException(ElectiveException):
    code = 310
    desc = "系统异常"

class CaughtCheatingError(SystemException):
    code = 311
    desc = "请不要用刷课机刷课,否则会受到学校严厉处分!"  # seen when no Referer is set

class InvalidTokenError(SystemException):
    code = 312
    desc = "Token无效"  # seen during sso_login: a login from elsewhere invalidated the cached token

class SessionExpiredError(SystemException):
    code = 313
    desc = "您尚未登录或者会话超时,请重新登录."  # equivalent to an invalid token

class NotInOperationTimeError(SystemException):
    code = 314
    desc = "不在操作时段"

class CourseIndexError(SystemException):
    code = 315
    desc = "索引错误。"

class CaptchaError(SystemException):
    code = 316
    desc = "验证码不正确。"

class NoAuthInfoError(SystemException):
    code = 317
    desc = "无验证信息。"  # only appears for dual-degree logins

class SharedSessionError(SystemException):
    code = 318
    desc = "你与他人共享了回话,请退出浏览器重新登录。"

class NotAgreedToSelectionAgreement(SystemException):
    code = 319
    # The original literal had a stray line-break character before the
    # closing quote; it is dropped here.
    desc = "只有同意选课协议才可以继续选课!"


class TipsException(ElectiveException):
    code = 330
    desc = "TipsException"

class ElectionSuccess(TipsException):
    code = 331
    desc = "补选课程成功,请查看已选上列表确认,并查看选课结果。"

class ElectionRepeatedError(TipsException):
    code = 332
    desc = "您已经选过该课程了。"

class TimeConflictError(TipsException):
    code = 333
    desc = "上课时间冲突"

class OperationTimeoutError(TipsException):
    code = 334
    desc = "对不起,超时操作,请重新登录。"

class ElectionPermissionError(TipsException):
    code = 335
    desc = "该课程在补退选阶段开始后的约一周开放选课"

class ElectionFailedError(TipsException):
    code = 336
    desc = "选课操作失败,请稍后再试。"

# The four codes below used to be 327-330, which broke the 33x sequence and
# made ExamTimeConflictError share code 330 with TipsException itself.
class CreditsLimitedError(TipsException):
    code = 337
    desc = "您本学期所选课程的总学分已经超过规定学分上限。"

class MutuallyExclusiveCourseError(TipsException):
    code = 338
    desc = "只能选其一门。"

class MultiEnglishCourseError(TipsException):
    code = 339
    desc = "学校规定每学期只能修一门英语课,因此您不能选择该课。"

class ExamTimeConflictError(TipsException):
    code = 340
    desc = "考试时间冲突"
from .utils import pickle_gzip_dump 33 | from .const import REQUEST_LOG_DIR 34 | from .exceptions import * 35 | from ._internal import mkdir 36 | 37 | 38 | _logger = ConsoleLogger("hook") 39 | _config = AutoElectiveConfig() 40 | 41 | _USER_REQUEST_LOG_DIR = os.path.join(REQUEST_LOG_DIR, _config.get_user_subpath()) 42 | mkdir(_USER_REQUEST_LOG_DIR) 43 | 44 | # __regex_errInfo = re.compile(r"出错提示:(\S+?)
", re.S) 45 | _regexErrorOperatingTime = re.compile(r'目前不是(\S+?)时间,因此不能进行相应操作。') 46 | _regexElectionSuccess = re.compile(r'补选课程(\S+)成功,请查看已选上列表确认,并查看选课结果。') 47 | _regexMutex = re.compile(r'(\S+)与(\S+)只能选其一门。') 48 | 49 | _DUMMY_HOOK = {"response": []} 50 | 51 | 52 | def get_hooks(*fn): 53 | return {"response": fn} 54 | 55 | 56 | def merge_hooks(*hooklike): 57 | funcs = [] 58 | for hook in hooklike: 59 | if isinstance(hook, dict): 60 | funcs.extend(hook["response"]) 61 | elif callable(hook): # function 62 | funcs.append(hook) 63 | else: 64 | raise TypeError(hook) 65 | return get_hooks(*funcs) 66 | 67 | 68 | def with_etree(r, **kwargs): 69 | r._tree = get_tree_from_response(r) 70 | 71 | 72 | def del_etree(r, **kwargs): 73 | del r._tree 74 | 75 | 76 | def check_status_code(r, **kwargs): 77 | if r.status_code != 200: 78 | if r.status_code in (301,302,304): 79 | pass 80 | elif r.status_code in (500,501,502,503): 81 | raise ServerError(response=r) 82 | else: 83 | raise StatusCodeError(response=r) 84 | 85 | 86 | def check_iaaa_success(r, **kwargs): 87 | respJson = r.json() 88 | 89 | if not respJson.get("success", False): 90 | try: 91 | errors = respJson["errors"] 92 | code = errors["code"] 93 | msg = errors["msg"] 94 | except Exception as e: 95 | cout.error(e) 96 | cout.info("Unable to get errcode/errmsg from response JSON") 97 | pass 98 | else: 99 | if code == "E01": 100 | raise IAAAIncorrectPasswordError(response=r, msg=msg) 101 | elif code == "E21": 102 | raise IAAAForbiddenError(response=r, msg=msg) 103 | 104 | raise IAAANotSuccessError(response=r) 105 | 106 | 107 | def check_elective_title(r, **kwargs): 108 | assert hasattr(r, "_tree") 109 | 110 | title = get_title(r._tree) 111 | if title is None: 112 | return 113 | 114 | try: 115 | if title == "系统异常": 116 | # err = __regex_errInfo.search(r.text).group(1) 117 | err = get_errInfo(r._tree) 118 | 119 | if err == "token无效": # sso_login 时出现 120 | raise InvalidTokenError(response=r) 121 | 122 | elif err == 
"您尚未登录或者会话超时,请重新登录.": 123 | raise SessionExpiredError(response=r) 124 | 125 | elif err == "请不要用刷课机刷课,否则会受到学校严厉处分!": 126 | raise CaughtCheatingError(response=r) 127 | 128 | elif err == "索引错误。": 129 | raise CourseIndexError(response=r) 130 | 131 | elif err == "验证码不正确。": 132 | raise CaptchaError(response=r) 133 | 134 | elif err == "无验证信息。": 135 | raise NoAuthInfoError(response=r) 136 | 137 | elif err == "你与他人共享了回话,请退出浏览器重新登录。": 138 | raise SharedSessionError(response=r) 139 | 140 | elif err == "只有同意选课协议才可以继续选课!": 141 | raise NotAgreedToSelectionAgreement(response=r) 142 | 143 | elif _regexErrorOperatingTime.search(err): 144 | raise NotInOperationTimeError(response=r, msg=err) 145 | 146 | else: 147 | raise SystemException(response=r, msg=err) 148 | 149 | except Exception as e: 150 | if "_client" in r.request.__dict__: # _client will be set by BaseClient 151 | r.request._client.persist_cookies(r) 152 | raise e 153 | 154 | 155 | def check_elective_tips(r, **kwargs): 156 | assert hasattr(r, "_tree") 157 | tips = get_tips(r._tree) 158 | 159 | try: 160 | 161 | if tips is None: 162 | return 163 | 164 | elif tips == "您已经选过该课程了。": 165 | raise ElectionRepeatedError(response=r) 166 | 167 | elif tips == "对不起,超时操作,请重新登录。": 168 | raise OperationTimeoutError(response=r) 169 | 170 | elif tips == "选课操作失败,请稍后再试。": 171 | raise ElectionFailedError(response=r) 172 | 173 | elif tips == "您本学期所选课程的总学分已经超过规定学分上限。": 174 | raise CreditsLimitedError(response=r) 175 | 176 | elif tips == "学校规定每学期只能修一门英语课,因此您不能选择该课。": 177 | raise MultiEnglishCourseError(response=r) 178 | 179 | elif tips.startswith("上课时间冲突"): 180 | raise TimeConflictError(response=r, msg=tips) 181 | 182 | elif tips.startswith("考试时间冲突"): 183 | raise ExamTimeConflictError(response=r, msg=tips) 184 | 185 | elif tips.startswith("该课程在补退选阶段开始后的约一周开放选课"): # 这个可能需要根据当学期情况进行修改 186 | raise ElectionPermissionError(response=r, msg=tips) 187 | 188 | elif _regexElectionSuccess.search(tips): 189 | raise ElectionSuccess(response=r, msg=tips) 190 | 
191 | elif _regexMutex.search(tips): 192 | raise MutuallyExclusiveCourseError(response=r, msg=tips) 193 | 194 | else: 195 | _logger.warning("Unknown tips: %s" % tips) 196 | # raise TipsException(response=r, msg=tips) 197 | 198 | except Exception as e: 199 | if "_client" in r.request.__dict__: # _client will be set by BaseClient 200 | r.request._client.persist_cookies(r) 201 | raise e 202 | 203 | 204 | def debug_print_request(r, **kwargs): 205 | if not _config.isDebugPrintRequest: 206 | return 207 | _logger.debug("> %s %s" % (r.request.method, r.url)) 208 | _logger.debug("> Headers:") 209 | for k, v in r.request.headers.items(): 210 | _logger.debug("%s: %s" % (k, v)) 211 | _logger.debug("> Body:") 212 | _logger.debug(r.request.body) 213 | _logger.debug("> Response Headers:") 214 | for k, v in r.headers.items(): 215 | _logger.debug("%s: %s" % (k, v)) 216 | _logger.debug("") 217 | 218 | 219 | def debug_dump_request(r, **kwargs): 220 | if not _config.isDebugDumpRequest: 221 | return 222 | 223 | if "_client" in r.request.__dict__: # _client will be set by BaseClient 224 | client = r.request._client 225 | r.request._client = None # don't save client object 226 | 227 | hooks = r.request.hooks 228 | r.request.hooks = _DUMMY_HOOK # don't save hooks array 229 | 230 | timestamp = time.strftime("%Y-%m-%d_%H.%M.%S%z") 231 | basename = quote(urlparse(r.url).path, '') 232 | filename = "%s.%s.gz" % (timestamp, basename) # put timestamp first 233 | file = os.path.normpath(os.path.abspath(os.path.join(_USER_REQUEST_LOG_DIR, filename))) 234 | 235 | _logger.debug("Dump request %s to %s" % (r.url, file)) 236 | pickle_gzip_dump(r, file) 237 | 238 | # restore objects defined by autoelective package 239 | if "_client" in r.request.__dict__: 240 | r.request._client = client 241 | r.request.hooks = hooks 242 | -------------------------------------------------------------------------------- /autoelective/iaaa.py: 
class BaseLogger(object):
    """Thin wrapper around a named ``logging.Logger`` with one handler.

    Subclasses provide the handler through ``_get_headler`` and may override
    ``LEVEL``. Instantiating BaseLogger itself raises NotImplementedError.
    """

    LEVEL = logging.DEBUG
    FORMAT = logging.Formatter("[%(levelname)s] %(name)s, %(asctime)s, %(message)s", "%H:%M:%S")

    def __init__(self, name):
        if self.__class__ is __class__:
            raise NotImplementedError
        self._name = name
        self._logger = logging.getLogger(name)
        self._logger.setLevel(self.__class__.LEVEL)
        self._logger.addHandler(self._get_headler())

    @property
    def name(self):
        """Name passed to ``logging.getLogger``."""
        return self._name

    @property
    def handlers(self):
        """Handlers currently attached to the underlying logger."""
        return self._logger.handlers

    def _get_headler(self):
        """Return the handler to attach; must be implemented by subclasses."""
        raise NotImplementedError

    def log(self, level, msg, *args, **kwargs):
        return self._logger.log(level, msg, *args, **kwargs)

    def debug(self, msg, *args, **kwargs):
        return self._logger.debug(msg, *args, **kwargs)

    def info(self, msg, *args, **kwargs):
        return self._logger.info(msg, *args, **kwargs)

    def warn(self, msg, *args, **kwargs):
        # Kept as an alias for callers, but routed to warning():
        # Logger.warn is deprecated and no longer exists on Python 3.13.
        return self._logger.warning(msg, *args, **kwargs)

    def warning(self, msg, *args, **kwargs):
        return self._logger.warning(msg, *args, **kwargs)

    def error(self, msg, *args, **kwargs):
        return self._logger.error(msg, *args, **kwargs)

    def exception(self, msg, *args, **kwargs):
        kwargs.setdefault("exc_info", True)
        return self._logger.exception(msg, *args, **kwargs)

    def fatal(self, msg, *args, **kwargs):
        return self._logger.fatal(msg, *args, **kwargs)

    def critical(self, msg, *args, **kwargs):
        return self._logger.critical(msg, *args, **kwargs)


class ConsoleLogger(BaseLogger):
    """Logger that writes to the console through a ``StreamHandler``."""

    LEVEL = logging.DEBUG

    def _get_headler(self):
        headler = logging.StreamHandler()
        headler.setLevel(self.__class__.LEVEL)
        headler.setFormatter(self.__class__.FORMAT)
        return headler


class FileLogger(BaseLogger):
    """Logger that writes WARNING and above to a per-user, per-day file."""

    LEVEL = logging.WARNING

    def _get_headler(self):
        config = AutoElectiveConfig()

        # One sub-directory per user, created on demand.
        USER_ERROR_LOG_DIR = os.path.join(ERROR_LOG_DIR, config.get_user_subpath())
        mkdir(USER_ERROR_LOG_DIR)

        filename = "%s.%s.log" % (
            self.name,
            datetime.date.today().strftime("%Y%m%d")
        )
        file = os.path.join(USER_ERROR_LOG_DIR, filename)
        headler = logging.FileHandler(file, encoding="utf-8-sig")
        headler.setLevel(self.__class__.LEVEL)
        headler.setFormatter(self.__class__.FORMAT)
        return headler
import __version__, __date__ 16 | from .course import Course 17 | from .captcha import CaptchaRecognizer 18 | from .config import AutoElectiveConfig 19 | from .parser import load_course_csv, get_tables, get_courses, get_courses_with_detail, get_sida 20 | from .logger import ConsoleLogger, FileLogger 21 | from .const import SIGNAL_KILL_ALL_PROCESSES 22 | from .exceptions import * 23 | from ._internal import userInfo as _userInfo 24 | 25 | 26 | def main(signals, userInfo, goals=None, ignored=None, status=None): 27 | 28 | _userInfo.update(userInfo) # setup userInfo first 29 | config = AutoElectiveConfig() # create singleton first 30 | 31 | # import clients here, after the config singleton has already been inited 32 | from .iaaa import IAAAClient 33 | from .elective import ElectiveClient 34 | 35 | # don't forget to nonlocal variables first in nested functions 36 | username = config.iaaaID 37 | password = config.iaaaPassword 38 | isDualDegree = config.isDualDegree 39 | identity = config.identity 40 | interval = config.refreshInterval 41 | deviation = config.refreshRandomDeviation 42 | page = config.supplyCancelPage 43 | iaaaTimeout = config.iaaaClientTimeout 44 | electiveTimeout = config.electiveClientTimeout 45 | loginLoopInterval = config.loginLoopInterval 46 | captchaRefreshInterval = config.captchaRefreshInterval 47 | electivePoolSize = config.electiveClientPoolSize 48 | 49 | config.check_identify(identity) 50 | config.check_supply_cancel_page(page) 51 | 52 | cout = ConsoleLogger("loop") 53 | ferr = FileLogger("loop.error") # loop 的子日志,同步输出到 console 54 | 55 | recognizer = CaptchaRecognizer() 56 | 57 | electivePool = Queue(maxsize=electivePoolSize) 58 | reloginPool = Queue(maxsize=electivePoolSize) 59 | 60 | statusLock = Lock() 61 | killallEvent = Event() 62 | 63 | 64 | class _ElectiveNeedsLogin(Exception): 65 | pass 66 | 67 | 68 | def _get_refresh_interval(): 69 | nonlocal interval, deviation 70 | if deviation <= 0: 71 | return interval 72 | delta = (random.random() 
* 2 - 1) * deviation * interval 73 | return interval + delta 74 | 75 | 76 | def _has_candidates(goals, ignored): 77 | _ignored = [ x[0] for x in ignored ] 78 | count = 0 79 | for course in goals: 80 | if course in _ignored: 81 | continue 82 | count += 1 83 | return count > 0 84 | 85 | 86 | def _get_available_courses(goals, plans, elected, ignored): 87 | queue = deque() 88 | _ignored = [ x[0] for x in ignored ] 89 | for c0 in goals: 90 | if c0 in _ignored: 91 | continue 92 | for c in elected: 93 | if c == c0: 94 | cout.info("%s is elected, ignored" % c0) 95 | ignored.append( (c0, "Elected") ) 96 | break 97 | else: 98 | for c in plans: 99 | if c == c0: 100 | if c.is_available(): 101 | queue.append(c) 102 | cout.info("%s is AVAILABLE now !" % c) 103 | break 104 | else: 105 | raise NotInCoursePlanException("%s is not in your course plan." % c0) 106 | return queue 107 | 108 | 109 | def _task_setup_pools(): 110 | nonlocal electivePoolSize, electiveTimeout 111 | for i in range(1, electivePoolSize+1): 112 | electivePool.put_nowait(ElectiveClient(id=i, timeout=electiveTimeout)) 113 | 114 | 115 | def _task_print_header(): 116 | header = "# PKU Auto-Elective Tool v%s (%s) #" % (__version__, __date__) 117 | line = "#" + "-"*(len(header) - 2) + "#" 118 | cout.info(line) 119 | cout.info(header) 120 | cout.info(line) 121 | cout.info("") 122 | 123 | 124 | def _task_print_goals(goals, ignored): 125 | """ 输出待选课程 """ 126 | if not _has_candidates(goals, ignored): 127 | return 128 | line = "-" * 30 129 | cout.info("> Current tasks") 130 | cout.info(line) 131 | idx = 1 132 | _ignored = [ x[0] for x in ignored ] 133 | for course in goals: 134 | if course in _ignored: 135 | continue 136 | cout.info("%02d. 
%s" % (idx, course)) 137 | idx += 1 138 | cout.info(line) 139 | cout.info("") 140 | 141 | 142 | def _task_print_ignored(ignored): 143 | """ 输出忽略列表 """ 144 | if len(ignored) == 0: 145 | return 146 | line = "-" * 30 147 | cout.info("> Ignored tasks") 148 | cout.info(line) 149 | idx = 1 150 | for course, reason in ignored: 151 | cout.info("%02d. %s %s" % (idx, course, reason)) 152 | idx += 1 153 | cout.info(line) 154 | cout.info("") 155 | 156 | 157 | def _task_validate_captcha(elective): 158 | """ 填一次验证码 """ 159 | while True: 160 | cout.info("Fetch a captcha (client: %s)" % elective.id) 161 | r = elective.get_DrawServlet() 162 | try: 163 | captcha = recognizer.recognize(r.content) # 可能识别分割错误 164 | except Exception as e: 165 | ferr.error(e) 166 | raise OperationFailedError(msg="Unable to recognize captcha") 167 | code = captcha.code 168 | cout.info("Recognition result: %s" % code) 169 | 170 | r = elective.get_Validate(code) 171 | try: 172 | res = r.json()["valid"] # 可能会返回一个错误网页 ... 173 | except Exception as e: 174 | ferr.error(e) 175 | raise OperationFailedError(msg="Unable to validate captcha") 176 | 177 | if res == "2": 178 | cout.info("Validation passed!") 179 | captcha.clear_cache() 180 | cout.info("Clear captcha cache") 181 | break 182 | elif res == "0": 183 | cout.info("Validation failed, try again") 184 | else: 185 | cout.warning("Unknown validation result: %s" % res) 186 | elective.setCaptchaTime(captchaRefreshInterval) 187 | 188 | 189 | def _task_increase_error_count(status, error): 190 | if status is None: 191 | return 192 | 193 | cls = error.__class__ 194 | name = cls.__name__ 195 | if hasattr(cls, "code"): 196 | code = error.code 197 | key = "[%s] %s" % (code, name) 198 | else: 199 | key = name 200 | 201 | with statusLock: 202 | if key not in status["errors"]: 203 | status["errors"][key] = 1 204 | else: 205 | status["errors"][key] += 1 206 | status["error_count"] += 1 207 | 208 | 209 | def _task_print_current_thread_killed(): 210 | cout.info("Kill thread 
%s" % current_thread().name) 211 | 212 | 213 | def _task_send_signal_to_kill_all_blocking_threads(): 214 | if electivePool.empty(): 215 | electivePool.put_nowait(None) 216 | if reloginPool.empty(): 217 | reloginPool.put_nowait(None) 218 | 219 | 220 | def _thread_login_loop(status): 221 | 222 | nonlocal username, password, isDualDegree, identity 223 | nonlocal iaaaTimeout, loginLoopInterval 224 | 225 | loop = 0 226 | elective = None 227 | 228 | shouldQuitImmediately = False 229 | 230 | 231 | def _update_loop(): 232 | nonlocal loop 233 | loop += 1 234 | if status is not None: 235 | status["login_loop"] = loop 236 | 237 | 238 | while True: 239 | 240 | shouldQuitImmediately = False 241 | 242 | if killallEvent.is_set(): # a signal to kill this thread 243 | _task_print_current_thread_killed() 244 | return 245 | 246 | if elective is None: 247 | elective = reloginPool.get() 248 | if elective is None: # a signal to kill this thread 249 | _task_print_current_thread_killed() 250 | return 251 | 252 | _update_loop() 253 | 254 | try: 255 | cout.info("Try to login IAAA (client: %s)" % elective.id) 256 | 257 | iaaa = IAAAClient(timeout=iaaaTimeout) # not reusable 258 | 259 | r = iaaa.oauth_login(username, password) 260 | 261 | try: 262 | token = r.json()["token"] 263 | except Exception as e: 264 | ferr.error(e) 265 | raise OperationFailedError(msg="Unable to parse IAAA token. 
response body: %s" % r.content) 266 | 267 | elective.clear_cookies() 268 | r = elective.sso_login(token) 269 | 270 | if isDualDegree: 271 | sida = get_sida(r) 272 | sttp = identity 273 | referer = r.url 274 | _ = elective.sso_login_dual_degree(sida, sttp, referer) 275 | 276 | cout.info("Login success (client: %s)" % elective.id) 277 | _task_validate_captcha(elective) 278 | 279 | electivePool.put_nowait(elective) 280 | elective = None 281 | 282 | except (ServerError, StatusCodeError) as e: 283 | ferr.error(e) 284 | cout.warning("ServerError/StatusCodeError encountered") 285 | _task_increase_error_count(status, e) 286 | 287 | except OperationFailedError as e: 288 | ferr.error(e) 289 | cout.warning("OperationFailedError encountered") 290 | _task_increase_error_count(status, e) 291 | 292 | except RequestException as e: 293 | ferr.error(e) 294 | cout.warning("RequestException encountered") 295 | _task_increase_error_count(status, e) 296 | 297 | except IAAAIncorrectPasswordError as e: 298 | cout.error(e) 299 | _task_increase_error_count(status, e) 300 | shouldQuitImmediately = True 301 | raise e 302 | 303 | except IAAAForbiddenError as e: 304 | ferr.error(e) 305 | _task_increase_error_count(status, e) 306 | shouldQuitImmediately = True 307 | raise e 308 | 309 | except IAAAException as e: 310 | ferr.error(e) 311 | cout.warning("IAAAException encountered") 312 | _task_increase_error_count(status, e) 313 | 314 | except CaughtCheatingError as e: 315 | ferr.critical(e) # 严重错误 316 | _task_increase_error_count(status, e) 317 | shouldQuitImmediately = True 318 | raise e 319 | 320 | except ElectiveException as e: 321 | ferr.error(e) 322 | cout.warning("ElectiveException encountered") 323 | _task_increase_error_count(status, e) 324 | 325 | except json.JSONDecodeError as e: 326 | ferr.error(e) 327 | cout.warning("JSONDecodeError encountered") 328 | _task_increase_error_count(status, e) 329 | 330 | except KeyboardInterrupt as e: 331 | _task_increase_error_count(status, e) 332 | 
def _thread_main_loop(goals, ignored, status):
    """Main election loop: refresh the course page and elect what is free.

    Each round takes a client from the elective pool, refreshes the
    supply/cancel page, and tries to elect every goal that is available.
    Clients that need a (re)login are handed to the relogin pool. The loop
    ends when no goals remain, when the kill-all event is set, or on an
    error that is treated as fatal; in the last two cases the other threads
    are signalled as well.

    ``goals`` is the list of wanted courses, ``ignored`` the shared list of
    (course, reason) pairs, ``status`` an optional shared mapping that
    receives loop and error counters.
    """
    # `page` is only read in this function, so no `nonlocal` is required.

    loop = 0
    elective = None

    shouldQuitImmediately = False
    shouldEnterNextLoopImmediately = False

    def _update_loop():
        nonlocal loop
        loop += 1
        if status is not None:
            status["main_loop"] = loop

    def _ignore_course(course, reason):
        ignored.append( (course.to_simplified(), reason) )

    _task_setup_pools()
    _task_print_header()

    while True:

        shouldQuitImmediately = False
        shouldEnterNextLoopImmediately = False

        if killallEvent.is_set(): # a signal to kill this thread
            _task_print_current_thread_killed()
            return

        if not _has_candidates(goals, ignored):
            cout.info("No tasks, exit")
            killallEvent.set()
            _task_print_current_thread_killed()
            _task_send_signal_to_kill_all_blocking_threads()
            return

        _update_loop()

        cout.info("")
        cout.info("======== Loop %d ========" % loop)
        cout.info("")

        # MARK: print current plans

        _task_print_goals(goals, ignored)
        _task_print_ignored(ignored)

        try:
            if elective is None:
                elective = electivePool.get()
                if elective is None: # a signal to kill this thread
                    shouldQuitImmediately = True
                    return # log will be print in `finally` block

            cout.info("> Current client: %s, (qsize: %s)" % (elective.id, electivePool.qsize() + 1))
            cout.info("")

            if not elective.hasLogined:
                raise _ElectiveNeedsLogin # quit this loop

            if elective.captchaNeedsRefresh:
                try:
                    _task_validate_captcha(elective)
                except Exception as e:
                    cout.warning("Captcha validation failed... relogin!")
                    raise SessionExpiredError()

            # MARK: check supply/cancel page

            if page == 1:

                cout.info("Get SupplyCancel page %s" % page)

                resp = elective.get_SupplyCancel()
                tables = get_tables(resp._tree)
                elected = get_courses(tables[1])
                plans = get_courses_with_detail(tables[0])

            else:
                #
                # When refreshing a page other than the first one, the first
                # request may come back as an empty page.
                #
                # How to reproduce:
                # 1. log in with the dual-degree identity and open page 2
                #    of supply/cancel
                # 2. log in with the major identity in the same browser
                # 3. refresh the dual-degree page 2
                #
                # -----------------------------------------------
                #
                # The retry is bounded so that an unusual failure cannot
                # make it loop forever. Normally the first attempt works;
                # at most 3 attempts are made to ride out sporadic errors.
                #
                retry = 3
                while True:
                    if retry == 0:
                        raise OperationFailedError(msg="unable to get normal Supplement page %s" % page)

                    cout.info("Get Supplement page %s" % page)
                    resp = elective.get_supplement(page=page) # e.g. page 2 for dual degree
                    tables = get_tables(resp._tree)
                    try:
                        elected = get_courses(tables[1])
                        plans = get_courses_with_detail(tables[0])
                    except IndexError as e:
                        cout.warning("IndexError encountered")
                        cout.info("Get SupplyCancel first to prevent empty table returned")
                        # after one request to the supply/cancel home page,
                        # later refreshes return the table again
                        _ = elective.get_SupplyCancel()
                    else:
                        break
                    finally:
                        retry -= 1

            # MARK: check available courses

            cout.info("Get available courses")
            queue = _get_available_courses(goals, plans, elected, ignored)

            # MARK: elect available courses

            if len(queue) == 0:
                cout.info("No courses available")
                continue
            while len(queue) > 0:
                course = queue.popleft()
                cout.info("Try to elect %s" % course)

                retryRequired = True
                while retryRequired:
                    retryRequired = False
                    try:
                        resp = elective.get_ElectSupplement(course.href)

                    except ElectionRepeatedError as e:
                        ferr.error(e)
                        cout.warning("ElectionRepeatedError encountered")
                        _ignore_course(course, reason="Repeated")
                        _task_increase_error_count(status, e)

                    except TimeConflictError as e:
                        ferr.error(e)
                        cout.warning("TimeConflictError encountered")
                        _ignore_course(course, reason="Time conflict")
                        _task_increase_error_count(status, e)

                    except ExamTimeConflictError as e:
                        ferr.error(e)
                        cout.warning("ExamTimeConflictError encountered")
                        _ignore_course(course, reason="Exam time conflict")
                        _task_increase_error_count(status, e)

                    except ElectionPermissionError as e:
                        ferr.error(e)
                        cout.warning("ElectionPermissionError encountered")
                        _ignore_course(course, reason="Permission required")
                        _task_increase_error_count(status, e)

                    except CreditsLimitedError as e:
                        ferr.error(e)
                        cout.warning("CreditsLimitedError encountered")
                        _ignore_course(course, reason="Credits limited")
                        _task_increase_error_count(status, e)

                    except MutuallyExclusiveCourseError as e:
                        ferr.error(e)
                        cout.warning("MutuallyExclusiveCourseError encountered")
                        _ignore_course(course, reason="Mutual exclusive")
                        _task_increase_error_count(status, e)

                    except MultiEnglishCourseError as e:
                        ferr.error(e)
                        cout.warning("MultiEnglishCourseError encountered")
                        _ignore_course(course, reason="Multi English course")
                        _task_increase_error_count(status, e)

                    except ElectionSuccess as e:
                        # Not added to `ignored` here: the next round decides
                        # from the actual elected list.
                        cout.info("%s is ELECTED !" % course)

                    except ElectionFailedError as e:
                        ferr.error(e)
                        # cause unknown, and an immediate retry is not possible
                        cout.warning("ElectionFailedError encountered")
                        _task_increase_error_count(status, e)

                    except Exception as e:
                        raise e # don't increase error count here

        except NotInCoursePlanException as e:
            cout.error(e)
            _task_increase_error_count(status, e)
            shouldQuitImmediately = True
            raise e

        except (ServerError, StatusCodeError) as e:
            ferr.error(e)
            cout.warning("ServerError/StatusCodeError encountered")
            _task_increase_error_count(status, e)

        except OperationFailedError as e:
            ferr.error(e)
            cout.warning("OperationFailedError encountered")
            _task_increase_error_count(status, e)

        except RequestException as e:
            ferr.error(e)
            cout.warning("RequestException encountered")
            _task_increase_error_count(status, e)

        except IAAAException as e:
            ferr.error(e)
            cout.warning("IAAAException encountered")
            _task_increase_error_count(status, e)

        except _ElectiveNeedsLogin as e:
            cout.warning("client: %s needs Login" % elective.id)
            reloginPool.put_nowait(elective)
            elective = None
            shouldEnterNextLoopImmediately = True

        except (SessionExpiredError, InvalidTokenError, NoAuthInfoError, SharedSessionError) as e:
            ferr.error(e)
            _task_increase_error_count(status, e)
            cout.info("client: %s needs relogin" % elective.id)
            reloginPool.put_nowait(elective)
            elective = None
            shouldEnterNextLoopImmediately = True

        except CaughtCheatingError as e:
            ferr.critical(e) # critical error
            _task_increase_error_count(status, e)
            shouldQuitImmediately = True
            raise e

        except SystemException as e:
            ferr.error(e)
            cout.warning("SystemException encountered")
            _task_increase_error_count(status, e)

        # OperationTimeoutError derives from TipsException, so it must be
        # handled first; placed after it, this handler could never run.
        except OperationTimeoutError as e:
            ferr.error(e)
            cout.warning("OperationTimeoutError encountered")
            _task_increase_error_count(status, e)

        except TipsException as e:
            ferr.error(e)
            cout.warning("TipsException encountered")
            _task_increase_error_count(status, e)

        except json.JSONDecodeError as e:
            ferr.error(e)
            cout.warning("JSONDecodeError encountered")
            _task_increase_error_count(status, e)

        except KeyboardInterrupt as e:
            _task_increase_error_count(status, e)
            shouldQuitImmediately = True
            raise e

        except Exception as e:
            ferr.exception(e)
            _task_increase_error_count(status, e)
            shouldQuitImmediately = True
            raise e

        finally:
            # Everything below runs on every exit from the `try`, including
            # the `continue` taken when no course is available.
            if shouldQuitImmediately:
                killallEvent.set()
                _task_print_current_thread_killed()
                _task_send_signal_to_kill_all_blocking_threads()
                return

            if elective is not None: # change elective client
                electivePool.put_nowait(elective)
                elective = None

            if shouldEnterNextLoopImmediately:
                cout.info("")
                cout.info("======== END Loop %d ========" % loop)
                cout.info("")
            else:
                t = _get_refresh_interval()
                cout.info("")
                cout.info("======== END Loop %d ========" % loop)
                cout.info("Main loop sleep %s s" % t)
                cout.info("")
                time.sleep(t)
def main(signals, userInfo, goals, ignored, status):
    """Run the HTTP monitor that reports the state of the election loops.

    ``goals``, ``ignored`` and ``status`` are the objects shared with the
    loop process; the routes only read them. ``signals`` is accepted for
    signature parity with the loop entry point and is not used here.
    The call blocks inside ``monitor.run``.
    """

    _userInfo.update(userInfo) # setup userInfo first
    config = AutoElectiveConfig() # create singleton first

    cout = ConsoleLogger("monitor")
    ferr = ConsoleLogger("monitor.error")

    monitor = Flask(__name__)

    # MARK: register routes

    @monitor.route("/", methods=["GET"])
    @monitor.route("/rules", methods=["GET"])
    def get_root():
        """List every registered route as "METHODS rule"."""
        rules = []
        for r in sorted(current_app.url_map.iter_rules(), key=lambda r: r.rule):
            line = "{method} {rule}".format(
                method=','.join( m for m in r.methods if m not in ("HEAD","OPTIONS") ),
                rule=r.rule
            )
            rules.append(line)
        return jsonify(rules)

    @monitor.route("/main_loop", methods=["GET"])
    def get_main_loop():
        return str(status["main_loop"])

    @monitor.route("/login_loop", methods=["GET"])
    def get_login_loop():
        return str(status["login_loop"])

    @monitor.route("/goals", methods=["GET"])
    def get_goals():
        return jsonify([ str(course) for course in goals ])

    @monitor.route("/current",methods=["GET"])
    def get_current():
        """Goals that have not been moved to the ignored list."""
        _ignored = [ x[0] for x in ignored ]
        return jsonify([ str(course) for course in goals if course not in _ignored ])

    @monitor.route("/ignored", methods=["GET"])
    def get_ignored():
        return jsonify([ "%s  %s" % (course, reason) for (course, reason) in ignored ])

    @monitor.route("/all", methods=["GET"])
    def get_all():
        _ignored = [ x[0] for x in ignored ]
        return jsonify(
            {
                "goals": [ str(course) for course in goals ],
                "current": [ str(course) for course in goals if course not in _ignored ],
                "ignored": [ "%s  %s" % (course, reason) for (course, reason) in ignored ],
                "main_loop": status["main_loop"],
                "login_loop": status["login_loop"],
                "error_count": status["error_count"],
                "errors": dict(status["errors"]),
            }
        )

    @monitor.route("/errors", methods=["GET"])
    def get_errors():
        return jsonify(
            {
                "main_loop": status["main_loop"],
                "login_loop": status["login_loop"],
                "error_count": status["error_count"],
                "errors": dict(status["errors"]),
            }
        )

    # MARK: setup monitor

    monitor.config["JSON_AS_ASCII"] = False
    monitor.config["JSON_SORT_KEYS"] = False

    _werkzeug_internal._logger = cout # custom _logger for werkzeug

    monitor.logger.removeHandler(default_handler)
    for logger in [cout, ferr]:
        for handler in logger.handlers:
            monitor.logger.addHandler(handler)

    monitor.run(
        host=config.monitorHost,
        port=config.monitorPort,
        # Debug mode turns on the interactive debugger, which lets a browser
        # run Python code on this host. The monitor listens on a configurable
        # host, so debug mode stays off.
        debug=False,
        use_reloader=False,
    )
_regexBzfxSida = re.compile(r'\?sida=(\S+?)&sttp=(?:bzx|bfx)')


def _first_text(node):
    """Return the first text node found anywhere below ``node``."""
    return node.xpath('.//text()')[0]


def _complete_rows(rows):
    """Strip every cell and keep only the rows with no missing or blank cell."""
    kept = []
    for row in rows:
        # csv.DictReader fills the cells of a short row with None;
        # treat them like blank cells instead of failing on None.strip()
        cleaned = { k: ("" if v is None else v.strip()) for k, v in row.items() }
        if all(cleaned.values()):
            kept.append(cleaned)
    return kept


def get_tree_from_response(r):
    """Parse the text of response ``r`` into an HTML tree."""
    return etree.HTML(r.text) # use r.text, not r.content, otherwise it may be decoded as latin-1

def get_tree(content):
    """Parse ``content`` into an HTML tree."""
    return etree.HTML(content)

def get_tables(tree):
    """Return the nested tables whose class is "datagrid"."""
    return tree.xpath('.//table//table[@class="datagrid"]')

def get_table_header(table):
    """Return the header cell texts of a datagrid table."""
    return table.xpath('.//tr[@class="datagrid-header"]/th/text()')

def get_table_trs(table):
    """Return the data rows (odd and even) of a datagrid table."""
    return table.xpath('.//tr[@class="datagrid-odd" or @class="datagrid-even"]')

def get_title(tree):
    """Return the text of the page <title>, or None when there is no <title>."""
    title = tree.find('.//head/title')
    # after sso_login a dual-degree account first lands on the major/minor
    # selection page, and that page has no <title> tag
    return None if title is None else title.text

def get_errInfo(tree):
    """
    Return the message of an error page, without the leading label.

    The page layout is checked with assert statements, so a page with a
    different layout raises AssertionError.
    """
    tds = tree.xpath(".//table//table//table//td")
    assert len(tds) == 1
    td = tds[0]
    strong = td[0] # first child element
    assert strong.tag == 'strong' and strong.text == '出错提示:'
    return "".join(td.xpath('./text()')).strip()

def get_tips(tree):
    """Return the text shown in the "msgTips" cell, or None when it is absent."""
    tips = tree.xpath('.//td[@id="msgTips"]')
    if len(tips) == 0:
        return None
    td = tips[0].xpath('.//table//table//td')[1]
    return "".join(td.xpath('.//text()')).strip()

def get_sida(r):
    """Extract the ``sida`` query value from the text of response ``r``."""
    return _regexBzfxSida.search(r.text).group(1)

def get_courses(table):
    """Build a Course(name, classNo, school) for every data row of ``table``."""
    header = get_table_header(table)
    trs = get_table_trs(table)
    idxs = tuple(map(header.index, ["课程名","班号","开课单位"]))
    cs = []
    for tr in trs:
        cells = tr.xpath('./th | ./td')
        name, classNo, school = ( _first_text(cells[i]) for i in idxs )
        cs.append(Course(name, classNo, school))
    return cs

def get_courses_with_detail(table):
    """
    Build a Course(name, classNo, school, status, href) for every data row.

    ``status`` is the "a/b" cell split on "/" and converted to a tuple of
    ints; ``href`` is the link found in the last requested column.
    """
    header = get_table_header(table)
    trs = get_table_trs(table)
    idxs = tuple(map(header.index, ["课程名","班号","开课单位","限数/已选","补选"]))
    cs = []
    for tr in trs:
        cells = tr.xpath('./th | ./td')
        name, classNo, school, status, _ = ( _first_text(cells[i]) for i in idxs )
        status = tuple(map(int, status.split("/")))
        href = cells[idxs[-1]].xpath('./a/@href')[0]
        cs.append(Course(name, classNo, school, status, href))
    return cs


def load_course_csv():
    """
    Load the course list from the csv file selected by the configured coding.

    Every cell is stripped. A row with a blank cell is skipped, and so is a
    row that is shorter than the header. Raises NotImplementedError for a
    coding other than "utf-8" or "gbk".
    """
    config = AutoElectiveConfig()

    coding = config.csvCoding.lower()
    config.check_csv_coding(coding)

    COURSE_UTF8_CSV = userInfo.get("COURSE_UTF8_CSV", DEFAULT_COURSE_UTF8_CSV)
    COURSE_GBK_CSV = userInfo.get("COURSE_GBK_CSV", DEFAULT_COURSE_GBK_CSV)

    if coding == "utf-8":
        rows = read_csv(COURSE_UTF8_CSV, encoding="utf-8-sig")
    elif coding == "gbk":
        rows = read_csv(COURSE_GBK_CSV, encoding="gbk")
    else:
        raise NotImplementedError

    return [ Course(**d) for d in _complete_rows(rows) ]
def b(s):
    """Return ``s`` as UTF-8 bytes; accepts str, int, float or bytes."""
    if isinstance(s, (str,int,float)):
        return str(s).encode("utf-8")
    elif isinstance(s, bytes):
        return s
    else:
        raise TypeError

def u(s):
    """Return ``s`` as str; bytes are decoded as UTF-8."""
    if isinstance(s, bytes):
        return s.decode("utf-8")
    elif isinstance(s, (str,int,float)):
        return str(s)
    else:
        raise TypeError

def xMD5(data):
    """Return the hex MD5 digest of ``data`` (converted with b())."""
    return hashlib.md5(b(data)).hexdigest()

def xSHA1(data):
    """Return the hex SHA-1 digest of ``data`` (converted with b())."""
    return hashlib.sha1(b(data)).hexdigest()


def json_load(file, *args, **kwargs):
    """
    Load JSON from ``file``.

    Returns None when the file does not exist or does not contain valid
    JSON. Extra arguments are forwarded to json.load.
    """
    # open first and handle the failure, rather than checking for existence
    # beforehand: the file could disappear between the check and the open
    try:
        fp = open(file, "r", encoding="utf-8-sig")
    except FileNotFoundError:
        return None
    with fp:
        try:
            return json.load(fp, *args, **kwargs)
        except json.JSONDecodeError:
            return None

def json_dump(obj, file, *args, **kwargs):
    """Write ``obj`` to ``file`` as JSON; extra arguments go to json.dump."""
    with open(file, "w", encoding="utf-8") as fp:
        json.dump(obj, fp, *args, **kwargs)


def read_csv(file, encoding="utf-8-sig"):
    """Read ``file`` with csv.DictReader and return all rows as a list."""
    with open(file, "r", encoding=encoding, newline="") as fp:
        return list(csv.DictReader(fp))


def pickle_gzip_dump(obj, file):
    """Pickle ``obj`` into the gzip-compressed ``file``."""
    with gzip.open(file, "wb") as fp:
        pickle.dump(obj, fp)


def pickle_gzip_load(file):
    """Unpickle the object stored in the gzip-compressed ``file``."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files
    with gzip.open(file, "rb") as fp:
        return pickle.load(fp)


class Singleton(type):
    """
    Singleton Metaclass

    The first call of a class creates its instance; every later call returns
    that same instance and ignores the arguments passed.

    @link https://github.com/jhao104/proxy_pool/blob/428359c8dada998481f038dbdc8d3923e5850c0e/Util/utilClass.py
    """
    _inst = {}  # class -> its single instance, shared by all classes using this metaclass

    def __call__(cls, *args, **kwargs):
        if cls not in cls._inst:
            cls._inst[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._inst[cls]
-------------------------------------------------------------------------------- 1 | # filename: config.ini 2 | # coding: utf-8 3 | 4 | 5 | [coding] 6 | 7 | # csv_coding str course.csv 的编码,可选 ("utf-8","gbk") 8 | 9 | csv_coding = utf-8 10 | 11 | 12 | [user] 13 | 14 | # student_ID string 学号 15 | # password string 密码 16 | # dual_degree boolean 是否为双学位账号,可选 (true, false, True, False, 1, 0) 17 | # identity string 双学位账号登录身份,可选 ("bzx","bfx") 对应于 “主修/辅双” 18 | 19 | student_ID = 1x000xxxxx 20 | password = xxxxxxxx 21 | dual_degree = false 22 | identity = bzx 23 | 24 | 25 | [client] 26 | 27 | # supply_cancel_page int 待刷课程处在选课计划的第几页 28 | # refresh_interval float 每次循环后的暂停时间,单位 s 29 | # random_deviation float 偏移量分数,如果设置为 <= 0 的值,则视为 0 30 | # iaaa_client_timeout float IAAA 客户端最长请求超时 31 | # elective_client_timeout float elective 客户端最长请求超时 32 | # elective_client_pool_size int 最多同时保持几个 elective 的有效会话 33 | # login_loop_interval float IAAA 登录线程每回合结束后的等待时间 34 | # debug_print_request boolean 是否打印请求细节 35 | # debug_dump_request boolean 是否将重要接口的请求以日志的形式记录到本地(包括补退选页、提交选课等接口) 36 | # 37 | # 关于刷新间隔的配置示例: 38 | # 39 | # refresh_interval = 8 40 | # random_deviation = 0.2 41 | # 42 | # 则每两个循环的间隔时间为 8 * (1.0 ± 0.2) s 43 | 44 | supply_cancel_page = 1 45 | refresh_interval = 8 46 | random_deviation = 0.2 47 | iaaa_client_timeout = 30 48 | elective_client_timeout = 60 49 | elective_client_pool_size = 4 50 | login_loop_interval = 2 51 | debug_print_request = false 52 | debug_dump_request = false 53 | 54 | 55 | [monitor] 56 | 57 | host = 127.0.0.1 58 | port = 7074 59 | 60 | -------------------------------------------------------------------------------- /course.gbk.sample.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cbwang2016/PKUAutoElective2/6d1aeac60a0073ab0d68754053fe1823ad866dee/course.gbk.sample.csv -------------------------------------------------------------------------------- /course.utf-8.sample.csv: 
-------------------------------------------------------------------------------- 1 | name,classNo,school 2 | 结构化学,1,化学与分子工程学院 3 | 中国历史地理,1,城市与环境学院 4 | 心理学概论,1,心理与认知科学学院 5 | 概率统计 (B),1,信息科学技术学院 6 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # filename: main.py 4 | # modified: 2019-09-11 5 | 6 | import os 7 | import time 8 | from optparse import OptionParser 9 | from multiprocessing import Process, Manager, Queue 10 | from autoelective import __version__, __date__ 11 | from autoelective.config import AutoElectiveConfig 12 | from autoelective.parser import load_course_csv 13 | from autoelective.logger import ConsoleLogger 14 | from autoelective.loop import main as run_main_loop 15 | from autoelective.monitor import main as run_monitor 16 | from autoelective.const import SIGNAL_KILL_ALL_PROCESSES 17 | from autoelective._internal import userInfo as _userInfo # ugly ! 
def _wait_for_kill_signal(signals, processes, cout):
    """
    Block on ``signals`` until the kill-all signal arrives, then terminate
    every process in ``processes`` that is still alive.

    Returns early, without terminating anything, on KeyboardInterrupt.
    """
    while True:
        try:
            signal = signals.get() # block process
        except KeyboardInterrupt:
            cout.info("Process %s is killed" % os.getpid())
            return
        time.sleep(0.1) # short pause before acting on the signal
        if signal == SIGNAL_KILL_ALL_PROCESSES:
            for p in processes:
                if p.is_alive():
                    p.terminate()
                cout.info("Process %s is killed" % p.name)
            break


def task_run_loop(userInfo):
    """Run the main loop in a daemon child process and wait for its kill signal."""

    AutoElectiveConfig() # create singleton first

    cout = ConsoleLogger("main")
    signals = Queue()

    p = Process(target=run_main_loop, name="Main", args=(signals, userInfo))
    p.daemon = True
    p.start()

    _wait_for_kill_signal(signals, [p], cout)


def task_run_loop_with_monitor(userInfo):
    """
    Run the main loop and the monitor in two daemon child processes that
    share state through a multiprocessing Manager, then wait for the kill
    signal.
    """

    AutoElectiveConfig() # create singleton first

    cout = ConsoleLogger("main")
    signals = Queue()

    with Manager() as manager:

        # shared objects
        goals = manager.list(load_course_csv())
        ignored = manager.list()
        status = manager.dict()

        status["main_loop"] = 0
        status["login_loop"] = 0
        status["error_count"] = 0
        status["errors"] = manager.dict()

        args = (signals, userInfo, goals, ignored, status)

        pList = [
            Process(target=run_main_loop, name="Main", args=args),
            Process(target=run_monitor, name="Monitor", args=args),
        ]

        for p in pList:
            p.daemon = True
            p.start()

        # must stay inside the `with` block: the shared objects live only
        # as long as the manager does
        _wait_for_kill_signal(signals, pList, cout)


def main():
    """Parse the command line, collect the custom file paths and start the task."""

    parser = OptionParser(
        description='PKU Auto-Elective Tool v%s (%s)' % (__version__, __date__),
        version=__version__,
    )

    # MARK: custom input files

    parser.add_option(
        '--config',
        dest='CONFIG_INI',
        metavar="FILE",
        help='custom config file encoded with utf8',
    )
    parser.add_option(
        '--course-csv-utf8',
        dest='COURSE_UTF8_CSV',
        metavar="FILE",
        help='custom course.csv file encoded with utf8',
    )
    parser.add_option(
        '--course-csv-gbk',
        dest='COURSE_GBK_CSV',
        metavar="FILE",
        help='custom course.csv file encoded with gbk',
    )

    # MARK: boolean (flag) options

    parser.add_option(
        '--with-monitor',
        dest='with_monitor',
        action='store_true',
        default=False,
        help='run the monitor process simultaneously',
    )


    options, _ = parser.parse_args()

    # MARK: setup userInfo

    # only options given on the command line are recorded
    userInfo = {}
    for key in ("CONFIG_INI", "COURSE_UTF8_CSV", "COURSE_GBK_CSV"):
        value = getattr(options, key)
        if value is not None:
            userInfo[key] = value

    # MARK: handle boolean (flag) options

    run_task = task_run_loop_with_monitor if options.with_monitor else task_run_loop


    _userInfo.update(userInfo) # setup userInfo first
    run_task(userInfo)


if __name__ == '__main__':
    main()