├── .gitignore
├── .gitattributes
├── figure
    ├── 0.png
    ├── 1.png
    ├── 2.png
    ├── favicon.ico
    └── title.jpg
├── config.json
├── config_example.json
├── LICENSE
├── README.md
├── download_parallel.py
└── download.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.exe
2 | *.json
3 | *.zip
4 | *.txt
5 | */
6 | **/test


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/figure/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricZhu-42/PedagogySquare_Downloader/HEAD/figure/0.png


--------------------------------------------------------------------------------
/figure/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricZhu-42/PedagogySquare_Downloader/HEAD/figure/1.png


--------------------------------------------------------------------------------
/figure/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricZhu-42/PedagogySquare_Downloader/HEAD/figure/2.png


--------------------------------------------------------------------------------
/figure/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricZhu-42/PedagogySquare_Downloader/HEAD/figure/favicon.ico


--------------------------------------------------------------------------------
/figure/title.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EricZhu-42/PedagogySquare_Downloader/HEAD/figure/title.jpg


--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {
2 |     "username": "your_username",
3 |     "password": "your_password",
4 |     "ext_expel_list": [],
5 |     "cid_expel_list": [],
6 | 	"cid_include_list": [],
7 |     "save_path": "",
8 |     "keep_dirs": false
9 | }


--------------------------------------------------------------------------------
/config_example.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"username": "13500000000",
3 | 	"password": "somepassword",
4 | 	"ext_expel_list": ["mp4", "pdf"],
5 | 	"cid_include_list": [],
6 | 	"cid_expel_list": [12102],
7 | 	"save_path" : "",
8 | 	"keep_dirs": false
9 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 EricZhu-42
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <p align="center"> <img src="figure/title.jpg" width=800 align="center"/> </p>
  2 | 
  3 | <p align="center">
  4 | 
  5 |   <img alt="GitHub" src="https://img.shields.io/github/license/EricZhu-42/PedagogySquare_Downloader">
  6 |   <img alt="GitHub last commit" src="https://img.shields.io/github/last-commit/EricZhu-42/PedagogySquare_Downloader">	
  7 |   <img alt="GitHub release (latest by date)" src="https://img.shields.io/github/v/release/EricZhu-42/PedagogySquare_Downloader">
  8 |   <img alt="GitHub code size in bytes" src="https://img.shields.io/github/languages/code-size/EricZhu-42/PedagogySquare_Downloader">
  9 |   <img alt="GitHub top language" src="https://img.shields.io/github/languages/top/EricZhu-42/PedagogySquare_Downloader">
 10 |   </br>
 11 |   <img alt="GitHub stars" src="https://img.shields.io/github/stars/EricZhu-42/PedagogySquare_Downloader">
 12 |   <img alt="GitHub All Releases" src="https://img.shields.io/github/downloads/EricZhu-42/PedagogySquare_Downloader/total">
 13 |   <img alt="GitHub issues" src="https://img.shields.io/github/issues-raw/EricZhu-42/PedagogySquare_Downloader">
 14 |   <img alt="GitHub closed issues" src="https://img.shields.io/github/issues-closed-raw/EricZhu-42/PedagogySquare_Downloader">
 15 |   <img alt="PRs welcome" src="https://img.shields.io/badge/PRs-welcome-brightgreen">
 16 | 
 17 | </p>
 18 | 
 19 | # 教学立方课件下载器
 20 | 
 21 | 在线教学平台——[教学立方](https://teaching.applysquare.com)的课件批量下载脚本，基于**Python** + **Requests**
 22 | 
 23 | > 创建日期：2020-03-30  
 24 | > 更新日期：2022-03-06
 25 | 
 26 | **✨ [2025/02/16] [@TwinklerG](https://github.com/TwinklerG) 维护了该项目的 [Rust 版本](https://github.com/TwinklerG/PedagogySquare-Downloader-rs)，支持多线程并行下载，欢迎试用。**
 27 | 
 28 | ## 下载地址
 29 | 
 30 | **更新日期：2021年6月21日**
 31 | 
 32 | 推荐通过Github release[下载](https://github.com/EricZhu-42/PedagogySquare_Downloader/releases/download/v1.7/PedagogySquare_Downloader_20210621.zip)；若连接速度较慢，亦可通过国内镜像地址[下载](https://box.nju.edu.cn/f/4214a2459d4347df9394/?dl=1)。
 33 | 
 34 | 
 35 | ## 版本更新日志
 36 | 
 37 | ### 2025年2月19日：并发支持
 38 | 
 39 | 增加`download_parallel.py`文件，此脚本为并发下载版本，使用`tqdm`提供进度条
 40 | 
 41 | ### 2021年6月21日：功能性更新
 42 | 
 43 | 在 `config.json` 文件中增加了 `cid_include_list` 项，在非空时将仅下载指定的部分课程。
 44 | 
 45 | ### 2021年4月26日：功能性更新
 46 | 
 47 | 在 `config.json` 文件中增加了 `keep_dirs` 项，可选择下载时是否保持课件的文件夹结构（默认为 `false`）
 48 | 
 49 | ### 2021年3月5日：功能性更新
 50 | 
 51 | 为了方便课件管理，当前版本的 `config.json` 文件中增加了 `save_path` 项（支持绝对路径/相对路径），可设置下载文件的保存目录。
 52 | 
 53 | 请参照下文“配置文件说明”一节，调整下载目录（默认为空，表示下载到脚本的相同目录）
 54 | 
 55 | ### 2021年3月1日：Windows平台下的工具封装
 56 | 
 57 | **TLDR: 当前版本做了脚本封装，现在Windows平台下不安装Python环境也能用了**
 58 | 
 59 | 自2021年3月1日起，我们将在release中为**Windows平台**用户提供封装后的**独立可执行程序**（standalone executable）。**无需安装配置Python环境即可使用封装后的课件下载工具**，具体的使用方法为：
 60 | 
 61 | 1. 下载并解压最新的release文件包，并进入解压后的文件夹
 62 | 2. 参照下文“最简配置方案”一节，修改文件 `config.json` ，填入用户名、密码等信息
 63 | 3. 双击执行 `run.bat`，等待课件下载完成
 64 | 
 65 | 此外，新版本脚本优化了 `config.json` 的配置逻辑，推荐参考下文“配置文件说明”一节重新进行配置。
 66 | 
 67 | > 如果更新后的脚本无法正常使用，请尝试回退至[旧版本](https://github.com/EricZhu-42/PedagogySquare_Downloader/releases/download/v1.4_stable/PedagogySquare_Downloader_20200914.zip)，观察问题是否解决，并联系开发者反馈问题，感谢！
 68 | 
 69 | ## 程序特色
 70 | 
 71 | 1. **一键下载**所有课程的全部课件，方便快捷
 72 | 2. **可下载未直接开放下载的课件**、视频等内容
 73 | 3. **可深度配置**的课程筛选/文件拓展名筛选功能
 74 | 
 75 | > 本程序旨在方便学生下载教学立方平台上的课件及相关教学资料，消除下载文件的重复劳动  
 76 | > 请尊重教师的知识产权与劳动成果。除非获得教师许可，请勿将下载得到的文件在互联网上进行传播  
 77 | > 如本程序损害了您的权益，请联系作者删除相关代码  
 78 | 
 79 | ## 运行环境
 80 | 
 81 | 开发过程中使用的环境与第三方模块版本如下：
 82 | 
 83 | - **Python** = 3.7.4
 84 | - **Requests** = 2.22.0
 85 | - **tqdm** = 4.66.2（可选，用于并发下载版本）
 86 | 
 87 | 为了正常运行脚本，请安装**不低于**以上版本的Python与Requests
 88 | 
 89 | > 另：经测试，**该脚本可在移动端的终端模拟器中正常运行**；安卓平台执行Python脚本可以参考[@OrangeX4](https://github.com/OrangeX4)提供的[工具与教程](https://orangex.orangex4.cool/)。
 90 | 
 91 | ## 使用方法
 92 | 
 93 | ### 1. 配置环境（请参考其他教程）
 94 | 
 95 | 1. 安装对应版本的Python
 96 | 
 97 | 2. 安装对应版本的Python模块：**Requests**（推荐使用[Anaconda](https://www.anaconda.com/)进行管理）
 98 | 
 99 | 
100 | ### 2. 修改配置文件
101 | 
102 | 修改文件 `config.json` ，填入用户名、密码等信息
103 | 
104 | > 关于如何修改配置文件，请参考“最简配置方案”一节  
105 | > 关于配置文件内各项参数的说明，请参考“配置文件说明”一节  
106 | 
107 | ### 3. 运行脚本
108 | 
109 | 运行 `download.py`  
110 | 
111 | > 注：若运行过程中出现下载速度过慢等现象，可能是由于与教学立方网站连接不稳定，请尝试重新运行脚本。
112 | 
113 | ## 项目结构介绍
114 | 
115 | | 文件名              | 功能                     |
116 | | ------------------- | ------------------------ |
117 | | figure/             | 脚本说明中用到的图片文件 |
118 | | download.py         | 脚本运行入口             |
119 | | config.json         | 执行参数的配置文件       |
120 | | config_example.json | 供参考的样例配置文件     |
121 | 
122 | ## 最简配置方案
123 | 
124 | ```json
125 | {
126 | 	"username": "your_username",
127 | 	"password": "your_password",
128 | 	"ext_expel_list": [],
129 | 	"cid_expel_list": [],
130 | 	"cid_include_list": [],
131 |         "save_path": "",
132 | 	"keep_dirs": false
133 | }
134 | ```
135 | 
136 | 将 `your_username` 与 `your_password` （**注意保留外部的双引号**）替换成你的**手机号**和**教学立方登录密码**即可，其他参数无需修改。
137 | 
138 | > 请确保json文件格式正确，可参考提供的 `config_example.json` 进行配置。
139 | 
140 | ## 配置文件说明
141 | 
142 | 以下对 `config.json` 内各项参数进行简要说明：
143 | 
144 | | 参数名               | 类型 | 含义                                                |
145 | | -------------------- | ---- | --------------------------------------------------- |
146 | | username             | str  | 教学立方登录用户名（一般为手机号）                  |
147 | | password             | str  | 教学立方登录密码                                    |
148 | | ext_expel_list       | list | 排除文件的类型列表                                      |
149 | | cid_expel_list             | list | 排除课程的课程ID列表                                    |
150 | | cid_include_list | list | 保留课程的ID列表 |
151 | | save_path | str | 下载目录（支持绝对路径或相对路径） |
152 | | keep_dirs | bool | 是否保留文件的目录结构 |
153 | 
154 | ### 注意事项与说明：
155 | 
156 | 1. 例如：若不需要下载课程ID为 `12345` 的课程，且不需要下载 `pdf` 文件与 `mp4` 文件，可设置为：
157 |    - `ext_expel_list = ["pdf", "mp4"]`
158 |    - `cid_expel_list = [12345]`
159 | 2. 在修改下载目录 `save_path` 时，请先在对应的位置**创建文件夹**；如无法找到对应的文件夹，将会报错并在默认目录（程序根目录）保存下载得到的文件。
160 | 3. 课程ID在课程主页地址中查看，例如：  
161 |    ![](./figure/0.png)  
162 |    图中对应课程的ID为**8261**  
163 | 
164 | 
165 | ## 版权信息
166 | 
167 | 联系邮箱：zhuxinhao00@gmail.com
168 | 
169 | 本项目基于MIT协议开源
170 | 


--------------------------------------------------------------------------------
/download_parallel.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: UTF-8 -*-
  2 | """
  3 | @FileName: download_parallel.py
  4 | 
  5 | @Author: zhuxinhao00@gmail.com
  6 | @Author: twinklerchn@gmail.com
  7 | 
  8 | @Create date: 2025/2/19
  9 | 
 10 | @Modified date: 2025/2/19
 11 | 
 12 | @description: A script to download file automatically from teaching.applysquare.com concurrently
 13 | """
 14 | 
 15 | import hashlib
 16 | import json
 17 | import os
 18 | import pathlib
 19 | import time
 20 | from concurrent.futures import ThreadPoolExecutor
 21 | from tqdm import tqdm
 22 | 
 23 | import requests
 24 | import urllib3
 25 | 
 26 | urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 27 | 
 28 | 
 29 | # Get Hex-md5 encoded password
 30 | def hex_md5_stringify(raw_str: str):
 31 |     md5_encoder = hashlib.md5()
 32 |     md5_encoder.update(str(raw_str).encode("utf-8"))
 33 |     return md5_encoder.hexdigest()
 34 | 
 35 | 
 36 | # Function dealing with illegal characters of windows filename
 37 | def filename_filter(name: str):
 38 |     illegal_list = list('/\:*?”"<>|')
 39 |     for char in illegal_list:
 40 |         name = name.replace(char, " ")
 41 |     return name
 42 | 
 43 | 
 44 | def construct_attchment_list(sess, token, pid, uid, cid, parent_dir):
 45 |     attachment_list = list()
 46 |     attachment_info_url = attachment_url_fmt.format(token, pid, 1, uid, cid)
 47 |     r = sess.get(attachment_info_url, verify=False)
 48 |     info = r.json()["message"]
 49 |     file_num = info.get("count")
 50 | 
 51 |     current_page = 1
 52 |     # Add attachment path to attachment_list
 53 |     while len(attachment_list) < file_num:
 54 |         current_url = attachment_url_fmt.format(token, pid, current_page, uid, cid)
 55 |         r = sess.get(current_url, verify=False)
 56 |         info = r.json()["message"]
 57 |         attachment_list.extend(info.get("list"))
 58 |         current_page += 1
 59 |     for entry in attachment_list:
 60 |         entry["parent_dir"] = parent_dir
 61 |     return attachment_list
 62 | 
 63 | 
 64 | # Load config from config.json
 65 | with open("config.json", "r", encoding="utf-8") as f:
 66 |     config = json.loads(f.read())
 67 |     user_name = config.get("username")
 68 |     user_passwd = config.get("password")
 69 |     ext_expel_list = config.get("ext_expel_list")
 70 |     cid_include_list = list(map(str, config.get("cid_include_list", [])))
 71 |     cid_expel_list = list(map(str, config.get("cid_expel_list", [])))
 72 |     save_path = config.get("save_path", "")
 73 |     keep_dirs = config.get("keep_dirs", False)
 74 | 
 75 | if save_path:
 76 |     try:
 77 |         os.chdir(save_path)
 78 |     except Exception as e:
 79 |         print(
 80 |             'Changing save_path failed for reason "{}", using default path instead.'.format(
 81 |                 e
 82 |             )
 83 |         )
 84 |         time.sleep(1)
 85 | 
 86 | print("Files will be saved to ", os.getcwd())
 87 | 
 88 | # Some metadata
 89 | login_url = r"https://teaching.applysquare.com/Api/User/ajaxLogin"
 90 | attachment_url_fmt = r"https://teaching.applysquare.com/Api/CourseAttachment/getList/token/{}?parent_id={}&page={}&plan_id=-1&uid={}&cid={}"
 91 | course_info_url_fmt = r"https://teaching.applysquare.com/Api/Public/getIndexCourseList/token/{}?type=1&usertype=1&uid={}"
 92 | attachment_detail_url_fmt = r"https://teaching.applysquare.com/Api/CourseAttachment/ajaxGetInfo/token/{}?id={}&uid={}&cid={}"
 93 | 
 94 | # Init Requests session
 95 | sess = requests.Session()
 96 | 
 97 | # Login in
 98 | print("Trying to log in, please wait ...")
 99 | login_request = sess.post(
100 |     login_url,
101 |     data={"email": user_name, "password": hex_md5_stringify(user_passwd)},
102 |     verify=False,
103 | )
104 | 
105 | login_response = login_request.json()
106 | login_info = login_response["message"]
107 | 
108 | try:
109 |     token = login_info["token"]
110 | except TypeError:
111 |     print("Login Failed, please check your username & password")
112 |     print("Login info received: {}".format(login_info))
113 |     exit()
114 | 
115 | uid = login_info["uid"]
116 | print("Login successfully!")
117 | 
118 | cid2name_dict = dict()
119 | course_info_url = course_info_url_fmt.format(token, uid)
120 | r = sess.get(course_info_url, verify=False)
121 | info = r.json()["message"]
122 | for entry in info:
123 |     cid2name_dict[entry.get("cid")] = entry.get("name")
124 | 
125 | cid_list = cid2name_dict.keys()
126 | 
127 | 
def check_cid(cid):
    """Return True when the course with ID ``cid`` should be downloaded.

    A course is selected when it is not in ``cid_expel_list`` and, if
    ``cid_include_list`` is non-empty, it is also contained in that list.
    """
    # Both lists hold strings, so compare the string form of the ID;
    # otherwise an integer ID would never match either list.
    cid = str(cid)
    if cid_include_list and cid not in cid_include_list:
        return False
    return cid not in cid_expel_list
132 | 
133 | 
# Announce which courses passed the include/expel filters
print("\nReady to download the following courses:")
for cid, cname in cid2name_dict.items():
    if check_cid(cid):
        print("Course: {:8s}, CID={:6}".format(cname, cid))
139 | 
140 | 
def download_cid(cid):
    """Download the attachments of one course into a directory named after it.

    The course is skipped when ``check_cid`` rejects it. Otherwise the
    attachment listing is expanded directory by directory and every file
    entry is downloaded by a pool of worker threads.

    Args:
        cid: Course ID; it is converted to ``str`` before use.
    """
    cid = str(cid)  # Prevent bug caused by wrong type of cid

    if not check_cid(cid):
        return

    try:
        course_name = filename_filter(cid2name_dict[cid])
    except KeyError:
        print(
            "Can't find course name for cid {}, maybe it's a legacy course?".format(cid)
        )
        course_name = "CID_{}".format(cid)
    print("\nDownloading files of course {}".format(course_name))

    # Create dir for this course
    root = pathlib.Path(os.getcwd()) / course_name
    root.mkdir(parents=True, exist_ok=True)

    # Construct attachment list, with some dirs in it
    course_attachment_list = construct_attchment_list(
        sess=sess, token=token, pid=0, uid=uid, cid=cid, parent_dir=pathlib.Path(".")
    )

    # The list is extended while it is being iterated, so directories found
    # inside other directories are visited by the same loop.
    dir_counter = 0
    for entry in course_attachment_list:
        if entry.get("ext") != "dir":
            continue
        dir_counter += 1
        dir_id = entry.get("id")
        # With keep_dirs disabled every file lands in its parent's directory.
        dir_name = filename_filter(entry.get("title")) if keep_dirs else ""
        parent_dir = entry.get("parent_dir")
        (root / parent_dir / dir_name).mkdir(parents=True, exist_ok=True)
        course_attachment_list.extend(
            construct_attchment_list(
                sess=sess,
                token=token,
                pid=dir_id,
                uid=uid,
                cid=cid,
                parent_dir=parent_dir / dir_name,
            )
        )

    print(
        "Get {:d} files with {:d} dirs".format(
            len(course_attachment_list) - dir_counter, dir_counter
        )
    )

    def download_entry(entry):
        """Download one attachment entry; dirs and excluded extensions are skipped."""
        ext = entry.get("ext")
        if (ext == "dir") or (ext in ext_expel_list):
            return

        title = entry.get("title")
        # Append the extension only when the title does not already end with
        # it; a plain substring test would also match titles like "pdf notes"
        # and leave the saved file without an extension.
        if not ext or title.lower().endswith(".{}".format(ext).lower()):
            filename = filename_filter(title)
        else:
            filename = filename_filter("{}.{}".format(title, ext))
        filepath = root / entry.get("parent_dir") / filename

        # Get download url for un-downloadable files
        if entry.get("can_download") == "0":
            attachment_detail_url = attachment_detail_url_fmt.format(
                token, entry.get("id"), uid, cid
            )
            r = sess.get(attachment_detail_url, verify=False)
            info = r.json()["message"]
            entry["path"] = info.get("path")

        download_url = entry.get("path").replace("amp;", "")

        # Streaming, so we can iterate over the response; the context manager
        # releases the connection on every exit path.
        with requests.get(download_url, stream=True) as response:
            try:
                # Parse with int(): the header is server-supplied text and
                # must never be passed to eval().
                content_size = int(response.headers["content-length"])
            except (KeyError, ValueError):
                print(
                    "Failed to get content length of file {}, please download it manually.".format(
                        filename
                    )
                )
                return

            if filepath.exists() and filepath.is_file():
                # If file is up-to date, skip it; else, delete and re-download
                if os.path.getsize(filepath) == content_size:
                    print("File {:\u3000<20} is up-to-date".format(filename))
                    return
                print("Updating File {}".format(filename))
                os.remove(filepath)

            # A fixed chunk size also covers empty files, for which
            # min(content_size, 10240) would yield an invalid size of 0.
            chunk_size = 10240

            with tqdm(
                total=content_size,
                unit="B",
                unit_scale=True,
                desc=f"Downloading {filename}",
            ) as progress_bar, open(filepath, "wb") as file:
                for data in response.iter_content(chunk_size):
                    progress_bar.update(len(data))
                    file.write(data)

    # Download attachments. Each future's result is retrieved so that an
    # exception raised in a worker is reported; with a bare exe.map() whose
    # results are never consumed such failures would pass silently.
    with ThreadPoolExecutor(max_workers=8) as exe:
        futures = [
            (exe.submit(download_entry, entry), entry)
            for entry in course_attachment_list
        ]
        for future, entry in futures:
            try:
                future.result()
            except Exception as e:
                print(
                    "Failed to download {} for reason \"{}\"".format(
                        entry.get("title"), e
                    )
                )
249 | 
250 | 
# Download all courses concurrently. The result of every future is retrieved
# so that an exception raised while downloading a course is reported; a bare
# exe.map() whose results are never consumed would hide it.
with ThreadPoolExecutor(max_workers=8) as exe:
    course_futures = [(exe.submit(download_cid, cid), cid) for cid in cid_list]
    for course_future, course_cid in course_futures:
        try:
            course_future.result()
        except Exception as e:
            print(
                "Failed to download course {} for reason \"{}\"".format(course_cid, e)
            )

print("Done!")
255 | 


--------------------------------------------------------------------------------
/download.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: UTF-8 -*-
  2 | """
  3 | @FileName: download.py
  4 | 
  5 | @Author：zhuxinhao00@gmail.com
  6 | 
  7 | @Create date: 2020/03/31
  8 | 
  9 | @Modified date: 2021/09/14
 10 | 
 11 | @description: A script to download file automatically from teaching.applysquare.com
 12 | """
 13 | 
 14 | import hashlib
 15 | import json
 16 | import logging
 17 | import os
 18 | import pathlib
 19 | import re
 20 | import time
 21 | from contextlib import closing
 22 | 
 23 | import requests
 24 | import urllib3
 25 | urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 26 | 
 27 | 
 28 | # Get Hex-md5 encoded password
 29 | def hex_md5_stringify(raw_str:str):
 30 |     md5_encoder = hashlib.md5()
 31 |     md5_encoder.update(str(raw_str).encode('utf-8'))
 32 |     return md5_encoder.hexdigest()
 33 | 
 34 | # Function dealing with illegal characters of windows filename
 35 | def filename_filter(name:str):
 36 |     illegal_list = list('/\:*?”"<>|')
 37 |     for char in illegal_list:
 38 |         name = name.replace(char, ' ')
 39 |     return name
 40 | 
 41 | def construct_attchment_list(sess, token, pid, uid, cid, parent_dir):
 42 |     attachment_list = list()
 43 |     attachment_info_url = attachment_url_fmt.format(token, pid, 1, uid, cid)
 44 |     r = sess.get(attachment_info_url, verify=False)
 45 |     info = r.json()['message']
 46 |     file_num = info.get('count')
 47 | 
 48 |     current_page = 1
 49 |     # Add attachment path to attachment_list
 50 |     while len(attachment_list) < file_num:
 51 |         current_url = attachment_url_fmt.format(token, pid, current_page, uid, cid)
 52 |         r = sess.get(current_url, verify=False)
 53 |         info = r.json()['message']
 54 |         attachment_list.extend(info.get('list'))
 55 |         current_page += 1
 56 |     for entry in attachment_list:
 57 |         entry["parent_dir"] = parent_dir
 58 |     return attachment_list
 59 | 
 60 | # Load config from config.json
 61 | with open('config.json', 'r', encoding='utf-8') as f:
 62 |     config = json.loads(f.read())
 63 |     user_name = config.get('username')
 64 |     user_passwd = config.get('password')
 65 |     ext_expel_list = config.get('ext_expel_list')
 66 |     cid_include_list = list(map(str, config.get('cid_include_list', [])))
 67 |     cid_expel_list = list(map(str, config.get('cid_expel_list', [])))
 68 |     save_path = config.get('save_path', "")
 69 |     keep_dirs = config.get('keep_dirs', False)
 70 | 
 71 | if save_path:
 72 |     try:
 73 |         os.chdir(save_path)
 74 |     except Exception as e:
 75 |         print('Changing save_path failed for reason \"{}\", using default path instead.'.format(e))
 76 |         time.sleep(1)
 77 | 
 78 | print("Files will be saved to ", os.getcwd())
 79 | 
 80 | # Some metadata
 81 | login_url = r'https://teaching.applysquare.com/Api/User/ajaxLogin'
 82 | attachment_url_fmt = r'https://teaching.applysquare.com/Api/CourseAttachment/getList/token/{}?parent_id={}&page={}&plan_id=-1&uid={}&cid={}'
 83 | course_info_url_fmt = r'https://teaching.applysquare.com/Api/Public/getIndexCourseList/token/{}?type=1&usertype=1&uid={}'
 84 | attachment_detail_url_fmt = r'https://teaching.applysquare.com/Api/CourseAttachment/ajaxGetInfo/token/{}?id={}&uid={}&cid={}'
 85 | 
 86 | # Init Requests session
 87 | sess = requests.Session()
 88 | 
 89 | # Login in
 90 | print("Trying to log in, please wait ...")
 91 | login_request = sess.post(login_url, data={"email" : user_name, "password" : hex_md5_stringify(user_passwd)}, verify=False)
 92 | 
 93 | login_response = login_request.json()
 94 | login_info = login_response['message']
 95 | 
 96 | try:
 97 |     token = login_info['token']
 98 | except TypeError:
 99 |     print("Login Failed, please check your username & password")
100 |     print("Login info received: {}".format(login_info))
101 |     exit()
102 | 
103 | uid = login_info['uid']
104 | print("Login successfully!")
105 | 
106 | cid2name_dict = dict()
107 | course_info_url = course_info_url_fmt.format(token, uid)
108 | r = sess.get(course_info_url, verify=False)
109 | info = r.json()["message"]
110 | for entry in info:
111 |     cid2name_dict[entry.get('cid')] = entry.get('name')
112 | 
113 | cid_list = cid2name_dict.keys()
114 | 
def check_cid(cid):
    """Return True when the course with ID ``cid`` should be downloaded.

    A course is selected when it is not in ``cid_expel_list`` and, if
    ``cid_include_list`` is non-empty, it is also contained in that list.
    """
    # Both lists hold strings, so compare the string form of the ID;
    # otherwise an integer ID would never match either list.
    cid = str(cid)
    if cid_include_list and cid not in cid_include_list:
        return False
    return cid not in cid_expel_list
119 | 
# Announce which courses passed the include/expel filters
print("\nReady to download the following courses:")
for cid, cname in cid2name_dict.items():
    if check_cid(cid):
        print("Course: {:8s}, CID={:6}".format(cname, cid))
125 | 
# Download the attachments of every selected course, one file at a time
for cid in cid_list:
    cid = str(cid) # Prevent bug caused by wrong type of cid

    if not check_cid(cid):
        continue

    try:
        course_name = filename_filter(cid2name_dict[cid])
    except KeyError:
        print("Can't find course name for cid {}, maybe it's a legacy course?".format(cid))
        course_name = "CID_{}".format(cid)
    print("\nDownloading files of course {}".format(course_name))

    # Create dir for this course
    root = pathlib.Path(os.getcwd()) / course_name
    root.mkdir(parents=True, exist_ok=True)

    # Construct attachment list, with some dirs in it
    course_attachment_list = construct_attchment_list(sess=sess, token=token, pid=0, uid=uid, cid=cid, parent_dir=pathlib.Path("."))

    # The list is extended while it is being iterated, so directories found
    # inside other directories are visited by the same loop.
    dir_counter = 0
    for entry in course_attachment_list:
        if entry.get('ext') != 'dir':
            continue
        dir_counter += 1
        dir_id = entry.get('id')
        # With keep_dirs disabled every file lands in its parent's directory.
        dir_name = filename_filter(entry.get('title')) if keep_dirs else ''
        parent_dir = entry.get('parent_dir')
        (root/parent_dir/dir_name).mkdir(parents=True, exist_ok=True)
        course_attachment_list.extend(construct_attchment_list(sess=sess, token=token, pid=dir_id, uid=uid, cid=cid, parent_dir=parent_dir/dir_name))

    print("Get {:d} files with {:d} dirs".format(len(course_attachment_list)-dir_counter, dir_counter))

    # Download attachments
    for entry in course_attachment_list:
        ext = entry.get('ext')
        if (ext == 'dir') or (ext in ext_expel_list):
            continue

        title = entry.get('title')
        # Append the extension only when the title does not already end with
        # it; a plain substring test would also match titles like "pdf notes"
        # and leave the saved file without an extension.
        if not ext or title.lower().endswith(".{}".format(ext).lower()):
            filename = filename_filter(title)
        else:
            filename = filename_filter("{}.{}".format(title, ext))
        filepath = root/entry.get("parent_dir")/filename

        filesize = entry.get('size')

        # Get download url for un-downloadable files
        if (entry.get('can_download') == '0'):
            attachment_detail_url = attachment_detail_url_fmt.format(token, entry.get('id'), uid, cid)
            r = sess.get(attachment_detail_url, verify=False)
            info = r.json()['message']
            entry['path'] = info.get('path')

        with closing(requests.get(entry.get('path').replace('amp;', ''), stream=True)) as res:

            try:
                # Parse with int(): the header is server-supplied text and
                # must never be passed to eval().
                content_size = int(res.headers['content-length'])
            except (KeyError, ValueError):
                print("Failed to get content length of file {}, please download it manually.".format(filename))
                continue

            if filepath.exists() and filepath.is_file():
                # If file is up-to date, continue; else, delete and re-download
                if os.path.getsize(filepath) == content_size:
                    print("File {:\u3000<20} is up-to-date".format(filename))
                    continue
                else:
                    print("Updating File {}".format(filename))
                    os.remove(filepath)

            print("Downloading {:\u3000<20s}, filesize = {}".format(filename, filesize))
            # A fixed chunk size also covers empty files, for which
            # min(content_size, 10240) would yield an invalid size of 0.
            chunk_size = 10240
            with open(filepath, "wb") as f:
                chunk_count = 0
                downloaded_bytes = 0
                start_time = time.time()
                total = content_size / 1024 / 1024
                for data in res.iter_content(chunk_size=chunk_size):
                    chunk_count += 1
                    # Sum the real chunk lengths; multiplying the current
                    # chunk length by the chunk count misreports progress
                    # whenever a chunk (typically the last) is shorter.
                    downloaded_bytes += len(data)
                    processed = downloaded_bytes / 1024 / 1024
                    percent = processed / total * 100 if total else 100.0
                    if chunk_count < 5 or not processed:
                        # Too early for a meaningful ETA estimate
                        print(r"    Total: {:.2f} MB  Processed: {:.2f} MB ({:.2f}%)".format(total, processed, percent), end = '\r')
                    else:
                        current_time = time.time()
                        remaining = (current_time-start_time)/processed*(total-processed)
                        print(r"    Total: {:.2f} MB  Processed: {:.2f} MB ({:.2f}%), ETA {:.2f}s".format(total, processed, percent, remaining), end = '\r')
                    f.write(data)


print("Done!")
232 | 


--------------------------------------------------------------------------------