├── .github
    └── workflows
    │   └── build.yml
├── .gitignore
├── LICENSE
├── README.md
├── example_cookie.txt
├── hint.jpg
├── learn-old.py
├── learn-slow.py
├── learn-stdio.py
├── learn.py
├── learn_async.py
└── requirements.txt


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: PyInstaller
 2 | on: [push]
 3 | jobs:
 4 |   build:
 5 |     runs-on: ${{ matrix.os }}
 6 |     strategy:
 7 |       matrix:
 8 |         os: [macos-latest, windows-latest, ubuntu-latest]
 9 |     steps:
10 |       - uses: actions/checkout@v1
11 |       - name: Set up Python 3.7
12 |         uses: actions/setup-python@v1
13 |         with:
14 |           python-version: 3.7
15 |       - name: Install dependencies
16 |         run: |
17 |           python -m pip install --upgrade pip
18 |           pip install -r requirements.txt
19 |           pip install pyinstaller
20 |       - name: build with pyinstaller
21 |         run: |
22 |           pyinstaller --onefile learn-stdio.py -n learn-stdio-${{ matrix.os }}
23 |       - name : Upload artifact
24 |         uses: actions/upload-artifact@master
25 |         with:
26 |           name: learn-stdio
27 |           path: dist/


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | venv/
3 | .pass


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 n+e
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 清华大学新版网络学堂课程自动下载脚本
  2 | 
  3 | ## Features
  4 | 
  5 | 0. 跨平台支持：Windows/Mac/Linux 支持双击运行（[详情点击](https://github.com/Trinkle23897/learn2018-autodown/releases)）
  6 | 1. 下载所有课程公告
  7 | 2. 下载所有课件
  8 | 3. 下载所有作业文件及其批阅情况
  9 | 4. 下载所有课程讨论
 10 | 5. 下载课程信息
 11 | 6. 增量更新
 12 | 7. 可选下载课程
 13 | 8. 随时按 Ctrl+C 跳过某个文件的下载
 14 | 9. 下载助教课程
 15 | 10. 可使用 cookie 登录
 16 | 11. 多刷刷有利于后台成绩提高，比如以下第三条记录是我的：
 17 | 
 18 | ![](hint.jpg)
 19 | 
 20 | ## Dependency
 21 | 
 22 | python>=3.5, bs4, tqdm, requests
 23 | 
 24 | ```bash
 25 | pip3 install -r requirements.txt --user -U
 26 | ```
 27 | 
 28 | ## Usage
 29 | 
 30 | ### CLI 下载
 31 | 
 32 | ```bash
 33 | python learn-stdio.py
 34 | ```
 35 | 
 36 | ### 原始脚本下载选项
 37 | 
 38 | 下载当前学期课程（默认）
 39 | 
 40 | ```bash
 41 | python learn_async.py
 42 | ```
 43 | 
 44 | 下载所有学期课程
 45 | 
 46 | ```bash
 47 | python learn_async.py --all
 48 | ```
 49 | 
 50 | 下载指定学期课程
 51 | 
 52 | ```bash
 53 | python learn_async.py --semester 2018-2019-1 2018-2019-3
 54 | ```
 55 | 
 56 | 下载指定课程
 57 | 
 58 | ```bash
 59 | python learn_async.py --course 计算机网络安全技术 计算机组成原理
 60 | ```
 61 | 
 62 | 跳过某几个课程下载
 63 | 
 64 | ```bash
 65 | python learn_async.py --ignore 数据结构 "实验室科研探究(1)"
 66 | ```
 67 | 
 68 | 移除所有文件夹下完全相同的文件
 69 | 
 70 | ```bash
 71 | python learn_async.py --clear --all
 72 | ```
 73 | 
 74 | 指定下载路径
 75 | 
 76 | ```bash
 77 | python learn_async.py --dist your_download_path
 78 | ```
 79 | 
 80 | 启用多进程下载
 81 | 
 82 | ```bash
 83 | python learn_async.py --multi
 84 | ```
 85 | 
 86 | 启用多进程下载，并指定进程数（如果不指定则默认使用所有 CPU 核心数）
 87 | 
 88 | ```bash
 89 | python learn_async.py --multi --processes 4
 90 | ```
 91 | 
 92 | 以上参数均可组合使用，比如我想并发的更新大二的课程到`./download`目录，但是不想下载数据结构、实验室科研探究、中国近现代史纲要（课程文件太大了）：
 93 | 
 94 | ```bash
 95 | python learn_async.py --semester 2017-2018-1 2017-2018-2 2017-2018-3 --ignore 数据结构 "实验室科研探究(2)" 中国近现代史纲要 --multi --dist ./download
 96 | ```
 97 | 
 98 | **如果想跳过正在下载的某个文件，按 Ctrl+C 即可。**
 99 | 
100 | ### 登录选项（learn-stdio 中禁用）
101 | 
102 | 懒得每次输入 info 账号密码？创建文件`.pass`，写入 info 账号和密码之后可以自动登录，或者是：
103 | 
104 | ```bash
105 | python learn_async.py --_pass your_info_file
106 | ```
107 | 
108 | 其中文件格式为
109 | 
110 | ```bash
111 | info账号
112 | info密码
113 | ```
114 | 
115 | 使用 Cookie 登录而不是输入 info 密码：
116 | 
117 | ```bash
118 | python learn_async.py --cookie your_cookie_filename
119 | ```
120 | 
121 | 其中 cookie 文件格式可参考 `example_cookie.txt`。
122 | 
123 | ## Common Issues
124 | 
125 | - 卡在 login：网络原因，看看 pulse-secure 关了没，重跑试试看
126 | - `500 : Internal Server Error`：请拉取最新版的脚本。网络学堂自 2020/2/22 开启强制 https。
127 | - `info_xxx.csv`在 Mac 下打开是乱码：别用 office，用 mac 自带的软件吧 :)
128 | 


--------------------------------------------------------------------------------
/example_cookie.txt:
--------------------------------------------------------------------------------
1 | # Netscape HTTP Cookie File
2 | learn.tsinghua.edu.cn	FALSE	/	FALSE		JSESSIONID	B1274E298A712E84F1346C2753AA4BC0.wlxt20181
3 | 


--------------------------------------------------------------------------------
/hint.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Trinkle23897/learn2018-autodown/c000b00eafa0846341f27eb760419ca846d78ca2/hint.jpg


--------------------------------------------------------------------------------
/learn-old.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import os, re, sys, bs4
 5 | import urllib
 6 | import getpass
 7 | import http.cookiejar
 8 | from bs4 import BeautifulSoup as bs
 9 | 
10 | url = 'https://learn.tsinghua.edu.cn/'
11 | user_agent = r'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
12 | headers = { 'User-Agent': user_agent, 'Connection': 'keep-alive' }
13 | 
14 | cookie = http.cookiejar.MozillaCookieJar()
15 | handler = urllib.request.HTTPCookieProcessor(cookie)
16 | opener = urllib.request.build_opener(handler)
17 | 
18 | def open_page(uri, values = {}):
19 | 	post_data = urllib.parse.urlencode(values).encode()
20 | 	request = urllib.request.Request(url + uri, post_data, headers)
21 | 	try:
22 | 		response = opener.open(request)
23 | 		return response
24 | 	except urllib.error.URLError as e:
25 | 		print(e.code, ':', e.reason)
26 | 
27 | def get_page(uri, values = {}):
28 | 	data = open_page(uri, values)
29 | 	if data:
30 | 		return data.read().decode()
31 | 
32 | def login(username, password):
33 | 	login_uri = 'MultiLanguage/lesson/teacher/loginteacher.jsp'
34 | 	values = { 'userid': username, 'userpass': password, 'submit1': '登陆' }
35 | 	successful = get_page(login_uri, values).find('loginteacher_action.jsp') != -1
36 | 	print('Login successfully' if successful else 'Login failed!')
37 | 	return successful
38 | 
39 | def get_courses(typepage = 1):
40 | 	soup = bs(get_page('MultiLanguage/lesson/student/MyCourse.jsp?language=cn&typepage=' + str(typepage)), 'html.parser')
41 | 	ids = soup.findAll(href=re.compile("course_id="))
42 | 	courses = []
43 | 	for link in ids:
44 | 		href = link.get('href').split('course_id=')[-1]
45 | 		name = link.text.strip()
46 | 		courses.append((href, name))
47 | 	return courses
48 | 
49 | def sync_file(path_prefix, course_id):
50 | 	if not os.path.exists(path_prefix):
51 | 		os.makedirs(path_prefix)
52 | 	soup = bs(get_page('MultiLanguage/lesson/student/download.jsp?course_id=' + str(course_id)), 'html.parser')
53 | 	for comment in soup(text=lambda text: isinstance(text, bs4.Comment)):
54 | 		link = bs(comment, 'html.parser').a
55 | 		name = link.text
56 | 		uri = comment.next.next.a.get('href')
57 | 		filename = link.get('onclick').split('getfilelink=')[-1].split('&id')[0]
58 | 		file_path = os.path.join(path_prefix, filename)
59 | 		if not os.path.exists(file_path):
60 | 			print('Download ', name)
61 | 			open(file_path, 'wb').write(open_page(uri).read())
62 | 
63 | def sync_hw(path_prefix, course_id):
64 | 	if not os.path.exists(path_prefix):
65 | 		os.makedirs(path_prefix)
66 | 	root = bs(get_page('MultiLanguage/lesson/student/hom_wk_brw.jsp?course_id=' + str(course_id)), 'html.parser')
67 | 	for ele in root.findAll('a'):
68 | 		hw_path = os.path.join(path_prefix, ele.text)
69 | 		if not os.path.exists(hw_path):
70 | 			os.makedirs(hw_path)
71 | 		soup = bs(get_page('MultiLanguage/lesson/student/' + ele.get('href')), 'html.parser')
72 | 		for link in soup.findAll('a'):
73 | 			name = 'upload-'+link.text if link.parent.previous.previous.strip() == '上交作业附件' else link.text
74 | 			uri = link.get('href')
75 | 			file_path = os.path.join(hw_path, name)
76 | 			if not os.path.exists(file_path):
77 | 				print('Download ', name)
78 | 				open(file_path, 'wb').write(open_page(uri).read())
79 | 
80 | if __name__ == '__main__':
81 | 	ignore = open('.ignore').read().split() if os.path.exists('.ignore') else []
82 | 	username = input('username: ')
83 | 	password = getpass.getpass('password: ')
84 | 	if login(username, password):
85 | 		typepage = 1 if '.py' in sys.argv[-1] else int(sys.argv[-1])
86 | 		courses = get_courses(typepage)
87 | 		for course_id, name in courses:
88 | 			if name in ignore:
89 | 				print('Skip ' + name)
90 | 			else:
91 | 				print('Sync '+ name)
92 | 				sync_file(name, course_id)
93 | 			sync_hw(name, course_id)
94 | 


--------------------------------------------------------------------------------
/learn-slow.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | __author__ = "Trinkle23897"
  5 | __copyright__ = "Copyright (C) 2019 Trinkle23897"
  6 | __license__ = "MIT"
  7 | __email__ = "463003665@qq.com"
  8 | 
  9 | import os, sys, getpass, requests
 10 | from time import sleep
 11 | from selenium import webdriver
 12 | from bs4 import BeautifulSoup as bs
 13 | from selenium.webdriver.chrome.options import Options
 14 | 
 15 | root_uri = 'http://learn2018.tsinghua.edu.cn'
 16 | time_sleep = 0.05
 17 | time_out = 5
 18 | 
 19 | def wait_for_load(cond, driver): # wait for loading course info
 20 |     cnt = time_out / time_sleep # max try
 21 |     while cond(driver) and cnt > 0:
 22 |         sleep(time_sleep)
 23 |         cnt -= 1
 24 | 
 25 | def load_course_cond(driver): # avoid null
 26 |     return len(bs(driver.page_source, 'html.parser').findAll(class_='title stu')) == 0
 27 | 
 28 | def load_notice_cond(driver): # avoid '条数据'.count == 1
 29 |     return bs(driver.page_source, 'html.parser').text.count(u'条数据') < 2
 30 | 
 31 | def load_notice_ele_cond(driver): # avoid single '\n'
 32 |     return len(bs(driver.page_source, 'html.parser').find(id='ggnr').text) < 2
 33 | 
 34 | def load_course_file_cond(driver): # avoid no element in tabbox
 35 |     return bs(driver.page_source, 'html.parser').find(id='tabbox').text.count(u'电子教案') == 0
 36 | 
 37 | def load_course_file_ele_cond(driver): # avoid no element in tabbox
 38 |     return len(bs(driver.page_source, 'html.parser').find(class_='playli').findAll('li')) == 0 and u'此类别没有课程文件' not in bs(driver.page_source, 'html.parser').text
 39 | 
 40 | def load_hw_cond(driver):
 41 |     hw_html = bs(driver.page_source, 'html.parser')
 42 |     return len(hw_html.find(id='wtj').findAll('tr')) <= 2 and u'表中数据为空' not in hw_html.text
 43 | 
 44 | def download(pwd, url, cookie, name):
 45 |     r = requests.get(url, cookies=cookie, stream=True)
 46 |     filename = r.headers['Content-Disposition'].split('filename="')[-1].split('"')[0]
 47 |     if filename in os.listdir(pwd):
 48 |         return
 49 |     print('Download %s' % name)
 50 |     open(os.path.join(pwd, filename), 'wb').write(r.content)
 51 | 
 52 | if __name__ == "__main__":
 53 |     ignore = open('.ignore').read().split() if os.path.exists('.ignore') else []
 54 |     chrome_options = Options()
 55 |     chrome_options.add_argument("--headless") # comment for looking its behavior
 56 |     driver = webdriver.Chrome(chrome_options=chrome_options)
 57 |     print('Login ...')
 58 |     driver.get("http://learn.tsinghua.edu.cn/f/login")
 59 |     driver.find_element_by_name("i_user").send_keys(str(raw_input('Username: ')))
 60 |     driver.find_element_by_name("i_pass").send_keys(str(getpass.getpass('Password: ')))
 61 |     driver.find_element_by_id("loginButtonId").click()
 62 |     wait_for_load(load_course_cond, driver)
 63 |     print('\rLogin successfully!')
 64 |     # remember cookie for downloading files
 65 |     cookie = {}
 66 |     for c in driver.get_cookies():
 67 |         cookie[c[u'name'].encode('utf-8')] = c[u'value'].encode('utf-8')
 68 |     print(cookie)
 69 |     exit()
 70 |     root = bs(driver.page_source, 'html.parser')
 71 |     for course in root.findAll(class_='title stu')[:2]:
 72 |         if course.text in ignore:
 73 |             print('Skip ' + course.text)
 74 |             continue
 75 |         print('Sync ' + course.text)
 76 |         if not os.path.exists(course.text):
 77 |             os.mkdir(course.text)
 78 |         os.chdir(course.text)
 79 |         driver.get(root_uri + course.attrs['href'])
 80 | 
 81 |         # 公告
 82 |         if not os.path.exists('公告'):
 83 |             os.mkdir('公告')
 84 |         os.chdir('公告')
 85 |         driver.find_element_by_id("wlxt_kcgg_wlkc_ggb").click()
 86 |         wait_for_load(load_notice_cond, driver)
 87 |         all_notice = bs(driver.page_source, 'html.parser').find(id='table').findAll('a')
 88 |         for notice in all_notice:
 89 |             if os.path.exists(notice.attrs['title'].replace(u'/', u'、') + u'.txt'):
 90 |                 continue
 91 |             driver.get(root_uri + notice.attrs['href'])
 92 |             wait_for_load(load_notice_ele_cond, driver)
 93 |             text = bs(driver.page_source, 'html.parser').find(id='ggnr').text
 94 |             open(notice.attrs['title'].replace(u'/', u'、') + u'.txt', 'w').write(text.encode('utf-8'))
 95 |         os.chdir('..') # leave 公告
 96 |         
 97 |         # 文件
 98 |         if not os.path.exists('文件'):
 99 |             os.mkdir('文件')
100 |         os.chdir('文件')
101 |         driver.find_element_by_id("wlxt_kj_wlkc_kjxxb").click()
102 |         wait_for_load(load_course_file_cond, driver)
103 |         all_tab = bs(driver.page_source, 'html.parser').find(id='tabbox').findAll('li')
104 |         # print(all_tab)
105 |         for tab in all_tab:
106 |             driver.find_element_by_xpath('//li[@kjflid="%s"]' % tab.attrs['kjflid']).click()
107 |             wait_for_load(load_course_file_cond, driver)
108 |             wait_for_load(load_course_file_ele_cond, driver)
109 |             all_file = bs(driver.page_source, 'html.parser').find(class_='playli').findAll('li')
110 |             for file in all_file:
111 |                 download(os.getcwd(), root_uri + '/b/wlxt/kj/wlkc_kjxxb/student/downloadFile?sfgk=0&wjid=%s' % file.attrs['wjid'], cookie, file.attrs['kjbt'])
112 |         os.chdir('..') # leave 文件
113 |         
114 |         # 作业 
115 |         if not os.path.exists('作业'):
116 |             os.mkdir('作业')
117 |         os.chdir('作业')
118 |         driver.find_element_by_id("wlxt_kczy_zy").click()
119 |         wait_for_load(load_hw_cond, driver)
120 |         hw_html = bs(driver.page_source, 'html.parser')
121 |         for hw_list in [hw_html.find(id='wtj'), hw_html.find(id='yjwg'), hw_html.find(id='ypg')]:
122 |             # print(hw_list)
123 |             if u'表中数据为空' in hw_list.text:
124 |                 continue
125 |             for hw in hw_list.findAll('tr')[1:]:
126 |                 driver.get(root_uri + hw.td.next_sibling.a.attrs['href'])
127 |                 html = bs(driver.page_source, 'html.parser')
128 |                 title = html.find(class_='detail').find(class_='right').text.strip()
129 |                 if not os.path.exists(title):
130 |                     os.mkdir(title)
131 |                 os.chdir(title)
132 |                 disc = html.find(class_='detail').find(class_='c55').text.strip()
133 |                 open('作业说明.txt', 'w').write(disc.encode('utf-8'))
134 |                 file_list = html.findAll(class_='ftitle')
135 |                 for f in file_list:
136 |                     download(os.getcwd(), root_uri + f.a.attrs['href'].split('downloadUrl=')[-1], cookie, f.text.replace('\n', ''))
137 |                 os.chdir('..') # leave sub_hw
138 | 
139 |         os.chdir('..') # leave 作业
140 |         os.chdir('..') # leave course
141 | 


--------------------------------------------------------------------------------
/learn-stdio.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import time, argparse
 4 | from learn_async import main
 5 | import os
 6 | 
 7 | 
 8 | def get(help, choices=None, default=None):
 9 |     while True:
10 |         i = input(help)
11 |         if i:
12 |             if choices and i not in choices:
13 |                 pass
14 |             else:
15 |                 if default == []:
16 |                     i = i.split()
17 |                 return i
18 |         else:
19 |             return default
20 | 
21 | 
22 | def get_args():
23 |     parser = argparse.ArgumentParser()
24 |     args = parser.parse_args()
25 |     print("按回车选择默认选项 ...")
26 |     args.all = get(
27 |         "同步所有学期的所有课程 [y/N]：", choices=["Y", "N", "y", "n"], default=None
28 |     )
29 |     if args.all in ["n", "N"]:
30 |         args.all = None
31 |     args.clear = get("清空相同文件 [y/N]：", choices=["Y", "N", "y", "n"], default=None)
32 |     if args.clear in ["n", "N"]:
33 |         args.clear = None
34 |     args.semester = get("学期：", default=[])
35 |     args.course = get("指定课程：", default=[])
36 |     args.ignore = get("忽略课程：", default=[])
37 |     args.dist = get("下载路径(默认当前目录)：", default="")
38 |     if len(args.dist) != 0:
39 |         if not os.path.exists(args.dist):
40 |             multi = get(
41 |                 f"路径{args.dist}不存在，是否创建? [Y/n]",
42 |                 choices=["Y", "N", "y", "n"],
43 |                 default="Y",
44 |             )
45 |             if multi in ["y", "Y"]:
46 |                 os.makedirs(args.dist)
47 |             else:
48 |                 exit()
49 |     multi = get("是否启用多进程?[y/N]", choices=["Y", "N", "y", "n"], default="N")
50 |     if multi in ["y", "Y"]:
51 |         args.multi = True
52 |         args.processes = get("进程数(默认使用所有CPU核心数):", default=None)
53 |     else:
54 |         args.multi = False
55 |     args._pass = ".pass"
56 |     args.cookie = ""
57 |     args.http_proxy = ""
58 |     args.https_proxy = ""
59 |     args.username = ""
60 |     args.password = ""
61 |     return args
62 | 
63 | 
64 | if __name__ == "__main__":
65 |     t = time.time()
66 |     main(get_args())
67 |     t = time.time() - t
68 |     print("耗时: %02d:%02d:%02.0f" % (t // 3600, (t % 3600) // 60, t % 60))
69 |     input("请按任意键退出")
70 | 


--------------------------------------------------------------------------------
/learn.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | __author__ = "Trinkle23897"
  5 | __copyright__ = "Copyright (C) 2019 Trinkle23897"
  6 | __license__ = "MIT"
  7 | __email__ = "463003665@qq.com"
  8 | 
  9 | import os, csv, json, html, urllib, getpass, base64, hashlib, argparse, platform, subprocess
 10 | from tqdm import tqdm
 11 | import urllib.request, http.cookiejar
 12 | from bs4 import BeautifulSoup as bs
 13 | 
 14 | import ssl
 15 | 
 16 | ssl._create_default_https_context = ssl._create_unverified_context
 17 | global dist_path, url, user_agent, headers, cookie, opener, err404
 18 | dist_path = url = user_agent = headers = cookie = opener = err404 = None
 19 | 
 20 | 
 21 | def build_global(args):
 22 |     global dist_path, url, user_agent, headers, cookie, opener, err404
 23 |     dist_path = args.dist
 24 |     url = 'https://learn.tsinghua.edu.cn'
 25 |     user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
 26 |     headers = {'User-Agent': user_agent, 'Connection': 'keep-alive'}
 27 |     handlers = []
 28 |     if args.http_proxy:
 29 |         handlers.append(urllib.request.ProxyHandler({'http': args.http_proxy}))
 30 |     if args.https_proxy:
 31 |         handlers.append(urllib.request.ProxyHandler({'https': args.https_proxy}))
 32 |     cookie = http.cookiejar.MozillaCookieJar()
 33 |     handlers.append(urllib.request.HTTPCookieProcessor(cookie))
 34 |     opener = urllib.request.build_opener(*handlers)
 35 |     urllib.request.install_opener(opener)
 36 |     err404 = '\r\n\r\n\r\n<script type="text/javascript">\r\n\tlocation.href="/";\r\n</script>'
 37 | 
 38 | def get_xsrf_token():
 39 |     cookie_obj = cookie._cookies.get('learn.tsinghua.edu.cn', dict()).get('/', dict()).get('XSRF-TOKEN', None)
 40 |     return cookie_obj.value if cookie_obj else None
 41 | 
 42 | def open_page(uri, values={}):
 43 |     post_data = urllib.parse.urlencode(values).encode() if values else None
 44 |     request = urllib.request.Request(uri if uri.startswith('http') else url + uri, post_data, headers)
 45 |     try:
 46 |         response = opener.open(request)
 47 |         return response
 48 |     except urllib.error.URLError as e:
 49 |         print(uri, e.code, ':', e.reason)
 50 | 
 51 | 
 52 | def get_page(uri, values={}):
 53 |     data = open_page(uri, values)
 54 |     if data:
 55 |         return data.read().decode()
 56 | 
 57 | 
 58 | def get_json(uri, values={}):
 59 |     xsrf_token = get_xsrf_token()
 60 |     if xsrf_token:
 61 |         if '?' not in uri:
 62 |             uri = uri + f'?_csrf={xsrf_token}'
 63 |         else:
 64 |             uri = uri + f'&_csrf={xsrf_token}'
 65 |     try:
 66 |         page = get_page(uri, values)
 67 |         result = json.loads(page)
 68 |         return result
 69 |     except:
 70 |         return {}
 71 | 
 72 | 
 73 | def escape(s):
 74 |     return html.unescape(s).replace(os.path.sep, '、').replace(':', '_').replace(' ', '_').replace('\t', '').replace('?', '.').replace('/', '_').replace('\'', '_').replace('<', '').replace('>', '').replace('#', '').replace(';', '').replace('*', '_').replace("\"", '_').replace("\'", '_').replace('|', '')
 75 | 
 76 | 
 77 | def login(username, password):
 78 |     login_uri = 'https://id.tsinghua.edu.cn/do/off/ui/auth/login/post/bb5df85216504820be7bba2b0ae1535b/0?/login.do'
 79 |     values = {'i_user': username, 'i_pass': password, 'atOnce': 'true'}
 80 |     info = get_page(login_uri, values)
 81 |     successful = 'SUCCESS' in info
 82 |     print('User %s login successfully' % (username) if successful else 'User %s login failed!' % (username))
 83 |     if successful:
 84 |         get_page(get_page(info.split('replace("')[-1].split('");\n')[0]).split('location="')[1].split('";\r\n')[0])
 85 |     return successful
 86 | 
 87 | 
 88 | def get_courses(args):
 89 |     try:
 90 |         now = get_json('/b/kc/zhjw_v_code_xnxq/getCurrentAndNextSemester')['result']['xnxq']
 91 |         if args.all or args.course or args.semester:
 92 |             query_list = [x for x in get_json('/b/wlxt/kc/v_wlkc_xs_xktjb_coassb/queryxnxq') if x is not None]
 93 |             query_list.sort()
 94 |             if args.semester:
 95 |                 query_list_ = [q for q in query_list if q in args.semester]
 96 |                 if len(query_list_) == 0:
 97 |                     print('Invalid semester, choices: ', query_list)
 98 |                     return []
 99 |                 query_list = query_list_
100 |         else:
101 |             query_list = [now]
102 |     except:
103 |         print('您被退学了！')
104 |         return []
105 |     courses = []
106 |     for q in query_list:
107 |         try:
108 |             c_stu = get_json('/b/wlxt/kc/v_wlkc_xs_xkb_kcb_extend/student/loadCourseBySemesterId/' + q + '/zh/')['resultList']
109 |         except:
110 |             c_stu = []
111 |         try:
112 |             c_ta = get_json('/b/kc/v_wlkc_kcb/queryAsorCoCourseList/%s/0' % q)['resultList']
113 |         except:
114 |             c_ta = []
115 |         current_courses = []
116 |         for c in c_stu:
117 |             c['jslx'] = '3'
118 |             current_courses.append(c)
119 |         for c in c_ta:
120 |             c['jslx'] = '0'
121 |             current_courses.append(c)
122 |         courses += current_courses
123 |     escape_c = []
124 | 
125 |     def escape_course_fn(c):
126 |         return escape(c).replace(' ', '').replace('_', '').replace('（', '(').replace('）', ')')
127 | 
128 |     for c in courses:
129 |         c['kcm'] = escape_course_fn(c['kcm'])
130 |         escape_c.append(c)
131 |     courses = escape_c
132 |     if args.course:
133 |         args.course = [escape_course_fn(c) for c in args.course]
134 |         courses = [c for c in courses if c['kcm'] in args.course]
135 |     if args.ignore:
136 |         args.ignore = [escape_course_fn(c) for c in args.ignore]
137 |         courses = [c for c in courses if c['kcm'] not in args.ignore]
138 |     return courses
139 | 
140 | 
141 | class TqdmUpTo(tqdm):
142 |     def update_to(self, b=1, bsize=1, tsize=None):
143 |         if tsize is not None:
144 |             self.total = tsize
145 |         self.update(b * bsize - self.n)
146 | 
147 | 
148 | def download(uri, name):
149 |     filename = escape(name)
150 |     if os.path.exists(filename) and os.path.getsize(filename) or 'Connection__close' in filename:
151 |         return
152 |     try:
153 |         with TqdmUpTo(ascii=True, dynamic_ncols=True, unit='B', unit_scale=True, miniters=1, desc=filename) as t:
154 |             urllib.request.urlretrieve(url + uri, filename=filename, reporthook=t.update_to, data=None)
155 |     except:
156 |         print('Could not download file %s ... removing broken file' % filename)
157 |         if os.path.exists(filename):
158 |             os.remove(filename)
159 |         return
160 | 
161 | 
162 | def build_notify(s):
163 |     tp = bs(base64.b64decode(s['ggnr']).decode('utf-8'), 'html.parser').text if s['ggnr'] else ''
164 |     st = '题目: %s\n发布人: %s\n发布时间: %s\n\n内容: %s\n' % (s['bt'], s['fbr'], s['fbsjStr'], tp)
165 |     return st
166 | 
167 | 
168 | def sync_notify(c):
169 |     global dist_path
170 |     pre = os.path.join(dist_path, c['kcm'], '公告')
171 |     if not os.path.exists(pre):
172 |         os.makedirs(pre)
173 |     try:
174 |         data = {'aoData': [{"name": "wlkcid", "value": c['wlkcid']}]}
175 |         if c['_type'] == 'student':
176 |             notify = get_json('/b/wlxt/kcgg/wlkc_ggb/student/pageListXs', data)['object']['aaData']
177 |         else:
178 |             notify = get_json('/b/wlxt/kcgg/wlkc_ggb/teacher/pageList', data)['object']['aaData']
179 |     except:
180 |         return
181 |     for n in notify:
182 |         if not os.path.exists(os.path.join(pre, escape(n['bt']))):
183 |             os.makedirs(os.path.join(pre, escape(n['bt'])))
184 |         path = os.path.join(os.path.join(pre, escape(n['bt'])), escape(n['bt']) + '.txt')
185 |         open(path, 'w', encoding='utf-8').write(build_notify(n))
186 | 
187 |         if n.get('fjmc') is not None:
188 |             html = get_page('/f/wlxt/kcgg/wlkc_ggb/%s/beforeViewXs?wlkcid=%s&id=%s' % (c['_type'], n['wlkcid'], n['ggid']))
189 |             soup = bs(html, 'html.parser')
190 | 
191 |             link = soup.find('a', class_='ml-10')
192 | 
193 |             now = os.getcwd()
194 |             os.chdir(os.path.join(pre, escape(n['bt'])))
195 |             name = n['fjmc']
196 |             download(link['href'], name=name)
197 |             os.chdir(now)
198 | 
199 | 
200 | def sync_file(c):
201 |     global dist_path
202 |     now = os.getcwd()
203 |     pre = os.path.join(dist_path, c['kcm'], '课件')
204 |     if not os.path.exists(pre):
205 |         os.makedirs(pre)
206 | 
207 |     if c['_type'] == 'student':
208 |         files = get_json('/b/wlxt/kj/wlkc_kjxxb/student/kjxxbByWlkcidAndSizeForStudent?wlkcid=%s&size=0' % c['wlkcid'])['object']
209 |     else:
210 |         try:
211 |             files = get_json('/b/wlxt/kj/v_kjxxb_wjwjb/teacher/queryByWlkcid?wlkcid=%s&size=0' % c['wlkcid'])['object']['resultsList']
212 |         except:  # None
213 |             return
214 | 
215 |     rows = json.loads(get_page(f'/b/wlxt/kj/wlkc_kjflb/{c["_type"]}/pageList?_csrf={get_xsrf_token()}&wlkcid={c["wlkcid"]}'))['object']['rows']
216 | 
217 |     os.chdir(pre)
218 |     for r in rows:
219 |         if c['_type'] == 'student':
220 |             row_files = get_json(f'/b/wlxt/kj/wlkc_kjxxb/{c["_type"]}/kjxxb/{c["wlkcid"]}/{r["kjflid"]}')['object']
221 |         else:
222 |             data = {'aoData': [
223 |                 {"name": "wlkcid", "value": c['wlkcid']},
224 |                 {"name": "kjflid","value": r["kjflid"]},
225 |                 {"name": "iDisplayStart","value": 0},
226 |                 {"name": "iDisplayLength","value": "-1"},
227 |             ]}
228 |             row_files = get_json('/b/wlxt/kj/v_kjxxb_wjwjb/teacher/pageList', data)['object']['aaData']
229 |         if not os.path.exists(escape(r['bt'])):
230 |             os.makedirs(escape(r['bt']))
231 |         rnow = os.getcwd()
232 |         os.chdir(escape(r['bt']))
233 |         for rf in row_files:
234 |             wjlx = None
235 |             if c['_type'] == 'student':
236 |                 flag = False
237 |                 for f in files:
238 |                     if rf[7] == f['wjid']:
239 |                         flag = True
240 |                         wjlx = f['wjlx']
241 |                         break
242 |                 wjid = rf[7]
243 |                 name = rf[1]
244 |             else:
245 |                 flag = True
246 |                 wjlx = rf['wjlx']
247 |                 wjid = rf['wjid']
248 |                 name = rf['bt']
249 |             if flag:
250 |                 if wjlx:
251 |                     name += '.' + wjlx
252 |                 download(f'/b/wlxt/kj/wlkc_kjxxb/{c["_type"]}/downloadFile?sfgk=0&wjid={wjid}', name=name)
253 |             else:
254 |                 print(f'文件{rf[1]}出错')
255 |         os.chdir(rnow)
256 | 
257 |     os.chdir(now)
258 | 
259 | 
260 | def sync_info(c):
261 |     global dist_path
262 |     pre = os.path.join(dist_path, c['kcm'], '课程信息.txt')
263 |     try:
264 |         if c['_type'] == 'student':
265 |             html = get_page('/f/wlxt/kc/v_kcxx_jskcxx/student/beforeXskcxx?wlkcid=%s&sfgk=-1' % c['wlkcid'])
266 |         else:
267 |             html = get_page('/f/wlxt/kc/v_kcxx_jskcxx/teacher/beforeJskcxx?wlkcid=%s&sfgk=-1' % c['wlkcid'])
268 |         open(pre, 'w').write('\n'.join(bs(html, 'html.parser').find(class_='course-w').text.split()))
269 |     except:
270 |         return
271 | 
272 | 
273 | def append_hw_csv(fname, stu):
274 |     try:
275 |         f = [i for i in csv.reader(open(fname)) if i]
276 |     except:
277 |         f = [['学号', '姓名', '院系', '班级', '上交时间', '状态', '成绩', '批阅老师']]
278 |     info_str = [stu['xh'], stu['xm'], stu['dwmc'], stu['bm'], stu['scsjStr'], stu['zt'], stu['cj'], stu['jsm']]
279 |     xhs = [i[0] for i in f]
280 |     if stu['xh'] in xhs:
281 |         i = xhs.index(stu['xh'])
282 |         f[i] = info_str
283 |     else:
284 |         f.append(info_str)
285 |     csv.writer(open(fname, 'w')).writerows(f)
286 | 
287 | 
288 | def sync_hw(c):
289 |     global dist_path
290 |     now = os.getcwd()
291 |     pre = os.path.join(dist_path, c['kcm'], '作业')
292 |     if not os.path.exists(pre):
293 |         os.makedirs(pre)
294 |     data = {'aoData': [{"name": "wlkcid", "value": c['wlkcid']}]}
295 |     if c['_type'] == 'student':
296 |         hws = []
297 |         for hwtype in ['zyListWj', 'zyListYjwg', 'zyListYpg']:
298 |             try:
299 |                 hws += get_json('/b/wlxt/kczy/zy/student/%s' % hwtype, data)['object']['aaData']
300 |             except:
301 |                 continue
302 |     else:
303 |         hws = get_json('/b/wlxt/kczy/zy/teacher/pageList', data)['object']['aaData']
304 |     for hw in hws:
305 |         path = os.path.join(pre, escape(hw['bt']))
306 |         if not os.path.exists(path):
307 |             os.makedirs(path)
308 |         if c['_type'] == 'student':
309 |             append_hw_csv(os.path.join(path, 'info_%s.csv' % c['wlkcid']), hw)
310 |             page = bs(get_page('/f/wlxt/kczy/zy/student/viewCj?wlkcid=%s&zyid=%s&xszyid=%s' % (hw['wlkcid'], hw['zyid'], hw['xszyid'])), 'html.parser')
311 |             files = page.findAll(class_='fujian')
312 |             for i, f in enumerate(files):
313 |                 if len(f.findAll('a')) == 0:
314 |                     continue
315 |                 os.chdir(path)  # to avoid filename too long
316 |                 name = f.findAll('a')[0].text
317 |                 if i >= 2 and not name.startswith(hw['xh']):
318 |                     name = hw['xh'] + '_' + name
319 |                 download('/b/wlxt/kczy/zy/%s/downloadFile/%s/%s' % (c['_type'], hw['wlkcid'], f.findAll('a')[-1].attrs['onclick'].split("ZyFile('")[-1][:-2]), name=name)
320 |                 os.chdir(now)
321 |         else:
322 |             print(hw['bt'])
323 |             data = {'aoData': [{"name": "wlkcid", "value": c['wlkcid']}, {"name": "zyid", "value": hw['zyid']}]}
324 |             stus = get_json('/b/wlxt/kczy/xszy/teacher/getDoneInfo', data)['object']['aaData']
325 |             for stu in stus:
326 |                 append_hw_csv(os.path.join(path, 'info_%s.csv' % c['wlkcid']), stu)
327 |                 page = bs(get_page('/f/wlxt/kczy/xszy/teacher/beforePiYue?wlkcid=%s&xszyid=%s' % (stu['wlkcid'], stu['xszyid'])), 'html.parser')
328 |                 files = page.findAll(class_='wdhere')
329 |                 os.chdir(path)  # to avoid filename too long
330 |                 for f in files:
331 |                     if f.text == '\n':
332 |                         continue
333 |                     try:
334 |                         id = f.findAll('span')[0].attrs['onclick'].split("'")[1]
335 |                         name = f.findAll('span')[0].text
336 |                     except:
337 |                         try:
338 |                             id = f.findAll('a')[-1].attrs['onclick'].split("'")[1]
339 |                             name = f.findAll('a')[0].text
340 |                         except:  # another error
341 |                             continue
342 |                     if not name.startswith(stu['xh']):
343 |                         name = stu['xh'] + '_' + name
344 |                     download('/b/wlxt/kczy/xszy/teacher/downloadFile/%s/%s' % (stu['wlkcid'], id), name=name)
345 |                 os.chdir(now)
346 |             stus = get_json('/b/wlxt/kczy/xszy/teacher/getUndoInfo', data)['object']['aaData']
347 |             for stu in stus:
348 |                 append_hw_csv(os.path.join(path, 'info_%s.csv' % c['wlkcid']), stu)
349 | 
350 | 
351 | def build_discuss(s):
352 |     return '课程：%s\n内容：%s\n学号：%s\n姓名：%s\n发布时间:%s\n最后回复：%s\n回复时间：%s\n' % (s['kcm'], s['bt'], s['fbr'], s['fbrxm'], s['fbsj'], s['zhhfrxm'], s['zhhfsj'])
353 | 
354 | 
355 | def sync_discuss(c):
356 |     global dist_path
357 |     pre = os.path.join(dist_path, c['kcm'], '讨论')
358 |     if not os.path.exists(pre):
359 |         os.makedirs(pre)
360 |     try:
361 |         disc = get_json('/b/wlxt/bbs/bbs_tltb/%s/kctlList?wlkcid=%s' % (c['_type'], c['wlkcid']))['object']['resultsList']
362 |     except:
363 |         return
364 |     for d in disc:
365 |         filename = os.path.join(pre, escape(d['bt']) + '.txt')
366 |         if os.path.exists(filename):
367 |             continue
368 |         try:
369 |             html = get_page('/f/wlxt/bbs/bbs_tltb/%s/viewTlById?wlkcid=%s&id=%s&tabbh=2&bqid=%s' % (c['_type'], d['wlkcid'], d['id'], d['bqid']))
370 |             open(filename, 'w').write(build_discuss(d) + bs(html, 'html.parser').find(class_='detail').text)
371 |         except:
372 |             pass
373 | 
374 | 
375 | def gethash(fname):
376 |     if platform.system() == 'Linux':
377 |         return subprocess.check_output(['md5sum', fname]).decode().split()[0]
378 |     hash_md5 = hashlib.md5()
379 |     with open(fname, "rb") as f:
380 |         for chunk in iter(lambda: f.read(4096), b""):
381 |             hash_md5.update(chunk)
382 |     return hash_md5.hexdigest()
383 | 
384 | 
385 | def dfs_clean(d):
386 |     subdirs = [os.path.join(d, i) for i in os.listdir(d) if os.path.isdir(os.path.join(d, i))]
387 |     for i in subdirs:
388 |         dfs_clean(i)
389 |     files = [os.path.join(d, i) for i in os.listdir(d) if os.path.isfile(os.path.join(d, i))]
390 |     info = {}
391 |     for f in files:
392 |         if os.path.getsize(f):
393 |             info[f] = {'size': os.path.getsize(f), 'time': os.path.getmtime(f), 'hash': '', 'rm': 0}
394 |     info = list({k: v for k, v in sorted(info.items(), key=lambda item: item[1]['size'])}.items())
395 |     for i in range(len(info)):
396 |         for j in range(i):
397 |             if info[i][1]['size'] == info[j][1]['size']:
398 |                 if info[i][1]['hash'] == '':
399 |                     info[i][1]['hash'] = gethash(info[i][0])
400 |                 if info[j][1]['hash'] == '':
401 |                     info[j][1]['hash'] = gethash(info[j][0])
402 |                 if info[i][1]['hash'] == info[j][1]['hash']:
403 |                     if info[i][1]['time'] < info[j][1]['time']:
404 |                         info[i][1]['rm'] = 1
405 |                     elif info[i][1]['time'] > info[j][1]['time']:
406 |                         info[j][1]['rm'] = 1
407 |                     elif len(info[i][0]) < len(info[j][0]):
408 |                         info[i][1]['rm'] = 1
409 |                     elif len(info[i][0]) > len(info[j][0]):
410 |                         info[j][1]['rm'] = 1
411 |     rm = [i[0] for i in info if i[1]['rm'] or i[1]['size'] == 0]
412 |     if rm:
413 |         print('rmlist:', rm)
414 |         for f in rm:
415 |             os.remove(f)
416 | 
417 | 
418 | def clear(args):
419 |     courses = [i for i in os.listdir('.') if os.path.isdir(i) and not i.startswith('.')]
420 |     if args.all:
421 |         pass
422 |     else:
423 |         if args.course:
424 |             courses = [i for i in courses if i in args.course]
425 |         if args.ignore:
426 |             courses = [i for i in courses if i not in args.ignore]
427 |     courses.sort()
428 |     for i, c in enumerate(courses):
429 |         print('Checking #%d %s' % (i + 1, c))
430 |         for subdir in ['课件', '作业']:
431 |             d = os.path.join(c, subdir)
432 |             if os.path.exists(d):
433 |                 dfs_clean(d)
434 | 
435 | 
436 | def get_args():
437 |     parser = argparse.ArgumentParser()
438 |     parser.add_argument("--all", action='store_true')
439 |     parser.add_argument("--clear", action='store_true', help='remove the duplicate course file')
440 |     parser.add_argument("--semester", nargs='+', type=str, default=[])
441 |     parser.add_argument("--ignore", nargs='+', type=str, default=[])
442 |     parser.add_argument("--course", nargs='+', type=str, default=[])
443 |     parser.add_argument('-p', "--_pass", type=str, default='.pass')
444 |     parser.add_argument('-c', "--cookie", type=str, default='', help='Netscape HTTP Cookie File')
445 |     parser.add_argument('-d', '--dist', type=str, default='', help='download path')
446 |     parser.add_argument('--http_proxy', type=str, default='', help='http proxy')
447 |     parser.add_argument('--https_proxy', type=str, default='', help='https proxy')
448 |     args = parser.parse_args()
449 |     return args
450 | 
451 | 
452 | def main(args):
453 |     global dist_path
454 |     build_global(args)
455 |     assert (dist_path is not None) and (url is not None) and (user_agent is not None) and (headers is not None) and (cookie is not None) and (opener is not None) and (err404 is not None)
456 |     if args.clear:
457 |         clear(args)
458 |         exit()
459 |     args.login = False
460 |     if args.cookie:
461 |         cookie.load(args.cookie, ignore_discard=True, ignore_expires=True)
462 |         args.login = (get_page('/b/wlxt/kc/v_wlkc_xs_xktjb_coassb/queryxnxq') != err404)
463 |         print('login successfully' if args.login else 'login failed!')
464 |     else:
465 |         if os.path.exists(args._pass):
466 |             username, password = open(args._pass).read().split()
467 |         else:
468 |             username = input('请输入INFO账号：')
469 |             password = getpass.getpass('请输入INFO密码：')
470 |         args.login = login(username, password)
471 |     if args.login:
472 |         courses = get_courses(args)
473 |         for c in courses:
474 |             c['_type'] = {'0': 'teacher', '3': 'student'}[c['jslx']]
475 |             print('Sync ' + c['xnxq'] + ' ' + c['kcm'])
476 |             if not os.path.exists(os.path.join(dist_path, c['kcm'])):
477 |                 os.makedirs(os.path.join(dist_path, c['kcm']))
478 |             sync_info(c)
479 |             sync_discuss(c)
480 |             sync_notify(c)
481 |             sync_file(c)
482 |             sync_hw(c)
483 | 
484 | 
485 | if __name__ == '__main__':
486 |     main(get_args())
487 |  


--------------------------------------------------------------------------------
/learn_async.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | __author__ = "Trinkle23897"
  5 | __copyright__ = "Copyright (C) 2019 Trinkle23897"
  6 | __license__ = "MIT"
  7 | __email__ = "463003665@qq.com"
  8 | __modified_by__ = "zycccishere"
  9 | 
 10 | import os, csv, json, html, urllib, getpass, base64, hashlib, argparse, platform, subprocess
 11 | from tqdm import tqdm
 12 | import urllib.request, http.cookiejar
 13 | from bs4 import BeautifulSoup as bs
 14 | import multiprocessing as mp
 15 | from functools import partial
 16 | 
 17 | import ssl
 18 | 
 19 | ssl._create_default_https_context = ssl._create_unverified_context
 20 | global dist_path, url, user_agent, headers, cookie, opener, err404
 21 | dist_path = url = user_agent = headers = cookie = opener = err404 = None
 22 | 
 23 | 
 24 | def build_global(args):
 25 |     global dist_path, url, user_agent, headers, cookie, opener, err404
 26 |     dist_path = args.dist
 27 |     url = "https://learn.tsinghua.edu.cn"
 28 |     user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
 29 |     headers = {"User-Agent": user_agent, "Connection": "keep-alive"}
 30 |     handlers = []
 31 | 
 32 |     # 添加SSL上下文配置
 33 |     context = ssl.create_default_context()
 34 |     context.options |= 0x4  # OP_LEGACY_SERVER_CONNECT
 35 |     context.check_hostname = False
 36 |     context.verify_mode = ssl.CERT_NONE
 37 |     handlers.append(urllib.request.HTTPSHandler(context=context))
 38 | 
 39 |     if args.http_proxy:
 40 |         handlers.append(urllib.request.ProxyHandler({"http": args.http_proxy}))
 41 |     if args.https_proxy:
 42 |         handlers.append(urllib.request.ProxyHandler({"https": args.https_proxy}))
 43 |     cookie = http.cookiejar.MozillaCookieJar()
 44 |     handlers.append(urllib.request.HTTPCookieProcessor(cookie))
 45 |     opener = urllib.request.build_opener(*handlers)
 46 |     urllib.request.install_opener(opener)
 47 |     err404 = '\r\n\r\n\r\n<script type="text/javascript">\r\n\tlocation.href="/";\r\n</script>'
 48 | 
 49 | 
 50 | def get_xsrf_token():
 51 |     cookie_obj = (
 52 |         cookie._cookies.get("learn.tsinghua.edu.cn", dict())
 53 |         .get("/", dict())
 54 |         .get("XSRF-TOKEN", None)
 55 |     )
 56 |     return cookie_obj.value if cookie_obj else None
 57 | 
 58 | 
 59 | def open_page(uri, values={}):
 60 |     post_data = urllib.parse.urlencode(values).encode() if values else None
 61 |     request = urllib.request.Request(
 62 |         uri if uri.startswith("http") else url + uri, post_data, headers
 63 |     )
 64 |     try:
 65 |         response = opener.open(request)
 66 |         return response
 67 |     except urllib.error.URLError as e:
 68 |         if hasattr(e, "code"):
 69 |             print(uri, e.code, ":", e.reason)
 70 |         else:
 71 |             print(uri, ":", e.reason)
 72 | 
 73 | 
 74 | def get_page(uri, values={}):
 75 |     data = open_page(uri, values)
 76 |     if data:
 77 |         return data.read().decode()
 78 | 
 79 | 
 80 | def get_json(uri, values={}):
 81 |     xsrf_token = get_xsrf_token()
 82 |     if xsrf_token:
 83 |         if "?" not in uri:
 84 |             uri = uri + f"?_csrf={xsrf_token}"
 85 |         else:
 86 |             uri = uri + f"&_csrf={xsrf_token}"
 87 |     try:
 88 |         page = get_page(uri, values)
 89 |         result = json.loads(page)
 90 |         return result
 91 |     except:
 92 |         return {}
 93 | 
 94 | 
 95 | def escape(s):
 96 |     return (
 97 |         html.unescape(s)
 98 |         .replace(os.path.sep, "、")
 99 |         .replace(":", "_")
100 |         .replace(" ", "_")
101 |         .replace("\t", "")
102 |         .replace("?", ".")
103 |         .replace("/", "_")
104 |         .replace("'", "_")
105 |         .replace("<", "")
106 |         .replace(">", "")
107 |         .replace("#", "")
108 |         .replace(";", "")
109 |         .replace("*", "_")
110 |         .replace('"', "_")
111 |         .replace("'", "_")
112 |         .replace("|", "")
113 |     )
114 | 
115 | 
116 | def login(username, password):
117 |     login_uri = "https://id.tsinghua.edu.cn/do/off/ui/auth/login/post/bb5df85216504820be7bba2b0ae1535b/0?/login.do"
118 |     values = {"i_user": username, "i_pass": password, "atOnce": "true"}
119 |     info = get_page(login_uri, values)
120 |     successful = "SUCCESS" in info
121 |     # print(
122 |     #     "User %s login successfully" % (username)
123 |     #     if successful
124 |     #     else "User %s login failed!" % (username)
125 |     # )
126 |     if not successful:
127 |         print("User %s login failed!" % (username))
128 |         return False
129 |     if successful:
130 |         get_page(
131 |             get_page(info.split('replace("')[-1].split('");\n')[0])
132 |             .split('location="')[1]
133 |             .split('";\r\n')[0]
134 |         )
135 |     return successful
136 | 
137 | 
138 | def get_courses(args):
139 |     try:
140 |         now = get_json("/b/kc/zhjw_v_code_xnxq/getCurrentAndNextSemester")["result"][
141 |             "xnxq"
142 |         ]
143 |         if args.all or args.course or args.semester:
144 |             query_list = [
145 |                 x
146 |                 for x in get_json("/b/wlxt/kc/v_wlkc_xs_xktjb_coassb/queryxnxq")
147 |                 if x is not None
148 |             ]
149 |             query_list.sort()
150 |             if args.semester:
151 |                 query_list_ = [q for q in query_list if q in args.semester]
152 |                 if len(query_list_) == 0:
153 |                     print("Invalid semester, choices: ", query_list)
154 |                     return []
155 |                 query_list = query_list_
156 |         else:
157 |             query_list = [now]
158 |     except:
159 |         print("您被退学了！")
160 |         return []
161 |     courses = []
162 |     for q in query_list:
163 |         try:
164 |             c_stu = get_json(
165 |                 "/b/wlxt/kc/v_wlkc_xs_xkb_kcb_extend/student/loadCourseBySemesterId/"
166 |                 + q
167 |                 + "/zh/"
168 |             )["resultList"]
169 |         except:
170 |             c_stu = []
171 |         try:
172 |             c_ta = get_json("/b/kc/v_wlkc_kcb/queryAsorCoCourseList/%s/0" % q)[
173 |                 "resultList"
174 |             ]
175 |         except:
176 |             c_ta = []
177 |         current_courses = []
178 |         for c in c_stu:
179 |             c["jslx"] = "3"
180 |             current_courses.append(c)
181 |         for c in c_ta:
182 |             c["jslx"] = "0"
183 |             current_courses.append(c)
184 |         courses += current_courses
185 |     escape_c = []
186 | 
187 |     def escape_course_fn(c):
188 |         return (
189 |             escape(c)
190 |             .replace(" ", "")
191 |             .replace("_", "")
192 |             .replace("（", "(")
193 |             .replace("）", ")")
194 |         )
195 | 
196 |     for c in courses:
197 |         c["kcm"] = escape_course_fn(c["kcm"])
198 |         escape_c.append(c)
199 |     courses = escape_c
200 |     if args.course:
201 |         args.course = [escape_course_fn(c) for c in args.course]
202 |         courses = [c for c in courses if c["kcm"] in args.course]
203 |     if args.ignore:
204 |         args.ignore = [escape_course_fn(c) for c in args.ignore]
205 |         courses = [c for c in courses if c["kcm"] not in args.ignore]
206 |     return courses
207 | 
208 | 
209 | class TqdmUpTo(tqdm):
210 |     def update_to(self, b=1, bsize=1, tsize=None):
211 |         if tsize is not None:
212 |             self.total = tsize
213 |         self.update(b * bsize - self.n)
214 | 
215 | 
216 | def download(uri, name, target_dir=None):
217 |     filename = escape(name)
218 | 
219 |     # 使用绝对路径
220 |     if target_dir:
221 |         filename = os.path.join(target_dir, filename)
222 | 
223 |     if (
224 |         os.path.exists(filename)
225 |         and os.path.getsize(filename)
226 |         or "Connection__close" in filename
227 |     ):
228 |         return
229 | 
230 |     try:
231 |         with TqdmUpTo(
232 |             ascii=True,
233 |             dynamic_ncols=True,
234 |             unit="B",
235 |             unit_scale=True,
236 |             miniters=1,
237 |             desc=filename,
238 |         ) as t:
239 |             urllib.request.urlretrieve(
240 |                 url + uri, filename=filename, reporthook=t.update_to, data=None
241 |             )
242 |     except Exception as e:
243 |         print(
244 |             f"Could not download file {filename} ... removing broken file. Error: {str(e)}"
245 |         )
246 |         if os.path.exists(filename):
247 |             os.remove(filename)
248 |         return
249 | 
250 | 
251 | def build_notify(s):
252 |     tp = (
253 |         bs(base64.b64decode(s["ggnr"]).decode("utf-8"), "html.parser").text
254 |         if s["ggnr"]
255 |         else ""
256 |     )
257 |     st = "题目: %s\n发布人: %s\n发布时间: %s\n\n内容: %s\n" % (
258 |         s["bt"],
259 |         s["fbr"],
260 |         s["fbsjStr"],
261 |         tp,
262 |     )
263 |     return st
264 | 
265 | 
266 | def makedirs_safe(directory):
267 |     try:
268 |         if not os.path.exists(directory):
269 |             os.makedirs(directory)
270 |     except FileExistsError:
271 |         pass
272 | 
273 | 
274 | def sync_notify(c):
275 |     global dist_path
276 |     pre = os.path.join(dist_path, c["kcm"], "公告")
277 |     makedirs_safe(pre)
278 |     try:
279 |         data = {"aoData": [{"name": "wlkcid", "value": c["wlkcid"]}]}
280 |         if c["_type"] == "student":
281 |             notify = get_json("/b/wlxt/kcgg/wlkc_ggb/student/pageListXs", data)[
282 |                 "object"
283 |             ]["aaData"]
284 |         else:
285 |             notify = get_json("/b/wlxt/kcgg/wlkc_ggb/teacher/pageList", data)["object"][
286 |                 "aaData"
287 |             ]
288 |     except:
289 |         return
290 |     for n in notify:
291 |         makedirs_safe(os.path.join(pre, escape(n["bt"])))
292 |         path = os.path.join(
293 |             os.path.join(pre, escape(n["bt"])), escape(n["bt"]) + ".txt"
294 |         )
295 |         open(path, "w", encoding="utf-8").write(build_notify(n))
296 | 
297 |         if n.get("fjmc") is not None:
298 |             html = get_page(
299 |                 "/f/wlxt/kcgg/wlkc_ggb/%s/beforeViewXs?wlkcid=%s&id=%s"
300 |                 % (c["_type"], n["wlkcid"], n["ggid"])
301 |             )
302 |             soup = bs(html, "html.parser")
303 | 
304 |             link = soup.find("a", class_="ml-10")
305 | 
306 |             now = os.getcwd()
307 |             os.chdir(os.path.join(pre, escape(n["bt"])))
308 |             name = n["fjmc"]
309 |             download(link["href"], name=name)
310 |             os.chdir(now)
311 | 
312 | 
313 | def sync_file(c):
314 |     global dist_path
315 |     now = os.getcwd()
316 |     pre = os.path.join(dist_path, c["kcm"], "课件")
317 |     makedirs_safe(pre)
318 | 
319 |     if c["_type"] == "student":
320 |         files = get_json(
321 |             "/b/wlxt/kj/wlkc_kjxxb/student/kjxxbByWlkcidAndSizeForStudent?wlkcid=%s&size=0"
322 |             % c["wlkcid"]
323 |         )["object"]
324 |     else:
325 |         try:
326 |             files = get_json(
327 |                 "/b/wlxt/kj/v_kjxxb_wjwjb/teacher/queryByWlkcid?wlkcid=%s&size=0"
328 |                 % c["wlkcid"]
329 |             )["object"]["resultsList"]
330 |         except:  # None
331 |             return
332 | 
333 |     rows = json.loads(
334 |         get_page(
335 |             f'/b/wlxt/kj/wlkc_kjflb/{c["_type"]}/pageList?_csrf={get_xsrf_token()}&wlkcid={c["wlkcid"]}'
336 |         )
337 |     )["object"]["rows"]
338 | 
339 |     os.chdir(pre)
340 |     for r in rows:
341 |         if c["_type"] == "student":
342 |             row_files = get_json(
343 |                 f'/b/wlxt/kj/wlkc_kjxxb/{c["_type"]}/kjxxb/{c["wlkcid"]}/{r["kjflid"]}'
344 |             )["object"]
345 |         else:
346 |             data = {
347 |                 "aoData": [
348 |                     {"name": "wlkcid", "value": c["wlkcid"]},
349 |                     {"name": "kjflid", "value": r["kjflid"]},
350 |                     {"name": "iDisplayStart", "value": 0},
351 |                     {"name": "iDisplayLength", "value": "-1"},
352 |                 ]
353 |             }
354 |             row_files = get_json("/b/wlxt/kj/v_kjxxb_wjwjb/teacher/pageList", data)[
355 |                 "object"
356 |             ]["aaData"]
357 |         makedirs_safe(escape(r["bt"]))
358 |         rnow = os.getcwd()
359 |         os.chdir(escape(r["bt"]))
360 |         for rf in row_files:
361 |             wjlx = None
362 |             if c["_type"] == "student":
363 |                 flag = False
364 |                 for f in files:
365 |                     if rf[7] == f["wjid"]:
366 |                         flag = True
367 |                         wjlx = f["wjlx"]
368 |                         break
369 |                 wjid = rf[7]
370 |                 name = rf[1]
371 |             else:
372 |                 flag = True
373 |                 wjlx = rf["wjlx"]
374 |                 wjid = rf["wjid"]
375 |                 name = rf["bt"]
376 |             if flag:
377 |                 if wjlx:
378 |                     name += "." + wjlx
379 |                 download(
380 |                     f'/b/wlxt/kj/wlkc_kjxxb/{c["_type"]}/downloadFile?sfgk=0&wjid={wjid}',
381 |                     name=name,
382 |                 )
383 |             else:
384 |                 print(f"文件{rf[1]}出错")
385 |         os.chdir(rnow)
386 | 
387 |     os.chdir(now)
388 | 
389 | 
390 | def sync_info(c):
391 |     global dist_path
392 |     pre = os.path.join(dist_path, c["kcm"], "课程信息.txt")
393 |     try:
394 |         if c["_type"] == "student":
395 |             html = get_page(
396 |                 "/f/wlxt/kc/v_kcxx_jskcxx/student/beforeXskcxx?wlkcid=%s&sfgk=-1"
397 |                 % c["wlkcid"]
398 |             )
399 |         else:
400 |             html = get_page(
401 |                 "/f/wlxt/kc/v_kcxx_jskcxx/teacher/beforeJskcxx?wlkcid=%s&sfgk=-1"
402 |                 % c["wlkcid"]
403 |             )
404 |         open(pre, "w").write(
405 |             "\n".join(bs(html, "html.parser").find(class_="course-w").text.split())
406 |         )
407 |     except:
408 |         return
409 | 
410 | 
411 | def append_hw_csv(fname, stu):
412 |     try:
413 |         f = [i for i in csv.reader(open(fname)) if i]
414 |     except:
415 |         f = [["学号", "姓名", "院系", "班级", "上交时间", "状态", "成绩", "批阅老师"]]
416 |     info_str = [
417 |         stu["xh"],
418 |         stu["xm"],
419 |         stu["dwmc"],
420 |         stu["bm"],
421 |         stu["scsjStr"],
422 |         stu["zt"],
423 |         stu["cj"],
424 |         stu["jsm"],
425 |     ]
426 |     xhs = [i[0] for i in f]
427 |     if stu["xh"] in xhs:
428 |         i = xhs.index(stu["xh"])
429 |         f[i] = info_str
430 |     else:
431 |         f.append(info_str)
432 |     csv.writer(open(fname, "w")).writerows(f)
433 | 
434 | 
435 | def sync_hw(c):
436 |     global dist_path
437 |     now = os.getcwd()
438 |     pre = os.path.join(dist_path, c["kcm"], "作业")
439 |     if not os.path.exists(pre):
440 |         os.makedirs(pre)
441 |     data = {"aoData": [{"name": "wlkcid", "value": c["wlkcid"]}]}
442 |     if c["_type"] == "student":
443 |         hws = []
444 |         for hwtype in ["zyListWj", "zyListYjwg", "zyListYpg"]:
445 |             try:
446 |                 hws += get_json("/b/wlxt/kczy/zy/student/%s" % hwtype, data)["object"][
447 |                     "aaData"
448 |                 ]
449 |             except:
450 |                 continue
451 |     else:
452 |         hws = get_json("/b/wlxt/kczy/zy/teacher/pageList", data)["object"]["aaData"]
453 |     for hw in hws:
454 |         path = os.path.join(pre, escape(hw["bt"]))
455 |         if not os.path.exists(path):
456 |             os.makedirs(path)
457 |         if c["_type"] == "student":
458 |             append_hw_csv(os.path.join(path, "info_%s.csv" % c["wlkcid"]), hw)
459 |             page = bs(
460 |                 get_page(
461 |                     "/f/wlxt/kczy/zy/student/viewCj?wlkcid=%s&zyid=%s&xszyid=%s"
462 |                     % (hw["wlkcid"], hw["zyid"], hw["xszyid"])
463 |                 ),
464 |                 "html.parser",
465 |             )
466 |             files = page.find_all(class_="fujian")
467 |             for i, f in enumerate(files):
468 |                 if len(f.find_all("a")) == 0:
469 |                     continue
470 |                 os.chdir(path)  # to avoid filename too long
471 |                 name = f.find_all("a")[0].text
472 |                 if i >= 2 and not name.startswith(hw["xh"]):
473 |                     name = hw["xh"] + "_" + name
474 |                 download(
475 |                     "/b/wlxt/kczy/zy/%s/downloadFile/%s/%s"
476 |                     % (
477 |                         c["_type"],
478 |                         hw["wlkcid"],
479 |                         f.find_all("a")[-1].attrs["onclick"].split("ZyFile('")[-1][:-2],
480 |                     ),
481 |                     name=name,
482 |                 )
483 |                 os.chdir(now)
484 |         else:
485 |             print(hw["bt"])
486 |             data = {
487 |                 "aoData": [
488 |                     {"name": "wlkcid", "value": c["wlkcid"]},
489 |                     {"name": "zyid", "value": hw["zyid"]},
490 |                 ]
491 |             }
492 |             stus = get_json("/b/wlxt/kczy/xszy/teacher/getDoneInfo", data)["object"][
493 |                 "aaData"
494 |             ]
495 |             for stu in stus:
496 |                 append_hw_csv(os.path.join(path, "info_%s.csv" % c["wlkcid"]), stu)
497 |                 page = bs(
498 |                     get_page(
499 |                         "/f/wlxt/kczy/xszy/teacher/beforePiYue?wlkcid=%s&xszyid=%s"
500 |                         % (stu["wlkcid"], stu["xszyid"])
501 |                     ),
502 |                     "html.parser",
503 |                 )
504 |                 files = page.find_all(class_="wdhere")
505 |                 os.chdir(path)  # to avoid filename too long
506 |                 for f in files:
507 |                     if f.text == "\n":
508 |                         continue
509 |                     try:
510 |                         id = f.find_all("span")[0].attrs["onclick"].split("'")[1]
511 |                         name = f.find_all("span")[0].text
512 |                     except:
513 |                         try:
514 |                             id = f.find_all("a")[-1].attrs["onclick"].split("'")[1]
515 |                             name = f.find_all("a")[0].text
516 |                         except:  # another error
517 |                             continue
518 |                     if not name.startswith(stu["xh"]):
519 |                         name = stu["xh"] + "_" + name
520 |                     download(
521 |                         "/b/wlxt/kczy/xszy/teacher/downloadFile/%s/%s"
522 |                         % (stu["wlkcid"], id),
523 |                         name=name,
524 |                     )
525 |                 os.chdir(now)
526 |             stus = get_json("/b/wlxt/kczy/xszy/teacher/getUndoInfo", data)["object"][
527 |                 "aaData"
528 |             ]
529 |             for stu in stus:
530 |                 append_hw_csv(os.path.join(path, "info_%s.csv" % c["wlkcid"]), stu)
531 | 
532 | 
533 | def build_discuss(s):
534 |     return "课程：%s\n内容：%s\n学号：%s\n姓名：%s\n发布时间:%s\n最后回复：%s\n回复时间：%s\n" % (
535 |         s["kcm"],
536 |         s["bt"],
537 |         s["fbr"],
538 |         s["fbrxm"],
539 |         s["fbsj"],
540 |         s["zhhfrxm"],
541 |         s["zhhfsj"],
542 |     )
543 | 
544 | 
545 | def sync_discuss(c):
546 |     global dist_path
547 |     pre = os.path.join(dist_path, c["kcm"], "讨论")
548 |     if not os.path.exists(pre):
549 |         os.makedirs(pre)
550 |     try:
551 |         disc = get_json(
552 |             "/b/wlxt/bbs/bbs_tltb/%s/kctlList?wlkcid=%s" % (c["_type"], c["wlkcid"])
553 |         )["object"]["resultsList"]
554 |     except:
555 |         return
556 |     for d in disc:
557 |         filename = os.path.join(pre, escape(d["bt"]) + ".txt")
558 |         if os.path.exists(filename):
559 |             continue
560 |         try:
561 |             html = get_page(
562 |                 "/f/wlxt/bbs/bbs_tltb/%s/viewTlById?wlkcid=%s&id=%s&tabbh=2&bqid=%s"
563 |                 % (c["_type"], d["wlkcid"], d["id"], d["bqid"])
564 |             )
565 |             open(filename, "w").write(
566 |                 build_discuss(d) + bs(html, "html.parser").find(class_="detail").text
567 |             )
568 |         except:
569 |             pass
570 | 
571 | 
572 | def gethash(fname):
573 |     if platform.system() == "Linux":
574 |         return subprocess.check_output(["md5sum", fname]).decode().split()[0]
575 |     hash_md5 = hashlib.md5()
576 |     with open(fname, "rb") as f:
577 |         for chunk in iter(lambda: f.read(4096), b""):
578 |             hash_md5.update(chunk)
579 |     return hash_md5.hexdigest()
580 | 
581 | 
582 | def dfs_clean(d):
583 |     subdirs = [
584 |         os.path.join(d, i) for i in os.listdir(d) if os.path.isdir(os.path.join(d, i))
585 |     ]
586 |     for i in subdirs:
587 |         dfs_clean(i)
588 |     files = [
589 |         os.path.join(d, i) for i in os.listdir(d) if os.path.isfile(os.path.join(d, i))
590 |     ]
591 |     info = {}
592 |     for f in files:
593 |         if os.path.getsize(f):
594 |             info[f] = {
595 |                 "size": os.path.getsize(f),
596 |                 "time": os.path.getmtime(f),
597 |                 "hash": "",
598 |                 "rm": 0,
599 |             }
600 |     info = list(
601 |         {
602 |             k: v for k, v in sorted(info.items(), key=lambda item: item[1]["size"])
603 |         }.items()
604 |     )
605 |     for i in range(len(info)):
606 |         for j in range(i):
607 |             if info[i][1]["size"] == info[j][1]["size"]:
608 |                 if info[i][1]["hash"] == "":
609 |                     info[i][1]["hash"] = gethash(info[i][0])
610 |                 if info[j][1]["hash"] == "":
611 |                     info[j][1]["hash"] = gethash(info[j][0])
612 |                 if info[i][1]["hash"] == info[j][1]["hash"]:
613 |                     if info[i][1]["time"] < info[j][1]["time"]:
614 |                         info[i][1]["rm"] = 1
615 |                     elif info[i][1]["time"] > info[j][1]["time"]:
616 |                         info[j][1]["rm"] = 1
617 |                     elif len(info[i][0]) < len(info[j][0]):
618 |                         info[i][1]["rm"] = 1
619 |                     elif len(info[i][0]) > len(info[j][0]):
620 |                         info[j][1]["rm"] = 1
621 |     rm = [i[0] for i in info if i[1]["rm"] or i[1]["size"] == 0]
622 |     if rm:
623 |         print("rmlist:", rm)
624 |         for f in rm:
625 |             os.remove(f)
626 | 
627 | 
628 | def clear(args):
629 |     courses = [i for i in os.listdir(".") if os.path.isdir(i) and not i.startswith(".")]
630 |     if args.all:
631 |         pass
632 |     else:
633 |         if args.course:
634 |             courses = [i for i in courses if i in args.course]
635 |         if args.ignore:
636 |             courses = [i for i in courses if i not in args.ignore]
637 |     courses.sort()
638 |     for i, c in enumerate(courses):
639 |         print("Checking #%d %s" % (i + 1, c))
640 |         for subdir in ["课件", "作业"]:
641 |             d = os.path.join(c, subdir)
642 |             if os.path.exists(d):
643 |                 dfs_clean(d)
644 | 
645 | 
646 | def process_course(c, args):
647 |     # 处理单个课程的函数，用于多进程
648 |     build_global(args)
649 |     login(args.username, args.password)
650 | 
651 |     c["_type"] = {"0": "teacher", "3": "student"}[c["jslx"]]
652 |     print("Sync " + c["xnxq"] + " " + c["kcm"])
653 | 
654 |     if not os.path.exists(os.path.join(dist_path, c["kcm"])):
655 |         os.makedirs(os.path.join(dist_path, c["kcm"]))
656 |     sync_info(c)
657 |     sync_discuss(c)
658 |     sync_notify(c)
659 |     sync_file(c)
660 |     sync_hw(c)
661 | 
662 |     return c["kcm"]
663 | 
664 | 
665 | def main(args):
666 |     global dist_path
667 |     build_global(args)
668 |     assert (
669 |         (dist_path is not None)
670 |         and (url is not None)
671 |         and (user_agent is not None)
672 |         and (headers is not None)
673 |         and (cookie is not None)
674 |         and (opener is not None)
675 |         and (err404 is not None)
676 |     )
677 |     if args.clear:
678 |         clear(args)
679 |         exit()
680 |     args.login = False
681 |     if args.cookie:
682 |         cookie.load(args.cookie, ignore_discard=True, ignore_expires=True)
683 |         args.login = get_page("/b/wlxt/kc/v_wlkc_xs_xktjb_coassb/queryxnxq") != err404
684 |         print("login successfully" if args.login else "login failed!")
685 |     else:
686 |         if os.path.exists(args._pass):
687 |             username, password = open(args._pass).read().split()
688 |         else:
689 |             if not args.username:
690 |                 args.username = input("请输入INFO账号：")
691 |             if not args.password:
692 |                 args.password = getpass.getpass("请输入INFO密码：")
693 |         args.login = login(args.username, args.password)
694 |     if args.login:
695 |         courses = get_courses(args)
696 |         if args.multi:
697 |             # 如果未指定进程数，则使用CPU核数
698 |             if not args.processes:
699 |                 args.processes = mp.cpu_count()
700 |             print(f"启动多进程下载，进程数：{args.processes}")
701 |             pool = mp.Pool(processes=args.processes)
702 |             process_func = partial(process_course, args=args)
703 |             for _ in tqdm(
704 |                 pool.imap_unordered(process_func, courses),
705 |                 total=len(courses),
706 |                 desc="处理课程",
707 |             ):
708 |                 pass
709 | 
710 |             pool.close()
711 |             pool.join()
712 |         else:
713 |             # 原始单进程处理
714 |             for c in courses:
715 |                 c["_type"] = {"0": "teacher", "3": "student"}[c["jslx"]]
716 |                 print("Sync " + c["xnxq"] + " " + c["kcm"])
717 |                 if not os.path.exists(os.path.join(dist_path, c["kcm"])):
718 |                     os.makedirs(os.path.join(dist_path, c["kcm"]))
719 |                 sync_info(c)
720 |                 sync_discuss(c)
721 |                 sync_notify(c)
722 |                 sync_file(c)
723 |                 sync_hw(c)
724 | 
725 | 
726 | def get_args():
727 |     parser = argparse.ArgumentParser()
728 |     parser.add_argument("--all", action="store_true")
729 |     parser.add_argument(
730 |         "--clear", action="store_true", help="remove the duplicate course file"
731 |     )
732 |     parser.add_argument("--semester", nargs="+", type=str, default=[])
733 |     parser.add_argument("--ignore", nargs="+", type=str, default=[])
734 |     parser.add_argument("--course", nargs="+", type=str, default=[])
735 |     parser.add_argument("-p", "--_pass", type=str, default=".pass")
736 |     parser.add_argument(
737 |         "-c", "--cookie", type=str, default="", help="Netscape HTTP Cookie File"
738 |     )
739 |     parser.add_argument("-d", "--dist", type=str, default="", help="download path")
740 |     parser.add_argument("--http_proxy", type=str, default="", help="http proxy")
741 |     parser.add_argument("--https_proxy", type=str, default="", help="https proxy")
742 |     parser.add_argument("--username", type=str, default="", help="username")
743 |     parser.add_argument("--password", type=str, default="", help="password")
744 |     parser.add_argument("--multi", action="store_true", help="multi-process")
745 |     parser.add_argument("--processes", type=int, help="concurrent processes")
746 |     args = parser.parse_args()
747 |     return args
748 | 
749 | 
750 | if __name__ == "__main__":
751 |     main(get_args())
752 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | bs4
2 | tqdm
3 | requests
4 | 


--------------------------------------------------------------------------------