"""Render a hole (a post plus its comments) as plain text for the export file."""
import time

# Base URL under which the images attached to holes are served.
IMAGE_URL_PREFIX = "https://pkuhelper.pku.edu.cn/services/pkuhole/images/"


def get_format_time(time_stamp):
    """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time.

    ``time_stamp`` may be an int or a numeric string; it is passed
    through ``int()`` before conversion.
    """
    time_local = time.localtime(int(time_stamp))
    return time.strftime("%Y-%m-%d %H:%M:%S", time_local)


def get_post(obj):
    """Return the text block describing the post stored in ``obj['post']``.

    The block consists of the pid line, the post text, an image URL line
    (only when the post type is ``"image"``) and a footer holding the
    formatted timestamp, the follow count and the reply count.

    Raises:
        KeyError: if a field read from the post is missing.
    """
    post = obj['post']
    # f-strings are used instead of ``+`` so that a numeric pid does not
    # raise TypeError during concatenation.
    parts = [f"#{post['pid']}\n", f"{post['text']}\n"]
    if post['type'] == "image":
        parts.append(f"[树洞图片]{IMAGE_URL_PREFIX}{post['url']}\n")
    parts.append(
        f"({get_format_time(post['timestamp'])} "
        f"{post['likenum']}关注 {post['reply']}回复)\n"
    )
    return "".join(parts)


def get_comments(obj):
    """Return every comment in ``obj['comments']`` as text.

    Each comment contributes two lines: its formatted timestamp and its
    text. A missing list (``None``) or an empty list yields ``""``.
    """
    # NOTE(review): whether the API ever returns null here is unconfirmed;
    # treating None as "no comments" avoids a crash in that case.
    comments = obj['comments'] or []
    # join() keeps this linear; the previous ``+=`` loop re-copied the
    # accumulated string for every comment.
    return "".join(
        f"{get_format_time(comment['timestamp'])}\n{comment['text']}\n"
        for comment in comments
    )


# Convert the JSON object into a string that can be written to the file.
def obj_stringfy(obj):
    """Return the post block followed by the comment block for ``obj``."""
    return get_post(obj) + get_comments(obj)
any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
"""Fetch the holes on the user's attention list from the PKU Helper API."""
import json
import time
import random
import requests
from datetime import datetime
from json_util import obj_stringfy

# Input your pkuhelper user token here
USER_TOKEN = "your user token here"

# Seconds to wait on one HTTP request; without a timeout a stalled
# connection would block the export forever.
REQUEST_TIMEOUT = 30
# Number of attempts made for one hole before it is put back on the queue.
MAX_ATTEMPTS = 7

r = requests.get(
    "https://pkuhelper.pku.edu.cn/services/pkuhole/api.php?action=getattention&PKUHelperAPI=3.0&jsapiver=null-111111&user_token="+USER_TOKEN,
    timeout=REQUEST_TIMEOUT)
attentions = r.json()
if attentions["code"] != 0:
    print(attentions)
    raise Exception("get attention request failed")
attentions_data = attentions["data"]
n = len(attentions_data)
print("exporting " + str(n) + " holes...")
export = []        # fetched holes, each {"post": ..., "comments": ...}
ready_to_try = []  # queue of pids still to fetch; failed pids are re-appended
pid2ind = {}       # pid -> index of its post in attentions_data


def get_hole_content(i, pid, depth):
    """Fetch the comments of hole ``pid`` and append the result to ``export``.

    Args:
        i: position of ``pid`` in the work queue (unused, kept for callers).
        pid: id of the hole, as a string.
        depth: number of attempts already spent; callers pass 0.

    Returns:
        True when the hole was fetched and appended to ``export``. False
        when every attempt failed; in that case ``pid`` has been appended
        to ``ready_to_try`` exactly once so it is retried later.
    """
    while depth < MAX_ATTEMPTS:
        # Short random pause before every request.
        time.sleep(random.uniform(0.2, 0.5))
        try:
            r2 = requests.get("https://pkuhelper.pku.edu.cn/services/pkuhole/api.php?action=getcomment&pid=" +
                              pid+"&PKUHelperAPI=3.0&jsapiver=null-111111&user_token="+USER_TOKEN,
                              timeout=REQUEST_TIMEOUT)
            comments = r2.json()
            if comments["code"] != 0:
                # Do NOT re-queue here: the pid is queued once, below, after
                # all attempts are used up. Queuing on every failed attempt
                # made the same hole appear several times in the export.
                raise Exception("get comments (pid=" + pid + ") request failed")
            export.append({"post": attentions_data[pid2ind[pid]], "comments": comments["data"]})
            return True
        except Exception as err:
            # Best-effort retry: report the failure instead of hiding it.
            print(f"Attempt {depth + 1}/{MAX_ATTEMPTS} for hole {pid} failed: {err}")
            depth += 1

    print(f"Fetch hole {pid} failed after {depth} attempts!")
    # Put the pid at the end of the queue so it is tried again later.
    ready_to_try.append(pid)
    # Long pause after a complete failure before the caller continues.
    time.sleep(random.uniform(20.5, 25.5))
    return False


ready_to_try.extend(elem["pid"] for elem in attentions_data)
pid2ind.update({queued_pid: index for index, queued_pid in enumerate(ready_to_try)})

# Consecutive failures seen by the export loop; it pauses 30 s after two.
continuously_failure = 0

# Write each hole to export.txt as soon as it has been fetched, so an
# interrupted run keeps everything exported so far.
exported_pids = set()  # pids already written; guards against duplicates
with open("export.txt", "w", encoding="utf8") as holes:
    # get_hole_content() appends failed pids to ready_to_try, so the queue
    # can grow while it is processed; an index loop makes that explicit.
    position = 0
    while position < len(ready_to_try):
        i = position
        pid = ready_to_try[i]
        position += 1

        if pid in exported_pids:
            # The pid was queued more than once and is already in the file.
            continue

        if get_hole_content(i, pid, 0):
            continuously_failure = 0
            # Success: the newly fetched object is the last one in export.
            obj = export[-1]
            holes.write(obj_stringfy(obj))
            holes.write("\n========================================\n\n")
            holes.flush()
            exported_pids.add(pid)
            # Count exported holes rather than queue positions, so the
            # progress never exceeds n when retried pids extend the queue.
            print(f"{len(exported_pids)}/{n} finished export pid={pid}")
        else:
            # Failure: after two consecutive failures sleep 30 seconds to
            # get past the server's request throttling.
            continuously_failure += 1
            if continuously_failure >= 2:
                time.sleep(30)
                continuously_failure = 0