├── README.md
├── custom
│   ├── analysis_bilibili.py
│   ├── atall.py
│   ├── cat.py
│   ├── dog.py
│   ├── fileLink.py
│   ├── fox.py
│   ├── manage_group.py
│   ├── nbnhhsh.py
│   ├── ping.py
│   ├── processing_request.py
│   ├── requirements.txt
│   ├── scan_qrcode.py
│   ├── status_info.py
│   └── wantwords.py
├── custom_reply
│   ├── __init__.py
│   ├── custom_reply.py
│   ├── manage_content.py
│   └── requirements.txt
└── rss2
    ├── __init__.py
    ├── command
    │   ├── __init__.py
    │   ├── add_cookies.py
    │   ├── add_dy.py
    │   ├── change_dy.py
    │   ├── del_dy.py
    │   ├── rsshub_add.py
    │   ├── show_all.py
    │   ├── show_dy.py
    │   └── upload_group_file.py
    ├── config.py
    ├── my_trigger.py
    ├── parsing
    │   ├── __init__.py
    │   ├── cache_manage.py
    │   ├── check_update.py
    │   ├── download_torrent.py
    │   ├── handle_html_tag.py
    │   ├── handle_images.py
    │   ├── handle_translation.py
    │   ├── parsing_rss.py
    │   ├── routes
    │   │   ├── __init__.py
    │   │   ├── danbooru.py
    │   │   ├── nga.py
    │   │   ├── pixiv.py
    │   │   ├── south_plus.py
    │   │   ├── twitter.py
    │   │   ├── weibo.py
    │   │   ├── yande_re.py
    │   │   └── youtube.py
    │   ├── send_message.py
    │   └── utils.py
    ├── permission.py
    ├── pikpak_offline.py
    ├── qbittorrent_download.py
    ├── requirements.txt
    ├── rss_class.py
    ├── rss_parsing.py
    └── utils.py
/README.md:
--------------------------------------------------------------------------------
1 | # Usage Notes
2 |
3 | ### Keep the HoshinoBot core itself up to date, to guard against CQ code injection
4 |
5 | ### requirements.txt lists the third-party packages a plugin needs; install them before use
6 |
7 | ### Run `pip install -r requirements.txt` in the directory of the plugin you want to use to install its dependencies
8 |
9 | ### Run `pip install -r requirements.txt --upgrade` to update the dependencies
10 |
11 | These plugins are meant to be used together with [Hoshino(v2)](https://github.com/Ice-Cirno/HoshinoBot).
12 | See the [WIKI](https://github.com/mengshouer/HoshinoBot-Plugins/wiki) for detailed usage.
13 |
14 | ## Other plugins
15 |
16 | [FFXIV-related plugins](https://github.com/mengshouer/HoshinoBot-Plugins/tree/ffxiv)
17 |
18 | [Image search plugin](https://github.com/mengshouer/HoshinoBot-Plugins/tree/picsearch)
19 |
--------------------------------------------------------------------------------
/custom/analysis_bilibili.py:
--------------------------------------------------------------------------------
1 | import re
2 | import urllib.parse
3 | import json
4 | import nonebot
5 | from typing import Optional, Union
6 | from time import localtime, strftime
7 | from aiohttp import ClientSession
8 |
9 | from hoshino import Service, logger
10 | from nonebot import Message, MessageSegment
11 |
12 |
13 | headers = {
14 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.58"
15 | }
16 | analysis_stat = {} # group_id : last_vurl
17 | config = nonebot.get_bot().config
18 | blacklist = getattr(config, "analysis_blacklist", [])
19 | analysis_display_image = getattr(config, "analysis_display_image", False)
20 | analysis_display_image_list = getattr(config, "analysis_display_image_list", [])
21 | trust_env = getattr(config, "analysis_trust_env", False)
22 |
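The `getattr` calls above read optional settings from the HoshinoBot config object and fall back to safe defaults when they are absent. A minimal sketch of what the matching entries could look like in the bot's config module (the attribute names come from the reads above; the values and comments are illustrative assumptions):

    # Illustrative HoshinoBot config entries (values are assumptions, not repo defaults)
    analysis_blacklist = [10000]             # user_ids whose links should not be parsed
    analysis_display_image = False           # attach the cover image for every resource type
    analysis_display_image_list = ["video"]  # or enable covers per type: video / bangumi / live / article / dynamic
    analysis_trust_env = False               # passed to aiohttp.ClientSession(trust_env=...)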
23 |
24 | sv2 = Service("search_bilibili_video")
25 | # 手动搜视频标题
26 | @sv2.on_prefix("搜视频")
27 | async def search_bilibili_video_by_title(bot, ev):
28 | title = ev.message.extract_plain_text()
29 | group_id = ev.group_id if ev.group_id else ev.get("channel_id", None)
30 |
31 | async with ClientSession(trust_env=trust_env, headers=headers) as session:
32 | vurl = await search_bili_by_title(title, session)
33 | msg = await bili_keyword(group_id, vurl, session)
34 | try:
35 | await bot.send(ev, msg)
36 | except:
37 | # 避免简介有风控内容无法发送
38 | logger.warning(f"{msg}\n此次解析可能被风控,尝试去除简介后发送!")
39 | msg = re.sub(r"简介.*", "", msg)
40 | await bot.send(ev, msg)
41 |
42 |
43 | sv = Service("analysis_bilibili")
44 | # on_rex判断不到小程序信息
45 | @sv.on_message()
46 | async def rex_bilibili(bot, ev):
47 | text = str(ev.message).strip()
48 | if blacklist and ev.user_id in blacklist:
49 | return
50 | if re.search(r"(b23.tv)|(bili(22|23|33|2233).cn)", text, re.I):
51 | # 提前处理短链接,避免解析到其他的
52 | text = await b23_extract(text)
53 | patterns = r"(\.bilibili\.com)|(^(av|cv)(\d+))|(^BV([a-zA-Z0-9]{10})+)|(\[\[QQ小程序\]哔哩哔哩\])|(QQ小程序&#93;哔哩哔哩)|(QQ小程序]哔哩哔哩)"
54 | match = re.compile(patterns, re.I).search(text)
55 | if match:
56 | group_id = ev.group_id if ev.group_id else ev.get("channel_id", None)
57 | async with ClientSession(trust_env=trust_env, headers=headers) as session:
58 | msg = await bili_keyword(group_id, text, session)
59 | if msg:
60 | try:
61 | await bot.send(ev, msg)
62 | except:
63 | # 避免简介有风控内容无法发送
64 | logger.warning(f"{msg}\n此次解析可能被风控,尝试去除简介后发送!")
65 | msg = re.sub(r"简介.*", "", msg)
66 | await bot.send(ev, msg)
67 |
68 |
69 | async def bili_keyword(
70 | group_id: Optional[int], text: str, session: ClientSession
71 | ) -> Union[Message, str]:
72 | try:
73 | # 提取url
74 | url, page, time_location = extract(text)
75 | # 如果是小程序就去搜索标题
76 | if not url:
77 | if title := re.search(r'"desc":("[^"哔哩]+")', text):
78 | vurl = await search_bili_by_title(title[1], session)
79 | if vurl:
80 | url, page, time_location = extract(vurl)
81 |
82 | # 获取视频详细信息
83 | msg, vurl = "", ""
84 | if "view?" in url:
85 | msg, vurl = await video_detail(
86 | url, page=page, time_location=time_location, session=session
87 | )
88 | elif "bangumi" in url:
89 | msg, vurl = await bangumi_detail(url, time_location, session)
90 | elif "xlive" in url:
91 | msg, vurl = await live_detail(url, session)
92 | elif "article" in url:
93 | msg, vurl = await article_detail(url, page, session)
94 | elif "dynamic" in url:
95 | msg, vurl = await dynamic_detail(url, session)
96 |
97 | # 避免多个机器人解析重复推送
98 | if group_id:
99 | if group_id in analysis_stat and analysis_stat[group_id] == vurl:
100 | return ""
101 | analysis_stat[group_id] = vurl
102 | except Exception as e:
103 | msg = "bili_keyword Error: {}".format(type(e))
104 | return msg
105 |
106 |
107 | async def b23_extract(text):
108 | b23 = re.compile(r"b23.tv/(\w+)|(bili(22|23|33|2233).cn)/(\w+)", re.I).search(
109 | text.replace("\\", "")
110 | )
111 | url = f"https://{b23[0]}"
112 | # 考虑到是在 on_message 内进行操作,避免无用的创建 session,所以分开写
113 | async with ClientSession(trust_env=trust_env) as session:
114 | async with session.get(url) as resp:
115 | return str(resp.url)
116 |
117 |
118 | def extract(text: str):
119 | try:
120 | url = ""
121 | # 视频分p
122 | page = re.compile(r"([?&]|&amp;)p=\d+").search(text)
123 | # 视频播放定位时间
124 | time = re.compile(r"([?&]|&amp;)t=\d+").search(text)
125 | # 主站视频 av 号
126 | aid = re.compile(r"av\d+", re.I).search(text)
127 | # 主站视频 bv 号
128 | bvid = re.compile(r"BV([A-Za-z0-9]{10})+", re.I).search(text)
129 | # 番剧视频页
130 | epid = re.compile(r"ep\d+", re.I).search(text)
131 | # 番剧剧集ssid(season_id)
132 | ssid = re.compile(r"ss\d+", re.I).search(text)
133 | # 番剧详细页
134 | mdid = re.compile(r"md\d+", re.I).search(text)
135 | # 直播间
136 | room_id = re.compile(r"live.bilibili.com/(blanc/|h5/)?(\d+)", re.I).search(text)
137 | # 文章
138 | cvid = re.compile(
139 | r"(/read/(cv|mobile|native)(/|\?id=)?|^cv)(\d+)", re.I
140 | ).search(text)
141 | # 动态
142 | dynamic_id_type2 = re.compile(
143 | r"(t|m).bilibili.com/(\d+)\?(.*?)(&|&amp;)type=2", re.I
144 | ).search(text)
145 | # 动态
146 | dynamic_id = re.compile(r"(t|m).bilibili.com/(\d+)", re.I).search(text)
147 | if bvid:
148 | url = f"https://api.bilibili.com/x/web-interface/view?bvid={bvid[0]}"
149 | elif aid:
150 | url = f"https://api.bilibili.com/x/web-interface/view?aid={aid[0][2:]}"
151 | elif epid:
152 | url = (
153 | f"https://bangumi.bilibili.com/view/web_api/season?ep_id={epid[0][2:]}"
154 | )
155 | elif ssid:
156 | url = f"https://bangumi.bilibili.com/view/web_api/season?season_id={ssid[0][2:]}"
157 | elif mdid:
158 | url = f"https://bangumi.bilibili.com/view/web_api/season?media_id={mdid[0][2:]}"
159 | elif room_id:
160 | url = f"https://api.live.bilibili.com/xlive/web-room/v1/index/getInfoByRoom?room_id={room_id[2]}"
161 | elif cvid:
162 | page = cvid[4]
163 | url = f"https://api.bilibili.com/x/article/viewinfo?id={page}&mobi_app=pc&from=web"
164 | elif dynamic_id_type2:
165 | url = f"https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/get_dynamic_detail?rid={dynamic_id_type2[2]}&type=2"
166 | elif dynamic_id:
167 | url = f"https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/get_dynamic_detail?dynamic_id={dynamic_id[2]}"
168 | return url, page, time
169 | except Exception:
170 | return "", None, None
171 |
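For a quick illustration of what `extract` hands back (based on the regexes above; `page` and `time` are `re.Match` objects or `None`):

    url, page, time_location = extract("https://www.bilibili.com/video/av170001?p=2&t=60")
    # url              -> "https://api.bilibili.com/x/web-interface/view?aid=170001"
    # page[0]          -> "?p=2"
    # time_location[0] -> "&t=60"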
172 |
173 | async def search_bili_by_title(title: str, session: ClientSession) -> str:
174 | mainsite_url = "https://www.bilibili.com"
175 | search_url = f"https://api.bilibili.com/x/web-interface/wbi/search/all/v2?keyword={urllib.parse.quote(title)}"
176 |
177 | # set headers
178 | async with session.get(mainsite_url) as resp:
179 | assert resp.status == 200
180 |
181 | async with session.get(search_url) as resp:
182 | result = (await resp.json())["data"]["result"]
183 |
184 | for i in result:
185 | if i.get("result_type") != "video":
186 | continue
187 | # 只返回第一个结果
188 | return i["data"][0].get("arcurl")
189 |
190 |
191 | # 处理超过一万的数字
192 | def handle_num(num: int):
193 | if num > 10000:
194 | num = f"{num / 10000:.2f}万"
195 | return num
196 |
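For reference, the formatting behaviour of `handle_num`:

    handle_num(9999)    # -> 9999 (returned unchanged, the threshold is strictly greater than 10000)
    handle_num(123456)  # -> "12.35万"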
197 |
198 | async def video_detail(url: str, session: ClientSession, **kwargs):
199 | try:
200 | async with session.get(url) as resp:
201 | res = (await resp.json()).get("data")
202 | if not res:
203 | return "解析到视频被删了/稿件不可见或审核中/权限不足", url
204 | vurl = f"https://www.bilibili.com/video/av{res['aid']}"
205 | title = f"\n标题:{res['title']}\n"
206 | cover = (
207 | MessageSegment.image(res["pic"])
208 | if analysis_display_image or "video" in analysis_display_image_list
209 | else MessageSegment.text("")
210 | )
211 | if page := kwargs.get("page"):
212 | page = page[0].replace("&amp;", "&")
213 | p = int(page[3:])
214 | if p <= len(res["pages"]):
215 | vurl += f"?p={p}"
216 | part = res["pages"][p - 1]["part"]
217 | if part != res["title"]:
218 | title += f"小标题:{part}\n"
219 | if time_location := kwargs.get("time_location"):
220 | time_location = time_location[0].replace("&amp;", "&")[3:]
221 | if page:
222 | vurl += f"&t={time_location}"
223 | else:
224 | vurl += f"?t={time_location}"
225 | pubdate = strftime("%Y-%m-%d %H:%M:%S", localtime(res["pubdate"]))
226 | tname = f"类型:{res['tname']} | UP:{res['owner']['name']} | 日期:{pubdate}\n"
227 | stat = f"播放:{handle_num(res['stat']['view'])} | 弹幕:{handle_num(res['stat']['danmaku'])} | 收藏:{handle_num(res['stat']['favorite'])}\n"
228 | stat += f"点赞:{handle_num(res['stat']['like'])} | 硬币:{handle_num(res['stat']['coin'])} | 评论:{handle_num(res['stat']['reply'])}\n"
229 | desc = f"简介:{res['desc']}"
230 | desc_list = desc.split("\n")
231 | desc = "".join(i + "\n" for i in desc_list if i)
232 | desc_list = desc.split("\n")
233 | if len(desc_list) > 4:
234 | desc = desc_list[0] + "\n" + desc_list[1] + "\n" + desc_list[2] + "……"
235 | mstext = MessageSegment.text("".join([vurl, title, tname, stat, desc]))
236 | msg = Message([cover, mstext])
237 | return msg, vurl
238 | except Exception as e:
239 | msg = "视频解析出错--Error: {}".format(type(e))
240 | return msg, None
241 |
242 |
243 | async def bangumi_detail(url: str, time_location: str, session: ClientSession):
244 | try:
245 | async with session.get(url) as resp:
246 | res = (await resp.json()).get("result")
247 | if not res:
248 | return None, None
249 | cover = (
250 | MessageSegment.image(res["cover"])
251 | if analysis_display_image or "bangumi" in analysis_display_image_list
252 | else MessageSegment.text("")
253 | )
254 | title = f"番剧:{res['title']}\n"
255 | desc = f"{res['newest_ep']['desc']}\n"
256 | index_title = ""
257 | style = "".join(f"{i}," for i in res["style"])
258 | style = f"类型:{style[:-1]}\n"
259 | evaluate = f"简介:{res['evaluate']}\n"
260 | if "season_id" in url:
261 | vurl = f"https://www.bilibili.com/bangumi/play/ss{res['season_id']}"
262 | elif "media_id" in url:
263 | vurl = f"https://www.bilibili.com/bangumi/media/md{res['media_id']}"
264 | else:
265 | epid = re.compile(r"ep_id=\d+").search(url)[0][len("ep_id=") :]
266 | for i in res["episodes"]:
267 | if str(i["ep_id"]) == epid:
268 | index_title = f"标题:{i['index_title']}\n"
269 | break
270 | vurl = f"https://www.bilibili.com/bangumi/play/ep{epid}"
271 | if time_location:
272 | time_location = time_location[0].replace("&amp;", "&")[3:]
273 | vurl += f"?t={time_location}"
274 | mstext = MessageSegment.text(
275 | "".join([f"{vurl}\n", title, index_title, desc, style, evaluate])
276 | )
277 | msg = Message([cover, mstext])
278 | return msg, vurl
279 | except Exception as e:
280 | msg = "番剧解析出错--Error: {}".format(type(e))
281 | msg += f"\n{url}"
282 | return msg, None
283 |
284 |
285 | async def live_detail(url: str, session: ClientSession):
286 | try:
287 | async with session.get(url) as resp:
288 | res = await resp.json()
289 | if res["code"] != 0:
290 | return None, None
291 | res = res["data"]
292 | uname = res["anchor_info"]["base_info"]["uname"]
293 | room_id = res["room_info"]["room_id"]
294 | title = res["room_info"]["title"]
295 | cover = (
296 | MessageSegment.image(res["room_info"]["cover"])
297 | if analysis_display_image or "live" in analysis_display_image_list
298 | else MessageSegment.text("")
299 | )
300 | live_status = res["room_info"]["live_status"]
301 | lock_status = res["room_info"]["lock_status"]
302 | parent_area_name = res["room_info"]["parent_area_name"]
303 | area_name = res["room_info"]["area_name"]
304 | online = res["room_info"]["online"]
305 | tags = res["room_info"]["tags"]
306 | watched_show = res["watched_show"]["text_large"]
307 | vurl = f"https://live.bilibili.com/{room_id}\n"
308 | if lock_status:
309 | lock_time = res["room_info"]["lock_time"]
310 | lock_time = strftime("%Y-%m-%d %H:%M:%S", localtime(lock_time))
311 | title = f"[已封禁]直播间封禁至:{lock_time}\n"
312 | elif live_status == 1:
313 | title = f"[直播中]标题:{title}\n"
314 | elif live_status == 2:
315 | title = f"[轮播中]标题:{title}\n"
316 | else:
317 | title = f"[未开播]标题:{title}\n"
318 | up = f"主播:{uname} 当前分区:{parent_area_name}-{area_name}\n"
319 | watch = f"观看:{watched_show} 直播时的人气上一次刷新值:{handle_num(online)}\n"
320 | if tags:
321 | tags = f"标签:{tags}\n"
322 | if live_status:
323 | player = f"独立播放器:https://www.bilibili.com/blackboard/live/live-activity-player.html?enterTheRoom=0&cid={room_id}"
324 | else:
325 | player = ""
326 | mstext = MessageSegment.text("".join([vurl, title, up, watch, tags, player]))
327 | msg = Message([cover, mstext])
328 | return msg, vurl
329 | except Exception as e:
330 | msg = "直播间解析出错--Error: {}".format(type(e))
331 | return msg, None
332 |
333 |
334 | async def article_detail(url: str, cvid: str, session: ClientSession):
335 | try:
336 | async with session.get(url) as resp:
337 | res = (await resp.json()).get("data")
338 | if not res:
339 | return None, None
340 | images = (
341 | [MessageSegment.image(i) for i in res["origin_image_urls"]]
342 | if analysis_display_image or "article" in analysis_display_image_list
343 | else []
344 | )
345 | vurl = f"https://www.bilibili.com/read/cv{cvid}"
346 | title = f"标题:{res['title']}\n"
347 | up = f"作者:{res['author_name']} (https://space.bilibili.com/{res['mid']})\n"
348 | view = f"阅读数:{handle_num(res['stats']['view'])} "
349 | favorite = f"收藏数:{handle_num(res['stats']['favorite'])} "
350 | coin = f"硬币数:{handle_num(res['stats']['coin'])}"
351 | share = f"分享数:{handle_num(res['stats']['share'])} "
352 | like = f"点赞数:{handle_num(res['stats']['like'])} "
353 | dislike = f"不喜欢数:{handle_num(res['stats']['dislike'])}"
354 | desc = view + favorite + coin + "\n" + share + like + dislike + "\n"
355 | mstext = MessageSegment.text("".join([title, up, desc, vurl]))
356 | msg = Message(images)
357 | msg.append(mstext)
358 | return msg, vurl
359 | except Exception as e:
360 | msg = "专栏解析出错--Error: {}".format(type(e))
361 | return msg, None
362 |
363 |
364 | async def dynamic_detail(url: str, session: ClientSession):
365 | try:
366 | async with session.get(url) as resp:
367 | res = (await resp.json())["data"].get("card")
368 | if not res:
369 | return None, None
370 | card = json.loads(res["card"])
371 | dynamic_id = res["desc"]["dynamic_id"]
372 | vurl = f"https://t.bilibili.com/{dynamic_id}\n"
373 | if not (item := card.get("item")):
374 | return "动态不存在文字内容", vurl
375 | if not (content := item.get("description")):
376 | content = item.get("content")
377 | content = content.replace("\r", "\n")
378 | if len(content) > 250:
379 | content = content[:250] + "......"
380 | images = (
381 | item.get("pictures", [])
382 | if analysis_display_image or "dynamic" in analysis_display_image_list
383 | else []
384 | )
385 | if images:
386 | images = [MessageSegment.image(i.get("img_src")) for i in images]
387 | else:
388 | pics = item.get("pictures_count")
389 | if pics:
390 | content += f"\nPS:动态中包含{pics}张图片"
391 | if origin := card.get("origin"):
392 | jorigin = json.loads(origin)
393 | short_link = jorigin.get("short_link")
394 | if short_link:
395 | content += f"\n动态包含转发视频{short_link}"
396 | else:
397 | content += f"\n动态包含转发其他动态"
398 | msg = Message(content)
399 | msg.extend(images)
400 | msg.append(MessageSegment.text(f"\n{vurl}"))
401 | return msg, vurl
402 | except Exception as e:
403 | msg = "动态解析出错--Error: {}".format(type(e))
404 | return msg, None
405 |
--------------------------------------------------------------------------------
/custom/atall.py:
--------------------------------------------------------------------------------
1 | from hoshino import Service
2 | from nonebot import MessageSegment
3 |
4 | sv_help = """
5 | 让群员使用bot来@全体成员,前提bot得有管理员(叫人用
6 | 只要前缀为"@全员"就触发,默认关闭
7 | """.strip()
8 | sv = Service("atall", enable_on_default=False)
9 |
10 |
11 | @sv.on_prefix("@全员")
12 | async def atall(bot, ev):
13 | try:
14 | msg = ev.message.extract_plain_text()
15 | msg = f"{MessageSegment.at('all')} {msg}"
16 | await bot.send(ev, msg)
17 | # 一个一个群员进行@,慎用
18 | # try:
19 | # await bot.send(ev, msg)
20 | # except:
21 | # try:
22 | # m = await bot.get_group_member_list(group_id=ev.group_id)
23 | # msg = ""
24 | # for i in range(0, len(m)):
25 | # u = m[i]["user_id"]
26 | # if u != ev.self_id:
27 | # msg += f"{MessageSegment.at(u)} "
28 | # msg += ev.message.extract_plain_text()
29 | # await bot.send(ev, msg)
30 | # except:
31 | # await bot.send(ev, "at all send fail!!!")
32 | except:
33 | # 可能发送内容被风控,只发送@全体成员
34 | await bot.send(ev, MessageSegment.at("all"))
35 |
--------------------------------------------------------------------------------
/custom/cat.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | from nonebot import on_command, CommandSession, MessageSegment
3 |
4 |
5 | @on_command("!cat", only_to_me=False)
6 | async def cat(session: CommandSession):
7 | url = "https://api.thecatapi.com/v1/images/search"
8 | with httpx.Client(proxies={}) as client:
9 | r = client.get(url, timeout=5)
10 | picurl = r.json()[0]["url"]
11 | await session.send(MessageSegment.image(picurl))
12 |
--------------------------------------------------------------------------------
/custom/dog.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | from nonebot import on_command, CommandSession, MessageSegment
3 |
4 |
5 | @on_command("/dog", aliases=("!dog", "\\dog"), only_to_me=False)
6 | async def dog(session: CommandSession):
7 | try:
8 | try:
9 | api_url = "https://api.thedogapi.com/v1/images/search"
10 | with httpx.Client(proxies={}) as client:
11 | r = client.get(api_url, timeout=5)
12 | img_url = r.json()[0]["url"]
13 | except Exception as e:
14 | api_url = "https://dog.ceo/api/breeds/image/random"
15 | with httpx.Client(proxies={}) as client:
16 | r = client.get(api_url, timeout=5)
17 | img_url = r.json()["message"]
18 | msg = MessageSegment.image(img_url)
19 | except Exception as e:
20 | msg = "Error: {}".format(type(e))
21 | await session.send(msg)
22 |
--------------------------------------------------------------------------------
/custom/fileLink.py:
--------------------------------------------------------------------------------
1 | import re
2 | import urllib.parse
3 |
4 | try:
5 | from hoshino import Service
6 | from nonebot import MessageSegment
7 |
8 | _sv = Service("groupFileLink")
9 | sv = _sv.on_notice
10 | except:
11 | from nonebot import on_notice, MessageSegment
12 |
13 | sv = on_notice
14 |
15 |
16 | @sv("group_upload")
17 | async def groupFileLink(session):
18 | link = session.ctx["file"]["url"]
19 | file_name = session.ctx["file"]["name"]
20 | size = session.ctx["file"]["size"]
21 | link = re.sub(r"fname=.*", f"fname={urllib.parse.quote(file_name)}", link)
22 | if (
23 | link[-4:].lower() in [".jpg", ".png", ".gif", ".bmp", "jfif", "webp"]
24 | and size < 31457280
25 | ):
26 | await session.send(MessageSegment.image(link))
27 | elif (
28 | link[-4:].lower()
29 | in [".mp4", ".avi", ".mkv", ".rmvb", ".flv", ".wmv", ".mpg", ".mpeg"]
30 | and size < 104857600
31 | ):
32 | await session.send(MessageSegment.video(link))
33 | elif (
34 | link[-4:].lower() in [".mp3", ".wav", ".wma", ".ogg", ".ape", ".flac"]
35 | and size < 31457280
36 | ):
37 | await session.send(MessageSegment.record(link))
38 | else:
39 | await session.send(f"文件:{file_name}\n直链:{link}")
40 |
--------------------------------------------------------------------------------
/custom/fox.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | from nonebot import on_command, CommandSession, MessageSegment
3 |
4 |
5 | @on_command("/fox", aliases=("!fox", "\\fox"), only_to_me=False)
6 | async def fox(session: CommandSession):
7 | try:
8 | api_url = "https://randomfox.ca/floof/"
9 | with httpx.Client(proxies={}) as client:
10 | r = client.get(api_url, timeout=5)
11 | img_url = r.json()["image"]
12 | msg = MessageSegment.image(img_url)
13 | except Exception as e:
14 | msg = "Error: {}".format(type(e))
15 | await session.send(msg)
16 |
--------------------------------------------------------------------------------
/custom/manage_group.py:
--------------------------------------------------------------------------------
1 | import nonebot
2 | from nonebot.argparse import ArgumentParser
3 | from nonebot import on_command, CommandSession
4 | from hoshino.typing import NoticeSession
5 | from nonebot.permission import SUPERUSER
6 |
7 | USAGE = r"""
8 | USAGE: group [OPTIONS]
9 |
10 | OPTIONS:
11 | -h, --help 显示本使用帮助
12 | -ls, --list 显示群列表
--------------------------------------------------------------------------------
/rss2/parsing/__init__.py:
--------------------------------------------------------------------------------
70 | ):
71 | logger.info(f"{rss.name} 已开启仅图片/仅含有图片,该消息没有图片,将跳过")
72 | write_item(db, item)
73 | change_data.remove(item)
74 |
75 | return {"change_data": change_data}
76 |
77 |
78 | # 如果启用了去重模式,对推送列表进行过滤
79 | @ParsingBase.append_before_handler(priority=12) # type: ignore
80 | async def handle_check_update(rss: Rss, state: Dict[str, Any]):
81 | change_data = state.get("change_data")
82 | conn = state.get("conn")
83 | db = state.get("tinydb")
84 |
85 | # 检查是否启用去重 使用 duplicate_filter_mode 字段
86 | if not rss.duplicate_filter_mode:
87 | return {"change_data": change_data}
88 |
89 | if not conn:
90 | conn = sqlite3.connect(str(DATA_PATH / "cache.db"))
91 | conn.set_trace_callback(logger.debug)
92 |
93 | cache_db_manage(conn)
94 |
95 | delete = []
96 | for index, item in enumerate(change_data):
97 | is_duplicate, image_hash = await duplicate_exists(
98 | rss=rss,
99 | conn=conn,
100 | item=item,
101 | summary=get_summary(item),
102 | )
103 | if is_duplicate:
104 | write_item(db, item)
105 | delete.append(index)
106 | else:
107 | change_data[index]["image_hash"] = str(image_hash)
108 |
109 | change_data = [
110 | item for index, item in enumerate(change_data) if index not in delete
111 | ]
112 |
113 | return {
114 | "change_data": change_data,
115 | "conn": conn,
116 | }
117 |
118 |
119 | # 处理标题
120 | @ParsingBase.append_handler(parsing_type="title")
121 | async def handle_title(
122 | rss: Rss,
123 | state: Dict[str, Any],
124 | item: Dict[str, Any],
125 | item_msg: str,
126 | tmp: str,
127 | tmp_state: Dict[str, Any],
128 | ) -> str:
129 | # 判断是否开启了只推送图片
130 | if rss.only_pic:
131 | return ""
132 |
133 | title = item["title"]
134 |
135 | if not config.blockquote:
136 | title = re.sub(r" - 转发 .*", "", title)
137 |
138 | res = f"标题:{title}\n"
139 | # 隔开标题和正文
140 | if not rss.only_title:
141 | res += "\n"
142 | if rss.translation:
143 | res += await handle_translation(content=title)
144 |
145 | # 如果开启了只推送标题,跳过下面判断标题与正文相似度的处理
146 | if rss.only_title:
147 | return emoji.emojize(res, language="alias")
148 |
149 | # 判断标题与正文相似度,避免标题正文一样,或者是标题为正文前N字等情况
150 | try:
151 | summary_html = Pq(get_summary(item))
152 | if not config.blockquote:
153 | summary_html.remove("blockquote")
154 | similarity = SequenceMatcher(None, summary_html.text()[: len(title)], title)
155 | # 标题正文相似度
156 | if similarity.ratio() > 0.6:
157 | res = ""
158 | except Exception as e:
159 | logger.warning(f"{rss.name} 没有正文内容!{e}")
160 |
161 | return emoji.emojize(res, language="alias")
162 |
163 |
164 | # 处理正文 判断是否是仅推送标题 、是否仅推送图片
165 | @ParsingBase.append_handler(parsing_type="summary", priority=1)
166 | async def handle_summary(
167 | rss: Rss,
168 | state: Dict[str, Any],
169 | item: Dict[str, Any],
170 | item_msg: str,
171 | tmp: str,
172 | tmp_state: Dict[str, Any],
173 | ) -> str:
174 | if rss.only_title or rss.only_pic:
175 | tmp_state["continue"] = False
176 | return ""
177 |
178 |
179 | # 处理正文 处理网页 tag
180 | @ParsingBase.append_handler(parsing_type="summary", priority=10) # type: ignore
181 | async def handle_summary(
182 | rss: Rss,
183 | state: Dict[str, Any],
184 | item: Dict[str, Any],
185 | item_msg: str,
186 | tmp: str,
187 | tmp_state: Dict[str, Any],
188 | ) -> str:
189 | try:
190 | tmp += handle_html_tag(html=Pq(get_summary(item)))
191 | except Exception as e:
192 | logger.warning(f"{rss.name} 没有正文内容!{e}")
193 | return tmp
194 |
195 |
196 | # 处理正文 移除指定内容
197 | @ParsingBase.append_handler(parsing_type="summary", priority=11) # type: ignore
198 | async def handle_summary(
199 | rss: Rss,
200 | state: Dict[str, Any],
201 | item: Dict[str, Any],
202 | item_msg: str,
203 | tmp: str,
204 | tmp_state: Dict[str, Any],
205 | ) -> str:
206 | # 移除指定内容
207 | if rss.content_to_remove:
208 | for pattern in rss.content_to_remove:
209 | tmp = re.sub(pattern, "", tmp)
210 | # 去除多余换行
211 | while "\n\n\n" in tmp:
212 | tmp = tmp.replace("\n\n\n", "\n\n")
213 | tmp = tmp.strip()
214 | return emoji.emojize(tmp, language="alias")
215 |
216 |
217 | # 处理正文 翻译
218 | @ParsingBase.append_handler(parsing_type="summary", priority=12) # type: ignore
219 | async def handle_summary(
220 | rss: Rss,
221 | state: Dict[str, Any],
222 | item: Dict[str, Any],
223 | item_msg: str,
224 | tmp: str,
225 | tmp_state: Dict[str, Any],
226 | ) -> str:
227 | if rss.translation:
228 | tmp += await handle_translation(tmp)
229 | return tmp
230 |
231 |
232 | # 处理图片
233 | @ParsingBase.append_handler(parsing_type="picture")
234 | async def handle_picture(
235 | rss: Rss,
236 | state: Dict[str, Any],
237 | item: Dict[str, Any],
238 | item_msg: str,
239 | tmp: str,
240 | tmp_state: Dict[str, Any],
241 | ) -> str:
242 |
243 | # 判断是否开启了只推送标题
244 | if rss.only_title:
245 | return ""
246 |
247 | res = ""
248 | try:
249 | res += await handle_img(
250 | item=item,
251 | img_proxy=rss.img_proxy,
252 | img_num=rss.max_image_number,
253 | )
254 | except Exception as e:
255 | logger.warning(f"{rss.name} 没有正文内容!{e}")
256 |
257 | # 判断是否开启了只推送图片
258 | return f"{res}\n" if rss.only_pic else f"{tmp + res}\n"
259 |
260 |
261 | # 处理来源
262 | @ParsingBase.append_handler(parsing_type="source")
263 | async def handle_source(
264 | rss: Rss,
265 | state: Dict[str, Any],
266 | item: Dict[str, Any],
267 | item_msg: str,
268 | tmp: str,
269 | tmp_state: Dict[str, Any],
270 | ) -> str:
271 | return f"链接:{item['link']}\n"
272 |
273 |
274 | # 处理种子
275 | @ParsingBase.append_handler(parsing_type="torrent")
276 | async def handle_torrent(
277 | rss: Rss,
278 | state: Dict[str, Any],
279 | item: Dict[str, Any],
280 | item_msg: str,
281 | tmp: str,
282 | tmp_state: Dict[str, Any],
283 | ) -> str:
284 | res: List[str] = []
285 | if not rss.is_open_upload_group:
286 | rss.group_id = []
287 | if rss.down_torrent:
288 | # 处理种子
289 | try:
290 | hash_list = await down_torrent(
291 | rss=rss, item=item, proxy=get_proxy(rss.img_proxy)
292 | )
293 | if hash_list and hash_list[0] is not None:
294 | res.append("\n磁力:")
295 | res.extend([f"magnet:?xt=urn:btih:{h}" for h in hash_list])
296 | except Exception:
297 | logger.exception("下载种子时出错")
298 | if rss.pikpak_offline:
299 | try:
300 | result = await pikpak_offline(
301 | rss=rss, item=item, proxy=get_proxy(rss.img_proxy)
302 | )
303 | if result:
304 | res.append("\nPikPak 离线成功")
305 | res.extend(
306 | [
307 | f"{r.get('name')}\n{r.get('file_size')} - {r.get('path')}"
308 | for r in result
309 | ]
310 | )
311 | except Exception:
312 | logger.exception("PikPak 离线时出错")
313 | return "\n".join(res)
314 |
315 |
316 | # 处理日期
317 | @ParsingBase.append_handler(parsing_type="date")
318 | async def handle_date(
319 | rss: Rss,
320 | state: Dict[str, Any],
321 | item: Dict[str, Any],
322 | item_msg: str,
323 | tmp: str,
324 | tmp_state: Dict[str, Any],
325 | ) -> str:
326 | date = get_item_date(item)
327 | date = date.replace(tzinfo="local") if date > arrow.now() else date.to("local")
328 | return f"日期:{date.format('YYYY年MM月DD日 HH:mm:ss')}"
329 |
330 |
331 | # 发送消息
332 | @ParsingBase.append_handler(parsing_type="after")
333 | async def handle_message(
334 | rss: Rss,
335 | state: Dict[str, Any],
336 | item: Dict[str, Any],
337 | item_msg: str,
338 | tmp: str,
339 | tmp_state: Dict[str, Any],
340 | ) -> str:
341 | db = state["tinydb"]
342 |
343 | # 发送消息并写入文件
344 | if await send_msg(rss=rss, msg=item_msg, item=item):
345 |
346 | if rss.duplicate_filter_mode:
347 | insert_into_cache_db(
348 | conn=state["conn"], item=item, image_hash=item["image_hash"]
349 | )
350 |
351 | if item.get("to_send"):
352 | item.pop("to_send")
353 |
354 | state["item_count"] += 1
355 | else:
356 | item["to_send"] = True
357 |
358 | write_item(db, item)
359 |
360 | return ""
361 |
362 |
363 | @ParsingBase.append_after_handler()
364 | async def after_handler(rss: Rss, state: Dict[str, Any]) -> Dict[str, Any]:
365 | item_count: int = state["item_count"]
366 | conn = state["conn"]
367 | db = state["tinydb"]
368 |
369 | if item_count > 0:
370 | logger.info(f"{rss.name} 新消息推送完毕,共计:{item_count}")
371 | else:
372 | logger.info(f"{rss.name} 没有新信息")
373 |
374 | if conn is not None:
375 | conn.close()
376 |
377 | new_data_length = len(state["new_data"])
378 | cache_json_manage(db, new_data_length)
379 | db.close()
380 |
381 | return {}
382 |
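Taken together, the handlers in this file assemble each pushed message field by field, following the order of the handler dict defined in parsing_rss.py further below (title, summary, picture, source, date, torrent). A typical push therefore looks roughly like this (illustrative values):

    【某订阅源】更新了!
    ----------------------
    标题:...
    <正文文字 + 图片 CQ 码>
    链接:https://example.com/post/1
    日期:2023年01月01日 12:00:00
    磁力:magnet:?xt=urn:btih:...   (only for torrent feeds with down_torrent enabled)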
--------------------------------------------------------------------------------
/rss2/parsing/cache_manage.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | from sqlite3 import Connection
3 | from typing import Any, Dict, Optional, Tuple
4 |
5 | import imagehash
6 | from nonebot.log import logger
7 | from PIL import Image, UnidentifiedImageError
8 | from pyquery import PyQuery as Pq
9 | from tinydb import Query, TinyDB
10 | from tinydb.operations import delete
11 |
12 | from ..config import config
13 | from ..rss_class import Rss
14 | from .check_update import get_item_date
15 | from .handle_images import download_image
16 |
17 |
18 | # 精简 xxx.json (缓存) 中的字段
19 | def cache_filter(data: Dict[str, Any]) -> Dict[str, Any]:
20 | keys = [
21 | "guid",
22 | "link",
23 | "published",
24 | "updated",
25 | "title",
26 | "hash",
27 | ]
28 | if data.get("to_send"):
29 | keys += [
30 | "content",
31 | "summary",
32 | "to_send",
33 | ]
34 | return {k: data[k] for k in keys if k in data}
35 |
36 |
37 | # 对去重数据库进行管理
38 | def cache_db_manage(conn: Connection) -> None:
39 | cursor = conn.cursor()
40 | # 用来去重的 sqlite3 数据表如果不存在就创建一个
41 | cursor.execute(
42 | """
43 | CREATE TABLE IF NOT EXISTS main (
44 | "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
45 | "link" TEXT,
46 | "title" TEXT,
47 | "image_hash" TEXT,
48 | "datetime" TEXT DEFAULT (DATETIME('Now', 'LocalTime'))
49 | );
50 | """
51 | )
52 | cursor.close()
53 | conn.commit()
54 | cursor = conn.cursor()
55 | # 移除超过 config.db_cache_expire 天没重复过的记录
56 | cursor.execute(
57 | "DELETE FROM main WHERE datetime <= DATETIME('Now', 'LocalTime', ?);",
58 | (f"-{config.db_cache_expire} Day",),
59 | )
60 | cursor.close()
61 | conn.commit()
62 |
63 |
64 | # 对缓存 json 进行管理
65 | def cache_json_manage(db: TinyDB, new_data_length: int) -> None:
66 | # 只保留最多 config.limit + new_data_length 条的记录
67 | limit = config.limit + new_data_length
68 | retains = db.all()
69 | retains.sort(key=get_item_date)
70 | retains = retains[-limit:]
71 | db.truncate()
72 | db.insert_multiple(retains)
73 |
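In other words, the per-feed JSON cache is trimmed to the newest records on every run, for example:

    # config.limit = 200, new_data_length = 5  ->  only retains[-205:] (sorted by get_item_date) is re-inserted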
74 |
75 | # 去重判断
76 | async def duplicate_exists(
77 | rss: Rss, conn: Connection, item: Dict[str, Any], summary: str
78 | ) -> Tuple[bool, Optional[str]]:
79 | flag = False
80 | link = item["link"].replace("'", "''")
81 | title = item["title"].replace("'", "''")
82 | image_hash = None
83 | cursor = conn.cursor()
84 | sql = "SELECT * FROM main WHERE 1=1"
85 | args = []
86 | for mode in rss.duplicate_filter_mode:
87 | if mode == "image":
88 | try:
89 | summary_doc = Pq(summary)
90 | except Exception as e:
91 | logger.warning(e)
92 | # 没有正文内容直接跳过
93 | continue
94 | img_doc = summary_doc("img")
95 | # 只处理仅有一张图片的情况
96 | if len(img_doc) != 1:
97 | continue
98 | url = img_doc.attr("src")
99 | # 通过图像的指纹来判断是否实际是同一张图片
100 | content = await download_image(url, rss.img_proxy)
101 | if not content:
102 | continue
103 | try:
104 | im = Image.open(BytesIO(content))
105 | except UnidentifiedImageError:
106 | continue
107 | item["image_content"] = content
108 | # GIF 图片的 image_hash 实际上是第一帧的值,为了避免误伤直接跳过
109 | if im.format == "GIF":
110 | item["gif_url"] = url
111 | continue
112 | image_hash = str(imagehash.dhash(im))
113 | logger.debug(f"image_hash: {image_hash}")
114 | sql += " AND image_hash=?"
115 | args.append(image_hash)
116 | if mode == "link":
117 | sql += " AND link=?"
118 | args.append(link)
119 | elif mode == "title":
120 | sql += " AND title=?"
121 | args.append(title)
122 | if "or" in rss.duplicate_filter_mode:
123 | sql = sql.replace("AND", "OR").replace("OR", "AND", 1)
124 | cursor.execute(f"{sql};", args)
125 | result = cursor.fetchone()
126 | if result is not None:
127 | result_id = result[0]
128 | cursor.execute(
129 | "UPDATE main SET datetime = DATETIME('Now','LocalTime') WHERE id = ?;",
130 | (result_id,),
131 | )
132 | cursor.close()
133 | conn.commit()
134 | flag = True
135 | return flag, image_hash
136 |
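To make the mode combination concrete, this is roughly the query assembled above for two common settings (the "or" entry adds no clause of its own; the double replace just relaxes the remaining AND conditions):

    # duplicate_filter_mode = ["link", "title"]
    #   SELECT * FROM main WHERE 1=1 AND link=? AND title=?
    # duplicate_filter_mode = ["or", "link", "title"]
    #   SELECT * FROM main WHERE 1=1 AND link=? OR title=?   (i.e. a match on either field counts as duplicate)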
137 |
138 | # 消息发送后存入去重数据库
139 | def insert_into_cache_db(
140 | conn: Connection, item: Dict[str, Any], image_hash: str
141 | ) -> None:
142 | cursor = conn.cursor()
143 | link = item["link"].replace("'", "''")
144 | title = item["title"].replace("'", "''")
145 | cursor.execute(
146 | "INSERT INTO main (link, title, image_hash) VALUES (?, ?, ?);",
147 | (link, title, image_hash),
148 | )
149 | cursor.close()
150 | conn.commit()
151 |
152 |
153 | # 写入缓存 json
154 | def write_item(db: TinyDB, new_item: Dict[str, Any]) -> None:
155 | if not new_item.get("to_send"):
156 | db.update(delete("to_send"), Query().hash == str(new_item.get("hash"))) # type: ignore
157 | db.upsert(cache_filter(new_item), Query().hash == str(new_item.get("hash")))
158 |
--------------------------------------------------------------------------------
/rss2/parsing/check_update.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | from contextlib import suppress
3 | from email.utils import parsedate_to_datetime
4 | from typing import Any, Dict, List
5 |
6 | import arrow
7 | from tinydb import Query, TinyDB
8 |
9 |
10 | # 对 dict 对象计算哈希值,供后续比较
11 | def dict_hash(dictionary: Dict[str, Any]) -> str:
12 | string = str(dictionary.get("guid", dictionary.get("link")))
13 | result = hashlib.md5(string.encode())
14 | return result.hexdigest()
15 |
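Despite its name, the hash is computed only from the entry's guid (falling back to link), so two items sharing a guid map to the same hash regardless of their other fields:

    dict_hash({"guid": "https://example.com/post/1", "title": "a"})
    # == dict_hash({"guid": "https://example.com/post/1", "title": "b"})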
16 |
17 | # 检查更新
18 | def check_update(db: TinyDB, new: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
19 |
20 | # 发送失败 1 次
21 | to_send_list: List[Dict[str, Any]] = db.search(Query().to_send.exists())
22 |
23 | if not new and not to_send_list:
24 | return []
25 |
26 | old_hash_list = [r.get("hash") for r in db.all()]
27 | for i in new:
28 | hash_temp = dict_hash(i)
29 | if hash_temp not in old_hash_list:
30 | i["hash"] = hash_temp
31 | to_send_list.append(i)
32 |
33 | # 对结果按照发布时间排序
34 | to_send_list.sort(key=get_item_date)
35 |
36 | return to_send_list
37 |
38 |
39 | def get_item_date(item: Dict[str, Any]) -> arrow.Arrow:
40 | if date := item.get("published", item.get("updated")):
41 | with suppress(Exception):
42 | date = parsedate_to_datetime(date)
43 | return arrow.get(date)
44 | return arrow.now()
45 |
--------------------------------------------------------------------------------
/rss2/parsing/download_torrent.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Any, Dict, List, Optional
3 |
4 | import aiohttp
5 | from nonebot import get_bot
6 | from nonebot.log import logger
7 |
8 | from ..config import config
9 | from ..parsing.utils import get_summary
10 | from ..pikpak_offline import pikpak_offline_download
11 | from ..qbittorrent_download import start_down
12 | from ..rss_class import Rss
13 | from ..utils import convert_size, get_torrent_b16_hash, send_msg
14 |
15 |
16 | async def down_torrent(
17 | rss: Rss, item: Dict[str, Any], proxy: Optional[str]
18 | ) -> List[str]:
19 | """
20 | 创建下载种子任务
21 | """
22 | hash_list = []
23 | for tmp in item["links"]:
24 | if (
25 | tmp["type"] == "application/x-bittorrent"
26 | or tmp["href"].find(".torrent") > 0
27 | ):
28 | hash_list.append(
29 | await start_down(
30 | url=tmp["href"],
31 | group_ids=rss.group_id,
32 | name=rss.name,
33 | proxy=proxy,
34 | )
35 | )
36 | return hash_list
37 |
38 |
39 | async def pikpak_offline(
40 | rss: Rss, item: Dict[str, Any], proxy: Optional[str]
41 | ) -> List[Dict[str, Any]]:
42 | """
43 | 创建pikpak 离线下载任务
44 | 下载到 config.pikpak_download_path/rss.name or find rss.pikpak_path_rex
45 | """
46 | download_infos = []
47 | for tmp in item["links"]:
48 | if (
49 | tmp["type"] == "application/x-bittorrent"
50 | or tmp["href"].find(".torrent") > 0
51 | ):
52 | url = tmp["href"]
53 | if not re.search(r"magnet:\?xt=urn:btih:", tmp["href"]):
54 | async with aiohttp.ClientSession(
55 | timeout=aiohttp.ClientTimeout(total=100)
56 | ) as session:
57 | try:
58 | resp = await session.get(tmp["href"], proxy=proxy)
59 | content = await resp.read()
60 | url = f"magnet:?xt=urn:btih:{get_torrent_b16_hash(content)}"
61 | except Exception as e:
62 | msg = f"{rss.name} 下载种子失败: {e}"
63 | logger.error(msg)
64 | await send_msg(
65 | msg=msg, user_ids=rss.user_id, group_ids=rss.group_id
66 | )
67 | continue
68 | try:
69 | path = f"{config.pikpak_download_path}/{rss.name}"
70 | summary = get_summary(item)
71 | if rss.pikpak_path_key and (
72 | result := re.findall(rss.pikpak_path_key, summary)
73 | ):
74 | path = (
75 | config.pikpak_download_path
76 | + "/"
77 | + re.sub(r'[?*:"<>\\/|]', "_", result[0])
78 | )
79 | logger.info(f"Offline download {url} to {path}")
80 | info = await pikpak_offline_download(url=url, path=path)
81 | download_infos.append(
82 | {
83 | "name": info["task"]["name"],
84 | "file_size": convert_size(int(info["task"]["file_size"])),
85 | "path": path,
86 | }
87 | )
88 | except Exception as e:
89 | msg = f"{rss.name} PikPak 离线下载失败: {e}"
90 | logger.error(msg)
91 | await send_msg(msg=msg, user_ids=rss.user_id, group_ids=rss.group_id)
92 | return download_infos
93 |
--------------------------------------------------------------------------------
/rss2/parsing/handle_html_tag.py:
--------------------------------------------------------------------------------
1 | import re
2 | from html import unescape as html_unescape
3 |
4 | import bbcode
5 | from pyquery import PyQuery as Pq
6 | from yarl import URL
7 |
8 | from ..config import config
9 |
10 |
11 | # 处理 bbcode
12 | def handle_bbcode(html: Pq) -> str:
13 | rss_str = html_unescape(str(html))
14 |
15 | # issue 36 处理 bbcode
16 | rss_str = re.sub(
17 | r"(\[url=[^]]+])?\[img[^]]*].+\[/img](\[/url])?", "", rss_str, flags=re.I
18 | )
19 |
20 | # 处理一些 bbcode 标签
21 | bbcode_tags = [
22 | "align",
23 | "b",
24 | "backcolor",
25 | "color",
26 | "font",
27 | "size",
28 | "table",
29 | "tbody",
30 | "td",
31 | "tr",
32 | "u",
33 | "url",
34 | ]
35 |
36 | for i in bbcode_tags:
37 | rss_str = re.sub(rf"\[{i}=[^]]+]", "", rss_str, flags=re.I)
38 | rss_str = re.sub(rf"\[/?{i}]", "", rss_str, flags=re.I)
39 |
40 | # 去掉结尾被截断的信息
41 | rss_str = re.sub(
42 | r"(\[[^]]+|\[img][^\[\]]+) \.\.\n?
标签后增加俩个换行 138 | for i in ["p", "pre"]: 139 | rss_str = re.sub(f"{i}>", f"{i}>\n\n", rss_str) 140 | 141 | # 直接去掉标签,留下内部文本信息 142 | for i in html_tags: 143 | rss_str = re.sub(f"<{i} [^>]+>", "", rss_str) 144 | rss_str = re.sub(f"?{i}>", "", rss_str) 145 | 146 | rss_str = re.sub(r"<(br|hr)\s?/?>|<(br|hr) [^>]+>", "\n", rss_str) 147 | rss_str = re.sub(r"]+>", "\n", rss_str) 148 | rss_str = re.sub(r"?h\d>", "\n", rss_str) 149 | 150 | # 删除图片、视频标签 151 | rss_str = re.sub( 152 | r")?| ]+>", "", rss_str, flags=re.DOTALL 153 | ) 154 | 155 | # 去掉多余换行 156 | while "\n\n\n" in rss_str: 157 | rss_str = rss_str.replace("\n\n\n", "\n\n") 158 | rss_str = rss_str.strip() 159 | 160 | if 0 < config.max_length < len(rss_str): 161 | rss_str = f"{rss_str[: config.max_length]}..." 162 | 163 | return rss_str 164 | -------------------------------------------------------------------------------- /rss2/parsing/handle_images.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import random 3 | import re 4 | from io import BytesIO 5 | from typing import Any, Dict, Optional, Tuple, Union 6 | 7 | import aiohttp 8 | from nonebot.log import logger 9 | from PIL import Image, UnidentifiedImageError 10 | from pyquery import PyQuery as Pq 11 | from tenacity import RetryError, retry, stop_after_attempt, stop_after_delay 12 | from yarl import URL 13 | 14 | from ..config import Path, config 15 | from ..rss_class import Rss 16 | from .utils import get_proxy, get_summary 17 | 18 | 19 | # 通过 ezgif 压缩 GIF 20 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30))) 21 | async def resize_gif(url: str, resize_ratio: int = 2) -> Optional[bytes]: 22 | async with aiohttp.ClientSession() as session: 23 | resp = await session.post( 24 | "https://s3.ezgif.com/resize", 25 | data={"new-image-url": url}, 26 | ) 27 | d = Pq(await resp.text()) 28 | next_url = d("form").attr("action") 29 | file = d("form > input[type=hidden]:nth-child(1)").attr("value") 30 | token = d("form > input[type=hidden]:nth-child(2)").attr("value") 31 | old_width = d("form > input[type=hidden]:nth-child(3)").attr("value") 32 | old_height = d("form > input[type=hidden]:nth-child(4)").attr("value") 33 | data = { 34 | "file": file, 35 | "token": token, 36 | "old_width": old_width, 37 | "old_height": old_height, 38 | "width": str(int(old_width) // resize_ratio), 39 | "method": "gifsicle", 40 | "ar": "force", 41 | } 42 | resp = await session.post(next_url, params="ajax=true", data=data) 43 | d = Pq(await resp.text()) 44 | output_img_url = "https:" + d("img:nth-child(1)").attr("src") 45 | return await download_image(output_img_url) 46 | 47 | 48 | # 通过 ezgif 把视频中间 4 秒转 GIF 作为预览 49 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30))) 50 | async def get_preview_gif_from_video(url: str) -> str: 51 | async with aiohttp.ClientSession() as session: 52 | resp = await session.post( 53 | "https://s3.ezgif.com/video-to-gif", 54 | data={"new-image-url": url}, 55 | ) 56 | d = Pq(await resp.text()) 57 | video_length = re.search( 58 | r"\d\d:\d\d:\d\d", str(d("#main > p.filestats > strong")) 59 | ).group() # type: ignore 60 | hours = int(video_length.split(":")[0]) 61 | minutes = int(video_length.split(":")[1]) 62 | seconds = int(video_length.split(":")[2]) 63 | video_length_median = (hours * 60 * 60 + minutes * 60 + seconds) // 2 64 | next_url = d("form").attr("action") 65 | file = d("form > input[type=hidden]:nth-child(1)").attr("value") 66 | token = d("form > input[type=hidden]:nth-child(2)").attr("value") 67 | default_end = 
d("#end").attr("value") 68 | if float(default_end) >= 4: 69 | start = video_length_median - 2 70 | end = video_length_median + 2 71 | else: 72 | start = 0 73 | end = default_end 74 | data = { 75 | "file": file, 76 | "token": token, 77 | "start": start, 78 | "end": end, 79 | "size": 320, 80 | "fps": 25, 81 | "method": "ffmpeg", 82 | } 83 | resp = await session.post(next_url, params="ajax=true", data=data) 84 | d = Pq(await resp.text()) 85 | return f'https:{d("img:nth-child(1)").attr("src")}' 86 | 87 | 88 | # 图片压缩 89 | async def zip_pic(url: str, content: bytes) -> Union[Image.Image, bytes, None]: 90 | # 打开一个 JPEG/PNG/GIF/WEBP 图像文件 91 | try: 92 | im = Image.open(BytesIO(content)) 93 | except UnidentifiedImageError: 94 | logger.error(f"无法识别图像文件 链接:[{url}]") 95 | return None 96 | if im.format != "GIF": 97 | # 先把 WEBP 图像转为 PNG 98 | if im.format == "WEBP": 99 | with BytesIO() as output: 100 | im.save(output, "PNG") 101 | im = Image.open(output) 102 | # 对图像文件进行缩小处理 103 | im.thumbnail((config.zip_size, config.zip_size)) 104 | width, height = im.size 105 | logger.debug(f"Resize image to: {width} x {height}") 106 | # 和谐 107 | points = [(0, 0), (0, height - 1), (width - 1, 0), (width - 1, height - 1)] 108 | for x, y in points: 109 | im.putpixel((x, y), random.randint(0, 255)) 110 | return im 111 | else: 112 | if len(content) > config.gif_zip_size * 1024: 113 | try: 114 | return await resize_gif(url) 115 | except RetryError: 116 | logger.error(f"GIF 图片[{url}]压缩失败,将发送原图") 117 | return content 118 | 119 | 120 | # 将图片转化为 base64 121 | def get_pic_base64(content: Union[Image.Image, bytes, None]) -> str: 122 | if not content: 123 | return "" 124 | if isinstance(content, Image.Image): 125 | with BytesIO() as output: 126 | content.save(output, format=content.format) 127 | content = output.getvalue() 128 | if isinstance(content, bytes): 129 | return str(base64.b64encode(content).decode()) 130 | return "" 131 | 132 | 133 | # 去你的 pixiv.cat 134 | async def fuck_pixiv_cat(url: str) -> str: 135 | img_id = re.sub("https://pixiv.cat/", "", url) 136 | img_id = img_id[:-4] 137 | info_list = img_id.split("-") 138 | async with aiohttp.ClientSession() as session: 139 | try: 140 | resp = await session.get( 141 | f"https://api.obfs.dev/api/pixiv/illust?id={info_list[0]}" 142 | ) 143 | resp_json = await resp.json() 144 | if len(info_list) >= 2: 145 | return str( 146 | resp_json["illust"]["meta_pages"][int(info_list[1]) - 1][ 147 | "image_urls" 148 | ]["original"] 149 | ) 150 | else: 151 | return str( 152 | resp_json["illust"]["meta_single_page"]["original_image_url"] 153 | ) 154 | except Exception as e: 155 | logger.error(f"处理pixiv.cat链接时出现问题 :{e} 链接:[{url}]") 156 | return url 157 | 158 | 159 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30))) 160 | async def download_image_detail(url: str, proxy: bool) -> Optional[bytes]: 161 | async with aiohttp.ClientSession(raise_for_status=True) as session: 162 | referer = f"{URL(url).scheme}://{URL(url).host}/" 163 | headers = {"referer": referer} 164 | try: 165 | resp = await session.get( 166 | url, headers=headers, proxy=get_proxy(open_proxy=proxy) 167 | ) 168 | # 如果图片无法获取到,直接返回 169 | if len(await resp.read()) == 0: 170 | if "pixiv.cat" in url: 171 | url = await fuck_pixiv_cat(url=url) 172 | return await download_image(url, proxy) 173 | logger.error( 174 | f"图片[{url}]下载失败! 
Content-Type: {resp.headers['Content-Type']} status: {resp.status}" 175 | ) 176 | return None 177 | # 如果图片格式为 SVG ,先转换为 PNG 178 | if resp.headers["Content-Type"].startswith("image/svg+xml"): 179 | next_url = str( 180 | URL("https://images.weserv.nl/").with_query(f"url={url}&output=png") 181 | ) 182 | return await download_image(next_url, proxy) 183 | return await resp.read() 184 | except Exception as e: 185 | logger.warning(f"图片[{url}]下载失败!将重试最多 5 次!\n{e}") 186 | raise 187 | 188 | 189 | async def download_image(url: str, proxy: bool = False) -> Optional[bytes]: 190 | try: 191 | return await download_image_detail(url=url, proxy=proxy) 192 | except RetryError: 193 | logger.error(f"图片[{url}]下载失败!已达最大重试次数!有可能需要开启代理!") 194 | return None 195 | 196 | 197 | async def handle_img_combo(url: str, img_proxy: bool, rss: Optional[Rss] = None) -> str: 198 | """' 199 | 下载图片并返回可用的CQ码 200 | 201 | 参数: 202 | url: 需要下载的图片地址 203 | img_proxy: 是否使用代理下载图片 204 | rss: Rss对象 205 | 返回值: 206 | 返回当前图片的CQ码,以base64格式编码发送 207 | 如获取图片失败将会提示图片走丢了 208 | """ 209 | content = await download_image(url, img_proxy) 210 | if content: 211 | if rss is not None and rss.download_pic: 212 | _url = URL(url) 213 | logger.debug(f"正在保存图片: {url}") 214 | try: 215 | save_image(content=content, file_url=_url, rss=rss) 216 | except Exception as e: 217 | logger.warning(e) 218 | logger.warning("在保存图片到本地时出现错误") 219 | resize_content = await zip_pic(url, content) 220 | if img_base64 := get_pic_base64(resize_content): 221 | return f"[CQ:image,file=base64://{img_base64}]" 222 | return f"\n图片走丢啦: {url}\n" 223 | 224 | 225 | async def handle_img_combo_with_content(gif_url: str, content: bytes) -> str: 226 | resize_content = await zip_pic(gif_url, content) 227 | if img_base64 := get_pic_base64(resize_content): 228 | return f"[CQ:image,file=base64://{img_base64}]" 229 | return "\n图片走丢啦\n" 230 | 231 | 232 | # 处理图片、视频 233 | async def handle_img(item: Dict[str, Any], img_proxy: bool, img_num: int) -> str: 234 | if item.get("image_content"): 235 | return await handle_img_combo_with_content( 236 | item.get("gif_url", ""), item["image_content"] 237 | ) 238 | html = Pq(get_summary(item)) 239 | img_str = "" 240 | # 处理图片 241 | doc_img = list(html("img").items()) 242 | # 只发送限定数量的图片,防止刷屏 243 | if 0 < img_num < len(doc_img): 244 | img_str += f"\n因启用图片数量限制,目前只有 {img_num} 张图片:" 245 | doc_img = doc_img[:img_num] 246 | for img in doc_img: 247 | url = img.attr("src") 248 | img_str += await handle_img_combo(url, img_proxy) 249 | 250 | # 处理视频 251 | if doc_video := html("video"): 252 | img_str += "\n视频封面:" 253 | for video in doc_video.items(): 254 | url = video.attr("poster") 255 | img_str += await handle_img_combo(url, img_proxy) 256 | 257 | return img_str 258 | 259 | 260 | # 处理 bbcode 图片 261 | async def handle_bbcode_img(html: Pq, img_proxy: bool, img_num: int) -> str: 262 | img_str = "" 263 | # 处理图片 264 | img_list = re.findall(r"\[img[^]]*](.+)\[/img]", str(html), flags=re.I) 265 | # 只发送限定数量的图片,防止刷屏 266 | if 0 < img_num < len(img_list): 267 | img_str += f"\n因启用图片数量限制,目前只有 {img_num} 张图片:" 268 | img_list = img_list[:img_num] 269 | for img_tmp in img_list: 270 | img_str += await handle_img_combo(img_tmp, img_proxy) 271 | 272 | return img_str 273 | 274 | 275 | def file_name_format(file_url: URL, rss: Rss) -> Tuple[Path, str]: 276 | """ 277 | 可以根据用户设置的规则来格式化文件名 278 | """ 279 | format_rule = config.img_format 280 | down_path = config.img_down_path 281 | rules = { # 替换格式化字符串 282 | "{subs}": rss.name, 283 | "{name}": file_url.name 284 | if "{ext}" not in format_rule 285 | else 
Path(file_url.name).stem, 286 | "{ext}": file_url.suffix if "{ext}" in format_rule else "", 287 | } 288 | for k, v in rules.items(): 289 | format_rule = format_rule.replace(k, v) 290 | if down_path == "": # 如果没设置保存路径的话,就保存到默认目录下 291 | save_path = Path().cwd() / "data" / "image" 292 | elif down_path[0] == ".": 293 | save_path = Path().cwd() / Path(down_path) 294 | else: 295 | save_path = Path(down_path) 296 | full_path = save_path / format_rule 297 | save_path = full_path.parents[0] 298 | save_name = full_path.name 299 | return save_path, save_name 300 | 301 | 302 | def save_image(content: bytes, file_url: URL, rss: Rss) -> None: 303 | """ 304 | 将压缩之前的原图保存到本地的电脑上 305 | """ 306 | save_path, save_name = file_name_format(file_url=file_url, rss=rss) 307 | 308 | full_save_path = save_path / save_name 309 | try: 310 | full_save_path.write_bytes(content) 311 | except FileNotFoundError: 312 | # 初次写入时文件夹不存在,需要创建一下 313 | save_path.mkdir(parents=True) 314 | full_save_path.write_bytes(content) 315 | -------------------------------------------------------------------------------- /rss2/parsing/handle_translation.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import random 3 | import re 4 | from typing import Dict, Optional 5 | 6 | import aiohttp 7 | import emoji 8 | from deep_translator import DeeplTranslator, GoogleTranslator, single_detection 9 | from nonebot.log import logger 10 | 11 | from ..config import config 12 | 13 | 14 | async def baidu_translator(content: str, appid: str, secret_key: str) -> str: 15 | url = "https://api.fanyi.baidu.com/api/trans/vip/translate" 16 | salt = str(random.randint(32768, 65536)) 17 | sign = hashlib.md5((appid + content + salt + secret_key).encode()).hexdigest() 18 | params = { 19 | "q": content, 20 | "from": "auto", 21 | "to": "zh", 22 | "appid": appid, 23 | "salt": salt, 24 | "sign": sign, 25 | } 26 | async with aiohttp.ClientSession() as session: 27 | resp = await session.get(url, params=params, timeout=aiohttp.ClientTimeout(10)) 28 | data = await resp.json() 29 | try: 30 | content = "".join(i["dst"] + "\n" for i in data["trans_result"]) 31 | return "\n百度翻译:\n" + content[:-1] 32 | except Exception as e: 33 | error_msg = f"百度翻译失败:{data['error_msg']}" 34 | logger.warning(error_msg) 35 | raise Exception(error_msg) from e 36 | 37 | 38 | async def google_translation(text: str, proxies: Optional[Dict[str, str]]) -> str: 39 | # text 是处理过emoji的 40 | try: 41 | translator = GoogleTranslator(source="auto", target="zh-CN", proxies=proxies) 42 | return "\n谷歌翻译:\n" + str(translator.translate(re.escape(text))) 43 | except Exception as e: 44 | error_msg = "\nGoogle翻译失败:" + str(e) + "\n" 45 | logger.warning(error_msg) 46 | raise Exception(error_msg) from e 47 | 48 | 49 | async def deepl_translator(text: str, proxies: Optional[Dict[str, str]]) -> str: 50 | try: 51 | lang = None 52 | if config.single_detection_api_key: 53 | lang = single_detection(text, api_key=config.single_detection_api_key) 54 | translator = DeeplTranslator( 55 | api_key=config.deepl_translator_api_key, 56 | source=lang, 57 | target="zh", 58 | use_free_api=True, 59 | proxies=proxies, 60 | ) 61 | return "\nDeepl翻译:\n" + str(translator.translate(re.escape(text))) 62 | except Exception as e: 63 | error_msg = "\nDeeplTranslator翻译失败:" + str(e) + "\n" 64 | logger.warning(error_msg) 65 | raise Exception(error_msg) from e 66 | 67 | 68 | # 翻译 69 | async def handle_translation(content: str) -> str: 70 | proxies = ( 71 | { 72 | "https": config.rss_proxy, 73 | 
"http": config.rss_proxy, 74 | } 75 | if config.rss_proxy 76 | else None 77 | ) 78 | 79 | text = emoji.demojize(content) 80 | text = re.sub(r":[A-Za-z_]*:", " ", text) 81 | try: 82 | # 优先级 DeeplTranslator > 百度翻译 > GoogleTranslator 83 | # 异常时使用 GoogleTranslator 重试 84 | google_translator_flag = False 85 | try: 86 | if config.deepl_translator_api_key: 87 | text = await deepl_translator(text=text, proxies=proxies) 88 | elif config.baidu_id and config.baidu_key: 89 | text = await baidu_translator( 90 | content, config.baidu_id, config.baidu_key 91 | ) 92 | else: 93 | google_translator_flag = True 94 | except Exception: 95 | google_translator_flag = True 96 | if google_translator_flag: 97 | text = await google_translation(text=text, proxies=proxies) 98 | except Exception as e: 99 | logger.error(e) 100 | text = str(e) 101 | 102 | text = text.replace("\\", "") 103 | return text 104 | -------------------------------------------------------------------------------- /rss2/parsing/parsing_rss.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any, Callable, Dict, List 3 | 4 | from tinydb import TinyDB 5 | from tinydb.middlewares import CachingMiddleware 6 | from tinydb.storages import JSONStorage 7 | 8 | from ..config import DATA_PATH 9 | from ..rss_class import Rss 10 | 11 | 12 | # 订阅器启动的时候将解析器注册到rss实例类?,避免每次推送时再匹配 13 | class ParsingItem: 14 | def __init__( 15 | self, 16 | func: Callable[..., Any], 17 | rex: str = "(.*)", 18 | priority: int = 10, 19 | block: bool = False, 20 | ): 21 | # 解析函数 22 | self.func: Callable[..., Any] = func 23 | # 匹配的订阅地址正则,"(.*)" 是全都匹配 24 | self.rex: str = rex 25 | # 优先级,数字越小优先级越高。优先级相同时,会抛弃默认处理方式,即抛弃 rex="(.*)" 26 | self.priority: int = priority 27 | # 是否阻止执行之后的处理,默认不阻止。抛弃默认处理方式,只需要 block==True and priority<10 28 | self.block: bool = block 29 | 30 | 31 | # 解析器排序 32 | def _sort(_list: List[ParsingItem]) -> List[ParsingItem]: 33 | _list.sort(key=lambda x: x.priority) 34 | return _list 35 | 36 | 37 | # rss 解析类 ,需要将特殊处理的订阅注册到该类 38 | class ParsingBase: 39 | """ 40 | - **类型**: ``List[ParsingItem]`` 41 | - **说明**: 最先执行的解析器,定义了检查更新等前置步骤 42 | """ 43 | 44 | before_handler: List[ParsingItem] = [] 45 | 46 | """ 47 | - **类型**: ``Dict[str, List[ParsingItem]]`` 48 | - **说明**: 解析器 49 | """ 50 | handler: Dict[str, List[ParsingItem]] = { 51 | "before": [], # item的预处理 52 | "title": [], 53 | "summary": [], 54 | "picture": [], 55 | "source": [], 56 | "date": [], 57 | "torrent": [], 58 | "after": [], # item的最后处理,此处调用消息截取、发送 59 | } 60 | 61 | """ 62 | - **类型**: ``List[ParsingItem]`` 63 | - **说明**: 最后执行的解析器,在消息发送后,也可以多条消息合并发送 64 | """ 65 | after_handler: List[ParsingItem] = [] 66 | 67 | # 增加解析器 68 | @classmethod 69 | def append_handler( 70 | cls, 71 | parsing_type: str, 72 | rex: str = "(.*)", 73 | priority: int = 10, 74 | block: bool = False, 75 | ) -> Callable[..., Any]: 76 | def _decorator(func: Callable[..., Any]) -> Callable[..., Any]: 77 | cls.handler[parsing_type].append(ParsingItem(func, rex, priority, block)) 78 | cls.handler.update({parsing_type: _sort(cls.handler[parsing_type])}) 79 | return func 80 | 81 | return _decorator 82 | 83 | @classmethod 84 | def append_before_handler( 85 | cls, rex: str = "(.*)", priority: int = 10, block: bool = False 86 | ) -> Callable[..., Any]: 87 | """ 88 | 装饰一个方法,作为将其一个前置处理器 89 | 参数: 90 | rex: 用于正则匹配目标订阅地址,匹配成功后执行器将适用 91 | proirity: 执行器优先级,自定义执行器会覆盖掉相同优先级的默认执行器 92 | block: 是否要阻断后续执行器进行 93 | """ 94 | 95 | def _decorator(func: Callable[..., Any]) -> Callable[..., Any]: 96 | 
cls.before_handler.append(ParsingItem(func, rex, priority, block)) 97 | cls.before_handler = _sort(cls.before_handler) 98 | return func 99 | 100 | return _decorator 101 | 102 | @classmethod 103 | def append_after_handler( 104 | cls, rex: str = "(.*)", priority: int = 10, block: bool = False 105 | ) -> Callable[..., Any]: 106 | """ 107 | 装饰一个方法,作为将其一个后置处理器 108 | 参数: 109 | rex: 用于正则匹配目标订阅地址,匹配成功后执行器将适用 110 | proirity: 执行器优先级,自定义执行器会覆盖掉相同优先级的默认执行器 111 | block: 是否要阻断后续执行器进行 112 | """ 113 | 114 | def _decorator(func: Callable[..., Any]) -> Callable[..., Any]: 115 | cls.after_handler.append(ParsingItem(func, rex, priority, block)) 116 | cls.after_handler = _sort(cls.after_handler) 117 | return func 118 | 119 | return _decorator 120 | 121 | 122 | # 对处理器进行过滤 123 | def _handler_filter(_handler_list: List[ParsingItem], _url: str) -> List[ParsingItem]: 124 | _result = [h for h in _handler_list if re.search(h.rex, _url)] 125 | # 删除优先级相同时默认的处理器 126 | _delete = [ 127 | (h.func.__name__, "(.*)", h.priority) for h in _result if h.rex != "(.*)" 128 | ] 129 | _result = [ 130 | h for h in _result if (h.func.__name__, h.rex, h.priority) not in _delete 131 | ] 132 | return _result 133 | 134 | 135 | # 解析实例 136 | class ParsingRss: 137 | 138 | # 初始化解析实例 139 | def __init__(self, rss: Rss): 140 | self.state: Dict[str, Any] = {} # 用于存储实例处理中上下文数据 141 | self.rss: Rss = rss 142 | 143 | # 对处理器进行过滤 144 | self.before_handler: List[ParsingItem] = _handler_filter( 145 | ParsingBase.before_handler, self.rss.get_url() 146 | ) 147 | self.handler: Dict[str, List[ParsingItem]] = {} 148 | for k, v in ParsingBase.handler.items(): 149 | self.handler[k] = _handler_filter(v, self.rss.get_url()) 150 | self.after_handler = _handler_filter( 151 | ParsingBase.after_handler, self.rss.get_url() 152 | ) 153 | 154 | # 开始解析 155 | async def start(self, rss_name: str, new_rss: Dict[str, Any]) -> None: 156 | # new_data 是完整的 rss 解析后的 dict 157 | # 前置处理 158 | rss_title = new_rss["feed"]["title"] 159 | new_data = new_rss["entries"] 160 | _file = DATA_PATH / f"{Rss.handle_name(rss_name)}.json" 161 | db = TinyDB( 162 | _file, 163 | storage=CachingMiddleware(JSONStorage), # type: ignore 164 | encoding="utf-8", 165 | sort_keys=True, 166 | indent=4, 167 | ensure_ascii=False, 168 | ) 169 | self.state.update( 170 | { 171 | "rss_title": rss_title, 172 | "new_data": new_data, 173 | "change_data": [], # 更新的消息列表 174 | "conn": None, # 数据库连接 175 | "tinydb": db, # 缓存 json 176 | } 177 | ) 178 | for handler in self.before_handler: 179 | self.state.update(await handler.func(rss=self.rss, state=self.state)) 180 | if handler.block: 181 | break 182 | 183 | # 分条处理 184 | self.state.update( 185 | { 186 | "messages": [], 187 | "item_count": 0, 188 | } 189 | ) 190 | for item in self.state["change_data"]: 191 | item_msg = f"【{self.state.get('rss_title')}】更新了!\n----------------------\n" 192 | 193 | for handler_list in self.handler.values(): 194 | # 用于保存上一次处理结果 195 | tmp = "" 196 | tmp_state = {"continue": True} # 是否继续执行后续处理 197 | 198 | # 某一个内容的处理如正文,传入原文与上一次处理结果,此次处理完后覆盖 199 | for handler in handler_list: 200 | tmp = await handler.func( 201 | rss=self.rss, 202 | state=self.state, 203 | item=item, 204 | item_msg=item_msg, 205 | tmp=tmp, 206 | tmp_state=tmp_state, 207 | ) 208 | if handler.block or not tmp_state["continue"]: 209 | break 210 | item_msg += tmp 211 | self.state["messages"].append(item_msg) 212 | 213 | # 最后处理 214 | for handler in self.after_handler: 215 | self.state.update(await handler.func(rss=self.rss, state=self.state)) 216 | if handler.block: 217 | break 218 | 
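# 编注示例(非 parsing_rss.py 源文件内容):演示如何通过 ParsingBase.append_handler
# 注册一个自定义解析器。假设条件:rex="example.com" 与函数名均为虚构,仅作示意;
# 导入路径视插件实际安装位置而定。处理器签名与 routes/ 下各模块保持一致,
# 返回的字符串会在 ParsingRss.start 中被追加到该条目的推送消息 item_msg。
from typing import Any, Dict

from rss2.parsing import ParsingBase
from rss2.rss_class import Rss


@ParsingBase.append_handler(parsing_type="summary", rex="example.com", priority=10)
async def handle_example_summary(
    rss: Rss,
    state: Dict[str, Any],
    item: Dict[str, Any],
    item_msg: str,
    tmp: str,
    tmp_state: Dict[str, Any],
) -> str:
    # 在上一次处理结果 tmp 的基础上追加正文内容
    return tmp + item.get("summary", "")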
-------------------------------------------------------------------------------- /rss2/parsing/routes/__init__.py: -------------------------------------------------------------------------------- 1 | from . import danbooru, nga, pixiv, south_plus, twitter, weibo, yande_re, youtube 2 | -------------------------------------------------------------------------------- /rss2/parsing/routes/danbooru.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | from typing import Any, Dict 3 | 4 | import aiohttp 5 | from nonebot.log import logger 6 | from pyquery import PyQuery as Pq 7 | from tenacity import RetryError, retry, stop_after_attempt, stop_after_delay 8 | 9 | from ...config import DATA_PATH 10 | from ...rss_class import Rss 11 | from .. import ParsingBase, cache_db_manage, duplicate_exists, write_item 12 | from ..handle_images import ( 13 | get_preview_gif_from_video, 14 | handle_img_combo, 15 | handle_img_combo_with_content, 16 | ) 17 | from ..utils import get_proxy 18 | 19 | 20 | # 处理图片 21 | @ParsingBase.append_handler(parsing_type="picture", rex="danbooru") 22 | async def handle_picture( 23 | rss: Rss, 24 | state: Dict[str, Any], 25 | item: Dict[str, Any], 26 | item_msg: str, 27 | tmp: str, 28 | tmp_state: Dict[str, Any], 29 | ) -> str: 30 | 31 | # 判断是否开启了只推送标题 32 | if rss.only_title: 33 | return "" 34 | 35 | try: 36 | res = await handle_img( 37 | item=item, 38 | img_proxy=rss.img_proxy, 39 | ) 40 | except RetryError: 41 | res = "预览图获取失败" 42 | logger.warning(f"[{item['link']}]的预览图获取失败") 43 | 44 | # 判断是否开启了只推送图片 45 | return f"{res}\n" if rss.only_pic else f"{tmp + res}\n" 46 | 47 | 48 | # 处理图片、视频 49 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30))) 50 | async def handle_img(item: Dict[str, Any], img_proxy: bool) -> str: 51 | if item.get("image_content"): 52 | return await handle_img_combo_with_content( 53 | item.get("gif_url", ""), item["image_content"] 54 | ) 55 | img_str = "" 56 | 57 | # 处理图片 58 | async with aiohttp.ClientSession() as session: 59 | resp = await session.get(item["link"], proxy=get_proxy(img_proxy)) 60 | d = Pq(await resp.text()) 61 | if img := d("img#image"): 62 | url = img.attr("src") 63 | else: 64 | img_str += "视频预览:" 65 | url = d("video#image").attr("src") 66 | try: 67 | url = await get_preview_gif_from_video(url) 68 | except RetryError: 69 | logger.warning("视频预览获取失败,将发送原视频封面") 70 | url = d("meta[property='og:image']").attr("content") 71 | img_str += await handle_img_combo(url, img_proxy) 72 | 73 | return img_str 74 | 75 | 76 | # 如果启用了去重模式,对推送列表进行过滤 77 | @ParsingBase.append_before_handler(rex="danbooru", priority=12) 78 | async def handle_check_update(rss: Rss, state: Dict[str, Any]) -> Dict[str, Any]: 79 | change_data = state["change_data"] 80 | conn = state["conn"] 81 | db = state["tinydb"] 82 | 83 | # 检查是否启用去重 使用 duplicate_filter_mode 字段 84 | if not rss.duplicate_filter_mode: 85 | return {"change_data": change_data} 86 | 87 | if not conn: 88 | conn = sqlite3.connect(str(DATA_PATH / "cache.db")) 89 | conn.set_trace_callback(logger.debug) 90 | 91 | cache_db_manage(conn) 92 | 93 | delete = [] 94 | for index, item in enumerate(change_data): 95 | try: 96 | summary = await get_summary(item, rss.img_proxy) 97 | except RetryError: 98 | logger.warning(f"[{item['link']}]的预览图获取失败") 99 | continue 100 | is_duplicate, image_hash = await duplicate_exists( 101 | rss=rss, 102 | conn=conn, 103 | item=item, 104 | summary=summary, 105 | ) 106 | if is_duplicate: 107 | write_item(db, item) 108 | delete.append(index) 109 | 
else: 110 | change_data[index]["image_hash"] = str(image_hash) 111 | 112 | change_data = [ 113 | item for index, item in enumerate(change_data) if index not in delete 114 | ] 115 | 116 | return { 117 | "change_data": change_data, 118 | "conn": conn, 119 | } 120 | 121 | 122 | # 获取正文 123 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30))) 124 | async def get_summary(item: Dict[str, Any], img_proxy: bool) -> str: 125 | summary = ( 126 | item["content"][0].get("value") if item.get("content") else item["summary"] 127 | ) 128 | # 如果图片非视频封面,替换为更清晰的预览图;否则移除,以此跳过图片去重检查 129 | summary_doc = Pq(summary) 130 | async with aiohttp.ClientSession() as session: 131 | resp = await session.get(item["link"], proxy=get_proxy(img_proxy)) 132 | d = Pq(await resp.text()) 133 | if img := d("img#image"): 134 | summary_doc("img").attr("src", img.attr("src")) 135 | else: 136 | summary_doc.remove("img") 137 | return str(summary_doc) 138 | -------------------------------------------------------------------------------- /rss2/parsing/routes/nga.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any, Dict, List 3 | 4 | from tinydb import Query, TinyDB 5 | 6 | from ...rss_class import Rss 7 | from .. import ParsingBase 8 | from ..check_update import get_item_date 9 | 10 | 11 | # 检查更新 12 | @ParsingBase.append_before_handler(rex="/nga/", priority=10) 13 | async def handle_check_update(rss: Rss, state: Dict[str, Any]) -> Dict[str, Any]: 14 | new_data = state["new_data"] 15 | db = state["tinydb"] 16 | 17 | for i in new_data: 18 | i["link"] = re.sub(r"&rand=\d+", "", i["link"]) 19 | 20 | change_data = check_update(db, new_data) 21 | return {"change_data": change_data} 22 | 23 | 24 | # 检查更新 25 | def check_update(db: TinyDB, new: List[Dict[str, Any]]) -> List[Dict[str, Any]]: 26 | 27 | # 发送失败 1 次 28 | to_send_list: List[Dict[str, Any]] = db.search(Query().to_send.exists()) 29 | 30 | if not new and not to_send_list: 31 | return [] 32 | 33 | old_link_list = [i["id"] for i in db.all()] 34 | to_send_list.extend([i for i in new if i["link"] not in old_link_list]) 35 | 36 | # 对结果按照发布时间排序 37 | to_send_list.sort(key=get_item_date) 38 | 39 | return to_send_list 40 | -------------------------------------------------------------------------------- /rss2/parsing/routes/pixiv.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sqlite3 3 | from typing import Any, Dict, List 4 | 5 | import aiohttp 6 | from nonebot.log import logger 7 | from pyquery import PyQuery as Pq 8 | from tenacity import RetryError, TryAgain, retry, stop_after_attempt, stop_after_delay 9 | from tinydb import Query, TinyDB 10 | 11 | from ...config import DATA_PATH 12 | from ...rss_class import Rss 13 | from .. 
import ParsingBase, cache_db_manage, duplicate_exists, write_item 14 | from ..check_update import get_item_date 15 | from ..handle_images import ( 16 | get_preview_gif_from_video, 17 | handle_img_combo, 18 | handle_img_combo_with_content, 19 | ) 20 | from ..utils import get_summary 21 | 22 | 23 | # 如果启用了去重模式,对推送列表进行过滤 24 | @ParsingBase.append_before_handler(priority=12, rex="/pixiv/") 25 | async def handle_check_update(rss: Rss, state: Dict[str, Any]) -> Dict[str, Any]: 26 | change_data = state["change_data"] 27 | conn = state["conn"] 28 | db = state["tinydb"] 29 | 30 | # 检查是否启用去重 使用 duplicate_filter_mode 字段 31 | if not rss.duplicate_filter_mode: 32 | return {"change_data": change_data} 33 | 34 | if not conn: 35 | conn = sqlite3.connect(str(DATA_PATH / "cache.db")) 36 | conn.set_trace_callback(logger.debug) 37 | 38 | cache_db_manage(conn) 39 | 40 | delete = [] 41 | for index, item in enumerate(change_data): 42 | summary = get_summary(item) 43 | try: 44 | summary_doc = Pq(summary) 45 | # 如果图片为动图,通过移除来跳过图片去重检查 46 | if re.search("类型:ugoira", str(summary_doc)): 47 | summary_doc.remove("img") 48 | summary = str(summary_doc) 49 | except Exception as e: 50 | logger.warning(e) 51 | is_duplicate, image_hash = await duplicate_exists( 52 | rss=rss, 53 | conn=conn, 54 | item=item, 55 | summary=summary, 56 | ) 57 | if is_duplicate: 58 | write_item(db, item) 59 | delete.append(index) 60 | else: 61 | change_data[index]["image_hash"] = str(image_hash) 62 | 63 | change_data = [ 64 | item for index, item in enumerate(change_data) if index not in delete 65 | ] 66 | 67 | return { 68 | "change_data": change_data, 69 | "conn": conn, 70 | } 71 | 72 | 73 | # 处理图片 74 | @ParsingBase.append_handler(parsing_type="picture", rex="pixiv") 75 | async def handle_picture( 76 | rss: Rss, 77 | state: Dict[str, Any], 78 | item: Dict[str, Any], 79 | item_msg: str, 80 | tmp: str, 81 | tmp_state: Dict[str, Any], 82 | ) -> str: 83 | 84 | # 判断是否开启了只推送标题 85 | if rss.only_title: 86 | return "" 87 | 88 | res = "" 89 | try: 90 | res += await handle_img( 91 | item=item, img_proxy=rss.img_proxy, img_num=rss.max_image_number, rss=rss 92 | ) 93 | except Exception as e: 94 | logger.warning(f"{rss.name} 没有正文内容!{e}") 95 | 96 | # 判断是否开启了只推送图片 97 | return f"{res}\n" if rss.only_pic else f"{tmp + res}\n" 98 | 99 | 100 | # 处理图片、视频 101 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30))) 102 | async def handle_img( 103 | item: Dict[str, Any], img_proxy: bool, img_num: int, rss: Rss 104 | ) -> str: 105 | if item.get("image_content"): 106 | return await handle_img_combo_with_content( 107 | item.get("gif_url", ""), item["image_content"] 108 | ) 109 | html = Pq(get_summary(item)) 110 | link = item["link"] 111 | img_str = "" 112 | # 处理动图 113 | if re.search("类型:ugoira", str(html)): 114 | ugoira_id = re.search(r"\d+", link).group() # type: ignore 115 | try: 116 | url = await get_ugoira_video(ugoira_id) 117 | url = await get_preview_gif_from_video(url) 118 | img_str += await handle_img_combo(url, img_proxy) 119 | except RetryError: 120 | logger.warning(f"动图[{link}]的预览图获取失败,将发送原动图封面") 121 | url = html("img").attr("src") 122 | img_str += await handle_img_combo(url, img_proxy) 123 | else: 124 | # 处理图片 125 | doc_img = list(html("img").items()) 126 | # 只发送限定数量的图片,防止刷屏 127 | if 0 < img_num < len(doc_img): 128 | img_str += f"\n因启用图片数量限制,目前只有 {img_num} 张图片:" 129 | doc_img = doc_img[:img_num] 130 | for img in doc_img: 131 | url = img.attr("src") 132 | img_str += await handle_img_combo(url, img_proxy, rss) 133 | 134 | return img_str 135 | 136 | 137 | # 
获取动图为视频 138 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30))) 139 | async def get_ugoira_video(ugoira_id: str) -> Any: 140 | async with aiohttp.ClientSession() as session: 141 | data = {"id": ugoira_id, "type": "ugoira"} 142 | resp = await session.post("https://ugoira.huggy.moe/api/illusts", data=data) 143 | url = (await resp.json()).get("data")[0].get("url") 144 | if not url: 145 | raise TryAgain 146 | return url 147 | 148 | 149 | # 处理来源 150 | @ParsingBase.append_handler(parsing_type="source", rex="pixiv") 151 | async def handle_source( 152 | rss: Rss, 153 | state: Dict[str, Any], 154 | item: Dict[str, Any], 155 | item_msg: str, 156 | tmp: str, 157 | tmp_state: Dict[str, Any], 158 | ) -> str: 159 | source = item["link"] 160 | # 缩短 pixiv 链接 161 | str_link = re.sub("https://www.pixiv.net/artworks/", "https://pixiv.net/i/", source) 162 | return f"链接:{str_link}\n" 163 | 164 | 165 | # 检查更新 166 | @ParsingBase.append_before_handler(rex="pixiv/ranking", priority=10) # type: ignore 167 | async def handle_check_update(rss: Rss, state: Dict[str, Any]) -> Dict[str, Any]: 168 | db = state["tinydb"] 169 | change_data = check_update(db, state["new_data"]) 170 | return {"change_data": change_data} 171 | 172 | 173 | # 检查更新 174 | def check_update(db: TinyDB, new: List[Dict[str, Any]]) -> List[Dict[str, Any]]: 175 | 176 | # 发送失败 1 次 177 | to_send_list: List[Dict[str, Any]] = db.search(Query().to_send.exists()) 178 | 179 | if not new and not to_send_list: 180 | return [] 181 | 182 | old_link_list = [i["link"] for i in db.all()] 183 | to_send_list.extend([i for i in new if i["link"] not in old_link_list]) 184 | 185 | # 对结果按照发布时间排序 186 | to_send_list.sort(key=get_item_date) 187 | 188 | return to_send_list 189 | -------------------------------------------------------------------------------- /rss2/parsing/routes/south_plus.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any, Dict 3 | 4 | from nonebot.log import logger 5 | from pyquery import PyQuery as Pq 6 | 7 | from ...rss_class import Rss 8 | from .. 
import ParsingBase, handle_html_tag 9 | from ..handle_html_tag import handle_bbcode 10 | from ..handle_images import handle_bbcode_img 11 | from ..utils import get_summary 12 | 13 | 14 | # 处理正文 处理网页 tag 15 | @ParsingBase.append_handler( 16 | parsing_type="summary", rex="(south|spring)-plus.net", priority=10 17 | ) 18 | async def handle_summary( 19 | rss: Rss, 20 | state: Dict[str, Any], 21 | item: Dict[str, Any], 22 | item_msg: str, 23 | tmp: str, 24 | tmp_state: Dict[str, Any], 25 | ) -> str: 26 | rss_str = handle_bbcode(html=Pq(get_summary(item))) 27 | tmp += handle_html_tag(html=Pq(rss_str)) 28 | return tmp 29 | 30 | 31 | # 处理图片 32 | @ParsingBase.append_handler(parsing_type="picture", rex="(south|spring)-plus.net") 33 | async def handle_picture( 34 | rss: Rss, 35 | state: Dict[str, Any], 36 | item: Dict[str, Any], 37 | item_msg: str, 38 | tmp: str, 39 | tmp_state: Dict[str, Any], 40 | ) -> str: 41 | 42 | # 判断是否开启了只推送标题 43 | if rss.only_title: 44 | return "" 45 | 46 | res = "" 47 | try: 48 | res += await handle_bbcode_img( 49 | html=Pq(get_summary(item)), 50 | img_proxy=rss.img_proxy, 51 | img_num=rss.max_image_number, 52 | ) 53 | except Exception as e: 54 | logger.warning(f"{rss.name} 没有正文内容!{e}") 55 | 56 | # 判断是否开启了只推送图片 57 | return f"{res}\n" if rss.only_pic else f"{tmp + res}\n" 58 | 59 | 60 | # 处理来源 61 | @ParsingBase.append_handler(parsing_type="source", rex="(south|spring)-plus.net") 62 | async def handle_source( 63 | rss: Rss, 64 | state: Dict[str, Any], 65 | item: Dict[str, Any], 66 | item_msg: str, 67 | tmp: str, 68 | tmp_state: Dict[str, Any], 69 | ) -> str: 70 | source = item["link"] 71 | # issue 36 处理链接 72 | if re.search(r"^//", source): 73 | source = source.replace("//", "https://") 74 | return f"链接:{source}\n" 75 | -------------------------------------------------------------------------------- /rss2/parsing/routes/twitter.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from nonebot.log import logger 4 | from pyquery import PyQuery as Pq 5 | from tenacity import RetryError 6 | 7 | from ...rss_class import Rss 8 | from .. 
import ParsingBase 9 | from ..handle_images import ( 10 | get_preview_gif_from_video, 11 | handle_img_combo, 12 | handle_img_combo_with_content, 13 | ) 14 | from ..utils import get_summary 15 | 16 | 17 | # 处理图片 18 | @ParsingBase.append_handler(parsing_type="picture", rex="twitter") 19 | async def handle_picture( 20 | rss: Rss, 21 | state: Dict[str, Any], 22 | item: Dict[str, Any], 23 | item_msg: str, 24 | tmp: str, 25 | tmp_state: Dict[str, Any], 26 | ) -> str: 27 | 28 | # 判断是否开启了只推送标题 29 | if rss.only_title: 30 | return "" 31 | 32 | res = await handle_img( 33 | item=item, 34 | img_proxy=rss.img_proxy, 35 | img_num=rss.max_image_number, 36 | ) 37 | 38 | # 判断是否开启了只推送图片 39 | return f"{res}\n" if rss.only_pic else f"{tmp + res}\n" 40 | 41 | 42 | # 处理图片、视频 43 | async def handle_img(item: Dict[str, Any], img_proxy: bool, img_num: int) -> str: 44 | if item.get("image_content"): 45 | return await handle_img_combo_with_content( 46 | item.get("gif_url", ""), item["image_content"] 47 | ) 48 | html = Pq(get_summary(item)) 49 | img_str = "" 50 | # 处理图片 51 | doc_img = list(html("img").items()) 52 | # 只发送限定数量的图片,防止刷屏 53 | if 0 < img_num < len(doc_img): 54 | img_str += f"\n因启用图片数量限制,目前只有 {img_num} 张图片:" 55 | doc_img = doc_img[:img_num] 56 | for img in doc_img: 57 | url = img.attr("src") 58 | img_str += await handle_img_combo(url, img_proxy) 59 | 60 | # 处理视频 61 | if doc_video := html("video"): 62 | img_str += "\n视频预览:" 63 | for video in doc_video.items(): 64 | url = video.attr("src") 65 | try: 66 | url = await get_preview_gif_from_video(url) 67 | except RetryError: 68 | logger.warning("视频预览获取失败,将发送原视频封面") 69 | url = video.attr("poster") 70 | img_str += await handle_img_combo(url, img_proxy) 71 | 72 | return img_str 73 | -------------------------------------------------------------------------------- /rss2/parsing/routes/weibo.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from nonebot.log import logger 4 | from pyquery import PyQuery as Pq 5 | 6 | from ...config import config 7 | from ...rss_class import Rss 8 | from .. 
import ParsingBase, handle_html_tag 9 | from ..handle_images import handle_img_combo, handle_img_combo_with_content 10 | from ..utils import get_summary 11 | 12 | 13 | # 处理正文 处理网页 tag 14 | @ParsingBase.append_handler(parsing_type="summary", rex="weibo", priority=10) 15 | async def handle_summary( 16 | rss: Rss, 17 | state: Dict[str, Any], 18 | item: Dict[str, Any], 19 | item_msg: str, 20 | tmp: str, 21 | tmp_state: Dict[str, Any], 22 | ) -> str: 23 | summary_html = Pq(get_summary(item)) 24 | 25 | # 判断是否保留转发内容 26 | if not config.blockquote: 27 | summary_html.remove("blockquote") 28 | 29 | tmp += handle_html_tag(html=summary_html) 30 | 31 | return tmp 32 | 33 | 34 | # 处理图片 35 | @ParsingBase.append_handler(parsing_type="picture", rex="weibo") 36 | async def handle_picture( 37 | rss: Rss, 38 | state: Dict[str, Any], 39 | item: Dict[str, Any], 40 | item_msg: str, 41 | tmp: str, 42 | tmp_state: Dict[str, Any], 43 | ) -> str: 44 | 45 | # 判断是否开启了只推送标题 46 | if rss.only_title: 47 | return "" 48 | 49 | res = "" 50 | try: 51 | res += await handle_img( 52 | item=item, 53 | img_proxy=rss.img_proxy, 54 | img_num=rss.max_image_number, 55 | ) 56 | except Exception as e: 57 | logger.warning(f"{rss.name} 没有正文内容!{e}") 58 | 59 | # 判断是否开启了只推送图片 60 | return f"{res}\n" if rss.only_pic else f"{tmp + res}\n" 61 | 62 | 63 | # 处理图片、视频 64 | async def handle_img(item: Dict[str, Any], img_proxy: bool, img_num: int) -> str: 65 | if item.get("image_content"): 66 | return await handle_img_combo_with_content( 67 | item.get("gif_url", ""), item["image_content"] 68 | ) 69 | html = Pq(get_summary(item)) 70 | # 移除多余图标 71 | html.remove("span.url-icon") 72 | img_str = "" 73 | # 处理图片 74 | doc_img = list(html("img").items()) 75 | # 只发送限定数量的图片,防止刷屏 76 | if 0 < img_num < len(doc_img): 77 | img_str += f"\n因启用图片数量限制,目前只有 {img_num} 张图片:" 78 | doc_img = doc_img[:img_num] 79 | for img in doc_img: 80 | url = img.attr("src") 81 | img_str += await handle_img_combo(url, img_proxy) 82 | 83 | # 处理视频 84 | if doc_video := html("video"): 85 | img_str += "\n视频封面:" 86 | for video in doc_video.items(): 87 | url = video.attr("poster") 88 | img_str += await handle_img_combo(url, img_proxy) 89 | 90 | return img_str 91 | -------------------------------------------------------------------------------- /rss2/parsing/routes/yande_re.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any, Dict 3 | 4 | from ...rss_class import Rss 5 | from .. import ParsingBase, check_update 6 | 7 | 8 | # 检查更新 9 | @ParsingBase.append_before_handler( 10 | priority=10, rex=r"https\:\/\/yande\.re\/post\/piclens\?tags\=" 11 | ) 12 | async def handle_check_update(rss: Rss, state: Dict[str, Any]) -> Dict[str, Any]: 13 | db = state["tinydb"] 14 | change_data = check_update(db, state["new_data"]) 15 | for i in change_data: 16 | if i.get("media_content"): 17 | i["summary"] = re.sub( 18 | r'https://[^"]+', i["media_content"][0]["url"], i["summary"] 19 | ) 20 | return {"change_data": change_data} 21 | -------------------------------------------------------------------------------- /rss2/parsing/routes/youtube.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from ...rss_class import Rss 4 | from .. 
import ParsingBase 5 | from ..handle_images import handle_img_combo 6 | 7 | 8 | # 处理图片 9 | @ParsingBase.append_handler( 10 | parsing_type="picture", 11 | rex=r"https:\/\/www\.youtube\.com\/feeds\/videos\.xml\?channel_id=", 12 | ) 13 | async def handle_picture( 14 | rss: Rss, 15 | state: Dict[str, Any], 16 | item: Dict[str, Any], 17 | item_msg: str, 18 | tmp: str, 19 | tmp_state: Dict[str, Any], 20 | ) -> str: 21 | 22 | # 判断是否开启了只推送标题 23 | if rss.only_title: 24 | return "" 25 | 26 | img_url = item["media_thumbnail"][0]["url"] 27 | res = await handle_img_combo(img_url, rss.img_proxy) 28 | 29 | # 判断是否开启了只推送图片 30 | return f"{res}\n" if rss.only_pic else f"{tmp + res}\n" 31 | -------------------------------------------------------------------------------- /rss2/parsing/send_message.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections import defaultdict 3 | from contextlib import suppress 4 | from typing import Any, DefaultDict, Dict, Tuple, Union, List 5 | 6 | import arrow 7 | import nonebot 8 | from nonebot import logger 9 | 10 | from ..rss_class import Rss 11 | from ..utils import get_bot_friend_list, get_bot_group_list, get_all_bot_channel_list 12 | 13 | sending_lock: DefaultDict[Tuple[Union[int, str], str], asyncio.Lock] = defaultdict( 14 | asyncio.Lock 15 | ) 16 | 17 | # 发送消息 18 | async def send_msg(rss: Rss, msg: str, item: Dict[str, Any]) -> bool: 19 | bot = nonebot.get_bot() 20 | if not msg: 21 | return False 22 | flag = False 23 | error_msg = f"消息发送失败!\n链接:[{item.get('link')}]" 24 | if rss.user_id: 25 | all_friend = (await get_bot_friend_list(bot))[1] 26 | flag = any( 27 | await asyncio.gather( 28 | *[ 29 | send_private_msg( 30 | bot, msg, int(user_id), item, error_msg, all_friend 31 | ) 32 | for user_id in rss.user_id 33 | ] 34 | ) 35 | ) 36 | 37 | if rss.group_id: 38 | all_group = (await get_bot_group_list(bot))[1] 39 | flag = ( 40 | any( 41 | await asyncio.gather( 42 | *[ 43 | send_group_msg( 44 | bot, msg, int(group_id), item, error_msg, all_group 45 | ) 46 | for group_id in rss.group_id 47 | ] 48 | ) 49 | ) 50 | or flag 51 | ) 52 | 53 | if rss.guild_channel_id: 54 | all_channels = (await get_all_bot_channel_list(bot))[1] 55 | flag = ( 56 | any( 57 | await asyncio.gather( 58 | *[ 59 | send_guild_channel_msg( 60 | bot, msg, guild_channel_id, item, error_msg, all_channels 61 | ) 62 | for guild_channel_id in rss.guild_channel_id 63 | ] 64 | ) 65 | ) 66 | or flag 67 | ) 68 | return flag 69 | 70 | 71 | # 发送私聊消息 72 | async def send_private_msg( 73 | bot, 74 | msg: str, 75 | user_id: int, 76 | item: Dict[str, Any], 77 | error_msg: str, 78 | all_friend: Dict[int, List[int]], 79 | ) -> bool: 80 | flag = False 81 | start_time = arrow.now() 82 | sid = [k for k, v in all_friend.items() if int(user_id) in v][0] 83 | async with sending_lock[(user_id, "private")]: 84 | try: 85 | await bot.send_msg( 86 | self_id=sid, 87 | message_type="private", 88 | user_id=user_id, 89 | message=msg, 90 | ) 91 | await asyncio.sleep(max(1 - (arrow.now() - start_time).total_seconds(), 0)) 92 | flag = True 93 | except Exception as e: 94 | logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]") 95 | if item.get("to_send"): 96 | flag = True 97 | with suppress(Exception): 98 | await bot.send_msg( 99 | self_id=sid, 100 | message_type="private", 101 | user_id=user_id, 102 | message=f"{error_msg}\nE: {repr(e)}", 103 | ) 104 | return flag 105 | 106 | 107 | # 发送群聊消息 108 | async def send_group_msg( 109 | bot, 110 | msg: str, 111 | group_id: int, 112 | 
item: Dict[str, Any], 113 | error_msg: str, 114 | all_group: Dict[int, List[int]], 115 | ) -> bool: 116 | flag = False 117 | start_time = arrow.now() 118 | sid = [k for k, v in all_group.items() if int(group_id) in v][0] 119 | async with sending_lock[(group_id, "group")]: 120 | try: 121 | await bot.send_msg( 122 | self_id=sid, 123 | message_type="group", 124 | group_id=group_id, 125 | message=msg, 126 | ) 127 | await asyncio.sleep(max(1 - (arrow.now() - start_time).total_seconds(), 0)) 128 | flag = True 129 | except Exception as e: 130 | logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]") 131 | if item.get("to_send"): 132 | flag = True 133 | with suppress(Exception): 134 | await bot.send_msg( 135 | self_id=sid, 136 | message_type="group", 137 | group_id=group_id, 138 | message=f"E: {repr(e)}\n{error_msg}", 139 | ) 140 | return flag 141 | 142 | 143 | # 发送频道消息 144 | async def send_guild_channel_msg( 145 | bot, 146 | msg: str, 147 | guild_channel_id: str, 148 | item: Dict[str, Any], 149 | error_msg: str, 150 | all_channels: Dict, 151 | ) -> bool: 152 | flag = False 153 | start_time = arrow.now() 154 | guild_id, channel_id = guild_channel_id.split("@") 155 | sid = [k for k, v in all_channels.items() if channel_id in v][0] 156 | async with sending_lock[(guild_channel_id, "guild_channel")]: 157 | try: 158 | await bot.send_guild_channel_msg( 159 | self_id=sid, 160 | message=msg, 161 | guild_id=guild_id, 162 | channel_id=channel_id, 163 | ) 164 | await asyncio.sleep(max(1 - (arrow.now() - start_time).total_seconds(), 0)) 165 | flag = True 166 | except Exception as e: 167 | logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]") 168 | if item.get("to_send"): 169 | flag = True 170 | with suppress(Exception): 171 | await bot.send_guild_channel_msg( 172 | self_id=sid, 173 | message=f"E: {repr(e)}\n{error_msg}", 174 | guild_id=guild_id, 175 | channel_id=channel_id, 176 | ) 177 | return flag 178 | -------------------------------------------------------------------------------- /rss2/parsing/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any, Dict, Optional 3 | 4 | from ..config import config 5 | 6 | 7 | # 代理 8 | def get_proxy(open_proxy: bool) -> Optional[str]: 9 | if not open_proxy or not config.rss_proxy: 10 | return None 11 | return f"http://{config.rss_proxy}" 12 | 13 | 14 | # 获取正文 15 | def get_summary(item: Dict[str, Any]) -> str: 16 | summary: str = ( 17 | item["content"][0]["value"] if item.get("content") else item["summary"] 18 | ) 19 | return f"
{summary}" if re.search("^https?://", summary) else summary 20 | -------------------------------------------------------------------------------- /rss2/permission.py: -------------------------------------------------------------------------------- 1 | from nonebot import SenderRoles 2 | from .config import config 3 | 4 | 5 | def admin_permission(sender: SenderRoles): 6 | return ( 7 | sender.is_superuser 8 | or sender.is_admin 9 | or sender.is_owner 10 | or sender.sent_by(config.guild_superusers) 11 | or sender.sent_by(config.superusers) 12 | ) 13 | -------------------------------------------------------------------------------- /rss2/pikpak_offline.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | from nonebot.log import logger 4 | from pikpakapi.async_api import PikPakApiAsync 5 | from pikpakapi.PikpakException import PikpakAccessTokenExpireException, PikpakException 6 | 7 | from .config import config 8 | 9 | pikpak_client = PikPakApiAsync( 10 | username=config.pikpak_username, 11 | password=config.pikpak_password, 12 | ) 13 | 14 | 15 | async def refresh_access_token() -> None: 16 | """ 17 | Login or Refresh access_token PikPak 18 | 19 | """ 20 | try: 21 | await pikpak_client.refresh_access_token() 22 | except (PikpakException, PikpakAccessTokenExpireException) as e: 23 | logger.warning(f"refresh_access_token {e}") 24 | await pikpak_client.login() 25 | 26 | 27 | async def login() -> None: 28 | if not pikpak_client.access_token: 29 | await pikpak_client.login() 30 | 31 | 32 | async def path_to_id( 33 | path: Optional[str] = None, create: bool = False 34 | ) -> List[Dict[str, Any]]: 35 | """ 36 | path: str like "/1/2/3" 37 | create: bool create path if not exist 38 | 将形如 /path/a/b 的路径转换为 文件夹的id 39 | """ 40 | if not path: 41 | return [] 42 | paths = [p.strip() for p in path.split("/") if len(p) > 0] 43 | path_ids = [] 44 | count = 0 45 | next_page_token = None 46 | parent_id = None 47 | while count < len(paths): 48 | data = await pikpak_client.file_list( 49 | parent_id=parent_id, next_page_token=next_page_token 50 | ) 51 | if _id := next( 52 | ( 53 | f.get("id") 54 | for f in data.get("files", []) 55 | if f.get("kind", "") == "drive#folder" and f.get("name") == paths[count] 56 | ), 57 | "", 58 | ): 59 | path_ids.append( 60 | { 61 | "id": _id, 62 | "name": paths[count], 63 | } 64 | ) 65 | count += 1 66 | parent_id = _id 67 | elif data.get("next_page_token"): 68 | next_page_token = data.get("next_page_token") 69 | elif create: 70 | data = await pikpak_client.create_folder( 71 | name=paths[count], parent_id=parent_id 72 | ) 73 | _id = data.get("file").get("id") 74 | path_ids.append( 75 | { 76 | "id": _id, 77 | "name": paths[count], 78 | } 79 | ) 80 | count += 1 81 | parent_id = _id 82 | else: 83 | break 84 | return path_ids 85 | 86 | 87 | async def pikpak_offline_download( 88 | url: str, 89 | path: Optional[str] = None, 90 | parent_id: Optional[str] = None, 91 | name: Optional[str] = None, 92 | ) -> Dict[str, Any]: 93 | """ 94 | Offline download 95 | 当有path时, 表示下载到指定的文件夹, 否则下载到根目录 96 | 如果存在 parent_id, 以 parent_id 为准 97 | """ 98 | await login() 99 | try: 100 | if not parent_id: 101 | path_ids = await path_to_id(path, create=True) 102 | if path_ids and len(path_ids) > 0: 103 | parent_id = path_ids[-1].get("id") 104 | return await pikpak_client.offline_download(url, parent_id=parent_id, name=name) # type: ignore 105 | except (PikpakAccessTokenExpireException, PikpakException) as e: 106 | logger.warning(e) 
107 | await refresh_access_token() 108 | return await pikpak_offline_download( 109 | url=url, path=path, parent_id=parent_id, name=name 110 | ) 111 | except Exception as e: 112 | msg = f"PikPak Offline Download Error: {e}" 113 | logger.error(msg) 114 | raise Exception(msg) from e 115 | -------------------------------------------------------------------------------- /rss2/qbittorrent_download.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import base64 3 | import re 4 | from typing import Any, Dict, List, Optional 5 | from pathlib import Path 6 | 7 | import aiohttp 8 | import arrow 9 | from apscheduler.triggers.interval import IntervalTrigger 10 | from aiocqhttp import ActionFailed, NetworkError 11 | from nonebot import get_bot, scheduler 12 | from nonebot.log import logger 13 | from qbittorrent import Client 14 | 15 | from .config import config 16 | from .utils import ( 17 | convert_size, 18 | get_torrent_b16_hash, 19 | get_bot_group_list, 20 | send_message_to_admin, 21 | ) 22 | 23 | # 计划 24 | # 创建一个全局定时器用来检测种子下载情况 25 | # 群文件上传成功回调 26 | # 文件三种状态1.下载中2。上传中3.上传完成 27 | # 文件信息持久化存储 28 | # 关键词正则表达式 29 | # 下载开关 30 | 31 | DOWN_STATUS_DOWNING = 1 # 下载中 32 | DOWN_STATUS_UPLOADING = 2 # 上传中 33 | DOWN_STATUS_UPLOAD_OK = 3 # 上传完成 34 | down_info: Dict[str, Dict[str, Any]] = {} 35 | 36 | # 示例 37 | # { 38 | # "hash值": { 39 | # "status":DOWN_STATUS_DOWNING, 40 | # "start_time":None, # 下载开始时间 41 | # "downing_tips_msg_id":[] # 下载中通知群上一条通知的信息,用于撤回,防止刷屏 42 | # } 43 | # } 44 | 45 | 46 | # 发送通知 47 | async def send_msg( 48 | msg: str, notice_group: Optional[List[str]] = None 49 | ) -> List[Dict[str, Any]]: 50 | logger.info(msg) 51 | bot = get_bot() 52 | msg_id = [] 53 | group_list, all_groups = await get_bot_group_list(bot) 54 | if down_status_msg_group := (notice_group or config.down_status_msg_group): 55 | for group_id in down_status_msg_group: 56 | if int(group_id) not in group_list: 57 | logger.error(f"Bot[{bot.self_id}]未加入群组[{group_id}]") 58 | continue 59 | sid = [k for k, v in all_groups.items() if int(group_id) in v][0] 60 | msg_id.append( 61 | await bot.send_group_msg( 62 | self_id=sid, group_id=int(group_id), message=msg 63 | ) 64 | ) 65 | return msg_id 66 | 67 | 68 | async def get_qb_client() -> Optional[Client]: 69 | try: 70 | qb = Client(config.qb_web_url) 71 | if config.qb_username and config.qb_password: 72 | qb.login(config.qb_username, config.qb_password) 73 | else: 74 | qb.login() 75 | except Exception: 76 | bot = get_bot() 77 | msg = ( 78 | "❌ 无法连接到 qbittorrent ,请检查:\n" 79 | "1. 是否启动程序\n" 80 | "2. 是否勾选了“Web用户界面(远程控制)”\n" 81 | "3. 
连接地址、端口是否正确" 82 | ) 83 | logger.exception(msg) 84 | await send_message_to_admin(msg, bot) 85 | return None 86 | try: 87 | qb.get_default_save_path() 88 | except Exception: 89 | bot = get_bot() 90 | msg = "❌ 无法连登录到 qbittorrent ,请检查相关配置是否正确" 91 | logger.exception(msg) 92 | await send_message_to_admin(msg, bot) 93 | return None 94 | return qb 95 | 96 | 97 | async def get_torrent_info_from_hash( 98 | qb: Client, url: str, proxy: Optional[str] 99 | ) -> Dict[str, str]: 100 | info = None 101 | if re.search(r"magnet:\?xt=urn:btih:", url): 102 | qb.download_from_link(link=url) 103 | if _hash_str := re.search(r"[A-F\d]{40}", url, flags=re.I): 104 | hash_str = _hash_str[0].lower() 105 | else: 106 | hash_str = ( 107 | base64.b16encode( 108 | base64.b32decode(re.search(r"[2-7A-Z]{32}", url, flags=re.I)[0]) # type: ignore 109 | ) 110 | .decode("utf-8") 111 | .lower() 112 | ) 113 | 114 | else: 115 | async with aiohttp.ClientSession( 116 | timeout=aiohttp.ClientTimeout(total=100) 117 | ) as session: 118 | try: 119 | resp = await session.get(url, proxy=proxy) 120 | content = await resp.read() 121 | qb.download_from_file(content) 122 | hash_str = get_torrent_b16_hash(content) 123 | except Exception as e: 124 | await send_msg(f"下载种子失败,可能需要代理\n{e}") 125 | return {} 126 | 127 | while not info: 128 | for tmp_torrent in qb.torrents(): 129 | if tmp_torrent["hash"] == hash_str and tmp_torrent["size"]: 130 | info = { 131 | "hash": tmp_torrent["hash"], 132 | "filename": tmp_torrent["name"], 133 | "size": convert_size(tmp_torrent["size"]), 134 | } 135 | await asyncio.sleep(1) 136 | return info 137 | 138 | 139 | # 种子地址,种子下载路径,群文件上传 群列表,订阅名称 140 | async def start_down( 141 | url: str, group_ids: List[str], name: str, proxy: Optional[str] 142 | ) -> str: 143 | qb = await get_qb_client() 144 | if not qb: 145 | return "" 146 | # 获取种子 hash 147 | info = await get_torrent_info_from_hash(qb=qb, url=url, proxy=proxy) 148 | await rss_trigger( 149 | hash_str=info["hash"], 150 | group_ids=group_ids, 151 | name=f"订阅:{name}\n{info['filename']}\n文件大小:{info['size']}", 152 | ) 153 | down_info[info["hash"]] = { 154 | "status": DOWN_STATUS_DOWNING, 155 | "start_time": arrow.now(), # 下载开始时间 156 | "downing_tips_msg_id": [], # 下载中通知群上一条通知的信息,用于撤回,防止刷屏 157 | } 158 | return info["hash"] 159 | 160 | 161 | # 检查下载状态 162 | async def check_down_status(hash_str: str, group_ids: List[str], name: str) -> None: 163 | qb = await get_qb_client() 164 | if not qb: 165 | return 166 | # 防止中途删掉任务,无限执行 167 | try: 168 | info = qb.get_torrent(hash_str) 169 | files = qb.get_torrent_files(hash_str) 170 | except Exception as e: 171 | logger.exception(e) 172 | scheduler.remove_job(hash_str) 173 | return 174 | bot = get_bot() 175 | all_groups = (await get_bot_group_list(bot))[1] 176 | sid = None 177 | if info["total_downloaded"] - info["total_size"] >= 0.000000: 178 | all_time = arrow.now() - down_info[hash_str]["start_time"] 179 | await send_msg( 180 | f"👏 {name}\n" 181 | f"Hash:{hash_str}\n" 182 | f"下载完成!耗时:{str(all_time).split('.', 2)[0]}" 183 | ) 184 | down_info[hash_str]["status"] = DOWN_STATUS_UPLOADING 185 | for group_id in group_ids: 186 | for tmp in files: 187 | # 异常包起来防止超时报错导致后续不执行 188 | try: 189 | path = Path(info.get("save_path", "")) / tmp["name"] 190 | if config.qb_down_path: 191 | if (_path := Path(config.qb_down_path)).is_dir(): 192 | path = _path / tmp["name"] 193 | await send_msg(f"{name}\nHash:{hash_str}\n开始上传到群:{group_id}") 194 | sid = [k for k, v in all_groups.items() if int(group_id) in v][0] 195 | try: 196 | await bot.call_action( 197 | 
self_id=sid, 198 | action="upload_group_file", 199 | group_id=group_id, 200 | file=str(path), 201 | name=tmp["name"], 202 | ) 203 | except ActionFailed: 204 | msg = f"{name}\nHash:{hash_str}\n上传到群:{group_id}失败!请手动上传!" 205 | await send_msg(msg, [group_id]) 206 | logger.exception(msg) 207 | except NetworkError as e: 208 | logger.warning(e) 209 | except TimeoutError as e: 210 | logger.warning(e) 211 | scheduler.remove_job(hash_str) 212 | down_info[hash_str]["status"] = DOWN_STATUS_UPLOAD_OK 213 | else: 214 | await delete_msg(bot, sid, down_info[hash_str]["downing_tips_msg_id"]) 215 | msg_id = await send_msg( 216 | f"{name}\n" 217 | f"Hash:{hash_str}\n" 218 | f"下载了 {round(info['total_downloaded'] / info['total_size'] * 100, 2)}%\n" 219 | f"平均下载速度: {round(info['dl_speed_avg'] / 1024, 2)} KB/s" 220 | ) 221 | down_info[hash_str]["downing_tips_msg_id"] = msg_id 222 | 223 | 224 | # 撤回消息 225 | async def delete_msg(bot, sid, msg_ids: List[Dict[str, Any]]) -> None: 226 | for msg_id in msg_ids: 227 | try: 228 | await bot.call_action( 229 | "delete_msg", message_id=msg_id["message_id"], self_id=sid 230 | ) 231 | except Exception as e: 232 | logger.warning("下载进度消息撤回失败!", e) 233 | 234 | 235 | async def rss_trigger(hash_str: str, group_ids: List[str], name: str) -> None: 236 | # 制作一个频率为“ n 秒 / 次”的触发器 237 | trigger = IntervalTrigger(seconds=int(config.down_status_msg_date), jitter=10) 238 | job_defaults = {"max_instances": 1} 239 | # 添加任务 240 | scheduler.add_job( 241 | func=check_down_status, # 要添加任务的函数,不要带参数 242 | trigger=trigger, # 触发器 243 | args=(hash_str, group_ids, name), # 函数的参数列表,注意:只有一个值时,不能省略末尾的逗号 244 | id=hash_str, 245 | misfire_grace_time=60, # 允许的误差时间,建议不要省略 246 | job_defaults=job_defaults, 247 | ) 248 | await send_msg(f"👏 {name}\nHash:{hash_str}\n下载任务添加成功!", group_ids) 249 | -------------------------------------------------------------------------------- /rss2/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp~=3.8.3 2 | aiohttp[speedups]~=3.8.3 3 | APScheduler==3.9.1.post1 4 | arrow~=1.2.3 5 | bbcode~=1.1.0 6 | cachetools~=5.2.1 7 | emoji~=2.1.0 8 | feedparser~=6.0.10 9 | deep-translator~=1.9.1 10 | ImageHash~=4.3.1 11 | magneturi~=1.3 12 | nonebot~=1.9.0 13 | pikpakapi~=0.0.7 14 | Pillow~=9.4.0 15 | pydantic~=1.10.4 16 | pyquery~=1.4.3 17 | python-qbittorrent~=0.4.3 18 | tenacity==8.1.0 19 | tinydb~=4.7.0 20 | typing-extensions==4.4.0 21 | pydantic[dotenv] 22 | yarl~=1.8.2 -------------------------------------------------------------------------------- /rss2/rss_class.py: -------------------------------------------------------------------------------- 1 | import re 2 | from copy import deepcopy 3 | from typing import Any, Dict, List, Optional 4 | 5 | from tinydb import Query, TinyDB 6 | from tinydb.operations import set as tinydb_set 7 | from yarl import URL 8 | 9 | from .config import DATA_PATH, JSON_PATH, config 10 | 11 | 12 | class Rss: 13 | def __init__(self, data: Optional[Dict[str, Any]] = None): 14 | self.name: str = "" # 订阅名 15 | self.url: str = "" # 订阅地址 16 | self.user_id: List[str] = [] # 订阅用户(qq) 17 | self.group_id: List[str] = [] # 订阅群组 18 | self.guild_channel_id: List[str] = [] # 订阅子频道 19 | self.img_proxy: bool = False 20 | self.time: str = "5" # 更新频率 分钟/次 21 | self.translation: bool = False # 翻译 22 | self.only_title: bool = False # 仅标题 23 | self.only_pic: bool = False # 仅图片 24 | self.only_has_pic: bool = False # 仅含有图片 25 | self.download_pic: bool = False # 是否要下载图片 26 | self.cookies: Dict[str, str] = {} 27 | 
self.down_torrent: bool = False # 是否下载种子 28 | self.down_torrent_keyword: str = "" # 过滤关键字,支持正则 29 | self.black_keyword: str = "" # 黑名单关键词 30 | self.is_open_upload_group: bool = True # 默认开启上传到群 31 | self.duplicate_filter_mode: List[str] = [] # 去重模式 32 | self.max_image_number: int = 0 # 图片数量限制,防止消息太长刷屏 33 | self.content_to_remove: Optional[str] = None # 正文待移除内容,支持正则 34 | self.etag: Optional[str] = None 35 | self.last_modified: Optional[str] = None # 上次更新时间 36 | self.error_count: int = 0 # 连续抓取失败的次数,超过 100 就停止更新 37 | self.stop: bool = False # 停止更新 38 | self.pikpak_offline: bool = False # 是否PikPak离线 39 | self.pikpak_path_key: str = ( 40 | "" # PikPak 离线下载路径匹配正则表达式,用于自动归档文件 例如 r"(?:\[.*?\][\s\S])([\s\S]*)[\s\S]-" 41 | ) 42 | if data: 43 | self.__dict__.update(data) 44 | 45 | # 返回订阅链接 46 | def get_url(self, rsshub: str = config.rsshub) -> str: 47 | if URL(self.url).scheme in ["http", "https"]: 48 | return self.url 49 | # 先判断地址是否 / 开头 50 | if self.url.startswith("/"): 51 | return rsshub + self.url 52 | 53 | return f"{rsshub}/{self.url}" 54 | 55 | # 读取记录 56 | @staticmethod 57 | def read_rss() -> List["Rss"]: 58 | # 如果文件不存在 59 | if not JSON_PATH.exists(): 60 | return [] 61 | with TinyDB( 62 | JSON_PATH, 63 | encoding="utf-8", 64 | sort_keys=True, 65 | indent=4, 66 | ensure_ascii=False, 67 | ) as db: 68 | rss_list = [Rss(rss) for rss in db.all()] 69 | return rss_list 70 | 71 | # 过滤订阅名中的特殊字符 72 | @staticmethod 73 | def handle_name(name: str) -> str: 74 | name = re.sub(r'[?*:"<>\\/|]', "_", name) 75 | if name == "rss": 76 | name = "rss_" 77 | return name 78 | 79 | # 查找是否存在当前订阅名 rss 要转换为 rss_ 80 | @staticmethod 81 | def get_one_by_name(name: str) -> Optional["Rss"]: 82 | feed_list = Rss.read_rss() 83 | return next((feed for feed in feed_list if feed.name == name), None) 84 | 85 | # 添加订阅 86 | def add_user_or_group_or_channel( 87 | self, 88 | user: Optional[str] = None, 89 | group: Optional[str] = None, 90 | guild_channel: Optional[str] = None, 91 | ) -> None: 92 | if user: 93 | if user in self.user_id: 94 | return 95 | self.user_id.append(user) 96 | elif group: 97 | if group in self.group_id: 98 | return 99 | self.group_id.append(group) 100 | elif guild_channel: 101 | if guild_channel in self.guild_channel_id: 102 | return 103 | self.guild_channel_id.append(guild_channel) 104 | self.upsert() 105 | 106 | # 删除订阅 群组 107 | def delete_group(self, group: str) -> bool: 108 | if group not in self.group_id: 109 | return False 110 | self.group_id.remove(group) 111 | with TinyDB( 112 | JSON_PATH, 113 | encoding="utf-8", 114 | sort_keys=True, 115 | indent=4, 116 | ensure_ascii=False, 117 | ) as db: 118 | db.update(tinydb_set("group_id", self.group_id), Query().name == self.name) # type: ignore 119 | return True 120 | 121 | # 删除订阅 子频道 122 | def delete_guild_channel(self, guild_channel: str) -> bool: 123 | if guild_channel not in self.guild_channel_id: 124 | return False 125 | self.guild_channel_id.remove(guild_channel) 126 | with TinyDB( 127 | JSON_PATH, 128 | encoding="utf-8", 129 | sort_keys=True, 130 | indent=4, 131 | ensure_ascii=False, 132 | ) as db: 133 | db.update( 134 | tinydb_set("guild_channel_id", self.guild_channel_id), Query().name == self.name # type: ignore 135 | ) 136 | return True 137 | 138 | # 删除整个订阅 139 | def delete_rss(self) -> None: 140 | with TinyDB( 141 | JSON_PATH, 142 | encoding="utf-8", 143 | sort_keys=True, 144 | indent=4, 145 | ensure_ascii=False, 146 | ) as db: 147 | db.remove(Query().name == self.name) 148 | self.delete_file() 149 | 150 | # 重命名订阅缓存 json 文件 151 | def rename_file(self, 
target: str) -> None: 152 | source = DATA_PATH / f"{Rss.handle_name(self.name)}.json" 153 | if source.exists(): 154 | source.rename(target) 155 | 156 | # 删除订阅缓存 json 文件 157 | def delete_file(self) -> None: 158 | (DATA_PATH / f"{Rss.handle_name(self.name)}.json").unlink(missing_ok=True) 159 | 160 | # 隐私考虑,不展示除当前群组或频道外的群组、频道和QQ 161 | def hide_some_infos( 162 | self, group_id: Optional[int] = None, guild_channel_id: Optional[str] = None 163 | ) -> "Rss": 164 | if not group_id and not guild_channel_id: 165 | return self 166 | rss_tmp = deepcopy(self) 167 | rss_tmp.guild_channel_id = [guild_channel_id, "*"] if guild_channel_id else [] 168 | rss_tmp.group_id = [str(group_id), "*"] if group_id else [] 169 | rss_tmp.user_id = ["*"] if rss_tmp.user_id else [] 170 | return rss_tmp 171 | 172 | @staticmethod 173 | def get_by_guild_channel(guild_channel_id: str) -> List["Rss"]: 174 | rss_old = Rss.read_rss() 175 | return [ 176 | rss.hide_some_infos(guild_channel_id=guild_channel_id) 177 | for rss in rss_old 178 | if guild_channel_id in rss.guild_channel_id 179 | ] 180 | 181 | @staticmethod 182 | def get_by_group(group_id: int) -> List["Rss"]: 183 | rss_old = Rss.read_rss() 184 | return [ 185 | rss.hide_some_infos(group_id=group_id) 186 | for rss in rss_old 187 | if str(group_id) in rss.group_id 188 | ] 189 | 190 | @staticmethod 191 | def get_by_user(user: str) -> List["Rss"]: 192 | rss_old = Rss.read_rss() 193 | return [rss for rss in rss_old if user in rss.user_id] 194 | 195 | def set_cookies(self, cookies: str) -> None: 196 | self.cookies = cookies 197 | with TinyDB( 198 | JSON_PATH, 199 | encoding="utf-8", 200 | sort_keys=True, 201 | indent=4, 202 | ensure_ascii=False, 203 | ) as db: 204 | db.update(tinydb_set("cookies", cookies), Query().name == self.name) # type: ignore 205 | 206 | def upsert(self, old_name: Optional[str] = None) -> None: 207 | with TinyDB( 208 | JSON_PATH, 209 | encoding="utf-8", 210 | sort_keys=True, 211 | indent=4, 212 | ensure_ascii=False, 213 | ) as db: 214 | if old_name: 215 | db.update(self.__dict__, Query().name == old_name) 216 | else: 217 | db.upsert(self.__dict__, Query().name == str(self.name)) 218 | 219 | def __str__(self) -> str: 220 | mode_name = {"link": "链接", "title": "标题", "image": "图片"} 221 | mode_msg = "" 222 | if self.duplicate_filter_mode: 223 | delimiter = " 或 " if "or" in self.duplicate_filter_mode else "、" 224 | mode_msg = ( 225 | "已启用去重模式," 226 | f"{delimiter.join(mode_name[i] for i in self.duplicate_filter_mode if i != 'or')} 相同时去重" 227 | ) 228 | ret_list = [ 229 | f"名称:{self.name}", 230 | f"订阅地址:{self.url}", 231 | f"订阅QQ:{self.user_id}" if self.user_id else "", 232 | f"订阅群:{self.group_id}" if self.group_id else "", 233 | f"订阅子频道:{self.guild_channel_id}" if self.guild_channel_id else "", 234 | f"更新时间:{self.time}", 235 | f"代理:{self.img_proxy}" if self.img_proxy else "", 236 | f"翻译:{self.translation}" if self.translation else "", 237 | f"仅标题:{self.only_title}" if self.only_title else "", 238 | f"仅图片:{self.only_pic}" if self.only_pic else "", 239 | f"下载图片:{self.download_pic}" if self.download_pic else "", 240 | f"仅含有图片:{self.only_has_pic}" if self.only_has_pic else "", 241 | f"白名单关键词:{self.down_torrent_keyword}" if self.down_torrent_keyword else "", 242 | f"黑名单关键词:{self.black_keyword}" if self.black_keyword else "", 243 | f"cookies:{self.cookies}" if self.cookies else "", 244 | "种子自动下载功能已启用" if self.down_torrent else "", 245 | "" if self.is_open_upload_group else f"是否上传到群:{self.is_open_upload_group}", 246 | f"{mode_msg}" if self.duplicate_filter_mode else 
"", 247 | f"图片数量限制:{self.max_image_number}" if self.max_image_number else "", 248 | f"正文待移除内容:{self.content_to_remove}" if self.content_to_remove else "", 249 | f"连续抓取失败的次数:{self.error_count}" if self.error_count else "", 250 | f"停止更新:{self.stop}" if self.stop else "", 251 | f"PikPak离线: {self.pikpak_offline}" if self.pikpak_offline else "", 252 | f"PikPak离线路径匹配: {self.pikpak_path_key}" if self.pikpak_path_key else "", 253 | ] 254 | return "\n".join([i for i in ret_list if i != ""]) 255 | -------------------------------------------------------------------------------- /rss2/rss_parsing.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Tuple 2 | 3 | import aiohttp 4 | import feedparser 5 | from nonebot import get_bot 6 | from nonebot.log import logger 7 | from tinydb import TinyDB 8 | from tinydb.middlewares import CachingMiddleware 9 | from tinydb.storages import JSONStorage 10 | from yarl import URL 11 | 12 | from . import my_trigger as tr 13 | from .config import DATA_PATH, config 14 | from .parsing import get_proxy 15 | from .parsing.cache_manage import cache_filter 16 | from .parsing.check_update import dict_hash 17 | from .parsing.parsing_rss import ParsingRss 18 | from .rss_class import Rss 19 | from .utils import ( 20 | filter_valid_group_id_list, 21 | filter_valid_guild_channel_id_list, 22 | filter_valid_user_id_list, 23 | get_http_caching_headers, 24 | send_message_to_admin, 25 | ) 26 | 27 | HEADERS = { 28 | "Accept": "application/xhtml+xml,application/xml,*/*", 29 | "Accept-Language": "en-US,en;q=0.9", 30 | "Cache-Control": "max-age=0", 31 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36", 32 | "Connection": "keep-alive", 33 | "Content-Type": "application/xml; charset=utf-8", 34 | } 35 | 36 | 37 | # 抓取 feed,读取缓存,检查更新,对更新进行处理 38 | async def start(rss: Rss) -> None: 39 | bot = get_bot() # type: ignore 40 | # 先检查订阅者是否合法 41 | if rss.user_id: 42 | rss.user_id = await filter_valid_user_id_list(bot, rss.user_id) 43 | if rss.group_id: 44 | rss.group_id = await filter_valid_group_id_list(bot, rss.group_id) 45 | if rss.guild_channel_id: 46 | rss.guild_channel_id = await filter_valid_guild_channel_id_list( 47 | bot, rss.guild_channel_id 48 | ) 49 | if not any([rss.user_id, rss.group_id, rss.guild_channel_id]): 50 | await auto_stop_and_notify_admin(rss, bot) 51 | return 52 | new_rss, cached = await fetch_rss(rss) 53 | # 检查是否存在rss记录 54 | _file = DATA_PATH / f"{Rss.handle_name(rss.name)}.json" 55 | first_time_fetch = not _file.exists() 56 | if cached: 57 | logger.info(f"{rss.name} 没有新信息") 58 | return 59 | if not new_rss or not new_rss.get("feed"): 60 | rss.error_count += 1 61 | logger.warning(f"{rss.name} 抓取失败!") 62 | if first_time_fetch: 63 | # 第一次抓取失败,如果配置了代理,则自动使用代理抓取 64 | if config.rss_proxy and not rss.img_proxy: 65 | rss.img_proxy = True 66 | logger.info(f"{rss.name} 第一次抓取失败,自动使用代理抓取") 67 | await start(rss) 68 | else: 69 | await auto_stop_and_notify_admin(rss, bot) 70 | if rss.error_count >= 100: 71 | await auto_stop_and_notify_admin(rss, bot) 72 | return 73 | if new_rss.get("feed") and rss.error_count > 0: 74 | rss.error_count = 0 75 | if first_time_fetch: 76 | with TinyDB( 77 | _file, 78 | storage=CachingMiddleware(JSONStorage), # type: ignore 79 | encoding="utf-8", 80 | sort_keys=True, 81 | indent=4, 82 | ensure_ascii=False, 83 | ) as db: 84 | entries = new_rss["entries"] 85 | result = [] 86 | for i in entries: 87 | i["hash"] = 
dict_hash(i) 88 | result.append(cache_filter(i)) 89 | db.insert_multiple(result) 90 | logger.info(f"{rss.name} 第一次抓取成功!") 91 | return 92 | 93 | pr = ParsingRss(rss=rss) 94 | await pr.start(rss_name=rss.name, new_rss=new_rss) 95 | 96 | 97 | async def auto_stop_and_notify_admin(rss: Rss, bot) -> None: 98 | rss.stop = True 99 | rss.upsert() 100 | tr.delete_job(rss) 101 | cookies_str = "及 cookies " if rss.cookies else "" 102 | if not any([rss.user_id, rss.group_id, rss.guild_channel_id]): 103 | msg = f"{rss.name}[{rss.get_url()}]无人订阅!已自动停止更新!" 104 | elif rss.error_count >= 100: 105 | msg = ( 106 | f"{rss.name}[{rss.get_url()}]已经连续抓取失败超过 100 次!已自动停止更新!请检查订阅地址{cookies_str}!" 107 | ) 108 | else: 109 | msg = f"{rss.name}[{rss.get_url()}]第一次抓取失败!已自动停止更新!请检查订阅地址{cookies_str}!" 110 | await send_message_to_admin(msg, bot) 111 | 112 | 113 | # 获取 RSS 并解析为 json 114 | async def fetch_rss(rss: Rss) -> Tuple[Dict[str, Any], bool]: 115 | rss_url = rss.get_url() 116 | # 对本机部署的 RSSHub 不使用代理 117 | local_host = [ 118 | "localhost", 119 | "127.0.0.1", 120 | ] 121 | proxy = get_proxy(rss.img_proxy) if URL(rss_url).host not in local_host else None 122 | 123 | # 判断是否使用cookies 124 | cookies = rss.cookies or None 125 | 126 | # 获取 xml 127 | d: Dict[str, Any] = {} 128 | cached = False 129 | headers = HEADERS.copy() 130 | if not config.rsshub_backup: 131 | if rss.etag: 132 | headers["If-None-Match"] = rss.etag 133 | if rss.last_modified: 134 | headers["If-Modified-Since"] = rss.last_modified 135 | async with aiohttp.ClientSession( 136 | cookies=cookies, 137 | headers=HEADERS, 138 | raise_for_status=True, 139 | ) as session: 140 | try: 141 | resp = await session.get(rss_url, proxy=proxy) 142 | if not config.rsshub_backup: 143 | http_caching_headers = get_http_caching_headers(resp.headers) 144 | rss.etag = http_caching_headers["ETag"] 145 | rss.last_modified = http_caching_headers["Last-Modified"] 146 | rss.upsert() 147 | if ( 148 | resp.status == 200 and int(resp.headers.get("Content-Length", "1")) == 0 149 | ) or resp.status == 304: 150 | cached = True 151 | # 解析为 JSON 152 | d = feedparser.parse(await resp.text()) 153 | except Exception: 154 | if not URL(rss.url).scheme and config.rsshub_backup: 155 | logger.debug(f"[{rss_url}]访问失败!将使用备用 RSSHub 地址!") 156 | for rsshub_url in list(config.rsshub_backup): 157 | rss_url = rss.get_url(rsshub=rsshub_url) 158 | try: 159 | resp = await session.get(rss_url, proxy=proxy) 160 | d = feedparser.parse(await resp.text()) 161 | except Exception: 162 | logger.debug(f"[{rss_url}]访问失败!将使用备用 RSSHub 地址!") 163 | continue 164 | if d.get("feed"): 165 | logger.info(f"[{rss_url}]抓取成功!") 166 | break 167 | return d, cached 168 | -------------------------------------------------------------------------------- /rss2/utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import functools 3 | import math 4 | import re 5 | from contextlib import suppress 6 | from typing import Any, Dict, List, Mapping, Optional, Tuple 7 | 8 | from cachetools import TTLCache 9 | from cachetools.keys import hashkey 10 | import nonebot 11 | from nonebot.log import logger 12 | from .config import config 13 | 14 | 15 | def get_http_caching_headers( 16 | headers: Optional[Mapping[str, Any]], 17 | ) -> Dict[str, Optional[str]]: 18 | return ( 19 | { 20 | "Last-Modified": headers.get("Last-Modified") or headers.get("Date"), 21 | "ETag": headers.get("ETag"), 22 | } 23 | if headers 24 | else {"Last-Modified": None, "ETag": None} 25 | ) 26 | 27 | 28 | def 
convert_size(size_bytes: int) -> str: 29 | if size_bytes == 0: 30 | return "0 B" 31 | size_name = ("B", "KB", "MB", "GB", "TB") 32 | i = int(math.floor(math.log(size_bytes, 1024))) 33 | p = math.pow(1024, i) 34 | s = round(size_bytes / p, 2) 35 | return f"{s} {size_name[i]}" 36 | 37 | 38 | def cached_async(cache, key=hashkey): # type: ignore 39 | """ 40 | https://github.com/tkem/cachetools/commit/3f073633ed4f36f05b57838a3e5655e14d3e3524 41 | """ 42 | 43 | def decorator(func): # type: ignore 44 | if cache is None: 45 | 46 | async def wrapper(*args, **kwargs): # type: ignore 47 | return await func(*args, **kwargs) 48 | 49 | else: 50 | 51 | async def wrapper(*args, **kwargs): # type: ignore 52 | k = key(*args, **kwargs) 53 | with suppress(KeyError): # key not found 54 | return cache[k] 55 | v = await func(*args, **kwargs) 56 | with suppress(ValueError): # value too large 57 | cache[k] = v 58 | return v 59 | 60 | return functools.update_wrapper(wrapper, func) 61 | 62 | return decorator 63 | 64 | 65 | def get_bot_qq(bot) -> List[int]: 66 | return bot._wsr_api_clients.keys() 67 | 68 | 69 | @cached_async(TTLCache(maxsize=1, ttl=300)) # type: ignore 70 | async def get_bot_friend_list(bot) -> Tuple[List[int], Dict[int, List[int]]]: 71 | bot_qq = list(get_bot_qq(bot)) 72 | all_friends = {} 73 | friend_list = [] 74 | for sid in bot_qq: 75 | f = await bot.get_friend_list(self_id=sid) 76 | all_friends[sid] = [i["user_id"] for i in f] 77 | friend_list.extend(all_friends[sid]) 78 | return set(friend_list), all_friends 79 | 80 | 81 | @cached_async(TTLCache(maxsize=1, ttl=300)) # type: ignore 82 | async def get_bot_group_list(bot) -> Tuple[List[int], Dict[int, List[int]]]: 83 | bot_qq = list(get_bot_qq(bot)) 84 | all_groups = {} 85 | group_list = [] 86 | for sid in bot_qq: 87 | g = await bot.get_group_list(self_id=sid) 88 | all_groups[sid] = [i["group_id"] for i in g] 89 | group_list.extend(all_groups[sid]) 90 | return set(group_list), all_groups 91 | 92 | 93 | @cached_async(TTLCache(maxsize=1, ttl=300)) # type: ignore 94 | async def get_all_bot_guild_list(bot) -> Tuple[List[int], Dict[int, List[str]]]: 95 | bot_qq = list(get_bot_qq(bot)) 96 | # 获取频道列表 97 | all_guilds = {} 98 | guild_list = [] 99 | for sid in bot_qq: 100 | g = await bot.get_guild_list(self_id=sid) 101 | all_guilds[sid] = [i["guild_id"] for i in g] 102 | guild_list.extend(all_guilds[sid]) 103 | return set(guild_list), all_guilds 104 | 105 | 106 | @cached_async(TTLCache(maxsize=1, ttl=300)) # type: ignore 107 | async def get_all_bot_channel_list(bot) -> Tuple[List[str], Dict[int, List[str]]]: 108 | guild_list, all_guilds = await get_all_bot_guild_list(bot) 109 | # 获取子频道列表 110 | all_channels = {} 111 | channel_list = [] 112 | for guild in guild_list: 113 | sid = [k for k, v in all_guilds.items() if guild in v][0] 114 | c = await bot.get_guild_channel_list(self_id=sid, guild_id=guild) 115 | all_channels[sid] = [i["channel_id"] for i in c] 116 | channel_list.extend(all_channels[sid]) 117 | return set(channel_list), all_channels 118 | 119 | 120 | @cached_async(TTLCache(maxsize=1, ttl=300)) # type: ignore 121 | async def get_bot_guild_channel_list(bot, guild_id: Optional[str] = None) -> List[str]: 122 | guild_list, all_guilds = await get_all_bot_guild_list(bot) 123 | if guild_id is None: 124 | return guild_list 125 | if guild_id in guild_list: 126 | # 获取子频道列表 127 | sid = [k for k, v in all_guilds.items() if guild_id in v][0] 128 | channel_list = await bot.get_guild_channel_list(self_id=sid, guild_id=guild_id) 129 | return [i["channel_id"] for i 
130 |     return []
131 | 
132 | 
133 | def get_torrent_b16_hash(content: bytes) -> str:
134 |     import magneturi
135 | 
136 |     # Build a magnet link from the raw torrent data, shaped like
137 |     # magnet:?xt=urn:btih:<32-char base32 info hash>
138 |     magnet_link = magneturi.from_torrent_data(content)
139 |     # The "magnet:?xt=urn:btih:" prefix is exactly 20 characters long,
140 |     # so characters 20..52 are the base32-encoded info hash
141 |     b32_hash = magnet_link[20:52]
142 |     # Re-encode base32 -> base16 to obtain the 40-char info hash
143 |     b16_hash = base64.b16encode(base64.b32decode(b32_hash))
144 |     b16_hash = b16_hash.lower()
145 |     # The magnet link can be rebuilt as "magnet:?xt=urn:btih:" + <info hash>;
146 |     # return the lowercase hex string
147 |     return str(b16_hash, "utf-8")
148 | 
149 | 
150 | async def send_message_to_admin(message: str, bot=nonebot.get_bot()) -> None:
151 |     await bot.send_private_msg(user_id=str(list(config.superusers)[0]), message=message)
152 | 
153 | 
154 | async def send_msg(
155 |     msg: str,
156 |     user_ids: Optional[List[str]] = None,
157 |     group_ids: Optional[List[str]] = None,
158 | ) -> List[Dict[str, Any]]:
159 |     """
160 |     msg: str
161 |     user_ids: List[str]
162 |     group_ids: List[str]
163 | 
164 |     发送消息到私聊或群聊
165 |     """
166 |     bot = nonebot.get_bot()
167 |     msg_id = []
168 |     if group_ids:
169 |         for group_id in group_ids:
170 |             msg_id.append(await bot.send_group_msg(group_id=int(group_id), message=msg))
171 |     if user_ids:
172 |         for user_id in user_ids:
173 |             msg_id.append(await bot.send_private_msg(user_id=int(user_id), message=msg))
174 |     return msg_id
175 | 
176 | 
177 | # 校验正则表达式合法性
178 | def regex_validate(regex: str) -> bool:
179 |     try:
180 |         re.compile(regex)
181 |         return True
182 |     except re.error:
183 |         return False
184 | 
185 | 
186 | # 过滤合法好友
187 | async def filter_valid_user_id_list(bot, user_id_list: List[str]) -> List[str]:
188 |     friend_list, _ = await get_bot_friend_list(bot)
189 |     valid_user_id_list = [
190 |         user_id for user_id in user_id_list if int(user_id) in friend_list
191 |     ]
192 |     if invalid_user_id_list := [
193 |         user_id for user_id in user_id_list if user_id not in valid_user_id_list
194 |     ]:
195 |         logger.warning(f"QQ号[{','.join(invalid_user_id_list)}]不是Bot[{bot.self_id}]的好友")
196 |     return valid_user_id_list
197 | 
198 | 
199 | # 过滤合法群组
200 | async def filter_valid_group_id_list(bot, group_id_list: List[str]) -> List[str]:
201 |     group_list, _ = await get_bot_group_list(bot)
202 |     valid_group_id_list = [
203 |         group_id for group_id in group_id_list if int(group_id) in group_list
204 |     ]
205 |     if invalid_group_id_list := [
206 |         group_id for group_id in group_id_list if group_id not in valid_group_id_list
207 |     ]:
208 |         logger.warning(f"Bot[{bot.self_id}]未加入群组[{','.join(invalid_group_id_list)}]")
209 |     return valid_group_id_list
210 | 
211 | 
212 | # 过滤合法频道
213 | async def filter_valid_guild_channel_id_list(
214 |     bot, guild_channel_id_list: List[str]
215 | ) -> List[str]:
216 |     valid_guild_channel_id_list = []
217 |     for guild_channel_id in guild_channel_id_list:
218 |         guild_id, channel_id = guild_channel_id.split("@")
219 |         guild_list = await get_bot_guild_channel_list(bot)
220 |         if guild_id not in guild_list:
221 |             guild_name = (await bot.get_guild_meta_by_guest(guild_id=guild_id))[
222 |                 "guild_name"
223 |             ]
224 |             logger.warning(f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}]")
225 |             continue
226 | 
227 |         channel_list = await get_bot_guild_channel_list(bot, guild_id=guild_id)
228 |         if channel_id not in channel_list:
229 |             guild_name = (await bot.get_guild_meta_by_guest(guild_id=guild_id))[
230 |                 "guild_name"
231 |             ]
232 |             logger.warning(
233 |                 f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}]的子频道[{channel_id}]"
234 |             )
235 |             continue
236 |         valid_guild_channel_id_list.append(guild_channel_id)
237 |     return valid_guild_channel_id_list
238 | 
--------------------------------------------------------------------------------
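The `cached_async` helper in `rss2/utils.py` is an async-compatible counterpart of cachetools' `cached` decorator: results are memoised until the `TTLCache` entry expires. A minimal usage sketch follows; the import path and function name are illustrative, not taken from the repository.

```python
import asyncio

from cachetools import TTLCache

# Hypothetical import path; adjust to wherever the plugin package is installed.
from rss2.utils import cached_async


@cached_async(TTLCache(maxsize=16, ttl=60))
async def slow_lookup(x: int) -> int:
    # Stand-in for an expensive call (API request, DB query, ...).
    await asyncio.sleep(1)
    return x * 2


async def main() -> None:
    print(await slow_lookup(21))  # takes ~1 s, result stored in the cache
    print(await slow_lookup(21))  # served from the TTL cache immediately


if __name__ == "__main__":
    asyncio.run(main())
```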