├── custom_reply
│   ├── requirements.txt
│   ├── custom_reply.py
│   ├── __init__.py
│   └── manage_content.py
├── custom
│   ├── requirements.txt
│   ├── cat.py
│   ├── ping.py
│   ├── fox.py
│   ├── dog.py
│   ├── wantwords.py
│   ├── atall.py
│   ├── fileLink.py
│   ├── nbnhhsh.py
│   ├── manage_group.py
│   ├── scan_qrcode.py
│   ├── processing_request.py
│   ├── status_info.py
│   └── analysis_bilibili.py
├── rss2
│   ├── parsing
│   │   ├── routes
│   │   │   ├── __init__.py
│   │   │   ├── yande_re.py
│   │   │   ├── youtube.py
│   │   │   ├── nga.py
│   │   │   ├── south_plus.py
│   │   │   ├── twitter.py
│   │   │   ├── weibo.py
│   │   │   ├── danbooru.py
│   │   │   └── pixiv.py
│   │   ├── utils.py
│   │   ├── check_update.py
│   │   ├── download_torrent.py
│   │   ├── handle_translation.py
│   │   ├── handle_html_tag.py
│   │   ├── cache_manage.py
│   │   ├── send_message.py
│   │   ├── parsing_rss.py
│   │   ├── __init__.py
│   │   └── handle_images.py
│   ├── command
│   │   ├── __init__.py
│   │   ├── upload_group_file.py
│   │   ├── add_cookies.py
│   │   ├── show_dy.py
│   │   ├── show_all.py
│   │   ├── add_dy.py
│   │   ├── del_dy.py
│   │   ├── rsshub_add.py
│   │   └── change_dy.py
│   ├── permission.py
│   ├── requirements.txt
│   ├── __init__.py
│   ├── config.py
│   ├── my_trigger.py
│   ├── pikpak_offline.py
│   ├── rss_parsing.py
│   ├── utils.py
│   ├── qbittorrent_download.py
│   └── rss_class.py
└── README.md
/custom_reply/requirements.txt:
--------------------------------------------------------------------------------
1 | ujson~=5.7.0
--------------------------------------------------------------------------------
/custom/requirements.txt:
--------------------------------------------------------------------------------
1 | httpx
2 | aiohttp
3 | pyzbar # scan_qrcode.py
4 | psutil # status_info.py
--------------------------------------------------------------------------------
/rss2/parsing/routes/__init__.py:
--------------------------------------------------------------------------------
1 | from . import danbooru, nga, pixiv, south_plus, twitter, weibo, yande_re, youtube
2 |
--------------------------------------------------------------------------------
/rss2/command/__init__.py:
--------------------------------------------------------------------------------
1 | from . import (
2 | add_cookies,
3 | add_dy,
4 | change_dy,
5 | del_dy,
6 | rsshub_add,
7 | show_all,
8 | show_dy,
9 | upload_group_file,
10 | )
11 |
--------------------------------------------------------------------------------
/rss2/permission.py:
--------------------------------------------------------------------------------
1 | from nonebot import SenderRoles
2 | from .config import config
3 |
4 |
5 | def admin_permission(sender: SenderRoles):
6 | return (
7 | sender.is_superuser
8 | or sender.is_admin
9 | or sender.is_owner
10 | or sender.sent_by(config.guild_superusers)
11 | or sender.sent_by(config.superusers)
12 | )
13 |
--------------------------------------------------------------------------------
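
Usage note: admin_permission is written as a nonebot v1 permission policy, i.e. a callable taking SenderRoles. A minimal sketch of wiring it into a command, assuming nonebot 1.9's callable-policy API (the command name and import path are made up):

    from nonebot import CommandSession, on_command

    from rss2.permission import admin_permission


    # Only superusers, group admins/owners, or configured guild superusers pass
    @on_command("rss_admin_test", only_to_me=False, permission=admin_permission)
    async def rss_admin_test(session: CommandSession):
        await session.send("permission granted")
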
/custom/cat.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | from nonebot import on_command, CommandSession, MessageSegment
3 |
4 |
5 | @on_command("!cat", only_to_me=False)
6 | async def cat(session: CommandSession):
7 | url = "https://api.thecatapi.com/v1/images/search"
8 |     async with httpx.AsyncClient(proxies={}) as client:
9 |         r = await client.get(url, timeout=5)
10 | picurl = r.json()[0]["url"]
11 | await session.send(MessageSegment.image(picurl))
12 |
--------------------------------------------------------------------------------
/custom/ping.py:
--------------------------------------------------------------------------------
1 | import time
2 | from nonebot import on_command, CommandSession
3 |
4 |
5 | @on_command("/ping", only_to_me=False)
6 | async def ping(session: CommandSession):
7 | time_from_receive = session.event["time"]
8 | if time_from_receive > 3000000000:
9 | time_from_receive = time_from_receive / 1000
10 | await session.finish(
11 | "->" + str(time.time() - time_from_receive) + "s", at_sender=True
12 | )
13 |
--------------------------------------------------------------------------------
/rss2/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp[speedups]~=3.8.3
2 | APScheduler==3.9.1.post1
3 | arrow~=1.2.3
4 | bbcode~=1.1.0
5 | cachetools~=5.2.1
6 | emoji~=2.1.0
7 | feedparser~=6.0.10
8 | deep-translator~=1.9.1
9 | ImageHash~=4.3.1
10 | magneturi~=1.3
11 | nonebot~=1.9.0
12 | pikpakapi~=0.0.7
13 | Pillow~=9.4.0
14 | pydantic[dotenv]~=1.10.4
15 | pyquery~=1.4.3
16 | python-qbittorrent~=0.4.3
17 | tenacity==8.1.0
18 | tinydb~=4.7.0
19 | typing-extensions==4.4.0
20 | yarl~=1.8.2
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Usage
2 |
3 | ### Keep the Hoshino core itself up to date, to guard against CQ code injection
4 |
5 | ### requirements.txt lists the required third-party modules; install them before use
6 |
7 | ### Run `pip install -r requirements.txt` in the directory of the plugin you want to use to install its dependencies
8 |
9 | ### Run `pip install -r requirements.txt --upgrade` to upgrade the dependencies
10 |
11 | Must be used together with [Hoshino(v2)](https://github.com/Ice-Cirno/HoshinoBot).
12 | See the [WIKI](https://github.com/mengshouer/HoshinoBot-Plugins/wiki) for detailed usage.
13 |
14 | ## Other plugins
15 |
16 | [FFXIV-related plugins](https://github.com/mengshouer/HoshinoBot-Plugins/tree/ffxiv)
17 |
18 | [Image search plugin](https://github.com/mengshouer/HoshinoBot-Plugins/tree/picsearch)
19 |
--------------------------------------------------------------------------------
/rss2/parsing/utils.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Any, Dict, Optional
3 |
4 | from ..config import config
5 |
6 |
7 | # Proxy
8 | def get_proxy(open_proxy: bool) -> Optional[str]:
9 | if not open_proxy or not config.rss_proxy:
10 | return None
11 | return f"http://{config.rss_proxy}"
12 |
13 |
14 | # Get the item body
15 | def get_summary(item: Dict[str, Any]) -> str:
16 | summary: str = (
17 | item["content"][0]["value"] if item.get("content") else item["summary"]
18 | )
19 |     return f"<div>{summary}</div>" if re.search("^https?://", summary) else summary
20 |
--------------------------------------------------------------------------------
/rss2/parsing/handle_html_tag.py:
--------------------------------------------------------------------------------
137 |     # Add two newlines after each closing p/pre tag
138 |     for i in ["p", "pre"]:
139 |         rss_str = re.sub(f"</{i}>", f"</{i}>\n\n", rss_str)
140 |
141 |     # Strip the tags themselves, keeping the inner text
142 |     for i in html_tags:
143 |         rss_str = re.sub(f"<{i} [^>]+>", "", rss_str)
144 |         rss_str = re.sub(f"</?{i}>", "", rss_str)
145 |
146 | rss_str = re.sub(r"<(br|hr)\s?/?>|<(br|hr) [^>]+>", "\n", rss_str)
147 |     rss_str = re.sub(r"<h\d [^>]+>", "\n", rss_str)
148 |     rss_str = re.sub(r"</?h\d>", "\n", rss_str)
149 |
150 |     # Remove image and video tags
151 |     rss_str = re.sub(
152 |         r"<video .*?(</video>)?|<img[^>]+>", "", rss_str, flags=re.DOTALL
153 |     )
154 |
155 |     # Remove excess newlines
156 | while "\n\n\n" in rss_str:
157 | rss_str = rss_str.replace("\n\n\n", "\n\n")
158 | rss_str = rss_str.strip()
159 |
160 | if 0 < config.max_length < len(rss_str):
161 | rss_str = f"{rss_str[: config.max_length]}..."
162 |
163 | return rss_str
164 |
--------------------------------------------------------------------------------
/rss2/parsing/cache_manage.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | from sqlite3 import Connection
3 | from typing import Any, Dict, Optional, Tuple
4 |
5 | import imagehash
6 | from nonebot.log import logger
7 | from PIL import Image, UnidentifiedImageError
8 | from pyquery import PyQuery as Pq
9 | from tinydb import Query, TinyDB
10 | from tinydb.operations import delete
11 |
12 | from ..config import config
13 | from ..rss_class import Rss
14 | from .check_update import get_item_date
15 | from .handle_images import download_image
16 |
17 |
18 | # Trim the fields kept in xxx.json (the cache)
19 | def cache_filter(data: Dict[str, Any]) -> Dict[str, Any]:
20 | keys = [
21 | "guid",
22 | "link",
23 | "published",
24 | "updated",
25 | "title",
26 | "hash",
27 | ]
28 | if data.get("to_send"):
29 | keys += [
30 | "content",
31 | "summary",
32 | "to_send",
33 | ]
34 | return {k: data[k] for k in keys if k in data}
35 |
36 |
37 | # Maintain the deduplication database
38 | def cache_db_manage(conn: Connection) -> None:
39 | cursor = conn.cursor()
40 |     # Create the sqlite3 table used for deduplication if it does not exist yet
41 | cursor.execute(
42 | """
43 | CREATE TABLE IF NOT EXISTS main (
44 | "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
45 | "link" TEXT,
46 | "title" TEXT,
47 | "image_hash" TEXT,
48 | "datetime" TEXT DEFAULT (DATETIME('Now', 'LocalTime'))
49 | );
50 | """
51 | )
52 | cursor.close()
53 | conn.commit()
54 | cursor = conn.cursor()
55 |     # Remove records that have not recurred for more than config.db_cache_expire days
56 | cursor.execute(
57 | "DELETE FROM main WHERE datetime <= DATETIME('Now', 'LocalTime', ?);",
58 | (f"-{config.db_cache_expire} Day",),
59 | )
60 | cursor.close()
61 | conn.commit()
62 |
63 |
64 | # Maintain the cache json
65 | def cache_json_manage(db: TinyDB, new_data_length: int) -> None:
66 |     # Keep at most config.limit + new_data_length records
67 | limit = config.limit + new_data_length
68 | retains = db.all()
69 | retains.sort(key=get_item_date)
70 | retains = retains[-limit:]
71 | db.truncate()
72 | db.insert_multiple(retains)
73 |
74 |
75 | # Duplicate check
76 | async def duplicate_exists(
77 | rss: Rss, conn: Connection, item: Dict[str, Any], summary: str
78 | ) -> Tuple[bool, Optional[str]]:
79 | flag = False
80 | link = item["link"].replace("'", "''")
81 | title = item["title"].replace("'", "''")
82 | image_hash = None
83 | cursor = conn.cursor()
84 | sql = "SELECT * FROM main WHERE 1=1"
85 | args = []
86 | for mode in rss.duplicate_filter_mode:
87 | if mode == "image":
88 | try:
89 | summary_doc = Pq(summary)
90 | except Exception as e:
91 | logger.warning(e)
92 |                 # No body content, skip this mode
93 | continue
94 | img_doc = summary_doc("img")
95 |             # Only handle the case of exactly one image
96 | if len(img_doc) != 1:
97 | continue
98 | url = img_doc.attr("src")
99 |             # Use the image fingerprint to tell whether it is actually the same image
100 | content = await download_image(url, rss.img_proxy)
101 | if not content:
102 | continue
103 | try:
104 | im = Image.open(BytesIO(content))
105 | except UnidentifiedImageError:
106 | continue
107 | item["image_content"] = content
108 |             # A GIF's image_hash only reflects its first frame, so skip GIFs to avoid false positives
109 | if im.format == "GIF":
110 | item["gif_url"] = url
111 | continue
112 | image_hash = str(imagehash.dhash(im))
113 | logger.debug(f"image_hash: {image_hash}")
114 | sql += " AND image_hash=?"
115 | args.append(image_hash)
116 | if mode == "link":
117 | sql += " AND link=?"
118 | args.append(link)
119 | elif mode == "title":
120 | sql += " AND title=?"
121 | args.append(title)
122 | if "or" in rss.duplicate_filter_mode:
123 | sql = sql.replace("AND", "OR").replace("OR", "AND", 1)
124 | cursor.execute(f"{sql};", args)
125 | result = cursor.fetchone()
126 | if result is not None:
127 | result_id = result[0]
128 | cursor.execute(
129 | "UPDATE main SET datetime = DATETIME('Now','LocalTime') WHERE id = ?;",
130 | (result_id,),
131 | )
132 | cursor.close()
133 | conn.commit()
134 | flag = True
135 | return flag, image_hash
136 |
137 |
138 | # Store the item in the deduplication database after the message is sent
139 | def insert_into_cache_db(
140 | conn: Connection, item: Dict[str, Any], image_hash: str
141 | ) -> None:
142 | cursor = conn.cursor()
143 | link = item["link"].replace("'", "''")
144 | title = item["title"].replace("'", "''")
145 | cursor.execute(
146 | "INSERT INTO main (link, title, image_hash) VALUES (?, ?, ?);",
147 | (link, title, image_hash),
148 | )
149 | cursor.close()
150 | conn.commit()
151 |
152 |
153 | # Write to the cache json
154 | def write_item(db: TinyDB, new_item: Dict[str, Any]) -> None:
155 | if not new_item.get("to_send"):
156 | db.update(delete("to_send"), Query().hash == str(new_item.get("hash"))) # type: ignore
157 | db.upsert(cache_filter(new_item), Query().hash == str(new_item.get("hash")))
158 |
--------------------------------------------------------------------------------
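
Taken together, the helpers above form one flow: ensure the table exists and expire stale rows, test each incoming item, and record what was actually sent. A minimal sketch of that wiring, assuming `rss` has duplicate_filter_mode set and each item carries "link"/"title" keys (the function itself is hypothetical):

    import sqlite3

    from rss2.parsing.cache_manage import (
        cache_db_manage,
        duplicate_exists,
        insert_into_cache_db,
    )


    async def push_without_duplicates(rss, items, db_path="cache.db"):
        conn = sqlite3.connect(db_path)
        cache_db_manage(conn)  # create the table if needed, expire old rows
        for item in items:
            summary = item.get("summary", "")
            is_dup, image_hash = await duplicate_exists(rss, conn, item, summary)
            if is_dup:
                continue  # the same link/title/image was pushed before
            # ... build and send the message here ...
            insert_into_cache_db(conn, item, image_hash)
        conn.close()
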
/rss2/parsing/routes/pixiv.py:
--------------------------------------------------------------------------------
1 | import re
2 | import sqlite3
3 | from typing import Any, Dict, List
4 |
5 | import aiohttp
6 | from nonebot.log import logger
7 | from pyquery import PyQuery as Pq
8 | from tenacity import RetryError, TryAgain, retry, stop_after_attempt, stop_after_delay
9 | from tinydb import Query, TinyDB
10 |
11 | from ...config import DATA_PATH
12 | from ...rss_class import Rss
13 | from .. import ParsingBase, cache_db_manage, duplicate_exists, write_item
14 | from ..check_update import get_item_date
15 | from ..handle_images import (
16 | get_preview_gif_from_video,
17 | handle_img_combo,
18 | handle_img_combo_with_content,
19 | )
20 | from ..utils import get_summary
21 |
22 |
23 | # If deduplication is enabled, filter the push list
24 | @ParsingBase.append_before_handler(priority=12, rex="/pixiv/")
25 | async def handle_check_update(rss: Rss, state: Dict[str, Any]) -> Dict[str, Any]:
26 | change_data = state["change_data"]
27 | conn = state["conn"]
28 | db = state["tinydb"]
29 |
30 |     # Check whether deduplication is enabled (the duplicate_filter_mode field)
31 | if not rss.duplicate_filter_mode:
32 | return {"change_data": change_data}
33 |
34 | if not conn:
35 | conn = sqlite3.connect(str(DATA_PATH / "cache.db"))
36 | conn.set_trace_callback(logger.debug)
37 |
38 | cache_db_manage(conn)
39 |
40 | delete = []
41 | for index, item in enumerate(change_data):
42 | summary = get_summary(item)
43 | try:
44 | summary_doc = Pq(summary)
45 |             # If the image is an ugoira, remove the img tag to skip image deduplication
46 | if re.search("类型:ugoira", str(summary_doc)):
47 | summary_doc.remove("img")
48 | summary = str(summary_doc)
49 | except Exception as e:
50 | logger.warning(e)
51 | is_duplicate, image_hash = await duplicate_exists(
52 | rss=rss,
53 | conn=conn,
54 | item=item,
55 | summary=summary,
56 | )
57 | if is_duplicate:
58 | write_item(db, item)
59 | delete.append(index)
60 | else:
61 | change_data[index]["image_hash"] = str(image_hash)
62 |
63 | change_data = [
64 | item for index, item in enumerate(change_data) if index not in delete
65 | ]
66 |
67 | return {
68 | "change_data": change_data,
69 | "conn": conn,
70 | }
71 |
72 |
73 | # Handle pictures
74 | @ParsingBase.append_handler(parsing_type="picture", rex="pixiv")
75 | async def handle_picture(
76 | rss: Rss,
77 | state: Dict[str, Any],
78 | item: Dict[str, Any],
79 | item_msg: str,
80 | tmp: str,
81 | tmp_state: Dict[str, Any],
82 | ) -> str:
83 |
84 |     # Check whether title-only pushing is enabled
85 | if rss.only_title:
86 | return ""
87 |
88 | res = ""
89 | try:
90 | res += await handle_img(
91 | item=item, img_proxy=rss.img_proxy, img_num=rss.max_image_number, rss=rss
92 | )
93 | except Exception as e:
94 | logger.warning(f"{rss.name} 没有正文内容!{e}")
95 |
96 |     # Check whether picture-only pushing is enabled
97 | return f"{res}\n" if rss.only_pic else f"{tmp + res}\n"
98 |
99 |
100 | # Handle pictures and video
101 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30)))
102 | async def handle_img(
103 | item: Dict[str, Any], img_proxy: bool, img_num: int, rss: Rss
104 | ) -> str:
105 | if item.get("image_content"):
106 | return await handle_img_combo_with_content(
107 | item.get("gif_url", ""), item["image_content"]
108 | )
109 | html = Pq(get_summary(item))
110 | link = item["link"]
111 | img_str = ""
112 |     # Handle ugoira
113 | if re.search("类型:ugoira", str(html)):
114 | ugoira_id = re.search(r"\d+", link).group() # type: ignore
115 | try:
116 | url = await get_ugoira_video(ugoira_id)
117 | url = await get_preview_gif_from_video(url)
118 | img_str += await handle_img_combo(url, img_proxy)
119 | except RetryError:
120 | logger.warning(f"动图[{link}]的预览图获取失败,将发送原动图封面")
121 | url = html("img").attr("src")
122 | img_str += await handle_img_combo(url, img_proxy)
123 | else:
124 |         # Handle images
125 | doc_img = list(html("img").items())
126 |         # Send only a limited number of images to avoid flooding
127 | if 0 < img_num < len(doc_img):
128 | img_str += f"\n因启用图片数量限制,目前只有 {img_num} 张图片:"
129 | doc_img = doc_img[:img_num]
130 | for img in doc_img:
131 | url = img.attr("src")
132 | img_str += await handle_img_combo(url, img_proxy, rss)
133 |
134 | return img_str
135 |
136 |
137 | # Fetch the ugoira as a video
138 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30)))
139 | async def get_ugoira_video(ugoira_id: str) -> Any:
140 | async with aiohttp.ClientSession() as session:
141 | data = {"id": ugoira_id, "type": "ugoira"}
142 | resp = await session.post("https://ugoira.huggy.moe/api/illusts", data=data)
143 | url = (await resp.json()).get("data")[0].get("url")
144 | if not url:
145 | raise TryAgain
146 | return url
147 |
148 |
149 | # Handle the source link
150 | @ParsingBase.append_handler(parsing_type="source", rex="pixiv")
151 | async def handle_source(
152 | rss: Rss,
153 | state: Dict[str, Any],
154 | item: Dict[str, Any],
155 | item_msg: str,
156 | tmp: str,
157 | tmp_state: Dict[str, Any],
158 | ) -> str:
159 | source = item["link"]
160 |     # Shorten the pixiv link
161 | str_link = re.sub("https://www.pixiv.net/artworks/", "https://pixiv.net/i/", source)
162 | return f"链接:{str_link}\n"
163 |
164 |
165 | # Check for updates
166 | @ParsingBase.append_before_handler(rex="pixiv/ranking", priority=10) # type: ignore
167 | async def handle_check_update(rss: Rss, state: Dict[str, Any]) -> Dict[str, Any]:
168 | db = state["tinydb"]
169 | change_data = check_update(db, state["new_data"])
170 | return {"change_data": change_data}
171 |
172 |
173 | # Check for updates
174 | def check_update(db: TinyDB, new: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
175 |
176 |     # Items that have failed to send once
177 | to_send_list: List[Dict[str, Any]] = db.search(Query().to_send.exists())
178 |
179 | if not new and not to_send_list:
180 | return []
181 |
182 | old_link_list = [i["link"] for i in db.all()]
183 | to_send_list.extend([i for i in new if i["link"] not in old_link_list])
184 |
185 |     # Sort the results by publication date
186 | to_send_list.sort(key=get_item_date)
187 |
188 | return to_send_list
189 |
--------------------------------------------------------------------------------
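
The retry pattern used above (raise TryAgain inside the function, catch RetryError once the stop condition is exhausted) is plain tenacity and works for any flaky async fetch. A self-contained sketch of the same decorator combination (the function is hypothetical):

    import asyncio

    from tenacity import RetryError, TryAgain, retry, stop_after_attempt, stop_after_delay


    @retry(stop=(stop_after_attempt(5) | stop_after_delay(30)))
    async def flaky_fetch() -> str:
        # Pretend the resource is never ready: TryAgain triggers another attempt
        raise TryAgain


    async def main() -> None:
        try:
            await flaky_fetch()
        except RetryError:
            print("gave up after 5 attempts or 30 seconds")


    asyncio.run(main())
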
/rss2/parsing/send_message.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from collections import defaultdict
3 | from contextlib import suppress
4 | from typing import Any, DefaultDict, Dict, Tuple, Union, List
5 |
6 | import arrow
7 | import nonebot
8 | from nonebot import logger
9 |
10 | from ..rss_class import Rss
11 | from ..utils import get_bot_friend_list, get_bot_group_list, get_all_bot_channel_list
12 |
13 | sending_lock: DefaultDict[Tuple[Union[int, str], str], asyncio.Lock] = defaultdict(
14 | asyncio.Lock
15 | )
16 |
17 | # Send a message
18 | async def send_msg(rss: Rss, msg: str, item: Dict[str, Any]) -> bool:
19 | bot = nonebot.get_bot()
20 | if not msg:
21 | return False
22 | flag = False
23 | error_msg = f"消息发送失败!\n链接:[{item.get('link')}]"
24 | if rss.user_id:
25 | all_friend = (await get_bot_friend_list(bot))[1]
26 | flag = any(
27 | await asyncio.gather(
28 | *[
29 | send_private_msg(
30 | bot, msg, int(user_id), item, error_msg, all_friend
31 | )
32 | for user_id in rss.user_id
33 | ]
34 | )
35 | )
36 |
37 | if rss.group_id:
38 | all_group = (await get_bot_group_list(bot))[1]
39 | flag = (
40 | any(
41 | await asyncio.gather(
42 | *[
43 | send_group_msg(
44 | bot, msg, int(group_id), item, error_msg, all_group
45 | )
46 | for group_id in rss.group_id
47 | ]
48 | )
49 | )
50 | or flag
51 | )
52 |
53 | if rss.guild_channel_id:
54 | all_channels = (await get_all_bot_channel_list(bot))[1]
55 | flag = (
56 | any(
57 | await asyncio.gather(
58 | *[
59 | send_guild_channel_msg(
60 | bot, msg, guild_channel_id, item, error_msg, all_channels
61 | )
62 | for guild_channel_id in rss.guild_channel_id
63 | ]
64 | )
65 | )
66 | or flag
67 | )
68 | return flag
69 |
70 |
71 | # Send a private message
72 | async def send_private_msg(
73 | bot,
74 | msg: str,
75 | user_id: int,
76 | item: Dict[str, Any],
77 | error_msg: str,
78 | all_friend: Dict[int, List[int]],
79 | ) -> bool:
80 | flag = False
81 | start_time = arrow.now()
82 | sid = [k for k, v in all_friend.items() if int(user_id) in v][0]
83 | async with sending_lock[(user_id, "private")]:
84 | try:
85 | await bot.send_msg(
86 | self_id=sid,
87 | message_type="private",
88 | user_id=user_id,
89 | message=msg,
90 | )
91 | await asyncio.sleep(max(1 - (arrow.now() - start_time).total_seconds(), 0))
92 | flag = True
93 | except Exception as e:
94 | logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]")
95 | if item.get("to_send"):
96 | flag = True
97 | with suppress(Exception):
98 | await bot.send_msg(
99 | self_id=sid,
100 | message_type="private",
101 | user_id=user_id,
102 | message=f"{error_msg}\nE: {repr(e)}",
103 | )
104 | return flag
105 |
106 |
107 | # Send a group message
108 | async def send_group_msg(
109 | bot,
110 | msg: str,
111 | group_id: int,
112 | item: Dict[str, Any],
113 | error_msg: str,
114 | all_group: Dict[int, List[int]],
115 | ) -> bool:
116 | flag = False
117 | start_time = arrow.now()
118 | sid = [k for k, v in all_group.items() if int(group_id) in v][0]
119 | async with sending_lock[(group_id, "group")]:
120 | try:
121 | await bot.send_msg(
122 | self_id=sid,
123 | message_type="group",
124 | group_id=group_id,
125 | message=msg,
126 | )
127 | await asyncio.sleep(max(1 - (arrow.now() - start_time).total_seconds(), 0))
128 | flag = True
129 | except Exception as e:
130 | logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]")
131 | if item.get("to_send"):
132 | flag = True
133 | with suppress(Exception):
134 | await bot.send_msg(
135 | self_id=sid,
136 | message_type="group",
137 | group_id=group_id,
138 | message=f"E: {repr(e)}\n{error_msg}",
139 | )
140 | return flag
141 |
142 |
143 | # Send a guild channel message
144 | async def send_guild_channel_msg(
145 | bot,
146 | msg: str,
147 | guild_channel_id: str,
148 | item: Dict[str, Any],
149 | error_msg: str,
150 | all_channels: Dict,
151 | ) -> bool:
152 | flag = False
153 | start_time = arrow.now()
154 | guild_id, channel_id = guild_channel_id.split("@")
155 | sid = [k for k, v in all_channels.items() if channel_id in v][0]
156 | async with sending_lock[(guild_channel_id, "guild_channel")]:
157 | try:
158 | await bot.send_guild_channel_msg(
159 | self_id=sid,
160 | message=msg,
161 | guild_id=guild_id,
162 | channel_id=channel_id,
163 | )
164 | await asyncio.sleep(max(1 - (arrow.now() - start_time).total_seconds(), 0))
165 | flag = True
166 | except Exception as e:
167 | logger.error(f"E: {repr(e)} 链接:[{item.get('link')}]")
168 | if item.get("to_send"):
169 | flag = True
170 | with suppress(Exception):
171 | await bot.send_guild_channel_msg(
172 | self_id=sid,
173 | message=f"E: {repr(e)}\n{error_msg}",
174 | guild_id=guild_id,
175 | channel_id=channel_id,
176 | )
177 | return flag
178 |
--------------------------------------------------------------------------------
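
All three senders share one pattern: a per-target asyncio.Lock from sending_lock plus a sleep that tops each send up to one second, so concurrent pushes to the same target are serialized and paced to roughly one message per second. A stripped-down sketch of just that pattern (names are illustrative):

    import asyncio
    from collections import defaultdict
    from typing import Awaitable, Callable, DefaultDict, Tuple

    import arrow

    sending_lock: DefaultDict[Tuple[int, str], asyncio.Lock] = defaultdict(asyncio.Lock)


    async def paced_send(target_id: int, send: Callable[[], Awaitable[None]]) -> None:
        # Serialize sends to the same target and keep them >= 1 second apart
        async with sending_lock[(target_id, "group")]:
            start_time = arrow.now()
            await send()
            elapsed = (arrow.now() - start_time).total_seconds()
            await asyncio.sleep(max(1 - elapsed, 0))
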
/rss2/rss_parsing.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Tuple
2 |
3 | import aiohttp
4 | import feedparser
5 | from nonebot import get_bot
6 | from nonebot.log import logger
7 | from tinydb import TinyDB
8 | from tinydb.middlewares import CachingMiddleware
9 | from tinydb.storages import JSONStorage
10 | from yarl import URL
11 |
12 | from . import my_trigger as tr
13 | from .config import DATA_PATH, config
14 | from .parsing import get_proxy
15 | from .parsing.cache_manage import cache_filter
16 | from .parsing.check_update import dict_hash
17 | from .parsing.parsing_rss import ParsingRss
18 | from .rss_class import Rss
19 | from .utils import (
20 | filter_valid_group_id_list,
21 | filter_valid_guild_channel_id_list,
22 | filter_valid_user_id_list,
23 | get_http_caching_headers,
24 | send_message_to_admin,
25 | )
26 |
27 | HEADERS = {
28 | "Accept": "application/xhtml+xml,application/xml,*/*",
29 | "Accept-Language": "en-US,en;q=0.9",
30 | "Cache-Control": "max-age=0",
31 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36",
32 | "Connection": "keep-alive",
33 | "Content-Type": "application/xml; charset=utf-8",
34 | }
35 |
36 |
37 | # Fetch the feed, read the cache, check for updates and process them
38 | async def start(rss: Rss) -> None:
39 | bot = get_bot() # type: ignore
40 |     # First check that the subscribers are still valid
41 | if rss.user_id:
42 | rss.user_id = await filter_valid_user_id_list(bot, rss.user_id)
43 | if rss.group_id:
44 | rss.group_id = await filter_valid_group_id_list(bot, rss.group_id)
45 | if rss.guild_channel_id:
46 | rss.guild_channel_id = await filter_valid_guild_channel_id_list(
47 | bot, rss.guild_channel_id
48 | )
49 | if not any([rss.user_id, rss.group_id, rss.guild_channel_id]):
50 | await auto_stop_and_notify_admin(rss, bot)
51 | return
52 | new_rss, cached = await fetch_rss(rss)
53 |     # Check whether a cache record for this rss already exists
54 | _file = DATA_PATH / f"{Rss.handle_name(rss.name)}.json"
55 | first_time_fetch = not _file.exists()
56 | if cached:
57 | logger.info(f"{rss.name} 没有新信息")
58 | return
59 | if not new_rss or not new_rss.get("feed"):
60 | rss.error_count += 1
61 | logger.warning(f"{rss.name} 抓取失败!")
62 | if first_time_fetch:
63 |             # First fetch failed: if a proxy is configured, automatically retry through it
64 | if config.rss_proxy and not rss.img_proxy:
65 | rss.img_proxy = True
66 | logger.info(f"{rss.name} 第一次抓取失败,自动使用代理抓取")
67 | await start(rss)
68 | else:
69 | await auto_stop_and_notify_admin(rss, bot)
70 | if rss.error_count >= 100:
71 | await auto_stop_and_notify_admin(rss, bot)
72 | return
73 | if new_rss.get("feed") and rss.error_count > 0:
74 | rss.error_count = 0
75 | if first_time_fetch:
76 | with TinyDB(
77 | _file,
78 | storage=CachingMiddleware(JSONStorage), # type: ignore
79 | encoding="utf-8",
80 | sort_keys=True,
81 | indent=4,
82 | ensure_ascii=False,
83 | ) as db:
84 | entries = new_rss["entries"]
85 | result = []
86 | for i in entries:
87 | i["hash"] = dict_hash(i)
88 | result.append(cache_filter(i))
89 | db.insert_multiple(result)
90 | logger.info(f"{rss.name} 第一次抓取成功!")
91 | return
92 |
93 | pr = ParsingRss(rss=rss)
94 | await pr.start(rss_name=rss.name, new_rss=new_rss)
95 |
96 |
97 | async def auto_stop_and_notify_admin(rss: Rss, bot) -> None:
98 | rss.stop = True
99 | rss.upsert()
100 | tr.delete_job(rss)
101 | cookies_str = "及 cookies " if rss.cookies else ""
102 | if not any([rss.user_id, rss.group_id, rss.guild_channel_id]):
103 | msg = f"{rss.name}[{rss.get_url()}]无人订阅!已自动停止更新!"
104 | elif rss.error_count >= 100:
105 | msg = (
106 | f"{rss.name}[{rss.get_url()}]已经连续抓取失败超过 100 次!已自动停止更新!请检查订阅地址{cookies_str}!"
107 | )
108 | else:
109 | msg = f"{rss.name}[{rss.get_url()}]第一次抓取失败!已自动停止更新!请检查订阅地址{cookies_str}!"
110 | await send_message_to_admin(msg, bot)
111 |
112 |
113 | # Fetch the RSS and parse it into json
114 | async def fetch_rss(rss: Rss) -> Tuple[Dict[str, Any], bool]:
115 | rss_url = rss.get_url()
116 |     # Do not use a proxy for a locally deployed RSSHub
117 | local_host = [
118 | "localhost",
119 | "127.0.0.1",
120 | ]
121 | proxy = get_proxy(rss.img_proxy) if URL(rss_url).host not in local_host else None
122 |
123 |     # Decide whether to use cookies
124 | cookies = rss.cookies or None
125 |
126 |     # Fetch the xml
127 | d: Dict[str, Any] = {}
128 | cached = False
129 | headers = HEADERS.copy()
130 | if not config.rsshub_backup:
131 | if rss.etag:
132 | headers["If-None-Match"] = rss.etag
133 | if rss.last_modified:
134 | headers["If-Modified-Since"] = rss.last_modified
135 | async with aiohttp.ClientSession(
136 | cookies=cookies,
137 |         headers=headers,
138 | raise_for_status=True,
139 | ) as session:
140 | try:
141 | resp = await session.get(rss_url, proxy=proxy)
142 | if not config.rsshub_backup:
143 | http_caching_headers = get_http_caching_headers(resp.headers)
144 | rss.etag = http_caching_headers["ETag"]
145 | rss.last_modified = http_caching_headers["Last-Modified"]
146 | rss.upsert()
147 | if (
148 | resp.status == 200 and int(resp.headers.get("Content-Length", "1")) == 0
149 | ) or resp.status == 304:
150 | cached = True
151 |             # Parse into JSON
152 | d = feedparser.parse(await resp.text())
153 | except Exception:
154 | if not URL(rss.url).scheme and config.rsshub_backup:
155 | logger.debug(f"[{rss_url}]访问失败!将使用备用 RSSHub 地址!")
156 | for rsshub_url in list(config.rsshub_backup):
157 | rss_url = rss.get_url(rsshub=rsshub_url)
158 | try:
159 | resp = await session.get(rss_url, proxy=proxy)
160 | d = feedparser.parse(await resp.text())
161 | except Exception:
162 | logger.debug(f"[{rss_url}]访问失败!将使用备用 RSSHub 地址!")
163 | continue
164 | if d.get("feed"):
165 | logger.info(f"[{rss_url}]抓取成功!")
166 | break
167 | return d, cached
168 |
--------------------------------------------------------------------------------
/rss2/parsing/parsing_rss.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Any, Callable, Dict, List
3 |
4 | from tinydb import TinyDB
5 | from tinydb.middlewares import CachingMiddleware
6 | from tinydb.storages import JSONStorage
7 |
8 | from ..config import DATA_PATH
9 | from ..rss_class import Rss
10 |
11 |
12 | # When the subscriber starts, register the parsers onto the rss instance class, to avoid re-matching on every push
13 | class ParsingItem:
14 | def __init__(
15 | self,
16 | func: Callable[..., Any],
17 | rex: str = "(.*)",
18 | priority: int = 10,
19 | block: bool = False,
20 | ):
21 |         # Parsing function
22 | self.func: Callable[..., Any] = func
23 |         # Regex matched against the subscription URL; "(.*)" matches everything
24 | self.rex: str = rex
25 |         # Priority; lower numbers run first. On a tie the default handler (rex="(.*)") is dropped
26 | self.priority: int = priority
27 |         # Whether to block the handlers after this one (off by default). To discard the default handling, use block==True and priority<10
28 | self.block: bool = block
29 |
30 |
31 | # Sort the parsers
32 | def _sort(_list: List[ParsingItem]) -> List[ParsingItem]:
33 | _list.sort(key=lambda x: x.priority)
34 | return _list
35 |
36 |
37 | # rss parsing class; subscriptions that need special handling are registered on it
38 | class ParsingBase:
39 | """
40 |     - **Type**: ``List[ParsingItem]``
41 |     - **Description**: the parsers that run first, defining pre-steps such as the update check
42 | """
43 |
44 | before_handler: List[ParsingItem] = []
45 |
46 | """
47 |     - **Type**: ``Dict[str, List[ParsingItem]]``
48 |     - **Description**: the parsers
49 | """
50 | handler: Dict[str, List[ParsingItem]] = {
51 |         "before": [],  # pre-processing of each item
52 | "title": [],
53 | "summary": [],
54 | "picture": [],
55 | "source": [],
56 | "date": [],
57 | "torrent": [],
58 |         "after": [],  # final processing of each item; message truncation and sending happen here
59 | }
60 |
61 | """
62 |     - **Type**: ``List[ParsingItem]``
63 |     - **Description**: the parsers that run last, after messages are sent; several messages can also be merged into one send
64 | """
65 | after_handler: List[ParsingItem] = []
66 |
67 |     # Register a parser
68 | @classmethod
69 | def append_handler(
70 | cls,
71 | parsing_type: str,
72 | rex: str = "(.*)",
73 | priority: int = 10,
74 | block: bool = False,
75 | ) -> Callable[..., Any]:
76 | def _decorator(func: Callable[..., Any]) -> Callable[..., Any]:
77 | cls.handler[parsing_type].append(ParsingItem(func, rex, priority, block))
78 | cls.handler.update({parsing_type: _sort(cls.handler[parsing_type])})
79 | return func
80 |
81 | return _decorator
82 |
83 | @classmethod
84 | def append_before_handler(
85 | cls, rex: str = "(.*)", priority: int = 10, block: bool = False
86 | ) -> Callable[..., Any]:
87 | """
88 |         Decorate a function to register it as a before-handler.
89 |         Args:
90 |             rex: regex matched against the target subscription URL; the handler applies on a match
91 |             priority: handler priority; a custom handler overrides the default handler of the same priority
92 |             block: whether to block the handlers that come after this one
93 | """
94 |
95 | def _decorator(func: Callable[..., Any]) -> Callable[..., Any]:
96 | cls.before_handler.append(ParsingItem(func, rex, priority, block))
97 | cls.before_handler = _sort(cls.before_handler)
98 | return func
99 |
100 | return _decorator
101 |
102 | @classmethod
103 | def append_after_handler(
104 | cls, rex: str = "(.*)", priority: int = 10, block: bool = False
105 | ) -> Callable[..., Any]:
106 | """
107 |         Decorate a function to register it as an after-handler.
108 |         Args:
109 |             rex: regex matched against the target subscription URL; the handler applies on a match
110 |             priority: handler priority; a custom handler overrides the default handler of the same priority
111 |             block: whether to block the handlers that come after this one
112 | """
113 |
114 | def _decorator(func: Callable[..., Any]) -> Callable[..., Any]:
115 | cls.after_handler.append(ParsingItem(func, rex, priority, block))
116 | cls.after_handler = _sort(cls.after_handler)
117 | return func
118 |
119 | return _decorator
120 |
121 |
122 | # Filter the handlers
123 | def _handler_filter(_handler_list: List[ParsingItem], _url: str) -> List[ParsingItem]:
124 | _result = [h for h in _handler_list if re.search(h.rex, _url)]
125 |     # When priorities tie, drop the default "(.*)" handler
126 | _delete = [
127 | (h.func.__name__, "(.*)", h.priority) for h in _result if h.rex != "(.*)"
128 | ]
129 | _result = [
130 | h for h in _result if (h.func.__name__, h.rex, h.priority) not in _delete
131 | ]
132 | return _result
133 |
134 |
135 | # Parsing instance
136 | class ParsingRss:
137 |
138 |     # Initialize the parsing instance
139 | def __init__(self, rss: Rss):
140 |         self.state: Dict[str, Any] = {}  # context data used while processing
141 | self.rss: Rss = rss
142 |
143 |         # Filter the handlers
144 | self.before_handler: List[ParsingItem] = _handler_filter(
145 | ParsingBase.before_handler, self.rss.get_url()
146 | )
147 | self.handler: Dict[str, List[ParsingItem]] = {}
148 | for k, v in ParsingBase.handler.items():
149 | self.handler[k] = _handler_filter(v, self.rss.get_url())
150 | self.after_handler = _handler_filter(
151 | ParsingBase.after_handler, self.rss.get_url()
152 | )
153 |
154 |     # Start parsing
155 | async def start(self, rss_name: str, new_rss: Dict[str, Any]) -> None:
156 |         # new_data is the complete parsed rss dict
157 |         # Pre-processing
158 | rss_title = new_rss["feed"]["title"]
159 | new_data = new_rss["entries"]
160 | _file = DATA_PATH / f"{Rss.handle_name(rss_name)}.json"
161 | db = TinyDB(
162 | _file,
163 | storage=CachingMiddleware(JSONStorage), # type: ignore
164 | encoding="utf-8",
165 | sort_keys=True,
166 | indent=4,
167 | ensure_ascii=False,
168 | )
169 | self.state.update(
170 | {
171 | "rss_title": rss_title,
172 | "new_data": new_data,
173 |                 "change_data": [],  # list of updated items
174 |                 "conn": None,  # database connection
175 |                 "tinydb": db,  # cache json
176 | }
177 | )
178 | for handler in self.before_handler:
179 | self.state.update(await handler.func(rss=self.rss, state=self.state))
180 | if handler.block:
181 | break
182 |
183 |         # Per-item processing
184 | self.state.update(
185 | {
186 | "messages": [],
187 | "item_count": 0,
188 | }
189 | )
190 | for item in self.state["change_data"]:
191 | item_msg = f"【{self.state.get('rss_title')}】更新了!\n----------------------\n"
192 |
193 | for handler_list in self.handler.values():
194 |             # Holds the previous handler's result
195 | tmp = ""
196 |             tmp_state = {"continue": True}  # whether to keep running the remaining handlers
197 |
198 |             # Each content part (e.g. the body) gets the original text plus the previous result, and overwrites it when done
199 | for handler in handler_list:
200 | tmp = await handler.func(
201 | rss=self.rss,
202 | state=self.state,
203 | item=item,
204 | item_msg=item_msg,
205 | tmp=tmp,
206 | tmp_state=tmp_state,
207 | )
208 | if handler.block or not tmp_state["continue"]:
209 | break
210 | item_msg += tmp
211 | self.state["messages"].append(item_msg)
212 |
213 |         # Final processing
214 | for handler in self.after_handler:
215 | self.state.update(await handler.func(rss=self.rss, state=self.state))
216 | if handler.block:
217 | break
218 |
--------------------------------------------------------------------------------
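
A new route therefore only has to register handlers against its URL pattern; _handler_filter then discards the default "(.*)" handler of the same priority automatically. A minimal sketch of a custom route module built on the decorators above (the example.com pattern is made up):

    from typing import Any, Dict

    from rss2.parsing import ParsingBase
    from rss2.rss_class import Rss


    # Same priority 10 as the default title handler, but a more specific rex,
    # so the "(.*)" default is dropped for matching feeds
    @ParsingBase.append_handler(parsing_type="title", rex="example.com")
    async def handle_title(
        rss: Rss,
        state: Dict[str, Any],
        item: Dict[str, Any],
        item_msg: str,
        tmp: str,
        tmp_state: Dict[str, Any],
    ) -> str:
        return f"标题:{item['title']}\n"
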
/rss2/utils.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import functools
3 | import math
4 | import re
5 | from contextlib import suppress
6 | from typing import Any, Dict, List, Mapping, Optional, Tuple
7 |
8 | from cachetools import TTLCache
9 | from cachetools.keys import hashkey
10 | import nonebot
11 | from nonebot.log import logger
12 | from .config import config
13 |
14 |
15 | def get_http_caching_headers(
16 | headers: Optional[Mapping[str, Any]],
17 | ) -> Dict[str, Optional[str]]:
18 | return (
19 | {
20 | "Last-Modified": headers.get("Last-Modified") or headers.get("Date"),
21 | "ETag": headers.get("ETag"),
22 | }
23 | if headers
24 | else {"Last-Modified": None, "ETag": None}
25 | )
26 |
27 |
28 | def convert_size(size_bytes: int) -> str:
29 | if size_bytes == 0:
30 | return "0 B"
31 | size_name = ("B", "KB", "MB", "GB", "TB")
32 | i = int(math.floor(math.log(size_bytes, 1024)))
33 | p = math.pow(1024, i)
34 | s = round(size_bytes / p, 2)
35 | return f"{s} {size_name[i]}"
36 |
37 |
38 | def cached_async(cache, key=hashkey): # type: ignore
39 | """
40 | https://github.com/tkem/cachetools/commit/3f073633ed4f36f05b57838a3e5655e14d3e3524
41 | """
42 |
43 | def decorator(func): # type: ignore
44 | if cache is None:
45 |
46 | async def wrapper(*args, **kwargs): # type: ignore
47 | return await func(*args, **kwargs)
48 |
49 | else:
50 |
51 | async def wrapper(*args, **kwargs): # type: ignore
52 | k = key(*args, **kwargs)
53 | with suppress(KeyError): # key not found
54 | return cache[k]
55 | v = await func(*args, **kwargs)
56 | with suppress(ValueError): # value too large
57 | cache[k] = v
58 | return v
59 |
60 | return functools.update_wrapper(wrapper, func)
61 |
62 | return decorator
63 |
64 |
65 | def get_bot_qq(bot) -> List[int]:
66 | return bot._wsr_api_clients.keys()
67 |
68 |
69 | @cached_async(TTLCache(maxsize=1, ttl=300)) # type: ignore
70 | async def get_bot_friend_list(bot) -> Tuple[List[int], Dict[int, List[int]]]:
71 | bot_qq = list(get_bot_qq(bot))
72 | all_friends = {}
73 | friend_list = []
74 | for sid in bot_qq:
75 | f = await bot.get_friend_list(self_id=sid)
76 | all_friends[sid] = [i["user_id"] for i in f]
77 | friend_list.extend(all_friends[sid])
78 | return set(friend_list), all_friends
79 |
80 |
81 | @cached_async(TTLCache(maxsize=1, ttl=300)) # type: ignore
82 | async def get_bot_group_list(bot) -> Tuple[List[int], Dict[int, List[int]]]:
83 | bot_qq = list(get_bot_qq(bot))
84 | all_groups = {}
85 | group_list = []
86 | for sid in bot_qq:
87 | g = await bot.get_group_list(self_id=sid)
88 | all_groups[sid] = [i["group_id"] for i in g]
89 | group_list.extend(all_groups[sid])
90 | return set(group_list), all_groups
91 |
92 |
93 | @cached_async(TTLCache(maxsize=1, ttl=300)) # type: ignore
94 | async def get_all_bot_guild_list(bot) -> Tuple[List[int], Dict[int, List[str]]]:
95 | bot_qq = list(get_bot_qq(bot))
96 |     # Fetch the guild list
97 | all_guilds = {}
98 | guild_list = []
99 | for sid in bot_qq:
100 | g = await bot.get_guild_list(self_id=sid)
101 | all_guilds[sid] = [i["guild_id"] for i in g]
102 | guild_list.extend(all_guilds[sid])
103 | return set(guild_list), all_guilds
104 |
105 |
106 | @cached_async(TTLCache(maxsize=1, ttl=300)) # type: ignore
107 | async def get_all_bot_channel_list(bot) -> Tuple[List[str], Dict[int, List[str]]]:
108 | guild_list, all_guilds = await get_all_bot_guild_list(bot)
109 |     # Fetch the channel lists
110 | all_channels = {}
111 | channel_list = []
112 | for guild in guild_list:
113 | sid = [k for k, v in all_guilds.items() if guild in v][0]
114 | c = await bot.get_guild_channel_list(self_id=sid, guild_id=guild)
115 | all_channels[sid] = [i["channel_id"] for i in c]
116 | channel_list.extend(all_channels[sid])
117 | return set(channel_list), all_channels
118 |
119 |
120 | @cached_async(TTLCache(maxsize=1, ttl=300)) # type: ignore
121 | async def get_bot_guild_channel_list(bot, guild_id: Optional[str] = None) -> List[str]:
122 | guild_list, all_guilds = await get_all_bot_guild_list(bot)
123 | if guild_id is None:
124 | return guild_list
125 | if guild_id in guild_list:
126 |         # Fetch the channel list
127 | sid = [k for k, v in all_guilds.items() if guild_id in v][0]
128 | channel_list = await bot.get_guild_channel_list(self_id=sid, guild_id=guild_id)
129 | return [i["channel_id"] for i in channel_list]
130 | return []
131 |
132 |
133 | def get_torrent_b16_hash(content: bytes) -> str:
134 |     import magneturi
135 |
136 |     # magneturi.from_torrent_file(torrent_file_name) also works from a file path
137 |     magnet_link = magneturi.from_torrent_data(content)
138 |     # The magnet link has the form magnet:?xt=urn:btih:<32-char base32 hash>,
139 |     # so the 32-character base32 info hash sits at offset 20
140 |     b32_hash = magnet_link[20:52]
141 |     # Decode the base32 hash and re-encode it as base16 (hex)
142 |     b16_hash = base64.b16encode(base64.b32decode(b32_hash))
143 |     # The result is the 40-digit lowercase info hash
144 |     b16_hash = b16_hash.lower()
145 |     # print("40-digit info hash: " + b16_hash)
146 |     # print("magnet link: magnet:?xt=urn:btih:" + b16_hash)
147 |     return str(b16_hash, "utf-8")
148 |
149 |
150 | async def send_message_to_admin(message: str, bot=nonebot.get_bot()) -> None:
151 | await bot.send_private_msg(user_id=str(list(config.superusers)[0]), message=message)
152 |
153 |
154 | async def send_msg(
155 | msg: str,
156 | user_ids: Optional[List[str]] = None,
157 | group_ids: Optional[List[str]] = None,
158 | ) -> List[Dict[str, Any]]:
159 | """
160 |     msg: str
161 |     user_ids: List[str]
162 |     group_ids: List[str]
163 |
164 |     Send a message to private chats or group chats
165 | """
166 | bot = nonebot.get_bot()
167 | msg_id = []
168 | if group_ids:
169 | for group_id in group_ids:
170 | msg_id.append(await bot.send_group_msg(group_id=int(group_id), message=msg))
171 | if user_ids:
172 | for user_id in user_ids:
173 | msg_id.append(await bot.send_private_msg(user_id=int(user_id), message=msg))
174 | return msg_id
175 |
176 |
177 | # Validate a regular expression
178 | def regex_validate(regex: str) -> bool:
179 | try:
180 | re.compile(regex)
181 | return True
182 | except re.error:
183 | return False
184 |
185 |
186 | # Filter to valid friends
187 | async def filter_valid_user_id_list(bot, user_id_list: List[str]) -> List[str]:
188 | friend_list, _ = await get_bot_friend_list(bot)
189 | valid_user_id_list = [
190 | user_id for user_id in user_id_list if int(user_id) in friend_list
191 | ]
192 | if invalid_user_id_list := [
193 | user_id for user_id in user_id_list if user_id not in valid_user_id_list
194 | ]:
195 | logger.warning(f"QQ号[{','.join(invalid_user_id_list)}]不是Bot[{bot.self_id}]的好友")
196 | return valid_user_id_list
197 |
198 |
199 | # Filter to valid groups
200 | async def filter_valid_group_id_list(bot, group_id_list: List[str]) -> List[str]:
201 | group_list, _ = await get_bot_group_list(bot)
202 | valid_group_id_list = [
203 | group_id for group_id in group_id_list if int(group_id) in group_list
204 | ]
205 | if invalid_group_id_list := [
206 | group_id for group_id in group_id_list if group_id not in valid_group_id_list
207 | ]:
208 | logger.warning(f"Bot[{bot.self_id}]未加入群组[{','.join(invalid_group_id_list)}]")
209 | return valid_group_id_list
210 |
211 |
212 | # Filter to valid guild channels
213 | async def filter_valid_guild_channel_id_list(
214 | bot, guild_channel_id_list: List[str]
215 | ) -> List[str]:
216 | valid_guild_channel_id_list = []
217 | for guild_channel_id in guild_channel_id_list:
218 | guild_id, channel_id = guild_channel_id.split("@")
219 | guild_list = await get_bot_guild_channel_list(bot)
220 | if guild_id not in guild_list:
221 | guild_name = (await bot.get_guild_meta_by_guest(guild_id=guild_id))[
222 | "guild_name"
223 | ]
224 | logger.warning(f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}]")
225 | continue
226 |
227 | channel_list = await get_bot_guild_channel_list(bot, guild_id=guild_id)
228 | if channel_id not in channel_list:
229 | guild_name = (await bot.get_guild_meta_by_guest(guild_id=guild_id))[
230 | "guild_name"
231 | ]
232 | logger.warning(
233 | f"Bot[{bot.self_id}]未加入频道 {guild_name}[{guild_id}]的子频道[{channel_id}]"
234 | )
235 | continue
236 | valid_guild_channel_id_list.append(guild_channel_id)
237 | return valid_guild_channel_id_list
238 |
--------------------------------------------------------------------------------
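
cached_async mirrors cachetools' cached decorator for coroutines: results are keyed by hashkey and short-circuit repeated awaits within the TTL window. A small usage sketch (the decorated function is hypothetical):

    from cachetools import TTLCache

    from rss2.utils import cached_async


    @cached_async(TTLCache(maxsize=16, ttl=60))
    async def fetch_profile(user_id: int) -> dict:
        # Expensive lookup; the result is reused for 60 seconds per user_id
        return {"user_id": user_id}
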
/rss2/qbittorrent_download.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import base64
3 | import re
4 | from typing import Any, Dict, List, Optional
5 | from pathlib import Path
6 |
7 | import aiohttp
8 | import arrow
9 | from apscheduler.triggers.interval import IntervalTrigger
10 | from aiocqhttp import ActionFailed, NetworkError
11 | from nonebot import get_bot, scheduler
12 | from nonebot.log import logger
13 | from qbittorrent import Client
14 |
15 | from .config import config
16 | from .utils import (
17 | convert_size,
18 | get_torrent_b16_hash,
19 | get_bot_group_list,
20 | send_message_to_admin,
21 | )
22 |
23 | # Plan
24 | # Create one global scheduled job to monitor torrent download progress
25 | # Callback on successful group file upload
26 | # Three file states: 1. downloading 2. uploading 3. upload complete
27 | # Persist the file info
28 | # Keyword regular expressions
29 | # Download switch
30 |
31 | DOWN_STATUS_DOWNING = 1  # downloading
32 | DOWN_STATUS_UPLOADING = 2  # uploading
33 | DOWN_STATUS_UPLOAD_OK = 3  # upload complete
34 | down_info: Dict[str, Dict[str, Any]] = {}
35 |
36 | # Example
37 | # {
38 | #     "hash value": {
39 | #         "status": DOWN_STATUS_DOWNING,
40 | #         "start_time": None,  # download start time
41 | #         "downing_tips_msg_id": []  # ids of the last progress notice sent to the group, recalled later to avoid flooding
42 | #     }
43 | # }
44 |
45 |
46 | # Send a notification
47 | async def send_msg(
48 | msg: str, notice_group: Optional[List[str]] = None
49 | ) -> List[Dict[str, Any]]:
50 | logger.info(msg)
51 | bot = get_bot()
52 | msg_id = []
53 | group_list, all_groups = await get_bot_group_list(bot)
54 | if down_status_msg_group := (notice_group or config.down_status_msg_group):
55 | for group_id in down_status_msg_group:
56 | if int(group_id) not in group_list:
57 | logger.error(f"Bot[{bot.self_id}]未加入群组[{group_id}]")
58 | continue
59 | sid = [k for k, v in all_groups.items() if int(group_id) in v][0]
60 | msg_id.append(
61 | await bot.send_group_msg(
62 | self_id=sid, group_id=int(group_id), message=msg
63 | )
64 | )
65 | return msg_id
66 |
67 |
68 | async def get_qb_client() -> Optional[Client]:
69 | try:
70 | qb = Client(config.qb_web_url)
71 | if config.qb_username and config.qb_password:
72 | qb.login(config.qb_username, config.qb_password)
73 | else:
74 | qb.login()
75 | except Exception:
76 | bot = get_bot()
77 | msg = (
78 | "❌ 无法连接到 qbittorrent ,请检查:\n"
79 | "1. 是否启动程序\n"
80 | "2. 是否勾选了“Web用户界面(远程控制)”\n"
81 | "3. 连接地址、端口是否正确"
82 | )
83 | logger.exception(msg)
84 | await send_message_to_admin(msg, bot)
85 | return None
86 | try:
87 | qb.get_default_save_path()
88 | except Exception:
89 | bot = get_bot()
90 |         msg = "❌ 无法登录到 qbittorrent ,请检查相关配置是否正确"
91 | logger.exception(msg)
92 | await send_message_to_admin(msg, bot)
93 | return None
94 | return qb
95 |
96 |
97 | async def get_torrent_info_from_hash(
98 | qb: Client, url: str, proxy: Optional[str]
99 | ) -> Dict[str, str]:
100 | info = None
101 | if re.search(r"magnet:\?xt=urn:btih:", url):
102 | qb.download_from_link(link=url)
103 | if _hash_str := re.search(r"[A-F\d]{40}", url, flags=re.I):
104 | hash_str = _hash_str[0].lower()
105 | else:
106 | hash_str = (
107 | base64.b16encode(
108 | base64.b32decode(re.search(r"[2-7A-Z]{32}", url, flags=re.I)[0]) # type: ignore
109 | )
110 | .decode("utf-8")
111 | .lower()
112 | )
113 |
114 | else:
115 | async with aiohttp.ClientSession(
116 | timeout=aiohttp.ClientTimeout(total=100)
117 | ) as session:
118 | try:
119 | resp = await session.get(url, proxy=proxy)
120 | content = await resp.read()
121 | qb.download_from_file(content)
122 | hash_str = get_torrent_b16_hash(content)
123 | except Exception as e:
124 | await send_msg(f"下载种子失败,可能需要代理\n{e}")
125 | return {}
126 |
127 | while not info:
128 | for tmp_torrent in qb.torrents():
129 | if tmp_torrent["hash"] == hash_str and tmp_torrent["size"]:
130 | info = {
131 | "hash": tmp_torrent["hash"],
132 | "filename": tmp_torrent["name"],
133 | "size": convert_size(tmp_torrent["size"]),
134 | }
135 | await asyncio.sleep(1)
136 | return info
137 |
138 |
139 | # Torrent url, torrent download path, group list for the group-file upload, subscription name
140 | async def start_down(
141 | url: str, group_ids: List[str], name: str, proxy: Optional[str]
142 | ) -> str:
143 | qb = await get_qb_client()
144 | if not qb:
145 | return ""
146 |     # Get the torrent hash
147 | info = await get_torrent_info_from_hash(qb=qb, url=url, proxy=proxy)
148 | await rss_trigger(
149 | hash_str=info["hash"],
150 | group_ids=group_ids,
151 | name=f"订阅:{name}\n{info['filename']}\n文件大小:{info['size']}",
152 | )
153 | down_info[info["hash"]] = {
154 | "status": DOWN_STATUS_DOWNING,
155 |         "start_time": arrow.now(),  # download start time
156 |         "downing_tips_msg_id": [],  # ids of the last progress notice sent to the group, recalled later to avoid flooding
157 | }
158 | return info["hash"]
159 |
160 |
161 | # Check the download status
162 | async def check_down_status(hash_str: str, group_ids: List[str], name: str) -> None:
163 | qb = await get_qb_client()
164 | if not qb:
165 | return
166 |     # Guard against the job being deleted mid-way and looping forever
167 | try:
168 | info = qb.get_torrent(hash_str)
169 | files = qb.get_torrent_files(hash_str)
170 | except Exception as e:
171 | logger.exception(e)
172 | scheduler.remove_job(hash_str)
173 | return
174 | bot = get_bot()
175 | all_groups = (await get_bot_group_list(bot))[1]
176 | sid = None
177 | if info["total_downloaded"] - info["total_size"] >= 0.000000:
178 | all_time = arrow.now() - down_info[hash_str]["start_time"]
179 | await send_msg(
180 | f"👏 {name}\n"
181 | f"Hash:{hash_str}\n"
182 | f"下载完成!耗时:{str(all_time).split('.', 2)[0]}"
183 | )
184 | down_info[hash_str]["status"] = DOWN_STATUS_UPLOADING
185 | for group_id in group_ids:
186 | for tmp in files:
187 |             # Wrap in try/except so a timeout error does not break the remaining uploads
188 | try:
189 | path = Path(info.get("save_path", "")) / tmp["name"]
190 | if config.qb_down_path:
191 | if (_path := Path(config.qb_down_path)).is_dir():
192 | path = _path / tmp["name"]
193 | await send_msg(f"{name}\nHash:{hash_str}\n开始上传到群:{group_id}")
194 | sid = [k for k, v in all_groups.items() if int(group_id) in v][0]
195 | try:
196 | await bot.call_action(
197 | self_id=sid,
198 | action="upload_group_file",
199 | group_id=group_id,
200 | file=str(path),
201 | name=tmp["name"],
202 | )
203 | except ActionFailed:
204 | msg = f"{name}\nHash:{hash_str}\n上传到群:{group_id}失败!请手动上传!"
205 | await send_msg(msg, [group_id])
206 | logger.exception(msg)
207 | except NetworkError as e:
208 | logger.warning(e)
209 | except TimeoutError as e:
210 | logger.warning(e)
211 | scheduler.remove_job(hash_str)
212 | down_info[hash_str]["status"] = DOWN_STATUS_UPLOAD_OK
213 | else:
214 | await delete_msg(bot, sid, down_info[hash_str]["downing_tips_msg_id"])
215 | msg_id = await send_msg(
216 | f"{name}\n"
217 | f"Hash:{hash_str}\n"
218 | f"下载了 {round(info['total_downloaded'] / info['total_size'] * 100, 2)}%\n"
219 | f"平均下载速度: {round(info['dl_speed_avg'] / 1024, 2)} KB/s"
220 | )
221 | down_info[hash_str]["downing_tips_msg_id"] = msg_id
222 |
223 |
224 | # Recall messages
225 | async def delete_msg(bot, sid, msg_ids: List[Dict[str, Any]]) -> None:
226 | for msg_id in msg_ids:
227 | try:
228 | await bot.call_action(
229 | "delete_msg", message_id=msg_id["message_id"], self_id=sid
230 | )
231 | except Exception as e:
232 |             logger.warning(f"下载进度消息撤回失败!{e}")
233 |
234 |
235 | async def rss_trigger(hash_str: str, group_ids: List[str], name: str) -> None:
236 |     # Build a trigger that fires once every n seconds
237 | trigger = IntervalTrigger(seconds=int(config.down_status_msg_date), jitter=10)
238 | job_defaults = {"max_instances": 1}
239 |     # Add the job
240 | scheduler.add_job(
241 |         func=check_down_status,  # the function to schedule; pass it without calling it
242 |         trigger=trigger,  # the trigger
243 |         args=(hash_str, group_ids, name),  # the function's arguments; with a single value the trailing comma is required
244 |         id=hash_str,
245 |         misfire_grace_time=60,  # allowed misfire window; best not to omit it
246 | job_defaults=job_defaults,
247 | )
248 | await send_msg(f"👏 {name}\nHash:{hash_str}\n下载任务添加成功!", group_ids)
249 |
--------------------------------------------------------------------------------
/rss2/rss_class.py:
--------------------------------------------------------------------------------
1 | import re
2 | from copy import deepcopy
3 | from typing import Any, Dict, List, Optional
4 |
5 | from tinydb import Query, TinyDB
6 | from tinydb.operations import set as tinydb_set
7 | from yarl import URL
8 |
9 | from .config import DATA_PATH, JSON_PATH, config
10 |
11 |
12 | class Rss:
13 | def __init__(self, data: Optional[Dict[str, Any]] = None):
14 |         self.name: str = ""  # subscription name
15 |         self.url: str = ""  # subscription url
16 |         self.user_id: List[str] = []  # subscribed users (QQ)
17 |         self.group_id: List[str] = []  # subscribed groups
18 |         self.guild_channel_id: List[str] = []  # subscribed guild channels
19 |         self.img_proxy: bool = False
20 |         self.time: str = "5"  # update interval, minutes per check
21 |         self.translation: bool = False  # translate content
22 |         self.only_title: bool = False  # title only
23 |         self.only_pic: bool = False  # pictures only
24 |         self.only_has_pic: bool = False  # only items that contain pictures
25 |         self.download_pic: bool = False  # whether to download pictures
26 |         self.cookies: Dict[str, str] = {}
27 |         self.down_torrent: bool = False  # whether to download torrents
28 |         self.down_torrent_keyword: str = ""  # whitelist keyword, regex supported
29 |         self.black_keyword: str = ""  # blacklist keyword
30 |         self.is_open_upload_group: bool = True  # upload to group, enabled by default
31 |         self.duplicate_filter_mode: List[str] = []  # deduplication modes
32 |         self.max_image_number: int = 0  # image count limit, to keep messages from flooding
33 |         self.content_to_remove: Optional[str] = None  # content to strip from the body, regex supported
34 |         self.etag: Optional[str] = None
35 |         self.last_modified: Optional[str] = None  # time of the last update
36 |         self.error_count: int = 0  # consecutive fetch failures; updating stops past 100
37 |         self.stop: bool = False  # stop updating
38 |         self.pikpak_offline: bool = False  # whether to use PikPak offline download
39 |         self.pikpak_path_key: str = (
40 |             ""  # regex matched against the PikPak offline download path, used to auto-file downloads, e.g. r"(?:\[.*?\][\s\S])([\s\S]*)[\s\S]-"
41 |         )
42 | if data:
43 | self.__dict__.update(data)
44 |
45 |     # Return the subscription url
46 | def get_url(self, rsshub: str = config.rsshub) -> str:
47 | if URL(self.url).scheme in ["http", "https"]:
48 | return self.url
49 |         # Check whether the url starts with /
50 | if self.url.startswith("/"):
51 | return rsshub + self.url
52 |
53 | return f"{rsshub}/{self.url}"
54 |
55 |     # Read the stored subscriptions
56 | @staticmethod
57 | def read_rss() -> List["Rss"]:
58 |         # If the file does not exist
59 | if not JSON_PATH.exists():
60 | return []
61 | with TinyDB(
62 | JSON_PATH,
63 | encoding="utf-8",
64 | sort_keys=True,
65 | indent=4,
66 | ensure_ascii=False,
67 | ) as db:
68 | rss_list = [Rss(rss) for rss in db.all()]
69 | return rss_list
70 |
71 |     # Replace special characters in the subscription name
72 | @staticmethod
73 | def handle_name(name: str) -> str:
74 | name = re.sub(r'[?*:"<>\\/|]', "_", name)
75 | if name == "rss":
76 | name = "rss_"
77 | return name
78 |
79 |     # Look up a subscription by name; "rss" must first be converted to "rss_"
80 | @staticmethod
81 | def get_one_by_name(name: str) -> Optional["Rss"]:
82 | feed_list = Rss.read_rss()
83 | return next((feed for feed in feed_list if feed.name == name), None)
84 |
85 |     # Add a subscriber
86 | def add_user_or_group_or_channel(
87 | self,
88 | user: Optional[str] = None,
89 | group: Optional[str] = None,
90 | guild_channel: Optional[str] = None,
91 | ) -> None:
92 | if user:
93 | if user in self.user_id:
94 | return
95 | self.user_id.append(user)
96 | elif group:
97 | if group in self.group_id:
98 | return
99 | self.group_id.append(group)
100 | elif guild_channel:
101 | if guild_channel in self.guild_channel_id:
102 | return
103 | self.guild_channel_id.append(guild_channel)
104 | self.upsert()
105 |
106 |     # Remove a subscribed group
107 | def delete_group(self, group: str) -> bool:
108 | if group not in self.group_id:
109 | return False
110 | self.group_id.remove(group)
111 | with TinyDB(
112 | JSON_PATH,
113 | encoding="utf-8",
114 | sort_keys=True,
115 | indent=4,
116 | ensure_ascii=False,
117 | ) as db:
118 | db.update(tinydb_set("group_id", self.group_id), Query().name == self.name) # type: ignore
119 | return True
120 |
121 |     # Remove a subscribed guild channel
122 | def delete_guild_channel(self, guild_channel: str) -> bool:
123 | if guild_channel not in self.guild_channel_id:
124 | return False
125 | self.guild_channel_id.remove(guild_channel)
126 | with TinyDB(
127 | JSON_PATH,
128 | encoding="utf-8",
129 | sort_keys=True,
130 | indent=4,
131 | ensure_ascii=False,
132 | ) as db:
133 | db.update(
134 | tinydb_set("guild_channel_id", self.guild_channel_id), Query().name == self.name # type: ignore
135 | )
136 | return True
137 |
138 |     # Delete the whole subscription
139 | def delete_rss(self) -> None:
140 | with TinyDB(
141 | JSON_PATH,
142 | encoding="utf-8",
143 | sort_keys=True,
144 | indent=4,
145 | ensure_ascii=False,
146 | ) as db:
147 | db.remove(Query().name == self.name)
148 | self.delete_file()
149 |
150 |     # Rename the subscription's cache json file
151 | def rename_file(self, target: str) -> None:
152 | source = DATA_PATH / f"{Rss.handle_name(self.name)}.json"
153 | if source.exists():
154 | source.rename(target)
155 |
156 |     # Delete the subscription's cache json file
157 | def delete_file(self) -> None:
158 | (DATA_PATH / f"{Rss.handle_name(self.name)}.json").unlink(missing_ok=True)
159 |
160 |     # For privacy, hide every group, channel and QQ id other than the current one
161 | def hide_some_infos(
162 | self, group_id: Optional[int] = None, guild_channel_id: Optional[str] = None
163 | ) -> "Rss":
164 | if not group_id and not guild_channel_id:
165 | return self
166 | rss_tmp = deepcopy(self)
167 | rss_tmp.guild_channel_id = [guild_channel_id, "*"] if guild_channel_id else []
168 | rss_tmp.group_id = [str(group_id), "*"] if group_id else []
169 | rss_tmp.user_id = ["*"] if rss_tmp.user_id else []
170 | return rss_tmp
171 |
172 | @staticmethod
173 | def get_by_guild_channel(guild_channel_id: str) -> List["Rss"]:
174 | rss_old = Rss.read_rss()
175 | return [
176 | rss.hide_some_infos(guild_channel_id=guild_channel_id)
177 | for rss in rss_old
178 | if guild_channel_id in rss.guild_channel_id
179 | ]
180 |
181 | @staticmethod
182 | def get_by_group(group_id: int) -> List["Rss"]:
183 | rss_old = Rss.read_rss()
184 | return [
185 | rss.hide_some_infos(group_id=group_id)
186 | for rss in rss_old
187 | if str(group_id) in rss.group_id
188 | ]
189 |
190 | @staticmethod
191 | def get_by_user(user: str) -> List["Rss"]:
192 | rss_old = Rss.read_rss()
193 | return [rss for rss in rss_old if user in rss.user_id]
194 |
195 | def set_cookies(self, cookies: str) -> None:
196 | self.cookies = cookies
197 | with TinyDB(
198 | JSON_PATH,
199 | encoding="utf-8",
200 | sort_keys=True,
201 | indent=4,
202 | ensure_ascii=False,
203 | ) as db:
204 | db.update(tinydb_set("cookies", cookies), Query().name == self.name) # type: ignore
205 |
206 | def upsert(self, old_name: Optional[str] = None) -> None:
207 | with TinyDB(
208 | JSON_PATH,
209 | encoding="utf-8",
210 | sort_keys=True,
211 | indent=4,
212 | ensure_ascii=False,
213 | ) as db:
214 | if old_name:
215 | db.update(self.__dict__, Query().name == old_name)
216 | else:
217 | db.upsert(self.__dict__, Query().name == str(self.name))
218 |
219 | def __str__(self) -> str:
220 | mode_name = {"link": "链接", "title": "标题", "image": "图片"}
221 | mode_msg = ""
222 | if self.duplicate_filter_mode:
223 | delimiter = " 或 " if "or" in self.duplicate_filter_mode else "、"
224 | mode_msg = (
225 | "已启用去重模式,"
226 | f"{delimiter.join(mode_name[i] for i in self.duplicate_filter_mode if i != 'or')} 相同时去重"
227 | )
228 | ret_list = [
229 | f"名称:{self.name}",
230 | f"订阅地址:{self.url}",
231 | f"订阅QQ:{self.user_id}" if self.user_id else "",
232 | f"订阅群:{self.group_id}" if self.group_id else "",
233 | f"订阅子频道:{self.guild_channel_id}" if self.guild_channel_id else "",
234 | f"更新时间:{self.time}",
235 | f"代理:{self.img_proxy}" if self.img_proxy else "",
236 | f"翻译:{self.translation}" if self.translation else "",
237 | f"仅标题:{self.only_title}" if self.only_title else "",
238 | f"仅图片:{self.only_pic}" if self.only_pic else "",
239 | f"下载图片:{self.download_pic}" if self.download_pic else "",
240 | f"仅含有图片:{self.only_has_pic}" if self.only_has_pic else "",
241 | f"白名单关键词:{self.down_torrent_keyword}" if self.down_torrent_keyword else "",
242 | f"黑名单关键词:{self.black_keyword}" if self.black_keyword else "",
243 | f"cookies:{self.cookies}" if self.cookies else "",
244 | "种子自动下载功能已启用" if self.down_torrent else "",
245 | "" if self.is_open_upload_group else f"是否上传到群:{self.is_open_upload_group}",
246 | f"{mode_msg}" if self.duplicate_filter_mode else "",
247 | f"图片数量限制:{self.max_image_number}" if self.max_image_number else "",
248 | f"正文待移除内容:{self.content_to_remove}" if self.content_to_remove else "",
249 | f"连续抓取失败的次数:{self.error_count}" if self.error_count else "",
250 | f"停止更新:{self.stop}" if self.stop else "",
251 | f"PikPak离线: {self.pikpak_offline}" if self.pikpak_offline else "",
252 | f"PikPak离线路径匹配: {self.pikpak_path_key}" if self.pikpak_path_key else "",
253 | ]
254 | return "\n".join([i for i in ret_list if i != ""])
255 |
--------------------------------------------------------------------------------
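Every mutator in rss_class.py funnels through the same TinyDB pattern: open the JSON store, match the record by subscription name, apply an update, and let the context manager flush. A minimal standalone sketch of that pattern (the subs.json path and field values are made up for illustration; the plugin itself opens JSON_PATH with the same keyword arguments):

```python
from tinydb import Query, TinyDB
from tinydb.operations import set as tinydb_set

# Hypothetical store path; the plugin uses JSON_PATH instead.
with TinyDB(
    "subs.json", encoding="utf-8", sort_keys=True, indent=4, ensure_ascii=False
) as db:
    # Insert-or-update a whole record, as Rss.upsert() does.
    db.upsert({"name": "test1", "group_id": ["123"]}, Query().name == "test1")
    # Update a single field on the matched record, as set_cookies() and
    # delete_guild_channel() do.
    db.update(tinydb_set("group_id", ["123", "456"]), Query().name == "test1")
    print(db.get(Query().name == "test1"))  # {'name': 'test1', 'group_id': ['123', '456']}
```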
/rss2/parsing/__init__.py:
--------------------------------------------------------------------------------
1 | import re
2 | import sqlite3
3 | from difflib import SequenceMatcher
4 | from typing import Any, Dict, List
5 |
6 | import arrow
7 | import emoji
8 | from nonebot.log import logger
9 | from pyquery import PyQuery as Pq
10 |
11 | from ..config import DATA_PATH, config
12 | from ..rss_class import Rss
13 | from .cache_manage import (
14 | cache_db_manage,
15 | cache_json_manage,
16 | duplicate_exists,
17 | insert_into_cache_db,
18 | write_item,
19 | )
20 | from .check_update import check_update, get_item_date
21 | from .download_torrent import down_torrent, pikpak_offline
22 | from .handle_html_tag import handle_html_tag
23 | from .handle_images import handle_img
24 | from .handle_translation import handle_translation
25 | from .parsing_rss import ParsingBase
26 | from .routes import *
27 | from .send_message import send_msg
28 | from .utils import get_proxy, get_summary
29 |
30 |
31 | # Check for updates
32 | @ParsingBase.append_before_handler(priority=10)
33 | async def handle_check_update(rss: Rss, state: Dict[str, Any]):
34 | db = state.get("tinydb")
35 | change_data = check_update(db, state.get("new_data"))
36 | return {"change_data": change_data}
37 |
38 |
39 | # Decide whether items meet the push conditions
40 | @ParsingBase.append_before_handler(priority=11) # type: ignore
41 | async def handle_check_update(rss: Rss, state: Dict[str, Any]):
42 | change_data = state.get("change_data")
43 | db = state.get("tinydb")
44 | for item in change_data.copy():
45 | summary = get_summary(item)
46 |         # Check for blocked words
47 | if config.black_word and re.findall("|".join(config.black_word), summary):
48 | logger.info("内含屏蔽词,已经取消推送该消息")
49 | write_item(db, item)
50 | change_data.remove(item)
51 | continue
52 |         # Check the whitelist keywords, stored in down_torrent_keyword; the name is historical baggage, it is really a whitelist
53 | if rss.down_torrent_keyword and not re.search(
54 | rss.down_torrent_keyword, summary
55 | ):
56 | write_item(db, item)
57 | change_data.remove(item)
58 | continue
59 |         # Check the blacklist keywords, stored in black_keyword
60 | if rss.black_keyword and (
61 | re.search(rss.black_keyword, item["title"])
62 | or re.search(rss.black_keyword, summary)
63 | ):
64 | write_item(db, item)
65 | change_data.remove(item)
66 | continue
67 |         # Check whether only messages containing images should be pushed
68 | if (rss.only_pic or rss.only_has_pic) and not re.search(
69 |             r"<img[^>]+>|\[img]", summary
70 | ):
71 | logger.info(f"{rss.name} 已开启仅图片/仅含有图片,该消息没有图片,将跳过")
72 | write_item(db, item)
73 | change_data.remove(item)
74 |
75 | return {"change_data": change_data}
76 |
77 |
78 | # If deduplication is enabled, filter the push list
79 | @ParsingBase.append_before_handler(priority=12) # type: ignore
80 | async def handle_check_update(rss: Rss, state: Dict[str, Any]):
81 | change_data = state.get("change_data")
82 | conn = state.get("conn")
83 | db = state.get("tinydb")
84 |
85 |     # Check whether deduplication is enabled via the duplicate_filter_mode field
86 | if not rss.duplicate_filter_mode:
87 | return {"change_data": change_data}
88 |
89 | if not conn:
90 | conn = sqlite3.connect(str(DATA_PATH / "cache.db"))
91 | conn.set_trace_callback(logger.debug)
92 |
93 | cache_db_manage(conn)
94 |
95 | delete = []
96 | for index, item in enumerate(change_data):
97 | is_duplicate, image_hash = await duplicate_exists(
98 | rss=rss,
99 | conn=conn,
100 | item=item,
101 | summary=get_summary(item),
102 | )
103 | if is_duplicate:
104 | write_item(db, item)
105 | delete.append(index)
106 | else:
107 | change_data[index]["image_hash"] = str(image_hash)
108 |
109 | change_data = [
110 | item for index, item in enumerate(change_data) if index not in delete
111 | ]
112 |
113 | return {
114 | "change_data": change_data,
115 | "conn": conn,
116 | }
117 |
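`duplicate_exists` lives in cache_manage.py and is not shown here; conceptually it checks the new item against the SQLite cache according to the configured modes. A simplified, illustrative sketch of the link/title part of such a check (the table and column names are hypothetical, and the image-hash mode is ignored):

```python
import sqlite3
from typing import Any, Dict, List


def simple_duplicate_check(
    conn: sqlite3.Connection, item: Dict[str, Any], modes: List[str]
) -> bool:
    # "or" in modes means any single field match counts; otherwise all must match.
    fields = [m for m in modes if m in ("link", "title")]
    if not fields:
        return False
    joiner = " OR " if "or" in modes else " AND "
    where = joiner.join(f"{f} = ?" for f in fields)
    # Hypothetical schema: main(link TEXT, title TEXT, image_hash TEXT)
    cur = conn.execute(f"SELECT 1 FROM main WHERE {where}", [item[f] for f in fields])
    return cur.fetchone() is not None
```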
118 |
119 | # Handle the title
120 | @ParsingBase.append_handler(parsing_type="title")
121 | async def handle_title(
122 | rss: Rss,
123 | state: Dict[str, Any],
124 | item: Dict[str, Any],
125 | item_msg: str,
126 | tmp: str,
127 | tmp_state: Dict[str, Any],
128 | ) -> str:
129 |     # Skip the title if picture-only pushing is enabled
130 | if rss.only_pic:
131 | return ""
132 |
133 | title = item["title"]
134 |
135 | if not config.blockquote:
136 | title = re.sub(r" - 转发 .*", "", title)
137 |
138 | res = f"标题:{title}\n"
139 |     # Separate the title from the body
140 | if not rss.only_title:
141 | res += "\n"
142 | if rss.translation:
143 | res += await handle_translation(content=title)
144 |
145 |     # If title-only pushing is enabled, skip the title/body similarity check below
146 | if rss.only_title:
147 | return emoji.emojize(res, language="alias")
148 |
149 |     # Compare title/body similarity, to catch titles identical to the body or equal to its first N characters
150 | try:
151 | summary_html = Pq(get_summary(item))
152 | if not config.blockquote:
153 | summary_html.remove("blockquote")
154 | similarity = SequenceMatcher(None, summary_html.text()[: len(title)], title)
155 |         # Title/body similarity threshold
156 | if similarity.ratio() > 0.6:
157 | res = ""
158 | except Exception as e:
159 | logger.warning(f"{rss.name} 没有正文内容!{e}")
160 |
161 | return emoji.emojize(res, language="alias")
162 |
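The guard above compares the title against an equal-length prefix of the body text and drops the title line once `SequenceMatcher.ratio()` exceeds 0.6. A quick illustration with made-up strings:

```python
from difflib import SequenceMatcher

title = "Release v1.2.0"
summary_text = "Release v1.2.0 is out with assorted bug fixes"
# Same comparison as handle_title: the title vs. an equal-length prefix of the body.
ratio = SequenceMatcher(None, summary_text[: len(title)], title).ratio()
print(ratio)  # 1.0, above the 0.6 threshold, so the redundant title line is dropped
```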
163 |
164 | # Handle the body: check title-only / picture-only modes
165 | @ParsingBase.append_handler(parsing_type="summary", priority=1)
166 | async def handle_summary(
167 | rss: Rss,
168 | state: Dict[str, Any],
169 | item: Dict[str, Any],
170 | item_msg: str,
171 | tmp: str,
172 | tmp_state: Dict[str, Any],
173 | ) -> str:
174 | if rss.only_title or rss.only_pic:
175 | tmp_state["continue"] = False
176 | return ""
177 |
178 |
179 | # Handle the body: process HTML tags
180 | @ParsingBase.append_handler(parsing_type="summary", priority=10) # type: ignore
181 | async def handle_summary(
182 | rss: Rss,
183 | state: Dict[str, Any],
184 | item: Dict[str, Any],
185 | item_msg: str,
186 | tmp: str,
187 | tmp_state: Dict[str, Any],
188 | ) -> str:
189 | try:
190 | tmp += handle_html_tag(html=Pq(get_summary(item)))
191 | except Exception as e:
192 | logger.warning(f"{rss.name} 没有正文内容!{e}")
193 | return tmp
194 |
195 |
196 | # Handle the body: remove specified content
197 | @ParsingBase.append_handler(parsing_type="summary", priority=11) # type: ignore
198 | async def handle_summary(
199 | rss: Rss,
200 | state: Dict[str, Any],
201 | item: Dict[str, Any],
202 | item_msg: str,
203 | tmp: str,
204 | tmp_state: Dict[str, Any],
205 | ) -> str:
206 |     # Remove the specified content
207 | if rss.content_to_remove:
208 | for pattern in rss.content_to_remove:
209 | tmp = re.sub(pattern, "", tmp)
210 |     # Collapse redundant blank lines
211 | while "\n\n\n" in tmp:
212 | tmp = tmp.replace("\n\n\n", "\n\n")
213 | tmp = tmp.strip()
214 | return emoji.emojize(tmp, language="alias")
215 |
216 |
217 | # Handle the body: translation
218 | @ParsingBase.append_handler(parsing_type="summary", priority=12) # type: ignore
219 | async def handle_summary(
220 | rss: Rss,
221 | state: Dict[str, Any],
222 | item: Dict[str, Any],
223 | item_msg: str,
224 | tmp: str,
225 | tmp_state: Dict[str, Any],
226 | ) -> str:
227 | if rss.translation:
228 | tmp += await handle_translation(tmp)
229 | return tmp
230 |
231 |
232 | # Handle pictures
233 | @ParsingBase.append_handler(parsing_type="picture")
234 | async def handle_picture(
235 | rss: Rss,
236 | state: Dict[str, Any],
237 | item: Dict[str, Any],
238 | item_msg: str,
239 | tmp: str,
240 | tmp_state: Dict[str, Any],
241 | ) -> str:
242 |
243 |     # Skip pictures if title-only pushing is enabled
244 | if rss.only_title:
245 | return ""
246 |
247 | res = ""
248 | try:
249 | res += await handle_img(
250 | item=item,
251 | img_proxy=rss.img_proxy,
252 | img_num=rss.max_image_number,
253 | )
254 | except Exception as e:
255 | logger.warning(f"{rss.name} 没有正文内容!{e}")
256 |
257 |     # If picture-only pushing is enabled, drop the accumulated text
258 | return f"{res}\n" if rss.only_pic else f"{tmp + res}\n"
259 |
260 |
261 | # Handle the source link
262 | @ParsingBase.append_handler(parsing_type="source")
263 | async def handle_source(
264 | rss: Rss,
265 | state: Dict[str, Any],
266 | item: Dict[str, Any],
267 | item_msg: str,
268 | tmp: str,
269 | tmp_state: Dict[str, Any],
270 | ) -> str:
271 | return f"链接:{item['link']}\n"
272 |
273 |
274 | # Handle torrents
275 | @ParsingBase.append_handler(parsing_type="torrent")
276 | async def handle_torrent(
277 | rss: Rss,
278 | state: Dict[str, Any],
279 | item: Dict[str, Any],
280 | item_msg: str,
281 | tmp: str,
282 | tmp_state: Dict[str, Any],
283 | ) -> str:
284 | res: List[str] = []
285 | if not rss.is_open_upload_group:
286 | rss.group_id = []
287 | if rss.down_torrent:
288 |         # Download the torrent
289 | try:
290 | hash_list = await down_torrent(
291 | rss=rss, item=item, proxy=get_proxy(rss.img_proxy)
292 | )
293 | if hash_list and hash_list[0] is not None:
294 | res.append("\n磁力:")
295 | res.extend([f"magnet:?xt=urn:btih:{h}" for h in hash_list])
296 | except Exception:
297 | logger.exception("下载种子时出错")
298 | if rss.pikpak_offline:
299 | try:
300 | result = await pikpak_offline(
301 | rss=rss, item=item, proxy=get_proxy(rss.img_proxy)
302 | )
303 | if result:
304 | res.append("\nPikPak 离线成功")
305 | res.extend(
306 | [
307 | f"{r.get('name')}\n{r.get('file_size')} - {r.get('path')}"
308 | for r in result
309 | ]
310 | )
311 | except Exception:
312 | logger.exception("PikPak 离线时出错")
313 | return "\n".join(res)
314 |
315 |
316 | # Handle the date
317 | @ParsingBase.append_handler(parsing_type="date")
318 | async def handle_date(
319 | rss: Rss,
320 | state: Dict[str, Any],
321 | item: Dict[str, Any],
322 | item_msg: str,
323 | tmp: str,
324 | tmp_state: Dict[str, Any],
325 | ) -> str:
326 | date = get_item_date(item)
327 | date = date.replace(tzinfo="local") if date > arrow.now() else date.to("local")
328 | return f"日期:{date.format('YYYY年MM月DD日 HH:mm:ss')}"
329 |
330 |
331 | # Send the message
332 | @ParsingBase.append_handler(parsing_type="after")
333 | async def handle_message(
334 | rss: Rss,
335 | state: Dict[str, Any],
336 | item: Dict[str, Any],
337 | item_msg: str,
338 | tmp: str,
339 | tmp_state: Dict[str, Any],
340 | ) -> str:
341 | db = state["tinydb"]
342 |
343 |     # Send the message and write the item to the cache file
344 | if await send_msg(rss=rss, msg=item_msg, item=item):
345 |
346 | if rss.duplicate_filter_mode:
347 | insert_into_cache_db(
348 | conn=state["conn"], item=item, image_hash=item["image_hash"]
349 | )
350 |
351 | if item.get("to_send"):
352 | item.pop("to_send")
353 |
354 | state["item_count"] += 1
355 | else:
356 | item["to_send"] = True
357 |
358 | write_item(db, item)
359 |
360 | return ""
361 |
362 |
363 | @ParsingBase.append_after_handler()
364 | async def after_handler(rss: Rss, state: Dict[str, Any]) -> Dict[str, Any]:
365 | item_count: int = state["item_count"]
366 | conn = state["conn"]
367 | db = state["tinydb"]
368 |
369 | if item_count > 0:
370 | logger.info(f"{rss.name} 新消息推送完毕,共计:{item_count}")
371 | else:
372 | logger.info(f"{rss.name} 没有新信息")
373 |
374 | if conn is not None:
375 | conn.close()
376 |
377 | new_data_length = len(state["new_data"])
378 | cache_json_manage(db, new_data_length)
379 | db.close()
380 |
381 | return {}
382 |
--------------------------------------------------------------------------------
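Additional pipeline stages can be registered with the same decorators this file uses. A sketch that appends a footer after the stock summary handlers have run; the priority value and footer text are illustrative, and the import paths assume the repository layout shown in the tree above:

```python
from typing import Any, Dict

from rss2.parsing import ParsingBase
from rss2.rss_class import Rss


# Runs after the stock summary handlers at priorities 1/10/11/12.
@ParsingBase.append_handler(parsing_type="summary", priority=13)  # type: ignore
async def handle_summary(
    rss: Rss,
    state: Dict[str, Any],
    item: Dict[str, Any],
    item_msg: str,
    tmp: str,
    tmp_state: Dict[str, Any],
) -> str:
    # Append a footer to whatever the earlier handlers accumulated in tmp.
    return f"{tmp}\n(via {rss.name})"
```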
/rss2/parsing/handle_images.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import random
3 | import re
4 | from io import BytesIO
5 | from typing import Any, Dict, Optional, Tuple, Union
6 |
7 | import aiohttp
8 | from nonebot.log import logger
9 | from PIL import Image, UnidentifiedImageError
10 | from pyquery import PyQuery as Pq
11 | from tenacity import RetryError, retry, stop_after_attempt, stop_after_delay
12 | from yarl import URL
13 |
14 | from ..config import Path, config
15 | from ..rss_class import Rss
16 | from .utils import get_proxy, get_summary
17 |
18 |
19 | # Compress a GIF via ezgif
20 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30)))
21 | async def resize_gif(url: str, resize_ratio: int = 2) -> Optional[bytes]:
22 | async with aiohttp.ClientSession() as session:
23 | resp = await session.post(
24 | "https://s3.ezgif.com/resize",
25 | data={"new-image-url": url},
26 | )
27 | d = Pq(await resp.text())
28 | next_url = d("form").attr("action")
29 | file = d("form > input[type=hidden]:nth-child(1)").attr("value")
30 | token = d("form > input[type=hidden]:nth-child(2)").attr("value")
31 | old_width = d("form > input[type=hidden]:nth-child(3)").attr("value")
32 | old_height = d("form > input[type=hidden]:nth-child(4)").attr("value")
33 | data = {
34 | "file": file,
35 | "token": token,
36 | "old_width": old_width,
37 | "old_height": old_height,
38 | "width": str(int(old_width) // resize_ratio),
39 | "method": "gifsicle",
40 | "ar": "force",
41 | }
42 | resp = await session.post(next_url, params="ajax=true", data=data)
43 | d = Pq(await resp.text())
44 | output_img_url = "https:" + d("img:nth-child(1)").attr("src")
45 | return await download_image(output_img_url)
46 |
47 |
48 | # Convert the middle 4 seconds of a video into a GIF preview via ezgif
49 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30)))
50 | async def get_preview_gif_from_video(url: str) -> str:
51 | async with aiohttp.ClientSession() as session:
52 | resp = await session.post(
53 | "https://s3.ezgif.com/video-to-gif",
54 | data={"new-image-url": url},
55 | )
56 | d = Pq(await resp.text())
57 | video_length = re.search(
58 | r"\d\d:\d\d:\d\d", str(d("#main > p.filestats > strong"))
59 | ).group() # type: ignore
60 | hours = int(video_length.split(":")[0])
61 | minutes = int(video_length.split(":")[1])
62 | seconds = int(video_length.split(":")[2])
63 | video_length_median = (hours * 60 * 60 + minutes * 60 + seconds) // 2
64 | next_url = d("form").attr("action")
65 | file = d("form > input[type=hidden]:nth-child(1)").attr("value")
66 | token = d("form > input[type=hidden]:nth-child(2)").attr("value")
67 | default_end = d("#end").attr("value")
68 | if float(default_end) >= 4:
69 | start = video_length_median - 2
70 | end = video_length_median + 2
71 | else:
72 | start = 0
73 | end = default_end
74 | data = {
75 | "file": file,
76 | "token": token,
77 | "start": start,
78 | "end": end,
79 | "size": 320,
80 | "fps": 25,
81 | "method": "ffmpeg",
82 | }
83 | resp = await session.post(next_url, params="ajax=true", data=data)
84 | d = Pq(await resp.text())
85 | return f'https:{d("img:nth-child(1)").attr("src")}'
86 |
87 |
88 | # Image compression
89 | async def zip_pic(url: str, content: bytes) -> Union[Image.Image, bytes, None]:
90 |     # Open a JPEG/PNG/GIF/WEBP image file
91 | try:
92 | im = Image.open(BytesIO(content))
93 | except UnidentifiedImageError:
94 | logger.error(f"无法识别图像文件 链接:[{url}]")
95 | return None
96 | if im.format != "GIF":
97 |         # Convert WEBP images to PNG first
98 | if im.format == "WEBP":
99 | with BytesIO() as output:
100 | im.save(output, "PNG")
101 | im = Image.open(output)
102 |         # Shrink the image
103 | im.thumbnail((config.zip_size, config.zip_size))
104 | width, height = im.size
105 | logger.debug(f"Resize image to: {width} x {height}")
106 |         # Anti-censorship: randomize the four corner pixels
107 | points = [(0, 0), (0, height - 1), (width - 1, 0), (width - 1, height - 1)]
108 | for x, y in points:
109 | im.putpixel((x, y), random.randint(0, 255))
110 | return im
111 | else:
112 | if len(content) > config.gif_zip_size * 1024:
113 | try:
114 | return await resize_gif(url)
115 | except RetryError:
116 | logger.error(f"GIF 图片[{url}]压缩失败,将发送原图")
117 |         return content
118 |
119 |
120 | # Convert an image to base64
121 | def get_pic_base64(content: Union[Image.Image, bytes, None]) -> str:
122 | if not content:
123 | return ""
124 | if isinstance(content, Image.Image):
125 | with BytesIO() as output:
126 | content.save(output, format=content.format)
127 | content = output.getvalue()
128 | if isinstance(content, bytes):
129 | return str(base64.b64encode(content).decode())
130 | return ""
131 |
132 |
133 | # Screw pixiv.cat: resolve its links to the original image URL
134 | async def fuck_pixiv_cat(url: str) -> str:
135 | img_id = re.sub("https://pixiv.cat/", "", url)
136 | img_id = img_id[:-4]
137 | info_list = img_id.split("-")
138 | async with aiohttp.ClientSession() as session:
139 | try:
140 | resp = await session.get(
141 | f"https://api.obfs.dev/api/pixiv/illust?id={info_list[0]}"
142 | )
143 | resp_json = await resp.json()
144 | if len(info_list) >= 2:
145 | return str(
146 | resp_json["illust"]["meta_pages"][int(info_list[1]) - 1][
147 | "image_urls"
148 | ]["original"]
149 | )
150 | else:
151 | return str(
152 | resp_json["illust"]["meta_single_page"]["original_image_url"]
153 | )
154 | except Exception as e:
155 | logger.error(f"处理pixiv.cat链接时出现问题 :{e} 链接:[{url}]")
156 | return url
157 |
158 |
159 | @retry(stop=(stop_after_attempt(5) | stop_after_delay(30)))
160 | async def download_image_detail(url: str, proxy: bool) -> Optional[bytes]:
161 | async with aiohttp.ClientSession(raise_for_status=True) as session:
162 | referer = f"{URL(url).scheme}://{URL(url).host}/"
163 | headers = {"referer": referer}
164 | try:
165 | resp = await session.get(
166 | url, headers=headers, proxy=get_proxy(open_proxy=proxy)
167 | )
168 |             # If the image cannot be fetched, bail out immediately
169 | if len(await resp.read()) == 0:
170 | if "pixiv.cat" in url:
171 | url = await fuck_pixiv_cat(url=url)
172 | return await download_image(url, proxy)
173 | logger.error(
174 | f"图片[{url}]下载失败! Content-Type: {resp.headers['Content-Type']} status: {resp.status}"
175 | )
176 | return None
177 |             # If the image is an SVG, convert it to PNG first
178 | if resp.headers["Content-Type"].startswith("image/svg+xml"):
179 | next_url = str(
180 | URL("https://images.weserv.nl/").with_query(f"url={url}&output=png")
181 | )
182 | return await download_image(next_url, proxy)
183 | return await resp.read()
184 | except Exception as e:
185 | logger.warning(f"图片[{url}]下载失败!将重试最多 5 次!\n{e}")
186 | raise
187 |
188 |
189 | async def download_image(url: str, proxy: bool = False) -> Optional[bytes]:
190 | try:
191 | return await download_image_detail(url=url, proxy=proxy)
192 | except RetryError:
193 | logger.error(f"图片[{url}]下载失败!已达最大重试次数!有可能需要开启代理!")
194 | return None
195 |
196 |
197 | async def handle_img_combo(url: str, img_proxy: bool, rss: Optional[Rss] = None) -> str:
198 |     """
199 |     Download an image and return a usable CQ code
200 |
201 |     Args:
202 |         url: address of the image to download
203 |         img_proxy: whether to use a proxy to download the image
204 |         rss: the Rss object
205 |     Returns:
206 |         the image as a CQ code, base64-encoded for sending;
207 |         a "missing image" notice if the download fails
208 |     """
209 | content = await download_image(url, img_proxy)
210 | if content:
211 | if rss is not None and rss.download_pic:
212 | _url = URL(url)
213 | logger.debug(f"正在保存图片: {url}")
214 | try:
215 | save_image(content=content, file_url=_url, rss=rss)
216 | except Exception as e:
217 | logger.warning(e)
218 | logger.warning("在保存图片到本地时出现错误")
219 | resize_content = await zip_pic(url, content)
220 | if img_base64 := get_pic_base64(resize_content):
221 | return f"[CQ:image,file=base64://{img_base64}]"
222 | return f"\n图片走丢啦: {url}\n"
223 |
224 |
225 | async def handle_img_combo_with_content(gif_url: str, content: bytes) -> str:
226 | resize_content = await zip_pic(gif_url, content)
227 | if img_base64 := get_pic_base64(resize_content):
228 | return f"[CQ:image,file=base64://{img_base64}]"
229 | return "\n图片走丢啦\n"
230 |
231 |
232 | # Handle pictures and videos
233 | async def handle_img(item: Dict[str, Any], img_proxy: bool, img_num: int) -> str:
234 | if item.get("image_content"):
235 | return await handle_img_combo_with_content(
236 | item.get("gif_url", ""), item["image_content"]
237 | )
238 | html = Pq(get_summary(item))
239 | img_str = ""
240 |     # Handle pictures
241 | doc_img = list(html("img").items())
242 |     # Send only a limited number of pictures to avoid flooding the chat
243 | if 0 < img_num < len(doc_img):
244 | img_str += f"\n因启用图片数量限制,目前只有 {img_num} 张图片:"
245 | doc_img = doc_img[:img_num]
246 | for img in doc_img:
247 | url = img.attr("src")
248 | img_str += await handle_img_combo(url, img_proxy)
249 |
250 |     # Handle videos
251 | if doc_video := html("video"):
252 | img_str += "\n视频封面:"
253 | for video in doc_video.items():
254 | url = video.attr("poster")
255 | img_str += await handle_img_combo(url, img_proxy)
256 |
257 | return img_str
258 |
259 |
260 | # Handle bbcode images
261 | async def handle_bbcode_img(html: Pq, img_proxy: bool, img_num: int) -> str:
262 | img_str = ""
263 |     # Handle pictures
264 | img_list = re.findall(r"\[img[^]]*](.+)\[/img]", str(html), flags=re.I)
265 |     # Send only a limited number of pictures to avoid flooding the chat
266 | if 0 < img_num < len(img_list):
267 | img_str += f"\n因启用图片数量限制,目前只有 {img_num} 张图片:"
268 | img_list = img_list[:img_num]
269 | for img_tmp in img_list:
270 | img_str += await handle_img_combo(img_tmp, img_proxy)
271 |
272 | return img_str
273 |
274 |
275 | def file_name_format(file_url: URL, rss: Rss) -> Tuple[Path, str]:
276 | """
277 |     Format the file name according to user-configured rules
278 | """
279 | format_rule = config.img_format
280 | down_path = config.img_down_path
281 |     rules = {  # placeholder replacements for the format rule
282 | "{subs}": rss.name,
283 | "{name}": file_url.name
284 | if "{ext}" not in format_rule
285 | else Path(file_url.name).stem,
286 | "{ext}": file_url.suffix if "{ext}" in format_rule else "",
287 | }
288 | for k, v in rules.items():
289 | format_rule = format_rule.replace(k, v)
290 |     if down_path == "":  # If no save path is configured, save to the default directory
291 | save_path = Path().cwd() / "data" / "image"
292 | elif down_path[0] == ".":
293 | save_path = Path().cwd() / Path(down_path)
294 | else:
295 | save_path = Path(down_path)
296 | full_path = save_path / format_rule
297 | save_path = full_path.parents[0]
298 | save_name = full_path.name
299 | return save_path, save_name
300 |
301 |
302 | def save_image(content: bytes, file_url: URL, rss: Rss) -> None:
303 | """
304 |     Save the original, pre-compression image to the local machine
305 | """
306 | save_path, save_name = file_name_format(file_url=file_url, rss=rss)
307 |
308 | full_save_path = save_path / save_name
309 | try:
310 | full_save_path.write_bytes(content)
311 | except FileNotFoundError:
312 |         # The directory does not exist on first write; create it
313 | save_path.mkdir(parents=True)
314 | full_save_path.write_bytes(content)
315 |
--------------------------------------------------------------------------------
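A rough trace of how `file_name_format` expands its placeholders, assuming `img_format` is `{subs}/{name}{ext}` and `img_down_path` is left empty (both values are illustrative):

```python
from pathlib import Path

from yarl import URL

file_url = URL("https://i.pximg.net/img-original/img/0001/12345_p0.png")
rule = "{subs}/{name}{ext}"
rule = rule.replace("{subs}", "my_feed")
rule = rule.replace("{name}", Path(file_url.name).stem)  # "12345_p0", since {ext} is present
rule = rule.replace("{ext}", file_url.suffix)  # ".png"
full_path = Path.cwd() / "data" / "image" / rule  # empty img_down_path -> default root
print(full_path.parent)  # .../data/image/my_feed
print(full_path.name)    # 12345_p0.png
```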
/rss2/command/change_dy.py:
--------------------------------------------------------------------------------
1 | import re
2 | from contextlib import suppress
3 | from copy import deepcopy
4 | from typing import Any, List, Match, Optional
5 |
6 | from nonebot import on_command, CommandSession
7 | from nonebot.log import logger
8 |
9 | from .. import my_trigger as tr
10 | from ..config import DATA_PATH
11 | from ..permission import admin_permission
12 | from ..rss_class import Rss
13 | from ..utils import regex_validate
14 |
15 | prompt = """\
16 | 请输入要修改的订阅
17 | 订阅名[,订阅名,...] 属性=值[ 属性=值 ...]
18 | 如:
19 | test1[,test2,...] qq=,123,234 qun=-1
20 | 对应参数:
21 | 订阅名(-name): 禁止将多个订阅批量改名,名称相同会冲突
22 | 订阅链接(-url)
23 | QQ(-qq)
24 | 群(-qun)
25 | 更新频率(-time)
26 | 代理(-proxy)
27 | 翻译(-tl)
28 | 仅Title(-ot)
29 | 仅图片(-op)
30 | 仅含图片(-ohp)
31 | 下载图片(-downpic): 下载图片到本地硬盘,仅pixiv有效
32 | 下载种子(-downopen)
33 | 白名单关键词(-wkey)
34 | 黑名单关键词(-bkey)
35 | 种子上传到群(-upgroup)
36 | 去重模式(-mode)
37 | 图片数量限制(-img_num): 只发送限定数量的图片,防止刷屏
38 | 正文移除内容(-rm_list): 从正文中移除指定内容,支持正则
39 | 停止更新(-stop): 停止更新订阅
40 | PikPak离线(-pikpak): 开启PikPak离线下载
41 | PikPak离线路径匹配(-ppk): 匹配离线下载的文件夹,设置该值后生效
42 | 注:
43 | 1. 仅含有图片不同于仅图片,除了图片还会发送正文中的其他文本信息
44 | 2. proxy/tl/ot/op/ohp/downopen/upgroup/stop/pikpak 值为 1/0
45 | 3. 去重模式分为按链接(link)、标题(title)、图片(image)判断,其中 image 模式生效对象限定为只带 1 张图片的消息。如果属性中带有 or 说明判断逻辑是任一匹配即去重,默认为全匹配
46 | 4. 白名单关键词支持正则表达式,匹配时推送消息及下载,设为空(wkey=)时不生效
47 | 5. 黑名单关键词同白名单相似,匹配时不推送,两者可以一起用
48 | 6. 正文待移除内容格式必须如:rm_list='a' 或 rm_list='a','b'。该处理过程在解析 html 标签后进行,设为空使用 rm_list='-1'
49 | 7. QQ、群号、去重模式前加英文逗号表示追加,-1设为空
50 | 8. 各个属性使用空格分割
51 | 9. downpic保存的文件位于程序根目录下 "data/image/订阅名/图片名"
52 | 详细用法请查阅文档。\
53 | """
54 |
55 | # Handle subscription properties that take multiple values
56 | def handle_property(value: str, property_list: List[Any]) -> List[Any]:
57 |     # Clear
58 | if value == "-1":
59 | return []
60 | value_list = value.split(",")
61 |     # Append
62 | if value_list[0] == "":
63 | value_list.pop(0)
64 | return property_list + [i for i in value_list if i not in property_list]
65 |     # Deduplicate user input while preserving the original order
66 | return list(dict.fromkeys(value_list))
67 |
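How `handle_property` treats its three input shapes (illustrative values, assuming the function above is in scope): `-1` clears the list, a leading comma appends, and anything else replaces the list after deduplication:

```python
print(handle_property("-1", ["123"]))     # []
print(handle_property(",456", ["123"]))   # ['123', '456']
print(handle_property("1,1,2", ["123"]))  # ['1', '2']
```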
68 |
69 | # Handle subscription properties that hold regular expressions
70 | def handle_regex_property(value: str, old_value: str) -> Optional[str]:
71 | result = None
72 | if not value:
73 | result = None
74 | elif value.startswith("+"):
75 | result = f"{old_value}|{value.lstrip('+')}" if old_value else value.lstrip("+")
76 | elif value.startswith("-"):
77 | if regex_list := old_value.split("|"):
78 | with suppress(ValueError):
79 | regex_list.remove(value.lstrip("-"))
80 | result = "|".join(regex_list) if regex_list else None
81 | else:
82 | result = value
83 | if isinstance(result, str) and not regex_validate(result):
84 | result = None
85 | return result
86 |
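`handle_regex_property` edits the stored pattern as a regex alternation: a leading `+` ORs a new branch on, a leading `-` removes one branch, anything else replaces the pattern outright, and invalid regexes collapse to None. For example (again assuming the function above is in scope):

```python
print(handle_regex_property("+bar", "foo"))      # 'foo|bar'
print(handle_regex_property("-bar", "foo|bar"))  # 'foo'
print(handle_regex_property("baz", "foo"))       # 'baz'
print(handle_regex_property("(", "foo"))         # None, fails regex_validate
```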
87 |
88 | attribute_dict = {
89 | "name": "name",
90 | "url": "url",
91 | "qq": "user_id",
92 | "qun": "group_id",
93 | "channel": "guild_channel_id",
94 | "time": "time",
95 | "proxy": "img_proxy",
96 | "tl": "translation",
97 | "ot": "only_title",
98 | "op": "only_pic",
99 | "ohp": "only_has_pic",
100 | "downpic": "download_pic",
101 | "upgroup": "is_open_upload_group",
102 | "downopen": "down_torrent",
103 | "downkey": "down_torrent_keyword",
104 | "wkey": "down_torrent_keyword",
105 | "blackkey": "black_keyword",
106 | "bkey": "black_keyword",
107 | "mode": "duplicate_filter_mode",
108 | "img_num": "max_image_number",
109 | "stop": "stop",
110 | "pikpak": "pikpak_offline",
111 | "ppk": "pikpak_path_key",
112 | }
113 |
114 |
115 | # Apply one property change to a subscription
116 | def handle_change_list(
117 | rss: Rss,
118 | key_to_change: str,
119 | value_to_change: str,
120 | group_id: Optional[int],
121 | guild_channel_id: Optional[str],
122 | ) -> None:
123 | if key_to_change == "name":
124 | tr.delete_job(rss)
125 | rss.rename_file(str(DATA_PATH / f"{Rss.handle_name(value_to_change)}.json"))
126 | elif (
127 | key_to_change in {"qq", "qun", "channel"}
128 | and not group_id
129 | and not guild_channel_id
130 | ) or key_to_change == "mode":
131 | value_to_change = handle_property(
132 | value_to_change, getattr(rss, attribute_dict[key_to_change])
133 | ) # type:ignore
134 | elif key_to_change == "time":
135 | if not re.search(r"[_*/,-]", value_to_change):
136 | if int(float(value_to_change)) < 1:
137 | value_to_change = "1"
138 | else:
139 | value_to_change = str(int(float(value_to_change)))
140 | elif key_to_change in {
141 | "proxy",
142 | "tl",
143 | "ot",
144 | "op",
145 | "ohp",
146 | "downpic",
147 | "upgroup",
148 | "downopen",
149 | "stop",
150 | "pikpak",
151 | }:
152 | value_to_change = bool(int(value_to_change)) # type:ignore
153 | if key_to_change == "stop" and not value_to_change and rss.error_count > 0:
154 | rss.error_count = 0
155 | elif key_to_change in {"downkey", "wkey", "blackkey", "bkey"}:
156 | value_to_change = handle_regex_property(
157 | value_to_change, getattr(rss, attribute_dict[key_to_change])
158 | ) # type:ignore
159 | elif key_to_change == "ppk" and not value_to_change:
160 | value_to_change = None # type:ignore
161 | elif key_to_change == "img_num":
162 | value_to_change = int(value_to_change) # type:ignore
163 | setattr(rss, attribute_dict.get(key_to_change), value_to_change) # type:ignore
164 |
165 |
166 | @on_command(
167 | "change", aliases=("修改订阅", "moddy"), permission=admin_permission, only_to_me=False
168 | )
169 | async def change(session: CommandSession) -> None:
170 | change_info = (await session.aget("change", prompt=prompt)).strip()
171 | group_id = session.ctx.get("group_id")
172 | guild_channel_id = session.ctx.get("guild_id")
173 | if guild_channel_id:
174 | group_id = None
175 | guild_channel_id = f"{guild_channel_id}@{session.ctx.get('channel_id')}"
176 | name_list = change_info.split(" ")[0].split(",")
177 | rss_list: List[Rss] = []
178 | for name in name_list:
179 | if rss_tmp := Rss.get_one_by_name(name=name):
180 | rss_list.append(rss_tmp)
181 |
182 |     # For fairness, only subscriptions whose sole subscriber is the current group or channel can be modified here; anything else requires a superuser
183 | if group_id:
184 | if re.search(" (qq|qun|channel)=", change_info):
185 | await session.finish("❌ 禁止在群组中修改订阅账号!如要取消订阅请使用 deldy 命令!")
186 | rss_list = [
187 | rss
188 | for rss in rss_list
189 | if rss.group_id == [str(group_id)]
190 | and not rss.user_id
191 | and not rss.guild_channel_id
192 | ]
193 |
194 | if guild_channel_id:
195 | if re.search(" (qq|qun|channel)=", change_info):
196 | await session.finish("❌ 禁止在子频道中修改订阅账号!如要取消订阅请使用 deldy 命令!")
197 | rss_list = [
198 | rss
199 | for rss in rss_list
200 | if rss.guild_channel_id == [str(guild_channel_id)]
201 | and not rss.user_id
202 |             and not rss.group_id
203 | ]
204 |
205 | if not rss_list:
206 | await session.finish("❌ 请检查是否存在以下问题:\n1.要修改的订阅名不存在对应的记录\n2.当前群组或频道无权操作")
207 | elif len(rss_list) > 1 and " name=" in change_info:
208 | await session.finish("❌ 禁止将多个订阅批量改名!会因为名称相同起冲突!")
209 |
210 |     # Special-case parameter: content to remove from the body
211 | rm_list_exist = re.search("rm_list='.+'", change_info)
212 | change_list = handle_rm_list(rss_list, change_info, rm_list_exist)
213 |
214 | changed_rss_list = await batch_change_rss(
215 | session, change_list, group_id, guild_channel_id, rss_list, rm_list_exist
216 | )
217 |     # For privacy, hide all groups, channels and QQ accounts except the current one
218 | rss_msg_list = [
219 | str(rss.hide_some_infos(group_id, guild_channel_id)) for rss in changed_rss_list
220 | ]
221 | result_msg = f"👏 修改了 {len(rss_msg_list)} 条订阅"
222 | if rss_msg_list:
223 | separator = "\n----------------------\n"
224 | result_msg += separator + separator.join(rss_msg_list)
225 | await session.finish(result_msg)
226 |
227 |
228 | async def batch_change_rss(
229 | session: CommandSession,
230 | change_list: List[str],
231 | group_id: Optional[int],
232 | guild_channel_id: Optional[str],
233 | rss_list: List[Rss],
234 | rm_list_exist: Optional[Match[str]] = None,
235 | ) -> List[Rss]:
236 | changed_rss_list = []
237 | for rss in rss_list:
238 | new_rss = deepcopy(rss)
239 | rss_name = rss.name
240 | for change_dict in change_list:
241 | key_to_change, value_to_change = change_dict.split("=", 1)
242 | if key_to_change in attribute_dict.keys():
243 |                 # Validate the user-supplied deduplication mode
244 | mode_property_set = {"", "-1", "link", "title", "image", "or"}
245 | if key_to_change == "mode" and (
246 | set(value_to_change.split(",")) - mode_property_set
247 | or value_to_change == "or"
248 | ):
249 | await session.finish(f"❌ 去重模式参数错误!\n{change_dict}")
250 | elif key_to_change in {
251 | "downkey",
252 | "wkey",
253 | "blackkey",
254 | "bkey",
255 | } and not regex_validate(value_to_change.lstrip("+-")):
256 | await session.finish(f"❌ 正则表达式错误!\n{change_dict}")
257 | elif key_to_change == "ppk" and not regex_validate(value_to_change):
258 | await session.finish(f"❌ 正则表达式错误!\n{change_dict}")
259 | handle_change_list(
260 | new_rss, key_to_change, value_to_change, group_id, guild_channel_id
261 | )
262 | else:
263 | await session.finish(f"❌ 参数错误!\n{change_dict}")
264 |
265 | if new_rss.__dict__ == rss.__dict__ and not rm_list_exist:
266 | continue
267 | changed_rss_list.append(new_rss)
268 |         # All parameters parsed; persist the changes
269 | new_rss.upsert(rss_name)
270 |
271 |         # Register the scheduled job
272 | if not new_rss.stop:
273 | await tr.add_job(new_rss)
274 | elif not rss.stop:
275 | tr.delete_job(new_rss)
276 | logger.info(f"{rss_name} 已停止更新")
277 |
278 | return changed_rss_list
279 |
280 |
281 | @change.args_parser
282 | async def _(session: CommandSession):
283 |     # Strip whitespace from both ends of the message
284 | stripped_arg = session.current_arg_text.strip()
285 |
286 | if session.is_first_run:
287 |         # First run of the command (first entry into the command session)
288 | if stripped_arg:
289 | session.state["change"] = stripped_arg
290 | return
291 |
292 | if not stripped_arg:
293 |         # The user sent only whitespace instead of a valid subscription; prompt for re-entry
294 |         # session.pause() sends the message and pauses the session (code after this line will not run)
295 | session.pause("输入不能为空!")
296 |
297 |     # If we are currently asking the user for more information and the input is valid, store it in the session state
298 | session.state[session.current_key] = stripped_arg
299 |
300 |
301 | # Special-case parameter: content to remove from the body
302 | def handle_rm_list(
303 | rss_list: List[Rss], change_info: str, rm_list_exist: Optional[Match[str]] = None
304 | ) -> List[str]:
305 | rm_list = None
306 |
307 | if rm_list_exist:
308 | rm_list_str = rm_list_exist[0].lstrip().replace("rm_list=", "")
309 | rm_list = [i.strip("'") for i in rm_list_str.split("','")]
310 | change_info = change_info.replace(rm_list_exist[0], "")
311 |
312 | if rm_list:
313 | for rss in rss_list:
314 | if len(rm_list) == 1 and rm_list[0] == "-1":
315 | setattr(rss, "content_to_remove", None)
316 | elif valid_rm_list := [i for i in rm_list if regex_validate(i)]:
317 | setattr(rss, "content_to_remove", valid_rm_list)
318 |
319 | change_list = [i.strip() for i in change_info.split(" ") if i != ""]
320 |     # Drop the subscription name(s)
321 | change_list.pop(0)
322 |
323 | return change_list
324 |
--------------------------------------------------------------------------------
/custom/analysis_bilibili.py:
--------------------------------------------------------------------------------
1 | import re
2 | import urllib.parse
3 | import json
4 | import nonebot
5 | from typing import Optional, Union
6 | from time import localtime, strftime
7 | from aiohttp import ClientSession
8 |
9 | from hoshino import Service, logger
10 | from nonebot import Message, MessageSegment
11 |
12 |
13 | headers = {
14 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.58"
15 | }
16 | analysis_stat = {} # group_id : last_vurl
17 | config = nonebot.get_bot().config
18 | blacklist = getattr(config, "analysis_blacklist", [])
19 | analysis_display_image = getattr(config, "analysis_display_image", False)
20 | analysis_display_image_list = getattr(config, "analysis_display_image_list", [])
21 | trust_env = getattr(config, "analysis_trust_env", False)
22 |
23 |
24 | sv2 = Service("search_bilibili_video")
25 | # Manual search by video title
26 | @sv2.on_prefix("搜视频")
27 | async def search_bilibili_video_by_title(bot, ev):
28 | title = ev.message.extract_plain_text()
29 | group_id = ev.group_id if ev.group_id else ev.get("channel_id", None)
30 |
31 | async with ClientSession(trust_env=trust_env, headers=headers) as session:
32 | vurl = await search_bili_by_title(title, session)
33 | msg = await bili_keyword(group_id, vurl, session)
34 | try:
35 | await bot.send(ev, msg)
36 |     except Exception:
37 |         # The description may contain risk-controlled content that blocks sending
38 | logger.warning(f"{msg}\n此次解析可能被风控,尝试去除简介后发送!")
39 | msg = re.sub(r"简介.*", "", msg)
40 | await bot.send(ev, msg)
41 |
42 |
43 | sv = Service("analysis_bilibili")
44 | # on_rex cannot detect mini-program payloads
45 | @sv.on_message()
46 | async def rex_bilibili(bot, ev):
47 | text = str(ev.message).strip()
48 | if blacklist and ev.user_id in blacklist:
49 | return
50 | if re.search(r"(b23.tv)|(bili(22|23|33|2233).cn)", text, re.I):
51 |         # Resolve short links first to avoid parsing something else
52 | text = await b23_extract(text)
53 |     patterns = r"(\.bilibili\.com)|(^(av|cv)(\d+))|(^BV([a-zA-Z0-9]{10})+)|(\[\[QQ小程序\]哔哩哔哩\])|(QQ小程序&#93;哔哩哔哩)|(QQ小程序&amp;#93;哔哩哔哩)"
54 | match = re.compile(patterns, re.I).search(text)
55 | if match:
56 | group_id = ev.group_id if ev.group_id else ev.get("channel_id", None)
57 | async with ClientSession(trust_env=trust_env, headers=headers) as session:
58 | msg = await bili_keyword(group_id, text, session)
59 | if msg:
60 | try:
61 | await bot.send(ev, msg)
62 |                 except Exception:
63 |                     # The description may contain risk-controlled content that blocks sending
64 | logger.warning(f"{msg}\n此次解析可能被风控,尝试去除简介后发送!")
65 | msg = re.sub(r"简介.*", "", msg)
66 | await bot.send(ev, msg)
67 |
68 |
69 | async def bili_keyword(
70 | group_id: Optional[int], text: str, session: ClientSession
71 | ) -> Union[Message, str]:
72 | try:
73 |         # Extract the URL
74 | url, page, time_location = extract(text)
75 |         # For mini-program cards, search by the title instead
76 | if not url:
77 | if title := re.search(r'"desc":("[^"哔哩]+")', text):
78 | vurl = await search_bili_by_title(title[1], session)
79 | if vurl:
80 | url, page, time_location = extract(vurl)
81 |
82 |         # Fetch the detailed info
83 | msg, vurl = "", ""
84 | if "view?" in url:
85 | msg, vurl = await video_detail(
86 | url, page=page, time_location=time_location, session=session
87 | )
88 | elif "bangumi" in url:
89 | msg, vurl = await bangumi_detail(url, time_location, session)
90 | elif "xlive" in url:
91 | msg, vurl = await live_detail(url, session)
92 | elif "article" in url:
93 | msg, vurl = await article_detail(url, page, session)
94 | elif "dynamic" in url:
95 | msg, vurl = await dynamic_detail(url, session)
96 |
97 |         # Avoid duplicate pushes when multiple bots parse the same link
98 | if group_id:
99 | if group_id in analysis_stat and analysis_stat[group_id] == vurl:
100 | return ""
101 | analysis_stat[group_id] = vurl
102 | except Exception as e:
103 | msg = "bili_keyword Error: {}".format(type(e))
104 | return msg
105 |
106 |
107 | async def b23_extract(text):
108 | b23 = re.compile(r"b23.tv/(\w+)|(bili(22|23|33|2233).cn)/(\w+)", re.I).search(
109 | text.replace("\\", "")
110 | )
111 | url = f"https://{b23[0]}"
112 |     # Kept separate because this runs inside on_message, to avoid creating a session needlessly
113 | async with ClientSession(trust_env=trust_env) as session:
114 | async with session.get(url) as resp:
115 | return str(resp.url)
116 |
117 |
118 | def extract(text: str):
119 | try:
120 | url = ""
121 |         # Video part (p) number
122 |         page = re.compile(r"([?&]|&amp;)p=\d+").search(text)
123 |         # Playback seek time
124 |         time = re.compile(r"([?&]|&amp;)t=\d+").search(text)
125 |         # Main-site video av ID
126 | aid = re.compile(r"av\d+", re.I).search(text)
127 |         # Main-site video BV ID
128 | bvid = re.compile(r"BV([A-Za-z0-9]{10})+", re.I).search(text)
129 |         # Bangumi episode page
130 | epid = re.compile(r"ep\d+", re.I).search(text)
131 |         # Bangumi season ssid (season_id)
132 | ssid = re.compile(r"ss\d+", re.I).search(text)
133 |         # Bangumi media detail page
134 | mdid = re.compile(r"md\d+", re.I).search(text)
135 |         # Live room
136 | room_id = re.compile(r"live.bilibili.com/(blanc/|h5/)?(\d+)", re.I).search(text)
137 |         # Article
138 | cvid = re.compile(
139 | r"(/read/(cv|mobile|native)(/|\?id=)?|^cv)(\d+)", re.I
140 | ).search(text)
141 |         # Dynamic (type 2)
142 |         dynamic_id_type2 = re.compile(
143 |             r"(t|m).bilibili.com/(\d+)\?(.*?)(&|&amp;)type=2", re.I
144 |         ).search(text)
145 |         # Dynamic
146 | dynamic_id = re.compile(r"(t|m).bilibili.com/(\d+)", re.I).search(text)
147 | if bvid:
148 | url = f"https://api.bilibili.com/x/web-interface/view?bvid={bvid[0]}"
149 | elif aid:
150 | url = f"https://api.bilibili.com/x/web-interface/view?aid={aid[0][2:]}"
151 | elif epid:
152 | url = (
153 | f"https://bangumi.bilibili.com/view/web_api/season?ep_id={epid[0][2:]}"
154 | )
155 | elif ssid:
156 | url = f"https://bangumi.bilibili.com/view/web_api/season?season_id={ssid[0][2:]}"
157 | elif mdid:
158 | url = f"https://bangumi.bilibili.com/view/web_api/season?media_id={mdid[0][2:]}"
159 | elif room_id:
160 | url = f"https://api.live.bilibili.com/xlive/web-room/v1/index/getInfoByRoom?room_id={room_id[2]}"
161 | elif cvid:
162 | page = cvid[4]
163 | url = f"https://api.bilibili.com/x/article/viewinfo?id={page}&mobi_app=pc&from=web"
164 | elif dynamic_id_type2:
165 | url = f"https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/get_dynamic_detail?rid={dynamic_id_type2[2]}&type=2"
166 | elif dynamic_id:
167 | url = f"https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/get_dynamic_detail?dynamic_id={dynamic_id[2]}"
168 | return url, page, time
169 | except Exception:
170 | return "", None, None
171 |
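`extract` maps any supported bilibili reference onto the matching API endpoint plus optional part/seek matches. For instance, a BV link with `p` and `t` query parameters (the BV id is made up):

```python
url, page, time = extract("https://www.bilibili.com/video/BV1xx411c7mD?p=2&t=30")
print(url)  # https://api.bilibili.com/x/web-interface/view?bvid=BV1xx411c7mD
print(page[0], time[0])  # ?p=2 &t=30  (both are regex Match objects)
```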
172 |
173 | async def search_bili_by_title(title: str, session: ClientSession) -> str:
174 | mainsite_url = "https://www.bilibili.com"
175 | search_url = f"https://api.bilibili.com/x/web-interface/wbi/search/all/v2?keyword={urllib.parse.quote(title)}"
176 |
177 | # set headers
178 | async with session.get(mainsite_url) as resp:
179 | assert resp.status == 200
180 |
181 | async with session.get(search_url) as resp:
182 | result = (await resp.json())["data"]["result"]
183 |
184 | for i in result:
185 | if i.get("result_type") != "video":
186 | continue
187 |         # Return only the first result
188 | return i["data"][0].get("arcurl")
189 |
190 |
191 | # Format numbers above ten thousand
192 | def handle_num(num: int):
193 | if num > 10000:
194 | num = f"{num / 10000:.2f}万"
195 | return num
196 |
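`handle_num` leaves small counts untouched and renders anything above ten thousand in units of 万 with two decimals:

```python
print(handle_num(9999))    # 9999
print(handle_num(123456))  # 12.35万
```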
197 |
198 | async def video_detail(url: str, session: ClientSession, **kwargs):
199 | try:
200 | async with session.get(url) as resp:
201 | res = (await resp.json()).get("data")
202 | if not res:
203 | return "解析到视频被删了/稿件不可见或审核中/权限不足", url
204 | vurl = f"https://www.bilibili.com/video/av{res['aid']}"
205 | title = f"\n标题:{res['title']}\n"
206 | cover = (
207 | MessageSegment.image(res["pic"])
208 | if analysis_display_image or "video" in analysis_display_image_list
209 | else MessageSegment.text("")
210 | )
211 | if page := kwargs.get("page"):
212 |             page = page[0].replace("&amp;", "&")
213 | p = int(page[3:])
214 | if p <= len(res["pages"]):
215 | vurl += f"?p={p}"
216 | part = res["pages"][p - 1]["part"]
217 | if part != res["title"]:
218 | title += f"小标题:{part}\n"
219 | if time_location := kwargs.get("time_location"):
220 |             time_location = time_location[0].replace("&amp;", "&")[3:]
221 | if page:
222 | vurl += f"&t={time_location}"
223 | else:
224 | vurl += f"?t={time_location}"
225 | pubdate = strftime("%Y-%m-%d %H:%M:%S", localtime(res["pubdate"]))
226 | tname = f"类型:{res['tname']} | UP:{res['owner']['name']} | 日期:{pubdate}\n"
227 | stat = f"播放:{handle_num(res['stat']['view'])} | 弹幕:{handle_num(res['stat']['danmaku'])} | 收藏:{handle_num(res['stat']['favorite'])}\n"
228 | stat += f"点赞:{handle_num(res['stat']['like'])} | 硬币:{handle_num(res['stat']['coin'])} | 评论:{handle_num(res['stat']['reply'])}\n"
229 | desc = f"简介:{res['desc']}"
230 | desc_list = desc.split("\n")
231 | desc = "".join(i + "\n" for i in desc_list if i)
232 | desc_list = desc.split("\n")
233 | if len(desc_list) > 4:
234 | desc = desc_list[0] + "\n" + desc_list[1] + "\n" + desc_list[2] + "……"
235 | mstext = MessageSegment.text("".join([vurl, title, tname, stat, desc]))
236 | msg = Message([cover, mstext])
237 | return msg, vurl
238 | except Exception as e:
239 | msg = "视频解析出错--Error: {}".format(type(e))
240 | return msg, None
241 |
242 |
243 | async def bangumi_detail(url: str, time_location: str, session: ClientSession):
244 | try:
245 | async with session.get(url) as resp:
246 | res = (await resp.json()).get("result")
247 | if not res:
248 | return None, None
249 | cover = (
250 | MessageSegment.image(res["cover"])
251 | if analysis_display_image or "bangumi" in analysis_display_image_list
252 | else MessageSegment.text("")
253 | )
254 | title = f"番剧:{res['title']}\n"
255 | desc = f"{res['newest_ep']['desc']}\n"
256 | index_title = ""
257 | style = "".join(f"{i}," for i in res["style"])
258 | style = f"类型:{style[:-1]}\n"
259 | evaluate = f"简介:{res['evaluate']}\n"
260 | if "season_id" in url:
261 | vurl = f"https://www.bilibili.com/bangumi/play/ss{res['season_id']}"
262 | elif "media_id" in url:
263 | vurl = f"https://www.bilibili.com/bangumi/media/md{res['media_id']}"
264 | else:
265 | epid = re.compile(r"ep_id=\d+").search(url)[0][len("ep_id=") :]
266 | for i in res["episodes"]:
267 | if str(i["ep_id"]) == epid:
268 | index_title = f"标题:{i['index_title']}\n"
269 | break
270 | vurl = f"https://www.bilibili.com/bangumi/play/ep{epid}"
271 | if time_location:
272 |             time_location = time_location[0].replace("&amp;", "&")[3:]
273 | vurl += f"?t={time_location}"
274 | mstext = MessageSegment.text(
275 | "".join([f"{vurl}\n", title, index_title, desc, style, evaluate])
276 | )
277 | msg = Message([cover, mstext])
278 | return msg, vurl
279 | except Exception as e:
280 | msg = "番剧解析出错--Error: {}".format(type(e))
281 | msg += f"\n{url}"
282 | return msg, None
283 |
284 |
285 | async def live_detail(url: str, session: ClientSession):
286 | try:
287 | async with session.get(url) as resp:
288 | res = await resp.json()
289 | if res["code"] != 0:
290 | return None, None
291 | res = res["data"]
292 | uname = res["anchor_info"]["base_info"]["uname"]
293 | room_id = res["room_info"]["room_id"]
294 | title = res["room_info"]["title"]
295 | cover = (
296 | MessageSegment.image(res["room_info"]["cover"])
297 | if analysis_display_image or "live" in analysis_display_image_list
298 | else MessageSegment.text("")
299 | )
300 | live_status = res["room_info"]["live_status"]
301 | lock_status = res["room_info"]["lock_status"]
302 | parent_area_name = res["room_info"]["parent_area_name"]
303 | area_name = res["room_info"]["area_name"]
304 | online = res["room_info"]["online"]
305 | tags = res["room_info"]["tags"]
306 | watched_show = res["watched_show"]["text_large"]
307 | vurl = f"https://live.bilibili.com/{room_id}\n"
308 | if lock_status:
309 | lock_time = res["room_info"]["lock_time"]
310 | lock_time = strftime("%Y-%m-%d %H:%M:%S", localtime(lock_time))
311 | title = f"[已封禁]直播间封禁至:{lock_time}\n"
312 | elif live_status == 1:
313 | title = f"[直播中]标题:{title}\n"
314 | elif live_status == 2:
315 | title = f"[轮播中]标题:{title}\n"
316 | else:
317 | title = f"[未开播]标题:{title}\n"
318 | up = f"主播:{uname} 当前分区:{parent_area_name}-{area_name}\n"
319 | watch = f"观看:{watched_show} 直播时的人气上一次刷新值:{handle_num(online)}\n"
320 | if tags:
321 | tags = f"标签:{tags}\n"
322 | if live_status:
323 | player = f"独立播放器:https://www.bilibili.com/blackboard/live/live-activity-player.html?enterTheRoom=0&cid={room_id}"
324 | else:
325 | player = ""
326 | mstext = MessageSegment.text("".join([vurl, title, up, watch, tags, player]))
327 | msg = Message([cover, mstext])
328 | return msg, vurl
329 | except Exception as e:
330 | msg = "直播间解析出错--Error: {}".format(type(e))
331 | return msg, None
332 |
333 |
334 | async def article_detail(url: str, cvid: str, session: ClientSession):
335 | try:
336 | async with session.get(url) as resp:
337 | res = (await resp.json()).get("data")
338 | if not res:
339 | return None, None
340 | images = (
341 | [MessageSegment.image(i) for i in res["origin_image_urls"]]
342 | if analysis_display_image or "article" in analysis_display_image_list
343 | else []
344 | )
345 | vurl = f"https://www.bilibili.com/read/cv{cvid}"
346 | title = f"标题:{res['title']}\n"
347 | up = f"作者:{res['author_name']} (https://space.bilibili.com/{res['mid']})\n"
348 | view = f"阅读数:{handle_num(res['stats']['view'])} "
349 | favorite = f"收藏数:{handle_num(res['stats']['favorite'])} "
350 | coin = f"硬币数:{handle_num(res['stats']['coin'])}"
351 | share = f"分享数:{handle_num(res['stats']['share'])} "
352 | like = f"点赞数:{handle_num(res['stats']['like'])} "
353 | dislike = f"不喜欢数:{handle_num(res['stats']['dislike'])}"
354 | desc = view + favorite + coin + "\n" + share + like + dislike + "\n"
355 | mstext = MessageSegment.text("".join([title, up, desc, vurl]))
356 | msg = Message(images)
357 | msg.append(mstext)
358 | return msg, vurl
359 | except Exception as e:
360 | msg = "专栏解析出错--Error: {}".format(type(e))
361 | return msg, None
362 |
363 |
364 | async def dynamic_detail(url: str, session: ClientSession):
365 | try:
366 | async with session.get(url) as resp:
367 | res = (await resp.json())["data"].get("card")
368 | if not res:
369 | return None, None
370 | card = json.loads(res["card"])
371 | dynamic_id = res["desc"]["dynamic_id"]
372 | vurl = f"https://t.bilibili.com/{dynamic_id}\n"
373 | if not (item := card.get("item")):
374 | return "动态不存在文字内容", vurl
375 | if not (content := item.get("description")):
376 | content = item.get("content")
377 | content = content.replace("\r", "\n")
378 | if len(content) > 250:
379 | content = content[:250] + "......"
380 | images = (
381 | item.get("pictures", [])
382 | if analysis_display_image or "dynamic" in analysis_display_image_list
383 | else []
384 | )
385 | if images:
386 | images = [MessageSegment.image(i.get("img_src")) for i in images]
387 | else:
388 | pics = item.get("pictures_count")
389 | if pics:
390 | content += f"\nPS:动态中包含{pics}张图片"
391 | if origin := card.get("origin"):
392 | jorigin = json.loads(origin)
393 | short_link = jorigin.get("short_link")
394 | if short_link:
395 | content += f"\n动态包含转发视频{short_link}"
396 | else:
397 |                 content += "\n动态包含转发其他动态"
398 | msg = Message(content)
399 | msg.extend(images)
400 | msg.append(MessageSegment.text(f"\n{vurl}"))
401 | return msg, vurl
402 | except Exception as e:
403 | msg = "动态解析出错--Error: {}".format(type(e))
404 | return msg, None
405 |
--------------------------------------------------------------------------------