├── .env.exa ├── .gitignore ├── LICENSE ├── README.md ├── bot.py ├── config └── config.py ├── log.py ├── methods ├── __init__.py └── tg_parse_hub.py ├── plugins ├── ai_summary.py ├── inline_parse.py ├── parse.py └── start.py ├── pyproject.toml ├── utiles ├── converter.py ├── filters.py ├── img_host.py ├── ph.py └── utile.py └── uv.lock /.env.exa: -------------------------------------------------------------------------------- 1 | ##### Bot 配置 ##### 2 | API_HASH= 3 | API_ID= 4 | BOT_TOKEN= 5 | 6 | # BOT_PROXY=http://127.0.0.1:7890 7 | # PARSER_PROXY=http://127.0.0.1:7890 8 | # DOWNLOADER_PROXY=http://127.0.0.1:7890 9 | 10 | ##### API 配置 ##### 11 | # DOUYIN_API=http://127.0.0.1:80 12 | 13 | ##### AI总结配置 ##### 14 | AI_SUMMARY=True 15 | API_KEY= 16 | BASE_URL=https://apic.ohmygpt.com/v1 17 | MODEL=gpt-4o 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.venv/ 2 | /logs 3 | /.idea 4 | /downloads 5 | .env 6 | *.session -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 梓澪 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ParseHubBot 2 | 3 | --- 4 | 5 | **Telegram聚合解析Bot, 支持AI总结, 支持内联模式** 6 | **A Telegram bot that aggregates link parsers for many platforms, with AI summaries and inline mode** 7 | 8 | 部署好的Bot: [@ParseHubBot](https://t.me/ParseHubBot)目前支持的平台: 9 | 10 | - `抖音视频|图文` 11 | - `哔哩哔哩视频|动态` 12 | - `YouTube` 13 | - `YouTube Music` 14 | - `TikTok视频|图文` 15 | - `小红书视频|图文` 16 | - `Twitter视频|图文` 17 | - `百度贴吧图文|视频` 18 | - `Facebook视频` 19 | - `微博视频|图文` 20 | - `Instagram视频|图文` 21 | - [查看更多...](https://github.com/z-mio/ParseHub) 22 | 23 |  24 |  25 | 26 | ## 部署Bot 27 | 28 | ### 修改配置 29 | 30 | 将 `.env.exa` 复制为 `.env`, 并修改配置 31 | 32 | | 参数 | 说明 | 33 | |----------------------|-------------------------------------| 34 | | `API_HASH`, `API_ID` | 登录 https://my.telegram.org 获取 | 35 | | `BOT_TOKEN` | 在 @BotFather 获取 | 36 | | `BOT_PROXY` | 海外服务器无需填写 | 37 | | `PARSER_PROXY` | 解析时使用的代理 | 38 | | `DOWNLOADER_PROXY` | 下载时使用的代理 | 39 | | `DOUYIN_API` | 你部署的抖音API地址, 默认: https://douyin.wtf | 40 | | `AI_SUMMARY` | 是否开启AI总结 | 41 | | `API_KEY` | openai 密钥 | 42 | | `BASE_URL` | openai api地址 | 43 | | `MODEL` | AI总结使用的模型 | 44 | | `CACHE_TIME` | 解析结果缓存时间, 单位: 秒, 0为不缓存, 默认缓存10分钟 | 45 | 46 | ### 开始部署 47 | 48 | **在项目根目录运行:** 49 | 50 | ```shell 51 | apt install python3-pip -y 52 | pip install uv --break-system-packages 53 | uv venv --python 3.12 54 | uv sync 55 | 
``` 56 | 57 | **启动bot** 58 | 59 | ```shell 60 | uv run bot.py 61 | ``` 62 | 63 | **设置命令列表** 64 | 私聊bot发送指令 `/menu` 65 | 66 | ## 使用 67 | 68 | 普通使用: 发送分享链接给bot即可 69 | 内联使用: 任意聊天窗口输入: `@bot用户名 链接` 70 |  71 | 72 | ## 相关项目 73 | 74 | - [z-mio/ParseHub](https://github.com/z-mio/ParseHub) 75 | 76 | ## 鸣谢 77 | 78 | - [OhMyGPT](https://www.ohmygpt.com) 79 | - [KurimuzonAkuma/pyrogram](https://github.com/KurimuzonAkuma/pyrogram) 80 | -------------------------------------------------------------------------------- /bot.py: -------------------------------------------------------------------------------- 1 | from pyrogram import Client 2 | from config.config import bot_cfg 3 | from log import logger 4 | 5 | logger.add("logs/bot.log", rotation="10 MB") 6 | 7 | 8 | class Bot(Client): 9 | def __init__(self): 10 | self.cfg = bot_cfg 11 | 12 | super().__init__( 13 | f'{self.cfg.bot_token.split(":")[0]}_bot', 14 | api_id=self.cfg.api_id, 15 | api_hash=self.cfg.api_hash, 16 | bot_token=self.cfg.bot_token, 17 | plugins=dict(root="plugins"), 18 | proxy=self.cfg.bot_proxy.dict_format, 19 | ) 20 | 21 | async def start(self): 22 | logger.info("Bot开始运行...") 23 | await super().start() 24 | 25 | async def stop(self, *args): 26 | await super().stop() 27 | 28 | 29 | if __name__ == "__main__": 30 | bot = Bot() 31 | bot.run() 32 | -------------------------------------------------------------------------------- /config/config.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import urlparse 2 | 3 | from dotenv import load_dotenv 4 | from os import getenv 5 | 6 | load_dotenv() 7 | 8 | 9 | class BotConfig: 10 | def __init__(self): 11 | self.bot_token = getenv("BOT_TOKEN") 12 | self.api_id = getenv("API_ID") 13 | self.api_hash = getenv("API_HASH") 14 | self.bot_proxy: None | BotConfig._Proxy = self._Proxy(getenv("BOT_PROXY", None)) 15 | self.parser_proxy: None | str = getenv("PARSER_PROXY", None) 16 | self.downloader_proxy: None | str = 
getenv("DOWNLOADER_PROXY", None) 17 | 18 | self.cache_time = int(ct) if (ct := getenv("CACHE_TIME")) else 600 # falls back to 600 when CACHE_TIME is unset or empty 19 | self.ai_summary = bool(getenv("AI_SUMMARY").lower() == "true") # NOTE(review): getenv returns None when AI_SUMMARY is unset, so .lower() would raise AttributeError 20 | self.douyin_api = getenv("DOUYIN_API", None) 21 | 22 | class _Proxy: # wraps an optional proxy URL parsed with urlparse 23 | def __init__(self, url: str): 24 | self._url = urlparse(url) if url else None 25 | self.url = self._url.geturl() if self._url else None 26 | 27 | @property 28 | def dict_format(self): # proxy URL split into scheme/hostname/port/username/password, or None when no URL was given 29 | if not self._url: 30 | return None 31 | return { 32 | "scheme": self._url.scheme, 33 | "hostname": self._url.hostname, 34 | "port": self._url.port, 35 | "username": self._url.username, 36 | "password": self._url.password, 37 | } 38 | 39 | 40 | bot_cfg = BotConfig() 41 | -------------------------------------------------------------------------------- /log.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import logging 3 | from typing import TYPE_CHECKING 4 | 5 | import loguru 6 | 7 | if TYPE_CHECKING: 8 | # Avoid sphinx autodoc failing to resolve the annotation 9 | # because the loguru module does not actually have a `Logger` class 10 | from loguru import Logger 11 | 12 | logger: "Logger" = loguru.logger 13 | 14 | 15 | class InterceptHandler(logging.Handler): # forwards stdlib logging records to the loguru logger 16 | def emit(self, record: logging.LogRecord) -> None: 17 | level: str | int 18 | try: 19 | level = logger.level(record.levelname).name 20 | except ValueError: # level name not resolved by loguru: fall back to the numeric level 21 | level = record.levelno 22 | 23 | frame, depth = inspect.currentframe(), 0 24 | while frame and (depth == 0 or frame.f_code.co_filename == logging.__file__): # walk up past this frame and frames from the logging module, counting the depth 25 | frame = frame.f_back 26 | depth += 1 27 | 28 | logger.opt(depth=depth, exception=record.exc_info).log( 29 | level, record.getMessage() 30 | ) 31 | 32 | 33 | def init_logger(): # installs InterceptHandler via logging.basicConfig(force=True) 34 | logging.basicConfig(handlers=[InterceptHandler()], force=True) 35 | 36 | 37 | init_logger() 38 | -------------------------------------------------------------------------------- /methods/__init__.py: -------------------------------------------------------------------------------- 1 
| from .tg_parse_hub import ( 2 | TgParseHub, 3 | VideoParseResultOperate, 4 | ImageParseResultOperate, 5 | MultimediaParseResultOperate, 6 | ) 7 | -------------------------------------------------------------------------------- /methods/tg_parse_hub.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import tempfile 3 | from datetime import datetime, timedelta 4 | 5 | import httpx 6 | from abc import ABC, abstractmethod 7 | from typing import Union, Callable 8 | from aiocache import Cache 9 | from aiocache.plugins import TimingPlugin 10 | from apscheduler.schedulers.asyncio import AsyncIOScheduler 11 | from parsehub.config import ParseConfig, DownloadConfig 12 | from pyrogram import enums, Client 13 | from pyrogram.types import ( 14 | Message, 15 | InlineKeyboardMarkup as Ikm, 16 | InlineKeyboardButton as Ikb, 17 | InputMediaPhoto, 18 | InputMediaVideo, 19 | InlineQuery, 20 | InlineQueryResultPhoto, 21 | InlineQueryResultAnimation, 22 | CallbackQuery, 23 | InlineQueryResultArticle, 24 | InputTextMessageContent, 25 | ) 26 | 27 | from parsehub import ParseHub 28 | from parsehub.types import ( 29 | ParseResult, 30 | Image, 31 | Video, 32 | Ani, 33 | VideoParseResult, 34 | ImageParseResult, 35 | MultimediaParseResult, 36 | SummaryResult, 37 | DownloadResult, 38 | ParseError, 39 | ) 40 | from parsehub.utiles.utile import match_url 41 | from parsehub.parsers.parser.weixin import WXImageParseResult 42 | from parsehub.parsers.parser.coolapk import CoolapkImageParseResult 43 | from config.config import bot_cfg 44 | from utiles.converter import clean_article_html 45 | from utiles.img_host import ImgHost 46 | from utiles.ph import Telegraph 47 | from utiles.utile import encrypt 48 | from contextlib import asynccontextmanager 49 | from markdown import markdown 50 | 51 | _parsing = Cache(Cache.MEMORY, plugins=[TimingPlugin()]) # URLs currently being parsed 52 | _url_cache = Cache(Cache.MEMORY, plugins=[TimingPlugin()]) # URL cache (hash -> URL) 53 | 
_operate_cache = Cache(Cache.MEMORY, plugins=[TimingPlugin()]) # parse-result (operate object) cache 54 | _msg_cache = Cache(Cache.MEMORY, plugins=[TimingPlugin()]) # cache of messages already sent for a parse result 55 | 56 | scheduler = AsyncIOScheduler() 57 | scheduler.start() 58 | 59 | 60 | class TgParseHub(ParseHub): 61 | """ParseHub re-wrapped for use with Telegram.""" 62 | 63 | def __init__(self): 64 | super().__init__( 65 | ParseConfig(douyin_api=bot_cfg.douyin_api, proxy=bot_cfg.parser_proxy) 66 | ) 67 | self.url = None 68 | self.on_cache = bool(bot_cfg.cache_time) # caching is off when cache_time is 0 69 | self.parsing = _parsing # URLs currently being parsed 70 | """正在解析的链接""" 71 | self.cache = _operate_cache # parse-result cache 72 | """解析结果缓存""" 73 | self.url_cache = _url_cache # URL cache 74 | """网址缓存""" 75 | self.operate: ParseResultOperate | None = None # operate object for the current parse result 76 | """解析结果操作对象""" 77 | 78 | async def parse( 79 | self, url: str, cache_time: int = bot_cfg.cache_time 80 | ) -> "TgParseHub": 81 | """ 82 | Parse a URL and store the matching result-operate object on this instance. 83 | :param url: a URL, or the hash of a URL 84 | :param cache_time: cache time handed to _set_cache; defaults to bot_cfg.cache_time 85 | :return: self 86 | """ 87 | self.url = await self._get_url(url) 88 | while await self._get_parse_task(): # poll once per second while the same URL is marked as being parsed 89 | await asyncio.sleep(1) 90 | 91 | if not (operate := await self._get_cache()): # cache miss: mark the URL as in progress and parse it 92 | await self._add_parse_task() 93 | async with self.error_handler(): 94 | r = await super().parse(self.url) 95 | operate = self._select_operate(r) 96 | 97 | self.operate = operate 98 | if self.on_cache: # cache the result 99 | """缓存结果""" 100 | await self._set_cache(operate, cache_time) 101 | if bot_cfg.ai_summary: # AI summary enabled: also remember hash -> URL 102 | """开启 AI 总结""" 103 | await self._set_url_cache() 104 | 105 | return self 106 | 107 | async def ai_summary(self, cq: CallbackQuery): 108 | """Get the AI summary and re-cache the updated operate object.""" 109 | self.operate = await self.operate.ai_summary(cq) 110 | await self._set_cache(self.operate, bot_cfg.cache_time) 111 | 112 | async def un_ai_summary(self, cq: CallbackQuery): 113 | """Cancel (hide) the AI summary.""" 114 | return await self.operate.un_ai_summary(cq) 115 | 116 | async def download( 117 | self, 118 | callback: Callable = None, 119 | callback_args: tuple = (), 120 | ) -> DownloadResult: 121 | if (dr := 
self.operate.download_result) and dr.exists(): # reuse an earlier download while it still exists 122 | return dr 123 | async with self.error_handler(): 124 | r = await self.result.download( 125 | None, 126 | callback, 127 | callback_args, 128 | config=DownloadConfig( 129 | yt_dlp_duration_limit=1800, proxy=bot_cfg.downloader_proxy 130 | ), 131 | ) 132 | self.operate.download_result = r 133 | return r 134 | 135 | async def delete(self): 136 | """Delete files (and the cached operate entry when caching is on).""" 137 | if self.on_cache: 138 | await self.cache.delete(self.operate.hash_url) 139 | self.operate.delete() 140 | 141 | async def chat_upload( 142 | self, cli: Client, msg: Message 143 | ) -> Message | list[Message] | list[list[Message]]: 144 | """Send the parse result to the chat.""" 145 | 146 | async def handle_cache(m): # re-sends a previously cached message / media group to msg.chat 147 | if isinstance(m, Message): 148 | return await m.copy(msg.chat.id) 149 | if isinstance(m, list): 150 | if all(isinstance(i, Message) for i in m): 151 | if not m: 152 | return None 153 | m = m[0] 154 | mg = await cli.copy_media_group(msg.chat.id, m.chat.id, m.id) 155 | 156 | return mg 157 | [await handle_cache(i) for i in m] # not a flat list of Message: handle each element recursively 158 | await msg.reply( 159 | self.operate.content_and_no_url, 160 | quote=False, 161 | reply_markup=self.operate.button(), 162 | disable_web_page_preview=True, 163 | ) 164 | 165 | cache_msg = await self._get_msg_cache() 166 | if cache_msg: 167 | return await handle_cache(cache_msg) 168 | 169 | async with self.error_handler(): 170 | msg = await self.operate.chat_upload(msg) 171 | 172 | if self.on_cache: 173 | await self._set_msg_cache(msg) 174 | else: 175 | await self.delete() 176 | await self._del_parse_task() 177 | return msg 178 | 179 | async def inline_upload(self, iq: InlineQuery): 180 | """Send the parse result as an inline-query answer.""" 181 | async with self.error_handler(): 182 | await self.operate.inline_upload(iq) 183 | await self._del_parse_task() 184 | 185 | @asynccontextmanager 186 | async def error_handler(self): # on any Exception: run _error_callback, then re-raise 187 | try: 188 | yield 189 | except Exception as e: 190 | await self._error_callback() 191 | raise e 192 | 193 | async def _error_callback(self): 194 | 
"""Error callback: clears the parse-task marker.""" 195 | await self._del_parse_task() 196 | 197 | async def get_parse_task(self, url: str) -> bool: 198 | """Get the parse-task marker for the given URL.""" 199 | url = await self._get_url(url) 200 | return await self.parsing.get(url) 201 | 202 | async def _get_parse_task(self): 203 | """Get the parse-task marker for self.url (False when absent).""" 204 | return await self.parsing.get(self.url, False) 205 | 206 | async def _add_parse_task(self): 207 | """Add a parse task; times out after 5 minutes (ttl=300).""" 208 | await self.parsing.set(self.url, True, ttl=300) 209 | 210 | async def _del_parse_task(self): 211 | """Mark parsing as finished by deleting the task marker.""" 212 | await self.parsing.delete(self.url) 213 | 214 | async def _get_url(self, url: str): 215 | """Resolve the input to a raw URL; raises ParseError when no URL is found.""" 216 | if "http" not in url: # treated as a hash: look the URL up in the URL cache 217 | url = await self._get_url_cache(url) # NOTE(review): a cache miss yields None here; how match_url handles None is not visible in this file 218 | url = match_url(url) 219 | if not url: 220 | raise ParseError("无效的网址") 221 | return await self._select_parser(url)(parse_config=self.config).get_raw_url(url) 222 | 223 | async def _set_url_cache(self): 224 | """Cache the URL under its hash.""" 225 | await self.url_cache.set(encrypt(self.url), self.url, ttl=bot_cfg.cache_time) 226 | 227 | async def _get_url_cache(self, hash_url: str) -> str | None: 228 | """Get the cached URL for a hash.""" 229 | return await self.url_cache.get(hash_url) 230 | 231 | async def _get_cache(self) -> Union["ParseResultOperate", None]: 232 | """Get the cached result for self.url.""" 233 | return await self.cache.get(encrypt(self.url)) 234 | 235 | async def _set_cache(self, result: "ParseResultOperate", cache_time): 236 | """Cache the result.""" 237 | await self.cache.set(result.hash_url, result) # no ttl passed here; removal is scheduled by _clear_cache below 238 | await self._clear_cache(cache_time) 239 | 240 | async def _clear_cache(self, cache_time: int = bot_cfg.cache_time): 241 | """Schedule deletion of the cache entry after cache_time seconds.""" 242 | 243 | async def fn(): 244 | await self.cache.delete(self.operate.hash_url) 245 | self.operate.delete() 246 | 247 | if not scheduler.get_job(self.operate.hash_url): # only one deletion job per hash; an existing job is not rescheduled 248 | run_time = datetime.now() + timedelta(seconds=cache_time) 249 | scheduler.add_job(fn, "date", run_date=run_time, id=self.operate.hash_url) 250 | 251 | async def _get_msg_cache( 252 | self, 253 | ) -> Message | list[Message] | 
list[list[Message]] | None: 254 | """Get the cached message(s).""" 255 | return await _msg_cache.get(self.operate.hash_url) 256 | 257 | async def _set_msg_cache(self, msg: Message): 258 | """Cache the sent message(s).""" 259 | await _msg_cache.set(self.operate.hash_url, msg, ttl=bot_cfg.cache_time) 260 | 261 | @staticmethod 262 | def _select_operate(result: ParseResult = None) -> "ParseResultOperate": 263 | """Pick the operate class matching the parse-result type; raises ValueError for unknown types.""" 264 | cls = result.__class__ 265 | if issubclass(cls, VideoParseResult): 266 | op = VideoParseResultOperate 267 | elif issubclass(cls, ImageParseResult): 268 | op = ImageParseResultOperate 269 | elif issubclass(cls, MultimediaParseResult): 270 | op = MultimediaParseResultOperate 271 | else: 272 | raise ValueError("未知的 ParseResult 类型") 273 | return op(result) 274 | 275 | @property 276 | def result(self) -> ParseResult: 277 | return self.operate and self.operate.result # falsy self.operate is returned as-is (None before parse()) 278 | 279 | 280 | class ParseResultOperate(ABC): 281 | """Base class for operations on a parse result.""" 282 | 283 | def __init__(self, result: ParseResult): 284 | self.result = result 285 | self.download_result: DownloadResult | None = None 286 | self.ai_summary_result: SummaryResult | None = None 287 | self.telegraph_url: str | None = None # link to the Telegraph post 288 | 289 | @abstractmethod 290 | async def chat_upload( 291 | self, msg: Message 292 | ) -> Message | list[Message] | list[list[Message]]: 293 | """Upload to a regular chat.""" 294 | raise NotImplementedError 295 | 296 | async def inline_upload(self, iq: InlineQuery): 297 | """Upload as inline-query results.""" 298 | results = [] 299 | 300 | media = ( 301 | self.result.media 302 | if isinstance(self.result.media, list) 303 | else [self.result.media] 304 | ) 305 | if not media: # only an empty list reaches this branch; a non-list media value was wrapped into a one-element list above 306 | results.append( 307 | InlineQueryResultArticle( 308 | title=self.result.title or "无标题", 309 | description=self.result.desc, 310 | input_message_content=InputTextMessageContent( 311 | self.content_and_no_url, disable_web_page_preview=True 312 | ), 313 | reply_markup=self.button(), 314 | ) 315 | ) 316 | for index, i in enumerate(media): 317 | text = 
self.content_and_no_url 318 | k = { 319 | "caption": text, 320 | "title": text, 321 | "reply_markup": self.button(), 322 | } 323 | 324 | if isinstance(i, Image): 325 | results.append( 326 | InlineQueryResultPhoto( 327 | i.path, 328 | photo_width=300, 329 | photo_height=300, 330 | **k, 331 | ) 332 | ) 333 | elif isinstance(i, Video): # videos are offered as a photo result (thumbnail or a fallback image) with id download_{index} 334 | results.append( 335 | InlineQueryResultPhoto( 336 | i.thumb_url 337 | or "https://telegra.ph/file/cdfdb65b83a4b7b2b6078.png", 338 | photo_width=300, 339 | photo_height=300, 340 | id=f"download_{index}", 341 | title=text, 342 | caption=text, 343 | reply_markup=self.button(hide_summary=True), 344 | ) 345 | ) 346 | elif isinstance(i, Ani): 347 | results.append( 348 | InlineQueryResultAnimation(i.path, thumb_url=i.thumb_url, **k) 349 | ) 350 | return await iq.answer(results, cache_time=0) 351 | 352 | def delete(self): 353 | """Delete the downloaded files, if any.""" 354 | if not self.download_result: 355 | return 356 | self.download_result.delete() 357 | 358 | def button( 359 | self, 360 | hide_summary: bool = False, 361 | show_summary_result: bool = False, 362 | summarizing: bool = False, 363 | ) -> Ikm | None: 364 | """ 365 | Build the inline keyboard. 366 | :param hide_summary: hide the AI-summary button 367 | :param show_summary_result: the AI-summary result is being shown 368 | :param summarizing: a summary is in progress 369 | :return: the keyboard, or None when the result has no raw_url 370 | """ 371 | if not self.result.raw_url: 372 | return 373 | button = [] 374 | 375 | raw_url_btn = Ikb("原链接", url=self.result.raw_url) 376 | 377 | if show_summary_result: 378 | ai_summary_btn = Ikb("AI总结✅", callback_data=f"unsummary_{self.hash_url}") 379 | else: 380 | ai_summary_btn = Ikb("AI总结❎", callback_data=f"summary_{self.hash_url}") 381 | 382 | button.append(raw_url_btn) 383 | if bot_cfg.ai_summary and not hide_summary: 384 | if summarizing: # the in-progress button replaces whichever summary button was chosen above 385 | ai_summary_btn = Ikb( 386 | "AI总结中❇️", callback_data=f"summarizing_{self.hash_url}" 387 | ) 388 | button.append(ai_summary_btn) 389 | 390 | return Ikm([button]) 391 | 392 | @property 393 | def hash_url(self): 394 | """Hash of the raw URL.""" 395 | return 
encrypt(self.result.raw_url) 396 | 397 | async def ai_summary(self, cq: CallbackQuery) -> "ParseResultOperate": 398 | """Get the AI summary and show it in the callback's message.""" 399 | 400 | if not (r := self.ai_summary_result): # no stored summary yet: show the in-progress button, download if needed, then summarise 401 | await cq.edit_message_text( 402 | self.content_and_no_url, 403 | reply_markup=self.button(summarizing=True), 404 | ) 405 | if not self.download_result: 406 | self.download_result = await self.result.download() 407 | try: 408 | r = await self.download_result.summary() 409 | except Exception as e: # restore the normal buttons before re-raising 410 | await cq.edit_message_text( 411 | self.content_and_no_url, 412 | reply_markup=self.button(), 413 | ) 414 | raise e 415 | self.ai_summary_result = r 416 | 417 | await cq.edit_message_text( 418 | self.f_text(r.content), reply_markup=self.button(show_summary_result=True) 419 | ) 420 | 421 | return self 422 | 423 | async def un_ai_summary(self, cq: CallbackQuery): 424 | """Cancel the AI summary: restore the original text and buttons.""" 425 | 426 | await cq.edit_message_text(self.content_and_no_url, reply_markup=self.button()) 427 | 428 | @property 429 | def content_and_no_url(self) -> str: 430 | return ( 431 | f"[{self.result.title or '无标题'}]({self.telegraph_url})" 432 | if self.telegraph_url 433 | else self.f_text(f"**{self.result.title}**\n\n{self.result.desc}") 434 | ).strip() 435 | 436 | @property 437 | def content_and_url(self) -> str: 438 | text = self.content_and_no_url 439 | return self.f_text( 440 | f"{text}\n\n> 原文链接: [LINK]({self.result.raw_url})" 441 | if self.result.raw_url 442 | else text 443 | ).strip() 444 | 445 | @staticmethod 446 | def f_text(text: str) -> str: 447 | """Format output text: limit its length and add the collapsible-block style.""" 448 | text = text.strip() 449 | if text[1020:]: # longer than 1020 characters: cut to 1000 and append "..." 450 | text = text[:1000] + "..." 451 | return f"
{text}" 452 | elif text[500:] or len(text.splitlines()) > 10: 453 | # More than 500 characters or more than 10 lines: add the collapsible-block style 454 | return f"
{text}" 455 | else: 456 | return text 457 | 458 | 459 | class VideoParseResultOperate(ParseResultOperate): 460 | """Operations on a video parse result.""" 461 | 462 | async def chat_upload(self, msg: Message) -> Message: 463 | with tempfile.NamedTemporaryFile(delete=False) as temp_file: # NOTE(review): delete=False and no removal is visible in this method, so the file appears to stay on disk 464 | if self.result.media.thumb_url: 465 | async with httpx.AsyncClient() as client: 466 | thumb = await client.get(self.result.media.thumb_url) 467 | temp_file.write(thumb.content) 468 | temp = temp_file.name 469 | else: 470 | temp = None 471 | 472 | await msg.reply_chat_action(enums.ChatAction.UPLOAD_VIDEO) 473 | return await msg.reply_video( 474 | self.download_result.media.path, 475 | caption=self.content_and_no_url, 476 | thumb=temp, 477 | quote=True, 478 | reply_markup=self.button(), 479 | ) 480 | 481 | 482 | class ImageParseResultOperate(ParseResultOperate): 483 | """Operations on an image parse result.""" 484 | 485 | async def _send_ph(self, html_content: str, msg: Message): # creates a Telegraph page from the HTML and replies with a link to it 486 | page = await Telegraph().create_page( 487 | self.result.title or "无标题", html_content=html_content 488 | ) 489 | self.telegraph_url = page.url 490 | return await msg.reply_text( 491 | self.content_and_no_url, 492 | quote=True, 493 | reply_markup=self.button(), 494 | ) 495 | 496 | async def chat_upload( 497 | self, msg: Message 498 | ) -> Message | list[Message] | list[list[Message]]: 499 | await msg.reply_chat_action(enums.ChatAction.UPLOAD_PHOTO) 500 | 501 | if isinstance(self.result, WXImageParseResult): # article markdown is rendered to HTML and sent as a Telegraph page, with the image host rewritten 502 | return await self._send_ph( 503 | clean_article_html( 504 | markdown( 505 | self.result.wx.markdown_content.replace( 506 | "mmbiz.qpic.cn", "mmbiz.qpic.cn.in" 507 | ) 508 | ) 509 | ), 510 | msg, 511 | ) 512 | elif isinstance(self.result, CoolapkImageParseResult) and ( 513 | markdown_content := self.result.coolapk.markdown_content 514 | ): 515 | return await self._send_ph( 516 | clean_article_html( 517 | markdown( 518 | markdown_content.replace( 519 | "image.coolapk.com", "qpic.cn.in/image.coolapk.com" 520 | ) 521 | ) 522 | ), 523 | msg, 524 | ) 
525 | 526 | count = len(self.download_result.media) 527 | text = self.content_and_no_url 528 | if count == 0: 529 | return await msg.reply_text( 530 | text, 531 | quote=True, 532 | disable_web_page_preview=True, 533 | reply_markup=self.button(), 534 | ) 535 | elif count == 1: 536 | return await msg.reply_photo( 537 | self.download_result.media[0].path, 538 | quote=True, 539 | caption=text, 540 | reply_markup=self.button(), 541 | ) 542 | elif count <= 9: 543 | text = self.content_and_no_url 544 | m = await msg.reply_media_group( 545 | [InputMediaPhoto(v.path) for v in self.download_result.media] 546 | ) 547 | await m[0].reply_text( 548 | text, 549 | disable_web_page_preview=True, 550 | reply_markup=self.button(), 551 | quote=True, 552 | ) 553 | return [m] 554 | else: 555 | tasks = [ImgHost().litterbox(i.path) for i in self.download_result.media] 556 | results = await asyncio.gather(*tasks, return_exceptions=True) 557 | results = [ 558 | f'
 start/end position 89 | for x in pre_content_re.finditer(html_string): 90 | start, end = x.start(), x.end() 91 | pre_ranges.extend((start, end)) 92 | pre_ranges.append(len(html_string)) 93 | 94 | # parts at even k only get line_breaks_inside_pre applied; the others get line_breaks_and_empty_strings (NOTE(review): the original comment lost a word in extraction, probably the pre tag name) 95 | for k in range(1, len(pre_ranges)): 96 | part = html_string[pre_ranges[k - 1] : pre_ranges[k]] 97 | if k % 2 == 0: 98 | out += line_breaks_inside_pre.sub("\n", part) 99 | else: 100 | out += line_breaks_and_empty_strings.sub(replace_by, part) 101 | return out 102 | -------------------------------------------------------------------------------- /utiles/filters.py: -------------------------------------------------------------------------------- 1 | from pyrogram import filters 2 | from pyrogram.types import Message, InlineQuery 3 | 4 | from parsehub import ParseHub 5 | 6 | 7 | async def _platform_filter(_, __, update: Message | InlineQuery): # true when ParseHub selects a parser for the update's text 8 | if isinstance(update, Message): 9 | t = update.caption or update.text # NOTE(review): may be None when a message has neither; _select_parser's handling of None is not visible here 10 | else: 11 | t = update.query 12 | return bool(ParseHub()._select_parser(t)) 13 | 14 | 15 | platform_filter = filters.create(_platform_filter) 16 | 17 | 18 | def filter_inline_query_results(command: str): 19 | """ 20 | Build a filter matching inline queries whose text starts with the given string. 21 | 22 | :param command: prefix the query text must start with 23 | :return: the filter built by filters.create 24 | """ 25 | 26 | async def func(_, __, update): 27 | return update.query.startswith(command) 28 | 29 | return filters.create(func, commands=command) 30 | -------------------------------------------------------------------------------- /utiles/img_host.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | from httpx._types import ProxiesTypes 3 | from tenacity import retry, stop_after_attempt 4 | 5 | 6 | class ImgHost: 7 | def __init__(self, proxies: ProxiesTypes = None): 8 | self.async_client = httpx.AsyncClient(proxies=proxies) 9 | 10 | @retry(stop=stop_after_attempt(5)) 11 | async def litterbox(self, filename: str): 12 | host_url = "https://litterbox.catbox.moe/resources/internals/api.php" 
13 | 14 | file = open(filename, "rb") 15 | try: 16 | data = { 17 | "reqtype": "fileupload", 18 | "time": "1h", 19 | } 20 | response = await self.async_client.post( 21 | host_url, data=data, files={"fileToUpload": file} 22 | ) 23 | finally: # always close the file, even when the upload raises 24 | file.close() 25 | 26 | return response.text 27 | 28 | def __aexit__(self, exc_type, exc_val, exc_tb): # NOTE(review): plain def, and no __aenter__ is visible in this class 29 | self.async_client.aclose() # NOTE(review): aclose() is a coroutine in httpx and is not awaited here, so the client is not actually closed 30 | -------------------------------------------------------------------------------- /utiles/ph.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from telegraph.aio import Telegraph as TelegraphAPI 4 | 5 | 6 | class Telegraph: 7 | """Wrapper around the Telegraph API.""" 8 | 9 | def __init__(self, token: str = None, domain: str = "telegra.ph"): 10 | self.token = token 11 | self.domain = domain 12 | self.telegraph = TelegraphAPI(access_token=token, domain=domain) 13 | 14 | async def create_account( 15 | self, short_name: str, author_name: str = None, author_url: str = None 16 | ) -> "TelegraphAccount": 17 | """Create a Telegraph account and remember its access token.""" 18 | account = await self.telegraph.create_account( 19 | short_name, author_name, author_url 20 | ) 21 | acc_info = await self.get_account_info(account) 22 | self.token = acc_info.access_token 23 | return acc_info 24 | 25 | async def get_account_info(self, account_info: dict = None) -> "TelegraphAccount": 26 | """Get the Telegraph account info (fetched from the API when not supplied).""" 27 | account_info = account_info or await self.telegraph.get_account_info( 28 | [ 29 | "short_name", 30 | "author_name", 31 | "author_url", 32 | "auth_url", 33 | ] 34 | ) 35 | return TelegraphAccount( 36 | self.telegraph.get_access_token(), 37 | account_info["short_name"], 38 | account_info["author_name"], 39 | account_info["author_url"], 40 | account_info["auth_url"], 41 | ) 42 | 43 | async def create_page( 44 | self, 45 | title, 46 | content=None, 47 | html_content=None, 48 | author_name=None, 49 | author_url=None, 50 | return_content=False, 51 | auto_create_account=True, 52 
| ) -> "TelegraphPage": 53 | """Create a Telegraph page, creating an account first when no token is set.""" 54 | if auto_create_account and not self.token: 55 | # Labelled "random username" originally. NOTE(review): the expression below is a constant (187500), so the name is always "tg_187500" 56 | short_name = "tg_" + str( 57 | int(100000 * (1 + 0.5 * (1 + 0.5 * (1 + 0.5 * 1)))) 58 | ) 59 | await self.create_account(short_name) 60 | response = await self.telegraph.create_page( 61 | title, 62 | content, 63 | html_content, 64 | author_name, 65 | author_url, 66 | return_content, 67 | ) 68 | return TelegraphPage( 69 | response["path"], 70 | response["url"], 71 | response["title"], 72 | response["description"], 73 | response["views"], 74 | response["can_edit"], 75 | await self.get_account_info(), 76 | ) 77 | 78 | 79 | @dataclass 80 | class TelegraphAccount: 81 | access_token: str 82 | short_name: str 83 | author_name: str 84 | author_url: str 85 | auth_url: str 86 | 87 | 88 | @dataclass 89 | class TelegraphPage: 90 | path: str 91 | url: str 92 | title: str 93 | description: str 94 | views: int 95 | can_edit: bool 96 | account: TelegraphAccount 97 | -------------------------------------------------------------------------------- /utiles/utile.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import hashlib 3 | 4 | from pyrogram import Client 5 | 6 | 7 | async def schedule_delete_messages( 8 | client: Client, chat_id: int, message_ids: int | list, delay_seconds: int = 2 9 | ): 10 | """Delete messages after a delay; any error from the deletion is ignored.""" 11 | 12 | await asyncio.sleep(delay_seconds) 13 | 14 | try: 15 | await client.delete_messages(chat_id, message_ids) 16 | except Exception: # deliberate best-effort: failures are swallowed 17 | ... 
def progress(current, total, status):
    """Return a download-progress caption, or None when no update is due.

    :param current: amount processed so far (same unit as ``total``)
    :param total: total amount; ``0`` means the total is unknown
    :param status: status text appended to the caption
    :return: ``status`` unchanged when ``total`` is 0; the caption
        ``"下 载 中...|{status}"`` when an update is due; otherwise ``None``
    """
    if total == 0:
        # Nothing to compute a ratio from: hand the status text straight back.
        return status

    caption = f"下 载 中...|{status}"
    if total >= 100:
        # Large totals: report only when the percentage, rounded to one
        # decimal place, is a multiple of 25 (0, 25, 50, 75 or 100).
        due = round(current * 100 / total, 1) % 25 == 0
    else:
        # Small totals: report on every third item and on the last one.
        due = (current + 1) % 3 == 0 or (current + 1) == total
    return caption if due else None


def encrypt(text: str) -> str:
    """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8.

    The digest is a non-cryptographic key (the callers use it to key caches
    and callback data), so MD5 is requested with ``usedforsecurity=False``.
    That keeps it available on FIPS-restricted Python builds, where a plain
    ``hashlib.md5()`` raises ``ValueError``. The digest value is unchanged.

    :param text: text to hash
    :return: 32-character lowercase hex digest
    """
    import hashlib  # local import keeps this block self-contained

    return hashlib.md5(text.encode("utf-8"), usedforsecurity=False).hexdigest()