├── .env.test ├── .github ├── actions │ └── setup-pytest │ │ └── action.yml └── workflows │ ├── pypi-publish.yml │ └── pytest.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── nonebot_plugin_resolver2 ├── __init__.py ├── config.py ├── constant.py ├── cookie.py ├── download │ ├── __init__.py │ ├── utils.py │ └── ytdlp.py ├── exception.py ├── matchers │ ├── __init__.py │ ├── acfun.py │ ├── bilibili.py │ ├── douyin.py │ ├── filter.py │ ├── helper.py │ ├── kuaishou.py │ ├── kugou.py │ ├── ncm.py │ ├── preprocess.py │ ├── tiktok.py │ ├── twitter.py │ ├── weibo.py │ ├── xiaohongshu.py │ └── ytb.py └── parsers │ ├── __init__.py │ ├── acfun.py │ ├── bilibili.py │ ├── data.py │ ├── douyin.py │ ├── kuaishou.py │ ├── kugou.py │ ├── ncm.py │ ├── utils.py │ ├── weibo.py │ └── xiaohongshu.py ├── pyproject.toml ├── test_url.md ├── tests ├── conftest.py ├── test_acfun.py ├── test_bilibili.py ├── test_bilibili_need_ck.py ├── test_douyin.py ├── test_download.py ├── test_kuaishou.py ├── test_load.py ├── test_ncm.py ├── test_weibo.py ├── test_x.py ├── test_xhs.py └── utils.py └── uv.lock /.env.test: -------------------------------------------------------------------------------- 1 | PORT=8888 2 | LOG_LEVEL=DEBUG 3 | FASTAPI_RELOAD=false 4 | 5 | NICKNAME='["BOT"]' 6 | # 使用当前工作目录作为数据存储目录,以下数据目录配置项默认值将会对应变更 7 | LOCALSTORE_USE_CWD=true 8 | 9 | #r_bili_ck="SESSDATA=jdkcnsjkxbdj" 10 | r_xhs_ck="a=a;b=b;c=c" 11 | r_ytb_ck="a=a;b=b;c=c" 12 | r_is_oversea=False 13 | r_proxy="http://127.0.0.1:7890" 14 | r_video_duration_maximum=600 15 | r_disable_resolvers='["bilibili", "douyin", "kugou", "twitter", "ncm", "ytb", "acfun", "tiktok", "weibo", "xiaohongshu", "kuaishou"]' 16 | 17 | -------------------------------------------------------------------------------- /.github/actions/setup-pytest/action.yml: -------------------------------------------------------------------------------- 1 | name: setup-pytest 2 | description: setup-pytest 3 | 4 | inputs: 5 | python-version: 6 | description: python version 7 | required: false 8 | default: "3.12" 9 | 10 | runs: 11 | using: composite 12 | steps: 13 | - name: Install uv and set the python version 14 | uses: astral-sh/setup-uv@v6 15 | with: 16 | python-version: ${{ inputs.python-version }} 17 | 18 | - name: Install Dependencies 19 | run: uv sync --group test 20 | shell: bash 21 | -------------------------------------------------------------------------------- /.github/workflows/pypi-publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | workflow_dispatch: 8 | 9 | jobs: 10 | pypi-publish: 11 | name: Upload release to PyPI 12 | runs-on: ubuntu-latest 13 | steps: 14 | 15 | - name: Checkout repository 16 | uses: actions/checkout@v4 17 | 18 | - name: Install uv and set the python version 19 | uses: astral-sh/setup-uv@v6 20 | with: 21 | python-version: "3.12" 22 | 23 | - name: Build a binary wheel and a source tarball 24 | run: >- 25 | uv build 26 | 27 | - name: Publish distribution to PyPI 28 | uses: pypa/gh-action-pypi-publish@release/v1 29 | with: 30 | password: ${{ secrets.PYPI_API_TOKEN }} 31 | -------------------------------------------------------------------------------- /.github/workflows/pytest.yml: -------------------------------------------------------------------------------- 1 | name: pytest 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | paths-ignore: 8 | - 'README.md' 9 | pull_request: 10 | 11 | jobs: 12 | plugin-load: 13 | runs-on: 
ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: 17 | - "3.10" 18 | - "3.11" 19 | - "3.12" 20 | - "3.13" 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: setup-pytest 25 | uses: ./.github/actions/setup-pytest 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: pytest 29 | run: uv run pytest tests/test_load.py 30 | 31 | 32 | bilibili-test: 33 | needs: plugin-load 34 | runs-on: ubuntu-latest 35 | steps: 36 | - uses: actions/checkout@v4 37 | 38 | - name: setup-pytest 39 | uses: ./.github/actions/setup-pytest 40 | 41 | - name: pytest 42 | run: uv run pytest tests/test_bilibili.py 43 | 44 | 45 | weibo-test: 46 | needs: plugin-load 47 | runs-on: ubuntu-latest 48 | steps: 49 | - uses: actions/checkout@v4 50 | - name: setup-pytest 51 | uses: ./.github/actions/setup-pytest 52 | 53 | - name: pytest 54 | run: uv run pytest tests/test_weibo.py 55 | 56 | 57 | twitter-test: 58 | needs: plugin-load 59 | runs-on: ubuntu-latest 60 | steps: 61 | - uses: actions/checkout@v4 62 | - name: setup-pytest 63 | uses: ./.github/actions/setup-pytest 64 | 65 | - name: pytest 66 | run: uv run pytest tests/test_x.py 67 | 68 | 69 | 70 | acfun-test: 71 | needs: plugin-load 72 | runs-on: ubuntu-latest 73 | steps: 74 | - uses: actions/checkout@v4 75 | - name: setup-pytest 76 | uses: ./.github/actions/setup-pytest 77 | 78 | - name: pytest 79 | run: uv run pytest tests/test_acfun.py 80 | 81 | ncm-test: 82 | needs: plugin-load 83 | runs-on: ubuntu-latest 84 | steps: 85 | - uses: actions/checkout@v4 86 | - name: setup-pytest 87 | uses: ./.github/actions/setup-pytest 88 | 89 | - name: pytest 90 | run: uv run pytest tests/test_ncm.py 91 | 92 | douyin-test: 93 | needs: plugin-load 94 | runs-on: ubuntu-latest 95 | steps: 96 | - uses: actions/checkout@v4 97 | - name: setup-pytest 98 | uses: ./.github/actions/setup-pytest 99 | 100 | - name: pytest 101 | run: uv run pytest tests/test_douyin.py 102 | 103 | xhs-test: 104 | needs: plugin-load 105 | runs-on: ubuntu-latest 106 | steps: 107 | - uses: actions/checkout@v4 108 | - name: setup-pytest 109 | uses: ./.github/actions/setup-pytest 110 | 111 | - name: pytest 112 | run: uv run pytest tests/test_xhs.py 113 | 114 | kuaishou-test: 115 | needs: plugin-load 116 | runs-on: ubuntu-latest 117 | steps: 118 | - uses: actions/checkout@v4 119 | - name: setup-pytest 120 | uses: ./.github/actions/setup-pytest 121 | 122 | - name: pytest 123 | run: uv run pytest tests/test_kuaishou.py 124 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | 86 | 87 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 88 | __pypackages__/ 89 | 90 | # Celery stuff 91 | celerybeat-schedule 92 | celerybeat.pid 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # Environments 98 | .env 99 | .venv 100 | env/ 101 | venv/ 102 | ENV/ 103 | env.bak/ 104 | venv.bak/ 105 | 106 | # Spyder project settings 107 | .spyderproject 108 | .spyproject 109 | 110 | # Rope project settings 111 | .ropeproject 112 | 113 | # mkdocs documentation 114 | /site 115 | 116 | # mypy 117 | .mypy_cache/ 118 | .dmypy.json 119 | dmypy.json 120 | 121 | # Pyre type checker 122 | .pyre/ 123 | 124 | # pytype static type analyzer 125 | .pytype/ 126 | 127 | # Cython debug symbols 128 | cython_debug/ 129 | 130 | # ruff 131 | .ruff_cache/ 132 | 133 | # LSP config files 134 | pyrightconfig.json 135 | 136 | # uv 137 | uv.lock 138 | 139 | 140 | # PyCharm 141 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 142 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 143 | # and can be added to the global gitignore or merged into this file. For a more nuclear 144 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
145 | #.idea/ 146 | 147 | # VisualStudioCode 148 | .vscode/* 149 | !.vscode/settings.json 150 | !.vscode/tasks.json 151 | !.vscode/launch.json 152 | !.vscode/extensions.json 153 | !.vscode/*.code-snippets 154 | 155 | cache 156 | data 157 | config 158 | 159 | .DS_Store -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_install_hook_types: [pre-commit, prepare-commit-msg] 2 | ci: 3 | autofix_commit_msg: ":rotating_light: auto fix by pre-commit hooks" 4 | autofix_prs: true 5 | autoupdate_branch: master 6 | autoupdate_schedule: monthly 7 | autoupdate_commit_msg: ":arrow_up: auto update by pre-commit hooks" 8 | repos: 9 | - repo: https://github.com/astral-sh/ruff-pre-commit 10 | rev: v0.11.12 11 | hooks: 12 | - id: ruff 13 | args: [--fix] 14 | stages: [pre-commit] 15 | - id: ruff-format 16 | stages: [pre-commit] 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Les Freire 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | logo 4 | 5 | 6 | ## ✨ [Nonebot2](https://github.com/nonebot/nonebot2) 链接分享自动解析插件 ✨ 7 | 8 | 9 | license 10 | 11 | 12 | pypi 13 | 14 | python 15 | 16 | ruff 17 | 18 | 19 | uv 20 | 21 | 22 | onebot 23 | 24 | 25 | pre-commit 26 | 27 | 28 |
29 | 30 | > [!IMPORTANT] 31 | > **收藏项目**,你将从 GitHub 上无延迟地接收所有发布通知~⭐️ 32 | 33 | starify 34 | 35 | ## 📖 介绍 36 | 37 | [nonebot-plugin-resolver](https://github.com/zhiyu1998/nonebot-plugin-resolver) 重制版 38 | 39 | | 平台 | 触发的消息形态 | 视频 | 图集 | 音频 | 40 | | ------- | ------------------------------------- | ---- | ---- | ---- | 41 | | B站 | BV号/链接(包含短链,BV,av)/卡片/小程序 | ✅​ | ✅​ | ✅​ | 42 | | 抖音 | 链接(分享链接,兼容电脑端链接) | ✅​ | ✅​ | ❌️ | 43 | | 网易云 | 链接/卡片 | ❌️ | ❌️ | ✅​ | 44 | | 微博 | 链接(博文,视频,show) | ✅​ | ✅​ | ❌️ | 45 | | 小红书 | 链接(含短链)/卡片 | ✅​ | ✅​ | ❌️ | 46 | | 酷狗 | 链接/卡片 | ❌️ | ❌️ | ✅​ | 47 | | 快手 | 链接(包含标准链接和短链) | ✅​ | ✅​ | ❌️ | 48 | | acfun | 链接 | ✅​ | ❌️ | ❌️ | 49 | | youtube | 链接(含短链) | ✅​ | ❌️ | ✅​ | 50 | | tiktok | 链接 | ✅​ | ❌️ | ❌️ | 51 | | twitter | 链接 | ✅​ | ✅​ | ❌️ | 52 | 53 | 支持的链接,可参考 [测试链接](https://github.com/fllesser/nonebot-plugin-resolver2/blob/master/test_url.md) 54 | 55 | ## 💿 安装 56 | > [!Warning] 57 | > **如果你已经在使用 nonebot-plugin-resolver,请在安装此插件前卸载** 58 | 59 |
60 | 使用 nb-cli 安装/更新 61 | 在 nonebot2 项目的根目录下打开命令行, 输入以下指令即可安装 62 | 63 | nb plugin install nonebot-plugin-resolver2 --upgrade 64 | 使用 pypi 源更新 65 | 66 | nb plugin install nonebot-plugin-resolver2 --upgrade -i https://pypi.org/simple 67 | 安装仓库 dev 分支 68 | 69 | uv pip install git+https://github.com/fllesser/nonebot-plugin-resolver2.git@dev 70 |
71 | 72 |
73 | 使用包管理器安装 74 | 在 nonebot2 项目的插件目录下, 打开命令行, 根据你使用的包管理器, 输入相应的安装命令 75 |
76 | uv 77 | 使用 uv 安装 78 | 79 | uv add nonebot-plugin-resolver2 80 | 安装仓库 dev 分支 81 | 82 | uv add git+https://github.com/fllesser/nonebot-plugin-resolver2.git@dev 83 |
84 | 85 | 86 |
87 | pip 88 | 89 | pip install --upgrade nonebot-plugin-resolver2 90 |
91 |
92 | pdm 93 | 94 | pdm add nonebot-plugin-resolver2 95 |
96 |
97 | poetry 98 | 99 | poetry add nonebot-plugin-resolver2 100 |
101 | 102 | 打开 nonebot2 项目根目录下的 `pyproject.toml` 文件, 在 `[tool.nonebot]` 部分追加写入 103 | 104 | plugins = ["nonebot_plugin_resolver2"] 105 | 106 |
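一个完整的最小片段示例如下(假设 `pyproject.toml` 中尚无 `[tool.nonebot]` 部分,仅供参考):

[tool.nonebot]
plugins = ["nonebot_plugin_resolver2"]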
107 | 108 |
109 | 安装必要组件 110 | 大部分解析都依赖于 ffmpeg 111 | 112 | ubuntu/debian 113 | 114 | sudo apt-get install ffmpeg 115 | 116 | 其他 linux 参考(原项目推荐): https://gitee.com/baihu433/ffmpeg 117 | 118 | Windows 参考(原项目推荐): https://www.jianshu.com/p/5015a477de3c 119 |
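安装完成后,可在命令行执行以下命令确认 ffmpeg 可被正常调用(应输出版本信息):

ffmpeg -version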
120 | 121 | ## ⚙️ 配置 122 | 123 | 在 nonebot2 项目的`.env`文件中按需添加下表中的配置 124 | 125 | | 配置项 | 必填 | 默认值 | 说明 | 126 | | :----------------------: | :---: | :----: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | 127 | | NICKNAME | 否 | [""] | nonebot2 内置配置,可作为解析结果消息的前缀 | 128 | | API_TIMEOUT | 否 | 30.0 | nonebot2 内置配置,若服务器上传带宽太低,建议调高,防止超时 | 129 | | r_xhs_ck | 否 | "" | 小红书 cookie,想要解析小红书必填 | 130 | | r_bili_ck | 否 | "" | B站 cookie,可不填,若填写,必须含有 SESSDATA 项,可附加 B 站 AI 总结功能;如需长期使用此凭据,则不应再在**浏览器登录该账户**,以免 Cookies 被刷新,建议注册个小号获取 cookie | 131 | | r_ytb_ck | 否 | "" | Youtube cookie,若 Youtube 视频因人机检测下载失败,则需填写 | 132 | | r_proxy | 否 | None | youtube, tiktok, x 的代理 | 133 | | r_need_upload | 否 | False | 音频解析,是否需要上传群文件 | 134 | | r_need_forward | 否 | True | **不超过** 4 条的解析消息是否需要合并转发,配置为 False,超过 4 条的解析消息仍然会用合并转发包裹 | 135 | | r_use_base64 | 否 | False | 视频,图片,音频是否使用 base64 发送,注意:base64 的编解码和传输会占用更多的内存、性能和带宽,甚至可能会使 websocket 连接崩溃,因此仅推荐 nonebot 和协议端不在同一机器,或使用 docker 且不想映射插件缓存目录的用户配置 | 136 | | r_video_duration_maximum | 否 | 480 | B站视频最大解析时长,单位:_秒_ | 137 | | r_max_size | 否 | 100 | 音视频下载最大文件大小,单位 MB,超过该配置将阻断下载 | 138 | | r_disable_resolvers | 否 | [] | 全局禁止的解析,示例 r_disable_resolvers=["bilibili", "douyin"] 表示禁止了哔哩哔哩和抖音,请根据自己需求填写 ["bilibili", "douyin", "kugou", "kuaishou", "twitter", "ncm", "ytb", "acfun", "tiktok", "weibo", "xiaohongshu"] | 139 | 140 | 141 | ## 🎉 使用 142 | ### 指令表 143 | | 指令 | 权限 | 需要@ | 范围 | 说明 | 144 | | :----------: | :-------------------: | :---: | :---: | :--------------------: | 145 | | 开启解析 | SUPERUSER/OWNER/ADMIN | 是 | 群聊 | 开启解析 | 146 | | 关闭解析 | SUPERUSER/OWNER/ADMIN | 是 | 群聊 | 关闭解析 | 147 | | 开启所有解析 | SUPERUSER | 否 | 私聊 | 开启所有群的解析 | 148 | | 关闭所有解析 | SUPERUSER | 否 | 私聊 | 关闭所有群的解析 | 149 | | 查看关闭解析 | SUPERUSER | 否 | - | 获取已经关闭解析的群聊 | 150 | | bm BV... 
| USER | 否 | - | 下载 b站 音乐 | 151 | 152 | ## 致谢 153 | [nonebot-plugin-resolver](https://github.com/zhiyu1998/nonebot-plugin-resolver) 154 | [parse-video-py](https://github.com/wujunwei928/parse-video-py) -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/__init__.py: -------------------------------------------------------------------------------- 1 | from nonebot import get_driver, logger 2 | from nonebot.plugin import PluginMetadata 3 | 4 | from .config import ( 5 | Config, 6 | plugin_cache_dir, 7 | rconfig, 8 | scheduler, 9 | ytb_cookies_file, 10 | ) 11 | from .cookie import save_cookies_to_netscape 12 | from .matchers import resolvers 13 | 14 | __plugin_meta__ = PluginMetadata( 15 | name="链接分享自动解析", 16 | description="BV号/链接/小程序/卡片 | B站/抖音/网易云/微博/小红书/youtube/tiktok/twitter/acfun", 17 | usage="发送支持平台的(BV号/链接/小程序/卡片)即可", 18 | type="application", 19 | homepage="https://github.com/fllesser/nonebot-plugin-resolver2", 20 | config=Config, 21 | supported_adapters={"~onebot.v11"}, 22 | extra={ 23 | "author": "fllesser", 24 | "email": "fllessive@gmail.com", 25 | "homepage": "https://github.com/fllesser/nonebot-plugin-resolver2", 26 | }, 27 | ) 28 | 29 | 30 | @get_driver().on_startup 31 | async def _(): 32 | if rconfig.r_ytb_ck: 33 | save_cookies_to_netscape(rconfig.r_ytb_ck, ytb_cookies_file, "youtube.com") 34 | logger.debug(f"保存 youtube cookie 到 {ytb_cookies_file}") 35 | 36 | destroy_resolvers: list[str] = [] 37 | if not rconfig.r_xhs_ck: 38 | if xiaohongshu := resolvers.pop("xiaohongshu", None): 39 | xiaohongshu.destroy() 40 | logger.warning("未配置小红书 cookie, 小红书解析已关闭") 41 | destroy_resolvers.append("xiaohongshu") 42 | 43 | # 关闭全局禁用的解析 44 | for resolver in rconfig.r_disable_resolvers: 45 | if matcher := resolvers.get(resolver, None): 46 | matcher.destroy() 47 | destroy_resolvers.append(resolver) 48 | if destroy_resolvers: 49 | logger.warning(f"已关闭解析: {', '.join(destroy_resolvers)}") 50 | 51 | 52 | @scheduler.scheduled_job("cron", hour=1, minute=0, id="resolver2-clean-local-cache") 53 | async def clean_plugin_cache(): 54 | import asyncio 55 | 56 | from .download.utils import safe_unlink 57 | 58 | try: 59 | files = [f for f in plugin_cache_dir.iterdir() if f.is_file()] 60 | if not files: 61 | logger.info("no cache files to clean") 62 | return 63 | 64 | # 并发删除文件 65 | tasks = [safe_unlink(file) for file in files] 66 | await asyncio.gather(*tasks) 67 | 68 | logger.info(f"Successfully cleaned {len(files)} cache files") 69 | except Exception as e: 70 | logger.error(f"Error while cleaning cache: {e}") 71 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Literal 3 | 4 | from nonebot import get_driver, get_plugin_config, require 5 | from pydantic import BaseModel 6 | 7 | require("nonebot_plugin_localstore") 8 | require("nonebot_plugin_apscheduler") 9 | from nonebot_plugin_apscheduler import scheduler # noqa: F401 10 | import nonebot_plugin_localstore as store 11 | 12 | MatcherNames = Literal[ 13 | "bilibili", 14 | "acfun", 15 | "douyin", 16 | "ytb", 17 | "kugou", 18 | "kuaishou", 19 | "ncm", 20 | "twitter", 21 | "tiktok", 22 | "weibo", 23 | "xiaohongshu", 24 | ] 25 | 26 | 27 | class Config(BaseModel): 28 | # 小红书 cookies 29 | r_xhs_ck: str | None = None 30 | # bilibili cookies 31 | r_bili_ck: str | None = None 32 | # youtube cookies 33 | r_ytb_ck: str | 
None = None 34 | # 代理 35 | r_proxy: str | None = None 36 | # 是否需要上传音频文件 37 | r_need_upload: bool = False 38 | # 4 条以内消息,是否需要合并转发 39 | r_need_forward: bool = True 40 | # 是否使用 base64 编码发送图片,音频,视频 41 | r_use_base64: bool = False 42 | # 资源最大大小 默认 100 单位 MB 43 | r_max_size: int = 100 44 | # 视频最大时长 45 | r_video_duration_maximum: int = 480 46 | # 禁止的解析器 47 | r_disable_resolvers: list[MatcherNames] = [] 48 | 49 | 50 | plugin_cache_dir: Path = store.get_plugin_cache_dir() 51 | plugin_config_dir: Path = store.get_plugin_config_dir() 52 | plugin_data_dir: Path = store.get_plugin_data_dir() 53 | 54 | # 配置加载 55 | rconfig: Config = get_plugin_config(Config) 56 | 57 | # cookie 存储位置 58 | ytb_cookies_file: Path = plugin_config_dir / "ytb_cookies.txt" 59 | 60 | # 全局名称 61 | NICKNAME: str = next(iter(get_driver().config.nickname), "") 62 | # 根据是否为国外机器声明代理 63 | PROXY: str | None = rconfig.r_proxy 64 | # 哔哩哔哩限制的最大视频时长(默认8分钟)单位:秒 65 | DURATION_MAXIMUM: int = rconfig.r_video_duration_maximum 66 | # 资源最大大小 67 | MAX_SIZE: int = rconfig.r_max_size 68 | # 是否需要上传音频文件 69 | NEED_UPLOAD: bool = rconfig.r_need_upload 70 | # 是否需要合并转发 71 | NEED_FORWARD: bool = rconfig.r_need_forward 72 | # 是否使用 base64 编码发送图片,音频,视频 73 | USE_BASE64: bool = rconfig.r_use_base64 74 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/constant.py: -------------------------------------------------------------------------------- 1 | from typing import Final 2 | 3 | COMMON_HEADER: Final[dict[str, str]] = { 4 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " 5 | "Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36" 6 | } 7 | 8 | IOS_HEADER: Final[dict[str, str]] = { 9 | "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) " 10 | "Version/16.6 Mobile/15E148 Safari/604.1 Edg/132.0.0.0" 11 | } 12 | 13 | ANDROID_HEADER: Final[dict[str, str]] = { 14 | "User-Agent": "Mozilla/5.0 (Linux; Android 15; SM-G998B) AppleWebKit/537.36 (KHTML, like Gecko) " 15 | "Chrome/132.0.0.0 Mobile Safari/537.36 Edg/132.0.0.0" 16 | } 17 | 18 | 19 | # 视频最大大小(MB) 20 | VIDEO_MAX_MB: Final[int] = 100 21 | 22 | # 解析列表文件名 23 | DISABLED_GROUPS: Final[str] = "disable_group_list.json" 24 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/cookie.py: -------------------------------------------------------------------------------- 1 | from http import cookiejar 2 | from pathlib import Path 3 | 4 | 5 | def save_cookies_to_netscape(cookies_str: str, file_path: Path, domain: str): 6 | """以 netscape 格式保存 cookies 7 | 8 | Args: 9 | cookies_str: cookies 字符串 10 | file_path: 保存的文件路径 11 | domain: 域名 12 | """ 13 | # 创建 MozillaCookieJar 对象 14 | cj = cookiejar.MozillaCookieJar(file_path) 15 | 16 | # 从字符串创建 cookies 并添加到 MozillaCookieJar 对象 17 | for cookie in cookies_str.split(";"): 18 | name, value = cookie.strip().split("=", 1) 19 | cj.set_cookie( 20 | cookiejar.Cookie( 21 | version=0, 22 | name=name, 23 | value=value, 24 | port=None, 25 | port_specified=False, 26 | domain="." 
+ domain, 27 | domain_specified=True, 28 | domain_initial_dot=False, 29 | path="/", 30 | path_specified=True, 31 | secure=True, 32 | expires=0, 33 | discard=True, 34 | comment=None, 35 | comment_url=None, 36 | rest={"HttpOnly": ""}, 37 | rfc2109=False, 38 | ) 39 | ) 40 | 41 | # 保存 cookies 到文件 42 | cj.save(ignore_discard=True, ignore_expires=True) 43 | 44 | 45 | def ck2dict(cookies_str: str) -> dict[str, str]: 46 | """将 cookies 字符串转换为字典 47 | 48 | Args: 49 | cookies_str: cookies 字符串 50 | 51 | Returns: 52 | dict[str, str]: 字典 53 | """ 54 | res = {} 55 | for cookie in cookies_str.split(";"): 56 | name, value = cookie.strip().split("=", 1) 57 | res[name] = value 58 | return res 59 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/download/__init__.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | 4 | import aiofiles 5 | import aiohttp 6 | from nonebot import logger 7 | from tqdm.asyncio import tqdm 8 | 9 | from ..config import MAX_SIZE, plugin_cache_dir 10 | from ..constant import COMMON_HEADER 11 | from ..exception import DownloadException 12 | from .utils import exec_ffmpeg_cmd, generate_file_name, safe_unlink 13 | 14 | # 全局 session 15 | _SESSION: aiohttp.ClientSession | None = None 16 | 17 | 18 | async def _get_session() -> aiohttp.ClientSession: 19 | """获取或创建全局 session""" 20 | global _SESSION 21 | if _SESSION is None or _SESSION.closed: 22 | _SESSION = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300, connect=10.0)) 23 | return _SESSION 24 | 25 | 26 | async def download_file_by_stream( 27 | url: str, 28 | *, 29 | file_name: str | None = None, 30 | proxy: str | None = None, 31 | ext_headers: dict[str, str] | None = None, 32 | ) -> Path: 33 | """download file by url with stream 34 | 35 | Args: 36 | url (str): url address 37 | file_name (str | None, optional): file name. Defaults to get name by parse_url_resource_name. 38 | proxy (str | None, optional): proxy url. Defaults to None. 39 | ext_headers (dict[str, str] | None, optional): ext headers. Defaults to None. 
40 | 41 | Returns: 42 | Path: file path 43 | 44 | Raises: 45 | aiohttp.ClientError: When download fails 46 | asyncio.TimeoutError: When download times out 47 | """ 48 | if not file_name: 49 | file_name = generate_file_name(url) 50 | file_path = plugin_cache_dir / file_name 51 | 52 | # 如果文件存在,则直接返回 53 | if file_path.exists(): 54 | return file_path 55 | 56 | headers = {**COMMON_HEADER, **(ext_headers or {})} 57 | 58 | try: 59 | session = await _get_session() 60 | async with session.get(url, headers=headers, proxy=proxy) as resp: 61 | resp.raise_for_status() 62 | # 获取文件大小 63 | content_length = resp.headers.get("Content-Length") 64 | content_length = int(content_length) if content_length else None 65 | if content_length and (file_size := content_length / 1024 / 1024) > MAX_SIZE: 66 | logger.warning(f"预下载 {file_name} 大小 {file_size:.2f} MB 超过 {MAX_SIZE} MB 限制, 取消下载") 67 | raise DownloadException("音视频流大小超过配置限制,取消下载") 68 | with tqdm( 69 | total=content_length, # 为 None 时,无进度条 70 | unit="B", 71 | unit_scale=True, 72 | unit_divisor=1024, 73 | dynamic_ncols=True, 74 | colour="green", 75 | desc=file_name, 76 | ) as bar: 77 | async with aiofiles.open(file_path, "wb") as file: 78 | async for chunk in resp.content.iter_chunked(1024 * 1024): 79 | await file.write(chunk) 80 | bar.update(len(chunk)) 81 | except asyncio.TimeoutError: 82 | await safe_unlink(file_path) 83 | logger.error(f"url: {url}, file_path: {file_path} 下载超时") 84 | raise DownloadException("资源下载超时") 85 | except aiohttp.ClientError as e: 86 | await safe_unlink(file_path) 87 | logger.error(f"url: {url}, file_path: {file_path} 下载过程中出现异常{e}") 88 | raise 89 | 90 | return file_path 91 | 92 | 93 | async def download_video( 94 | url: str, 95 | *, 96 | video_name: str | None = None, 97 | proxy: str | None = None, 98 | ext_headers: dict[str, str] | None = None, 99 | ) -> Path: 100 | """download video file by url with stream 101 | 102 | Args: 103 | url (str): url address 104 | video_name (str | None, optional): video name. Defaults to get name by parse url. 105 | proxy (str | None, optional): proxy url. Defaults to None. 106 | ext_headers (dict[str, str] | None, optional): ext headers. Defaults to None. 107 | 108 | Returns: 109 | Path: video file path 110 | 111 | Raises: 112 | aiohttp.ClientError: When download fails 113 | asyncio.TimeoutError: When download times out 114 | """ 115 | if video_name is None: 116 | video_name = generate_file_name(url, ".mp4") 117 | return await download_file_by_stream(url, file_name=video_name, proxy=proxy, ext_headers=ext_headers) 118 | 119 | 120 | async def download_audio( 121 | url: str, 122 | *, 123 | audio_name: str | None = None, 124 | proxy: str | None = None, 125 | ext_headers: dict[str, str] | None = None, 126 | ) -> Path: 127 | """download audio file by url with stream 128 | 129 | Args: 130 | url (str): url address 131 | audio_name (str | None, optional): audio name. Defaults to get name by parse_url_resource_name. 132 | proxy (str | None, optional): proxy url. Defaults to None. 133 | ext_headers (dict[str, str] | None, optional): ext headers. Defaults to None. 
134 | 135 | Returns: 136 | Path: audio file path 137 | 138 | Raises: 139 | aiohttp.ClientError: When download fails 140 | asyncio.TimeoutError: When download times out 141 | """ 142 | if audio_name is None: 143 | audio_name = generate_file_name(url, ".mp3") 144 | return await download_file_by_stream(url, file_name=audio_name, proxy=proxy, ext_headers=ext_headers) 145 | 146 | 147 | async def download_img( 148 | url: str, 149 | *, 150 | img_name: str | None = None, 151 | proxy: str | None = None, 152 | ext_headers: dict[str, str] | None = None, 153 | ) -> Path: 154 | """download image file by url with stream 155 | 156 | Args: 157 | url (str): url 158 | img_name (str, optional): image name. Defaults to None. 159 | proxy (str, optional): proxy url. Defaults to None. 160 | ext_headers (dict[str, str], optional): ext headers. Defaults to None. 161 | 162 | Returns: 163 | Path: image file path 164 | 165 | Raises: 166 | aiohttp.ClientError: When download fails 167 | asyncio.TimeoutError: When download times out 168 | """ 169 | if img_name is None: 170 | img_name = generate_file_name(url, ".jpg") 171 | return await download_file_by_stream(url, file_name=img_name, proxy=proxy, ext_headers=ext_headers) 172 | 173 | 174 | async def download_imgs_without_raise( 175 | urls: list[str], 176 | *, 177 | ext_headers: dict[str, str] | None = None, 178 | proxy: str | None = None, 179 | ) -> list[Path]: 180 | """download images without raise 181 | 182 | Args: 183 | urls (list[str]): urls 184 | ext_headers (dict[str, str] | None, optional): ext headers. Defaults to None. 185 | proxy (str | None, optional): proxy url. Defaults to None. 186 | 187 | Returns: 188 | list[Path]: image file paths 189 | """ 190 | paths_or_errs = await asyncio.gather( 191 | *[download_img(url, ext_headers=ext_headers, proxy=proxy) for url in urls], return_exceptions=True 192 | ) 193 | return [p for p in paths_or_errs if isinstance(p, Path)] 194 | 195 | 196 | async def merge_av(*, v_path: Path, a_path: Path, output_path: Path) -> None: 197 | """合并视频和音频 198 | 199 | Args: 200 | v_path (Path): 视频文件路径 201 | a_path (Path): 音频文件路径 202 | output_path (Path): 输出文件路径 203 | """ 204 | logger.info(f"Merging {v_path.name} and {a_path.name} to {output_path.name}") 205 | 206 | cmd = [ 207 | "ffmpeg", 208 | "-y", 209 | "-i", 210 | str(v_path), 211 | "-i", 212 | str(a_path), 213 | "-c", 214 | "copy", 215 | "-map", 216 | "0:v:0", 217 | "-map", 218 | "1:a:0", 219 | str(output_path), 220 | ] 221 | 222 | await exec_ffmpeg_cmd(cmd) 223 | await asyncio.gather(safe_unlink(v_path), safe_unlink(a_path)) 224 | 225 | 226 | async def merge_av_h264(*, v_path: Path, a_path: Path, output_path: Path) -> None: 227 | """合并视频和音频,并使用 H.264 编码 228 | 229 | Args: 230 | v_path (Path): 视频文件路径 231 | a_path (Path): 音频文件路径 232 | output_path (Path): 输出文件路径 233 | """ 234 | logger.info(f"Merging {v_path.name} and {a_path.name} to {output_path.name}") 235 | 236 | # 修改命令以确保视频使用 H.264 编码 237 | cmd = [ 238 | "ffmpeg", 239 | "-y", 240 | "-i", 241 | str(v_path), 242 | "-i", 243 | str(a_path), 244 | "-c:v", 245 | "libx264", # 明确指定使用 H.264 编码 246 | "-preset", 247 | "medium", # 编码速度和质量的平衡 248 | "-crf", 249 | "23", # 质量因子,值越低质量越高 250 | "-c:a", 251 | "aac", # 音频使用 AAC 编码 252 | "-b:a", 253 | "128k", # 音频比特率 254 | "-map", 255 | "0:v:0", 256 | "-map", 257 | "1:a:0", 258 | str(output_path), 259 | ] 260 | 261 | await exec_ffmpeg_cmd(cmd) 262 | await asyncio.gather(safe_unlink(v_path), safe_unlink(a_path)) 263 | 264 | 265 | async def encode_video_to_h264(video_path: Path) -> Path: 266 | """将视频重新编码到 h264 
267 | 268 | Args: 269 | video_path (Path): 视频路径 270 | 271 | Returns: 272 | Path: 编码后的视频路径 273 | """ 274 | output_path = video_path.with_name(f"{video_path.stem}_h264{video_path.suffix}") 275 | if output_path.exists(): 276 | return output_path 277 | cmd = [ 278 | "ffmpeg", 279 | "-y", 280 | "-i", 281 | str(video_path), 282 | "-c:v", 283 | "libx264", 284 | "-preset", 285 | "medium", 286 | "-crf", 287 | "23", 288 | str(output_path), 289 | ] 290 | await exec_ffmpeg_cmd(cmd) 291 | logger.success(f"视频重新编码为 H.264 成功: {output_path}, {fmt_size(output_path)}") 292 | await safe_unlink(video_path) 293 | return output_path 294 | 295 | 296 | def fmt_size(file_path: Path) -> str: 297 | """获取文件大小 298 | 299 | Args: 300 | file_path (Path): 文件路径 301 | """ 302 | return f"大小: {file_path.stat().st_size / 1024 / 1024:.2f} MB" 303 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/download/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import hashlib 3 | from pathlib import Path 4 | import re 5 | from urllib.parse import urlparse 6 | 7 | from nonebot import logger 8 | 9 | 10 | def keep_zh_en_num(text: str) -> str: 11 | """ 12 | 保留字符串中的中英文和数字 13 | """ 14 | return re.sub(r"[^\u4e00-\u9fa5a-zA-Z0-9\-_]", "", text.replace(" ", "_")) 15 | 16 | 17 | async def safe_unlink(path: Path): 18 | """ 19 | 安全删除文件 20 | """ 21 | try: 22 | await asyncio.to_thread(path.unlink, missing_ok=True) 23 | except Exception as e: 24 | logger.error(f"删除 {path} 失败: {e}") 25 | 26 | 27 | async def exec_ffmpeg_cmd(cmd: list[str]) -> None: 28 | """ 29 | 执行 ffmpeg 命令 30 | """ 31 | try: 32 | process = await asyncio.create_subprocess_exec( 33 | *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE 34 | ) 35 | _, stderr = await process.communicate() 36 | return_code = process.returncode 37 | except FileNotFoundError: 38 | raise RuntimeError("ffmpeg 未安装或无法找到可执行文件") 39 | 40 | if return_code != 0: 41 | error_msg = stderr.decode().strip() 42 | raise RuntimeError(f"ffmpeg 执行失败: {error_msg}") 43 | 44 | 45 | def generate_file_name(url: str, default_suffix: str = "") -> str: 46 | """ 47 | 根据 url 生成文件名 48 | """ 49 | # 根据 url 获取文件后缀 50 | path = Path(urlparse(url).path) 51 | suffix = path.suffix if path.suffix else default_suffix 52 | # 获取 url 的 md5 值 53 | url_hash = hashlib.md5(url.encode()).hexdigest()[:16] 54 | file_name = f"{url_hash}{suffix}" 55 | return file_name 56 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/download/ytdlp.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections import OrderedDict 3 | from pathlib import Path 4 | from typing import Any 5 | 6 | import yt_dlp 7 | 8 | from ..config import PROXY, plugin_cache_dir 9 | from ..exception import ParseException 10 | from .utils import generate_file_name 11 | 12 | 13 | class LimitedSizeDict(OrderedDict): 14 | """ 15 | 定长字典 16 | """ 17 | 18 | def __init__(self, *args, max_size=20, **kwargs): 19 | self.max_size = max_size 20 | super().__init__(*args, **kwargs) 21 | 22 | def __setitem__(self, key, value): 23 | super().__setitem__(key, value) 24 | if len(self) > self.max_size: 25 | self.popitem(last=False) # 移除最早添加的项 26 | 27 | 28 | # 使用定长字典缓存链接信息,最多保存 20 个条目 29 | url_info_mapping: LimitedSizeDict = LimitedSizeDict() 30 | 31 | # 获取视频信息的 基础 opts 32 | ydl_extract_base_opts: dict[str, Any] = { 33 | "quiet": True, 34 | "skip_download": 
True, 35 | "force_generic_extractor": True, 36 | } 37 | 38 | # 下载视频的 基础 opts 39 | ydl_download_base_opts: dict[str, Any] = {} 40 | 41 | if PROXY is not None: 42 | ydl_download_base_opts["proxy"] = PROXY 43 | ydl_extract_base_opts["proxy"] = PROXY 44 | 45 | 46 | async def get_video_info(url: str, cookiefile: Path | None = None) -> dict[str, str]: 47 | """get video info by url 48 | 49 | Args: 50 | url (str): url address 51 | cookiefile (Path | None, optional): cookie file path. Defaults to None. 52 | 53 | Returns: 54 | dict[str, str]: video info 55 | """ 56 | info_dict = url_info_mapping.get(url, None) 57 | if info_dict: 58 | return info_dict 59 | ydl_opts = {} | ydl_extract_base_opts 60 | 61 | if cookiefile: 62 | ydl_opts["cookiefile"] = str(cookiefile) 63 | 64 | with yt_dlp.YoutubeDL(ydl_opts) as ydl: 65 | info_dict = await asyncio.to_thread(ydl.extract_info, url, download=False) 66 | if not info_dict: 67 | raise ParseException("获取视频信息失败") 68 | url_info_mapping[url] = info_dict 69 | return info_dict 70 | 71 | 72 | async def ytdlp_download_video(url: str, cookiefile: Path | None = None) -> Path: 73 | """download video by yt-dlp 74 | 75 | Args: 76 | url (str): url address 77 | cookiefile (Path | None, optional): cookie file path. Defaults to None. 78 | 79 | Returns: 80 | Path: video file path 81 | """ 82 | info_dict = await get_video_info(url, cookiefile) 83 | duration = int(info_dict.get("duration", 600)) 84 | video_path = plugin_cache_dir / generate_file_name(url, ".mp4") 85 | if video_path.exists(): 86 | return video_path 87 | ydl_opts = { 88 | "outtmpl": f"{video_path}", 89 | "merge_output_format": "mp4", 90 | "format": f"bv[filesize<={duration // 10 + 10}M]+ba/b[filesize<={duration // 8 + 10}M]", 91 | "postprocessors": [{"key": "FFmpegVideoConvertor", "preferedformat": "mp4"}], 92 | } | ydl_download_base_opts 93 | 94 | if cookiefile: 95 | ydl_opts["cookiefile"] = str(cookiefile) 96 | 97 | with yt_dlp.YoutubeDL(ydl_opts) as ydl: 98 | await asyncio.to_thread(ydl.download, [url]) 99 | return video_path 100 | 101 | 102 | async def ytdlp_download_audio(url: str, cookiefile: Path | None = None) -> Path: 103 | """download audio by yt-dlp 104 | 105 | Args: 106 | url (str): url address 107 | cookiefile (Path | None, optional): cookie file path. Defaults to None. 
108 | 109 | Returns: 110 | Path: audio file path 111 | """ 112 | file_name = generate_file_name(url) 113 | audio_path = plugin_cache_dir / f"{file_name}.flac" 114 | if audio_path.exists(): 115 | return audio_path 116 | ydl_opts = { 117 | "outtmpl": f"{plugin_cache_dir / file_name}.%(ext)s", 118 | "format": "bestaudio/best", 119 | "postprocessors": [ 120 | { 121 | "key": "FFmpegExtractAudio", 122 | "preferredcodec": "flac", 123 | "preferredquality": "0", 124 | } 125 | ], 126 | } | ydl_download_base_opts 127 | 128 | if cookiefile: 129 | ydl_opts["cookiefile"] = str(cookiefile) 130 | with yt_dlp.YoutubeDL(ydl_opts) as ydl: 131 | await asyncio.to_thread(ydl.download, [url]) 132 | return audio_path 133 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/exception.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | from functools import wraps 3 | 4 | from nonebot.internal.matcher import current_matcher 5 | 6 | 7 | class DownloadException(Exception): 8 | """下载异常""" 9 | 10 | pass 11 | 12 | 13 | class ParseException(Exception): 14 | """解析异常""" 15 | 16 | pass 17 | 18 | 19 | def handle_exception(error_message: str | None = None): 20 | """处理 matcher 中的 DownloadException 和 ParseException 异常的装饰器 21 | 22 | Args: 23 | matcher: 需要处理的 matcher 类型 24 | error_message: 自定义错误消息 25 | """ 26 | 27 | def decorator(func: Callable): 28 | @wraps(func) 29 | async def wrapper(*args, **kwargs): 30 | try: 31 | return await func(*args, **kwargs) 32 | except (ParseException, DownloadException) as e: 33 | matcher = current_matcher.get() 34 | # logger.warning(f"{matcher.module_name}: {e}") 35 | await matcher.finish(error_message or str(e)) 36 | 37 | return wrapper 38 | 39 | return decorator 40 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/__init__.py: -------------------------------------------------------------------------------- 1 | from nonebot.matcher import Matcher 2 | 3 | from .acfun import acfun 4 | from .bilibili import bilibili 5 | from .douyin import douyin 6 | from .kuaishou import kuaishou 7 | from .kugou import kugou 8 | from .ncm import ncm 9 | from .tiktok import tiktok 10 | from .twitter import twitter 11 | from .weibo import weibo 12 | from .xiaohongshu import xiaohongshu 13 | from .ytb import ytb 14 | 15 | resolvers: dict[str, type[Matcher]] = { 16 | "bilibili": bilibili, 17 | "acfun": acfun, 18 | "douyin": douyin, 19 | "kuaishou": kuaishou, 20 | "ytb": ytb, 21 | "kugou": kugou, 22 | "ncm": ncm, 23 | "twitter": twitter, 24 | "tiktok": tiktok, 25 | "weibo": weibo, 26 | "xiaohongshu": xiaohongshu, 27 | } 28 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/acfun.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from nonebot import logger, on_keyword 4 | from nonebot.adapters.onebot.v11 import MessageEvent 5 | from nonebot.rule import Rule 6 | 7 | from ..config import NICKNAME 8 | from ..exception import handle_exception 9 | from ..parsers import AcfunParser 10 | from .filter import is_not_in_disabled_groups 11 | from .helper import get_video_seg 12 | 13 | acfun = on_keyword(keywords={"acfun.cn"}, rule=Rule(is_not_in_disabled_groups)) 14 | 15 | parser = AcfunParser() 16 | 17 | 18 | @acfun.handle() 19 | @handle_exception() 20 | async def _(event: MessageEvent) -> None: 21 | message: str 
= event.message.extract_plain_text().strip() 22 | matched = re.search(r"(?:ac=|/ac)(\d+)", message) 23 | if not matched: 24 | logger.info("acfun 链接中不包含 acid, 忽略") 25 | return 26 | acid = int(matched.group(1)) 27 | url = f"https://www.acfun.cn/v/ac{acid}" 28 | m3u8_url, video_desc = await parser.parse_url(url) 29 | await acfun.send(f"{NICKNAME}解析 | 猴山 - {video_desc}") 30 | 31 | video_file = await parser.download_video(m3u8_url, acid) 32 | await acfun.send(get_video_seg(video_file)) 33 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/bilibili.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | import re 4 | 5 | from nonebot import logger, on_command, on_message 6 | from nonebot.adapters.onebot.v11 import Bot, Message, MessageEvent, MessageSegment 7 | from nonebot.adapters.onebot.v11.exception import ActionFailed 8 | from nonebot.params import CommandArg 9 | 10 | from ..config import DURATION_MAXIMUM, NEED_UPLOAD, NICKNAME, plugin_cache_dir 11 | from ..download import ( 12 | download_file_by_stream, 13 | download_img, 14 | download_imgs_without_raise, 15 | download_video, 16 | encode_video_to_h264, 17 | merge_av, 18 | ) 19 | from ..download.utils import keep_zh_en_num 20 | from ..exception import ParseException, handle_exception 21 | from ..parsers import BilibiliParser, get_redirect_url 22 | from .filter import is_not_in_disabled_groups 23 | from .helper import get_file_seg, get_img_seg, get_record_seg, get_video_seg, send_segments 24 | from .preprocess import ExtractText, Keyword, r_keywords 25 | 26 | bilibili = on_message( 27 | rule=is_not_in_disabled_groups & r_keywords("bilibili", "bili2233", "b23", "BV", "av"), 28 | priority=5, 29 | ) 30 | 31 | bili_music = on_command(cmd="bm", block=True) 32 | 33 | PATTERNS: dict[str, re.Pattern] = { 34 | "BV": re.compile(r"(BV[1-9a-zA-Z]{10})(?:\s)?(\d{1,3})?"), 35 | "av": re.compile(r"av(\d{6,})(?:\s)?(\d{1,3})?"), 36 | "/BV": re.compile(r"/(BV[1-9a-zA-Z]{10})()"), 37 | "/av": re.compile(r"/av(\d{6,})()"), 38 | "b23": re.compile(r"https?://b23\.tv/[A-Za-z\d\._?%&+\-=/#]+()()"), 39 | "bili2233": re.compile(r"https?://bili2233\.cn/[A-Za-z\d\._?%&+\-=/#]+()()"), 40 | "bilibili": re.compile(r"https?://(?:space|www|live|m|t)?\.?bilibili\.com/[A-Za-z\d\._?%&+\-=/#]+()()"), 41 | } 42 | 43 | parser = BilibiliParser() 44 | 45 | 46 | @bilibili.handle() 47 | @handle_exception() 48 | async def _(text: str = ExtractText(), keyword: str = Keyword()): 49 | pub_prefix = f"{NICKNAME}解析 | 哔哩哔哩 - " 50 | matched = PATTERNS[keyword].search(text) 51 | if not matched: 52 | logger.info(f"{text} 中的链接或 BV/av 号无效, 忽略") 53 | return 54 | url, video_id, page_num = str(matched.group(0)), str(matched.group(1)), matched.group(2) 55 | # 是否附加链接 56 | need_join_link = keyword != "bilibili" 57 | # 短链重定向地址 58 | if keyword in ("b23", "bili2233"): 59 | b23url = url 60 | url = await get_redirect_url(url, parser.headers) 61 | if url == b23url: 62 | logger.info(f"链接 {url} 无效,忽略") 63 | return 64 | 65 | # 链接中是否包含BV,av号 66 | if id_type := next((i for i in ("/BV", "/av") if i in url), None): 67 | if matched := PATTERNS[id_type].search(url): 68 | keyword = id_type 69 | video_id = str(matched.group(1)) 70 | # 预发送消息列表 71 | segs: list[Message | MessageSegment | str] = [] 72 | # 如果不是视频 73 | if not video_id: 74 | # 动态 75 | if "t.bilibili.com" in url or "/opus" in url: 76 | matched = re.search(r"/(\d+)", url) 77 | if not matched: 78 | logger.info(f"链接 
{url} 无效 - 没有获取到动态 id, 忽略") 79 | return 80 | opus_id = int(matched.group(1)) 81 | img_lst, text = await parser.parse_opus(opus_id) 82 | await bilibili.send(f"{pub_prefix}动态") 83 | segs = [text] 84 | if img_lst: 85 | paths = await download_imgs_without_raise(img_lst) 86 | segs.extend(get_img_seg(path) for path in paths) 87 | await send_segments(segs) 88 | await bilibili.finish() 89 | # 直播间解析 90 | elif "/live" in url: 91 | # https://live.bilibili.com/30528999?hotRank=0 92 | matched = re.search(r"/(\d+)", url) 93 | if not matched: 94 | logger.info(f"链接 {url} 无效 - 没有获取到直播间 id, 忽略") 95 | return 96 | room_id = int(matched.group(1)) 97 | title, cover, keyframe = await parser.parse_live(room_id) 98 | if not title: 99 | await bilibili.finish(f"{pub_prefix}直播 - 未找到直播间信息") 100 | res = f"{pub_prefix}直播 {title}" 101 | res += get_img_seg(await download_img(cover)) if cover else "" 102 | res += get_img_seg(await download_img(keyframe)) if keyframe else "" 103 | await bilibili.finish(res) 104 | # 专栏解析 105 | elif "/read" in url: 106 | matched = re.search(r"read/cv(\d+)", url) 107 | if not matched: 108 | logger.info(f"链接 {url} 无效 - 没有获取到专栏 id, 忽略") 109 | return 110 | read_id = int(matched.group(1)) 111 | texts, urls = await parser.parse_read(read_id) 112 | await bilibili.send(f"{pub_prefix}专栏") 113 | # 并发下载 114 | paths = await download_imgs_without_raise(urls) 115 | # 反转路径列表,pop 时,则为原始顺序,提高性能 116 | paths.reverse() 117 | segs = [] 118 | for text in texts: 119 | if text: 120 | segs.append(text) 121 | else: 122 | segs.append(get_img_seg(paths.pop())) 123 | if segs: 124 | await send_segments(segs) 125 | await bilibili.finish() 126 | # 收藏夹解析 127 | elif "/favlist" in url: 128 | # https://space.bilibili.com/22990202/favlist?fid=2344812202 129 | matched = re.search(r"favlist\?fid=(\d+)", url) 130 | if not matched: 131 | logger.info(f"链接 {url} 无效 - 没有获取到收藏夹 id, 忽略") 132 | return 133 | fav_id = int(matched.group(1)) 134 | # 获取收藏夹内容,并下载封面 135 | texts, urls = await parser.parse_favlist(fav_id) 136 | await bilibili.send(f"{pub_prefix}收藏夹\n正在为你找出相关链接请稍等...") 137 | paths: list[Path] = await download_imgs_without_raise(urls) 138 | segs = [] 139 | # 组合 text 和 image 140 | for path, text in zip(paths, texts): 141 | segs.append(get_img_seg(path) + text) 142 | await send_segments(segs) 143 | await bilibili.finish() 144 | else: 145 | logger.info(f"不支持的链接: {url}") 146 | await bilibili.finish() 147 | 148 | join_link = "" 149 | if need_join_link: 150 | url_id = f"av{video_id}" if keyword in ("av", "/av") else video_id 151 | join_link = f" https://www.bilibili.com/video/{url_id}" 152 | await bilibili.send(f"{pub_prefix}视频{join_link}") 153 | # 获取分集数 154 | page_num = int(page_num) if page_num else 1 155 | if url and (matched := re.search(r"(?:&|\?)p=(\d{1,3})", url)): 156 | page_num = int(matched.group(1)) 157 | # 视频 158 | if keyword in ("av", "/av"): 159 | video_info = await parser.parse_video_info(avid=int(video_id), page_num=page_num) 160 | else: 161 | video_info = await parser.parse_video_info(bvid=video_id, page_num=page_num) 162 | 163 | segs = [ 164 | video_info.title, 165 | get_img_seg(await download_img(video_info.cover_url)), 166 | video_info.display_info, 167 | video_info.ai_summary, 168 | ] 169 | if video_info.video_duration > DURATION_MAXIMUM: 170 | segs.append( 171 | f"⚠️ 当前视频时长 {video_info.video_duration // 60} 分钟, " 172 | f"超过管理员设置的最长时间 {DURATION_MAXIMUM // 60} 分钟!" 
173 | ) 174 | await send_segments(segs) 175 | 176 | if video_info.video_duration > DURATION_MAXIMUM: 177 | logger.info(f"video duration > {DURATION_MAXIMUM}, ignore download") 178 | return 179 | # 下载视频和音频 180 | file_name = f"{video_id}-{page_num}" 181 | video_path = plugin_cache_dir / f"{file_name}.mp4" 182 | 183 | if not video_path.exists(): 184 | # 下载视频和音频 185 | if video_info.audio_url: 186 | v_path, a_path = await asyncio.gather( 187 | download_file_by_stream( 188 | video_info.video_url, file_name=f"{file_name}-video.m4s", ext_headers=parser.headers 189 | ), 190 | download_file_by_stream( 191 | video_info.audio_url, file_name=f"{file_name}-audio.m4s", ext_headers=parser.headers 192 | ), 193 | ) 194 | await merge_av(v_path=v_path, a_path=a_path, output_path=video_path) 195 | else: 196 | video_path = await download_video( 197 | video_info.video_url, video_name=f"{file_name}.mp4", ext_headers=parser.headers 198 | ) 199 | 200 | # 发送视频 201 | try: 202 | await bilibili.send(get_video_seg(video_path)) 203 | except ActionFailed as e: 204 | message: str = e.info.get("message", "") 205 | # 无缩略图 206 | if not message.endswith(".png'"): 207 | raise 208 | # 重新编码为 h264 209 | logger.warning("视频上传出现无缩略图错误,将重新编码为 h264 进行上传") 210 | h264_video_path = await encode_video_to_h264(video_path) 211 | await bilibili.send(get_video_seg(h264_video_path)) 212 | 213 | 214 | @bili_music.handle() 215 | @handle_exception() 216 | async def _(bot: Bot, event: MessageEvent, args: Message = CommandArg()): 217 | text = args.extract_plain_text().strip() 218 | matched = re.match(r"^(BV[1-9a-zA-Z]{10})(?:\s)?(\d{1,3})?$", text) 219 | if not matched: 220 | await bili_music.finish("命令格式: bm BV1LpD3YsETa [集数](中括号表示可选)") 221 | 222 | # 回应用户 223 | await bot.call_api("set_msg_emoji_like", message_id=event.message_id, emoji_id="282") 224 | bvid, p_num = str(matched.group(1)), matched.group(2) 225 | 226 | # 处理分 p 227 | p_num = int(p_num) if p_num else 1 228 | video_info = await parser.parse_video_info(bvid=bvid, page_num=p_num) 229 | if not video_info.audio_url: 230 | raise ParseException("没有可供下载的音频流") 231 | # 音频文件名 232 | video_title = keep_zh_en_num(video_info.title) 233 | audio_name = f"{video_title}.mp3" 234 | audio_path = plugin_cache_dir / audio_name 235 | # 下载 236 | if not audio_path.exists(): 237 | await download_file_by_stream(video_info.audio_url, file_name=audio_name, ext_headers=parser.headers) 238 | 239 | # 发送音频 240 | await bili_music.send(get_record_seg(audio_path)) 241 | # 上传音频 242 | if NEED_UPLOAD: 243 | await bili_music.send(get_file_seg(audio_path)) 244 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/douyin.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | import re 4 | 5 | from nonebot import logger, on_message 6 | from nonebot.adapters.onebot.v11 import Message, MessageSegment 7 | 8 | from ..config import NICKNAME 9 | from ..download import download_imgs_without_raise, download_video 10 | from ..exception import handle_exception 11 | from ..parsers import DouyinParser 12 | from .filter import is_not_in_disabled_groups 13 | from .helper import get_img_seg, get_video_seg, send_segments 14 | from .preprocess import ExtractText, Keyword, r_keywords 15 | 16 | # douyin = on_keyword(keywords={"douyin.com"}, rule=Rule(is_not_in_disabled_groups)) 17 | douyin = on_message(rule=is_not_in_disabled_groups & r_keywords("v.douyin", "douyin")) 18 | parser = DouyinParser() 19 | 20 
| PATTERNS: dict[str, re.Pattern] = { 21 | "v.douyin": re.compile(r"https://v\.douyin\.com/[a-zA-Z0-9_\-]+"), 22 | "douyin": re.compile(r"https://www\.(?:douyin|iesdouyin)\.com/(?:video|note|share/(?:video|note|slides))/[0-9]+"), 23 | } 24 | 25 | 26 | @douyin.handle() 27 | @handle_exception() 28 | async def _(text: str = ExtractText(), keyword: str = Keyword()): 29 | # 正则匹配 30 | matched = PATTERNS[keyword].search(text) 31 | if not matched: 32 | logger.warning(f"{text} 中的链接无效, 忽略") 33 | return 34 | share_url = matched.group(0) 35 | parse_result = await parser.parse_share_url(share_url) 36 | await douyin.send(f"{NICKNAME}解析 | 抖音 - {parse_result.title}") 37 | 38 | segs: list[MessageSegment | Message | str] = [] 39 | # 存在普通图片 40 | if parse_result.pic_urls: 41 | paths = await download_imgs_without_raise(parse_result.pic_urls) 42 | segs.extend(get_img_seg(path) for path in paths) 43 | # 存在动态图片 44 | if parse_result.dynamic_urls: 45 | # 并发下载动态图片 46 | video_paths = await asyncio.gather( 47 | *[download_video(url) for url in parse_result.dynamic_urls], return_exceptions=True 48 | ) 49 | video_segs = [get_video_seg(p) for p in video_paths if isinstance(p, Path)] 50 | segs.extend(video_segs) 51 | if segs: 52 | await send_segments(segs) 53 | await douyin.finish() 54 | # 存在视频 55 | if video_url := parse_result.video_url: 56 | video_path = await download_video(video_url) 57 | await douyin.finish(get_video_seg(video_path)) 58 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/filter.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from nonebot import on_command 4 | from nonebot.adapters.onebot.v11 import ( 5 | GROUP_ADMIN, 6 | GROUP_OWNER, 7 | Bot, 8 | GroupMessageEvent, 9 | MessageEvent, 10 | PrivateMessageEvent, 11 | ) 12 | from nonebot.matcher import Matcher 13 | from nonebot.permission import SUPERUSER 14 | from nonebot.rule import to_me 15 | 16 | from ..config import store 17 | from ..constant import DISABLED_GROUPS 18 | 19 | 20 | def load_or_initialize_set() -> set[int]: 21 | """加载或初始化关闭解析的名单""" 22 | data_file = store.get_plugin_data_file(DISABLED_GROUPS) 23 | # 判断是否存在 24 | if not data_file.exists(): 25 | data_file.write_text(json.dumps([])) 26 | return set(json.loads(data_file.read_text())) 27 | 28 | 29 | def save_disabled_groups(): 30 | """保存关闭解析的名单""" 31 | data_file = store.get_plugin_data_file(DISABLED_GROUPS) 32 | data_file.write_text(json.dumps(list(disabled_group_set))) 33 | 34 | 35 | # 内存中关闭解析的名单,第一次先进行初始化 36 | disabled_group_set: set[int] = load_or_initialize_set() 37 | 38 | 39 | # Rule 40 | def is_not_in_disabled_groups(event: MessageEvent) -> bool: 41 | return True if not isinstance(event, GroupMessageEvent) else event.group_id not in disabled_group_set 42 | 43 | 44 | @on_command("开启所有解析", permission=SUPERUSER, block=True).handle() 45 | async def _(matcher: Matcher, bot: Bot, event: PrivateMessageEvent): 46 | """开启所有解析""" 47 | disabled_group_set.clear() 48 | save_disabled_groups() 49 | await matcher.finish("所有解析已开启") 50 | 51 | 52 | @on_command("关闭所有解析", permission=SUPERUSER, block=True).handle() 53 | async def _(matcher: Matcher, bot: Bot, event: PrivateMessageEvent): 54 | """关闭所有解析""" 55 | gid_list: list[int] = [g["group_id"] for g in await bot.get_group_list()] 56 | disabled_group_set.update(gid_list) 57 | save_disabled_groups() 58 | await matcher.finish("所有解析已关闭") 59 | 60 | 61 | @on_command( 62 | "开启解析", 63 | rule=to_me(), 64 | permission=GROUP_ADMIN | 
GROUP_OWNER | SUPERUSER, 65 | block=True, 66 | ).handle() 67 | async def _(matcher: Matcher, bot: Bot, event: GroupMessageEvent): 68 | """开启解析""" 69 | gid = event.group_id 70 | if gid in disabled_group_set: 71 | disabled_group_set.remove(gid) 72 | save_disabled_groups() 73 | await matcher.finish("解析已开启") 74 | else: 75 | await matcher.finish("解析已开启,无需重复开启") 76 | 77 | 78 | @on_command( 79 | "关闭解析", 80 | rule=to_me(), 81 | permission=GROUP_ADMIN | GROUP_OWNER | SUPERUSER, 82 | block=True, 83 | ).handle() 84 | async def _(matcher: Matcher, bot: Bot, event: GroupMessageEvent): 85 | """关闭解析""" 86 | gid = event.group_id 87 | if gid not in disabled_group_set: 88 | disabled_group_set.add(gid) 89 | save_disabled_groups() 90 | await matcher.finish("解析已关闭") 91 | else: 92 | await matcher.finish("解析已关闭,无需重复关闭") 93 | 94 | 95 | @on_command("查看关闭解析", permission=SUPERUSER, block=True).handle() 96 | async def _(matcher: Matcher, bot: Bot, event: MessageEvent): 97 | """查看关闭解析""" 98 | disable_groups = [ 99 | str(item) + "--" + (await bot.get_group_info(group_id=item))["group_name"] for item in disabled_group_set 100 | ] 101 | disable_groups = "\n".join(disable_groups) 102 | if isinstance(event, GroupMessageEvent): 103 | await matcher.send("已经发送到私信了~") 104 | message = f"解析关闭的群聊如下:\n{disable_groups} \n🌟 温馨提示:如果想开关解析需要在群聊@我然后输入[开启/关闭解析], 另外还可以私信我发送[开启/关闭所有解析]" # noqa: E501 105 | await bot.send_private_msg(user_id=event.user_id, message=message) 106 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/helper.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Any, cast 3 | 4 | from nonebot.adapters.onebot.utils import f2s 5 | from nonebot.adapters.onebot.v11 import GroupMessageEvent, Message, MessageEvent, MessageSegment 6 | from nonebot.internal.matcher import current_bot, current_event 7 | 8 | from ..config import NEED_FORWARD, NICKNAME, USE_BASE64 9 | from ..constant import VIDEO_MAX_MB 10 | 11 | 12 | def construct_nodes(user_id: int, segments: list[Message | MessageSegment | str]) -> Message: 13 | """构造节点 14 | 15 | Args: 16 | segments (MessageSegment | list[MessageSegment | Message | str]): 消息段 17 | 18 | Returns: 19 | Message: 消息 20 | """ 21 | 22 | def node(content): 23 | return MessageSegment.node_custom(user_id=user_id, nickname=NICKNAME, content=content) 24 | 25 | return Message([node(seg) for seg in segments]) 26 | 27 | 28 | async def send_segments(segments: list[Message | MessageSegment | str]) -> None: 29 | """发送消息段 30 | 31 | Args: 32 | segments (list): 消息段 33 | """ 34 | bot = current_bot.get() 35 | event: MessageEvent = cast(MessageEvent, current_event.get()) 36 | 37 | if NEED_FORWARD or len(segments) > 4: 38 | message = construct_nodes(int(bot.self_id), segments) 39 | kwargs: dict[str, Any] = {"messages": message} 40 | if isinstance(event, GroupMessageEvent): 41 | kwargs["group_id"] = event.group_id 42 | api = "send_group_forward_msg" 43 | else: 44 | kwargs["user_id"] = event.user_id 45 | api = "send_private_forward_msg" 46 | await bot.call_api(api, **kwargs) 47 | 48 | else: 49 | segments[:-1] = [seg + "\n" if isinstance(seg, str) else seg for seg in segments[:-1]] 50 | message = sum(segments, Message()) 51 | await bot.send(event, message=message) 52 | 53 | 54 | def get_img_seg(img_path: Path) -> MessageSegment: 55 | """获取图片 Seg 56 | 57 | Args: 58 | img_path (Path): 图片路径 59 | 60 | Returns: 61 | MessageSegment: 图片 Seg 62 | """ 63 | file = 
img_path.read_bytes() if USE_BASE64 else img_path 64 | return MessageSegment.image(file) 65 | 66 | 67 | def get_record_seg(audio_path: Path) -> MessageSegment: 68 | """获取语音 Seg 69 | 70 | Args: 71 | audio_path (Path): 语音路径 72 | 73 | Returns: 74 | MessageSegment: 语音 Seg 75 | """ 76 | file = audio_path.read_bytes() if USE_BASE64 else audio_path 77 | return MessageSegment.record(file) 78 | 79 | 80 | def get_video_seg(video_path: Path) -> MessageSegment: 81 | """获取视频 Seg 82 | 83 | Returns: 84 | MessageSegment: 视频 Seg 85 | """ 86 | seg: MessageSegment 87 | # 检测文件大小 88 | file_size_byte_count = int(video_path.stat().st_size) 89 | file = video_path.read_bytes() if USE_BASE64 else video_path 90 | if file_size_byte_count == 0: 91 | seg = MessageSegment.text("视频文件大小为0") 92 | elif file_size_byte_count > VIDEO_MAX_MB * 1024 * 1024: 93 | # 转为文件 Seg 94 | seg = get_file_seg(file, display_name=video_path.name) 95 | else: 96 | seg = MessageSegment.video(file) 97 | return seg 98 | 99 | 100 | def get_file_seg(file: Path | bytes, display_name: str = "") -> MessageSegment: 101 | """获取文件 Seg 102 | 103 | Args: 104 | file (Path | bytes): 文件路径 105 | display_name (str, optional): 显示名称. Defaults to file.name. 106 | 107 | Returns: 108 | MessageSegment: 文件 Seg 109 | """ 110 | if not display_name and isinstance(file, Path): 111 | display_name = file.name 112 | if not display_name: 113 | raise ValueError("文件名不能为空") 114 | if USE_BASE64: 115 | file = file.read_bytes() if isinstance(file, Path) else file 116 | return MessageSegment( 117 | "file", 118 | data={ 119 | "name": display_name, 120 | "file": f2s(file), 121 | }, 122 | ) 123 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/kuaishou.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from nonebot import logger, on_message 4 | from nonebot.adapters.onebot.v11 import Message, MessageSegment 5 | 6 | from ..config import NICKNAME 7 | from ..download import download_img, download_imgs_without_raise, download_video 8 | from ..exception import handle_exception 9 | from ..parsers import KuaishouParser 10 | from .filter import is_not_in_disabled_groups 11 | from .helper import get_img_seg, get_video_seg, send_segments 12 | from .preprocess import ExtractText, Keyword, r_keywords 13 | 14 | parser = KuaishouParser() 15 | 16 | kuaishou = on_message( 17 | rule=is_not_in_disabled_groups & r_keywords("v.kuaishou.com", "kuaishou", "chenzhongtech"), 18 | priority=5, 19 | ) 20 | 21 | 22 | # 匹配的正则表达式 23 | PATTERNS = { 24 | # - https://v.kuaishou.com/2yAnzeZ 25 | "v.kuaishou.com": re.compile(r"https?://v\.kuaishou\.com/[A-Za-z\d._?%&+\-=/#]+"), 26 | # - https://www.kuaishou.com/short-video/3xhjgcmir24m4nm 27 | "kuaishou": re.compile(r"https?://(?:www\.)?kuaishou\.com/[A-Za-z\d._?%&+\-=/#]+"), 28 | # - https://v.m.chenzhongtech.com/fw/photo/3xburnkmj3auazc 29 | "chenzhongtech": re.compile(r"https?://(?:v\.m\.)?chenzhongtech\.com/fw/[A-Za-z\d._?%&+\-=/#]+"), 30 | } 31 | 32 | 33 | @kuaishou.handle() 34 | @handle_exception() 35 | async def _(text: str = ExtractText(), keyword: str = Keyword()): 36 | """处理快手视频链接""" 37 | matched = PATTERNS[keyword].search(text) 38 | if not matched: 39 | logger.info(f"无有效的快手链接: {text}") 40 | return 41 | 42 | url = matched.group(0) 43 | 44 | video_info = await parser.parse_url(url) 45 | 46 | msg = f"{NICKNAME}解析 | 快手 - {video_info.title}-{video_info.author}" 47 | if video_info.cover_url: 48 | # 下载封面 49 | cover_path = await 
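# ---------------------------------------------------------------------------
# 补充示意(非原文件内容):get_video_seg 的核心是按文件大小三分支——大小为 0
# 退化为提示文本、超过 VIDEO_MAX_MB 退化为群文件、否则按视频段发送。
# 下面是同一判定逻辑的独立版本(VIDEO_MAX_MB = 100 为假设值,实际取自 constant.py):
import tempfile
from pathlib import Path

VIDEO_MAX_MB = 100

def pick_seg_type(video_path: Path) -> str:
    size = video_path.stat().st_size
    if size == 0:
        return "text"   # 空文件,发提示文本
    if size > VIDEO_MAX_MB * 1024 * 1024:
        return "file"   # 过大,转为群文件发送
    return "video"      # 正常发视频段

with tempfile.NamedTemporaryFile(suffix=".mp4") as tmp:
    tmp.write(b"\x00" * 1024)
    tmp.flush()
    assert pick_seg_type(Path(tmp.name)) == "video"
# ---------------------------------------------------------------------------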
download_img(video_info.cover_url) 50 | msg += get_img_seg(cover_path) 51 | 52 | await kuaishou.send(msg) 53 | if video_info.video_url: 54 | video_path = await download_video(video_info.video_url) 55 | await kuaishou.send(get_video_seg(video_path)) 56 | if video_info.pic_urls: 57 | img_paths = await download_imgs_without_raise(video_info.pic_urls) 58 | segs: list[str | Message | MessageSegment] = [get_img_seg(img_path) for img_path in img_paths] 59 | await send_segments(segs) 60 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/kugou.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from nonebot import logger, on_message 4 | 5 | from ..config import NEED_UPLOAD, NICKNAME 6 | from ..download import download_audio, download_img 7 | from ..download.utils import keep_zh_en_num 8 | from ..exception import handle_exception 9 | from ..parsers import KuGouParser 10 | from .filter import is_not_in_disabled_groups 11 | from .helper import get_file_seg, get_img_seg, get_record_seg 12 | from .preprocess import ExtractText, r_keywords 13 | 14 | kugou = on_message(rule=is_not_in_disabled_groups & r_keywords("kugou.com")) 15 | parser = KuGouParser() 16 | 17 | 18 | @kugou.handle() 19 | @handle_exception() 20 | async def _(text: str = ExtractText()): 21 | pub_prefix = f"{NICKNAME}解析 | 酷狗音乐 - " 22 | # https://t1.kugou.com/song.html?id=1hfw6baEmV3 23 | pattern = r"https?://.*kugou\.com.*id=[a-zA-Z0-9]+" 24 | matched = re.search(pattern, text) 25 | if not matched: 26 | logger.info(f"{pub_prefix}无效链接,忽略 - {text}") 27 | return 28 | 29 | share_url_info = await parser.parse_share_url(matched.group(0)) 30 | 31 | title_author_name = f"{share_url_info.title} - {share_url_info.author}" 32 | 33 | await kugou.send(f"{pub_prefix}{title_author_name}" + get_img_seg(await download_img(share_url_info.cover_url))) 34 | if not share_url_info.audio_url: 35 | await kugou.finish(f"{pub_prefix}没有找到音频直链") 36 | audio_path = await download_audio(url=share_url_info.audio_url) 37 | # 发送语音 38 | await kugou.send(get_record_seg(audio_path)) 39 | # 发送群文件 40 | if NEED_UPLOAD: 41 | filename = f"{keep_zh_en_num(title_author_name)}.flac" 42 | await kugou.finish(get_file_seg(audio_path, filename)) 43 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/ncm.py: -------------------------------------------------------------------------------- 1 | from nonebot import on_message 2 | 3 | from ..config import NEED_UPLOAD, NICKNAME 4 | from ..download import download_audio, download_img 5 | from ..download.utils import keep_zh_en_num 6 | from ..exception import handle_exception 7 | from ..parsers import NCMParser 8 | from .filter import is_not_in_disabled_groups 9 | from .helper import get_file_seg, get_img_seg, get_record_seg 10 | from .preprocess import ExtractText, Keyword, r_keywords 11 | 12 | ncm = on_message(rule=is_not_in_disabled_groups & r_keywords("music.163.com", "163cn.tv")) 13 | 14 | parser = NCMParser() 15 | 16 | 17 | @ncm.handle() 18 | @handle_exception() 19 | async def _(text: str = ExtractText(), keyword: str = Keyword()): 20 | result = await parser.parse_ncm(text) 21 | detail = f"{NICKNAME}解析 | 网易云 - {result.title}-{result.author}" 22 | img_seg = get_img_seg(await download_img(result.cover_url)) 23 | await ncm.send(detail + img_seg) 24 | # 下载音频文件后会返回一个下载路径 25 | audio_path = await download_audio(result.audio_url) 26 | # 发送语音 27 | await 
ncm.send(get_record_seg(audio_path)) 28 | # 上传群文件 29 | if NEED_UPLOAD: 30 | file_name = keep_zh_en_num(f"{result.title}-{result.author}") 31 | file_name = f"{file_name}.flac" 32 | await ncm.send(get_file_seg(audio_path, file_name)) 33 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/preprocess.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Literal 3 | 4 | from nonebot import logger 5 | from nonebot.adapters.onebot.v11 import MessageEvent, MessageSegment 6 | from nonebot.message import event_preprocessor 7 | from nonebot.params import Depends 8 | from nonebot.rule import Rule 9 | from nonebot.typing import T_State 10 | 11 | R_KEYWORD_KEY: Literal["_r_keyword"] = "_r_keyword" 12 | R_EXTRACT_KEY: Literal["_r_extract"] = "_r_extract" 13 | 14 | 15 | def ExtractText() -> str: 16 | return Depends(_extract_text) 17 | 18 | 19 | def _extract_text(state: T_State) -> str: 20 | return state.get(R_EXTRACT_KEY) or "" 21 | 22 | 23 | def Keyword() -> str: 24 | return Depends(_keyword) 25 | 26 | 27 | def _keyword(state: T_State) -> str: 28 | return state.get(R_KEYWORD_KEY) or "" 29 | 30 | 31 | URL_KEY_MAPPING = { 32 | "detail_1": "qqdocurl", 33 | "news": "jumpUrl", 34 | "music": "jumpUrl", 35 | } 36 | 37 | CHAR_REPLACEMENTS = {",": ",", "\\": "", "&amp;": "&"} 38 | 39 | 40 | def _clean_url(url: str) -> str: 41 | """清理 URL 中的特殊字符 42 | 43 | Args: 44 | url: 原始 URL 45 | 46 | Returns: 47 | str: 清理后的 URL 48 | """ 49 | for old, new in CHAR_REPLACEMENTS.items(): 50 | url = url.replace(old, new) 51 | return url 52 | 53 | 54 | def _extract_json_url(json_seg: MessageSegment) -> str | None: 55 | """处理 JSON 类型的消息段,提取 URL 56 | 57 | Args: 58 | json_seg: JSON 类型的消息段 59 | 60 | Returns: 61 | Optional[str]: 提取的 URL,如果提取失败则返回 None 62 | """ 63 | data_str: str | None = json_seg.data.get("data") 64 | if not data_str: 65 | return None 66 | 67 | # 处理转义字符 68 | data_str = data_str.replace(",", ",") 69 | 70 | try: 71 | data: dict[str, Any] = json.loads(data_str) 72 | except json.JSONDecodeError: 73 | logger.debug("json 卡片解析失败") 74 | return None 75 | 76 | meta: dict[str, Any] | None = data.get("meta") 77 | if not meta: 78 | return None 79 | 80 | for key1, key2 in URL_KEY_MAPPING.items(): 81 | if item := meta.get(key1): 82 | if url := item.get(key2): 83 | return _clean_url(url) 84 | return None 85 | 86 | 87 | @event_preprocessor 88 | def extract_msg_text(event: MessageEvent, state: T_State) -> None: 89 | message = event.get_message() 90 | text: str | None = None 91 | 92 | # 提取纯文本 93 | if text := message.extract_plain_text().strip(): 94 | state[R_EXTRACT_KEY] = text 95 | return 96 | 97 | # 提取json数据 98 | if json_seg := next((seg for seg in message if seg.type == "json"), None): 99 | if url := _extract_json_url(json_seg): 100 | state[R_EXTRACT_KEY] = url 101 | 102 | 103 | class RKeywordsRule: 104 | """检查消息是否含有关键词 增强版""" 105 | 106 | __slots__ = ("keywords",) 107 | 108 | def __init__(self, *keywords: str): 109 | self.keywords = keywords 110 | 111 | def __repr__(self) -> str: 112 | return f"RKeywords(keywords={self.keywords})" 113 | 114 | def __eq__(self, other: object) -> bool: 115 | return isinstance(other, RKeywordsRule) and frozenset(self.keywords) == frozenset(other.keywords) 116 | 117 | def __hash__(self) -> int: 118 | return hash(frozenset(self.keywords)) 119 | 120 | async def __call__(self, state: T_State, text: str = ExtractText()) -> bool: 121 | if not text: 122 | return False 123 | if key
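# ---------------------------------------------------------------------------
# 补充示意(非原文件内容):_extract_json_url 的提取顺序可以用一个纯函数演示——
# 在 meta 下按 URL_KEY_MAPPING 逐键尝试,取到即返回。卡片数据为虚构样例:
import json
from typing import Any

_mapping = {"detail_1": "qqdocurl", "news": "jumpUrl", "music": "jumpUrl"}

def extract_card_url(data_str: str) -> str | None:
    try:
        data: dict[str, Any] = json.loads(data_str.replace(",", ","))
    except json.JSONDecodeError:
        return None
    meta: dict[str, Any] = data.get("meta") or {}
    for key1, key2 in _mapping.items():
        if url := (meta.get(key1) or {}).get(key2):
            return url
    return None

card = json.dumps({"meta": {"news": {"jumpUrl": "https://b23.tv/demo"}}})
assert extract_card_url(card) == "https://b23.tv/demo"
# ---------------------------------------------------------------------------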
:= next((k for k in self.keywords if k in text), None): 124 | state[R_KEYWORD_KEY] = key 125 | return True 126 | return False 127 | 128 | 129 | def r_keywords(*keywords: str) -> Rule: 130 | return Rule(RKeywordsRule(*keywords)) 131 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/tiktok.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import aiohttp 4 | from nonebot import logger, on_keyword 5 | from nonebot.adapters.onebot.v11 import MessageEvent 6 | from nonebot.rule import Rule 7 | 8 | from ..config import NICKNAME, PROXY 9 | from ..download.ytdlp import get_video_info, ytdlp_download_video 10 | from ..exception import handle_exception 11 | from .filter import is_not_in_disabled_groups 12 | from .helper import get_video_seg 13 | 14 | tiktok = on_keyword(keywords={"tiktok.com"}, rule=Rule(is_not_in_disabled_groups)) 15 | 16 | 17 | @tiktok.handle() 18 | @handle_exception() 19 | async def _(event: MessageEvent): 20 | # 消息 21 | message: str = event.message.extract_plain_text().strip() 22 | url_reg = r"(?:http:|https:)\/\/(www|vt|vm).tiktok.com\/[A-Za-z\d._?%&+\-=\/#@]*" 23 | matched = re.search(url_reg, message) 24 | if not matched: 25 | logger.warning("tiktok url is incomplete, ignored") 26 | await tiktok.finish() 27 | # 提取 url 和 prefix 28 | url, prefix = matched.group(0), matched.group(1) 29 | 30 | # 如果 prefix 是 vt 或 vm,则需要重定向 31 | if prefix == "vt" or prefix == "vm": 32 | async with aiohttp.ClientSession() as session: 33 | async with session.get(url, allow_redirects=False, proxy=PROXY) as resp: 34 | url = resp.headers.get("Location") 35 | 36 | pub_prefix = f"{NICKNAME}解析 | TikTok - " 37 | if not url: 38 | await tiktok.finish(f"{pub_prefix}短链重定向失败") 39 | 40 | # 获取视频信息 41 | info = await get_video_info(url) 42 | await tiktok.send(f"{pub_prefix}{info['title']}") 43 | 44 | try: 45 | video_path = await ytdlp_download_video(url=url) 46 | except Exception as e: 47 | await tiktok.finish(f"{pub_prefix}下载视频失败 {e}") 48 | 49 | await tiktok.send(get_video_seg(video_path)) 50 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/matchers/twitter.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any 3 | 4 | import aiohttp 5 | from nonebot import logger, on_keyword 6 | from nonebot.adapters.onebot.v11 import MessageEvent 7 | from nonebot.rule import Rule 8 | 9 | from ..config import NICKNAME, PROXY 10 | from ..constant import COMMON_HEADER 11 | from ..download import download_imgs_without_raise, download_video 12 | from ..exception import ParseException, handle_exception 13 | from .filter import is_not_in_disabled_groups 14 | from .helper import get_img_seg, get_video_seg, send_segments 15 | 16 | twitter = on_keyword(keywords={"x.com"}, rule=Rule(is_not_in_disabled_groups)) 17 | 18 | 19 | @twitter.handle() 20 | @handle_exception() 21 | async def _(event: MessageEvent): 22 | msg: str = event.message.extract_plain_text().strip() 23 | pattern = r"https?:\/\/x.com\/[0-9-a-zA-Z_]{1,20}\/status\/([0-9]+)" 24 | matched = re.search(pattern, msg) 25 | if not matched: 26 | logger.info("没有匹配到 x.com 的 url, 忽略") 27 | return 28 | x_url = matched.group(0) 29 | 30 | await twitter.send(f"{NICKNAME}解析 | 小蓝鸟") 31 | 32 | video_url, pic_urls = await parse_x_url(x_url) 33 | 34 | if video_url: 35 | video_path = await download_video(video_url, proxy=PROXY) 36 | await 
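# ---------------------------------------------------------------------------
# 补充示意(非原文件内容):tiktok matcher 对 vt/vm 短链的展开方式与
# parsers/utils.py 的 get_redirect_url 一致——allow_redirects=False 只读取
# 首跳响应头里的 Location,不真正跟随跳转。独立草图如下(实际运行需要网络
# 与可用代理,示例短链为虚构):
import aiohttp

async def resolve_short_url(url: str, proxy: str | None = None) -> str:
    async with aiohttp.ClientSession() as session:
        async with session.get(url, allow_redirects=False, proxy=proxy) as resp:
            return resp.headers.get("Location", url)

# import asyncio
# asyncio.run(resolve_short_url("https://vt.tiktok.com/xxxx/"))
# ---------------------------------------------------------------------------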
twitter.send(get_video_seg(video_path)) 37 | 38 | if pic_urls: 39 | img_paths = await download_imgs_without_raise(pic_urls, proxy=PROXY) 40 | await send_segments([get_img_seg(img_path) for img_path in img_paths]) 41 | 42 | 43 | async def parse_x_url(x_url: str) -> tuple[str, list[str]]: 44 | """ 45 | 解析 X (Twitter) 链接获取视频和图片URL 46 | @author: biupiaa 47 | Returns: 48 | tuple[str, list[str]]: (视频 URL, 图片 URL 列表) 49 | """ 50 | 51 | async def x_req(url: str) -> dict[str, Any]: 52 | headers = { 53 | "Accept": "application/json, text/plain, */*", 54 | "Content-Type": "application/x-www-form-urlencoded", 55 | "Origin": "https://xdown.app", 56 | "Referer": "https://xdown.app/", 57 | **COMMON_HEADER, 58 | } 59 | data = {"q": url, "lang": "zh-cn"} 60 | async with aiohttp.ClientSession() as session: 61 | async with session.post("https://xdown.app/api/ajaxSearch", headers=headers, data=data) as response: 62 | return await response.json() 63 | 64 | resp = await x_req(x_url) 65 | if resp.get("status") != "ok": 66 | raise ParseException("解析失败") 67 | 68 | html_content = resp.get("data", "") 69 | # 提取视频链接 (获取最高清晰度的视频) 70 | pattern = re.compile( 71 | r'<a href="([^"]+)"[^>]*>.*?下载 MP4 \((\d+p)\)</a>', # noqa: E501 72 | re.DOTALL, # 允许.匹配换行符 73 | ) 74 | video_matches = pattern.findall(html_content) 75 | # 转换为带数值的元组以便排序 76 | if video_matches: 77 | best_video_url = max( 78 | ((str(url), int(resolution.replace("p", ""))) for url, resolution in video_matches), key=lambda x: x[1] 79 | )[0] 80 | # 最高质量视频 81 | return best_video_url, [] 82 | 83 | # 提取图片链接 84 | img_urls = re.findall(r'<img src="([^"]+)"', html_content) 85 | return "", img_urls 86 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/parsers/acfun.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import re 4 | from pathlib import Path 5 | 6 | import aiofiles 7 | import aiohttp 8 | 9 | from ..config import plugin_cache_dir 10 | from ..constant import COMMON_HEADER, MAX_SIZE 11 | from ..download.utils import safe_unlink 12 | from ..exception import DownloadException, ParseException 13 | 14 | 15 | class AcfunParser: 16 | def __init__(self): 17 | self.headers = {"referer": "https://www.acfun.cn/", **COMMON_HEADER} 18 | 19 | async def parse_url(self, url: str) -> tuple[str, str]: 20 | """解析acfun链接 21 | 22 | Args: 23 | url (str): 链接 24 | 25 | Returns: 26 | tuple: 视频链接和视频描述 27 | """ 28 | # 拼接查询参数 29 | url = f"{url}?quickViewId=videoInfo_new&ajaxpipe=1" 30 | 31 | async with aiohttp.ClientSession() as session: 32 | async with session.get(url, headers=self.headers) as resp: 33 | resp.raise_for_status() 34 | raw = await resp.text() 35 | 36 | matched = re.search(r"window\.videoInfo =(.*?)</script>", raw) 37 | if not matched: 38 | raise ParseException("解析 acfun 视频信息失败") 39 | json_str = str(matched.group(1)) 40 | json_str = json_str.replace('\\\\"', '\\"').replace('\\"', '"') 41 | video_info = json.loads(json_str) 42 | 43 | video_desc = ( 44 | f"ac{video_info.get('dougaId', '')}\n" 45 | f"标题: {video_info.get('title', '')}\n" 46 | f"简介: {video_info.get('description', '')}\n" 47 | f"作者: {video_info.get('user', {}).get('name', '')}, 上传于 {video_info.get('createTime', '')}" 48 | ) 49 | 50 | ks_play_json = video_info["currentVideoInfo"]["ksPlayJson"] 51 | ks_play = json.loads(ks_play_json) 52 | representations = ks_play["adaptationSet"][0]["representation"] 53 | # 这里[d['url'] for d in representations],从 4k ~ 360,此处默认720p 54 | m3u8_url = [d["url"] for d in representations][3] 55 | 56 | return m3u8_url, video_desc 57 | 58 | async def download_video(self, m3u8s_url: str, acid: int) -> Path: 59 | """下载acfun视频 60 | 61 | Args: 62 | m3u8s_url (str): m3u8链接 63 | acid (int): acid 64 | 65 | Returns: 66 | Path: 下载的mp4文件 67 | """ 68 | from tqdm.asyncio import tqdm 69 | 70 | m3u8_full_urls = await self._parse_m3u8(m3u8s_url) 71 | video_file = plugin_cache_dir / f"acfun_{acid}.mp4" 72 | if video_file.exists(): 73 | return video_file 74 | 75 | try: 76 | max_size_in_bytes = MAX_SIZE * 1024 * 1024 77 | async with aiofiles.open(video_file, "wb") as f, aiohttp.ClientSession() as session: 78 | total_size = 0 79 | with tqdm( 80 | unit="B", 81 | unit_scale=True, 82 | unit_divisor=1024, 83 | dynamic_ncols=True, 84 | desc=video_file.name, 85 | ) as bar: 86 | for url
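# ---------------------------------------------------------------------------
# 补充示意(非原文件内容):parse_x_url 选取最高清晰度的写法等价于按
# “分辨率数值”取 max。独立演示如下(URL 为占位字符串):
def pick_best(video_matches: list[tuple[str, str]]) -> str:
    return max(
        ((url, int(resolution.rstrip("p"))) for url, resolution in video_matches),
        key=lambda x: x[1],
    )[0]

assert pick_best([("url-720", "720p"), ("url-1080", "1080p"), ("url-360", "360p")]) == "url-1080"
# ---------------------------------------------------------------------------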
in m3u8_full_urls: 87 | async with session.get(url, headers=self.headers) as resp: 88 | async for chunk in resp.content.iter_chunked(1024 * 1024): 89 | await f.write(chunk) 90 | total_size += len(chunk) 91 | bar.update(len(chunk)) 92 | if total_size > max_size_in_bytes: 93 | # 直接截断 94 | break 95 | except aiohttp.ClientError as e: 96 | await safe_unlink(video_file) 97 | raise DownloadException(f"下载 acfun 视频失败: {e}") 98 | except asyncio.TimeoutError: 99 | await safe_unlink(video_file) 100 | raise DownloadException("下载 acfun 视频超时") 101 | return video_file 102 | 103 | async def _parse_m3u8(self, m3u8_url: str): 104 | """解析m3u8链接 105 | 106 | Args: 107 | m3u8_url (str): m3u8链接 108 | 109 | Returns: 110 | list[str]: 视频链接 111 | """ 112 | async with aiohttp.ClientSession() as session: 113 | async with session.get(m3u8_url, headers=self.headers) as resp: 114 | m3u8_file = await resp.text() 115 | # 分离ts文件链接 116 | raw_pieces = re.split(r"\n#EXTINF:.{8},\n", m3u8_file) 117 | # 过滤头部\ 118 | m3u8_relative_links = raw_pieces[1:] 119 | 120 | # 修改尾部 去掉尾部多余的结束符 121 | patched_tail = m3u8_relative_links[-1].split("\n")[0] 122 | m3u8_relative_links[-1] = patched_tail 123 | 124 | # 完整链接,直接加 m3u8Url 的通用前缀 125 | m3u8_prefix = "/".join(m3u8_url.split("/")[0:-1]) 126 | m3u8_full_urls = [f"{m3u8_prefix}/{d}" for d in m3u8_relative_links] 127 | 128 | return m3u8_full_urls 129 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/parsers/bilibili.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | import re 3 | from typing import Any 4 | 5 | from bilibili_api import HEADERS, Credential 6 | from bilibili_api.video import Video 7 | from nonebot import logger 8 | 9 | from ..exception import ParseException 10 | 11 | 12 | @dataclass 13 | class BilibiliVideoInfo: 14 | """Bilibili 视频信息""" 15 | 16 | title: str 17 | display_info: str 18 | cover_url: str 19 | video_duration: int 20 | video_url: str 21 | audio_url: str 22 | ai_summary: str 23 | 24 | 25 | class BilibiliParser: 26 | def __init__(self): 27 | self.headers = HEADERS 28 | self.credential: Credential | None = None 29 | self._init_credential() 30 | 31 | def _init_credential(self): 32 | """初始化 bilibili api""" 33 | 34 | from bilibili_api import request_settings, select_client 35 | 36 | from ..config import rconfig 37 | from ..cookie import ck2dict 38 | 39 | # 选择客户端 40 | select_client("curl_cffi") 41 | # 模仿浏览器 42 | request_settings.set("impersonate", "chrome131") 43 | # 第二参数数值参考 curl_cffi 文档 44 | # https://curl-cffi.readthedocs.io/en/latest/impersonate.html 45 | 46 | if not rconfig.r_bili_ck: 47 | logger.warning("未配置哔哩哔哩 cookie, 无法使用哔哩哔哩 AI 总结, 可能无法解析 720p 以上画质视频") 48 | return 49 | self.credential = Credential.from_cookies(ck2dict(rconfig.r_bili_ck)) 50 | 51 | async def parse_opus(self, opus_id: int) -> tuple[list[str], str]: 52 | """解析动态信息 53 | 54 | Args: 55 | opus_id (int): 动态 id 56 | 57 | Returns: 58 | tuple[list[str], str]: 图片 url 列表和动态信息 59 | """ 60 | from bilibili_api.opus import Opus 61 | 62 | opus = Opus(opus_id, self.credential) 63 | opus_info = await opus.get_info() 64 | if not isinstance(opus_info, dict): 65 | raise ParseException("获取动态信息失败") 66 | 67 | # 获取图片信息 68 | urls = await opus.get_images_raw_info() 69 | urls = [url["url"] for url in urls] 70 | 71 | dynamic = opus.turn_to_dynamic() 72 | dynamic_info: dict[str, Any] = await dynamic.get_info() 73 | orig_text = ( 74 | dynamic_info.get("item", {}) 75 | .get("modules", {}) 76 | 
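# ---------------------------------------------------------------------------
# 补充示意(非原文件内容):上文 AcfunParser._parse_m3u8 把 m3u8 里的相对 ts
# 路径拼回完整链接,做法是取 m3u8 地址去掉最后一段后的公共前缀。独立演示
# (域名为虚构占位):
def to_full_urls(m3u8_url: str, relative_links: list[str]) -> list[str]:
    prefix = "/".join(m3u8_url.split("/")[:-1])
    return [f"{prefix}/{rel}" for rel in relative_links]

full = to_full_urls("https://cdn.example.com/a/b/index.m3u8", ["0.ts", "1.ts"])
assert full == ["https://cdn.example.com/a/b/0.ts", "https://cdn.example.com/a/b/1.ts"]
# ---------------------------------------------------------------------------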
.get("module_dynamic", {}) 77 | .get("major", {}) 78 | .get("opus", {}) 79 | .get("summary", {}) 80 | .get("rich_text_nodes", [{}])[0] 81 | .get("orig_text", "") 82 | ) 83 | return urls, orig_text 84 | 85 | async def parse_live(self, room_id: int) -> tuple[str, str, str]: 86 | """解析直播信息 87 | 88 | Args: 89 | room_id (int): 直播 id 90 | 91 | Returns: 92 | tuple[str, str, str]: 标题、封面、关键帧 93 | """ 94 | from bilibili_api.live import LiveRoom 95 | 96 | room = LiveRoom(room_display_id=room_id, credential=self.credential) 97 | room_info: dict[str, Any] = (await room.get_room_info())["room_info"] 98 | title, cover, keyframe = ( 99 | room_info["title"], 100 | room_info["cover"], 101 | room_info["keyframe"], 102 | ) 103 | return (title, cover, keyframe) 104 | 105 | async def parse_read(self, read_id: int) -> tuple[list[str], list[str]]: 106 | """专栏解析 107 | 108 | Args: 109 | read_id (int): 专栏 id 110 | 111 | Returns: 112 | list[str]: img url or text 113 | """ 114 | from bilibili_api.article import Article 115 | 116 | ar = Article(read_id) 117 | 118 | # 加载内容 119 | await ar.fetch_content() 120 | data = ar.json() 121 | 122 | def accumulate_text(node: dict): 123 | text = "" 124 | if "children" in node: 125 | for child in node["children"]: 126 | text += accumulate_text(child) + " " 127 | if _text := node.get("text"): 128 | text += _text if isinstance(_text, str) else str(_text) + node["url"] 129 | return text 130 | 131 | urls: list[str] = [] 132 | texts: list[str] = [] 133 | for node in data.get("children", []): 134 | node_type = node.get("type") 135 | if node_type == "ImageNode": 136 | if img_url := node.get("url", "").strip(): 137 | urls.append(img_url) 138 | # 补空串占位符 139 | texts.append("") 140 | elif node_type == "ParagraphNode": 141 | if text := accumulate_text(node).strip(): 142 | texts.append(text) 143 | elif node_type == "TextNode": 144 | if text := node.get("text", "").strip(): 145 | texts.append(text) 146 | return texts, urls 147 | 148 | async def parse_favlist(self, fav_id: int) -> tuple[list[str], list[str]]: 149 | """解析收藏夹信息 150 | 151 | Args: 152 | fav_id (int): 收藏夹 id 153 | 154 | Returns: 155 | tuple[list[str], list[str]]: 标题、封面、简介、链接 156 | """ 157 | from bilibili_api.favorite_list import get_video_favorite_list_content 158 | 159 | fav_list: dict[str, Any] = await get_video_favorite_list_content(fav_id) 160 | if fav_list["medias"] is None: 161 | raise ParseException("收藏夹内容为空, 或被风控") 162 | # 取前 50 个 163 | medias_50: list[dict[str, Any]] = fav_list["medias"][:50] 164 | texts: list[str] = [] 165 | urls: list[str] = [] 166 | for fav in medias_50: 167 | title, cover, intro, link = ( 168 | fav["title"], 169 | fav["cover"], 170 | fav["intro"], 171 | fav["link"], 172 | ) 173 | matched = re.search(r"\d+", link) 174 | if not matched: 175 | continue 176 | avid = matched.group(0) if matched else "" 177 | urls.append(cover) 178 | texts.append(f"🧉 标题:{title}\n📝 简介:{intro}\n🔗 链接:{link}\nhttps://bilibili.com/video/av{avid}") 179 | return texts, urls 180 | 181 | def parse_video(self, *, bvid: str | None = None, avid: int | None = None) -> Video: 182 | """解析视频信息 183 | 184 | Args: 185 | bvid (str | None): bvid 186 | avid (int | None): avid 187 | """ 188 | if avid: 189 | return Video(aid=avid, credential=self.credential) 190 | elif bvid: 191 | return Video(bvid=bvid, credential=self.credential) 192 | else: 193 | raise ParseException("avid 和 bvid 至少指定一项") 194 | 195 | async def parse_video_info( 196 | self, 197 | *, 198 | bvid: str | None = None, 199 | avid: int | None = None, 200 | page_num: int = 1, 201 | ) -> 
BilibiliVideoInfo: 202 | """解析视频信息 203 | 204 | Args: 205 | bvid (str | None): bvid 206 | avid (int | None): avid 207 | page_num (int): 页码 208 | """ 209 | 210 | video = self.parse_video(bvid=bvid, avid=avid) 211 | video_info: dict[str, Any] = await video.get_info() 212 | 213 | video_duration: int = int(video_info["duration"]) 214 | 215 | display_info: str = "" 216 | cover_url: str | None = None 217 | title: str = video_info["title"] 218 | # 处理分 p 219 | page_idx = page_num - 1 220 | if (pages := video_info.get("pages")) and len(pages) > 1: 221 | assert isinstance(pages, list) 222 | # 取模防止数组越界 223 | page_idx = page_idx % len(pages) 224 | p_video = pages[page_idx] 225 | # 获取分集时长 226 | video_duration = int(p_video.get("duration", video_duration)) 227 | # 获取分集标题 228 | if p_name := p_video.get("part").strip(): 229 | title += f"\n分集: {p_name}" 230 | # 获取分集封面 231 | if first_frame_url := p_video.get("first_frame"): 232 | cover_url = first_frame_url 233 | else: 234 | page_idx = 0 235 | 236 | # 获取下载链接 237 | video_url, audio_url = await self.parse_video_download_url(video=video, page_index=page_idx) 238 | # 获取在线观看人数 239 | online = await video.get_online() 240 | 241 | display_info = ( 242 | f"{self._extra_bili_info(video_info)}\n" 243 | f"📝 简介:{video_info['desc']}\n" 244 | f"🏄‍♂️ {online['total']} 人正在观看,{online['count']} 人在网页端观看" 245 | ) 246 | ai_summary: str = "未配置 ck 无法使用 AI 总结" 247 | # 获取 AI 总结 248 | if self.credential: 249 | cid = await video.get_cid(page_idx) 250 | ai_conclusion = await video.get_ai_conclusion(cid) 251 | ai_summary = ai_conclusion.get("model_result", {"summary": ""}).get("summary", "").strip() 252 | ai_summary = f"AI总结: {ai_summary}" if ai_summary else "该视频暂不支持AI总结" 253 | 254 | return BilibiliVideoInfo( 255 | title=title, 256 | display_info=display_info, 257 | cover_url=cover_url if cover_url else video_info["pic"], 258 | video_url=video_url, 259 | audio_url=audio_url, 260 | video_duration=video_duration, 261 | ai_summary=ai_summary, 262 | ) 263 | 264 | async def parse_video_download_url( 265 | self, *, video: Video | None = None, bvid: str | None = None, avid: int | None = None, page_index: int = 0 266 | ) -> tuple[str, str]: 267 | """解析视频下载链接 268 | 269 | Args: 270 | bvid (str | None): bvid 271 | avid (int | None): avid 272 | page_index (int): 页索引 = 页码 - 1 273 | """ 274 | 275 | from bilibili_api.video import ( 276 | AudioStreamDownloadURL, 277 | VideoDownloadURLDataDetecter, 278 | VideoQuality, 279 | VideoStreamDownloadURL, 280 | ) 281 | 282 | if video is None: 283 | video = self.parse_video(bvid=bvid, avid=avid) 284 | # 获取下载数据 285 | download_url_data = await video.get_download_url(page_index=page_index) 286 | detecter = VideoDownloadURLDataDetecter(download_url_data) 287 | streams = detecter.detect_best_streams(video_max_quality=VideoQuality._1080P, no_dolby_video=True, no_hdr=True) 288 | video_stream = streams[0] 289 | if not isinstance(video_stream, VideoStreamDownloadURL): 290 | raise ParseException("未找到可下载的视频流") 291 | logger.debug(f"视频流质量: {video_stream.video_quality.name}") 292 | audio_stream = streams[1] 293 | if not isinstance(audio_stream, AudioStreamDownloadURL): 294 | return video_stream.url, "" 295 | logger.debug(f"音频流质量: {audio_stream.audio_quality.name}") 296 | return video_stream.url, audio_stream.url 297 | 298 | def _extra_bili_info(self, video_info: dict[str, Any]) -> str: 299 | """ 300 | 格式化视频信息 301 | """ 302 | # 获取视频统计数据 303 | video_state: dict[str, Any] = video_info["stat"] 304 | 305 | # 定义需要展示的数据及其显示名称 306 | stats_mapping = [ 307 | ("👍", "like"), 308 | ("🪙", 
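# ---------------------------------------------------------------------------
# 补充示意(非原文件内容):parse_video_info 对分 P 页码的“取模防越界”可以
# 单独验证——页码从 1 计、索引从 0 计,页码超出集数时回绕:
def normalize_page_idx(page_num: int, total_pages: int) -> int:
    return (page_num - 1) % total_pages if total_pages > 1 else 0

assert normalize_page_idx(1, 40) == 0
assert normalize_page_idx(40, 40) == 39
assert normalize_page_idx(41, 40) == 0  # 越界回绕到第一集
# ---------------------------------------------------------------------------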
"coin"), 309 | ("⭐", "favorite"), 310 | ("↩️", "share"), 311 | ("💬", "reply"), 312 | ("👀", "view"), 313 | ("💭", "danmaku"), 314 | ] 315 | 316 | # 构建结果字符串 317 | result_parts = [] 318 | for display_name, stat_key in stats_mapping: 319 | value = video_state[stat_key] 320 | # 数值超过10000时转换为万为单位 321 | formatted_value = f"{value / 10000:.1f}万" if value > 10000 else str(value) 322 | result_parts.append(f"{display_name} {formatted_value}") 323 | 324 | return " ".join(result_parts) 325 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/parsers/data.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | from ..constant import ANDROID_HEADER as ANDROID_HEADER 4 | from ..constant import COMMON_HEADER as COMMON_HEADER 5 | from ..constant import IOS_HEADER as IOS_HEADER 6 | 7 | 8 | @dataclass 9 | class ParseResult: 10 | """解析结果""" 11 | 12 | # 标题 13 | title: str 14 | 15 | # 作者 16 | author: str = "" 17 | 18 | # 封面地址 19 | cover_url: str = "" 20 | 21 | # 视频地址 22 | video_url: str = "" 23 | 24 | # 音频地址 25 | audio_url: str = "" 26 | 27 | # 图片地址 28 | pic_urls: list[str] = field(default_factory=list) 29 | 30 | # 动态视频地址 31 | dynamic_urls: list[str] = field(default_factory=list) 32 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/parsers/douyin.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | from typing import Any 4 | 5 | import aiohttp 6 | from nonebot import logger 7 | 8 | from ..exception import ParseException 9 | from .data import ANDROID_HEADER, IOS_HEADER, ParseResult 10 | from .utils import get_redirect_url 11 | 12 | 13 | class DouyinParser: 14 | def __init__(self): 15 | self.ios_headers = IOS_HEADER.copy() 16 | self.android_headers = {"Accept": "application/json, text/plain, */*", **ANDROID_HEADER} 17 | 18 | def _build_iesdouyin_url(self, _type: str, video_id: str) -> str: 19 | return f"https://www.iesdouyin.com/share/{_type}/{video_id}" 20 | 21 | def _build_m_douyin_url(self, _type: str, video_id: str) -> str: 22 | return f"https://m.douyin.com/share/{_type}/{video_id}" 23 | 24 | async def parse_share_url(self, share_url: str) -> ParseResult: 25 | if matched := re.match(r"(video|note)/([0-9]+)", share_url): 26 | # https://www.douyin.com/video/xxxxxx 27 | _type, video_id = matched.group(1), matched.group(2) 28 | iesdouyin_url = self._build_iesdouyin_url(_type, video_id) 29 | else: 30 | # https://v.douyin.com/xxxxxx 31 | iesdouyin_url = await get_redirect_url(share_url) 32 | # https://www.iesdouyin.com/share/video/7468908569061100857/?region=CN&mid=0&u_ 33 | matched = re.search(r"(slides|video|note)/(\d+)", iesdouyin_url) 34 | if not matched: 35 | raise ParseException(f"无法从 {share_url} 中解析出 ID") 36 | _type, video_id = matched.group(1), matched.group(2) 37 | if _type == "slides": 38 | return await self.parse_slides(video_id) 39 | for url in [ 40 | self._build_m_douyin_url(_type, video_id), 41 | share_url, 42 | iesdouyin_url, 43 | ]: 44 | try: 45 | return await self.parse_video(url) 46 | except ParseException as e: 47 | logger.warning(f"failed to parse {url[:60]}, error: {e}") 48 | continue 49 | except Exception as e: 50 | logger.warning(f"failed to parse {url[:60]}, unknown error: {e}") 51 | continue 52 | raise ParseException("作品已删除,或资源直链获取失败, 请稍后再试") 53 | 54 | async def parse_video(self, url: str) -> ParseResult: 55 | async with 
aiohttp.ClientSession() as session: 56 | async with session.get(url, headers=self.ios_headers, ssl=False) as response: 57 | response.raise_for_status() 58 | text = await response.text() 59 | data: dict[str, Any] = self._format_response(text) 60 | # 获取图集图片地址 61 | images: list[str] = [] 62 | # 如果data含有 images,并且 images 是一个列表 63 | if "images" in data and isinstance(data["images"], list): 64 | # 获取每个图片的url_list中的第一个元素,非空时添加到images列表中 65 | for img in data["images"]: 66 | assert isinstance(img, dict) 67 | if ( 68 | "url_list" in img 69 | and isinstance(img["url_list"], list) 70 | and len(img["url_list"]) > 0 71 | and len(img["url_list"][0]) > 0 72 | ): 73 | images.append(img["url_list"][0]) 74 | 75 | # 获取视频播放地址 76 | video_url: str = data["video"]["play_addr"]["url_list"][0].replace("playwm", "play") 77 | 78 | if video_url: 79 | # 获取重定向后的mp4视频地址 80 | video_url = await get_redirect_url(video_url) 81 | 82 | share_info = ParseResult( 83 | title=data["desc"], 84 | cover_url=data["video"]["cover"]["url_list"][0], 85 | pic_urls=images, 86 | video_url=video_url, 87 | author=data["author"]["nickname"], 88 | # author=Author( 89 | # # uid=data["author"]["sec_uid"], 90 | # name=data["author"]["nickname"], 91 | # avatar=data["author"]["avatar_thumb"]["url_list"][0], 92 | # ), 93 | ) 94 | return share_info 95 | 96 | def _format_response(self, text: str) -> dict[str, Any]: 97 | pattern = re.compile( 98 | pattern=r"window\._ROUTER_DATA\s*=\s*(.*?)</script>", 99 | flags=re.DOTALL, 100 | ) 101 | find_res = pattern.search(text) 102 | 103 | if not find_res or not find_res.group(1): 104 | raise ParseException("can't find _ROUTER_DATA in html") 105 | 106 | json_data = json.loads(find_res.group(1).strip()) 107 | 108 | # 获取链接返回json数据进行视频和图集判断,如果指定类型不存在,抛出异常 109 | # 返回的json数据中,视频字典类型为 video_(id)/page 110 | VIDEO_ID_PAGE_KEY = "video_(id)/page" 111 | # 返回的json数据中,视频字典类型为 note_(id)/page 112 | NOTE_ID_PAGE_KEY = "note_(id)/page" 113 | if VIDEO_ID_PAGE_KEY in json_data["loaderData"]: 114 | original_video_info = json_data["loaderData"][VIDEO_ID_PAGE_KEY]["videoInfoRes"] 115 | elif NOTE_ID_PAGE_KEY in json_data["loaderData"]: 116 | original_video_info = json_data["loaderData"][NOTE_ID_PAGE_KEY]["videoInfoRes"] 117 | else: 118 | raise ParseException("failed to parse Videos or Photo Gallery info from json") 119 | 120 | # 如果没有视频信息,获取并抛出异常 121 | if len(original_video_info["item_list"]) == 0: 122 | err_msg = "failed to parse video info from HTML" 123 | if len(filter_list := original_video_info["filter_list"]) > 0: 124 | err_msg = filter_list[0]["detail_msg"] or filter_list[0]["filter_reason"] 125 | raise ParseException(err_msg) 126 | 127 | return original_video_info["item_list"][0] 128 | 129 | async def parse_slides(self, video_id: str) -> ParseResult: 130 | url = "https://www.iesdouyin.com/web/api/v2/aweme/slidesinfo/" 131 | params = { 132 | "aweme_ids": f"[{video_id}]", 133 | "request_source": "200", 134 | } 135 | async with aiohttp.ClientSession() as session: 136 | async with session.get(url, params=params, headers=self.android_headers, ssl=False) as resp: 137 | resp.raise_for_status() 138 | resp = await resp.json() 139 | detail = resp.get("aweme_details") 140 | if not detail: 141 | raise ParseException("can't find aweme_details in json") 142 | data = detail[0] 143 | title = data.get("share_info").get("share_desc_info") 144 | images = [] 145 | dynamic_images = [] 146 | for image in data.get("images"): 147 | video = image.get("video") 148 | if video: 149 | dynamic_images.append(video["play_addr"]["url_list"][0]) 150 | else: 151 |
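# ---------------------------------------------------------------------------
# 补充示意(非原文件内容):_format_response 从 HTML 内嵌 script 中截取
# _ROUTER_DATA 的 JSON。用虚构的最小 HTML 验证该正则与取值路径:
import json
import re

html = (
    '<script>window._ROUTER_DATA = {"loaderData": {"video_(id)/page": '
    '{"videoInfoRes": {"item_list": [{"desc": "demo"}]}}}}</script>'
)
m = re.search(r"window\._ROUTER_DATA\s*=\s*(.*?)</script>", html, re.DOTALL)
assert m is not None
data = json.loads(m.group(1).strip())
assert data["loaderData"]["video_(id)/page"]["videoInfoRes"]["item_list"][0]["desc"] == "demo"
# ---------------------------------------------------------------------------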
images.append(image["url_list"][0]) 152 | 153 | return ParseResult( 154 | title=title, 155 | cover_url="", 156 | author=data["author"]["nickname"], 157 | # author=Author( 158 | # name=data["author"]["nickname"], 159 | # avatar=data["author"]["avatar_thumb"]["url_list"][0], 160 | # ), 161 | pic_urls=images, 162 | dynamic_urls=dynamic_images, 163 | ) 164 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/parsers/kuaishou.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import urllib.parse 4 | 5 | import aiohttp 6 | 7 | from ..constant import COMMON_HEADER, IOS_HEADER 8 | from ..exception import ParseException 9 | from .data import ParseResult 10 | from .utils import get_redirect_url 11 | 12 | 13 | class KuaishouParser: 14 | """快手解析器""" 15 | 16 | def __init__(self): 17 | self.headers = COMMON_HEADER 18 | self.v_headers = { 19 | **IOS_HEADER, 20 | "Referer": "https://v.kuaishou.com/", 21 | } 22 | # 通用第三方解析API 23 | self.api_url = "http://47.99.158.118/video-crack/v2/parse?content={}" 24 | 25 | async def parse_url(self, url: str) -> ParseResult: 26 | """解析快手链接获取视频信息 27 | 28 | Args: 29 | url: 快手视频链接 30 | 31 | Returns: 32 | ParseResult: 快手视频信息 33 | """ 34 | location_url = await get_redirect_url(url, headers=self.v_headers) 35 | 36 | if len(location_url) <= 0: 37 | raise ParseException("failed to get location url from url") 38 | 39 | # /fw/long-video/ 返回结果不一样, 统一替换为 /fw/photo/ 请求 40 | location_url = location_url.replace("/fw/long-video/", "/fw/photo/") 41 | 42 | async with aiohttp.ClientSession() as session: 43 | async with session.get(location_url, headers=self.v_headers) as resp: 44 | resp.raise_for_status() 45 | response_text = await resp.text() 46 | 47 | pattern = r"window\.INIT_STATE\s*=\s*(.*?)</script>" 48 | searched = re.search(pattern, response_text) 49 | 50 | if not searched or len(searched.groups()) < 1: 51 | raise ParseException("failed to parse video JSON info from HTML") 52 | 53 | json_text = searched.group(1).strip() 54 | try: 55 | json_data = json.loads(json_text) 56 | except json.JSONDecodeError as e: 57 | raise ParseException("failed to parse INIT_STATE payload") from e 58 | 59 | photo_data = {} 60 | for json_item in json_data.values(): 61 | if "result" in json_item and "photo" in json_item: 62 | photo_data = json_item 63 | break 64 | 65 | if not photo_data: 66 | raise ParseException("failed to parse photo info from INIT_STATE") 67 | 68 | # 判断result状态 69 | if (result_code := photo_data["result"]) != 1: 70 | raise ParseException(f"获取作品信息失败: {result_code}") 71 | 72 | data = photo_data["photo"] 73 | 74 | # 获取视频地址 75 | video_url = "" 76 | if "mainMvUrls" in data and len(data["mainMvUrls"]) > 0: 77 | video_url = data["mainMvUrls"][0]["url"] 78 | 79 | # 获取图集 80 | ext_params_atlas = data.get("ext_params", {}).get("atlas", {}) 81 | atlas_cdn_list = ext_params_atlas.get("cdn", []) 82 | atlas_list = ext_params_atlas.get("list", []) 83 | images = [] 84 | if len(atlas_cdn_list) > 0 and len(atlas_list) > 0: 85 | for atlas in atlas_list: 86 | images.append(f"https://{atlas_cdn_list[0]}/{atlas}") 87 | 88 | video_info = ParseResult( 89 | video_url=video_url, 90 | cover_url=data["coverUrls"][0]["url"], 91 | title=data["caption"], 92 | author=data["userName"], 93 | pic_urls=images, 94 | ) 95 | return video_info 96 | 97 | async def parse_url_by_api(self, url: str) -> ParseResult: 98 | """解析快手链接获取视频信息 99 | 100 | Args: 101 | url: 快手视频链接 102 | 103 | Returns: 104 | ParseResult: 快手视频信息
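# ---------------------------------------------------------------------------
# 补充示意(非原文件内容):INIT_STATE 是多键大对象,parse_url 的做法是遍历
# 所有值,取第一个同时带 result 和 photo 的条目。独立演示(数据为虚构样例):
from typing import Any

def find_photo_item(init_state: dict[str, Any]) -> dict[str, Any]:
    for item in init_state.values():
        if isinstance(item, dict) and "result" in item and "photo" in item:
            return item
    raise ValueError("no photo item in INIT_STATE")

state = {"meta": {"x": 1}, "photo$1": {"result": 1, "photo": {"caption": "标题"}}}
assert find_photo_item(state)["photo"]["caption"] == "标题"
# ---------------------------------------------------------------------------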
105 | """ 106 | video_id = await self._extract_video_id(url) 107 | if not video_id: 108 | raise ParseException("无法从链接中提取视频ID") 109 | 110 | # 构造标准链接格式,用于API解析 111 | standard_url = f"https://www.kuaishou.com/short-video/{video_id}" 112 | # URL编码content参数避免查询字符串无效 113 | encoded_url = urllib.parse.quote(standard_url) 114 | api_url = self.api_url.format(encoded_url) 115 | 116 | async with aiohttp.ClientSession() as session: 117 | async with session.get(api_url, headers=self.headers) as resp: 118 | if resp.status != 200: 119 | raise ParseException(f"解析API返回错误状态码: {resp.status}") 120 | 121 | result = await resp.json() 122 | 123 | # 根据API返回示例,成功时code应为0 124 | if result.get("code") != 0 or not result.get("data"): 125 | raise ParseException(f"解析API返回错误: {result.get('msg', '未知错误')}") 126 | 127 | data = result["data"] 128 | video_url = data.get("url") 129 | if not video_url: 130 | raise ParseException("未获取到视频直链") 131 | 132 | return ParseResult( 133 | # 字段名称与回退值 134 | title=data.get("title", "未知标题"), 135 | cover_url=data.get("imageUrl", ""), 136 | video_url=video_url, 137 | # API可能不提供作者信息 138 | author=data.get("name", "无名"), 139 | ) 140 | 141 | async def _extract_video_id(self, url: str) -> str: 142 | """提取视频ID 143 | 144 | Args: 145 | url: 快手视频链接 146 | 147 | Returns: 148 | str: 视频ID 149 | """ 150 | # 处理可能的短链接 151 | if "v.kuaishou.com" in url: 152 | url = await get_redirect_url(url) 153 | 154 | # 提取视频ID - 使用walrus operator和索引替代group() 155 | if "/fw/photo/" in url and (matched := re.search(r"/fw/photo/([^/?]+)", url)): 156 | return matched.group(1) 157 | elif "short-video" in url and (matched := re.search(r"short-video/([^/?]+)", url)): 158 | return matched.group(1) 159 | 160 | raise ParseException("无法从链接中提取视频ID") 161 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/parsers/kugou.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import aiohttp 4 | 5 | from ..exception import ParseException 6 | from .data import COMMON_HEADER, ParseResult 7 | 8 | 9 | class KuGouParser: 10 | async def parse_share_url(self, share_url: str) -> ParseResult: 11 | """解析酷狗分享链接""" 12 | # https://t1.kugou.com/song.html?id=1hfw6baEmV3 13 | async with aiohttp.ClientSession() as session: 14 | async with session.get(share_url, headers=COMMON_HEADER, ssl=False) as response: 15 | response.raise_for_status() 16 | html_text = await response.text() 17 | # 土坡上的狗尾草_卢润泽_高音质在线 18 | matched = re.search(r"<title>(.+)_高音质在线", html_text) 19 | if not matched: 20 | raise ParseException("无法获取歌曲名") 21 | 22 | title = matched.group(1).replace("_", " ") 23 | 24 | api_url = f"https://www.hhlqilongzhu.cn/api/dg_kugouSQ.php?msg={title}&n=1&type=json" 25 | async with aiohttp.ClientSession() as session: 26 | async with session.get(api_url, headers=COMMON_HEADER) as response: 27 | if response.status != 200: 28 | raise ParseException(f"无法获取歌曲信息: {response.status}") 29 | song_info = await response.json() 30 | 31 | return ParseResult( 32 | title=song_info.get("title"), 33 | cover_url=song_info.get("cover"), 34 | audio_url=song_info.get("music_url"), 35 | author=song_info["singer"], 36 | ) 37 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/parsers/ncm.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import aiohttp 4 | from nonebot import logger 5 | from nonebot.exception import FinishedException 6 | 7 | from ..exception import 
ParseException 8 | from .data import COMMON_HEADER, ParseResult 9 | from .utils import get_redirect_url 10 | 11 | 12 | class NCMParser: 13 | """ 14 | 网易云音乐解析器 15 | """ 16 | 17 | def __init__(self): 18 | self.short_url_pattern = re.compile(r"(http:|https:)\/\/163cn\.tv\/([a-zA-Z0-9]+)") 19 | 20 | async def parse_ncm(self, ncm_url: str): 21 | if matched := self.short_url_pattern.search(ncm_url): 22 | ncm_url = matched.group(0) 23 | ncm_url = await get_redirect_url(ncm_url) 24 | 25 | # 获取网易云歌曲id 26 | matched = re.search(r"\?id=(\d+)", ncm_url) 27 | if not matched: 28 | logger.warning(f"无效网易云链接: {ncm_url}, 忽略") 29 | raise FinishedException 30 | ncm_id = matched.group(1) 31 | 32 | # 对接临时接口 33 | try: 34 | async with aiohttp.ClientSession() as session: 35 | async with session.get( 36 | f"https://www.hhlqilongzhu.cn/api/dg_wyymusic.php?id={ncm_id}&br=7&type=json", headers=COMMON_HEADER 37 | ) as resp: 38 | resp.raise_for_status() 39 | ncm_vip_data = await resp.json() 40 | ncm_music_url, ncm_cover, ncm_singer, ncm_title = ( 41 | ncm_vip_data.get(key) for key in ["music_url", "cover", "singer", "title"] 42 | ) 43 | except Exception as e: 44 | raise ParseException(f"网易云音乐解析失败: {e}") 45 | 46 | return ParseResult( 47 | title=ncm_title, 48 | author=ncm_singer, 49 | cover_url=ncm_cover, 50 | audio_url=ncm_music_url, 51 | ) 52 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/parsers/utils.py: -------------------------------------------------------------------------------- 1 | async def get_redirect_url(url: str, headers: dict[str, str] | None = None) -> str: 2 | import aiohttp 3 | 4 | from .data import COMMON_HEADER 5 | 6 | """获取重定向后的URL""" 7 | async with aiohttp.ClientSession() as session: 8 | async with session.get(url, headers=headers or COMMON_HEADER, allow_redirects=False, ssl=False) as response: 9 | response.raise_for_status() 10 | return response.headers.get("Location", url) 11 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/parsers/weibo.py: -------------------------------------------------------------------------------- 1 | import math 2 | import re 3 | 4 | import aiohttp 5 | 6 | from ..constant import COMMON_HEADER 7 | from ..exception import ParseException 8 | from .data import ParseResult 9 | 10 | 11 | class WeiBoParser: 12 | async def parse_share_url(self, share_url: str) -> ParseResult: 13 | """解析微博分享链接""" 14 | # https://video.weibo.com/show?fid=1034:5145615399845897 15 | if match := re.search(r"https://video\.weibo\.com/show\?fid=(\d+:\d+)", share_url): 16 | return await self.parse_fid(match.group(1)) 17 | # https://m.weibo.cn/detail/4976424138313924 18 | elif match := re.search(r"m\.weibo\.cn(?:/detail|/status)?/([A-Za-z\d]+)", share_url): 19 | weibo_id = match.group(1) 20 | # https://weibo.com/tv/show/1034:5007449447661594?mid=5007452630158934 21 | elif match := re.search(r"mid=([A-Za-z\d]+)", share_url): 22 | weibo_id = self._mid2id(match.group(1)) 23 | # https://weibo.com/1707895270/5006106478773472 24 | elif match := re.search(r"(?<=weibo.com/)[A-Za-z\d]+/([A-Za-z\d]+)", share_url): 25 | weibo_id = match.group(1) 26 | # 无法获取到id则返回失败信息 27 | else: 28 | raise ParseException("无法获取到微博的 id") 29 | 30 | return await self.parse_weibo_id(weibo_id) 31 | 32 | async def parse_fid(self, fid: str) -> ParseResult: 33 | """ 34 | 解析带 fid 的微博视频 35 | """ 36 | req_url = f"https://h5.video.weibo.com/api/component?page=/show/{fid}" 37 | headers = { 38 | "Referer": 
f"https://h5.video.weibo.com/show/{fid}", 39 | "Content-Type": "application/x-www-form-urlencoded", 40 | **COMMON_HEADER, 41 | } 42 | post_content = 'data={"Component_Play_Playinfo":{"oid":"' + fid + '"}}' 43 | async with aiohttp.ClientSession() as session: 44 | async with session.post(req_url, headers=headers, data=post_content) as response: 45 | response.raise_for_status() 46 | json_data = await response.json() 47 | data = json_data["data"]["Component_Play_Playinfo"] 48 | 49 | video_url = data["stream_url"] 50 | if len(data["urls"]) > 0: 51 | # stream_url码率最低,urls中第一条码率最高 52 | _, first_mp4_url = next(iter(data["urls"].items())) 53 | video_url = f"https:{first_mp4_url}" 54 | 55 | video_info = ParseResult( 56 | video_url=video_url, 57 | cover_url="https:" + data["cover_image"], 58 | title=data["title"], 59 | author=data["author"], 60 | # author=Author( 61 | # # uid=str(data["user"]["id"]), 62 | # name=data["author"], 63 | # avatar="https:" + data["avatar"], 64 | # ), 65 | ) 66 | return video_info 67 | 68 | async def parse_weibo_id(self, weibo_id: str) -> ParseResult: 69 | """解析微博 id""" 70 | headers = { 71 | "accept": "application/json", 72 | "cookie": "_T_WM=40835919903; WEIBOCN_FROM=1110006030; MLOGIN=0; XSRF-TOKEN=4399c8", 73 | "Referer": f"https://m.weibo.cn/detail/{weibo_id}", 74 | **COMMON_HEADER, 75 | } 76 | 77 | # 请求数据 78 | async with aiohttp.ClientSession() as session: 79 | async with session.get(f"https://m.weibo.cn/statuses/show?id={weibo_id}", headers=headers) as resp: 80 | if resp.status != 200: 81 | raise ParseException(f"获取数据失败 {resp.status} {resp.reason}") 82 | if "application/json" not in resp.headers.get("content-type", ""): 83 | raise ParseException("获取数据失败 content-type is not application/json") 84 | resp = await resp.json() 85 | 86 | weibo_data = resp["data"] 87 | text, status_title, source, region_name, pics, page_info = ( 88 | weibo_data.get(key) 89 | for key in [ 90 | "text", 91 | "status_title", 92 | "source", 93 | "region_name", 94 | "pics", 95 | "page_info", 96 | ] 97 | ) 98 | video_url = "" 99 | # 图集 100 | if pics: 101 | pics = [x["large"]["url"] for x in pics] 102 | else: 103 | videos = page_info.get("urls") 104 | video_url: str = videos.get("mp4_720p_mp4") or videos.get("mp4_hd_mp4") if videos else "" 105 | 106 | return ParseResult( 107 | author=source, 108 | cover_url="", 109 | title=f"{re.sub(r'<[^>]+>', '', text)}\n{status_title}\n{source}\t{region_name if region_name else ''}", 110 | video_url=video_url, 111 | pic_urls=pics, 112 | ) 113 | 114 | def _base62_encode(self, number: int) -> str: 115 | """将数字转换为 base62 编码""" 116 | alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 117 | if number == 0: 118 | return "0" 119 | 120 | result = "" 121 | while number > 0: 122 | result = alphabet[number % 62] + result 123 | number //= 62 124 | 125 | return result 126 | 127 | def _mid2id(self, mid: str) -> str: 128 | """将微博 mid 转换为 id""" 129 | mid = str(mid)[::-1] # 反转输入字符串 130 | size = math.ceil(len(mid) / 7) # 计算每个块的大小 131 | result = [] 132 | 133 | for i in range(size): 134 | # 对每个块进行处理并反转 135 | s = mid[i * 7 : (i + 1) * 7][::-1] 136 | # 将字符串转为整数后进行 base62 编码 137 | s = self._base62_encode(int(s)) 138 | # 如果不是最后一个块并且长度不足4位,进行左侧补零操作 139 | if i < size - 1 and len(s) < 4: 140 | s = "0" * (4 - len(s)) + s 141 | result.append(s) 142 | 143 | result.reverse() # 反转结果数组 144 | return "".join(result) # 将结果数组连接成字符串 145 | -------------------------------------------------------------------------------- /nonebot_plugin_resolver2/parsers/xiaohongshu.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | from urllib.parse import parse_qs, urlparse 4 | 5 | import aiohttp 6 | 7 | from ..config import rconfig 8 | from ..constant import COMMON_HEADER 9 | from ..exception import ParseException 10 | from .data import ParseResult 11 | from .utils import get_redirect_url 12 | 13 | 14 | class XiaoHongShuParser: 15 | def __init__(self): 16 | self.headers = { 17 | "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8," 18 | "application/signed-exchange;v=b3;q=0.9", 19 | **COMMON_HEADER, 20 | } 21 | if rconfig.r_xhs_ck: 22 | self.headers["cookie"] = rconfig.r_xhs_ck 23 | 24 | async def parse_url(self, url: str) -> ParseResult: 25 | """解析小红书 URL 26 | 27 | Args: 28 | url (str): 小红书 URL 29 | 30 | Returns: 31 | ParseResult: 解析结果 32 | 33 | Raises: 34 | ParseException: 小红书分享链接不完整 35 | ParseException: 小红书 cookie 可能已失效 36 | """ 37 | # 处理 xhslink 短链 38 | if "xhslink" in url: 39 | url = await get_redirect_url(url, self.headers) 40 | # ?: 非捕获组 41 | pattern = r"(?:/explore/|/discovery/item/|source=note&noteId=)(\w+)" 42 | matched = re.search(pattern, url) 43 | if not matched: 44 | raise ParseException("小红书分享链接不完整") 45 | xhs_id = matched.group(1) 46 | # 解析 URL 参数 47 | parsed_url = urlparse(url) 48 | params = parse_qs(parsed_url.query) 49 | # 提取 xsec_source 和 xsec_token 50 | xsec_source = params.get("xsec_source", [None])[0] or "pc_feed" 51 | xsec_token = params.get("xsec_token", [None])[0] 52 | async with aiohttp.ClientSession() as session: 53 | async with session.get( 54 | f"https://www.xiaohongshu.com/explore/{xhs_id}?xsec_source={xsec_source}&xsec_token={xsec_token}", 55 | headers=self.headers, 56 | ) as resp: 57 | html = await resp.text() 58 | 59 | pattern = r"window.__INITIAL_STATE__=(.*?)</script>" 60 | matched = re.search(pattern, html) 61 | if not matched: 62 | raise ParseException("小红书 cookie 可能已失效") 63 | 64 | json_str = matched.group(1) 65 | json_str = json_str.replace("undefined", "null") 66 | json_obj = json.loads(json_str) 67 | try: 68 | note_data = json_obj["note"]["noteDetailMap"][xhs_id]["note"] 69 | except KeyError: 70 | raise ParseException("小红书 cookie 可能已失效") 71 | # 资源类型 normal 图,video 视频 72 | resource_type = note_data["type"] 73 | # 标题 74 | note_title = note_data["title"] 75 | # 描述 76 | note_desc = note_data["desc"] 77 | title_desc = f"{note_title}\n{note_desc}" 78 | img_urls = [] 79 | video_url = "" 80 | if resource_type == "normal": 81 | image_list = note_data["imageList"] 82 | img_urls = [item["urlDefault"] for item in image_list] 83 | elif resource_type == "video": 84 | video_url = note_data["video"]["media"]["stream"]["h264"][0]["masterUrl"] 85 | else: 86 | raise ParseException(f"不支持的小红书链接类型: {resource_type}") 87 | return ParseResult( 88 | title=title_desc, 89 | cover_url="", 90 | video_url=video_url, 91 | pic_urls=img_urls, 92 | author=note_data["user"]["nickname"], 93 | ) 94 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "nonebot-plugin-resolver2" 3 | version = "1.9.2" 4 | description = "NoneBot2 链接分享解析器自动解析, BV号/链接/小程序/卡片 | B站/抖音/网易云/微博/小红书/youtube/tiktok/twitter/acfun" 5 | authors = [{ "name" = "fllesser", "email" = "fllessive@gmail.com" }] 6 | urls = { Repository = "https://github.com/fllesser/nonebot-plugin-resolver2" } 7 | readme = "README.md" 8 | requires-python = ">=3.10"
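# ---------------------------------------------------------------------------
# 补充示意(Python,非本 TOML 文件内容,对应上文 XiaoHongShuParser):
# xsec_source / xsec_token 直接用标准库 parse_qs 提取,source 缺省时回退
# pc_feed。示例 URL 为虚构占位:
from urllib.parse import parse_qs, urlparse

url = "https://www.xiaohongshu.com/discovery/item/abc123?xsec_source=pc_share&xsec_token=TOKEN"
params = parse_qs(urlparse(url).query)
xsec_source = params.get("xsec_source", [None])[0] or "pc_feed"
xsec_token = params.get("xsec_token", [None])[0]
assert xsec_source == "pc_share"
assert xsec_token == "TOKEN"

no_source = parse_qs(urlparse("https://www.xiaohongshu.com/explore/abc123").query)
assert (no_source.get("xsec_source", [None])[0] or "pc_feed") == "pc_feed"
# ---------------------------------------------------------------------------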
9 | keywords = [ 10 | "nonebot", 11 | "nonebot2", 12 | "resolver", 13 | "bilibili", 14 | "youtube", 15 | "tiktok", 16 | "twitter", 17 | ] 18 | dependencies = [ 19 | "aiohttp>=3.10.5,<4.0.0", 20 | "curl_cffi>=0.8.0,<1.0.0", 21 | "tqdm>=4.67.1,<5.0.0", 22 | "aiofiles>=24.1.0", 23 | "yt-dlp>=2025.5.22", 24 | "nonebot2>=2.4.2,<3.0.0", 25 | "nonebot-adapter-onebot>=2.4.6,<3.0.0", 26 | "nonebot-plugin-localstore>=0.7.4,<1.0.0", 27 | "nonebot-plugin-apscheduler>=0.5.0,<1.0.0", 28 | "bilibili-api-python>=17.2.1,<18.0.0", 29 | ] 30 | 31 | 32 | [dependency-groups] 33 | dev = ["nonebot2[fastapi]>=2.4.2,<3.0.0", "ruff>=0.11.12,<1.0.0"] 34 | 35 | test = [ 36 | "nonebot2[fastapi]>=2.4.2,<3.0.0", 37 | "nonebug>=0.3.7,<1.0.0", 38 | "pytest-xdist>=3.6.1,<4.0.0", 39 | "pytest-asyncio>=1.0.0,<1.1.0", 40 | ] 41 | 42 | 43 | [tool.nonebot] 44 | plugins = ["nonebot_plugin_resolver2"] 45 | 46 | 47 | [tool.pytest.ini_options] 48 | asyncio_mode = "auto" 49 | asyncio_default_fixture_loop_scope = "session" 50 | pythonpath = [".", "nonebot_plugin_resolver2", "tests"] 51 | addopts = [ 52 | "-v", # 详细输出 53 | "-s", # 显示打印信息 54 | "--tb=short", # 简短的错误回溯 55 | "-ra", # 显示所有测试结果摘要 56 | "--strict-markers", # 严格标记模式 57 | # "--doctest-modules", # 运行文档测试 58 | "--import-mode=prepend", # 导入模式 59 | ] 60 | 61 | [tool.ruff] 62 | line-length = 120 63 | target-version = "py310" 64 | 65 | [tool.ruff.format] 66 | line-ending = "lf" 67 | 68 | [tool.ruff.lint] 69 | select = [ 70 | "F", # Pyflakes 71 | "W", # pycodestyle warnings 72 | "E", # pycodestyle errors 73 | "I", # isort 74 | "UP", # pyupgrade 75 | "ASYNC", # flake8-async 76 | "C4", # flake8-comprehensions 77 | "T10", # flake8-debugger 78 | "T20", # flake8-print 79 | "PYI", # flake8-pyi 80 | "PT", # flake8-pytest-style 81 | "Q", # flake8-quotes 82 | "TID", # flake8-tidy-imports 83 | "RUF", # Ruff-specific rules 84 | ] 85 | ignore = [ 86 | "E402", # module-import-not-at-top-of-file 87 | "UP037", # quoted-annotation 88 | "RUF001", # ambiguous-unicode-character-string 89 | "RUF002", # ambiguous-unicode-character-docstring 90 | "RUF003", # ambiguous-unicode-character-comment 91 | "W191", # indentation contains tabs 92 | # "I001", # isort: imports are incorrectly sorted 93 | "TID252", # 相对导入 94 | ] 95 | 96 | 97 | [tool.ruff.lint.isort] 98 | force-sort-within-sections = true 99 | known-first-party = ["tests/*"] 100 | extra-standard-library = ["typing_extensions"] 101 | 102 | [tool.ruff.lint.flake8-pytest-style] 103 | fixture-parentheses = false 104 | mark-parentheses = false 105 | 106 | [tool.ruff.lint.pyupgrade] 107 | keep-runtime-typing = true 108 | 109 | 110 | [tool.pyright] 111 | pythonVersion = "3.10" 112 | pythonPlatform = "All" 113 | defineConstant = { PYDANTIC_V2 = true } 114 | executionEnvironments = [ 115 | { root = "./tests", extraPaths = [ 116 | "./", 117 | ] }, 118 | { root = "./" }, 119 | ] 120 | typeCheckingMode = "standard" 121 | reportShadowedImports = false 122 | disableBytesTypePromotions = true 123 | -------------------------------------------------------------------------------- /test_url.md: -------------------------------------------------------------------------------- 1 | youtube: 2 | - https://youtu.be/EKkzbbLYPuI?si=K_S9zIp5g7DhigVz 3 | 4 | kugou: 5 | - https://www.kugou.com/mixsong/j3pry11.html 6 | 7 | kuaishou: 8 | 视频: 9 | - https://www.kuaishou.com/short-video/3xhjgcmir24m4nm 10 | - https://v.kuaishou.com/2yAnzeZ 11 | - https://v.m.chenzhongtech.com/fw/photo/3xburnkmj3auazc 12 | 图集: 13 | - https://v.kuaishou.com/2xZPkuV 14 | 15 | bilibili: 16 | - 
https://www.bilibili.com/opus/998440765151510535 17 | - https://www.bilibili.com/opus/1040093151889457152 18 | - https://www.bilibili.com/read/cv523868 19 | - https://space.bilibili.com/396886341/favlist?fid=311147541&ftype=create 20 | - https://live.bilibili.com/23585383 21 | - https://b23.tv/YTg9oSw 22 | - https://bili2233.cn/rnrwIyU 23 | - https://www.bilibili.com/video/BV1VLk9YDEzB 24 | - https://bilibili.com/av1234567 25 | - https://bilibili.com/BV1uCzoYEEir 26 | - BV1uCzoYEEir 27 | - av113706574811958 28 | 29 | 30 | 分p视频支持,未指定集数默认第一集 31 | - BV1584y167sD 40 32 | - av605821754 40 33 | - https://www.bilibili.com/video/BV1584y167sD?p=40 34 | 35 | 分p音频,仅BV号 36 | - bm BV1584y167sD 40 37 | 38 | tiktok: 39 | - https://www.tiktok.com/@xwc1897/video/7436010419002608928?is_from_webapp=1&sender_device=pc 40 | - https://www.tiktok.com/@mssethi.ss/video/7479816398814973227?is_from_webapp=1&sender_device=pc 41 | 42 | acfun: 43 | - https://www.acfun.cn/v/ac46593564 44 | 45 | ncm: 46 | - https://music.163.com/song?id=1948109333 47 | 48 | X: 49 | - https://x.com/fortnitegame/status/1870484479980052921?s=46 50 | - https://x.com/fortnitegame/status/1864640869900668940?s=46 51 | 52 | douyin: 53 | - 视频 54 | - https://v.douyin.com/iDHWnyTP 55 | - https://www.douyin.com/video/7440422807663660328 56 | - 普通图文 57 | - https://www.douyin.com/note/7469411074119322899 58 | - https://v.douyin.com/iP6Uu1Kh 59 | - 老视频,网页打开会重定向到 m.ixigua.com 60 | - https://v.douyin.com/iUrHrruH 61 | - 含视频的图集 62 | - https://v.douyin.com/CeiJfqyWs # 将会解析出视频 63 | - https://www.douyin.com/note/7450744229229235491 # 解析成普通图片 64 | 65 | xiaohongshu: 66 | - http://xhslink.com/a/WHdZNpdzwbl7 67 | - https://www.xiaohongshu.com/discovery/item/67c41945000000002802b2e2?source=webshare&xhsshare=pc_web&xsec_token=ABS6rGbAmdjNtTuLqfAB2aR0oDioMqDezM4Hx5EeDFGSI=&xsec_source=pc_share 68 | 69 | weibo: 70 | - https://video.weibo.com/show?fid=1034:5145615399845897 71 | - https://weibo.com/7207262816/P5kWdcfDe 72 | - https://weibo.com/7207262816/O70aCbjnd 73 | - http://m.weibo.cn/status/5112672433738061 -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from pytest_asyncio import is_async_test 5 | 6 | os.environ["ENVIRONMENT"] = "test" 7 | 8 | 9 | def pytest_collection_modifyitems(items: list[pytest.Item]): 10 | pytest_asyncio_tests = (item for item in items if is_async_test(item)) 11 | session_scope_marker = pytest.mark.asyncio(loop_scope="session") 12 | for async_test in pytest_asyncio_tests: 13 | async_test.add_marker(session_scope_marker, append=False) 14 | 15 | 16 | @pytest.fixture(scope="session", autouse=True) 17 | async def after_nonebot_init(after_nonebot_init: None): 18 | import nonebot 19 | from nonebot.adapters.onebot.v11 import Adapter as OnebotV11Adapter 20 | 21 | # 加载适配器 22 | driver = nonebot.get_driver() 23 | driver.register_adapter(OnebotV11Adapter) 24 | 25 | # 加载插件 26 | nonebot.load_from_toml("pyproject.toml") 27 | -------------------------------------------------------------------------------- /tests/test_acfun.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from nonebot import logger 4 | import pytest 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_parse_acfun_url(): 9 | from nonebot_plugin_resolver2.download import fmt_size 10 | from nonebot_plugin_resolver2.parsers import AcfunParser 11 | 12 | urls = 
["https://www.acfun.cn/v/ac46593564", "https://www.acfun.cn/v/ac40867941"] 13 | acfun_parser = AcfunParser() 14 | 15 | async def parse_acfun_url(url: str) -> None: 16 | acid = int(url.split("/")[-1].split("ac")[1]) 17 | logger.info(f"{url} | 开始解析视频 acid: {acid}") 18 | m3u8s_url, video_desc = await acfun_parser.parse_url(url) 19 | assert m3u8s_url 20 | assert video_desc 21 | logger.debug(f"{url} | m3u8s_url: {m3u8s_url}, video_desc: {video_desc}") 22 | 23 | logger.info(f"{url} | 开始下载视频") 24 | video_file = await acfun_parser.download_video(m3u8s_url, acid) 25 | assert video_file 26 | logger.info(f"{url} | 视频下载成功, 视频{fmt_size(video_file)}") 27 | 28 | await asyncio.gather(*[parse_acfun_url(url) for url in urls]) 29 | -------------------------------------------------------------------------------- /tests/test_bilibili.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import re 3 | 4 | from nonebot import logger 5 | import pytest 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_bilibili_live(): 10 | logger.info("开始解析B站直播 https://live.bilibili.com/6") 11 | from nonebot_plugin_resolver2.parsers import BilibiliParser 12 | 13 | # https://live.bilibili.com/6 14 | room_id = 6 15 | bilibili_parser = BilibiliParser() 16 | title, cover, _ = await bilibili_parser.parse_live(room_id) 17 | assert title 18 | logger.debug(f"title: {title}") 19 | 20 | assert cover.startswith("https://i0.hdslb.com/") 21 | logger.debug(f"cover: {cover}") 22 | logger.success("B站直播解析成功") 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_bilibili_read(): 27 | logger.info("开始解析B站图文 https://www.bilibili.com/read/cv523868") 28 | from nonebot_plugin_resolver2.parsers import BilibiliParser 29 | 30 | # https://www.bilibili.com/read/cv523868 31 | read_id = 523868 32 | bilibili_parser = BilibiliParser() 33 | texts, urls = await bilibili_parser.parse_read(read_id) 34 | assert texts 35 | logger.debug(f"texts: {texts}") 36 | 37 | assert urls 38 | logger.debug(f"urls: {urls}") 39 | logger.success("B站图文解析成功") 40 | 41 | 42 | @pytest.mark.asyncio 43 | async def test_bilibili_opus(): 44 | from nonebot_plugin_resolver2.download import download_imgs_without_raise 45 | from nonebot_plugin_resolver2.parsers import BilibiliParser 46 | 47 | opus_urls = [ 48 | "https://www.bilibili.com/opus/998440765151510535", 49 | "https://www.bilibili.com/opus/1040093151889457152", 50 | ] 51 | 52 | bilibili_parser = BilibiliParser() 53 | 54 | async def test_parse_opus(opus_url: str) -> None: 55 | matched = re.search(r"opus/(\d+)", opus_url) 56 | assert matched 57 | opus_id = int(matched.group(1)) 58 | logger.info(f"{opus_url} | 开始解析哔哩哔哩动态 opus_id: {opus_id}") 59 | 60 | pic_urls, orig_text = await bilibili_parser.parse_opus(opus_id) 61 | assert pic_urls 62 | logger.debug(f"{opus_url} | pic_urls: {pic_urls}") 63 | 64 | files = await download_imgs_without_raise(pic_urls) 65 | assert len(files) == len(pic_urls) 66 | 67 | assert orig_text 68 | logger.debug(f"{opus_url} | original_text: {orig_text}") 69 | 70 | await asyncio.gather(*[test_parse_opus(opus_url) for opus_url in opus_urls]) 71 | logger.success("B站动态解析成功") 72 | -------------------------------------------------------------------------------- /tests/test_bilibili_need_ck.py: -------------------------------------------------------------------------------- 1 | from nonebot import logger 2 | import pytest 3 | 4 | 5 | @pytest.mark.asyncio 6 | async def test_bilibili_favlist(): 7 | from nonebot_plugin_resolver2.download import download_imgs_without_raise 
/tests/test_bilibili_need_ck.py: -------------------------------------------------------------------------------- 1 | from nonebot import logger 2 | import pytest 3 | 4 | 5 | @pytest.mark.asyncio 6 | async def test_bilibili_favlist(): 7 | from nonebot_plugin_resolver2.download import download_imgs_without_raise 8 | from nonebot_plugin_resolver2.parsers import BilibiliParser 9 | 10 | logger.info("start parsing bilibili favlist https://space.bilibili.com/396886341/favlist?fid=311147541&ftype=create") 11 | # https://space.bilibili.com/396886341/favlist?fid=311147541&ftype=create 12 | fav_id = 311147541 13 | bilibili_parser = BilibiliParser() 14 | texts, urls = await bilibili_parser.parse_favlist(fav_id) 15 | 16 | assert texts 17 | logger.debug(texts) 18 | 19 | assert urls 20 | logger.debug(urls) 21 | 22 | files = await download_imgs_without_raise(urls) 23 | assert len(files) == len(urls) 24 | logger.success("bilibili favlist parsed successfully") 25 | 26 | 27 | @pytest.mark.asyncio 28 | async def test_bilibili_video(): 29 | from nonebot_plugin_resolver2.parsers import BilibiliParser 30 | 31 | logger.info("start parsing bilibili video BV1VLk9YDEzB") 32 | bilibili_parser = BilibiliParser() 33 | video_info = await bilibili_parser.parse_video_info(bvid="BV1VLk9YDEzB") 34 | logger.debug(video_info) 35 | logger.success("bilibili video BV1VLk9YDEzB parsed successfully") 36 | 37 | logger.info("start parsing bilibili video BV1584y167sD p40") 38 | video_info = await bilibili_parser.parse_video_info(bvid="BV1584y167sD", page_num=40) 39 | logger.debug(video_info) 40 | logger.success("bilibili video BV1584y167sD p40 parsed successfully") 41 | 42 | logger.info("start parsing bilibili video av605821754 p40") 43 | video_info = await bilibili_parser.parse_video_info(avid=605821754, page_num=40) 44 | logger.debug(video_info) 45 | logger.success("bilibili video av605821754 p40 parsed successfully") 46 | 47 | 48 | @pytest.mark.asyncio 49 | async def test_encode_h264_video(): 50 | import asyncio 51 | from pathlib import Path 52 | 53 | from bilibili_api import HEADERS 54 | 55 | from nonebot_plugin_resolver2.download import download_file_by_stream, encode_video_to_h264, merge_av 56 | from nonebot_plugin_resolver2.parsers import BilibiliParser 57 | 58 | bvid = "BV1VLk9YDEzB" 59 | bilibili_parser = BilibiliParser() 60 | video_url, audio_url = await bilibili_parser.parse_video_download_url(bvid=bvid) 61 | v_path, a_path = await asyncio.gather( 62 | download_file_by_stream(video_url, file_name=f"{bvid}-video.m4s", ext_headers=HEADERS), 63 | download_file_by_stream(audio_url, file_name=f"{bvid}-audio.m4s", ext_headers=HEADERS), 64 | ) 65 | 66 | video_path = Path(__file__).parent / f"{bvid}.mp4" 67 | await merge_av(v_path=v_path, a_path=a_path, output_path=video_path) 68 | video_h264_path = await encode_video_to_h264(video_path) 69 | assert not video_path.exists() 70 | assert video_h264_path.exists() 71 | 72 | 73 | async def test_no_audio_video(): 74 | from nonebot_plugin_resolver2.parsers import BilibiliParser 75 | 76 | bilibili_parser = BilibiliParser() 77 | 78 | video_url, _ = await bilibili_parser.parse_video_download_url(bvid="BV1gRjMziELt") 79 | logger.debug(f"video_url: {video_url}") 80 | --------------------------------------------------------------------------------
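`merge_av` and `encode_video_to_h264`, used by `test_encode_h264_video` above, are not shown in this excerpt. A sketch of a stream-copy mux, under the assumption that such helpers shell out to ffmpeg (the function name and error handling are illustrative, not the plugin's actual code):

```python
# Hypothetical sketch of a merge_av-style helper: mux separate bilibili
# video/audio streams with ffmpeg, without re-encoding.
import asyncio
from pathlib import Path


async def merge_av_sketch(v_path: Path, a_path: Path, output_path: Path) -> None:
    cmd = [
        "ffmpeg", "-y",
        "-i", str(v_path),
        "-i", str(a_path),
        "-c", "copy",  # stream copy: no re-encode
        str(output_path),
    ]
    proc = await asyncio.create_subprocess_exec(*cmd)
    if await proc.wait() != 0:
        raise RuntimeError("ffmpeg merge failed")
```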
| 开始解析抖音视频") 27 | video_info = await douyin_parser.parse_share_url(url) 28 | logger.debug(f"{url} | title: {video_info.title}") 29 | assert video_info.title 30 | logger.debug(f"{url} | author: {video_info.author}") 31 | assert video_info.author 32 | logger.debug(f"{url} | cover_url: {video_info.cover_url}") 33 | assert video_info.cover_url 34 | logger.debug(f"{url} | video_url: {video_info.video_url}") 35 | assert video_info.video_url 36 | logger.success(f"{url} | 抖音视频解析成功") 37 | 38 | await asyncio.gather(*[test_parse_share_url(url) for url in common_urls]) 39 | 40 | 41 | @pytest.mark.asyncio 42 | @skip_on_failure 43 | async def test_douyin_old_video(): 44 | """ 45 | 老视频,网页打开会重定向到 m.ixigua.com 46 | https://v.douyin.com/iUrHrruH 47 | """ 48 | 49 | # from nonebot_plugin_resolver2.parsers.douyin import DouYin 50 | 51 | # parser = DouYin() 52 | # # 该作品已删除,暂时忽略 53 | # url = "https://v.douyin.com/iUrHrruH" 54 | # logger.info(f"开始解析抖音西瓜视频 {url}") 55 | # video_info = await parser.parse_share_url(url) 56 | # logger.debug(f"title: {video_info.title}") 57 | # assert video_info.title 58 | # logger.debug(f"author: {video_info.author}") 59 | # assert video_info.author 60 | # logger.debug(f"cover_url: {video_info.cover_url}") 61 | # assert video_info.cover_url 62 | # logger.debug(f"video_url: {video_info.video_url}") 63 | # assert video_info.video_url 64 | # logger.success(f"抖音西瓜视频解析成功 {url}") 65 | 66 | 67 | @pytest.mark.asyncio 68 | @skip_on_failure 69 | async def test_douyin_note(): 70 | """ 71 | 测试普通图文 72 | https://www.douyin.com/note/7469411074119322899 73 | https://v.douyin.com/iP6Uu1Kh 74 | """ 75 | from nonebot_plugin_resolver2.parsers import DouyinParser 76 | 77 | douyin_parser = DouyinParser() 78 | 79 | note_urls = [ 80 | "https://www.douyin.com/note/7469411074119322899", 81 | "https://v.douyin.com/iP6Uu1Kh", 82 | ] 83 | 84 | async def test_parse_share_url(url: str) -> None: 85 | logger.info(f"{url} | 开始解析抖音图文") 86 | video_info = await douyin_parser.parse_share_url(url) 87 | logger.debug(f"{url} | title: {video_info.title}") 88 | assert video_info.title 89 | logger.debug(f"{url} | author: {video_info.author}") 90 | assert video_info.author 91 | logger.debug(f"{url} | cover_url: {video_info.cover_url}") 92 | assert video_info.cover_url 93 | logger.debug(f"{url} | images: {video_info.pic_urls}") 94 | assert video_info.pic_urls 95 | logger.success(f"{url} | 抖音图文解析成功") 96 | 97 | await asyncio.gather(*[test_parse_share_url(url) for url in note_urls]) 98 | 99 | 100 | @pytest.mark.asyncio 101 | @skip_on_failure 102 | async def test_douyin_slides(): 103 | """ 104 | 含视频的图集 105 | https://v.douyin.com/CeiJfqyWs # 将会解析出视频 106 | https://www.douyin.com/note/7450744229229235491 # 解析成普通图片 107 | """ 108 | from nonebot_plugin_resolver2.parsers import DouyinParser 109 | 110 | douyin_parser = DouyinParser() 111 | 112 | dynamic_image_url = "https://v.douyin.com/CeiJfqyWs" 113 | static_image_url = "https://www.douyin.com/note/7450744229229235491" 114 | 115 | logger.info(f"开始解析抖音图集(含视频解析出视频) {dynamic_image_url}") 116 | video_info = await douyin_parser.parse_share_url(dynamic_image_url) 117 | logger.debug(f"title: {video_info.title}") 118 | assert video_info.title 119 | logger.debug(f"dynamic_images: {video_info.dynamic_urls}") 120 | assert video_info.dynamic_urls 121 | logger.success(f"抖音图集(含视频解析出视频)解析成功 {dynamic_image_url}") 122 | 123 | logger.info(f"开始解析抖音图集(含视频解析出静态图片) {static_image_url}") 124 | video_info = await douyin_parser.parse_share_url(static_image_url) 125 | logger.debug(f"title: {video_info.title}") 126 | 
/tests/test_download.py: -------------------------------------------------------------------------------- 1 | from nonebot import logger 2 | 3 | 4 | def test_generate_file_name(): 5 | import random 6 | 7 | from nonebot_plugin_resolver2.download.utils import generate_file_name 8 | 9 | suffix_lst = [".jpg", ".png", ".gif", ".webp", ".jpeg", ".bmp", ".tiff", ".ico", ".svg", ".heic", ".heif"] 10 | # test 20 URLs: the generated name must be deterministic for the same URL 11 | for i in range(20): 12 | url = f"https://www.google.com/test{i}{random.choice(suffix_lst)}" 13 | file_name = generate_file_name(url) 14 | new_file_name = generate_file_name(url) 15 | assert file_name == new_file_name 16 | logger.info(f"{url}: {file_name}") 17 | 18 | 19 | def test_limited_size_dict(): 20 | from nonebot_plugin_resolver2.download.ytdlp import LimitedSizeDict 21 | 22 | limited_size_dict = LimitedSizeDict() 23 | for i in range(20): 24 | limited_size_dict[f"test{i}"] = f"test{i}" 25 | assert len(limited_size_dict) == 20 26 | for i in range(20): 27 | assert limited_size_dict[f"test{i}"] == f"test{i}" 28 | for i in range(20, 30): 29 | limited_size_dict[f"test{i}"] = f"test{i}" 30 | assert len(limited_size_dict) == 20 31 | --------------------------------------------------------------------------------
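`test_limited_size_dict` above implies that `LimitedSizeDict` evicts its oldest entries once it holds 20 items. A minimal sketch consistent with that observed behavior; the real class in `download/ytdlp.py` may be implemented differently:

```python
# Hypothetical minimal implementation matching the test's expectations:
# insertion-ordered, capped at size_limit, oldest entry evicted first.
from collections import OrderedDict


class LimitedSizeDictSketch(OrderedDict):
    def __init__(self, *args, size_limit: int = 20, **kwargs):
        self.size_limit = size_limit
        super().__init__(*args, **kwargs)

    def __setitem__(self, key, value):
        super().__setitem__(key, value)
        if len(self) > self.size_limit:
            self.popitem(last=False)  # drop the oldest insertion
```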
author: {video_info.author}") 40 | 41 | logger.success(f"{url} | 快手视频解析成功") 42 | 43 | await asyncio.gather(*[test_parse_url(url) for url in test_urls]) 44 | 45 | 46 | @pytest.mark.asyncio 47 | async def test_parse(): 48 | """测试快手视频解析""" 49 | from nonebot_plugin_resolver2.download import download_imgs_without_raise, download_video, fmt_size 50 | from nonebot_plugin_resolver2.parsers import KuaishouParser 51 | 52 | parser = KuaishouParser() 53 | 54 | test_urls = [ 55 | "https://www.kuaishou.com/short-video/3xhjgcmir24m4nm", # 视频 56 | "https://v.kuaishou.com/2yAnzeZ", # 视频 57 | "https://v.m.chenzhongtech.com/fw/photo/3xburnkmj3auazc", # 视频 58 | "https://v.kuaishou.com/2xZPkuV", # 图集 59 | ] 60 | 61 | async def test_parse_url(url: str) -> None: 62 | logger.info(f"{url} | 开始解析快手视频") 63 | video_info = await parser.parse_url(url) 64 | 65 | logger.debug(f"{url} | title: {video_info.title}") 66 | assert video_info.title, "视频标题为空" 67 | 68 | logger.debug(f"{url} | cover_url: {video_info.cover_url}") 69 | assert video_info.cover_url, "视频封面URL为空" 70 | 71 | if video_info.video_url: 72 | logger.debug(f"{url} | video_url: {video_info.video_url}") 73 | # 下载视频 74 | video_path = await download_video(video_info.video_url) 75 | logger.debug(f"{url} | 视频下载完成: {video_path}, 视频{fmt_size(video_path)}") 76 | 77 | if video_info.pic_urls: 78 | logger.debug(f"{url} | pic_urls: {video_info.pic_urls}") 79 | # 下载图片 80 | img_paths = await download_imgs_without_raise(video_info.pic_urls) 81 | logger.debug(f"{url} | 图片下载完成: {img_paths}") 82 | assert len(img_paths) == len(video_info.pic_urls), "图片下载数量不一致" 83 | 84 | if video_info.author: 85 | logger.debug(f"{url} | author: {video_info.author}") 86 | 87 | logger.success(f"{url} | 快手视频解析成功") 88 | 89 | await asyncio.gather(*[test_parse_url(url) for url in test_urls]) 90 | -------------------------------------------------------------------------------- /tests/test_load.py: -------------------------------------------------------------------------------- 1 | def test_load(): 2 | from nonebot import require 3 | 4 | assert require("nonebot_plugin_resolver2") 5 | -------------------------------------------------------------------------------- /tests/test_ncm.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from nonebot import logger 4 | from nonebot.exception import FinishedException 5 | import pytest 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_ncm(): 10 | from nonebot_plugin_resolver2.download import download_audio 11 | from nonebot_plugin_resolver2.parsers import NCMParser 12 | 13 | parser = NCMParser() 14 | 15 | urls = [ 16 | "https://st.music.163.com/listen-together/multishare/index.html?roomId=5766146a1616391e83da2c195811fb07_1744109168288&inviterUid=1868906482", 17 | "https://music.163.com/song?id=1948109333", 18 | ] 19 | 20 | async def test_parse_ncm(url: str) -> None: 21 | logger.info(f"{url} | 开始解析网易云音乐") 22 | try: 23 | result = await parser.parse_ncm(url) 24 | logger.debug(f"{url} | result: {result}") 25 | except FinishedException: 26 | logger.warning(f"{url} | 解析失败") 27 | return 28 | 29 | # 下载音频 30 | assert result.audio_url 31 | audio_path = await download_audio(result.audio_url) 32 | assert audio_path 33 | logger.debug(audio_path) 34 | logger.success(f"{url} | 网易云音乐解析成功") 35 | 36 | await asyncio.gather(*[test_parse_ncm(url) for url in urls]) 37 | -------------------------------------------------------------------------------- /tests/test_weibo.py: 
/tests/test_weibo.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from nonebot import logger 4 | import pytest 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_weibo_pics(): 9 | from nonebot_plugin_resolver2.download import download_imgs_without_raise, download_video 10 | from nonebot_plugin_resolver2.parsers import WeiBoParser 11 | 12 | weibo_parser = WeiBoParser() 13 | 14 | ext_headers = { 15 | "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", # noqa: E501 16 | "referer": "https://weibo.com/", 17 | } 18 | urls = [ 19 | "https://video.weibo.com/show?fid=1034:5145615399845897", 20 | "https://weibo.com/7207262816/P5kWdcfDe", 21 | "https://weibo.com/7207262816/O70aCbjnd", 22 | "http://m.weibo.cn/status/5112672433738061", 23 | "https://m.weibo.cn/status/5155768539808352", 24 | ] 25 | 26 | async def test_parse_share_url(url: str) -> None: 27 | logger.info(f"{url} | start parsing weibo") 28 | video_info = await weibo_parser.parse_share_url(url) 29 | logger.debug(f"{url} | parse result: {video_info}") 30 | assert video_info.video_url or video_info.pic_urls 31 | logger.success(f"{url} | weibo parsed successfully") 32 | if video_info.video_url: 33 | await download_video(video_info.video_url, ext_headers=ext_headers) 34 | logger.success(f"{url} | weibo video downloaded successfully") 35 | if video_info.pic_urls: 36 | files = await download_imgs_without_raise(video_info.pic_urls, ext_headers=ext_headers) 37 | assert len(files) == len(video_info.pic_urls) 38 | logger.success(f"{url} | weibo images downloaded successfully") 39 | 40 | await asyncio.gather(*[test_parse_share_url(url) for url in urls]) 41 | -------------------------------------------------------------------------------- /tests/test_x.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from nonebot import logger 4 | import pytest 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_x(): 9 | from nonebot_plugin_resolver2.download import download_imgs_without_raise, download_video 10 | from nonebot_plugin_resolver2.matchers.twitter import parse_x_url 11 | 12 | urls = [ 13 | "https://x.com/Fortnite/status/1904171341735178552", # video 14 | "https://x.com/Fortnite/status/1870484479980052921", # single image 15 | "https://x.com/chitose_yoshino/status/1841416254810378314", # multiple images 16 | ] 17 | 18 | async def parse_x_url_test(url: str): 19 | logger.info(f"start parsing X/Twitter {url}") 20 | video_url, pic_urls = await parse_x_url(url) 21 | if video_url: 22 | logger.info(f"{url} | resolved as video: {video_url}") 23 | video_path = await download_video(video_url) 24 | assert video_path.exists() 25 | logger.success(f"{url} | video parsed and downloaded successfully") 26 | if pic_urls: 27 | logger.info(f"{url} | resolved as images: {pic_urls}") 28 | img_paths = await download_imgs_without_raise(pic_urls) 29 | assert len(img_paths) == len(pic_urls) 30 | for img_path in img_paths: 31 | assert img_path.exists() 32 | logger.success(f"{url} | images parsed and downloaded successfully") 33 | 34 | await asyncio.gather(*[parse_x_url_test(url) for url in urls]) 35 | --------------------------------------------------------------------------------
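The weibo test above passes a `referer` via `ext_headers` because weibo's media CDN rejects hotlinked requests without it. A minimal sketch of a header-aware fetch illustrating that workaround (the function name is assumed; the plugin's actual download helpers are not shown in this excerpt):

```python
# Hypothetical sketch of a header-aware download: merge caller-supplied
# headers (e.g. a referer) into the request to pass hotlink protection.
import aiohttp


async def fetch_bytes(url: str, ext_headers: dict[str, str] | None = None) -> bytes:
    headers = {"user-agent": "Mozilla/5.0"}
    if ext_headers:
        headers.update(ext_headers)  # e.g. {"referer": "https://weibo.com/"}
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(url) as resp:
            resp.raise_for_status()
            return await resp.read()
```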
"https://www.xiaohongshu.com/discovery/item/67cdaecd000000000b0153f8?source=webshare&xhsshare=pc_web&xsec_token=ABTvdTfbnDYQGDDB-aS-b3qgxOzsq22vIUcGzW6N5j8eQ=&xsec_source=pc_share", 16 | "https://www.xiaohongshu.com/explore/67ebf78f000000001c0050a1?app_platform=ios&app_version=8.77&share_from_user_hidden=true&xsec_source=app_share&type=normal&xsec_token=CBUGDKBemo2y6D0IIli9maqDaaazIQjzPrk2BVRi0FqLk=&author_share=1&xhsshare=QQ&shareRedId=N0pIOUc1PDk2NzUyOTgwNjY0OTdFNktO&apptime=1744081452&share_id=00207b217b7b472588141b083af74c7a", 17 | ] 18 | 19 | async def test_parse_url(url: str) -> None: 20 | logger.info(f"{url} | 开始解析小红书") 21 | parse_result = await xhs_parser.parse_url(url) 22 | assert parse_result.title 23 | logger.debug(f"{url} | title_desc: {parse_result.title}") 24 | assert parse_result.pic_urls or parse_result.video_url 25 | logger.debug(f"{url} | img_urls: {parse_result.pic_urls}") 26 | logger.debug(f"video_url: {parse_result.video_url}") 27 | logger.success(f"{url} | 小红书解析成功") 28 | 29 | await asyncio.gather(*[test_parse_url(url) for url in urls]) 30 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from nonebot import logger 2 | from nonebot.adapters.onebot.v11 import GroupMessageEvent, Message 3 | import pytest 4 | 5 | 6 | def make_onebot_msg(message: Message) -> GroupMessageEvent: 7 | from time import time 8 | 9 | from nonebot.adapters.onebot.v11.event import Sender 10 | 11 | event = GroupMessageEvent( 12 | time=int(time()), 13 | sub_type="normal", 14 | self_id=123456, 15 | post_type="message", 16 | message_type="group", 17 | message_id=12345623, 18 | user_id=1234567890, 19 | group_id=1234567890, 20 | raw_message=message.extract_plain_text(), 21 | message=message, 22 | original_message=message, 23 | sender=Sender(), 24 | font=123456, 25 | ) 26 | return event 27 | 28 | 29 | # 添加一个装饰器来跳过失败的测试 30 | def skip_on_failure(func): 31 | @pytest.mark.asyncio 32 | async def wrapper(*args, **kwargs): 33 | try: 34 | await func(*args, **kwargs) 35 | except Exception as e: 36 | logger.warning(f"测试 {func.__name__} 失败,已跳过: {e}") 37 | pytest.skip(f"测试失败: {e}") 38 | 39 | return wrapper 40 | --------------------------------------------------------------------------------