├── douyin_mcp_server ├── __main__.py ├── __init__.py └── server.py ├── .gitignore ├── pyproject.toml ├── README.md └── LICENSE /douyin_mcp_server/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | 抖音MCP服务器入口点 4 | """ 5 | 6 | from .server import main 7 | 8 | if __name__ == "__main__": 9 | main() -------------------------------------------------------------------------------- /douyin_mcp_server/__init__.py: -------------------------------------------------------------------------------- 1 | """抖音无水印链接提取 MCP 服务器""" 2 | 3 | __version__ = "1.2.0" 4 | __author__ = "yzfly" 5 | __email__ = "yz.liu.me@gmail.com" 6 | 7 | from .server import main 8 | 9 | __all__ = ["main"] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Virtual environments 24 | venv/ 25 | env/ 26 | ENV/ 27 | 28 | # IDE 29 | .vscode/ 30 | .idea/ 31 | *.swp 32 | *.swo 33 | 34 | # OS 35 | .DS_Store 36 | Thumbs.db 37 | 38 | # Testing 39 | .pytest_cache/ 40 | .coverage 41 | htmlcov/ 42 | 43 | # Distribution 44 | *.tar.gz 45 | *.whl 46 | 47 | .claude 48 | temp/ -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "douyin-mcp-server" 7 | version = "1.2.0" 8 | description = "MCP server for downloading Douyin videos and extracting text" 9 | readme = "README.md" 10 | license = {text = "MIT"} 11 | authors = [ 12 | {name = "yzfly", email = "yz.liu.me@gmail.com"} 13 | ] 14 | classifiers = [ 15 | "Development Status :: 3 - Alpha", 16 | "Intended Audience :: Developers", 17 | "License :: OSI Approved :: MIT License", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.10", 20 | "Programming Language :: Python :: 3.11", 21 | "Programming Language :: Python :: 3.12", 22 | ] 23 | requires-python = ">=3.10" 24 | dependencies = [ 25 | "mcp>=1.0.0", 26 | "requests", 27 | "ffmpeg-python", 28 | "tqdm", 29 | "dashscope", 30 | ] 31 | 32 | [project.urls] 33 | Homepage = "https://github.com/yzfly/douyin-mcp-server" 34 | Repository = "https://github.com/yzfly/douyin-mcp-server" 35 | Issues = "https://github.com/yzfly/douyin-mcp-server/issues" 36 | 37 | [project.scripts] 38 | douyin-mcp-server = "douyin_mcp_server.server:main" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 抖音无水印视频文本提取 MCP 服务器 2 | 3 | [![PyPI version](https://badge.fury.io/py/douyin-mcp-server.svg)](https://badge.fury.io/py/douyin-mcp-server) 4 | [![Python version](https://img.shields.io/pypi/pyversions/douyin-mcp-server.svg)](https://pypi.org/project/douyin-mcp-server/) 5 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 6 | 7 | 一个基于 Model Context Protocol (MCP) 的服务器,可以从抖音分享链接下载无水印视频,提取音频并转换为文本。 8 | 9 | 10 | douyin-mcp-server MCP server 11 | 12 | 13 | ## 📋 项目声明 14 | 15 | **官方文档地址:** https://github.com/yzfly/douyin-mcp-server 16 | 17 | 请以本项目的 [README.md](https://github.com/yzfly/douyin-mcp-server/blob/main/README.md) 文件为准,了解项目的功能特性、使用方法、API 配置说明等详细信息。 18 | 19 | **重要提醒:** 第三方平台如因自身 MCP Server 功能支持度限制而无法正常使用,请联系相应平台方。本项目不提供任何形式的技术支持或保证,用户需自行承担使用本项目可能产生的任何损失或损害。 20 | 21 | **法律声明:** 22 | 1. 本项目基于 Apache 2.0 协议发布 23 | 2. 本项目仅供学习和研究使用,不得用于任何违法或违规目的 24 | 3. 本项目的使用必须遵守相关法律法规 25 | 4. 本项目的作者和贡献者不对项目的任何部分承担法律责任 26 | 27 | ## ✨ 功能特性 28 | 29 | - 🎵 **无水印视频获取** - 从抖音分享链接获取高质量无水印视频 30 | - 🎧 **智能音频提取** - 自动从视频中提取音频内容 31 | - 📝 **AI 文本识别** - 使用先进的语音识别技术提取文本内容 32 | - 🧹 **自动清理** - 智能清理处理过程中的临时文件 33 | - 🔧 **灵活配置** - 支持自定义 API 配置,默认使用 [阿里云百炼 API](https://help.aliyun.com/zh/model-studio/get-api-key?) 34 | 35 | ## 🚀 快速开始 36 | 37 | ### 步骤 1:获取 API 密钥 38 | 39 | 前往 [阿里云百炼 API](https://help.aliyun.com/zh/model-studio/get-api-key?) 获取您的 `DASHSCOPE_API_KEY`: 40 | 41 | ![获取阿里云百炼API](https://files.mdnice.com/user/43439/36e658be-1ccf-41dd-87cf-d43fefde5c4e.png) 42 | 43 | ### 步骤 2:配置环境变量 44 | 45 | 在 Claude Desktop、Cherry Studio 等支持 MCP Server 的应用配置文件中添加以下配置: 46 | 47 | ```json 48 | { 49 | "mcpServers": { 50 | "douyin-mcp": { 51 | "command": "uvx", 52 | "args": ["douyin-mcp-server"], 53 | "env": { 54 | "DASHSCOPE_API_KEY": "sk-xxxx" 55 | } 56 | } 57 | } 58 | } 59 | ``` 60 | 61 | ### 步骤 3:开始使用 62 | 63 | 配置完成后,您就可以在支持的应用中正常调用 MCP 工具了。 64 | 65 | ## ⚙️ API 配置说明 66 | 67 | ### 当前版本(>= 1.2.0) 68 | 69 | 最新版本默认使用阿里云百炼 API,具有以下优势: 70 | - ✅ 识别效果更好 71 | - ✅ 处理速度更快 72 | - ✅ 本地资源消耗更小 73 | 74 | **配置步骤:** 75 | 1. 前往 [阿里云百炼](https://help.aliyun.com/zh/model-studio/get-api-key?) 开通 API 服务 76 | 2. 获取 API Key 并配置到环境变量 `DASHSCOPE_API_KEY` 中 77 | 78 | ### 旧版本兼容(<= 1.1.0) 79 | 80 | 如果您需要使用旧版本,请使用以下配置: 81 | 82 | ```json 83 | { 84 | "mcpServers": { 85 | "douyin-mcp": { 86 | "command": "uvx", 87 | "args": ["douyin-mcp-server@1.1.0"], 88 | "env": { 89 | "DOUYIN_API_KEY": "your-api-key-here" 90 | } 91 | } 92 | } 93 | } 94 | ``` 95 | 96 | **注意:** 旧版本使用硅基流动 API,需要在 [硅基流动](https://cloud.siliconflow.cn/i/TxUlXG3u) 注册账号并获取 API Key。 97 | 98 | 📖 [1.1.0 版本文档](https://pypi.org/project/douyin-mcp-server/1.1.0/) 99 | 100 | ## 🛠️ 工具说明 101 | 102 | ### `get_douyin_download_link` 103 | 104 | 获取抖音视频的无水印下载链接。 105 | 106 | **参数:** 107 | - `share_link` (string): 抖音分享链接或包含链接的文本 108 | 109 | **返回:** 110 | - JSON 格式的下载链接和视频信息 111 | 112 | **特点:** 无需 API 密钥即可使用 113 | 114 | ### `extract_douyin_text` 115 | 116 | 完整的文本提取工具,一站式完成视频到文本的转换。 117 | 118 | **处理流程:** 119 | 1. 解析抖音分享链接 120 | 2. 直接使用视频 URL 进行语音识别 121 | 3. 返回提取的文本内容 122 | 123 | **参数:** 124 | - `share_link` (string): 抖音分享链接或包含链接的文本 125 | - `model` (string, 可选): 语音识别模型,默认使用 `paraformer-v2` 126 | 127 | **环境变量要求:** 128 | - `DASHSCOPE_API_KEY`: 阿里云百炼 API 密钥(必需) 129 | 130 | ### `parse_douyin_video_info` 131 | 132 | 轻量级视频信息解析工具。 133 | 134 | **参数:** 135 | - `share_link` (string): 抖音分享链接 136 | 137 | **特点:** 仅解析视频基本信息,不下载视频文件 138 | 139 | ### 资源访问 140 | 141 | - `douyin://video/{video_id}`: 通过视频 ID 获取详细信息 142 | 143 | ## 📦 系统要求 144 | 145 | ### 运行环境 146 | - **Python**: 3.10 或更高版本 147 | 148 | ### 依赖库 149 | - `requests` - HTTP 请求处理 150 | - `ffmpeg-python` - 音视频处理 151 | - `tqdm` - 进度条显示 152 | - `mcp` - Model Context Protocol 支持 153 | - `dashscope` - 阿里云百炼 API 客户端 154 | 155 | ## ⚠️ 注意事项 156 | 157 | - 🔑 **API 密钥必需**:文本提取功能需要有效的阿里云百炼 API 密钥 158 | - 🆓 **部分功能免费**:获取下载链接功能无需 API 密钥 159 | - 📱 **格式支持**:支持大部分抖音视频格式 160 | - 🚀 **性能优化**:使用阿里云百炼 API 获得更快更准确的识别效果 161 | 162 | ## 🔧 开发指南 163 | 164 | ### 本地开发环境搭建 165 | 166 | ```bash 167 | # 克隆项目 168 | git clone https://github.com/yzfly/douyin-mcp-server.git 169 | cd douyin-mcp-server 170 | 171 | # 安装依赖(开发模式) 172 | pip install -e . 173 | ``` 174 | 175 | ### 运行测试 176 | 177 | ```bash 178 | # 启动服务器进行测试 179 | python -m douyin_mcp_server.server 180 | ``` 181 | 182 | ### Claude Desktop 本地开发配置 183 | 184 | 在 Claude Desktop 配置文件中添加本地开发配置: 185 | 186 | ```json 187 | { 188 | "mcpServers": { 189 | "douyin-mcp": { 190 | "command": "uv", 191 | "args": [ 192 | "run", 193 | "--directory", 194 | "/path/to/your/douyin-mcp-server", 195 | "python", 196 | "-m", 197 | "douyin_mcp_server" 198 | ], 199 | "env": { 200 | "DASHSCOPE_API_KEY": "your-api-key-here" 201 | } 202 | } 203 | } 204 | } 205 | ``` 206 | 207 | ## ⚠️ 免责声明 208 | 209 | ### 使用风险 210 | - 使用者对本项目的使用完全自主决定,并自行承担所有风险 211 | - 作者对使用者因使用本项目而产生的任何损失、责任或风险概不负责 212 | 213 | ### 代码质量 214 | - 本项目基于现有知识和技术开发,作者努力确保代码的正确性和安全性 215 | - 但不保证代码完全没有错误或缺陷,使用者需自行评估和测试 216 | 217 | ### 第三方依赖 218 | - 本项目依赖的第三方库、插件或服务遵循各自的开源或商业许可 219 | - 使用者需自行查阅并遵守相应协议 220 | - 作者不对第三方组件的稳定性、安全性及合规性承担责任 221 | 222 | ### 法律合规 223 | - 使用者必须自行研究相关法律法规,确保使用行为合法合规 224 | - 任何违反法律法规导致的法律责任和风险,均由使用者自行承担 225 | - 禁止使用本工具从事任何侵犯知识产权的行为 226 | - 开发者不参与、不支持、不认可任何非法内容的获取或分发 227 | 228 | ### 数据处理 229 | - 本项目不对使用者的数据收集、存储、传输等处理活动的合规性承担责任 230 | - 使用者应自行遵守相关法律法规,确保数据处理行为合法正当 231 | 232 | ### 责任限制 233 | - 使用者不得将项目作者、贡献者或相关方与使用行为联系起来 234 | - 不得要求作者对使用项目产生的任何损失或损害负责 235 | - 基于本项目的二次开发、修改或编译程序与原作者无关 236 | 237 | ### 知识产权 238 | - 本项目不授予使用者任何专利许可 239 | - 若使用本项目导致专利纠纷或侵权,使用者自行承担全部风险和责任 240 | - 未经书面授权,不得用于商业宣传、推广或再授权 241 | 242 | ### 服务终止 243 | - 作者保留随时终止向违反声明的使用者提供服务的权利 244 | - 可能要求违规使用者销毁已获取的代码及衍生作品 245 | - 作者保留在不另行通知的情况下更新本声明的权利 246 | 247 | **⚠️ 重要提醒:在使用本项目前,请认真阅读并完全理解上述免责声明。如有疑问或不同意任何条款,请勿使用本项目。继续使用即视为完全接受上述声明并自愿承担所有风险和后果。** 248 | 249 | ## 📄 许可证 250 | 251 | Apache License 2.0 252 | 253 | ## 👨‍💻 作者 254 | 255 | - **yzfly** - [yz.liu.me@gmail.com](mailto:yz.liu.me@gmail.com) 256 | - GitHub: [https://github.com/yzfly](https://github.com/yzfly) 257 | 258 | ## 🤝 贡献 259 | 260 | 欢迎提交 Issue 和 Pull Request!我们期待您的参与和贡献。 261 | 262 | ## 📝 更新日志 263 | 264 | ### v1.2.0 (最新) 265 | - 🚀 **性能提升**:更快、更准确的视频文案提取 266 | - 🔄 **API 升级**:切换到阿里云百炼 API,显著提升识别准确率 267 | - 🔧 **配置更新**:环境变量从 `DOUYIN_API_KEY` 更新为 `DASHSCOPE_API_KEY` 268 | 269 | ### v1.1.0 270 | - 🐛 **问题修复**:修复提取视频时文件名过长导致的错误 271 | 272 | ### v1.0.0 273 | - 🎉 **首次发布**:初始版本 274 | - ✨ **核心功能**:支持抖音视频文本提取 275 | - 🔗 **链接获取**:支持获取无水印视频下载链接 276 | - 🔐 **环境配置**:从环境变量读取 API 密钥 277 | - 🧹 **自动清理**:自动清理临时文件 278 | - ⚙️ **灵活配置**:支持自定义 API 配置 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control 57 | systems, and issue tracking systems that are managed by, or on behalf 58 | of, the Licensor for the purpose of discussing and improving the Work, 59 | but excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright notice to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. When redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2025 yzfly 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /douyin_mcp_server/server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | 抖音无水印视频下载并提取文本的 MCP 服务器 4 | 5 | 该服务器提供以下功能: 6 | 1. 解析抖音分享链接获取无水印视频链接 7 | 2. 下载视频并提取音频 8 | 3. 从音频中提取文本内容 9 | 4. 自动清理中间文件 10 | """ 11 | 12 | import os 13 | import re 14 | import json 15 | import requests 16 | import tempfile 17 | import asyncio 18 | from pathlib import Path 19 | from typing import Optional, Tuple 20 | import ffmpeg 21 | from tqdm.asyncio import tqdm 22 | from urllib import request 23 | from http import HTTPStatus 24 | import dashscope 25 | 26 | from mcp.server.fastmcp import FastMCP 27 | from mcp.server.fastmcp import Context 28 | 29 | 30 | # 创建 MCP 服务器实例 31 | mcp = FastMCP("Douyin MCP Server", 32 | dependencies=["requests", "ffmpeg-python", "tqdm", "dashscope"]) 33 | 34 | # 请求头,模拟移动端访问 35 | HEADERS = { 36 | 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) EdgiOS/121.0.2277.107 Version/17.0 Mobile/15E148 Safari/604.1' 37 | } 38 | 39 | # 默认 API 配置 40 | DEFAULT_MODEL = "paraformer-v2" 41 | 42 | 43 | class DouyinProcessor: 44 | """抖音视频处理器""" 45 | 46 | def __init__(self, api_key: str, model: Optional[str] = None): 47 | self.api_key = api_key 48 | self.model = model or DEFAULT_MODEL 49 | self.temp_dir = Path(tempfile.mkdtemp()) 50 | # 设置阿里云百炼API密钥 51 | dashscope.api_key = api_key 52 | 53 | def __del__(self): 54 | """清理临时目录""" 55 | import shutil 56 | if hasattr(self, 'temp_dir') and self.temp_dir.exists(): 57 | shutil.rmtree(self.temp_dir, ignore_errors=True) 58 | 59 | def parse_share_url(self, share_text: str) -> dict: 60 | """从分享文本中提取无水印视频链接""" 61 | # 提取分享链接 62 | urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', share_text) 63 | if not urls: 64 | raise ValueError("未找到有效的分享链接") 65 | 66 | share_url = urls[0] 67 | share_response = requests.get(share_url, headers=HEADERS) 68 | video_id = share_response.url.split("?")[0].strip("/").split("/")[-1] 69 | share_url = f'https://www.iesdouyin.com/share/video/{video_id}' 70 | 71 | # 获取视频页面内容 72 | response = requests.get(share_url, headers=HEADERS) 73 | response.raise_for_status() 74 | 75 | pattern = re.compile( 76 | pattern=r"window\._ROUTER_DATA\s*=\s*(.*?)", 77 | flags=re.DOTALL, 78 | ) 79 | find_res = pattern.search(response.text) 80 | 81 | if not find_res or not find_res.group(1): 82 | raise ValueError("从HTML中解析视频信息失败") 83 | 84 | # 解析JSON数据 85 | json_data = json.loads(find_res.group(1).strip()) 86 | VIDEO_ID_PAGE_KEY = "video_(id)/page" 87 | NOTE_ID_PAGE_KEY = "note_(id)/page" 88 | 89 | if VIDEO_ID_PAGE_KEY in json_data["loaderData"]: 90 | original_video_info = json_data["loaderData"][VIDEO_ID_PAGE_KEY]["videoInfoRes"] 91 | elif NOTE_ID_PAGE_KEY in json_data["loaderData"]: 92 | original_video_info = json_data["loaderData"][NOTE_ID_PAGE_KEY]["videoInfoRes"] 93 | else: 94 | raise Exception("无法从JSON中解析视频或图集信息") 95 | 96 | data = original_video_info["item_list"][0] 97 | 98 | # 获取视频信息 99 | video_url = data["video"]["play_addr"]["url_list"][0].replace("playwm", "play") 100 | desc = data.get("desc", "").strip() or f"douyin_{video_id}" 101 | 102 | # 替换文件名中的非法字符 103 | desc = re.sub(r'[\\/:*?"<>|]', '_', desc) 104 | 105 | return { 106 | "url": video_url, 107 | "title": desc, 108 | "video_id": video_id 109 | } 110 | 111 | async def download_video(self, video_info: dict, ctx: Context) -> Path: 112 | """异步下载视频到临时目录""" 113 | filename = f"{video_info['video_id']}.mp4" 114 | filepath = self.temp_dir / filename 115 | 116 | ctx.info(f"正在下载视频: {video_info['title']}") 117 | 118 | response = requests.get(video_info['url'], headers=HEADERS, stream=True) 119 | response.raise_for_status() 120 | 121 | # 获取文件大小 122 | total_size = int(response.headers.get('content-length', 0)) 123 | 124 | # 异步下载文件,显示进度 125 | with open(filepath, 'wb') as f: 126 | downloaded = 0 127 | for chunk in response.iter_content(chunk_size=8192): 128 | if chunk: 129 | f.write(chunk) 130 | downloaded += len(chunk) 131 | if total_size > 0: 132 | progress = downloaded / total_size 133 | await ctx.report_progress(downloaded, total_size) 134 | 135 | ctx.info(f"视频下载完成: {filepath}") 136 | return filepath 137 | 138 | def extract_audio(self, video_path: Path) -> Path: 139 | """从视频文件中提取音频""" 140 | audio_path = video_path.with_suffix('.mp3') 141 | 142 | try: 143 | ( 144 | ffmpeg 145 | .input(str(video_path)) 146 | .output(str(audio_path), acodec='libmp3lame', q=0) 147 | .run(capture_stdout=True, capture_stderr=True, overwrite_output=True) 148 | ) 149 | return audio_path 150 | except Exception as e: 151 | raise Exception(f"提取音频时出错: {str(e)}") 152 | 153 | def extract_text_from_video_url(self, video_url: str) -> str: 154 | """从视频URL中提取文字(使用阿里云百炼API)""" 155 | try: 156 | # 发起异步转录任务 157 | task_response = dashscope.audio.asr.Transcription.async_call( 158 | model=self.model, 159 | file_urls=[video_url], 160 | language_hints=['zh', 'en'] 161 | ) 162 | 163 | # 等待转录完成 164 | transcription_response = dashscope.audio.asr.Transcription.wait( 165 | task=task_response.output.task_id 166 | ) 167 | 168 | if transcription_response.status_code == HTTPStatus.OK: 169 | # 获取转录结果 170 | for transcription in transcription_response.output['results']: 171 | url = transcription['transcription_url'] 172 | result = json.loads(request.urlopen(url).read().decode('utf8')) 173 | 174 | # 保存结果到临时文件 175 | temp_json_path = self.temp_dir / 'transcription.json' 176 | with open(temp_json_path, 'w') as f: 177 | json.dump(result, f, indent=4, ensure_ascii=False) 178 | 179 | # 提取文本内容 180 | if 'transcripts' in result and len(result['transcripts']) > 0: 181 | return result['transcripts'][0]['text'] 182 | else: 183 | return "未识别到文本内容" 184 | 185 | else: 186 | raise Exception(f"转录失败: {transcription_response.output.message}") 187 | 188 | except Exception as e: 189 | raise Exception(f"提取文字时出错: {str(e)}") 190 | 191 | def cleanup_files(self, *file_paths: Path): 192 | """清理指定的文件""" 193 | for file_path in file_paths: 194 | if file_path.exists(): 195 | file_path.unlink() 196 | 197 | 198 | @mcp.tool() 199 | def get_douyin_download_link(share_link: str) -> str: 200 | """ 201 | 获取抖音视频的无水印下载链接 202 | 203 | 参数: 204 | - share_link: 抖音分享链接或包含链接的文本 205 | 206 | 返回: 207 | - 包含下载链接和视频信息的JSON字符串 208 | """ 209 | try: 210 | processor = DouyinProcessor("") # 获取下载链接不需要API密钥 211 | video_info = processor.parse_share_url(share_link) 212 | 213 | return json.dumps({ 214 | "status": "success", 215 | "video_id": video_info["video_id"], 216 | "title": video_info["title"], 217 | "download_url": video_info["url"], 218 | "description": f"视频标题: {video_info['title']}", 219 | "usage_tip": "可以直接使用此链接下载无水印视频" 220 | }, ensure_ascii=False, indent=2) 221 | 222 | except Exception as e: 223 | return json.dumps({ 224 | "status": "error", 225 | "error": f"获取下载链接失败: {str(e)}" 226 | }, ensure_ascii=False, indent=2) 227 | 228 | 229 | @mcp.tool() 230 | async def extract_douyin_text( 231 | share_link: str, 232 | model: Optional[str] = None, 233 | ctx: Context = None 234 | ) -> str: 235 | """ 236 | 从抖音分享链接提取视频中的文本内容 237 | 238 | 参数: 239 | - share_link: 抖音分享链接或包含链接的文本 240 | - model: 语音识别模型(可选,默认使用paraformer-v2) 241 | 242 | 返回: 243 | - 提取的文本内容 244 | 245 | 注意: 需要设置环境变量 DASHSCOPE_API_KEY 246 | """ 247 | try: 248 | # 从环境变量获取API密钥 249 | api_key = os.getenv('DASHSCOPE_API_KEY') 250 | if not api_key: 251 | raise ValueError("未设置环境变量 DASHSCOPE_API_KEY,请在配置中添加阿里云百炼API密钥") 252 | 253 | processor = DouyinProcessor(api_key, model) 254 | 255 | # 解析视频链接 256 | ctx.info("正在解析抖音分享链接...") 257 | video_info = processor.parse_share_url(share_link) 258 | 259 | # 直接使用视频URL进行文本提取 260 | ctx.info("正在从视频中提取文本...") 261 | text_content = processor.extract_text_from_video_url(video_info['url']) 262 | 263 | ctx.info("文本提取完成!") 264 | return text_content 265 | 266 | except Exception as e: 267 | ctx.error(f"处理过程中出现错误: {str(e)}") 268 | raise Exception(f"提取抖音视频文本失败: {str(e)}") 269 | 270 | 271 | @mcp.tool() 272 | def parse_douyin_video_info(share_link: str) -> str: 273 | """ 274 | 解析抖音分享链接,获取视频基本信息 275 | 276 | 参数: 277 | - share_link: 抖音分享链接或包含链接的文本 278 | 279 | 返回: 280 | - 视频信息(JSON格式字符串) 281 | """ 282 | try: 283 | processor = DouyinProcessor("") # 不需要API密钥来解析链接 284 | video_info = processor.parse_share_url(share_link) 285 | 286 | return json.dumps({ 287 | "video_id": video_info["video_id"], 288 | "title": video_info["title"], 289 | "download_url": video_info["url"], 290 | "status": "success" 291 | }, ensure_ascii=False, indent=2) 292 | 293 | except Exception as e: 294 | return json.dumps({ 295 | "status": "error", 296 | "error": str(e) 297 | }, ensure_ascii=False, indent=2) 298 | 299 | 300 | @mcp.resource("douyin://video/{video_id}") 301 | def get_video_info(video_id: str) -> str: 302 | """ 303 | 获取指定视频ID的详细信息 304 | 305 | 参数: 306 | - video_id: 抖音视频ID 307 | 308 | 返回: 309 | - 视频详细信息 310 | """ 311 | share_url = f"https://www.iesdouyin.com/share/video/{video_id}" 312 | try: 313 | processor = DouyinProcessor("") 314 | video_info = processor.parse_share_url(share_url) 315 | return json.dumps(video_info, ensure_ascii=False, indent=2) 316 | except Exception as e: 317 | return f"获取视频信息失败: {str(e)}" 318 | 319 | 320 | @mcp.prompt() 321 | def douyin_text_extraction_guide() -> str: 322 | """抖音视频文本提取使用指南""" 323 | return """ 324 | # 抖音视频文本提取使用指南 325 | 326 | ## 功能说明 327 | 这个MCP服务器可以从抖音分享链接中提取视频的文本内容,以及获取无水印下载链接。 328 | 329 | ## 环境变量配置 330 | 请确保设置了以下环境变量: 331 | - `DASHSCOPE_API_KEY`: 阿里云百炼API密钥 332 | 333 | ## 使用步骤 334 | 1. 复制抖音视频的分享链接 335 | 2. 在Claude Desktop配置中设置环境变量 DASHSCOPE_API_KEY 336 | 3. 使用相应的工具进行操作 337 | 338 | ## 工具说明 339 | - `extract_douyin_text`: 完整的文本提取流程(需要API密钥) 340 | - `get_douyin_download_link`: 获取无水印视频下载链接(无需API密钥) 341 | - `parse_douyin_video_info`: 仅解析视频基本信息 342 | - `douyin://video/{video_id}`: 获取指定视频的详细信息 343 | 344 | ## Claude Desktop 配置示例 345 | ```json 346 | { 347 | "mcpServers": { 348 | "douyin-mcp": { 349 | "command": "uvx", 350 | "args": ["douyin-mcp-server"], 351 | "env": { 352 | "DASHSCOPE_API_KEY": "your-dashscope-api-key-here" 353 | } 354 | } 355 | } 356 | } 357 | ``` 358 | 359 | ## 注意事项 360 | - 需要提供有效的阿里云百炼API密钥(通过环境变量) 361 | - 使用阿里云百炼的paraformer-v2模型进行语音识别 362 | - 支持大部分抖音视频格式 363 | - 获取下载链接无需API密钥 364 | """ 365 | 366 | 367 | def main(): 368 | """启动MCP服务器""" 369 | mcp.run() 370 | 371 | 372 | if __name__ == "__main__": 373 | main() --------------------------------------------------------------------------------