├── .gitignore
├── requirements.txt
├── config.json
├── __init__.py
├── install_dependencies.py
├── README.md
├── verify_installation.py
├── troubleshoot.py
├── qwen_text_node.py
├── qwen_vision_node.py
├── qwen_video_node.py
├── qwen_detection_node.py
└── qwen_image_node.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / cache
2 | __pycache__/
3 | *.py[cod]
4 | *.class
5 | 
6 | # macOS
7 | .DS_Store
8 | 
9 | # Env / tokens / outputs
10 | .qwen_token
11 | *.jpg
12 | *.png
13 | *.jpeg
14 | 
15 | # Editors
16 | .vscode/
17 | .idea/
18 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Qwen-Image ComfyUI Plugin Requirements
2 | 
3 | # Core dependencies (usually already available in ComfyUI)
4 | requests>=2.25.0
5 | pillow>=8.0.0
6 | torch>=1.9.0
7 | numpy>=1.20.0
8 | 
9 | # Additional dependencies for vision functionality
10 | openai>=1.0.0
11 | 
12 | # Dependencies for object detection functionality
13 | torchvision
14 | matplotlib
15 | 
16 | # Network and proxy support
17 | httpx[socks]>=0.24.0
18 | socksio>=1.0.0
19 | 
20 | # Optional dependencies for enhanced functionality
21 | # pydantic-settings # For advanced configuration management
--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "default_model": "Qwen/Qwen-Image",
3 |   "default_vision_model": "stepfun-ai/step3",
4 |   "timeout": 720,
5 |   "image_download_timeout": 30,
6 |   "default_prompt": "A beautiful landscape",
7 |   "default_negative_prompt": "",
8 |   "default_width": 512,
9 |   "default_height": 512,
10 |   "default_seed": -1,
11 |   "default_steps": 30,
12 |   "default_guidance": 7.5,
13 |   "default_chat_model": "Qwen/Qwen3-Coder-480B-A35B-Chat",
14 |   "default_chat_prompt": "你好,请介绍一下你自己。",
15 |   "default_text_model": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
16 |   "default_system_prompt": "You are a helpful assistant.",
17 |   "default_user_prompt": "你好,请介绍一下你自己。",
18 |   "default_text_seed": -1,
19 |   "default_vision_seed": -1,
20 |   "api_token": "",
21 |   "cloudinary_cloud_name": "dxao8lzi7",
22 |   "cloudinary_api_key": "259917876186436",
23 |   "cloudinary_api_secret": ""
24 | }
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from .qwen_image_node import NODE_CLASS_MAPPINGS as IMAGE_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as IMAGE_DISPLAY_MAPPINGS
2 | from .qwen_vision_node import NODE_CLASS_MAPPINGS as VISION_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as VISION_DISPLAY_MAPPINGS
3 | from .qwen_text_node import NODE_CLASS_MAPPINGS as TEXT_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as TEXT_DISPLAY_MAPPINGS
4 | from .qwen_detection_node import NODE_CLASS_MAPPINGS as DETECTION_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as DETECTION_DISPLAY_MAPPINGS
5 | from .qwen_video_node import NODE_CLASS_MAPPINGS as VIDEO_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as VIDEO_DISPLAY_MAPPINGS
6 | 
7 | NODE_CLASS_MAPPINGS = {**IMAGE_MAPPINGS, **VISION_MAPPINGS, **TEXT_MAPPINGS, **DETECTION_MAPPINGS, **VIDEO_MAPPINGS}
8 | NODE_DISPLAY_NAME_MAPPINGS = {**IMAGE_DISPLAY_MAPPINGS, **VISION_DISPLAY_MAPPINGS, **TEXT_DISPLAY_MAPPINGS, **DETECTION_DISPLAY_MAPPINGS, **VIDEO_DISPLAY_MAPPINGS}
9 | 
10 | __all__ = ['NODE_CLASS_MAPPINGS', 'NODE_DISPLAY_NAME_MAPPINGS']
11 | 
12 | WEB_DIRECTORY = "./js"
13 | 
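上面的 `__init__.py` 通过字典合并把五个节点模块的映射聚合成 ComfyUI 所需的 `NODE_CLASS_MAPPINGS` 和 `NODE_DISPLAY_NAME_MAPPINGS`。若以后要接入新的节点模块,按同样模式扩展即可(示意代码,`qwen_audio_node` 为假设的模块名,仓库中并不存在):

```python
# 示意:把一个假设的第六个节点模块并入聚合映射
# 注意:qwen_audio_node 仅为示例名称,本仓库中没有该文件
from .qwen_audio_node import (
    NODE_CLASS_MAPPINGS as AUDIO_MAPPINGS,
    NODE_DISPLAY_NAME_MAPPINGS as AUDIO_DISPLAY_MAPPINGS,
)

NODE_CLASS_MAPPINGS = {**NODE_CLASS_MAPPINGS, **AUDIO_MAPPINGS}
NODE_DISPLAY_NAME_MAPPINGS = {**NODE_DISPLAY_NAME_MAPPINGS, **AUDIO_DISPLAY_MAPPINGS}
```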
__version__ = "1.0.0" -------------------------------------------------------------------------------- /install_dependencies.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import subprocess 5 | import sys 6 | import os 7 | 8 | def install_package(package): 9 | """安装Python包""" 10 | try: 11 | subprocess.check_call([sys.executable, "-m", "pip", "install", package]) 12 | return True 13 | except subprocess.CalledProcessError as e: 14 | print(f"安装 {package} 失败: {e}") 15 | return False 16 | 17 | def check_package(package_name): 18 | """检查包是否已安装""" 19 | try: 20 | __import__(package_name) 21 | return True 22 | except ImportError: 23 | return False 24 | 25 | def main(): 26 | print("=" * 60) 27 | print("Qwen-Image ComfyUI 插件依赖安装工具") 28 | print("=" * 60) 29 | 30 | # 检查核心依赖 31 | core_deps = { 32 | 'requests': 'requests', 33 | 'PIL': 'pillow', 34 | 'torch': 'torch', 35 | 'numpy': 'numpy' 36 | } 37 | 38 | print("\n🔍 检查核心依赖...") 39 | missing_core = [] 40 | for import_name, package_name in core_deps.items(): 41 | if check_package(import_name): 42 | print(f"✅ {package_name} 已安装") 43 | else: 44 | print(f"❌ {package_name} 未安装") 45 | missing_core.append(package_name) 46 | 47 | # 检查图生文功能依赖 48 | print("\n🔍 检查图生文功能依赖...") 49 | vision_deps = { 50 | 'openai': 'openai', 51 | 'httpx': 'httpx[socks]', 52 | 'socksio': 'socksio' 53 | } 54 | 55 | missing_vision = [] 56 | for import_name, package_name in vision_deps.items(): 57 | if check_package(import_name): 58 | print(f"✅ {package_name} 已安装") 59 | else: 60 | print(f"❌ {package_name} 未安装") 61 | missing_vision.append(package_name) 62 | 63 | # 安装缺失的依赖 64 | all_missing = missing_core + missing_vision 65 | 66 | if not all_missing: 67 | print("\n🎉 所有依赖都已安装!") 68 | return 69 | 70 | print(f"\n📦 需要安装 {len(all_missing)} 个依赖包:") 71 | for pkg in all_missing: 72 | print(f" - {pkg}") 73 | 74 | response = input("\n是否现在安装这些依赖?(y/n): ").lower().strip() 75 | 76 | if response in ['y', 'yes', '是']: 77 | print("\n🚀 开始安装依赖...") 78 | success_count = 0 79 | 80 | for package in all_missing: 81 | print(f"\n📦 安装 {package}...") 82 | if install_package(package): 83 | print(f"✅ {package} 安装成功") 84 | success_count += 1 85 | else: 86 | print(f"❌ {package} 安装失败") 87 | 88 | print(f"\n📊 安装结果: {success_count}/{len(all_missing)} 个包安装成功") 89 | 90 | if success_count == len(all_missing): 91 | print("🎉 所有依赖安装完成!请重启ComfyUI。") 92 | else: 93 | print("⚠️ 部分依赖安装失败,请手动安装或检查网络连接。") 94 | print("\n手动安装命令:") 95 | for package in all_missing: 96 | print(f" pip install {package}") 97 | else: 98 | print("\n取消安装。") 99 | print("\n手动安装命令:") 100 | for package in all_missing: 101 | print(f" pip install {package}") 102 | 103 | if __name__ == "__main__": 104 | main() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI Qwen-Image 节点 2 | 3 | 本仓库提供了 [魔搭社区开放API](https://modelscope.cn/) 的 Qwen-Image 模型在 ComfyUI 中的节点实现。 4 | 5 | ## 特性 6 | 7 | - 支持通过 [魔搭社区](https://modelscope.cn/) 的 API 调用 Qwen-Image 模型 8 | - 支持图像尺寸、采样步数、引导系数等参数设置 9 | - 支持随机种子与固定种子 10 | - 支持负向提示词 11 | - 支持 API Token 保存(首次填写后自动保存到 config.json) 12 | - 支持图像编辑功能 (Qwen-Image-Edit 模型) 13 | - 支持目标检测功能 (Qwen2.5-VL 模型) 14 | - 支持边界框检测和可视化 15 | - 支持与 SAM2 等分割节点配合使用 16 | 17 | ## 安装 18 | 19 | 1. 
克隆本仓库到 ComfyUI 的 `custom_nodes` 目录下: 20 | 21 | ``` 22 | cd ComfyUI/custom_nodes 23 | git clone https://github.com/111496583yzy/comfyui-modelscope-qwen-image.git comfyui-qwen-image 24 | ``` 25 | 26 | 2. 重启 ComfyUI 服务 27 | 28 | ## 使用方法 29 | 30 | ### 1. 获取魔搭API Token 31 | 32 | 访问 [魔搭社区](https://modelscope.cn/) 并登录,在个人资料页获取 API Token。 33 | 34 | ### 2. Qwen-Image 生图节点 35 | 36 | 在 ComfyUI 编辑器中添加 `Qwen-Image 生图节点`,设置以下参数: 37 | 38 | - **prompt**: 文本提示词 39 | - **api_token**: 魔搭API Token (首次填写后会自动保存) 40 | - **model**: 模型名称(默认为 "Qwen/Qwen-Image") 41 | - **negative_prompt**: 负向提示词(可选) 42 | - **width/height**: 图像宽高(默认512x512) 43 | - **seed**: 随机种子(-1表示使用随机种子) 44 | - **steps**: 采样步数(默认30) 45 | - **guidance**: 引导系数(默认7.5) 46 | 47 | ### 3. Qwen-Image 图像编辑节点 48 | 49 | 在 ComfyUI 编辑器中添加 `Qwen-Image 图像编辑节点`,设置以下参数: 50 | 51 | - **image**: 要编辑的原始图像 52 | - **prompt**: 描述要进行的编辑的文本提示词 53 | - **api_token**: 魔搭API Token (首次填写后会自动保存) 54 | - **model**: 模型名称(默认为 "Qwen/Qwen-Image-Edit") 55 | - **negative_prompt**: 负向提示词(可选) 56 | - **width/height**: 图像宽高(默认512x512,范围64-1664) 57 | - **steps**: 采样步数(范围1-100,默认30) 58 | - **guidance**: 引导系数(范围1.5-20.0,默认3.5) 59 | - **seed**: 随机种子(-1表示使用随机种子,0-2147483647为固定种子) 60 | 61 | ### 4. Qwen2.5-VL 目标检测节点 62 | 63 | #### 4.1 Qwen2.5-VL API 配置节点 64 | 65 | 在 ComfyUI 编辑器中添加 `Qwen2.5-VL API Configuration` 节点,设置以下参数: 66 | 67 | - **base_url**: API服务的基础URL(默认:https://api-inference.modelscope.cn/v1) 68 | - **api_key**: API密钥(必需) 69 | - **model_name**: 模型名称(如:Qwen/Qwen2.5-VL-72B-Instruct) 70 | - **timeout**: 请求超时时间(秒) 71 | 72 | #### 4.2 Qwen2.5-VL API 目标检测节点 73 | 74 | 在 ComfyUI 编辑器中添加 `Qwen2.5-VL API Object Detection` 节点,设置以下参数: 75 | 76 | - **qwen_api_config**: 连接上述配置节点的输出 77 | - **image**: 要检测的图像 78 | - **target**: 要检测的目标对象(如 "cat"、"人脸"、"logo" 等) 79 | - **bbox_selection**: 边界框选择("all" 返回所有框,或指定索引如 "0,2") 80 | - **score_threshold**: 置信度阈值(0.0-1.0) 81 | - **merge_boxes**: 是否合并选定的边界框 82 | 83 | #### 4.3 为 SAM2 准备边界框节点 84 | 85 | 在 ComfyUI 编辑器中添加 `Prepare BBoxes for SAM2` 节点,用于将检测结果转换为 SAM2 节点期望的格式。 86 | 87 | ## 工作流示例 88 | 89 | ### 文本生图 90 | 91 | 1. 添加 `Qwen-Image 生图节点` 并设置提示词和其他参数 92 | 2. 连接输出到 `Preview Image` 节点 93 | 94 | ### 图像编辑 95 | 96 | 1. 准备一张原始图像(使用 `Load Image` 或其他方式) 97 | 2. 添加 `Qwen-Image 图像编辑节点` 98 | 3. 将原始图像连接到编辑节点的 `image` 输入 99 | 4. 设置编辑提示词(如"把狗变成猫") 100 | 5. 连接输出到 `Preview Image` 节点 101 | 102 | ### 目标检测 103 | 104 | 1. 准备一张要检测的图像(使用 `Load Image` 或其他方式) 105 | 2. 添加 `Qwen2.5-VL API Configuration` 节点并配置API参数 106 | 3. 添加 `Qwen2.5-VL API Object Detection` 节点 107 | 4. 将配置节点连接到检测节点的 `qwen_api_config` 输入 108 | 5. 将图像连接到检测节点的 `image` 输入 109 | 6. 设置要检测的目标对象(如 "cat"、"人脸"、"logo" 等) 110 | 7. 连接检测节点的 `preview` 输出到 `Preview Image` 节点查看检测结果 111 | 8. 连接检测节点的 `bboxes` 输出到 `Prepare BBoxes for SAM2` 节点(可选) 112 | 9. 将 SAM2 准备节点的输出连接到 SAM2 分割节点进行进一步处理 113 | 114 | ## Cloudinary 视频存储配置 115 | 116 | 本插件支持将视频上传到 Cloudinary 云存储服务,以获得更稳定的视频URL用于AI分析。 117 | 118 | ### 配置步骤 119 | 120 | #### 1. 注册 Cloudinary 账号 121 | - 访问 [Cloudinary官网](https://cloudinary.com/) 122 | - 注册免费账号(每月有免费额度) 123 | 124 | #### 2. 获取 API 凭据 125 | 登录 Cloudinary 控制台后,在 **API Keys** 页面可以找到: 126 | - **Cloud Name**: 你的云名称(在页面顶部显示) 127 | - **API Key**: API密钥(在表格的 "API Key" 列中) 128 | - **API Secret**: API密钥密码(在表格的 "API Secret" 列中,点击眼睛图标显示) 129 | 130 | **重要**: API Secret 默认被星号隐藏,需要点击旁边的眼睛图标 👁️ 才能看到完整内容! 131 | 132 | #### 3. 
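拿到凭据后,先了解一下插件使用的 Cloudinary 签名上传机制会更容易排错:签名是把参与上传的参数按字母序拼接成字符串,在末尾直接附加 API Secret,再取 SHA-1。下面是与 `qwen_video_node.py` 中 `upload_video_to_cloudinary` 一致的最小示意(占位值需换成你自己的凭据):

```python
# 最小示意:Cloudinary 签名上传的签名计算方式(与插件实现一致)
import hashlib
import time

api_secret = "你的API Secret"  # 占位值,来自 Cloudinary 控制台
timestamp = str(int(time.time()))
public_id = f"comfyui_video_{timestamp}"

# 参与签名的参数按字母序排列(public_id 在 timestamp 之前),末尾附加 API Secret
sign_string = f"public_id={public_id}&timestamp={timestamp}{api_secret}"
signature = hashlib.sha1(sign_string.encode()).hexdigest()
print(signature)  # 作为表单字段 signature 随上传请求一起提交
```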
配置 config.json 133 | 在 `config.json` 文件中添加以下配置: 134 | 135 | ```json 136 | { 137 | "cloudinary_cloud_name": "你的云名称", 138 | "cloudinary_api_key": "你的API密钥", 139 | "cloudinary_api_secret": "点击眼睛图标后显示的完整密钥" 140 | } 141 | ``` 142 | 143 | ### 使用说明 144 | - 配置完成后,插件会优先使用 Cloudinary 上传视频 145 | - 如果 Cloudinary 上传失败,会自动回退到 base64 方式直接传输视频数据 146 | - 上传成功后,会使用 Cloudinary 的 HTTPS URL 进行AI分析 147 | 148 | ### 优势 149 | - **更稳定**: Cloudinary 是专业的云存储服务 150 | - **更快速**: 全球CDN加速 151 | - **更安全**: HTTPS 加密传输 152 | - **更可靠**: 99.9% 服务可用性 153 | 154 | ### 注意事项 155 | - 请妥善保管你的 API 凭据,不要泄露给他人 156 | - 免费账号有使用限制,超出后需要付费 157 | - 建议定期检查 Cloudinary 控制台的使用情况 158 | 159 | ## 注意事项 160 | 161 | - API 调用需要网络连接 162 | - 高峰时期可能需要等待较长时间 163 | - 请遵守魔搭社区的使用政策 164 | - 如遇到错误代码429,表示请求过多,需要等待一段时间后重试 165 | 166 | ## License 167 | 168 | MIT 169 | -------------------------------------------------------------------------------- /verify_installation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import sys 6 | 7 | def check_files(): 8 | required_files = [ 9 | '__init__.py', 10 | 'qwen_image_node.py', 11 | 'qwen_vision_node.py', 12 | 'qwen_text_node.py', 13 | 'config.json', 14 | 'README.md', 15 | 'requirements.txt' 16 | ] 17 | 18 | print("📁 检查文件完整性...") 19 | missing_files = [] 20 | 21 | for file in required_files: 22 | if os.path.exists(file): 23 | print(f"✅ {file}") 24 | else: 25 | print(f"❌ {file} (缺失)") 26 | missing_files.append(file) 27 | 28 | return len(missing_files) == 0 29 | 30 | def check_dependencies(): 31 | print("\n📦 检查依赖包...") 32 | 33 | deps = { 34 | 'requests': '网络请求', 35 | 'PIL': '图像处理', 36 | 'torch': '深度学习框架', 37 | 'numpy': '数值计算', 38 | 'openai': '文本生成和图生文功能', 39 | 'httpx': '高级HTTP客户端', 40 | 'socksio': 'SOCKS代理支持' 41 | } 42 | 43 | missing_deps = [] 44 | 45 | for dep, desc in deps.items(): 46 | try: 47 | __import__(dep) 48 | print(f"✅ {dep} ({desc})") 49 | except ImportError: 50 | print(f"❌ {dep} ({desc}) - 未安装") 51 | missing_deps.append(dep) 52 | 53 | return len(missing_deps) == 0, missing_deps 54 | 55 | def check_proxy_support(): 56 | print("\n🌐 检查代理支持...") 57 | 58 | try: 59 | import httpx 60 | try: 61 | import socksio 62 | print("✅ SOCKS代理支持已安装") 63 | return True 64 | except ImportError: 65 | print("⚠️ SOCKS代理支持未安装,如果使用代理可能会出错") 66 | print(" 建议运行: pip install httpx[socks] socksio") 67 | return False 68 | except ImportError: 69 | print("❌ httpx未安装") 70 | return False 71 | 72 | def check_node_loading(): 73 | print("\n🔧 检查节点加载...") 74 | 75 | try: 76 | from qwen_image_node import QwenImageNode 77 | node = QwenImageNode() 78 | input_types = node.INPUT_TYPES() 79 | print("✅ 文生图节点加载成功") 80 | 81 | from qwen_vision_node import QwenVisionNode, OPENAI_AVAILABLE 82 | if OPENAI_AVAILABLE: 83 | vision_node = QwenVisionNode() 84 | vision_input_types = vision_node.INPUT_TYPES() 85 | print("✅ 图生文节点加载成功") 86 | else: 87 | print("⚠️ 图生文节点加载成功,但OpenAI库不可用") 88 | 89 | from qwen_text_node import QwenTextNode 90 | if OPENAI_AVAILABLE: 91 | text_node = QwenTextNode() 92 | text_input_types = text_node.INPUT_TYPES() 93 | print("✅ 文本生成节点加载成功") 94 | else: 95 | print("⚠️ 文本生成节点加载成功,但OpenAI库不可用") 96 | 97 | return True 98 | except Exception as e: 99 | print(f"❌ 节点加载失败: {e}") 100 | return False 101 | 102 | def check_config(): 103 | print("\n⚙️ 检查配置文件...") 104 | 105 | try: 106 | import json 107 | with open('config.json', 'r', encoding='utf-8') as f: 108 | config = json.load(f) 109 | 110 | required_keys = [ 111 | 'default_model', 112 | 
'default_vision_model', 113 | 'default_text_model', 114 | 'timeout', 115 | 'default_prompt' 116 | ] 117 | 118 | missing_keys = [] 119 | for key in required_keys: 120 | if key in config: 121 | print(f"✅ {key}: {config[key]}") 122 | else: 123 | print(f"❌ {key} (缺失)") 124 | missing_keys.append(key) 125 | 126 | return len(missing_keys) == 0 127 | except Exception as e: 128 | print(f"❌ 配置文件读取失败: {e}") 129 | return False 130 | 131 | def main(): 132 | print("=" * 60) 133 | print("Qwen-Image ComfyUI 插件安装验证") 134 | print("=" * 60) 135 | 136 | checks = [ 137 | ("文件完整性", check_files), 138 | ("依赖包", lambda: check_dependencies()[0]), 139 | ("代理支持", check_proxy_support), 140 | ("配置文件", check_config), 141 | ("节点加载", check_node_loading), 142 | ] 143 | 144 | passed = 0 145 | total = len(checks) 146 | 147 | for check_name, check_func in checks: 148 | print(f"\n🔍 {check_name}检查...") 149 | try: 150 | if check_func(): 151 | passed += 1 152 | print(f"✅ {check_name}检查通过") 153 | else: 154 | print(f"❌ {check_name}检查失败") 155 | except Exception as e: 156 | print(f"❌ {check_name}检查出错: {e}") 157 | 158 | print("-" * 40) 159 | 160 | deps_ok, missing_deps = check_dependencies() 161 | if not deps_ok: 162 | print(f"\n📦 缺失的依赖包: {', '.join(missing_deps)}") 163 | print("运行以下命令安装:") 164 | print("python install_dependencies.py") 165 | print("或手动安装:") 166 | for dep in missing_deps: 167 | if dep == 'httpx': 168 | print(f" pip install httpx[socks]") 169 | else: 170 | print(f" pip install {dep}") 171 | 172 | print(f"\n📊 验证结果: {passed}/{total} 项检查通过") 173 | 174 | if passed >= total - 1: 175 | print("\n🎉 插件安装验证成功!") 176 | print("\n📋 下一步操作:") 177 | print("1. 将整个插件文件夹复制到 ComfyUI/custom_nodes/ 目录") 178 | print("2. 重启ComfyUI") 179 | print("3. 在节点列表中查找 'QwenImage' 分类") 180 | print("4. 准备好您的魔搭API Token") 181 | 182 | current_path = os.getcwd() 183 | if 'custom_nodes' in current_path: 184 | print("\n✅ 检测到您已在ComfyUI的custom_nodes目录中") 185 | print(" 请直接重启ComfyUI即可使用") 186 | else: 187 | print(f"\n📁 当前路径: {current_path}") 188 | print(" 请确保将插件复制到正确的ComfyUI目录") 189 | 190 | if not check_proxy_support(): 191 | print("\n⚠️ 代理支持提醒:") 192 | print(" 如果您使用代理上网,建议安装代理支持包:") 193 | print(" pip install httpx[socks] socksio") 194 | else: 195 | print("\n⚠️ 插件安装验证失败,请修复上述问题后重试") 196 | 197 | if __name__ == "__main__": 198 | main() -------------------------------------------------------------------------------- /troubleshoot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Qwen-Image ComfyUI 插件故障排除工具 6 | 自动诊断和解决常见问题 7 | """ 8 | 9 | import os 10 | import sys 11 | import subprocess 12 | import json 13 | 14 | def print_header(title): 15 | """打印标题""" 16 | print("\n" + "=" * 60) 17 | print(f" {title}") 18 | print("=" * 60) 19 | 20 | def print_section(title): 21 | """打印章节标题""" 22 | print(f"\n🔍 {title}") 23 | print("-" * 40) 24 | 25 | def run_command(command, description): 26 | """运行命令并返回结果""" 27 | print(f"📋 {description}") 28 | print(f"💻 命令: {command}") 29 | 30 | try: 31 | result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=30) 32 | if result.returncode == 0: 33 | print("✅ 成功") 34 | if result.stdout.strip(): 35 | print(f"📄 输出: {result.stdout.strip()}") 36 | return True, result.stdout 37 | else: 38 | print("❌ 失败") 39 | if result.stderr.strip(): 40 | print(f"🚨 错误: {result.stderr.strip()}") 41 | return False, result.stderr 42 | except subprocess.TimeoutExpired: 43 | print("⏰ 超时") 44 | return False, "命令执行超时" 45 | except Exception as e: 
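# 说明:run_command 统一以 shell=True 执行诊断命令并设置 30 秒超时;
# 成功返回 (True, stdout),命令失败、超时或抛出异常时返回 (False, 错误信息)。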
46 | print(f"💥 异常: {str(e)}") 47 | return False, str(e) 48 | 49 | def check_python_environment(): 50 | """检查Python环境""" 51 | print_section("Python环境检查") 52 | 53 | # Python版本 54 | run_command("python --version", "检查Python版本") 55 | 56 | # pip版本 57 | run_command("pip --version", "检查pip版本") 58 | 59 | # 已安装的包 60 | print("\n📦 检查关键包安装状态:") 61 | packages = ['requests', 'pillow', 'torch', 'numpy', 'openai', 'httpx', 'socksio'] 62 | 63 | for package in packages: 64 | try: 65 | __import__(package) 66 | print(f"✅ {package}") 67 | except ImportError: 68 | print(f"❌ {package} (未安装)") 69 | 70 | def check_files(): 71 | """检查文件完整性""" 72 | print_section("文件完整性检查") 73 | 74 | required_files = [ 75 | '__init__.py', 76 | 'qwen_image_node.py', 77 | 'qwen_vision_node.py', 78 | 'config.json', 79 | 'requirements.txt' 80 | ] 81 | 82 | for file in required_files: 83 | if os.path.exists(file): 84 | size = os.path.getsize(file) 85 | print(f"✅ {file} ({size} bytes)") 86 | else: 87 | print(f"❌ {file} (缺失)") 88 | 89 | def check_config(): 90 | """检查配置文件""" 91 | print_section("配置文件检查") 92 | 93 | try: 94 | with open('config.json', 'r', encoding='utf-8') as f: 95 | config = json.load(f) 96 | 97 | print("✅ config.json 格式正确") 98 | 99 | # 检查关键配置项 100 | key_configs = [ 101 | 'default_model', 102 | 'default_vision_model', 103 | 'timeout', 104 | 'default_prompt' 105 | ] 106 | 107 | for key in key_configs: 108 | if key in config: 109 | print(f"✅ {key}: {config[key]}") 110 | else: 111 | print(f"❌ {key} (缺失)") 112 | 113 | except Exception as e: 114 | print(f"❌ config.json 读取失败: {e}") 115 | 116 | def check_network(): 117 | """检查网络连接""" 118 | print_section("网络连接检查") 119 | 120 | # 检查基本网络连接 121 | run_command("ping -c 3 8.8.8.8", "检查基本网络连接") 122 | 123 | # 检查API服务器连接 124 | try: 125 | import requests 126 | response = requests.get('https://api-inference.modelscope.cn', timeout=10) 127 | print(f"✅ API服务器连接正常 (状态码: {response.status_code})") 128 | except Exception as e: 129 | print(f"❌ API服务器连接失败: {e}") 130 | 131 | # 检查代理设置 132 | proxy_vars = ['HTTP_PROXY', 'HTTPS_PROXY', 'SOCKS_PROXY'] 133 | print("\n🌐 代理环境变量:") 134 | for var in proxy_vars: 135 | value = os.environ.get(var) 136 | if value: 137 | print(f"✅ {var}: {value}") 138 | else: 139 | print(f"⚪ {var}: 未设置") 140 | 141 | def check_token(): 142 | """检查API Token""" 143 | print_section("API Token检查") 144 | 145 | token_sources = ['.qwen_token', 'config.json'] 146 | token_found = False 147 | 148 | for source in token_sources: 149 | if source == '.qwen_token' and os.path.exists(source): 150 | try: 151 | with open(source, 'r', encoding='utf-8') as f: 152 | token = f.read().strip() 153 | if token: 154 | print(f"✅ 在 {source} 中找到token (长度: {len(token)})") 155 | token_found = True 156 | else: 157 | print(f"⚪ {source} 存在但为空") 158 | except Exception as e: 159 | print(f"❌ 读取 {source} 失败: {e}") 160 | 161 | elif source == 'config.json': 162 | try: 163 | with open(source, 'r', encoding='utf-8') as f: 164 | config = json.load(f) 165 | token = config.get('api_token', '').strip() 166 | if token: 167 | print(f"✅ 在 {source} 中找到token (长度: {len(token)})") 168 | token_found = True 169 | else: 170 | print(f"⚪ {source} 中token为空") 171 | except Exception as e: 172 | print(f"❌ 读取 {source} 失败: {e}") 173 | 174 | if not token_found: 175 | print("❌ 未找到有效的API token") 176 | 177 | def run_diagnostic_tests(): 178 | """运行诊断测试""" 179 | print_section("诊断测试") 180 | 181 | tests = [ 182 | ("python verify_installation.py", "运行安装验证"), 183 | ("python test_vision_with_proxy.py", "运行代理测试"), 184 | ] 185 | 186 | for command, description in 
tests: 187 | if os.path.exists(command.split()[1]): 188 | success, output = run_command(command, description) 189 | if not success: 190 | print(f"⚠️ {description} 失败,请查看详细输出") 191 | else: 192 | print(f"⚪ {command.split()[1]} 不存在,跳过测试") 193 | 194 | def suggest_solutions(): 195 | """建议解决方案""" 196 | print_section("建议解决方案") 197 | 198 | solutions = [ 199 | "🔧 安装缺失依赖: python install_dependencies.py", 200 | "🔍 验证安装: python verify_installation.py", 201 | "🌐 测试代理: python test_vision_with_proxy.py", 202 | "📖 查看快速指南: cat QUICKSTART.md", 203 | "🔗 查看代理指南: cat PROXY_GUIDE.md", 204 | "🖼️ 查看图生文指南: cat VISION_GUIDE.md", 205 | "🔄 重启ComfyUI以加载更新", 206 | "🧹 清理Python缓存: rm -rf __pycache__", 207 | ] 208 | 209 | for solution in solutions: 210 | print(solution) 211 | 212 | def main(): 213 | print_header("Qwen-Image ComfyUI 插件故障排除工具") 214 | 215 | print("🚀 开始全面诊断...") 216 | 217 | # 运行所有检查 218 | check_python_environment() 219 | check_files() 220 | check_config() 221 | check_network() 222 | check_token() 223 | run_diagnostic_tests() 224 | suggest_solutions() 225 | 226 | print_header("诊断完成") 227 | 228 | print("\n💡 根据上述诊断结果:") 229 | print("1. 如果发现缺失依赖,运行: python install_dependencies.py") 230 | print("2. 如果网络有问题,查看: PROXY_GUIDE.md") 231 | print("3. 如果token有问题,重新输入API token") 232 | print("4. 如果文件缺失,重新下载插件") 233 | print("5. 完成修复后,重启ComfyUI") 234 | 235 | print("\n📞 如果问题仍然存在:") 236 | print("- 查看ComfyUI控制台的完整错误日志") 237 | print("- 尝试在不同网络环境下测试") 238 | print("- 确认ComfyUI版本兼容性") 239 | 240 | if __name__ == "__main__": 241 | main() -------------------------------------------------------------------------------- /qwen_text_node.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | import os 5 | 6 | try: 7 | from openai import OpenAI 8 | OPENAI_AVAILABLE = True 9 | except ImportError: 10 | print("警告: 未安装openai库,文本生成功能将不可用") 11 | print("请运行: pip install openai") 12 | OPENAI_AVAILABLE = False 13 | OpenAI = None 14 | 15 | def load_config(): 16 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 17 | try: 18 | with open(config_path, 'r', encoding='utf-8') as f: 19 | return json.load(f) 20 | except: 21 | return { 22 | "default_text_model": "Qwen/Qwen3-Coder-480B-A35B-Instruct", 23 | "timeout": 60, 24 | "default_system_prompt": "You are a helpful assistant.", 25 | "default_user_prompt": "你好" 26 | } 27 | 28 | def load_api_token(): 29 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 30 | try: 31 | cfg = load_config() 32 | token_from_cfg = cfg.get("api_token", "").strip() 33 | if token_from_cfg: 34 | return token_from_cfg 35 | except Exception as e: 36 | print(f"读取config.json中的token失败: {e}") 37 | try: 38 | if os.path.exists(token_path): 39 | with open(token_path, 'r', encoding='utf-8') as f: 40 | token = f.read().strip() 41 | return token if token else "" 42 | return "" 43 | except Exception as e: 44 | print(f"加载token失败: {e}") 45 | return "" 46 | 47 | def save_api_token(token): 48 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 49 | try: 50 | with open(token_path, 'w', encoding='utf-8') as f: 51 | f.write(token) 52 | cfg = load_config() 53 | cfg["api_token"] = token 54 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 55 | with open(config_path, 'w', encoding='utf-8') as f: 56 | json.dump(cfg, f, ensure_ascii=False, indent=2) 57 | return True 58 | except Exception as e: 59 | print(f"保存token失败: {e}") 60 | return False 61 | 62 | class QwenTextNode: 63 | def __init__(self): 64 
| pass 65 | 66 | @classmethod 67 | def INPUT_TYPES(cls): 68 | if not OPENAI_AVAILABLE: 69 | return { 70 | "required": { 71 | "error_message": ("STRING", { 72 | "default": "请先安装openai库: pip install openai", 73 | "multiline": True 74 | }), 75 | } 76 | } 77 | config = load_config() 78 | saved_token = load_api_token() 79 | return { 80 | "required": { 81 | "user_prompt": ("STRING", { 82 | "multiline": True, 83 | "default": config.get("default_user_prompt", "你好") 84 | }), 85 | "api_token": ("STRING", { 86 | "default": saved_token, 87 | "placeholder": "请输入您的魔搭API Token" 88 | }), 89 | }, 90 | "optional": { 91 | "system_prompt": ("STRING", { 92 | "multiline": True, 93 | "default": config.get("default_system_prompt", "You are a helpful assistant.") 94 | }), 95 | "model": ("STRING", { 96 | "default": config.get("default_text_model", "Qwen/Qwen3-Coder-480B-A35B-Instruct") 97 | }), 98 | "max_tokens": ("INT", { 99 | "default": 2000, 100 | "min": 100, 101 | "max": 8000 102 | }), 103 | "temperature": ("FLOAT", { 104 | "default": 0.7, 105 | "min": 0.1, 106 | "max": 2.0, 107 | "step": 0.1 108 | }), 109 | "stream": ("BOOLEAN", { 110 | "default": True 111 | }), 112 | "seed": ("INT", { 113 | "default": config.get("default_text_seed", -1), 114 | "min": -1, 115 | "max": 2147483647 116 | }), 117 | } 118 | } 119 | 120 | RETURN_TYPES = ("STRING",) 121 | RETURN_NAMES = ("response",) 122 | FUNCTION = "generate_text" 123 | CATEGORY = "QwenImage" 124 | 125 | def generate_text(self, user_prompt="", api_token="", system_prompt="You are a helpful assistant.", model="Qwen/Qwen3-Coder-480B-A35B-Instruct", max_tokens=2000, temperature=0.7, stream=True, seed=-1, error_message=""): 126 | if not OPENAI_AVAILABLE: 127 | return ("请先安装openai库: pip install openai",) 128 | 129 | config = load_config() 130 | 131 | if not api_token or api_token.strip() == "": 132 | raise Exception("请输入有效的API Token") 133 | 134 | saved_token = load_api_token() 135 | if api_token != saved_token: 136 | if save_api_token(api_token): 137 | print("API Token已自动保存") 138 | else: 139 | print("API Token保存失败,但不影响当前使用") 140 | 141 | try: 142 | print(f"💬 开始文本生成...") 143 | print(f"🤖 模型: {model}") 144 | print(f"📝 用户提示: {user_prompt[:50]}...") 145 | print(f"系统提示: {system_prompt[:50]}...") 146 | print(f"温度: {temperature}") 147 | print(f"📊 最大tokens: {max_tokens}") 148 | print(f"⚡ 流式输出: {stream}") 149 | 150 | # 处理随机种子 151 | if seed != -1: 152 | print(f"🎯 使用指定种子: {seed}") 153 | else: 154 | import random 155 | random_seed = random.randint(0, 2147483647) 156 | print(f"🎲 使用随机种子: {random_seed}") 157 | seed = random_seed 158 | 159 | client = OpenAI( 160 | base_url='https://api-inference.modelscope.cn/v1', 161 | api_key=api_token 162 | ) 163 | 164 | messages = [ 165 | { 166 | 'role': 'system', 167 | 'content': system_prompt 168 | }, 169 | { 170 | 'role': 'user', 171 | 'content': user_prompt 172 | } 173 | ] 174 | 175 | print(f"🚀 发送API请求...") 176 | 177 | response = client.chat.completions.create( 178 | model=model, 179 | messages=messages, 180 | max_tokens=max_tokens, 181 | temperature=temperature, 182 | stream=stream, 183 | seed=seed 184 | ) 185 | 186 | if stream: 187 | print("📡 接收流式响应...") 188 | full_response = "" 189 | for chunk in response: 190 | if chunk.choices[0].delta.content: 191 | content = chunk.choices[0].delta.content 192 | full_response += content 193 | print(content, end='', flush=True) 194 | 195 | print(f"\n流式生成完成!") 196 | print(f"📄 总长度: {len(full_response)} 字符") 197 | return (full_response,) 198 | else: 199 | result = response.choices[0].message.content 200 | 
print(f"文本生成完成!") 201 | print(f"📄 结果长度: {len(result)} 字符") 202 | print(f"📝 结果预览: {result[:100]}...") 203 | return (result,) 204 | 205 | except Exception as e: 206 | error_msg = f"文本生成失败: {str(e)}" 207 | print(f"{error_msg}") 208 | return (error_msg,) 209 | 210 | if OPENAI_AVAILABLE: 211 | NODE_CLASS_MAPPINGS = { 212 | "QwenTextNode": QwenTextNode 213 | } 214 | 215 | NODE_DISPLAY_NAME_MAPPINGS = { 216 | "QwenTextNode": "Qwen-Text 文本生成节点" 217 | } 218 | else: 219 | class OpenAINotInstalledNode: 220 | @classmethod 221 | def INPUT_TYPES(cls): 222 | return { 223 | "required": { 224 | "install_command": ("STRING", { 225 | "default": "pip install openai", 226 | "multiline": False 227 | }), 228 | } 229 | } 230 | 231 | RETURN_TYPES = ("STRING",) 232 | RETURN_NAMES = ("message",) 233 | FUNCTION = "show_install_message" 234 | CATEGORY = "QwenImage" 235 | 236 | def show_install_message(self, install_command): 237 | return ("请先安装openai库才能使用文本生成功能: " + install_command,) 238 | 239 | NODE_CLASS_MAPPINGS = { 240 | "QwenTextNode": OpenAINotInstalledNode 241 | } 242 | 243 | NODE_DISPLAY_NAME_MAPPINGS = { 244 | "QwenTextNode": "Qwen-Text 文本生成节点 (需要安装openai)" 245 | } -------------------------------------------------------------------------------- /qwen_vision_node.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | import torch 5 | import numpy as np 6 | from PIL import Image 7 | from io import BytesIO 8 | import os 9 | import base64 10 | import tempfile 11 | 12 | try: 13 | from openai import OpenAI 14 | OPENAI_AVAILABLE = True 15 | except ImportError: 16 | print("警告: 未安装openai库,图生文功能将不可用") 17 | print("请运行: pip install openai") 18 | OPENAI_AVAILABLE = False 19 | OpenAI = None 20 | 21 | def load_config(): 22 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 23 | try: 24 | with open(config_path, 'r', encoding='utf-8') as f: 25 | return json.load(f) 26 | except: 27 | return { 28 | "default_model": "stepfun-ai/step3", 29 | "timeout": 60, 30 | "default_prompt": "描述这幅图" 31 | } 32 | 33 | def load_api_token(): 34 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 35 | try: 36 | cfg = load_config() 37 | token_from_cfg = cfg.get("api_token", "").strip() 38 | if token_from_cfg: 39 | return token_from_cfg 40 | except Exception as e: 41 | print(f"读取config.json中的token失败: {e}") 42 | try: 43 | if os.path.exists(token_path): 44 | with open(token_path, 'r', encoding='utf-8') as f: 45 | token = f.read().strip() 46 | return token if token else "" 47 | return "" 48 | except Exception as e: 49 | print(f"加载token失败: {e}") 50 | return "" 51 | 52 | def save_api_token(token): 53 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 54 | try: 55 | with open(token_path, 'w', encoding='utf-8') as f: 56 | f.write(token) 57 | cfg = load_config() 58 | cfg["api_token"] = token 59 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 60 | with open(config_path, 'w', encoding='utf-8') as f: 61 | json.dump(cfg, f, ensure_ascii=False, indent=2) 62 | return True 63 | except Exception as e: 64 | print(f"保存token失败: {e}") 65 | return False 66 | 67 | def tensor_to_base64_url(image_tensor): 68 | try: 69 | if len(image_tensor.shape) == 4: 70 | image_tensor = image_tensor.squeeze(0) 71 | 72 | if image_tensor.max() <= 1.0: 73 | image_np = (image_tensor.cpu().numpy() * 255).astype(np.uint8) 74 | else: 75 | image_np = image_tensor.cpu().numpy().astype(np.uint8) 76 | 77 | pil_image = 
Image.fromarray(image_np) 78 | 79 | buffer = BytesIO() 80 | pil_image.save(buffer, format='JPEG', quality=85) 81 | img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') 82 | 83 | return f"data:image/jpeg;base64,{img_base64}" 84 | 85 | except Exception as e: 86 | print(f"图像转换失败: {e}") 87 | raise Exception(f"图像格式转换失败: {str(e)}") 88 | 89 | class QwenVisionNode: 90 | def __init__(self): 91 | pass 92 | 93 | @classmethod 94 | def INPUT_TYPES(cls): 95 | if not OPENAI_AVAILABLE: 96 | return { 97 | "required": { 98 | "error_message": ("STRING", { 99 | "default": "请先安装openai库: pip install openai", 100 | "multiline": True 101 | }), 102 | } 103 | } 104 | config = load_config() 105 | saved_token = load_api_token() 106 | return { 107 | "required": { 108 | "image": ("IMAGE",), 109 | "prompt": ("STRING", { 110 | "multiline": True, 111 | "default": config.get("default_prompt", "描述这幅图") 112 | }), 113 | "api_token": ("STRING", { 114 | "default": saved_token, 115 | "placeholder": "请输入您的魔搭API Token" 116 | }), 117 | }, 118 | "optional": { 119 | "system_prompt": ("STRING", { 120 | "multiline": True, 121 | "default": config.get("default_system_prompt", "You are a helpful assistant.") 122 | }), 123 | "model": ("STRING", { 124 | "default": config.get("default_vision_model", "stepfun-ai/step3") 125 | }), 126 | "max_tokens": ("INT", { 127 | "default": 1000, 128 | "min": 100, 129 | "max": 4000 130 | }), 131 | "temperature": ("FLOAT", { 132 | "default": 0.7, 133 | "min": 0.1, 134 | "max": 2.0, 135 | "step": 0.1 136 | }), 137 | "seed": ("INT", { 138 | "default": config.get("default_vision_seed", -1), 139 | "min": -1, 140 | "max": 2147483647 141 | }), 142 | } 143 | } 144 | 145 | RETURN_TYPES = ("STRING",) 146 | RETURN_NAMES = ("description",) 147 | FUNCTION = "analyze_image" 148 | CATEGORY = "QwenImage" 149 | 150 | def analyze_image(self, image=None, prompt="", api_token="", system_prompt="You are a helpful assistant.", model="stepfun-ai/step3", max_tokens=1000, temperature=0.7, seed=-1, error_message=""): 151 | if not OPENAI_AVAILABLE: 152 | return ("请先安装openai库: pip install openai",) 153 | 154 | config = load_config() 155 | 156 | if not api_token or api_token.strip() == "": 157 | raise Exception("请输入有效的API Token") 158 | 159 | saved_token = load_api_token() 160 | if api_token != saved_token: 161 | if save_api_token(api_token): 162 | print("API Token已自动保存") 163 | else: 164 | print("API Token保存失败,但不影响当前使用") 165 | 166 | try: 167 | print(f"开始分析图像...") 168 | print(f"📝 提示词: {prompt}") 169 | if system_prompt: 170 | print(f"系统提示: {system_prompt[:50]}...") 171 | print(f"🤖 模型: {model}") 172 | 173 | # 处理随机种子 174 | if seed != -1: 175 | print(f"🎯 使用指定种子: {seed}") 176 | else: 177 | import random 178 | random_seed = random.randint(0, 2147483647) 179 | print(f"🎲 使用随机种子: {random_seed}") 180 | seed = random_seed 181 | 182 | image_url = tensor_to_base64_url(image) 183 | print(f"图像已转换为base64格式") 184 | 185 | client = OpenAI( 186 | base_url='https://api-inference.modelscope.cn/v1', 187 | api_key=api_token 188 | ) 189 | 190 | messages = [] 191 | 192 | # 如果有系统提示词,添加到messages中 193 | if system_prompt and system_prompt.strip(): 194 | messages.append({ 195 | 'role': 'system', 196 | 'content': system_prompt 197 | }) 198 | 199 | # 添加用户消息(包含文本和图像) 200 | messages.append({ 201 | 'role': 'user', 202 | 'content': [{ 203 | 'type': 'text', 204 | 'text': prompt, 205 | }, { 206 | 'type': 'image_url', 207 | 'image_url': { 208 | 'url': image_url, 209 | }, 210 | }], 211 | }) 212 | 213 | print(f"🚀 发送API请求...") 214 | 215 | response = 
client.chat.completions.create( 216 | model=model, 217 | messages=messages, 218 | max_tokens=max_tokens, 219 | temperature=temperature, 220 | stream=False, 221 | seed=seed 222 | ) 223 | 224 | description = response.choices[0].message.content 225 | print(f"分析完成!") 226 | print(f"📄 结果: {description[:100]}...") 227 | 228 | return (description,) 229 | 230 | except Exception as e: 231 | error_msg = f"图像分析失败: {str(e)}" 232 | print(f"{error_msg}") 233 | return (error_msg,) 234 | 235 | if OPENAI_AVAILABLE: 236 | NODE_CLASS_MAPPINGS = { 237 | "QwenVisionNode": QwenVisionNode 238 | } 239 | 240 | NODE_DISPLAY_NAME_MAPPINGS = { 241 | "QwenVisionNode": "Qwen-Vision 图生文节点" 242 | } 243 | else: 244 | class OpenAINotInstalledNode: 245 | @classmethod 246 | def INPUT_TYPES(cls): 247 | return { 248 | "required": { 249 | "install_command": ("STRING", { 250 | "default": "pip install openai", 251 | "multiline": False 252 | }), 253 | } 254 | } 255 | 256 | RETURN_TYPES = ("STRING",) 257 | RETURN_NAMES = ("message",) 258 | FUNCTION = "show_install_message" 259 | CATEGORY = "QwenImage" 260 | 261 | def show_install_message(self, install_command): 262 | return ("请先安装openai库才能使用图生文功能: " + install_command,) 263 | 264 | NODE_CLASS_MAPPINGS = { 265 | "QwenVisionNode": OpenAINotInstalledNode 266 | } 267 | 268 | NODE_DISPLAY_NAME_MAPPINGS = { 269 | "QwenVisionNode": "Qwen-Vision 图生文节点 (需要安装openai)" 270 | } -------------------------------------------------------------------------------- /qwen_video_node.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | import os 5 | import tempfile 6 | import base64 7 | 8 | try: 9 | from openai import OpenAI 10 | OPENAI_AVAILABLE = True 11 | except ImportError: 12 | print("警告: 未安装openai库,视频生文功能将不可用") 13 | print("请运行: pip install openai") 14 | OPENAI_AVAILABLE = False 15 | OpenAI = None 16 | 17 | def load_config(): 18 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 19 | try: 20 | with open(config_path, 'r', encoding='utf-8') as f: 21 | return json.load(f) 22 | except: 23 | return { 24 | "default_model": "Qwen/Qwen3-VL-235B-A22B-Instruct", 25 | "timeout": 60, 26 | "default_prompt": "描述这个视频的内容", 27 | "cloudinary_cloud_name": "", 28 | "cloudinary_api_key": "", 29 | "cloudinary_api_secret": "" 30 | } 31 | 32 | def load_api_token(): 33 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 34 | try: 35 | cfg = load_config() 36 | token_from_cfg = cfg.get("api_token", "").strip() 37 | if token_from_cfg: 38 | return token_from_cfg 39 | except Exception as e: 40 | print(f"读取config.json中的token失败: {e}") 41 | try: 42 | if os.path.exists(token_path): 43 | with open(token_path, 'r', encoding='utf-8') as f: 44 | token = f.read().strip() 45 | return token if token else "" 46 | return "" 47 | except Exception as e: 48 | print(f"加载token失败: {e}") 49 | return "" 50 | 51 | def save_api_token(token): 52 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 53 | try: 54 | with open(token_path, 'w', encoding='utf-8') as f: 55 | f.write(token) 56 | cfg = load_config() 57 | cfg["api_token"] = token 58 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 59 | with open(config_path, 'w', encoding='utf-8') as f: 60 | json.dump(cfg, f, ensure_ascii=False, indent=2) 61 | return True 62 | except Exception as e: 63 | print(f"保存token失败: {e}") 64 | return False 65 | 66 | def save_cloudinary_config(cloud_name, api_key, api_secret): 67 | """保存Cloudinary配置到config.json""" 
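# 注意:凭据会以明文写入 config.json,而 .gitignore 并未忽略该文件,
# 请勿把已填入密钥的 config.json 提交到公开仓库。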
68 |     try:
69 |         cfg = load_config()
70 |         cfg["cloudinary_cloud_name"] = cloud_name
71 |         cfg["cloudinary_api_key"] = api_key
72 |         cfg["cloudinary_api_secret"] = api_secret
73 |         config_path = os.path.join(os.path.dirname(__file__), 'config.json')
74 |         with open(config_path, 'w', encoding='utf-8') as f:
75 |             json.dump(cfg, f, ensure_ascii=False, indent=2)
76 |         return True
77 |     except Exception as e:
78 |         print(f"保存Cloudinary配置失败: {e}")
79 |         return False
80 | 
81 | def upload_video_to_cloudinary(video_path, cloud_name=None, api_key=None, api_secret=None):
82 |     """上传视频到Cloudinary获取URL"""
83 |     try:
84 |         import hashlib
85 |         import time
86 | 
87 |         # 如果没有传入参数,从配置文件读取
88 |         if not cloud_name or not api_key or not api_secret:
89 |             config = load_config()
90 |             cloud_name = cloud_name or config.get('cloudinary_cloud_name', '').strip()
91 |             api_key = api_key or config.get('cloudinary_api_key', '').strip()
92 |             api_secret = api_secret or config.get('cloudinary_api_secret', '').strip()
93 | 
94 |         if not all([cloud_name, api_key, api_secret]):
95 |             print("Cloudinary配置不完整,请检查config.json中的cloudinary配置")
96 |             return None
97 | 
98 |         # 生成签名
99 |         timestamp = str(int(time.time()))
100 |         public_id = f"comfyui_video_{timestamp}"
101 | 
102 |         # 创建签名字符串
103 |         sign_string = f"public_id={public_id}&timestamp={timestamp}{api_secret}"
104 |         signature = hashlib.sha1(sign_string.encode()).hexdigest()
105 | 
106 |         # Cloudinary上传URL
107 |         upload_url = f"https://api.cloudinary.com/v1_1/{cloud_name}/video/upload"
108 | 
109 |         # 准备上传数据
110 |         with open(video_path, 'rb') as video_file:
111 |             files = {'file': video_file}
112 |             data = {
113 |                 'api_key': api_key,
114 |                 'timestamp': timestamp,
115 |                 'signature': signature,
116 |                 'public_id': public_id,
117 |                 'resource_type': 'video'
118 |             }
119 | 
120 |             print(f"正在上传视频到Cloudinary...")
121 |             upload_response = requests.post(
122 |                 upload_url,
123 |                 files=files,
124 |                 data=data,
125 |                 timeout=120  # 视频文件可能较大,增加超时时间
126 |             )
127 | 
128 |             if upload_response.status_code == 200:
129 |                 upload_data = upload_response.json()
130 |                 if 'secure_url' in upload_data:
131 |                     video_url = upload_data['secure_url']
132 |                     print(f"视频已上传到Cloudinary成功,获取URL: {video_url}")
133 |                     return video_url
134 |                 else:
135 |                     print(f"Cloudinary上传返回格式错误: {upload_response.text}")
136 |                     return None
137 |             else:
138 |                 print(f"Cloudinary上传失败: {upload_response.status_code}, {upload_response.text}")
139 |                 return None
140 |     except Exception as e:
141 |         print(f"Cloudinary上传异常: {str(e)}")
142 |         return None
143 | 
144 | 
145 | def video_to_base64(video_path):
146 |     """将视频文件转换为base64格式"""
147 |     try:
148 |         with open(video_path, 'rb') as video_file:
149 |             video_data = video_file.read()
150 |         video_base64 = base64.b64encode(video_data).decode('utf-8')
151 |         return f"data:video/mp4;base64,{video_base64}"
152 |     except Exception as e:
153 |         print(f"视频base64转换失败: {e}")
154 |         raise Exception(f"视频格式转换失败: {str(e)}")
155 | 
156 | class QwenVideoNode:
157 |     def __init__(self):
158 |         pass
159 | 
160 |     @classmethod
161 |     def INPUT_TYPES(cls):
162 |         if not OPENAI_AVAILABLE:
163 |             return {
164 |                 "required": {
165 |                     "error_message": ("STRING", {
166 |                         "default": "请先安装openai库: pip install openai",
167 |                         "multiline": True
168 |                     }),
169 |                 }
170 |             }
171 |         config = load_config()
172 |         saved_token = load_api_token()
173 |         return {
174 |             "required": {
175 |                 "prompt": ("STRING", {
176 |                     "multiline": True,
177 |                     "default": config.get("default_prompt", "描述这个视频的内容")
178 |                 }),
179 |                 "api_token": ("STRING", {
180 |                     "default": saved_token,
181 |                     "placeholder": "请输入您的魔搭API Token"
182 |                 }),
183 | }, 184 | "optional": { 185 | "system_prompt": ("STRING", { 186 | "multiline": True, 187 | "default": config.get("default_system_prompt", "You are a helpful assistant.") 188 | }), 189 | "video": ("VIDEO",), 190 | "video_path": ("STRING", { 191 | "default": "", 192 | "placeholder": "或者直接输入视频文件路径" 193 | }), 194 | "model": ("STRING", { 195 | "default": config.get("default_video_model", "stepfun-ai/step3") 196 | }), 197 | "max_tokens": ("INT", { 198 | "default": 1000, 199 | "min": 100, 200 | "max": 4000 201 | }), 202 | "temperature": ("FLOAT", { 203 | "default": 0.7, 204 | "min": 0.1, 205 | "max": 2.0, 206 | "step": 0.1 207 | }), 208 | "seed": ("INT", { 209 | "default": config.get("default_video_seed", -1), 210 | "min": -1, 211 | "max": 2147483647 212 | }), 213 | "cloudinary_cloud_name": ("STRING", { 214 | "default": config.get("cloudinary_cloud_name", ""), 215 | "placeholder": "Cloudinary Cloud Name" 216 | }), 217 | "cloudinary_api_key": ("STRING", { 218 | "default": config.get("cloudinary_api_key", ""), 219 | "placeholder": "Cloudinary API Key" 220 | }), 221 | "cloudinary_api_secret": ("STRING", { 222 | "default": config.get("cloudinary_api_secret", ""), 223 | "placeholder": "Cloudinary API Secret" 224 | }), 225 | } 226 | } 227 | 228 | RETURN_TYPES = ("STRING",) 229 | RETURN_NAMES = ("description",) 230 | FUNCTION = "analyze_video" 231 | CATEGORY = "QwenImage" 232 | 233 | def analyze_video(self, prompt="", api_token="", system_prompt="You are a helpful assistant.", video=None, video_path="", model="stepfun-ai/step3", max_tokens=1000, temperature=0.7, seed=-1, cloudinary_cloud_name="", cloudinary_api_key="", cloudinary_api_secret="", error_message=""): 234 | if not OPENAI_AVAILABLE: 235 | return ("请先安装openai库: pip install openai",) 236 | 237 | config = load_config() 238 | 239 | if not api_token or api_token.strip() == "": 240 | raise Exception("请输入有效的API Token") 241 | 242 | # 优先使用VIDEO输入,如果没有则使用video_path 243 | actual_video_path = None 244 | 245 | if video is not None: 246 | # 从VIDEO输入中提取视频路径 247 | try: 248 | print(f"调试VIDEO输入:") 249 | print(f" 类型: {type(video)}") 250 | print(f" 内容: {video}") 251 | 252 | # 打印所有属性 253 | if hasattr(video, '__dict__'): 254 | print(f" 属性: {video.__dict__}") 255 | elif hasattr(video, '__slots__'): 256 | print(f" 槽位: {video.__slots__}") 257 | 258 | # 尝试各种可能的属性名,包括私有属性 259 | possible_attrs = ['filename', 'path', 'file_path', 'name', 'file', 'video_path', 'src', 'url', 'file_path', 'input_path', '_VideoFromFile__file'] 260 | for attr in possible_attrs: 261 | if hasattr(video, attr): 262 | value = getattr(video, attr) 263 | print(f" 找到属性 {attr}: {value}") 264 | if isinstance(value, str) and value.strip(): 265 | actual_video_path = value 266 | break 267 | 268 | # 如果是字符串 269 | if isinstance(video, str) and video.strip(): 270 | actual_video_path = video 271 | print(f" VIDEO是字符串: {video}") 272 | 273 | # 如果是列表或元组 274 | elif hasattr(video, '__getitem__') and len(video) > 0: 275 | print(f" VIDEO是序列,长度: {len(video)}") 276 | first_item = video[0] 277 | print(f" 第一个元素类型: {type(first_item)}") 278 | print(f" 第一个元素内容: {first_item}") 279 | 280 | for attr in possible_attrs: 281 | if hasattr(first_item, attr): 282 | value = getattr(first_item, attr) 283 | print(f" 第一个元素属性 {attr}: {value}") 284 | if isinstance(value, str) and value.strip(): 285 | actual_video_path = value 286 | break 287 | 288 | if actual_video_path: 289 | print(f"成功提取视频路径: {actual_video_path}") 290 | else: 291 | print(f"无法从VIDEO输入中提取路径") 292 | 293 | except Exception as e: 294 | print(f"从VIDEO输入提取路径失败: {e}") 295 
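# 说明:ComfyUI 的 VIDEO 输入对象没有稳定的公开路径属性,
# 上面按常见属性名(包括私有的 _VideoFromFile__file)逐一探测底层文件路径,
# 全部失败时会在下方回退到 video_path 字符串参数。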
| import traceback 296 | traceback.print_exc() 297 | 298 | # 如果VIDEO输入没有提供有效路径,使用video_path 299 | if not actual_video_path and video_path: 300 | actual_video_path = video_path 301 | 302 | if not actual_video_path or not os.path.exists(actual_video_path): 303 | raise Exception("请提供有效的视频文件(通过VIDEO输入或video_path参数)") 304 | 305 | saved_token = load_api_token() 306 | if api_token != saved_token: 307 | if save_api_token(api_token): 308 | print("API Token已自动保存") 309 | else: 310 | print("API Token保存失败,但不影响当前使用") 311 | 312 | # 保存Cloudinary配置(如果提供了新的配置) 313 | if cloudinary_cloud_name and cloudinary_api_key and cloudinary_api_secret: 314 | current_config = load_config() 315 | if (current_config.get("cloudinary_cloud_name") != cloudinary_cloud_name or 316 | current_config.get("cloudinary_api_key") != cloudinary_api_key or 317 | current_config.get("cloudinary_api_secret") != cloudinary_api_secret): 318 | if save_cloudinary_config(cloudinary_cloud_name, cloudinary_api_key, cloudinary_api_secret): 319 | print("Cloudinary配置已自动保存") 320 | else: 321 | print("Cloudinary配置保存失败,但不影响当前使用") 322 | 323 | try: 324 | print(f"🎬 开始分析视频...") 325 | print(f"📝 提示词: {prompt}") 326 | if system_prompt: 327 | print(f"系统提示: {system_prompt[:50]}...") 328 | print(f"🤖 模型: {model}") 329 | print(f"视频路径: {actual_video_path}") 330 | 331 | # 处理随机种子 332 | if seed != -1: 333 | print(f"🎯 使用指定种子: {seed}") 334 | else: 335 | import random 336 | random_seed = random.randint(0, 2147483647) 337 | print(f"🎲 使用随机种子: {random_seed}") 338 | seed = random_seed 339 | 340 | # 尝试上传视频到Cloudinary获取URL 341 | video_url = upload_video_to_cloudinary(actual_video_path, cloudinary_cloud_name, cloudinary_api_key, cloudinary_api_secret) 342 | 343 | if video_url: 344 | # 使用URL方式 345 | print(f"🌐 使用视频URL: {video_url}") 346 | video_content = { 347 | 'type': 'video_url', 348 | 'video_url': { 349 | 'url': video_url, 350 | }, 351 | } 352 | else: 353 | # 回退到base64方式 354 | print("视频URL获取失败,回退到使用base64") 355 | video_data = video_to_base64(actual_video_path) 356 | video_content = { 357 | 'type': 'video_url', 358 | 'video_url': { 359 | 'url': video_data, 360 | }, 361 | } 362 | 363 | client = OpenAI( 364 | base_url='https://api-inference.modelscope.cn/v1', 365 | api_key=api_token 366 | ) 367 | 368 | messages = [] 369 | 370 | # 如果有系统提示词,添加到messages中 371 | if system_prompt and system_prompt.strip(): 372 | messages.append({ 373 | 'role': 'system', 374 | 'content': system_prompt 375 | }) 376 | 377 | # 添加用户消息(包含文本和视频) 378 | messages.append({ 379 | 'role': 'user', 380 | 'content': [{ 381 | 'type': 'text', 382 | 'text': prompt, 383 | }, video_content], 384 | }) 385 | 386 | print(f"🚀 发送API请求...") 387 | 388 | response = client.chat.completions.create( 389 | model=model, 390 | messages=messages, 391 | max_tokens=max_tokens, 392 | temperature=temperature, 393 | stream=False, 394 | seed=seed 395 | ) 396 | 397 | description = response.choices[0].message.content 398 | print(f"视频分析完成!") 399 | print(f"📄 结果: {description[:100]}...") 400 | 401 | return (description,) 402 | 403 | except Exception as e: 404 | error_msg = f"视频分析失败: {str(e)}" 405 | print(f"{error_msg}") 406 | return (error_msg,) 407 | 408 | if OPENAI_AVAILABLE: 409 | NODE_CLASS_MAPPINGS = { 410 | "QwenVideoNode": QwenVideoNode 411 | } 412 | 413 | NODE_DISPLAY_NAME_MAPPINGS = { 414 | "QwenVideoNode": "Qwen-Video 视频生文节点" 415 | } 416 | else: 417 | class OpenAINotInstalledVideoNode: 418 | @classmethod 419 | def INPUT_TYPES(cls): 420 | return { 421 | "required": { 422 | "install_command": ("STRING", { 423 | "default": "pip install 
openai", 424 | "multiline": False 425 | }), 426 | } 427 | } 428 | 429 | RETURN_TYPES = ("STRING",) 430 | RETURN_NAMES = ("message",) 431 | FUNCTION = "show_install_message" 432 | CATEGORY = "QwenImage" 433 | 434 | def show_install_message(self, install_command): 435 | return ("请先安装openai库才能使用视频生文功能: " + install_command,) 436 | 437 | NODE_CLASS_MAPPINGS = { 438 | "QwenVideoNode": OpenAINotInstalledVideoNode 439 | } 440 | 441 | NODE_DISPLAY_NAME_MAPPINGS = { 442 | "QwenVideoNode": "Qwen-Video 视频生文节点 (需要安装openai)" 443 | } 444 | -------------------------------------------------------------------------------- /qwen_detection_node.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ast 3 | import json 4 | import base64 5 | import io 6 | from dataclasses import dataclass 7 | from typing import List, Dict, Any, Tuple 8 | 9 | import torch 10 | import numpy as np 11 | from PIL import Image, ImageDraw, ImageFont 12 | from openai import OpenAI 13 | 14 | try: 15 | import folder_paths 16 | except ImportError: 17 | # folder_paths 只在 ComfyUI 环境中可用 18 | folder_paths = None 19 | 20 | 21 | def parse_json(json_output: str) -> str: 22 | """Extract the JSON payload from a model response string.""" 23 | if "```json" in json_output: 24 | json_output = json_output.split("```json", 1)[1] 25 | json_output = json_output.split("```", 1)[0] 26 | 27 | try: 28 | parsed = json.loads(json_output) 29 | if isinstance(parsed, dict) and "content" in parsed: 30 | inner = parsed["content"] 31 | if isinstance(inner, str): 32 | json_output = inner 33 | except Exception: 34 | pass 35 | return json_output 36 | 37 | 38 | def draw_bboxes_on_image(image, bboxes_data, target_label="object"): 39 | """直接使用KJNodes的DrawInstanceDiffusionTracking实现""" 40 | import matplotlib.cm as cm 41 | import torch 42 | from torchvision import transforms 43 | 44 | if not bboxes_data: 45 | return image 46 | 47 | # 确保图像是PIL格式 48 | if hasattr(image, 'mode'): # 已经是PIL图像 49 | pil_image = image.copy() 50 | else: # 如果是tensor,转换为PIL 51 | if len(image.shape) == 4: # batch 52 | current_image = image[0, :, :, :].permute(2, 0, 1) 53 | else: # single image 54 | current_image = image.permute(2, 0, 1) 55 | pil_image = transforms.ToPILImage()(current_image) 56 | 57 | draw = ImageDraw.Draw(pil_image) 58 | 59 | # 使用KJNodes的彩虹色彩映射 60 | colormap = cm.get_cmap('rainbow', len(bboxes_data)) 61 | 62 | # 尝试加载字体 63 | try: 64 | font = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 20) 65 | except: 66 | try: 67 | font = ImageFont.load_default() 68 | except: 69 | font = None 70 | 71 | # 直接使用KJNodes的绘制逻辑 72 | for j, bbox_data in enumerate(bboxes_data): 73 | if isinstance(bbox_data, dict): 74 | bbox = bbox_data.get("bbox_2d", bbox_data.get("bbox", [])) 75 | label = bbox_data.get("label", target_label) 76 | else: 77 | bbox = bbox_data 78 | label = target_label 79 | 80 | if len(bbox) != 4: 81 | continue 82 | 83 | x1, y1, x2, y2 = bbox 84 | # 转换为整数(KJNodes的做法) 85 | x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) 86 | 87 | # 使用KJNodes的颜色生成方式 88 | color = tuple(int(255 * x) for x in colormap(j / len(bboxes_data)))[:3] 89 | 90 | # 添加调试信息 91 | print(f"绘制边界框 {j+1}: ({x1}, {y1}) -> ({x2}, {y2}), 标签: {label}") 92 | 93 | # 使用KJNodes的绘制方式 94 | draw.rectangle([x1, y1, x2, y2], outline=color, width=2) 95 | 96 | if font: 97 | # 使用KJNodes的文本绘制方式 98 | text = f"{j+1}.{label}" 99 | # 计算文本尺寸(KJNodes的方式) 100 | _, _, text_width, text_height = draw.textbbox((0, 0), text=text, font=font) 101 | # 使用KJNodes的文本位置 102 | text_position = (x1, y1 - 
text_height) 103 | draw.text(text_position, text, fill=color, font=font) 104 | 105 | return pil_image 106 | 107 | 108 | def parse_boxes( 109 | text: str, 110 | img_width: int, 111 | img_height: int, 112 | input_w: int, 113 | input_h: int, 114 | score_threshold: float = 0.0, 115 | ) -> List[Dict[str, Any]]: 116 | """Return bounding boxes parsed from the model's raw JSON output.""" 117 | text = parse_json(text) 118 | try: 119 | data = json.loads(text) 120 | except Exception: 121 | try: 122 | data = ast.literal_eval(text) 123 | except Exception: 124 | end_idx = text.rfind('"}') + len('"}') 125 | truncated = text[:end_idx] + "]" 126 | data = ast.literal_eval(truncated) 127 | if isinstance(data, dict): 128 | inner = data.get("content") 129 | if isinstance(inner, str): 130 | try: 131 | data = ast.literal_eval(inner) 132 | except Exception: 133 | data = [] 134 | else: 135 | data = [] 136 | items: List[DetectedBox] = [] 137 | x_scale = img_width / input_w 138 | y_scale = img_height / input_h 139 | 140 | for item in data: 141 | box = item.get("bbox_2d") or item.get("bbox") or item 142 | label = item.get("label", "") 143 | score = float(item.get("score", 1.0)) 144 | # 修复坐标顺序:确保是 [x1, y1, x2, y2] 格式 145 | if len(box) >= 4: 146 | x1, y1, x2, y2 = box[0], box[1], box[2], box[3] 147 | else: 148 | x1, y1, x2, y2 = 0, 0, 0, 0 149 | 150 | abs_x1 = int(x1 * x_scale) 151 | abs_y1 = int(y1 * y_scale) 152 | abs_x2 = int(x2 * x_scale) 153 | abs_y2 = int(y2 * y_scale) 154 | 155 | # 确保坐标顺序正确 156 | if abs_x1 > abs_x2: 157 | abs_x1, abs_x2 = abs_x2, abs_x1 158 | if abs_y1 > abs_y2: 159 | abs_y1, abs_y2 = abs_y2, abs_y1 160 | 161 | if score >= score_threshold: 162 | items.append(DetectedBox([abs_x1, abs_y1, abs_x2, abs_y2], score, label)) 163 | items.sort(key=lambda x: x.score, reverse=True) 164 | return [ 165 | {"score": b.score, "bbox": b.bbox, "label": b.label} 166 | for b in items 167 | ] 168 | 169 | 170 | @dataclass 171 | class DetectedBox: 172 | bbox: List[int] 173 | score: float 174 | label: str = "" 175 | 176 | 177 | @dataclass 178 | class QwenAPIConfig: 179 | client: Any 180 | model_name: str 181 | base_url: str 182 | api_key: str 183 | 184 | 185 | def encode_image_to_base64(image: Image.Image) -> str: 186 | """将PIL图像编码为base64字符串""" 187 | buffer = io.BytesIO() 188 | image.save(buffer, format='PNG') 189 | img_str = base64.b64encode(buffer.getvalue()).decode() 190 | return f"data:image/png;base64,{img_str}" 191 | 192 | 193 | class QwenAPIConfig: 194 | def __init__(self, client=None, model_name=None, base_url=None, api_key=None): 195 | self.client = client 196 | self.model_name = model_name 197 | self.base_url = base_url 198 | self.api_key = api_key 199 | 200 | @classmethod 201 | def INPUT_TYPES(cls): 202 | return { 203 | "required": { 204 | "base_url": ("STRING", {"default": "https://api-inference.modelscope.cn/v1"}), 205 | "api_key": ("STRING", {"default": ""}), 206 | "model_name": ("STRING", {"default": "Qwen/Qwen2.5-VL-72B-Instruct"}), 207 | "timeout": ("INT", {"default": 60, "min": 10, "max": 300}), 208 | } 209 | } 210 | 211 | RETURN_TYPES = ("QWEN_API_CONFIG",) 212 | RETURN_NAMES = ("qwen_api_config",) 213 | FUNCTION = "configure" 214 | CATEGORY = "Qwen2.5-VL" 215 | 216 | def configure(self, base_url: str, api_key: str, model_name: str, timeout: int): 217 | """配置API客户端""" 218 | if not api_key: 219 | raise ValueError("API密钥不能为空") 220 | 221 | client = OpenAI( 222 | base_url=base_url, 223 | api_key=api_key, 224 | timeout=timeout 225 | ) 226 | 227 | return (QwenAPIConfig( 228 | client=client, 229 | 
model_name=model_name, 230 | base_url=base_url, 231 | api_key=api_key 232 | ),) 233 | 234 | 235 | class QwenAPIDetection: 236 | @classmethod 237 | def INPUT_TYPES(cls): 238 | return { 239 | "required": { 240 | "qwen_api_config": ("QWEN_API_CONFIG",), 241 | "image": ("IMAGE",), 242 | "target": ("STRING", {"default": "object"}), 243 | "bbox_selection": ("STRING", {"default": "all"}), 244 | "score_threshold": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}), 245 | "merge_boxes": ("BOOLEAN", {"default": False}), 246 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 247 | }, 248 | } 249 | 250 | RETURN_TYPES = ("JSON", "BBOX", "IMAGE") 251 | RETURN_NAMES = ("text", "bboxes", "preview") 252 | FUNCTION = "detect" 253 | CATEGORY = "Qwen2.5-VL" 254 | 255 | def detect( 256 | self, 257 | qwen_api_config: QwenAPIConfig, 258 | image, 259 | target: str, 260 | bbox_selection: str = "all", 261 | score_threshold: float = 0.0, 262 | merge_boxes: bool = False, 263 | seed: int = 0, 264 | ): 265 | """使用API生成目标检测边界框""" 266 | client = qwen_api_config.client 267 | model_name = qwen_api_config.model_name 268 | 269 | # 添加随机种子到prompt中,确保每次请求都不同 270 | random_seed_text = f"Random seed: {seed}. " 271 | 272 | prompt = f"""You are a precise object detection system. Your task is to detect {target} in the image. 273 | 274 | {random_seed_text}STRICT REQUIREMENTS: 275 | 1. You MUST create a SEPARATE bounding box for EACH individual {target} you see 276 | 2. NEVER combine multiple {target} objects into one large bounding box 277 | 3. Each {target} should have its own tight, precise bounding box 278 | 4. If you see 6 {target} objects, you MUST return 6 separate bounding boxes 279 | 5. Count each {target} individually and create one box per object 280 | 281 | OUTPUT FORMAT: Return a JSON array with separate objects for each {target}: 282 | [{{"bbox_2d": [x1, y1, x2, y2], "label": "{target}"}}, {{"bbox_2d": [x1, y1, x2, y2], "label": "{target}"}}, ...] 
247 | class QwenAPIDetection:
248 |     @classmethod
249 |     def INPUT_TYPES(cls):
250 |         return {
251 |             "required": {
252 |                 "qwen_api_config": ("QWEN_API_CONFIG",),
253 |                 "image": ("IMAGE",),
254 |                 "target": ("STRING", {"default": "object"}),
255 |                 "bbox_selection": ("STRING", {"default": "all"}),
256 |                 "score_threshold": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}),
257 |                 "merge_boxes": ("BOOLEAN", {"default": False}),
258 |                 "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
259 |             },
260 |         }
261 | 
262 |     RETURN_TYPES = ("JSON", "BBOX", "IMAGE")
263 |     RETURN_NAMES = ("text", "bboxes", "preview")
264 |     FUNCTION = "detect"
265 |     CATEGORY = "Qwen2.5-VL"
266 | 
267 |     def detect(
268 |         self,
269 |         qwen_api_config: QwenAPIConfig,
270 |         image,
271 |         target: str,
272 |         bbox_selection: str = "all",
273 |         score_threshold: float = 0.0,
274 |         merge_boxes: bool = False,
275 |         seed: int = 0,
276 |     ):
277 |         """Detect `target` objects in the image via the API."""
278 |         client = qwen_api_config.client
279 |         model_name = qwen_api_config.model_name
280 | 
281 |         # Fold the seed into the prompt so repeated requests are not served
282 |         # from cache.
283 |         random_seed_text = f"Random seed: {seed}. "
284 | 
285 |         prompt = f"""You are a precise object detection system. Your task is to detect {target} in the image.
286 | 
287 | {random_seed_text}STRICT REQUIREMENTS:
288 | 1. You MUST create a SEPARATE bounding box for EACH individual {target} you see
289 | 2. NEVER combine multiple {target} objects into one large bounding box
290 | 3. Each {target} should have its own tight, precise bounding box
291 | 4. If you see 6 {target} objects, you MUST return 6 separate bounding boxes
292 | 5. Count each {target} individually and create one box per object
293 | 
294 | OUTPUT FORMAT: Return a JSON array with separate objects for each {target}:
295 | [{{"bbox_2d": [x1, y1, x2, y2], "label": "{target}"}}, {{"bbox_2d": [x1, y1, x2, y2], "label": "{target}"}}, ...]
296 | 
297 | EXAMPLE: If you see 3 logos, return 3 separate boxes:
298 | [{{"bbox_2d": [100, 100, 150, 120], "label": "{target}"}}, {{"bbox_2d": [200, 100, 250, 120], "label": "{target}"}}, {{"bbox_2d": [300, 100, 350, 120], "label": "{target}"}}]
299 | 
300 | DO NOT return one large box covering multiple objects!"""
301 | 
302 |         # Convert the incoming tensor to a PIL image.
303 |         if isinstance(image, torch.Tensor):
304 |             image = (image.squeeze().clamp(0, 1) * 255).to(torch.uint8).cpu().numpy()
305 |             image = Image.fromarray(image)
306 |         elif not isinstance(image, Image.Image):
307 |             raise ValueError("不支持的图像类型")
308 | 
309 |         # Encode the image as a base64 data URL.
310 |         image_data = encode_image_to_base64(image)
311 | 
312 |         try:
313 |             # Call the API.
314 |             response = client.chat.completions.create(
315 |                 model=model_name,
316 |                 messages=[{
317 |                     'role': 'user',
318 |                     'content': [
319 |                         {'type': 'text', 'text': prompt},
320 |                         {'type': 'image_url', 'image_url': {'url': image_data}}
321 |                     ]
322 |                 }],
323 |                 max_tokens=1024,
324 |                 temperature=0.1
325 |             )
326 | 
327 |             output_text = response.choices[0].message.content
328 | 
329 |         except Exception as e:
330 |             raise RuntimeError(f"API调用失败: {str(e)}")
331 | 
332 |         # Parse the response. Qwen2.5-VL reports pixel coordinates in the
333 |         # model's (resized) input space; the API does not expose the true
334 |         # input size, so assume 1024x1024 and scale to the original image.
335 |         input_h = 1024  # assumed input height
336 |         input_w = 1024  # assumed input width
337 | 
338 |         items = parse_boxes(
339 |             output_text,
340 |             image.width,
341 |             image.height,
342 |             input_w,
343 |             input_h,
344 |             score_threshold,
345 |         )
346 | 
347 |         # Apply the bounding-box selection.
348 |         selection = bbox_selection.strip().lower()
349 |         boxes = items
350 |         if selection != "all" and selection:
351 |             idxs = []
352 |             for part in selection.replace(",", " ").split():
353 |                 try:
354 |                     idxs.append(int(part))
355 |                 except Exception:
356 |                     continue
357 |             boxes = [boxes[i] for i in idxs if 0 <= i < len(boxes)]
358 | 
359 |         # Optionally merge all boxes into one enclosing box.
360 |         if merge_boxes and boxes:
361 |             x1 = min(b["bbox"][0] for b in boxes)
362 |             y1 = min(b["bbox"][1] for b in boxes)
363 |             x2 = max(b["bbox"][2] for b in boxes)
364 |             y2 = max(b["bbox"][3] for b in boxes)
365 |             score = max(b["score"] for b in boxes)
366 |             label = boxes[0].get("label", target)
367 |             boxes = [{"bbox": [x1, y1, x2, y2], "score": score, "label": label}]
368 | 
369 |         # Format the outputs.
370 |         json_boxes = [
371 |             {"bbox_2d": b["bbox"], "label": b.get("label", target)} for b in boxes
372 |         ]
373 |         json_output = json.dumps(json_boxes, ensure_ascii=False)
374 |         bboxes_only = [b["bbox"] for b in boxes]
375 | 
376 |         # Render the preview image.
377 |         preview_image = self._create_preview_image(image, json_boxes, target)
378 | 
379 |         return (json_output, bboxes_only, preview_image)
380 | 
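381 |     # `bbox_selection` accepts "all" or whitespace/comma-separated indices
382 |     # into the score-sorted box list, e.g. "0 2" keeps the first and third
383 |     # highest-scoring boxes; `merge_boxes` then collapses whatever is left
384 |     # into a single enclosing box.
385 | 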
386 |     def _create_preview_image(self, image, bboxes_data, target_label):
387 |         """Render the detected boxes onto a copy of the input image."""
388 |         try:
389 |             # Convert the tensor to a PIL image.
390 |             if isinstance(image, torch.Tensor):
391 |                 print(f"原始图像tensor形状: {image.shape}")
392 | 
393 |                 # ComfyUI images are [batch, height, width, channels];
394 |                 # take the first image of the batch.
395 |                 if image.dim() == 4:
396 |                     image_tensor = image[0]
397 |                 else:
398 |                     image_tensor = image
399 | 
400 |                 print(f"处理后图像tensor形状: {image_tensor.shape}")
401 | 
402 |                 # To numpy, then to PIL.
403 |                 image_np = image_tensor.cpu().numpy()
404 |                 print(f"转换为numpy后形状: {image_np.shape}")
405 | 
406 |                 if image_np.max() <= 1.0:
407 |                     image_np = (image_np * 255).astype('uint8')
408 |                 else:
409 |                     image_np = image_np.astype('uint8')
410 | 
411 |                 # Defensive fallback: if the array is CHW (non-standard for
412 |                 # ComfyUI), move channels last.
413 |                 if len(image_np.shape) == 3 and image_np.shape[0] in [1, 3, 4]:
414 |                     image_np = np.transpose(image_np, (1, 2, 0))
415 |                     print(f"转置后形状: {image_np.shape}")
416 | 
417 |                 # Make sure the image is 3-channel RGB.
418 |                 if len(image_np.shape) == 3 and image_np.shape[2] == 1:
419 |                     # Single channel: repeat to RGB.
420 |                     image_np = np.repeat(image_np, 3, axis=2)
421 |                     print(f"单通道转RGB后形状: {image_np.shape}")
422 |                 elif len(image_np.shape) == 2:
423 |                     # Grayscale: stack to RGB.
424 |                     image_np = np.stack([image_np] * 3, axis=2)
425 |                     print(f"灰度图转RGB后形状: {image_np.shape}")
426 | 
427 |                 pil_image = Image.fromarray(image_np, 'RGB')
428 |             else:
429 |                 pil_image = image
430 | 
431 |             # Draw the bounding boxes.
432 |             preview_image = draw_bboxes_on_image(pil_image, bboxes_data, target_label)
433 | 
434 |             # Back to tensor form; ComfyUI's IMAGE type expects (B, H, W, C).
435 |             preview_np = np.array(preview_image)
436 |             print(f"绘制边界框后numpy形状: {preview_np.shape}")
437 | 
438 |             if len(preview_np.shape) == 3 and preview_np.shape[2] == 3:
439 |                 # Already HWC RGB: convert directly.
440 |                 preview_tensor = torch.from_numpy(preview_np).float() / 255.0
441 |                 print(f"转换为tensor后形状: {preview_tensor.shape}")
442 | 
443 |                 # Add the batch dimension: HWC -> BHWC.
444 |                 preview_tensor = preview_tensor.unsqueeze(0)
445 |                 print(f"添加batch维度后形状: {preview_tensor.shape}")
446 | 
447 |                 # Clamp to the expected value range.
448 |                 preview_tensor = torch.clamp(preview_tensor, 0.0, 1.0)
449 |                 print(f"最终tensor形状: {preview_tensor.shape}, 数据类型: {preview_tensor.dtype}")
450 | 
451 |                 # ComfyUI needs a contiguous tensor.
452 |                 preview_tensor = preview_tensor.contiguous()
453 | 
454 |                 return preview_tensor
455 |             else:
456 |                 print(f"警告: 预览图像格式不正确: {preview_np.shape}")
457 |                 return image
458 | 
459 |         except Exception as e:
460 |             print(f"创建预览图像时出错: {e}")
461 |             import traceback
462 |             traceback.print_exc()
463 |             # On failure, fall back to the original image.
464 |             return image
465 | 
466 | 
467 | class BBoxesToSAM2:
468 |     """Convert a list of bounding boxes to the format expected by SAM2 nodes."""
469 | 
470 |     @classmethod
471 |     def INPUT_TYPES(cls):
472 |         return {"required": {"bboxes": ("BBOX",)}}
473 | 
474 |     RETURN_TYPES = ("BBOXES",)
475 |     RETURN_NAMES = ("sam2_bboxes",)
476 |     FUNCTION = "convert"
477 |     CATEGORY = "Qwen2.5-VL"
478 | 
479 |     def convert(self, bboxes):
480 |         if not isinstance(bboxes, list):
481 |             raise ValueError("bboxes must be a list")
482 | 
483 |         # If already batched, return as-is
484 |         if bboxes and isinstance(bboxes[0], (list, tuple)) and bboxes[0] and isinstance(bboxes[0][0], (list, tuple)):
485 |             return (bboxes,)
486 | 
487 |         return ([bboxes],)
488 | 
489 | 
490 | NODE_CLASS_MAPPINGS = {
491 |     "QwenAPIConfig": QwenAPIConfig,
492 |     "QwenAPIDetection": QwenAPIDetection,
493 |     "BBoxesToSAM2": BBoxesToSAM2,
494 | }
495 | 
496 | NODE_DISPLAY_NAME_MAPPINGS = {
497 |     "QwenAPIConfig": "Qwen2.5-VL API Configuration",
498 |     "QwenAPIDetection": "Qwen2.5-VL API Object Detection",
499 |     "BBoxesToSAM2": "Prepare BBoxes for SAM2",
500 | }
--------------------------------------------------------------------------------
/qwen_image_node.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import time
4 | import torch
5 | import numpy as np
6 | from PIL import Image
7 | from io import BytesIO
8 | import os
9 | try:
10 |     import folder_paths
11 | except ImportError:
12 |     # folder_paths is only available inside a ComfyUI environment
13 |     folder_paths = None
14 | import base64
15 | import tempfile
16 | 
17 | def load_config():
18 |     config_path = os.path.join(os.path.dirname(__file__), 'config.json')
19 |     try:
20 |         with open(config_path, 'r', encoding='utf-8') as f:
21 |             return json.load(f)
22 |     except Exception:
23 |         return {
24 |             "default_model": "Qwen/Qwen-Image",
25 |             "timeout": 720,
26 |             "image_download_timeout": 30,
27 |             "default_prompt": "A beautiful landscape"
28 |         }
29 | 
30 | def 
save_config(config: dict) -> bool: 31 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 32 | try: 33 | with open(config_path, 'w', encoding='utf-8') as f: 34 | json.dump(config, f, ensure_ascii=False, indent=2) 35 | return True 36 | except Exception as e: 37 | print(f"保存配置失败: {e}") 38 | return False 39 | 40 | def save_api_token(token): 41 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 42 | try: 43 | with open(token_path, 'w', encoding='utf-8') as f: 44 | f.write(token) 45 | except Exception as e: 46 | print(f"保存token失败(.qwen_token): {e}") 47 | try: 48 | cfg = load_config() 49 | cfg["api_token"] = token 50 | if save_config(cfg): 51 | return True 52 | return False 53 | except Exception as e: 54 | print(f"保存token失败(config.json): {e}") 55 | return False 56 | 57 | def load_api_token(): 58 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 59 | try: 60 | cfg = load_config() 61 | token_from_cfg = cfg.get("api_token", "").strip() 62 | if token_from_cfg: 63 | return token_from_cfg 64 | except Exception as e: 65 | print(f"读取config.json中的token失败: {e}") 66 | try: 67 | if os.path.exists(token_path): 68 | with open(token_path, 'r', encoding='utf-8') as f: 69 | token = f.read().strip() 70 | return token if token else "" 71 | return "" 72 | except Exception as e: 73 | print(f"加载token失败: {e}") 74 | return "" 75 | 76 | def tensor_to_base64_url(image_tensor): 77 | try: 78 | if len(image_tensor.shape) == 4: 79 | image_tensor = image_tensor.squeeze(0) 80 | 81 | if image_tensor.max() <= 1.0: 82 | image_np = (image_tensor.cpu().numpy() * 255).astype(np.uint8) 83 | else: 84 | image_np = image_tensor.cpu().numpy().astype(np.uint8) 85 | 86 | pil_image = Image.fromarray(image_np) 87 | 88 | buffer = BytesIO() 89 | pil_image.save(buffer, format='JPEG', quality=85) 90 | img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') 91 | 92 | return f"data:image/jpeg;base64,{img_base64}" 93 | 94 | except Exception as e: 95 | print(f"图像转换失败: {e}") 96 | raise Exception(f"图像格式转换失败: {str(e)}") 97 | 98 | class QwenImageNode: 99 | def __init__(self): 100 | pass 101 | 102 | @classmethod 103 | def INPUT_TYPES(cls): 104 | config = load_config() 105 | saved_token = load_api_token() 106 | return { 107 | "required": { 108 | "prompt": ("STRING", { 109 | "multiline": True, 110 | "default": config.get("default_prompt", "A beautiful landscape") 111 | }), 112 | "api_token": ("STRING", { 113 | "default": saved_token, 114 | "placeholder": "请输入您的魔搭API Token" 115 | }), 116 | }, 117 | "optional": { 118 | "model": ("STRING", { 119 | "default": config.get("default_model", "Qwen/Qwen-Image") 120 | }), 121 | "negative_prompt": ("STRING", { 122 | "multiline": True, 123 | "default": config.get("default_negative_prompt", "") 124 | }), 125 | "width": ("INT", { 126 | "default": config.get("default_width", 512), 127 | "min": 64, 128 | "max": 2048, 129 | "step": 64 130 | }), 131 | "height": ("INT", { 132 | "default": config.get("default_height", 512), 133 | "min": 64, 134 | "max": 2048, 135 | "step": 64 136 | }), 137 | "seed": ("INT", { 138 | "default": config.get("default_seed", -1), 139 | "min": -1, 140 | "max": 2147483647 141 | }), 142 | "steps": ("INT", { 143 | "default": config.get("default_steps", 30), 144 | "min": 1, 145 | "max": 100 146 | }), 147 | "guidance": ("FLOAT", { 148 | "default": config.get("default_guidance", 7.5), 149 | "min": 1.5, 150 | "max": 20.0, 151 | "step": 0.1 152 | }), 153 | } 154 | } 155 | 156 | RETURN_TYPES = ("IMAGE",) 157 | RETURN_NAMES = ("image",) 158 | 
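    # ComfyUI wiring: FUNCTION names the method invoked when the node runs and
    # CATEGORY sets where the node appears in the add-node menu.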
FUNCTION = "generate_image" 159 | CATEGORY = "QwenImage" 160 | 161 | def generate_image(self, prompt, api_token, model="Qwen/Qwen-Image", negative_prompt="", width=512, height=512, seed=-1, steps=30, guidance=7.5): 162 | config = load_config() 163 | if not api_token or api_token.strip() == "": 164 | raise Exception("请输入有效的API Token") 165 | saved_token = load_api_token() 166 | if api_token != saved_token: 167 | if save_api_token(api_token): 168 | print("API Token已自动保存") 169 | else: 170 | print("API Token保存失败,但不影响当前使用") 171 | try: 172 | url = 'https://api-inference.modelscope.cn/v1/images/generations' 173 | payload = { 174 | 'model': model, 175 | 'prompt': prompt, 176 | 'size': f"{width}x{height}", 177 | 'steps': steps, 178 | 'guidance': guidance 179 | } 180 | if negative_prompt.strip(): 181 | payload['negative_prompt'] = negative_prompt 182 | print(f"🚫 负向提示词: {negative_prompt}") 183 | if seed != -1: 184 | payload['seed'] = seed 185 | print(f"🎯 使用指定种子: {seed}") 186 | else: 187 | import random 188 | random_seed = random.randint(0, 2147483647) 189 | payload['seed'] = random_seed 190 | print(f"🎲 使用随机种子: {random_seed}") 191 | print(f"📐 图像尺寸: {width}x{height}") 192 | print(f"🔧 采样步数: {steps}") 193 | print(f"🎨 引导系数: {guidance}") 194 | headers = { 195 | 'Authorization': f'Bearer {api_token}', 196 | 'Content-Type': 'application/json', 197 | 'X-ModelScope-Async-Mode': 'true' 198 | } 199 | submission_response = requests.post( 200 | url, 201 | data=json.dumps(payload, ensure_ascii=False).encode('utf-8'), 202 | headers=headers, 203 | timeout=config.get("timeout", 60) 204 | ) 205 | if submission_response.status_code == 400: 206 | print("提交失败,尝试使用最小参数重试...") 207 | minimal_payload = { 208 | 'model': model, 209 | 'prompt': prompt 210 | } 211 | submission_response = requests.post( 212 | url, 213 | data=json.dumps(minimal_payload, ensure_ascii=False).encode('utf-8'), 214 | headers=headers, 215 | timeout=config.get("timeout", 60) 216 | ) 217 | if submission_response.status_code != 200: 218 | raise Exception(f"API请求失败: {submission_response.status_code}, {submission_response.text}") 219 | submission_json = submission_response.json() 220 | image_url = None 221 | if 'task_id' in submission_json: 222 | task_id = submission_json['task_id'] 223 | print(f"🕒 已提交任务,任务ID: {task_id},开始轮询...") 224 | poll_start = time.time() 225 | max_wait_seconds = max(60, config.get('timeout', 720)) 226 | while True: 227 | task_resp = requests.get( 228 | f"https://api-inference.modelscope.cn/v1/tasks/{task_id}", 229 | headers={ 230 | 'Authorization': f'Bearer {api_token}', 231 | 'X-ModelScope-Task-Type': 'image_generation' 232 | }, 233 | timeout=config.get("image_download_timeout", 120) 234 | ) 235 | if task_resp.status_code != 200: 236 | raise Exception(f"任务查询失败: {task_resp.status_code}, {task_resp.text}") 237 | task_data = task_resp.json() 238 | status = task_data.get('task_status') 239 | if status == 'SUCCEED': 240 | output_images = task_data.get('output_images') or [] 241 | if not output_images: 242 | raise Exception("任务成功但未返回图片URL") 243 | image_url = output_images[0] 244 | print("任务完成,开始下载图片...") 245 | break 246 | if status == 'FAILED': 247 | raise Exception(f"任务失败: {task_data}") 248 | if time.time() - poll_start > max_wait_seconds: 249 | raise Exception("任务轮询超时,请稍后重试或降低并发") 250 | time.sleep(5) 251 | elif 'images' in submission_json and len(submission_json['images']) > 0: 252 | image_url = submission_json['images'][0]['url'] 253 | print(f"下载生成的图片...") 254 | else: 255 | raise Exception(f"未识别的API返回格式: {submission_json}") 256 | 
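            # `image_url` was resolved either from the async task result or
            # directly from the synchronous response above; download it within
            # the configured timeout.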
img_response = requests.get(image_url, timeout=config.get("image_download_timeout", 30))
257 |             if img_response.status_code != 200:
258 |                 raise Exception(f"图片下载失败: {img_response.status_code}")
259 |             pil_image = Image.open(BytesIO(img_response.content))
260 |             if pil_image.mode != 'RGB':
261 |                 pil_image = pil_image.convert('RGB')
262 |             image_np = np.array(pil_image).astype(np.float32) / 255.0
263 |             image_tensor = torch.from_numpy(image_np)[None,]
264 |             print(f"🎉 图片处理完成!")
265 |             return (image_tensor,)
266 |         except Exception as e:
267 |             print(f"Qwen-Image API调用失败: {str(e)}")
268 |             error_image = Image.new('RGB', (width, height), color='red')
269 |             error_np = np.array(error_image).astype(np.float32) / 255.0
270 |             error_tensor = torch.from_numpy(error_np)[None,]
271 |             return (error_tensor,)
272 | 
273 | class QwenImageEditNode:
274 |     def __init__(self):
275 |         pass
276 | 
277 |     @classmethod
278 |     def INPUT_TYPES(cls):
279 |         config = load_config()
280 |         saved_token = load_api_token()
281 |         return {
282 |             "required": {
283 |                 "image": ("IMAGE",),
284 |                 "prompt": ("STRING", {
285 |                     "multiline": True,
286 |                     "default": "修改图片中的内容"
287 |                 }),
288 |                 "api_token": ("STRING", {
289 |                     "default": saved_token,
290 |                     "placeholder": "请输入您的魔搭API Token"
291 |                 }),
292 |             },
293 |             "optional": {
294 |                 "image_2": ("IMAGE",),
295 |                 "image_3": ("IMAGE",),
296 |                 "model": ("STRING", {
297 |                     "default": "Qwen/Qwen-Image-Edit"
298 |                 }),
299 |                 "negative_prompt": ("STRING", {
300 |                     "multiline": True,
301 |                     "default": ""
302 |                 }),
303 |                 "width": ("INT", {
304 |                     "default": 512,
305 |                     "min": 64,
306 |                     "max": 1664,
307 |                     "step": 8
308 |                 }),
309 |                 "height": ("INT", {
310 |                     "default": 512,
311 |                     "min": 64,
312 |                     "max": 1664,
313 |                     "step": 8
314 |                 }),
315 |                 "steps": ("INT", {
316 |                     "default": 30,
317 |                     "min": 1,
318 |                     "max": 100,
319 |                     "step": 1
320 |                 }),
321 |                 "guidance": ("FLOAT", {
322 |                     "default": 3.5,
323 |                     "min": 1.5,
324 |                     "max": 20.0,
325 |                     "step": 0.1
326 |                 }),
327 |                 "seed": ("INT", {
328 |                     "default": -1,
329 |                     "min": -1,
330 |                     "max": 2147483647
331 |                 }),
332 |             }
333 |         }
334 | 
335 |     RETURN_TYPES = ("IMAGE",)
336 |     RETURN_NAMES = ("edited_image",)
337 |     FUNCTION = "edit_image"
338 |     CATEGORY = "QwenImage"
339 | 
340 |     def edit_image(self, image, prompt, api_token, model="Qwen/Qwen-Image-Edit", negative_prompt="",
341 |                    width=512, height=512, steps=30, guidance=3.5, seed=-1, image_2=None, image_3=None):
342 |         config = load_config()
343 |         if not api_token or api_token.strip() == "":
344 |             raise Exception("请输入有效的API Token")
345 |         saved_token = load_api_token()
346 |         if api_token != saved_token:
347 |             if save_api_token(api_token):
348 |                 print("API Token已自动保存")
349 |             else:
350 |                 print("API Token保存失败,但不影响当前使用")
351 | 
352 |         try:
353 |             # Helper: save one image to a temp file and upload it for a URL.
354 |             def upload_single_image(img_tensor, index):
355 |                 temp_path = None
356 |                 img_url = None
357 |                 try:
358 |                     # Save the image to a temporary file.
359 |                     temp_path = os.path.join(tempfile.gettempdir(), f"qwen_edit_temp_{index}_{int(time.time())}.jpg")
360 |                     if len(img_tensor.shape) == 4:
361 |                         img = img_tensor[0]
362 |                     else:
363 |                         img = img_tensor
364 | 
365 |                     i = 255. * img.cpu().numpy()
366 |                     img_pil = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8))
367 |                     img_pil.save(temp_path)
368 |                     print(f"图像{index}已保存到临时文件: {temp_path}")
369 | 
370 |                     # Upload the image to kefan.cn to obtain a hosted URL.
371 |                     upload_url = 'https://ai.kefan.cn/api/upload/local'
372 |                     with open(temp_path, 'rb') as img_file:
373 |                         files = {'file': img_file}
374 |                         upload_response = requests.post(
375 |                             upload_url,
376 |                             files=files,
377 |                             timeout=30
378 |                         )
379 |                     if upload_response.status_code == 200:
380 |                         upload_data = upload_response.json()
381 |                         if upload_data.get('success') == True and 'data' in upload_data:
382 |                             img_url = upload_data['data']
383 |                             print(f"图像{index}已上传成功,获取URL: {img_url}")
384 |                         else:
385 |                             print(f"图像{index}上传返回错误: {upload_response.text}")
386 |                     else:
387 |                         print(f"图像{index}上传失败: {upload_response.status_code}, {upload_response.text}")
388 |                 except Exception as e:
389 |                     print(f"图像{index}上传异常: {str(e)}")
390 | 
391 |                 return temp_path, img_url
392 | 
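393 |             # upload_single_image returns (temp_path, None) when the upload
394 |             # fails; in that case the code below falls back to inlining the
395 |             # image as a base64 data URL instead of a hosted URL.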
396 |             # Upload the main image.
397 |             temp_img_path, image_url = upload_single_image(image, 1)
398 |             temp_paths = [temp_img_path]
399 | 
400 |             # Upload the second image, if provided.
401 |             image_2_url = None
402 |             if image_2 is not None:
403 |                 temp_path_2, image_2_url = upload_single_image(image_2, 2)
404 |                 if temp_path_2:
405 |                     temp_paths.append(temp_path_2)
406 | 
407 |             # Upload the third image, if provided.
408 |             image_3_url = None
409 |             if image_3 is not None:
410 |                 temp_path_3, image_3_url = upload_single_image(image_3, 3)
411 |                 if temp_path_3:
412 |                     temp_paths.append(temp_path_3)
413 | 
414 |             # Collect every image reference; per the official docs, multi-image
415 |             # requests pass an image_url array.
416 |             image_urls = []
417 |             image_base64s = []
418 | 
419 |             if image_url:
420 |                 image_urls.append(image_url)
421 |             else:
422 |                 image_base64s.append(tensor_to_base64_url(image))
423 | 
424 |             # Add the second image.
425 |             if image_2 is not None:
426 |                 if image_2_url:
427 |                     image_urls.append(image_2_url)
428 |                     print(f"✅ 已添加第二张图片URL")
429 |                 else:
430 |                     image_base64s.append(tensor_to_base64_url(image_2))
431 |                     print(f"✅ 已添加第二张图片(base64)")
432 | 
433 |             # Add the third image.
434 |             if image_3 is not None:
435 |                 if image_3_url:
436 |                     image_urls.append(image_3_url)
437 |                     print(f"✅ 已添加第三张图片URL")
438 |                 else:
439 |                     image_base64s.append(tensor_to_base64_url(image_3))
440 |                     print(f"✅ 已添加第三张图片(base64)")
441 | 
442 |             # Build the payload.
443 |             payload = {
444 |                 'model': model,
445 |                 'prompt': prompt
446 |             }
447 | 
448 |             # Per the official docs, image_url is always an array, even for a single image.
449 |             if len(image_urls) > 0:
450 |                 payload['image_url'] = image_urls
451 |                 print(f"📸 使用URL模式,共{len(image_urls)}张图片: {image_urls}")
452 |             elif len(image_base64s) > 0:
453 |                 # The base64 path also uses an array.
454 |                 payload['image'] = image_base64s
455 |                 print(f"📸 使用Base64模式,共{len(image_base64s)}张图片")
456 | 
457 |             if negative_prompt.strip():
458 |                 payload['negative_prompt'] = negative_prompt
459 |                 print(f"🚫 负向提示词: {negative_prompt}")
460 | 
461 |             # Optional generation parameters; only send non-default values.
462 |             if width != 512 or height != 512:
463 |                 size = f"{width}x{height}"
464 |                 payload['size'] = size
465 |                 print(f"📏 图像尺寸: {size}")
466 | 
467 |             if steps != 30:
468 |                 payload['steps'] = steps
469 |                 print(f"采样步数: {steps}")
470 | 
471 |             if guidance != 3.5:
472 |                 payload['guidance'] = guidance
473 |                 print(f"🧭 引导系数: {guidance}")
474 | 
475 |             if seed != -1:
476 |                 payload['seed'] = seed
477 |                 print(f"🎲 随机种子: {seed}")
478 | 
479 |             headers = {
480 |                 'Authorization': f'Bearer {api_token}',
481 |                 'Content-Type': 'application/json',
482 |                 'X-ModelScope-Async-Mode': 'true'
483 |             }
484 | 
485 |             print(f"开始编辑图片...")
486 |             print(f"编辑提示: {prompt}")
487 | 
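488 |             # X-ModelScope-Async-Mode makes the endpoint return a task_id
489 |             # immediately; the result is then fetched by polling the tasks
490 |             # endpoint below.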
491 |             url = 'https://api-inference.modelscope.cn/v1/images/generations'
492 |             submission_response = requests.post(
493 |                 url,
494 |                 data=json.dumps(payload, ensure_ascii=False).encode('utf-8'),
495 |                 headers=headers,
496 |                 timeout=config.get("timeout", 60)
497 |             )
498 | 
499 |             if submission_response.status_code != 200:
500 |                 raise Exception(f"API请求失败: {submission_response.status_code}, {submission_response.text}")
501 | 
502 |             submission_json = submission_response.json()
503 |             result_image_url = None
504 | 
505 |             if 'task_id' in submission_json:
506 |                 task_id = submission_json['task_id']
507 |                 print(f"🕒 已提交任务,任务ID: {task_id},开始轮询...")
508 |                 poll_start = time.time()
509 |                 max_wait_seconds = max(60, config.get('timeout', 720))
510 | 
511 |                 while True:
512 |                     task_resp = requests.get(
513 |                         f"https://api-inference.modelscope.cn/v1/tasks/{task_id}",
514 |                         headers={
515 |                             'Authorization': f'Bearer {api_token}',
516 |                             'X-ModelScope-Task-Type': 'image_generation'
517 |                         },
518 |                         timeout=config.get("image_download_timeout", 120)
519 |                     )
520 | 
521 |                     if task_resp.status_code != 200:
522 |                         raise Exception(f"任务查询失败: {task_resp.status_code}, {task_resp.text}")
523 | 
524 |                     task_data = task_resp.json()
525 |                     status = task_data.get('task_status')
526 | 
527 |                     if status == 'SUCCEED':
528 |                         output_images = task_data.get('output_images') or []
529 |                         if not output_images:
530 |                             raise Exception("任务成功但未返回图片URL")
531 |                         result_image_url = output_images[0]
532 |                         print("任务完成,开始下载编辑后的图片...")
533 |                         break
534 | 
535 |                     if status == 'FAILED':
536 |                         error_message = task_data.get('errors', {}).get('message', '未知错误')
537 |                         error_code = task_data.get('errors', {}).get('code', '未知错误码')
538 |                         raise Exception(f"任务失败: 错误码 {error_code}, 错误信息: {error_message}")
539 | 
540 |                     if time.time() - poll_start > max_wait_seconds:
541 |                         raise Exception("任务轮询超时,请稍后重试或降低并发")
542 | 
543 |                     time.sleep(5)
544 |             else:
545 |                 raise Exception(f"未识别的API返回格式: {submission_json}")
546 | 
547 |             img_response = requests.get(result_image_url, timeout=config.get("image_download_timeout", 30))
548 |             if img_response.status_code != 200:
549 |                 raise Exception(f"图片下载失败: {img_response.status_code}")
550 | 
551 |             pil_image = Image.open(BytesIO(img_response.content))
552 |             if pil_image.mode != 'RGB':
553 |                 pil_image = pil_image.convert('RGB')
554 | 
555 |             image_np = np.array(pil_image).astype(np.float32) / 255.0
556 |             image_tensor = torch.from_numpy(image_np)[None,]
557 | 
558 |             # Clean up every temporary file.
559 |             for temp_path in temp_paths:
560 |                 if temp_path and os.path.exists(temp_path):
561 |                     try:
562 |                         os.remove(temp_path)
563 |                     except Exception:
564 |                         pass
565 | 
566 |             print(f"🎉 图片编辑完成!")
567 |             return (image_tensor,)
568 | 
569 |         except Exception as e:
570 |             print(f"Qwen-Image-Edit API调用失败: {str(e)}")
571 |             # Best-effort cleanup of any temp files created before the failure
572 |             # (`temp_paths` may not exist if it failed very early).
573 |             for temp_path in locals().get("temp_paths") or []:
574 |                 if temp_path and os.path.exists(temp_path):
575 |                     try:
576 |                         os.remove(temp_path)
577 |                     except Exception:
578 |                         pass
579 | 
580 |             # Fall back to the original input. It is already a batched
581 |             # (B, H, W, C) tensor, so return it unchanged rather than adding
582 |             # another dimension with unsqueeze(0).
583 |             return (image,)
584 | 
585 | NODE_CLASS_MAPPINGS = {
586 |     "QwenImageNode": QwenImageNode,
587 |     "QwenImageEditNode": QwenImageEditNode
588 | }
589 | 
590 | NODE_DISPLAY_NAME_MAPPINGS = {
591 |     "QwenImageNode": "Qwen-Image 生图节点",
592 |     "QwenImageEditNode": "Qwen-Image 图像编辑节点"
593 | }
--------------------------------------------------------------------------------
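A minimal offline sketch of the detection post-processing in qwen_detection_node.py, handy for sanity-checking the coordinate scaling without an API key. The import path is an assumption (it presumes the plugin directory is on PYTHONPATH and that parse_json passes plain JSON through unchanged):

    # offline_parse_demo.py -- illustrative sketch, not part of the plugin
    from qwen_detection_node import parse_boxes  # assumed import path

    # A canned reply in the format the detection prompt requests.
    reply = '[{"bbox_2d": [256, 256, 512, 512], "label": "logo"}]'

    # Scale from the assumed 1024x1024 model space to a 2048x1536 image.
    boxes = parse_boxes(reply, 2048, 1536, 1024, 1024)
    print(boxes)  # [{'score': 1.0, 'bbox': [512, 384, 1024, 768], 'label': 'logo'}]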