├── .gitignore
├── requirements.txt
├── config.json
├── __init__.py
├── install_dependencies.py
├── README.md
├── verify_installation.py
├── troubleshoot.py
├── qwen_text_node.py
├── qwen_vision_node.py
├── qwen_video_node.py
├── qwen_detection_node.py
└── qwen_image_node.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / cache
2 | __pycache__/
3 | *.py[cod]
4 | *.class
5 | 
6 | # macOS
7 | .DS_Store
8 | 
9 | # Env / tokens / outputs
10 | .qwen_token
11 | *.jpg
12 | *.png
13 | *.jpeg
14 | 
15 | # Editors
16 | .vscode/
17 | .idea/
18 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Qwen-Image ComfyUI Plugin Requirements
2 | 
3 | # Core dependencies (usually already available in ComfyUI)
4 | requests>=2.25.0
5 | pillow>=8.0.0
6 | torch>=1.9.0
7 | numpy>=1.20.0
8 | 
9 | # Additional dependencies for vision functionality
10 | openai>=1.0.0
11 | 
12 | # Dependencies for object detection functionality
13 | torchvision
14 | matplotlib
15 | 
16 | # Network and proxy support
17 | httpx[socks]>=0.24.0
18 | socksio>=1.0.0
19 | 
20 | # Optional dependencies for enhanced functionality
21 | # pydantic-settings # For advanced configuration management
--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "default_model": "Qwen/Qwen-Image",
3 |   "default_vision_model": "stepfun-ai/step3",
4 |   "timeout": 720,
5 |   "image_download_timeout": 30,
6 |   "default_prompt": "A beautiful landscape",
7 |   "default_negative_prompt": "",
8 |   "default_width": 512,
9 |   "default_height": 512,
10 |   "default_seed": -1,
11 |   "default_steps": 30,
12 |   "default_guidance": 7.5,
13 |   "default_chat_model": "Qwen/Qwen3-Coder-480B-A35B-Chat",
14 |   "default_chat_prompt": "你好,请介绍一下你自己。",
15 |   "default_text_model": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
16 |   "default_system_prompt": "You are a helpful assistant.",
17 |   "default_user_prompt": "你好,请介绍一下你自己。",
18 |   "default_text_seed": -1,
19 |   "default_vision_seed": -1,
20 |   "api_token": "",
21 |   "cloudinary_cloud_name": "dxao8lzi7",
22 |   "cloudinary_api_key": "259917876186436",
23 |   "cloudinary_api_secret": ""
24 | }
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from .qwen_image_node import NODE_CLASS_MAPPINGS as IMAGE_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as IMAGE_DISPLAY_MAPPINGS
2 | from .qwen_vision_node import NODE_CLASS_MAPPINGS as VISION_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as VISION_DISPLAY_MAPPINGS
3 | from .qwen_text_node import NODE_CLASS_MAPPINGS as TEXT_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as TEXT_DISPLAY_MAPPINGS
4 | from .qwen_detection_node import NODE_CLASS_MAPPINGS as DETECTION_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as DETECTION_DISPLAY_MAPPINGS
5 | from .qwen_video_node import NODE_CLASS_MAPPINGS as VIDEO_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS as VIDEO_DISPLAY_MAPPINGS
6 | 
7 | NODE_CLASS_MAPPINGS = {**IMAGE_MAPPINGS, **VISION_MAPPINGS, **TEXT_MAPPINGS, **DETECTION_MAPPINGS, **VIDEO_MAPPINGS}
8 | NODE_DISPLAY_NAME_MAPPINGS = {**IMAGE_DISPLAY_MAPPINGS, **VISION_DISPLAY_MAPPINGS, **TEXT_DISPLAY_MAPPINGS, **DETECTION_DISPLAY_MAPPINGS, **VIDEO_DISPLAY_MAPPINGS}
9 | 
10 | __all__ = ['NODE_CLASS_MAPPINGS', 'NODE_DISPLAY_NAME_MAPPINGS']
11 | 
12 | WEB_DIRECTORY = "./js"
13 | 
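上面的 `__init__.py` 通过字典合并把五个节点模块的映射聚合成 ComfyUI 所需的 `NODE_CLASS_MAPPINGS` 和 `NODE_DISPLAY_NAME_MAPPINGS`。若以后要接入新的节点模块,按同样模式扩展即可(示意代码,`qwen_audio_node` 为假设的模块名,仓库中并不存在):

```python
# 示意:把一个假设的第六个节点模块并入聚合映射
# 注意:qwen_audio_node 仅为示例名称,本仓库中没有该文件
from .qwen_audio_node import (
    NODE_CLASS_MAPPINGS as AUDIO_MAPPINGS,
    NODE_DISPLAY_NAME_MAPPINGS as AUDIO_DISPLAY_MAPPINGS,
)

NODE_CLASS_MAPPINGS = {**NODE_CLASS_MAPPINGS, **AUDIO_MAPPINGS}
NODE_DISPLAY_NAME_MAPPINGS = {**NODE_DISPLAY_NAME_MAPPINGS, **AUDIO_DISPLAY_MAPPINGS}
```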
__version__ = "1.0.0" -------------------------------------------------------------------------------- /install_dependencies.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import subprocess 5 | import sys 6 | import os 7 | 8 | def install_package(package): 9 | """安装Python包""" 10 | try: 11 | subprocess.check_call([sys.executable, "-m", "pip", "install", package]) 12 | return True 13 | except subprocess.CalledProcessError as e: 14 | print(f"安装 {package} 失败: {e}") 15 | return False 16 | 17 | def check_package(package_name): 18 | """检查包是否已安装""" 19 | try: 20 | __import__(package_name) 21 | return True 22 | except ImportError: 23 | return False 24 | 25 | def main(): 26 | print("=" * 60) 27 | print("Qwen-Image ComfyUI 插件依赖安装工具") 28 | print("=" * 60) 29 | 30 | # 检查核心依赖 31 | core_deps = { 32 | 'requests': 'requests', 33 | 'PIL': 'pillow', 34 | 'torch': 'torch', 35 | 'numpy': 'numpy' 36 | } 37 | 38 | print("\n🔍 检查核心依赖...") 39 | missing_core = [] 40 | for import_name, package_name in core_deps.items(): 41 | if check_package(import_name): 42 | print(f"✅ {package_name} 已安装") 43 | else: 44 | print(f"❌ {package_name} 未安装") 45 | missing_core.append(package_name) 46 | 47 | # 检查图生文功能依赖 48 | print("\n🔍 检查图生文功能依赖...") 49 | vision_deps = { 50 | 'openai': 'openai', 51 | 'httpx': 'httpx[socks]', 52 | 'socksio': 'socksio' 53 | } 54 | 55 | missing_vision = [] 56 | for import_name, package_name in vision_deps.items(): 57 | if check_package(import_name): 58 | print(f"✅ {package_name} 已安装") 59 | else: 60 | print(f"❌ {package_name} 未安装") 61 | missing_vision.append(package_name) 62 | 63 | # 安装缺失的依赖 64 | all_missing = missing_core + missing_vision 65 | 66 | if not all_missing: 67 | print("\n🎉 所有依赖都已安装!") 68 | return 69 | 70 | print(f"\n📦 需要安装 {len(all_missing)} 个依赖包:") 71 | for pkg in all_missing: 72 | print(f" - {pkg}") 73 | 74 | response = input("\n是否现在安装这些依赖?(y/n): ").lower().strip() 75 | 76 | if response in ['y', 'yes', '是']: 77 | print("\n🚀 开始安装依赖...") 78 | success_count = 0 79 | 80 | for package in all_missing: 81 | print(f"\n📦 安装 {package}...") 82 | if install_package(package): 83 | print(f"✅ {package} 安装成功") 84 | success_count += 1 85 | else: 86 | print(f"❌ {package} 安装失败") 87 | 88 | print(f"\n📊 安装结果: {success_count}/{len(all_missing)} 个包安装成功") 89 | 90 | if success_count == len(all_missing): 91 | print("🎉 所有依赖安装完成!请重启ComfyUI。") 92 | else: 93 | print("⚠️ 部分依赖安装失败,请手动安装或检查网络连接。") 94 | print("\n手动安装命令:") 95 | for package in all_missing: 96 | print(f" pip install {package}") 97 | else: 98 | print("\n取消安装。") 99 | print("\n手动安装命令:") 100 | for package in all_missing: 101 | print(f" pip install {package}") 102 | 103 | if __name__ == "__main__": 104 | main() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI Qwen-Image 节点 2 | 3 | 本仓库提供了 [魔搭社区开放API](https://modelscope.cn/) 的 Qwen-Image 模型在 ComfyUI 中的节点实现。 4 | 5 | ## 特性 6 | 7 | - 支持通过 [魔搭社区](https://modelscope.cn/) 的 API 调用 Qwen-Image 模型 8 | - 支持图像尺寸、采样步数、引导系数等参数设置 9 | - 支持随机种子与固定种子 10 | - 支持负向提示词 11 | - 支持 API Token 保存(首次填写后自动保存到 config.json) 12 | - 支持图像编辑功能 (Qwen-Image-Edit 模型) 13 | - 支持目标检测功能 (Qwen2.5-VL 模型) 14 | - 支持边界框检测和可视化 15 | - 支持与 SAM2 等分割节点配合使用 16 | 17 | ## 安装 18 | 19 | 1. 
克隆本仓库到 ComfyUI 的 `custom_nodes` 目录下: 20 | 21 | ``` 22 | cd ComfyUI/custom_nodes 23 | git clone https://github.com/111496583yzy/comfyui-modelscope-qwen-image.git comfyui-qwen-image 24 | ``` 25 | 26 | 2. 重启 ComfyUI 服务 27 | 28 | ## 使用方法 29 | 30 | ### 1. 获取魔搭API Token 31 | 32 | 访问 [魔搭社区](https://modelscope.cn/) 并登录,在个人资料页获取 API Token。 33 | 34 | ### 2. Qwen-Image 生图节点 35 | 36 | 在 ComfyUI 编辑器中添加 `Qwen-Image 生图节点`,设置以下参数: 37 | 38 | - **prompt**: 文本提示词 39 | - **api_token**: 魔搭API Token (首次填写后会自动保存) 40 | - **model**: 模型名称(默认为 "Qwen/Qwen-Image") 41 | - **negative_prompt**: 负向提示词(可选) 42 | - **width/height**: 图像宽高(默认512x512) 43 | - **seed**: 随机种子(-1表示使用随机种子) 44 | - **steps**: 采样步数(默认30) 45 | - **guidance**: 引导系数(默认7.5) 46 | 47 | ### 3. Qwen-Image 图像编辑节点 48 | 49 | 在 ComfyUI 编辑器中添加 `Qwen-Image 图像编辑节点`,设置以下参数: 50 | 51 | - **image**: 要编辑的原始图像 52 | - **prompt**: 描述要进行的编辑的文本提示词 53 | - **api_token**: 魔搭API Token (首次填写后会自动保存) 54 | - **model**: 模型名称(默认为 "Qwen/Qwen-Image-Edit") 55 | - **negative_prompt**: 负向提示词(可选) 56 | - **width/height**: 图像宽高(默认512x512,范围64-1664) 57 | - **steps**: 采样步数(范围1-100,默认30) 58 | - **guidance**: 引导系数(范围1.5-20.0,默认3.5) 59 | - **seed**: 随机种子(-1表示使用随机种子,0-2147483647为固定种子) 60 | 61 | ### 4. Qwen2.5-VL 目标检测节点 62 | 63 | #### 4.1 Qwen2.5-VL API 配置节点 64 | 65 | 在 ComfyUI 编辑器中添加 `Qwen2.5-VL API Configuration` 节点,设置以下参数: 66 | 67 | - **base_url**: API服务的基础URL(默认:https://api-inference.modelscope.cn/v1) 68 | - **api_key**: API密钥(必需) 69 | - **model_name**: 模型名称(如:Qwen/Qwen2.5-VL-72B-Instruct) 70 | - **timeout**: 请求超时时间(秒) 71 | 72 | #### 4.2 Qwen2.5-VL API 目标检测节点 73 | 74 | 在 ComfyUI 编辑器中添加 `Qwen2.5-VL API Object Detection` 节点,设置以下参数: 75 | 76 | - **qwen_api_config**: 连接上述配置节点的输出 77 | - **image**: 要检测的图像 78 | - **target**: 要检测的目标对象(如 "cat"、"人脸"、"logo" 等) 79 | - **bbox_selection**: 边界框选择("all" 返回所有框,或指定索引如 "0,2") 80 | - **score_threshold**: 置信度阈值(0.0-1.0) 81 | - **merge_boxes**: 是否合并选定的边界框 82 | 83 | #### 4.3 为 SAM2 准备边界框节点 84 | 85 | 在 ComfyUI 编辑器中添加 `Prepare BBoxes for SAM2` 节点,用于将检测结果转换为 SAM2 节点期望的格式。 86 | 87 | ## 工作流示例 88 | 89 | ### 文本生图 90 | 91 | 1. 添加 `Qwen-Image 生图节点` 并设置提示词和其他参数 92 | 2. 连接输出到 `Preview Image` 节点 93 | 94 | ### 图像编辑 95 | 96 | 1. 准备一张原始图像(使用 `Load Image` 或其他方式) 97 | 2. 添加 `Qwen-Image 图像编辑节点` 98 | 3. 将原始图像连接到编辑节点的 `image` 输入 99 | 4. 设置编辑提示词(如"把狗变成猫") 100 | 5. 连接输出到 `Preview Image` 节点 101 | 102 | ### 目标检测 103 | 104 | 1. 准备一张要检测的图像(使用 `Load Image` 或其他方式) 105 | 2. 添加 `Qwen2.5-VL API Configuration` 节点并配置API参数 106 | 3. 添加 `Qwen2.5-VL API Object Detection` 节点 107 | 4. 将配置节点连接到检测节点的 `qwen_api_config` 输入 108 | 5. 将图像连接到检测节点的 `image` 输入 109 | 6. 设置要检测的目标对象(如 "cat"、"人脸"、"logo" 等) 110 | 7. 连接检测节点的 `preview` 输出到 `Preview Image` 节点查看检测结果 111 | 8. 连接检测节点的 `bboxes` 输出到 `Prepare BBoxes for SAM2` 节点(可选) 112 | 9. 将 SAM2 准备节点的输出连接到 SAM2 分割节点进行进一步处理 113 | 114 | ## Cloudinary 视频存储配置 115 | 116 | 本插件支持将视频上传到 Cloudinary 云存储服务,以获得更稳定的视频URL用于AI分析。 117 | 118 | ### 配置步骤 119 | 120 | #### 1. 注册 Cloudinary 账号 121 | - 访问 [Cloudinary官网](https://cloudinary.com/) 122 | - 注册免费账号(每月有免费额度) 123 | 124 | #### 2. 获取 API 凭据 125 | 登录 Cloudinary 控制台后,在 **API Keys** 页面可以找到: 126 | - **Cloud Name**: 你的云名称(在页面顶部显示) 127 | - **API Key**: API密钥(在表格的 "API Key" 列中) 128 | - **API Secret**: API密钥密码(在表格的 "API Secret" 列中,点击眼睛图标显示) 129 | 130 | **重要**: API Secret 默认被星号隐藏,需要点击旁边的眼睛图标 👁️ 才能看到完整内容! 131 | 132 | #### 3. 
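拿到凭据后,先了解一下插件使用的 Cloudinary 签名上传机制会更容易排错:签名是把参与上传的参数按字母序拼接成字符串,在末尾直接附加 API Secret,再取 SHA-1。下面是与 `qwen_video_node.py` 中 `upload_video_to_cloudinary` 一致的最小示意(占位值需换成你自己的凭据):

```python
# 最小示意:Cloudinary 签名上传的签名计算方式(与插件实现一致)
import hashlib
import time

api_secret = "你的API Secret"  # 占位值,来自 Cloudinary 控制台
timestamp = str(int(time.time()))
public_id = f"comfyui_video_{timestamp}"

# 参与签名的参数按字母序排列(public_id 在 timestamp 之前),末尾附加 API Secret
sign_string = f"public_id={public_id}&timestamp={timestamp}{api_secret}"
signature = hashlib.sha1(sign_string.encode()).hexdigest()
print(signature)  # 作为表单字段 signature 随上传请求一起提交
```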
配置 config.json 133 | 在 `config.json` 文件中添加以下配置: 134 | 135 | ```json 136 | { 137 | "cloudinary_cloud_name": "你的云名称", 138 | "cloudinary_api_key": "你的API密钥", 139 | "cloudinary_api_secret": "点击眼睛图标后显示的完整密钥" 140 | } 141 | ``` 142 | 143 | ### 使用说明 144 | - 配置完成后,插件会优先使用 Cloudinary 上传视频 145 | - 如果 Cloudinary 上传失败,会自动回退到 base64 方式直接传输视频数据 146 | - 上传成功后,会使用 Cloudinary 的 HTTPS URL 进行AI分析 147 | 148 | ### 优势 149 | - **更稳定**: Cloudinary 是专业的云存储服务 150 | - **更快速**: 全球CDN加速 151 | - **更安全**: HTTPS 加密传输 152 | - **更可靠**: 99.9% 服务可用性 153 | 154 | ### 注意事项 155 | - 请妥善保管你的 API 凭据,不要泄露给他人 156 | - 免费账号有使用限制,超出后需要付费 157 | - 建议定期检查 Cloudinary 控制台的使用情况 158 | 159 | ## 注意事项 160 | 161 | - API 调用需要网络连接 162 | - 高峰时期可能需要等待较长时间 163 | - 请遵守魔搭社区的使用政策 164 | - 如遇到错误代码429,表示请求过多,需要等待一段时间后重试 165 | 166 | ## License 167 | 168 | MIT 169 | -------------------------------------------------------------------------------- /verify_installation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import sys 6 | 7 | def check_files(): 8 | required_files = [ 9 | '__init__.py', 10 | 'qwen_image_node.py', 11 | 'qwen_vision_node.py', 12 | 'qwen_text_node.py', 13 | 'config.json', 14 | 'README.md', 15 | 'requirements.txt' 16 | ] 17 | 18 | print("📁 检查文件完整性...") 19 | missing_files = [] 20 | 21 | for file in required_files: 22 | if os.path.exists(file): 23 | print(f"✅ {file}") 24 | else: 25 | print(f"❌ {file} (缺失)") 26 | missing_files.append(file) 27 | 28 | return len(missing_files) == 0 29 | 30 | def check_dependencies(): 31 | print("\n📦 检查依赖包...") 32 | 33 | deps = { 34 | 'requests': '网络请求', 35 | 'PIL': '图像处理', 36 | 'torch': '深度学习框架', 37 | 'numpy': '数值计算', 38 | 'openai': '文本生成和图生文功能', 39 | 'httpx': '高级HTTP客户端', 40 | 'socksio': 'SOCKS代理支持' 41 | } 42 | 43 | missing_deps = [] 44 | 45 | for dep, desc in deps.items(): 46 | try: 47 | __import__(dep) 48 | print(f"✅ {dep} ({desc})") 49 | except ImportError: 50 | print(f"❌ {dep} ({desc}) - 未安装") 51 | missing_deps.append(dep) 52 | 53 | return len(missing_deps) == 0, missing_deps 54 | 55 | def check_proxy_support(): 56 | print("\n🌐 检查代理支持...") 57 | 58 | try: 59 | import httpx 60 | try: 61 | import socksio 62 | print("✅ SOCKS代理支持已安装") 63 | return True 64 | except ImportError: 65 | print("⚠️ SOCKS代理支持未安装,如果使用代理可能会出错") 66 | print(" 建议运行: pip install httpx[socks] socksio") 67 | return False 68 | except ImportError: 69 | print("❌ httpx未安装") 70 | return False 71 | 72 | def check_node_loading(): 73 | print("\n🔧 检查节点加载...") 74 | 75 | try: 76 | from qwen_image_node import QwenImageNode 77 | node = QwenImageNode() 78 | input_types = node.INPUT_TYPES() 79 | print("✅ 文生图节点加载成功") 80 | 81 | from qwen_vision_node import QwenVisionNode, OPENAI_AVAILABLE 82 | if OPENAI_AVAILABLE: 83 | vision_node = QwenVisionNode() 84 | vision_input_types = vision_node.INPUT_TYPES() 85 | print("✅ 图生文节点加载成功") 86 | else: 87 | print("⚠️ 图生文节点加载成功,但OpenAI库不可用") 88 | 89 | from qwen_text_node import QwenTextNode 90 | if OPENAI_AVAILABLE: 91 | text_node = QwenTextNode() 92 | text_input_types = text_node.INPUT_TYPES() 93 | print("✅ 文本生成节点加载成功") 94 | else: 95 | print("⚠️ 文本生成节点加载成功,但OpenAI库不可用") 96 | 97 | return True 98 | except Exception as e: 99 | print(f"❌ 节点加载失败: {e}") 100 | return False 101 | 102 | def check_config(): 103 | print("\n⚙️ 检查配置文件...") 104 | 105 | try: 106 | import json 107 | with open('config.json', 'r', encoding='utf-8') as f: 108 | config = json.load(f) 109 | 110 | required_keys = [ 111 | 'default_model', 112 | 
'default_vision_model', 113 | 'default_text_model', 114 | 'timeout', 115 | 'default_prompt' 116 | ] 117 | 118 | missing_keys = [] 119 | for key in required_keys: 120 | if key in config: 121 | print(f"✅ {key}: {config[key]}") 122 | else: 123 | print(f"❌ {key} (缺失)") 124 | missing_keys.append(key) 125 | 126 | return len(missing_keys) == 0 127 | except Exception as e: 128 | print(f"❌ 配置文件读取失败: {e}") 129 | return False 130 | 131 | def main(): 132 | print("=" * 60) 133 | print("Qwen-Image ComfyUI 插件安装验证") 134 | print("=" * 60) 135 | 136 | checks = [ 137 | ("文件完整性", check_files), 138 | ("依赖包", lambda: check_dependencies()[0]), 139 | ("代理支持", check_proxy_support), 140 | ("配置文件", check_config), 141 | ("节点加载", check_node_loading), 142 | ] 143 | 144 | passed = 0 145 | total = len(checks) 146 | 147 | for check_name, check_func in checks: 148 | print(f"\n🔍 {check_name}检查...") 149 | try: 150 | if check_func(): 151 | passed += 1 152 | print(f"✅ {check_name}检查通过") 153 | else: 154 | print(f"❌ {check_name}检查失败") 155 | except Exception as e: 156 | print(f"❌ {check_name}检查出错: {e}") 157 | 158 | print("-" * 40) 159 | 160 | deps_ok, missing_deps = check_dependencies() 161 | if not deps_ok: 162 | print(f"\n📦 缺失的依赖包: {', '.join(missing_deps)}") 163 | print("运行以下命令安装:") 164 | print("python install_dependencies.py") 165 | print("或手动安装:") 166 | for dep in missing_deps: 167 | if dep == 'httpx': 168 | print(f" pip install httpx[socks]") 169 | else: 170 | print(f" pip install {dep}") 171 | 172 | print(f"\n📊 验证结果: {passed}/{total} 项检查通过") 173 | 174 | if passed >= total - 1: 175 | print("\n🎉 插件安装验证成功!") 176 | print("\n📋 下一步操作:") 177 | print("1. 将整个插件文件夹复制到 ComfyUI/custom_nodes/ 目录") 178 | print("2. 重启ComfyUI") 179 | print("3. 在节点列表中查找 'QwenImage' 分类") 180 | print("4. 准备好您的魔搭API Token") 181 | 182 | current_path = os.getcwd() 183 | if 'custom_nodes' in current_path: 184 | print("\n✅ 检测到您已在ComfyUI的custom_nodes目录中") 185 | print(" 请直接重启ComfyUI即可使用") 186 | else: 187 | print(f"\n📁 当前路径: {current_path}") 188 | print(" 请确保将插件复制到正确的ComfyUI目录") 189 | 190 | if not check_proxy_support(): 191 | print("\n⚠️ 代理支持提醒:") 192 | print(" 如果您使用代理上网,建议安装代理支持包:") 193 | print(" pip install httpx[socks] socksio") 194 | else: 195 | print("\n⚠️ 插件安装验证失败,请修复上述问题后重试") 196 | 197 | if __name__ == "__main__": 198 | main() -------------------------------------------------------------------------------- /troubleshoot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Qwen-Image ComfyUI 插件故障排除工具 6 | 自动诊断和解决常见问题 7 | """ 8 | 9 | import os 10 | import sys 11 | import subprocess 12 | import json 13 | 14 | def print_header(title): 15 | """打印标题""" 16 | print("\n" + "=" * 60) 17 | print(f" {title}") 18 | print("=" * 60) 19 | 20 | def print_section(title): 21 | """打印章节标题""" 22 | print(f"\n🔍 {title}") 23 | print("-" * 40) 24 | 25 | def run_command(command, description): 26 | """运行命令并返回结果""" 27 | print(f"📋 {description}") 28 | print(f"💻 命令: {command}") 29 | 30 | try: 31 | result = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=30) 32 | if result.returncode == 0: 33 | print("✅ 成功") 34 | if result.stdout.strip(): 35 | print(f"📄 输出: {result.stdout.strip()}") 36 | return True, result.stdout 37 | else: 38 | print("❌ 失败") 39 | if result.stderr.strip(): 40 | print(f"🚨 错误: {result.stderr.strip()}") 41 | return False, result.stderr 42 | except subprocess.TimeoutExpired: 43 | print("⏰ 超时") 44 | return False, "命令执行超时" 45 | except Exception as e: 
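# 说明:run_command 统一以 shell=True 执行诊断命令并设置 30 秒超时;
# 成功返回 (True, stdout),命令失败、超时或抛出异常时返回 (False, 错误信息)。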
46 | print(f"💥 异常: {str(e)}") 47 | return False, str(e) 48 | 49 | def check_python_environment(): 50 | """检查Python环境""" 51 | print_section("Python环境检查") 52 | 53 | # Python版本 54 | run_command("python --version", "检查Python版本") 55 | 56 | # pip版本 57 | run_command("pip --version", "检查pip版本") 58 | 59 | # 已安装的包 60 | print("\n📦 检查关键包安装状态:") 61 | packages = ['requests', 'pillow', 'torch', 'numpy', 'openai', 'httpx', 'socksio'] 62 | 63 | for package in packages: 64 | try: 65 | __import__(package) 66 | print(f"✅ {package}") 67 | except ImportError: 68 | print(f"❌ {package} (未安装)") 69 | 70 | def check_files(): 71 | """检查文件完整性""" 72 | print_section("文件完整性检查") 73 | 74 | required_files = [ 75 | '__init__.py', 76 | 'qwen_image_node.py', 77 | 'qwen_vision_node.py', 78 | 'config.json', 79 | 'requirements.txt' 80 | ] 81 | 82 | for file in required_files: 83 | if os.path.exists(file): 84 | size = os.path.getsize(file) 85 | print(f"✅ {file} ({size} bytes)") 86 | else: 87 | print(f"❌ {file} (缺失)") 88 | 89 | def check_config(): 90 | """检查配置文件""" 91 | print_section("配置文件检查") 92 | 93 | try: 94 | with open('config.json', 'r', encoding='utf-8') as f: 95 | config = json.load(f) 96 | 97 | print("✅ config.json 格式正确") 98 | 99 | # 检查关键配置项 100 | key_configs = [ 101 | 'default_model', 102 | 'default_vision_model', 103 | 'timeout', 104 | 'default_prompt' 105 | ] 106 | 107 | for key in key_configs: 108 | if key in config: 109 | print(f"✅ {key}: {config[key]}") 110 | else: 111 | print(f"❌ {key} (缺失)") 112 | 113 | except Exception as e: 114 | print(f"❌ config.json 读取失败: {e}") 115 | 116 | def check_network(): 117 | """检查网络连接""" 118 | print_section("网络连接检查") 119 | 120 | # 检查基本网络连接 121 | run_command("ping -c 3 8.8.8.8", "检查基本网络连接") 122 | 123 | # 检查API服务器连接 124 | try: 125 | import requests 126 | response = requests.get('https://api-inference.modelscope.cn', timeout=10) 127 | print(f"✅ API服务器连接正常 (状态码: {response.status_code})") 128 | except Exception as e: 129 | print(f"❌ API服务器连接失败: {e}") 130 | 131 | # 检查代理设置 132 | proxy_vars = ['HTTP_PROXY', 'HTTPS_PROXY', 'SOCKS_PROXY'] 133 | print("\n🌐 代理环境变量:") 134 | for var in proxy_vars: 135 | value = os.environ.get(var) 136 | if value: 137 | print(f"✅ {var}: {value}") 138 | else: 139 | print(f"⚪ {var}: 未设置") 140 | 141 | def check_token(): 142 | """检查API Token""" 143 | print_section("API Token检查") 144 | 145 | token_sources = ['.qwen_token', 'config.json'] 146 | token_found = False 147 | 148 | for source in token_sources: 149 | if source == '.qwen_token' and os.path.exists(source): 150 | try: 151 | with open(source, 'r', encoding='utf-8') as f: 152 | token = f.read().strip() 153 | if token: 154 | print(f"✅ 在 {source} 中找到token (长度: {len(token)})") 155 | token_found = True 156 | else: 157 | print(f"⚪ {source} 存在但为空") 158 | except Exception as e: 159 | print(f"❌ 读取 {source} 失败: {e}") 160 | 161 | elif source == 'config.json': 162 | try: 163 | with open(source, 'r', encoding='utf-8') as f: 164 | config = json.load(f) 165 | token = config.get('api_token', '').strip() 166 | if token: 167 | print(f"✅ 在 {source} 中找到token (长度: {len(token)})") 168 | token_found = True 169 | else: 170 | print(f"⚪ {source} 中token为空") 171 | except Exception as e: 172 | print(f"❌ 读取 {source} 失败: {e}") 173 | 174 | if not token_found: 175 | print("❌ 未找到有效的API token") 176 | 177 | def run_diagnostic_tests(): 178 | """运行诊断测试""" 179 | print_section("诊断测试") 180 | 181 | tests = [ 182 | ("python verify_installation.py", "运行安装验证"), 183 | ("python test_vision_with_proxy.py", "运行代理测试"), 184 | ] 185 | 186 | for command, description in 
tests: 187 | if os.path.exists(command.split()[1]): 188 | success, output = run_command(command, description) 189 | if not success: 190 | print(f"⚠️ {description} 失败,请查看详细输出") 191 | else: 192 | print(f"⚪ {command.split()[1]} 不存在,跳过测试") 193 | 194 | def suggest_solutions(): 195 | """建议解决方案""" 196 | print_section("建议解决方案") 197 | 198 | solutions = [ 199 | "🔧 安装缺失依赖: python install_dependencies.py", 200 | "🔍 验证安装: python verify_installation.py", 201 | "🌐 测试代理: python test_vision_with_proxy.py", 202 | "📖 查看快速指南: cat QUICKSTART.md", 203 | "🔗 查看代理指南: cat PROXY_GUIDE.md", 204 | "🖼️ 查看图生文指南: cat VISION_GUIDE.md", 205 | "🔄 重启ComfyUI以加载更新", 206 | "🧹 清理Python缓存: rm -rf __pycache__", 207 | ] 208 | 209 | for solution in solutions: 210 | print(solution) 211 | 212 | def main(): 213 | print_header("Qwen-Image ComfyUI 插件故障排除工具") 214 | 215 | print("🚀 开始全面诊断...") 216 | 217 | # 运行所有检查 218 | check_python_environment() 219 | check_files() 220 | check_config() 221 | check_network() 222 | check_token() 223 | run_diagnostic_tests() 224 | suggest_solutions() 225 | 226 | print_header("诊断完成") 227 | 228 | print("\n💡 根据上述诊断结果:") 229 | print("1. 如果发现缺失依赖,运行: python install_dependencies.py") 230 | print("2. 如果网络有问题,查看: PROXY_GUIDE.md") 231 | print("3. 如果token有问题,重新输入API token") 232 | print("4. 如果文件缺失,重新下载插件") 233 | print("5. 完成修复后,重启ComfyUI") 234 | 235 | print("\n📞 如果问题仍然存在:") 236 | print("- 查看ComfyUI控制台的完整错误日志") 237 | print("- 尝试在不同网络环境下测试") 238 | print("- 确认ComfyUI版本兼容性") 239 | 240 | if __name__ == "__main__": 241 | main() -------------------------------------------------------------------------------- /qwen_text_node.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | import os 5 | 6 | try: 7 | from openai import OpenAI 8 | OPENAI_AVAILABLE = True 9 | except ImportError: 10 | print("警告: 未安装openai库,文本生成功能将不可用") 11 | print("请运行: pip install openai") 12 | OPENAI_AVAILABLE = False 13 | OpenAI = None 14 | 15 | def load_config(): 16 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 17 | try: 18 | with open(config_path, 'r', encoding='utf-8') as f: 19 | return json.load(f) 20 | except: 21 | return { 22 | "default_text_model": "Qwen/Qwen3-Coder-480B-A35B-Instruct", 23 | "timeout": 60, 24 | "default_system_prompt": "You are a helpful assistant.", 25 | "default_user_prompt": "你好" 26 | } 27 | 28 | def load_api_token(): 29 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 30 | try: 31 | cfg = load_config() 32 | token_from_cfg = cfg.get("api_token", "").strip() 33 | if token_from_cfg: 34 | return token_from_cfg 35 | except Exception as e: 36 | print(f"读取config.json中的token失败: {e}") 37 | try: 38 | if os.path.exists(token_path): 39 | with open(token_path, 'r', encoding='utf-8') as f: 40 | token = f.read().strip() 41 | return token if token else "" 42 | return "" 43 | except Exception as e: 44 | print(f"加载token失败: {e}") 45 | return "" 46 | 47 | def save_api_token(token): 48 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 49 | try: 50 | with open(token_path, 'w', encoding='utf-8') as f: 51 | f.write(token) 52 | cfg = load_config() 53 | cfg["api_token"] = token 54 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 55 | with open(config_path, 'w', encoding='utf-8') as f: 56 | json.dump(cfg, f, ensure_ascii=False, indent=2) 57 | return True 58 | except Exception as e: 59 | print(f"保存token失败: {e}") 60 | return False 61 | 62 | class QwenTextNode: 63 | def __init__(self): 64 
| pass 65 | 66 | @classmethod 67 | def INPUT_TYPES(cls): 68 | if not OPENAI_AVAILABLE: 69 | return { 70 | "required": { 71 | "error_message": ("STRING", { 72 | "default": "请先安装openai库: pip install openai", 73 | "multiline": True 74 | }), 75 | } 76 | } 77 | config = load_config() 78 | saved_token = load_api_token() 79 | return { 80 | "required": { 81 | "user_prompt": ("STRING", { 82 | "multiline": True, 83 | "default": config.get("default_user_prompt", "你好") 84 | }), 85 | "api_token": ("STRING", { 86 | "default": saved_token, 87 | "placeholder": "请输入您的魔搭API Token" 88 | }), 89 | }, 90 | "optional": { 91 | "system_prompt": ("STRING", { 92 | "multiline": True, 93 | "default": config.get("default_system_prompt", "You are a helpful assistant.") 94 | }), 95 | "model": ("STRING", { 96 | "default": config.get("default_text_model", "Qwen/Qwen3-Coder-480B-A35B-Instruct") 97 | }), 98 | "max_tokens": ("INT", { 99 | "default": 2000, 100 | "min": 100, 101 | "max": 8000 102 | }), 103 | "temperature": ("FLOAT", { 104 | "default": 0.7, 105 | "min": 0.1, 106 | "max": 2.0, 107 | "step": 0.1 108 | }), 109 | "stream": ("BOOLEAN", { 110 | "default": True 111 | }), 112 | "seed": ("INT", { 113 | "default": config.get("default_text_seed", -1), 114 | "min": -1, 115 | "max": 2147483647 116 | }), 117 | } 118 | } 119 | 120 | RETURN_TYPES = ("STRING",) 121 | RETURN_NAMES = ("response",) 122 | FUNCTION = "generate_text" 123 | CATEGORY = "QwenImage" 124 | 125 | def generate_text(self, user_prompt="", api_token="", system_prompt="You are a helpful assistant.", model="Qwen/Qwen3-Coder-480B-A35B-Instruct", max_tokens=2000, temperature=0.7, stream=True, seed=-1, error_message=""): 126 | if not OPENAI_AVAILABLE: 127 | return ("请先安装openai库: pip install openai",) 128 | 129 | config = load_config() 130 | 131 | if not api_token or api_token.strip() == "": 132 | raise Exception("请输入有效的API Token") 133 | 134 | saved_token = load_api_token() 135 | if api_token != saved_token: 136 | if save_api_token(api_token): 137 | print("API Token已自动保存") 138 | else: 139 | print("API Token保存失败,但不影响当前使用") 140 | 141 | try: 142 | print(f"💬 开始文本生成...") 143 | print(f"🤖 模型: {model}") 144 | print(f"📝 用户提示: {user_prompt[:50]}...") 145 | print(f"系统提示: {system_prompt[:50]}...") 146 | print(f"温度: {temperature}") 147 | print(f"📊 最大tokens: {max_tokens}") 148 | print(f"⚡ 流式输出: {stream}") 149 | 150 | # 处理随机种子 151 | if seed != -1: 152 | print(f"🎯 使用指定种子: {seed}") 153 | else: 154 | import random 155 | random_seed = random.randint(0, 2147483647) 156 | print(f"🎲 使用随机种子: {random_seed}") 157 | seed = random_seed 158 | 159 | client = OpenAI( 160 | base_url='https://api-inference.modelscope.cn/v1', 161 | api_key=api_token 162 | ) 163 | 164 | messages = [ 165 | { 166 | 'role': 'system', 167 | 'content': system_prompt 168 | }, 169 | { 170 | 'role': 'user', 171 | 'content': user_prompt 172 | } 173 | ] 174 | 175 | print(f"🚀 发送API请求...") 176 | 177 | response = client.chat.completions.create( 178 | model=model, 179 | messages=messages, 180 | max_tokens=max_tokens, 181 | temperature=temperature, 182 | stream=stream, 183 | seed=seed 184 | ) 185 | 186 | if stream: 187 | print("📡 接收流式响应...") 188 | full_response = "" 189 | for chunk in response: 190 | if chunk.choices[0].delta.content: 191 | content = chunk.choices[0].delta.content 192 | full_response += content 193 | print(content, end='', flush=True) 194 | 195 | print(f"\n流式生成完成!") 196 | print(f"📄 总长度: {len(full_response)} 字符") 197 | return (full_response,) 198 | else: 199 | result = response.choices[0].message.content 200 | 
print(f"文本生成完成!") 201 | print(f"📄 结果长度: {len(result)} 字符") 202 | print(f"📝 结果预览: {result[:100]}...") 203 | return (result,) 204 | 205 | except Exception as e: 206 | error_msg = f"文本生成失败: {str(e)}" 207 | print(f"{error_msg}") 208 | return (error_msg,) 209 | 210 | if OPENAI_AVAILABLE: 211 | NODE_CLASS_MAPPINGS = { 212 | "QwenTextNode": QwenTextNode 213 | } 214 | 215 | NODE_DISPLAY_NAME_MAPPINGS = { 216 | "QwenTextNode": "Qwen-Text 文本生成节点" 217 | } 218 | else: 219 | class OpenAINotInstalledNode: 220 | @classmethod 221 | def INPUT_TYPES(cls): 222 | return { 223 | "required": { 224 | "install_command": ("STRING", { 225 | "default": "pip install openai", 226 | "multiline": False 227 | }), 228 | } 229 | } 230 | 231 | RETURN_TYPES = ("STRING",) 232 | RETURN_NAMES = ("message",) 233 | FUNCTION = "show_install_message" 234 | CATEGORY = "QwenImage" 235 | 236 | def show_install_message(self, install_command): 237 | return ("请先安装openai库才能使用文本生成功能: " + install_command,) 238 | 239 | NODE_CLASS_MAPPINGS = { 240 | "QwenTextNode": OpenAINotInstalledNode 241 | } 242 | 243 | NODE_DISPLAY_NAME_MAPPINGS = { 244 | "QwenTextNode": "Qwen-Text 文本生成节点 (需要安装openai)" 245 | } -------------------------------------------------------------------------------- /qwen_vision_node.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | import torch 5 | import numpy as np 6 | from PIL import Image 7 | from io import BytesIO 8 | import os 9 | import base64 10 | import tempfile 11 | 12 | try: 13 | from openai import OpenAI 14 | OPENAI_AVAILABLE = True 15 | except ImportError: 16 | print("警告: 未安装openai库,图生文功能将不可用") 17 | print("请运行: pip install openai") 18 | OPENAI_AVAILABLE = False 19 | OpenAI = None 20 | 21 | def load_config(): 22 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 23 | try: 24 | with open(config_path, 'r', encoding='utf-8') as f: 25 | return json.load(f) 26 | except: 27 | return { 28 | "default_model": "stepfun-ai/step3", 29 | "timeout": 60, 30 | "default_prompt": "描述这幅图" 31 | } 32 | 33 | def load_api_token(): 34 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 35 | try: 36 | cfg = load_config() 37 | token_from_cfg = cfg.get("api_token", "").strip() 38 | if token_from_cfg: 39 | return token_from_cfg 40 | except Exception as e: 41 | print(f"读取config.json中的token失败: {e}") 42 | try: 43 | if os.path.exists(token_path): 44 | with open(token_path, 'r', encoding='utf-8') as f: 45 | token = f.read().strip() 46 | return token if token else "" 47 | return "" 48 | except Exception as e: 49 | print(f"加载token失败: {e}") 50 | return "" 51 | 52 | def save_api_token(token): 53 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 54 | try: 55 | with open(token_path, 'w', encoding='utf-8') as f: 56 | f.write(token) 57 | cfg = load_config() 58 | cfg["api_token"] = token 59 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 60 | with open(config_path, 'w', encoding='utf-8') as f: 61 | json.dump(cfg, f, ensure_ascii=False, indent=2) 62 | return True 63 | except Exception as e: 64 | print(f"保存token失败: {e}") 65 | return False 66 | 67 | def tensor_to_base64_url(image_tensor): 68 | try: 69 | if len(image_tensor.shape) == 4: 70 | image_tensor = image_tensor.squeeze(0) 71 | 72 | if image_tensor.max() <= 1.0: 73 | image_np = (image_tensor.cpu().numpy() * 255).astype(np.uint8) 74 | else: 75 | image_np = image_tensor.cpu().numpy().astype(np.uint8) 76 | 77 | pil_image = 
Image.fromarray(image_np) 78 | 79 | buffer = BytesIO() 80 | pil_image.save(buffer, format='JPEG', quality=85) 81 | img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') 82 | 83 | return f"data:image/jpeg;base64,{img_base64}" 84 | 85 | except Exception as e: 86 | print(f"图像转换失败: {e}") 87 | raise Exception(f"图像格式转换失败: {str(e)}") 88 | 89 | class QwenVisionNode: 90 | def __init__(self): 91 | pass 92 | 93 | @classmethod 94 | def INPUT_TYPES(cls): 95 | if not OPENAI_AVAILABLE: 96 | return { 97 | "required": { 98 | "error_message": ("STRING", { 99 | "default": "请先安装openai库: pip install openai", 100 | "multiline": True 101 | }), 102 | } 103 | } 104 | config = load_config() 105 | saved_token = load_api_token() 106 | return { 107 | "required": { 108 | "image": ("IMAGE",), 109 | "prompt": ("STRING", { 110 | "multiline": True, 111 | "default": config.get("default_prompt", "描述这幅图") 112 | }), 113 | "api_token": ("STRING", { 114 | "default": saved_token, 115 | "placeholder": "请输入您的魔搭API Token" 116 | }), 117 | }, 118 | "optional": { 119 | "system_prompt": ("STRING", { 120 | "multiline": True, 121 | "default": config.get("default_system_prompt", "You are a helpful assistant.") 122 | }), 123 | "model": ("STRING", { 124 | "default": config.get("default_vision_model", "stepfun-ai/step3") 125 | }), 126 | "max_tokens": ("INT", { 127 | "default": 1000, 128 | "min": 100, 129 | "max": 4000 130 | }), 131 | "temperature": ("FLOAT", { 132 | "default": 0.7, 133 | "min": 0.1, 134 | "max": 2.0, 135 | "step": 0.1 136 | }), 137 | "seed": ("INT", { 138 | "default": config.get("default_vision_seed", -1), 139 | "min": -1, 140 | "max": 2147483647 141 | }), 142 | } 143 | } 144 | 145 | RETURN_TYPES = ("STRING",) 146 | RETURN_NAMES = ("description",) 147 | FUNCTION = "analyze_image" 148 | CATEGORY = "QwenImage" 149 | 150 | def analyze_image(self, image=None, prompt="", api_token="", system_prompt="You are a helpful assistant.", model="stepfun-ai/step3", max_tokens=1000, temperature=0.7, seed=-1, error_message=""): 151 | if not OPENAI_AVAILABLE: 152 | return ("请先安装openai库: pip install openai",) 153 | 154 | config = load_config() 155 | 156 | if not api_token or api_token.strip() == "": 157 | raise Exception("请输入有效的API Token") 158 | 159 | saved_token = load_api_token() 160 | if api_token != saved_token: 161 | if save_api_token(api_token): 162 | print("API Token已自动保存") 163 | else: 164 | print("API Token保存失败,但不影响当前使用") 165 | 166 | try: 167 | print(f"开始分析图像...") 168 | print(f"📝 提示词: {prompt}") 169 | if system_prompt: 170 | print(f"系统提示: {system_prompt[:50]}...") 171 | print(f"🤖 模型: {model}") 172 | 173 | # 处理随机种子 174 | if seed != -1: 175 | print(f"🎯 使用指定种子: {seed}") 176 | else: 177 | import random 178 | random_seed = random.randint(0, 2147483647) 179 | print(f"🎲 使用随机种子: {random_seed}") 180 | seed = random_seed 181 | 182 | image_url = tensor_to_base64_url(image) 183 | print(f"图像已转换为base64格式") 184 | 185 | client = OpenAI( 186 | base_url='https://api-inference.modelscope.cn/v1', 187 | api_key=api_token 188 | ) 189 | 190 | messages = [] 191 | 192 | # 如果有系统提示词,添加到messages中 193 | if system_prompt and system_prompt.strip(): 194 | messages.append({ 195 | 'role': 'system', 196 | 'content': system_prompt 197 | }) 198 | 199 | # 添加用户消息(包含文本和图像) 200 | messages.append({ 201 | 'role': 'user', 202 | 'content': [{ 203 | 'type': 'text', 204 | 'text': prompt, 205 | }, { 206 | 'type': 'image_url', 207 | 'image_url': { 208 | 'url': image_url, 209 | }, 210 | }], 211 | }) 212 | 213 | print(f"🚀 发送API请求...") 214 | 215 | response = 
client.chat.completions.create( 216 | model=model, 217 | messages=messages, 218 | max_tokens=max_tokens, 219 | temperature=temperature, 220 | stream=False, 221 | seed=seed 222 | ) 223 | 224 | description = response.choices[0].message.content 225 | print(f"分析完成!") 226 | print(f"📄 结果: {description[:100]}...") 227 | 228 | return (description,) 229 | 230 | except Exception as e: 231 | error_msg = f"图像分析失败: {str(e)}" 232 | print(f"{error_msg}") 233 | return (error_msg,) 234 | 235 | if OPENAI_AVAILABLE: 236 | NODE_CLASS_MAPPINGS = { 237 | "QwenVisionNode": QwenVisionNode 238 | } 239 | 240 | NODE_DISPLAY_NAME_MAPPINGS = { 241 | "QwenVisionNode": "Qwen-Vision 图生文节点" 242 | } 243 | else: 244 | class OpenAINotInstalledNode: 245 | @classmethod 246 | def INPUT_TYPES(cls): 247 | return { 248 | "required": { 249 | "install_command": ("STRING", { 250 | "default": "pip install openai", 251 | "multiline": False 252 | }), 253 | } 254 | } 255 | 256 | RETURN_TYPES = ("STRING",) 257 | RETURN_NAMES = ("message",) 258 | FUNCTION = "show_install_message" 259 | CATEGORY = "QwenImage" 260 | 261 | def show_install_message(self, install_command): 262 | return ("请先安装openai库才能使用图生文功能: " + install_command,) 263 | 264 | NODE_CLASS_MAPPINGS = { 265 | "QwenVisionNode": OpenAINotInstalledNode 266 | } 267 | 268 | NODE_DISPLAY_NAME_MAPPINGS = { 269 | "QwenVisionNode": "Qwen-Vision 图生文节点 (需要安装openai)" 270 | } -------------------------------------------------------------------------------- /qwen_video_node.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | import os 5 | import tempfile 6 | import base64 7 | 8 | try: 9 | from openai import OpenAI 10 | OPENAI_AVAILABLE = True 11 | except ImportError: 12 | print("警告: 未安装openai库,视频生文功能将不可用") 13 | print("请运行: pip install openai") 14 | OPENAI_AVAILABLE = False 15 | OpenAI = None 16 | 17 | def load_config(): 18 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 19 | try: 20 | with open(config_path, 'r', encoding='utf-8') as f: 21 | return json.load(f) 22 | except: 23 | return { 24 | "default_model": "Qwen/Qwen3-VL-235B-A22B-Instruct", 25 | "timeout": 60, 26 | "default_prompt": "描述这个视频的内容", 27 | "cloudinary_cloud_name": "", 28 | "cloudinary_api_key": "", 29 | "cloudinary_api_secret": "" 30 | } 31 | 32 | def load_api_token(): 33 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 34 | try: 35 | cfg = load_config() 36 | token_from_cfg = cfg.get("api_token", "").strip() 37 | if token_from_cfg: 38 | return token_from_cfg 39 | except Exception as e: 40 | print(f"读取config.json中的token失败: {e}") 41 | try: 42 | if os.path.exists(token_path): 43 | with open(token_path, 'r', encoding='utf-8') as f: 44 | token = f.read().strip() 45 | return token if token else "" 46 | return "" 47 | except Exception as e: 48 | print(f"加载token失败: {e}") 49 | return "" 50 | 51 | def save_api_token(token): 52 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 53 | try: 54 | with open(token_path, 'w', encoding='utf-8') as f: 55 | f.write(token) 56 | cfg = load_config() 57 | cfg["api_token"] = token 58 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 59 | with open(config_path, 'w', encoding='utf-8') as f: 60 | json.dump(cfg, f, ensure_ascii=False, indent=2) 61 | return True 62 | except Exception as e: 63 | print(f"保存token失败: {e}") 64 | return False 65 | 66 | def save_cloudinary_config(cloud_name, api_key, api_secret): 67 | """保存Cloudinary配置到config.json""" 
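# 注意:凭据会以明文写入 config.json,而 .gitignore 并未忽略该文件,
# 请勿把已填入密钥的 config.json 提交到公开仓库。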
68 |     try:
69 |         cfg = load_config()
70 |         cfg["cloudinary_cloud_name"] = cloud_name
71 |         cfg["cloudinary_api_key"] = api_key
72 |         cfg["cloudinary_api_secret"] = api_secret
73 |         config_path = os.path.join(os.path.dirname(__file__), 'config.json')
74 |         with open(config_path, 'w', encoding='utf-8') as f:
75 |             json.dump(cfg, f, ensure_ascii=False, indent=2)
76 |         return True
77 |     except Exception as e:
78 |         print(f"保存Cloudinary配置失败: {e}")
79 |         return False
80 | 
81 | def upload_video_to_cloudinary(video_path, cloud_name=None, api_key=None, api_secret=None):
82 |     """上传视频到Cloudinary获取URL"""
83 |     try:
84 |         import hashlib
85 |         import time
86 | 
87 |         # 如果没有传入参数,从配置文件读取
88 |         if not cloud_name or not api_key or not api_secret:
89 |             config = load_config()
90 |             cloud_name = cloud_name or config.get('cloudinary_cloud_name', '').strip()
91 |             api_key = api_key or config.get('cloudinary_api_key', '').strip()
92 |             api_secret = api_secret or config.get('cloudinary_api_secret', '').strip()
93 | 
94 |         if not all([cloud_name, api_key, api_secret]):
95 |             print("Cloudinary配置不完整,请检查config.json中的cloudinary配置")
96 |             return None
97 | 
98 |         # 生成签名
99 |         timestamp = str(int(time.time()))
100 |         public_id = f"comfyui_video_{timestamp}"
101 | 
102 |         # 创建签名字符串
103 |         sign_string = f"public_id={public_id}&timestamp={timestamp}{api_secret}"
104 |         signature = hashlib.sha1(sign_string.encode()).hexdigest()
105 | 
106 |         # Cloudinary上传URL
107 |         upload_url = f"https://api.cloudinary.com/v1_1/{cloud_name}/video/upload"
108 | 
109 |         # 准备上传数据
110 |         with open(video_path, 'rb') as video_file:
111 |             files = {'file': video_file}
112 |             data = {
113 |                 'api_key': api_key,
114 |                 'timestamp': timestamp,
115 |                 'signature': signature,
116 |                 'public_id': public_id,
117 |                 'resource_type': 'video'
118 |             }
119 | 
120 |             print(f"正在上传视频到Cloudinary...")
121 |             upload_response = requests.post(
122 |                 upload_url,
123 |                 files=files,
124 |                 data=data,
125 |                 timeout=120  # 视频文件可能较大,增加超时时间
126 |             )
127 | 
128 |             if upload_response.status_code == 200:
129 |                 upload_data = upload_response.json()
130 |                 if 'secure_url' in upload_data:
131 |                     video_url = upload_data['secure_url']
132 |                     print(f"视频已上传到Cloudinary成功,获取URL: {video_url}")
133 |                     return video_url
134 |                 else:
135 |                     print(f"Cloudinary上传返回格式错误: {upload_response.text}")
136 |                     return None
137 |             else:
138 |                 print(f"Cloudinary上传失败: {upload_response.status_code}, {upload_response.text}")
139 |                 return None
140 |     except Exception as e:
141 |         print(f"Cloudinary上传异常: {str(e)}")
142 |         return None
143 | 
144 | 
145 | def video_to_base64(video_path):
146 |     """将视频文件转换为base64格式"""
147 |     try:
148 |         with open(video_path, 'rb') as video_file:
149 |             video_data = video_file.read()
150 |         video_base64 = base64.b64encode(video_data).decode('utf-8')
151 |         return f"data:video/mp4;base64,{video_base64}"
152 |     except Exception as e:
153 |         print(f"视频base64转换失败: {e}")
154 |         raise Exception(f"视频格式转换失败: {str(e)}")
155 | 
156 | class QwenVideoNode:
157 |     def __init__(self):
158 |         pass
159 | 
160 |     @classmethod
161 |     def INPUT_TYPES(cls):
162 |         if not OPENAI_AVAILABLE:
163 |             return {
164 |                 "required": {
165 |                     "error_message": ("STRING", {
166 |                         "default": "请先安装openai库: pip install openai",
167 |                         "multiline": True
168 |                     }),
169 |                 }
170 |             }
171 |         config = load_config()
172 |         saved_token = load_api_token()
173 |         return {
174 |             "required": {
175 |                 "prompt": ("STRING", {
176 |                     "multiline": True,
177 |                     "default": config.get("default_prompt", "描述这个视频的内容")
178 |                 }),
179 |                 "api_token": ("STRING", {
180 |                     "default": saved_token,
181 |                     "placeholder": "请输入您的魔搭API Token"
182 |                 }),
183 | }, 184 | "optional": { 185 | "system_prompt": ("STRING", { 186 | "multiline": True, 187 | "default": config.get("default_system_prompt", "You are a helpful assistant.") 188 | }), 189 | "video": ("VIDEO",), 190 | "video_path": ("STRING", { 191 | "default": "", 192 | "placeholder": "或者直接输入视频文件路径" 193 | }), 194 | "model": ("STRING", { 195 | "default": config.get("default_video_model", "stepfun-ai/step3") 196 | }), 197 | "max_tokens": ("INT", { 198 | "default": 1000, 199 | "min": 100, 200 | "max": 4000 201 | }), 202 | "temperature": ("FLOAT", { 203 | "default": 0.7, 204 | "min": 0.1, 205 | "max": 2.0, 206 | "step": 0.1 207 | }), 208 | "seed": ("INT", { 209 | "default": config.get("default_video_seed", -1), 210 | "min": -1, 211 | "max": 2147483647 212 | }), 213 | "cloudinary_cloud_name": ("STRING", { 214 | "default": config.get("cloudinary_cloud_name", ""), 215 | "placeholder": "Cloudinary Cloud Name" 216 | }), 217 | "cloudinary_api_key": ("STRING", { 218 | "default": config.get("cloudinary_api_key", ""), 219 | "placeholder": "Cloudinary API Key" 220 | }), 221 | "cloudinary_api_secret": ("STRING", { 222 | "default": config.get("cloudinary_api_secret", ""), 223 | "placeholder": "Cloudinary API Secret" 224 | }), 225 | } 226 | } 227 | 228 | RETURN_TYPES = ("STRING",) 229 | RETURN_NAMES = ("description",) 230 | FUNCTION = "analyze_video" 231 | CATEGORY = "QwenImage" 232 | 233 | def analyze_video(self, prompt="", api_token="", system_prompt="You are a helpful assistant.", video=None, video_path="", model="stepfun-ai/step3", max_tokens=1000, temperature=0.7, seed=-1, cloudinary_cloud_name="", cloudinary_api_key="", cloudinary_api_secret="", error_message=""): 234 | if not OPENAI_AVAILABLE: 235 | return ("请先安装openai库: pip install openai",) 236 | 237 | config = load_config() 238 | 239 | if not api_token or api_token.strip() == "": 240 | raise Exception("请输入有效的API Token") 241 | 242 | # 优先使用VIDEO输入,如果没有则使用video_path 243 | actual_video_path = None 244 | 245 | if video is not None: 246 | # 从VIDEO输入中提取视频路径 247 | try: 248 | print(f"调试VIDEO输入:") 249 | print(f" 类型: {type(video)}") 250 | print(f" 内容: {video}") 251 | 252 | # 打印所有属性 253 | if hasattr(video, '__dict__'): 254 | print(f" 属性: {video.__dict__}") 255 | elif hasattr(video, '__slots__'): 256 | print(f" 槽位: {video.__slots__}") 257 | 258 | # 尝试各种可能的属性名,包括私有属性 259 | possible_attrs = ['filename', 'path', 'file_path', 'name', 'file', 'video_path', 'src', 'url', 'file_path', 'input_path', '_VideoFromFile__file'] 260 | for attr in possible_attrs: 261 | if hasattr(video, attr): 262 | value = getattr(video, attr) 263 | print(f" 找到属性 {attr}: {value}") 264 | if isinstance(value, str) and value.strip(): 265 | actual_video_path = value 266 | break 267 | 268 | # 如果是字符串 269 | if isinstance(video, str) and video.strip(): 270 | actual_video_path = video 271 | print(f" VIDEO是字符串: {video}") 272 | 273 | # 如果是列表或元组 274 | elif hasattr(video, '__getitem__') and len(video) > 0: 275 | print(f" VIDEO是序列,长度: {len(video)}") 276 | first_item = video[0] 277 | print(f" 第一个元素类型: {type(first_item)}") 278 | print(f" 第一个元素内容: {first_item}") 279 | 280 | for attr in possible_attrs: 281 | if hasattr(first_item, attr): 282 | value = getattr(first_item, attr) 283 | print(f" 第一个元素属性 {attr}: {value}") 284 | if isinstance(value, str) and value.strip(): 285 | actual_video_path = value 286 | break 287 | 288 | if actual_video_path: 289 | print(f"成功提取视频路径: {actual_video_path}") 290 | else: 291 | print(f"无法从VIDEO输入中提取路径") 292 | 293 | except Exception as e: 294 | print(f"从VIDEO输入提取路径失败: {e}") 295 
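# 说明:ComfyUI 的 VIDEO 输入对象没有稳定的公开路径属性,
# 上面按常见属性名(包括私有的 _VideoFromFile__file)逐一探测底层文件路径,
# 全部失败时会在下方回退到 video_path 字符串参数。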
| import traceback 296 | traceback.print_exc() 297 | 298 | # 如果VIDEO输入没有提供有效路径,使用video_path 299 | if not actual_video_path and video_path: 300 | actual_video_path = video_path 301 | 302 | if not actual_video_path or not os.path.exists(actual_video_path): 303 | raise Exception("请提供有效的视频文件(通过VIDEO输入或video_path参数)") 304 | 305 | saved_token = load_api_token() 306 | if api_token != saved_token: 307 | if save_api_token(api_token): 308 | print("API Token已自动保存") 309 | else: 310 | print("API Token保存失败,但不影响当前使用") 311 | 312 | # 保存Cloudinary配置(如果提供了新的配置) 313 | if cloudinary_cloud_name and cloudinary_api_key and cloudinary_api_secret: 314 | current_config = load_config() 315 | if (current_config.get("cloudinary_cloud_name") != cloudinary_cloud_name or 316 | current_config.get("cloudinary_api_key") != cloudinary_api_key or 317 | current_config.get("cloudinary_api_secret") != cloudinary_api_secret): 318 | if save_cloudinary_config(cloudinary_cloud_name, cloudinary_api_key, cloudinary_api_secret): 319 | print("Cloudinary配置已自动保存") 320 | else: 321 | print("Cloudinary配置保存失败,但不影响当前使用") 322 | 323 | try: 324 | print(f"🎬 开始分析视频...") 325 | print(f"📝 提示词: {prompt}") 326 | if system_prompt: 327 | print(f"系统提示: {system_prompt[:50]}...") 328 | print(f"🤖 模型: {model}") 329 | print(f"视频路径: {actual_video_path}") 330 | 331 | # 处理随机种子 332 | if seed != -1: 333 | print(f"🎯 使用指定种子: {seed}") 334 | else: 335 | import random 336 | random_seed = random.randint(0, 2147483647) 337 | print(f"🎲 使用随机种子: {random_seed}") 338 | seed = random_seed 339 | 340 | # 尝试上传视频到Cloudinary获取URL 341 | video_url = upload_video_to_cloudinary(actual_video_path, cloudinary_cloud_name, cloudinary_api_key, cloudinary_api_secret) 342 | 343 | if video_url: 344 | # 使用URL方式 345 | print(f"🌐 使用视频URL: {video_url}") 346 | video_content = { 347 | 'type': 'video_url', 348 | 'video_url': { 349 | 'url': video_url, 350 | }, 351 | } 352 | else: 353 | # 回退到base64方式 354 | print("视频URL获取失败,回退到使用base64") 355 | video_data = video_to_base64(actual_video_path) 356 | video_content = { 357 | 'type': 'video_url', 358 | 'video_url': { 359 | 'url': video_data, 360 | }, 361 | } 362 | 363 | client = OpenAI( 364 | base_url='https://api-inference.modelscope.cn/v1', 365 | api_key=api_token 366 | ) 367 | 368 | messages = [] 369 | 370 | # 如果有系统提示词,添加到messages中 371 | if system_prompt and system_prompt.strip(): 372 | messages.append({ 373 | 'role': 'system', 374 | 'content': system_prompt 375 | }) 376 | 377 | # 添加用户消息(包含文本和视频) 378 | messages.append({ 379 | 'role': 'user', 380 | 'content': [{ 381 | 'type': 'text', 382 | 'text': prompt, 383 | }, video_content], 384 | }) 385 | 386 | print(f"🚀 发送API请求...") 387 | 388 | response = client.chat.completions.create( 389 | model=model, 390 | messages=messages, 391 | max_tokens=max_tokens, 392 | temperature=temperature, 393 | stream=False, 394 | seed=seed 395 | ) 396 | 397 | description = response.choices[0].message.content 398 | print(f"视频分析完成!") 399 | print(f"📄 结果: {description[:100]}...") 400 | 401 | return (description,) 402 | 403 | except Exception as e: 404 | error_msg = f"视频分析失败: {str(e)}" 405 | print(f"{error_msg}") 406 | return (error_msg,) 407 | 408 | if OPENAI_AVAILABLE: 409 | NODE_CLASS_MAPPINGS = { 410 | "QwenVideoNode": QwenVideoNode 411 | } 412 | 413 | NODE_DISPLAY_NAME_MAPPINGS = { 414 | "QwenVideoNode": "Qwen-Video 视频生文节点" 415 | } 416 | else: 417 | class OpenAINotInstalledVideoNode: 418 | @classmethod 419 | def INPUT_TYPES(cls): 420 | return { 421 | "required": { 422 | "install_command": ("STRING", { 423 | "default": "pip install 
openai", 424 | "multiline": False 425 | }), 426 | } 427 | } 428 | 429 | RETURN_TYPES = ("STRING",) 430 | RETURN_NAMES = ("message",) 431 | FUNCTION = "show_install_message" 432 | CATEGORY = "QwenImage" 433 | 434 | def show_install_message(self, install_command): 435 | return ("请先安装openai库才能使用视频生文功能: " + install_command,) 436 | 437 | NODE_CLASS_MAPPINGS = { 438 | "QwenVideoNode": OpenAINotInstalledVideoNode 439 | } 440 | 441 | NODE_DISPLAY_NAME_MAPPINGS = { 442 | "QwenVideoNode": "Qwen-Video 视频生文节点 (需要安装openai)" 443 | } 444 | -------------------------------------------------------------------------------- /qwen_detection_node.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ast 3 | import json 4 | import base64 5 | import io 6 | from dataclasses import dataclass 7 | from typing import List, Dict, Any, Tuple 8 | 9 | import torch 10 | import numpy as np 11 | from PIL import Image, ImageDraw, ImageFont 12 | from openai import OpenAI 13 | 14 | try: 15 | import folder_paths 16 | except ImportError: 17 | # folder_paths 只在 ComfyUI 环境中可用 18 | folder_paths = None 19 | 20 | 21 | def parse_json(json_output: str) -> str: 22 | """Extract the JSON payload from a model response string.""" 23 | if "```json" in json_output: 24 | json_output = json_output.split("```json", 1)[1] 25 | json_output = json_output.split("```", 1)[0] 26 | 27 | try: 28 | parsed = json.loads(json_output) 29 | if isinstance(parsed, dict) and "content" in parsed: 30 | inner = parsed["content"] 31 | if isinstance(inner, str): 32 | json_output = inner 33 | except Exception: 34 | pass 35 | return json_output 36 | 37 | 38 | def draw_bboxes_on_image(image, bboxes_data, target_label="object"): 39 | """直接使用KJNodes的DrawInstanceDiffusionTracking实现""" 40 | import matplotlib.cm as cm 41 | import torch 42 | from torchvision import transforms 43 | 44 | if not bboxes_data: 45 | return image 46 | 47 | # 确保图像是PIL格式 48 | if hasattr(image, 'mode'): # 已经是PIL图像 49 | pil_image = image.copy() 50 | else: # 如果是tensor,转换为PIL 51 | if len(image.shape) == 4: # batch 52 | current_image = image[0, :, :, :].permute(2, 0, 1) 53 | else: # single image 54 | current_image = image.permute(2, 0, 1) 55 | pil_image = transforms.ToPILImage()(current_image) 56 | 57 | draw = ImageDraw.Draw(pil_image) 58 | 59 | # 使用KJNodes的彩虹色彩映射 60 | colormap = cm.get_cmap('rainbow', len(bboxes_data)) 61 | 62 | # 尝试加载字体 63 | try: 64 | font = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 20) 65 | except: 66 | try: 67 | font = ImageFont.load_default() 68 | except: 69 | font = None 70 | 71 | # 直接使用KJNodes的绘制逻辑 72 | for j, bbox_data in enumerate(bboxes_data): 73 | if isinstance(bbox_data, dict): 74 | bbox = bbox_data.get("bbox_2d", bbox_data.get("bbox", [])) 75 | label = bbox_data.get("label", target_label) 76 | else: 77 | bbox = bbox_data 78 | label = target_label 79 | 80 | if len(bbox) != 4: 81 | continue 82 | 83 | x1, y1, x2, y2 = bbox 84 | # 转换为整数(KJNodes的做法) 85 | x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) 86 | 87 | # 使用KJNodes的颜色生成方式 88 | color = tuple(int(255 * x) for x in colormap(j / len(bboxes_data)))[:3] 89 | 90 | # 添加调试信息 91 | print(f"绘制边界框 {j+1}: ({x1}, {y1}) -> ({x2}, {y2}), 标签: {label}") 92 | 93 | # 使用KJNodes的绘制方式 94 | draw.rectangle([x1, y1, x2, y2], outline=color, width=2) 95 | 96 | if font: 97 | # 使用KJNodes的文本绘制方式 98 | text = f"{j+1}.{label}" 99 | # 计算文本尺寸(KJNodes的方式) 100 | _, _, text_width, text_height = draw.textbbox((0, 0), text=text, font=font) 101 | # 使用KJNodes的文本位置 102 | text_position = (x1, y1 - 
text_height) 103 | draw.text(text_position, text, fill=color, font=font) 104 | 105 | return pil_image 106 | 107 | 108 | def parse_boxes( 109 | text: str, 110 | img_width: int, 111 | img_height: int, 112 | input_w: int, 113 | input_h: int, 114 | score_threshold: float = 0.0, 115 | ) -> List[Dict[str, Any]]: 116 | """Return bounding boxes parsed from the model's raw JSON output.""" 117 | text = parse_json(text) 118 | try: 119 | data = json.loads(text) 120 | except Exception: 121 | try: 122 | data = ast.literal_eval(text) 123 | except Exception: 124 | end_idx = text.rfind('"}') + len('"}') 125 | truncated = text[:end_idx] + "]" 126 | data = ast.literal_eval(truncated) 127 | if isinstance(data, dict): 128 | inner = data.get("content") 129 | if isinstance(inner, str): 130 | try: 131 | data = ast.literal_eval(inner) 132 | except Exception: 133 | data = [] 134 | else: 135 | data = [] 136 | items: List[DetectedBox] = [] 137 | x_scale = img_width / input_w 138 | y_scale = img_height / input_h 139 | 140 | for item in data: 141 | box = item.get("bbox_2d") or item.get("bbox") or item 142 | label = item.get("label", "") 143 | score = float(item.get("score", 1.0)) 144 | # 修复坐标顺序:确保是 [x1, y1, x2, y2] 格式 145 | if len(box) >= 4: 146 | x1, y1, x2, y2 = box[0], box[1], box[2], box[3] 147 | else: 148 | x1, y1, x2, y2 = 0, 0, 0, 0 149 | 150 | abs_x1 = int(x1 * x_scale) 151 | abs_y1 = int(y1 * y_scale) 152 | abs_x2 = int(x2 * x_scale) 153 | abs_y2 = int(y2 * y_scale) 154 | 155 | # 确保坐标顺序正确 156 | if abs_x1 > abs_x2: 157 | abs_x1, abs_x2 = abs_x2, abs_x1 158 | if abs_y1 > abs_y2: 159 | abs_y1, abs_y2 = abs_y2, abs_y1 160 | 161 | if score >= score_threshold: 162 | items.append(DetectedBox([abs_x1, abs_y1, abs_x2, abs_y2], score, label)) 163 | items.sort(key=lambda x: x.score, reverse=True) 164 | return [ 165 | {"score": b.score, "bbox": b.bbox, "label": b.label} 166 | for b in items 167 | ] 168 | 169 | 170 | @dataclass 171 | class DetectedBox: 172 | bbox: List[int] 173 | score: float 174 | label: str = "" 175 | 176 | 177 | @dataclass 178 | class QwenAPIConfig: 179 | client: Any 180 | model_name: str 181 | base_url: str 182 | api_key: str 183 | 184 | 185 | def encode_image_to_base64(image: Image.Image) -> str: 186 | """将PIL图像编码为base64字符串""" 187 | buffer = io.BytesIO() 188 | image.save(buffer, format='PNG') 189 | img_str = base64.b64encode(buffer.getvalue()).decode() 190 | return f"data:image/png;base64,{img_str}" 191 | 192 | 193 | class QwenAPIConfig: 194 | def __init__(self, client=None, model_name=None, base_url=None, api_key=None): 195 | self.client = client 196 | self.model_name = model_name 197 | self.base_url = base_url 198 | self.api_key = api_key 199 | 200 | @classmethod 201 | def INPUT_TYPES(cls): 202 | return { 203 | "required": { 204 | "base_url": ("STRING", {"default": "https://api-inference.modelscope.cn/v1"}), 205 | "api_key": ("STRING", {"default": ""}), 206 | "model_name": ("STRING", {"default": "Qwen/Qwen2.5-VL-72B-Instruct"}), 207 | "timeout": ("INT", {"default": 60, "min": 10, "max": 300}), 208 | } 209 | } 210 | 211 | RETURN_TYPES = ("QWEN_API_CONFIG",) 212 | RETURN_NAMES = ("qwen_api_config",) 213 | FUNCTION = "configure" 214 | CATEGORY = "Qwen2.5-VL" 215 | 216 | def configure(self, base_url: str, api_key: str, model_name: str, timeout: int): 217 | """配置API客户端""" 218 | if not api_key: 219 | raise ValueError("API密钥不能为空") 220 | 221 | client = OpenAI( 222 | base_url=base_url, 223 | api_key=api_key, 224 | timeout=timeout 225 | ) 226 | 227 | return (QwenAPIConfig( 228 | client=client, 229 | 
model_name=model_name, 230 | base_url=base_url, 231 | api_key=api_key 232 | ),) 233 | 234 | 235 | class QwenAPIDetection: 236 | @classmethod 237 | def INPUT_TYPES(cls): 238 | return { 239 | "required": { 240 | "qwen_api_config": ("QWEN_API_CONFIG",), 241 | "image": ("IMAGE",), 242 | "target": ("STRING", {"default": "object"}), 243 | "bbox_selection": ("STRING", {"default": "all"}), 244 | "score_threshold": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}), 245 | "merge_boxes": ("BOOLEAN", {"default": False}), 246 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 247 | }, 248 | } 249 | 250 | RETURN_TYPES = ("JSON", "BBOX", "IMAGE") 251 | RETURN_NAMES = ("text", "bboxes", "preview") 252 | FUNCTION = "detect" 253 | CATEGORY = "Qwen2.5-VL" 254 | 255 | def detect( 256 | self, 257 | qwen_api_config: QwenAPIConfig, 258 | image, 259 | target: str, 260 | bbox_selection: str = "all", 261 | score_threshold: float = 0.0, 262 | merge_boxes: bool = False, 263 | seed: int = 0, 264 | ): 265 | """使用API生成目标检测边界框""" 266 | client = qwen_api_config.client 267 | model_name = qwen_api_config.model_name 268 | 269 | # 添加随机种子到prompt中,确保每次请求都不同 270 | random_seed_text = f"Random seed: {seed}. " 271 | 272 | prompt = f"""You are a precise object detection system. Your task is to detect {target} in the image. 273 | 274 | {random_seed_text}STRICT REQUIREMENTS: 275 | 1. You MUST create a SEPARATE bounding box for EACH individual {target} you see 276 | 2. NEVER combine multiple {target} objects into one large bounding box 277 | 3. Each {target} should have its own tight, precise bounding box 278 | 4. If you see 6 {target} objects, you MUST return 6 separate bounding boxes 279 | 5. Count each {target} individually and create one box per object 280 | 281 | OUTPUT FORMAT: Return a JSON array with separate objects for each {target}: 282 | [{{"bbox_2d": [x1, y1, x2, y2], "label": "{target}"}}, {{"bbox_2d": [x1, y1, x2, y2], "label": "{target}"}}, ...] 
247 | class QwenAPIDetection:
248 |     @classmethod
249 |     def INPUT_TYPES(cls):
250 |         return {
251 |             "required": {
252 |                 "qwen_api_config": ("QWEN_API_CONFIG",),
253 |                 "image": ("IMAGE",),
254 |                 "target": ("STRING", {"default": "object"}),
255 |                 "bbox_selection": ("STRING", {"default": "all"}),
256 |                 "score_threshold": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01}),
257 |                 "merge_boxes": ("BOOLEAN", {"default": False}),
258 |                 "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
259 |             },
260 |         }
261 | 
262 |     RETURN_TYPES = ("JSON", "BBOX", "IMAGE")
263 |     RETURN_NAMES = ("text", "bboxes", "preview")
264 |     FUNCTION = "detect"
265 |     CATEGORY = "Qwen2.5-VL"
266 | 
267 |     def detect(
268 |         self,
269 |         qwen_api_config: QwenAPIConfig,
270 |         image,
271 |         target: str,
272 |         bbox_selection: str = "all",
273 |         score_threshold: float = 0.0,
274 |         merge_boxes: bool = False,
275 |         seed: int = 0,
276 |     ):
277 |         """Detect `target` objects in the image via the API."""
278 |         client = qwen_api_config.client
279 |         model_name = qwen_api_config.model_name
280 | 
281 |         # Fold the seed into the prompt so repeated requests are not served
282 |         # from cache.
283 |         random_seed_text = f"Random seed: {seed}. "
284 | 
285 |         prompt = f"""You are a precise object detection system. Your task is to detect {target} in the image.
286 | 
287 | {random_seed_text}STRICT REQUIREMENTS:
288 | 1. You MUST create a SEPARATE bounding box for EACH individual {target} you see
289 | 2. NEVER combine multiple {target} objects into one large bounding box
290 | 3. Each {target} should have its own tight, precise bounding box
291 | 4. If you see 6 {target} objects, you MUST return 6 separate bounding boxes
292 | 5. Count each {target} individually and create one box per object
293 | 
294 | OUTPUT FORMAT: Return a JSON array with separate objects for each {target}:
295 | [{{"bbox_2d": [x1, y1, x2, y2], "label": "{target}"}}, {{"bbox_2d": [x1, y1, x2, y2], "label": "{target}"}}, ...]
296 | 
297 | EXAMPLE: If you see 3 logos, return 3 separate boxes:
298 | [{{"bbox_2d": [100, 100, 150, 120], "label": "{target}"}}, {{"bbox_2d": [200, 100, 250, 120], "label": "{target}"}}, {{"bbox_2d": [300, 100, 350, 120], "label": "{target}"}}]
299 | 
300 | DO NOT return one large box covering multiple objects!"""
301 | 
302 |         # Convert the incoming tensor to a PIL image.
303 |         if isinstance(image, torch.Tensor):
304 |             image = (image.squeeze().clamp(0, 1) * 255).to(torch.uint8).cpu().numpy()
305 |             image = Image.fromarray(image)
306 |         elif not isinstance(image, Image.Image):
307 |             raise ValueError("不支持的图像类型")
308 | 
309 |         # Encode the image as a base64 data URL.
310 |         image_data = encode_image_to_base64(image)
311 | 
312 |         try:
313 |             # Call the API.
314 |             response = client.chat.completions.create(
315 |                 model=model_name,
316 |                 messages=[{
317 |                     'role': 'user',
318 |                     'content': [
319 |                         {'type': 'text', 'text': prompt},
320 |                         {'type': 'image_url', 'image_url': {'url': image_data}}
321 |                     ]
322 |                 }],
323 |                 max_tokens=1024,
324 |                 temperature=0.1
325 |             )
326 | 
327 |             output_text = response.choices[0].message.content
328 | 
329 |         except Exception as e:
330 |             raise RuntimeError(f"API调用失败: {str(e)}")
331 | 
332 |         # Parse the response. Qwen2.5-VL reports pixel coordinates in the
333 |         # model's (resized) input space; the API does not expose the true
334 |         # input size, so assume 1024x1024 and scale to the original image.
335 |         input_h = 1024  # assumed input height
336 |         input_w = 1024  # assumed input width
337 | 
338 |         items = parse_boxes(
339 |             output_text,
340 |             image.width,
341 |             image.height,
342 |             input_w,
343 |             input_h,
344 |             score_threshold,
345 |         )
346 | 
347 |         # Apply the bounding-box selection.
348 |         selection = bbox_selection.strip().lower()
349 |         boxes = items
350 |         if selection != "all" and selection:
351 |             idxs = []
352 |             for part in selection.replace(",", " ").split():
353 |                 try:
354 |                     idxs.append(int(part))
355 |                 except Exception:
356 |                     continue
357 |             boxes = [boxes[i] for i in idxs if 0 <= i < len(boxes)]
358 | 
359 |         # Optionally merge all boxes into one enclosing box.
360 |         if merge_boxes and boxes:
361 |             x1 = min(b["bbox"][0] for b in boxes)
362 |             y1 = min(b["bbox"][1] for b in boxes)
363 |             x2 = max(b["bbox"][2] for b in boxes)
364 |             y2 = max(b["bbox"][3] for b in boxes)
365 |             score = max(b["score"] for b in boxes)
366 |             label = boxes[0].get("label", target)
367 |             boxes = [{"bbox": [x1, y1, x2, y2], "score": score, "label": label}]
368 | 
369 |         # Format the outputs.
370 |         json_boxes = [
371 |             {"bbox_2d": b["bbox"], "label": b.get("label", target)} for b in boxes
372 |         ]
373 |         json_output = json.dumps(json_boxes, ensure_ascii=False)
374 |         bboxes_only = [b["bbox"] for b in boxes]
375 | 
376 |         # Render the preview image.
377 |         preview_image = self._create_preview_image(image, json_boxes, target)
378 | 
379 |         return (json_output, bboxes_only, preview_image)
380 | 
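381 |     # `bbox_selection` accepts "all" or whitespace/comma-separated indices
382 |     # into the score-sorted box list, e.g. "0 2" keeps the first and third
383 |     # highest-scoring boxes; `merge_boxes` then collapses whatever is left
384 |     # into a single enclosing box.
385 | 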
386 |     def _create_preview_image(self, image, bboxes_data, target_label):
387 |         """Render the detected boxes onto a copy of the input image."""
388 |         try:
389 |             # Convert the tensor to a PIL image.
390 |             if isinstance(image, torch.Tensor):
391 |                 print(f"原始图像tensor形状: {image.shape}")
392 | 
393 |                 # ComfyUI images are [batch, height, width, channels];
394 |                 # take the first image of the batch.
395 |                 if image.dim() == 4:
396 |                     image_tensor = image[0]
397 |                 else:
398 |                     image_tensor = image
399 | 
400 |                 print(f"处理后图像tensor形状: {image_tensor.shape}")
401 | 
402 |                 # To numpy, then to PIL.
403 |                 image_np = image_tensor.cpu().numpy()
404 |                 print(f"转换为numpy后形状: {image_np.shape}")
405 | 
406 |                 if image_np.max() <= 1.0:
407 |                     image_np = (image_np * 255).astype('uint8')
408 |                 else:
409 |                     image_np = image_np.astype('uint8')
410 | 
411 |                 # Defensive fallback: if the array is CHW (non-standard for
412 |                 # ComfyUI), move channels last.
413 |                 if len(image_np.shape) == 3 and image_np.shape[0] in [1, 3, 4]:
414 |                     image_np = np.transpose(image_np, (1, 2, 0))
415 |                     print(f"转置后形状: {image_np.shape}")
416 | 
417 |                 # Make sure the image is 3-channel RGB.
418 |                 if len(image_np.shape) == 3 and image_np.shape[2] == 1:
419 |                     # Single channel: repeat to RGB.
420 |                     image_np = np.repeat(image_np, 3, axis=2)
421 |                     print(f"单通道转RGB后形状: {image_np.shape}")
422 |                 elif len(image_np.shape) == 2:
423 |                     # Grayscale: stack to RGB.
424 |                     image_np = np.stack([image_np] * 3, axis=2)
425 |                     print(f"灰度图转RGB后形状: {image_np.shape}")
426 | 
427 |                 pil_image = Image.fromarray(image_np, 'RGB')
428 |             else:
429 |                 pil_image = image
430 | 
431 |             # Draw the bounding boxes.
432 |             preview_image = draw_bboxes_on_image(pil_image, bboxes_data, target_label)
433 | 
434 |             # Back to tensor form; ComfyUI's IMAGE type expects (B, H, W, C).
435 |             preview_np = np.array(preview_image)
436 |             print(f"绘制边界框后numpy形状: {preview_np.shape}")
437 | 
438 |             if len(preview_np.shape) == 3 and preview_np.shape[2] == 3:
439 |                 # Already HWC RGB: convert directly.
440 |                 preview_tensor = torch.from_numpy(preview_np).float() / 255.0
441 |                 print(f"转换为tensor后形状: {preview_tensor.shape}")
442 | 
443 |                 # Add the batch dimension: HWC -> BHWC.
444 |                 preview_tensor = preview_tensor.unsqueeze(0)
445 |                 print(f"添加batch维度后形状: {preview_tensor.shape}")
446 | 
447 |                 # Clamp to the expected value range.
448 |                 preview_tensor = torch.clamp(preview_tensor, 0.0, 1.0)
449 |                 print(f"最终tensor形状: {preview_tensor.shape}, 数据类型: {preview_tensor.dtype}")
450 | 
451 |                 # ComfyUI needs a contiguous tensor.
452 |                 preview_tensor = preview_tensor.contiguous()
453 | 
454 |                 return preview_tensor
455 |             else:
456 |                 print(f"警告: 预览图像格式不正确: {preview_np.shape}")
457 |                 return image
458 | 
459 |         except Exception as e:
460 |             print(f"创建预览图像时出错: {e}")
461 |             import traceback
462 |             traceback.print_exc()
463 |             # On failure, fall back to the original image.
464 |             return image
465 | 
466 | 
467 | class BBoxesToSAM2:
468 |     """Convert a list of bounding boxes to the format expected by SAM2 nodes."""
469 | 
470 |     @classmethod
471 |     def INPUT_TYPES(cls):
472 |         return {"required": {"bboxes": ("BBOX",)}}
473 | 
474 |     RETURN_TYPES = ("BBOXES",)
475 |     RETURN_NAMES = ("sam2_bboxes",)
476 |     FUNCTION = "convert"
477 |     CATEGORY = "Qwen2.5-VL"
478 | 
479 |     def convert(self, bboxes):
480 |         if not isinstance(bboxes, list):
481 |             raise ValueError("bboxes must be a list")
482 | 
483 |         # If already batched, return as-is
484 |         if bboxes and isinstance(bboxes[0], (list, tuple)) and bboxes[0] and isinstance(bboxes[0][0], (list, tuple)):
485 |             return (bboxes,)
486 | 
487 |         return ([bboxes],)
488 | 
489 | 
490 | NODE_CLASS_MAPPINGS = {
491 |     "QwenAPIConfig": QwenAPIConfig,
492 |     "QwenAPIDetection": QwenAPIDetection,
493 |     "BBoxesToSAM2": BBoxesToSAM2,
494 | }
495 | 
496 | NODE_DISPLAY_NAME_MAPPINGS = {
497 |     "QwenAPIConfig": "Qwen2.5-VL API Configuration",
498 |     "QwenAPIDetection": "Qwen2.5-VL API Object Detection",
499 |     "BBoxesToSAM2": "Prepare BBoxes for SAM2",
500 | }
--------------------------------------------------------------------------------
/qwen_image_node.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import time
4 | import torch
5 | import numpy as np
6 | from PIL import Image
7 | from io import BytesIO
8 | import os
9 | try:
10 |     import folder_paths
11 | except ImportError:
12 |     # folder_paths is only available inside a ComfyUI environment
13 |     folder_paths = None
14 | import base64
15 | import tempfile
16 | 
17 | def load_config():
18 |     config_path = os.path.join(os.path.dirname(__file__), 'config.json')
19 |     try:
20 |         with open(config_path, 'r', encoding='utf-8') as f:
21 |             return json.load(f)
22 |     except Exception:
23 |         return {
24 |             "default_model": "Qwen/Qwen-Image",
25 |             "timeout": 720,
26 |             "image_download_timeout": 30,
27 |             "default_prompt": "A beautiful landscape"
28 |         }
29 | 
30 | def 
save_config(config: dict) -> bool: 31 | config_path = os.path.join(os.path.dirname(__file__), 'config.json') 32 | try: 33 | with open(config_path, 'w', encoding='utf-8') as f: 34 | json.dump(config, f, ensure_ascii=False, indent=2) 35 | return True 36 | except Exception as e: 37 | print(f"保存配置失败: {e}") 38 | return False 39 | 40 | def save_api_token(token): 41 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 42 | try: 43 | with open(token_path, 'w', encoding='utf-8') as f: 44 | f.write(token) 45 | except Exception as e: 46 | print(f"保存token失败(.qwen_token): {e}") 47 | try: 48 | cfg = load_config() 49 | cfg["api_token"] = token 50 | if save_config(cfg): 51 | return True 52 | return False 53 | except Exception as e: 54 | print(f"保存token失败(config.json): {e}") 55 | return False 56 | 57 | def load_api_token(): 58 | token_path = os.path.join(os.path.dirname(__file__), '.qwen_token') 59 | try: 60 | cfg = load_config() 61 | token_from_cfg = cfg.get("api_token", "").strip() 62 | if token_from_cfg: 63 | return token_from_cfg 64 | except Exception as e: 65 | print(f"读取config.json中的token失败: {e}") 66 | try: 67 | if os.path.exists(token_path): 68 | with open(token_path, 'r', encoding='utf-8') as f: 69 | token = f.read().strip() 70 | return token if token else "" 71 | return "" 72 | except Exception as e: 73 | print(f"加载token失败: {e}") 74 | return "" 75 | 76 | def tensor_to_base64_url(image_tensor): 77 | try: 78 | if len(image_tensor.shape) == 4: 79 | image_tensor = image_tensor.squeeze(0) 80 | 81 | if image_tensor.max() <= 1.0: 82 | image_np = (image_tensor.cpu().numpy() * 255).astype(np.uint8) 83 | else: 84 | image_np = image_tensor.cpu().numpy().astype(np.uint8) 85 | 86 | pil_image = Image.fromarray(image_np) 87 | 88 | buffer = BytesIO() 89 | pil_image.save(buffer, format='JPEG', quality=85) 90 | img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') 91 | 92 | return f"data:image/jpeg;base64,{img_base64}" 93 | 94 | except Exception as e: 95 | print(f"图像转换失败: {e}") 96 | raise Exception(f"图像格式转换失败: {str(e)}") 97 | 98 | class QwenImageNode: 99 | def __init__(self): 100 | pass 101 | 102 | @classmethod 103 | def INPUT_TYPES(cls): 104 | config = load_config() 105 | saved_token = load_api_token() 106 | return { 107 | "required": { 108 | "prompt": ("STRING", { 109 | "multiline": True, 110 | "default": config.get("default_prompt", "A beautiful landscape") 111 | }), 112 | "api_token": ("STRING", { 113 | "default": saved_token, 114 | "placeholder": "请输入您的魔搭API Token" 115 | }), 116 | }, 117 | "optional": { 118 | "model": ("STRING", { 119 | "default": config.get("default_model", "Qwen/Qwen-Image") 120 | }), 121 | "negative_prompt": ("STRING", { 122 | "multiline": True, 123 | "default": config.get("default_negative_prompt", "") 124 | }), 125 | "width": ("INT", { 126 | "default": config.get("default_width", 512), 127 | "min": 64, 128 | "max": 2048, 129 | "step": 64 130 | }), 131 | "height": ("INT", { 132 | "default": config.get("default_height", 512), 133 | "min": 64, 134 | "max": 2048, 135 | "step": 64 136 | }), 137 | "seed": ("INT", { 138 | "default": config.get("default_seed", -1), 139 | "min": -1, 140 | "max": 2147483647 141 | }), 142 | "steps": ("INT", { 143 | "default": config.get("default_steps", 30), 144 | "min": 1, 145 | "max": 100 146 | }), 147 | "guidance": ("FLOAT", { 148 | "default": config.get("default_guidance", 7.5), 149 | "min": 1.5, 150 | "max": 20.0, 151 | "step": 0.1 152 | }), 153 | } 154 | } 155 | 156 | RETURN_TYPES = ("IMAGE",) 157 | RETURN_NAMES = ("image",) 158 | 
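    # ComfyUI wiring: FUNCTION names the method invoked when the node runs and
    # CATEGORY sets where the node appears in the add-node menu.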
FUNCTION = "generate_image" 159 | CATEGORY = "QwenImage" 160 | 161 | def generate_image(self, prompt, api_token, model="Qwen/Qwen-Image", negative_prompt="", width=512, height=512, seed=-1, steps=30, guidance=7.5): 162 | config = load_config() 163 | if not api_token or api_token.strip() == "": 164 | raise Exception("请输入有效的API Token") 165 | saved_token = load_api_token() 166 | if api_token != saved_token: 167 | if save_api_token(api_token): 168 | print("API Token已自动保存") 169 | else: 170 | print("API Token保存失败,但不影响当前使用") 171 | try: 172 | url = 'https://api-inference.modelscope.cn/v1/images/generations' 173 | payload = { 174 | 'model': model, 175 | 'prompt': prompt, 176 | 'size': f"{width}x{height}", 177 | 'steps': steps, 178 | 'guidance': guidance 179 | } 180 | if negative_prompt.strip(): 181 | payload['negative_prompt'] = negative_prompt 182 | print(f"🚫 负向提示词: {negative_prompt}") 183 | if seed != -1: 184 | payload['seed'] = seed 185 | print(f"🎯 使用指定种子: {seed}") 186 | else: 187 | import random 188 | random_seed = random.randint(0, 2147483647) 189 | payload['seed'] = random_seed 190 | print(f"🎲 使用随机种子: {random_seed}") 191 | print(f"📐 图像尺寸: {width}x{height}") 192 | print(f"🔧 采样步数: {steps}") 193 | print(f"🎨 引导系数: {guidance}") 194 | headers = { 195 | 'Authorization': f'Bearer {api_token}', 196 | 'Content-Type': 'application/json', 197 | 'X-ModelScope-Async-Mode': 'true' 198 | } 199 | submission_response = requests.post( 200 | url, 201 | data=json.dumps(payload, ensure_ascii=False).encode('utf-8'), 202 | headers=headers, 203 | timeout=config.get("timeout", 60) 204 | ) 205 | if submission_response.status_code == 400: 206 | print("提交失败,尝试使用最小参数重试...") 207 | minimal_payload = { 208 | 'model': model, 209 | 'prompt': prompt 210 | } 211 | submission_response = requests.post( 212 | url, 213 | data=json.dumps(minimal_payload, ensure_ascii=False).encode('utf-8'), 214 | headers=headers, 215 | timeout=config.get("timeout", 60) 216 | ) 217 | if submission_response.status_code != 200: 218 | raise Exception(f"API请求失败: {submission_response.status_code}, {submission_response.text}") 219 | submission_json = submission_response.json() 220 | image_url = None 221 | if 'task_id' in submission_json: 222 | task_id = submission_json['task_id'] 223 | print(f"🕒 已提交任务,任务ID: {task_id},开始轮询...") 224 | poll_start = time.time() 225 | max_wait_seconds = max(60, config.get('timeout', 720)) 226 | while True: 227 | task_resp = requests.get( 228 | f"https://api-inference.modelscope.cn/v1/tasks/{task_id}", 229 | headers={ 230 | 'Authorization': f'Bearer {api_token}', 231 | 'X-ModelScope-Task-Type': 'image_generation' 232 | }, 233 | timeout=config.get("image_download_timeout", 120) 234 | ) 235 | if task_resp.status_code != 200: 236 | raise Exception(f"任务查询失败: {task_resp.status_code}, {task_resp.text}") 237 | task_data = task_resp.json() 238 | status = task_data.get('task_status') 239 | if status == 'SUCCEED': 240 | output_images = task_data.get('output_images') or [] 241 | if not output_images: 242 | raise Exception("任务成功但未返回图片URL") 243 | image_url = output_images[0] 244 | print("任务完成,开始下载图片...") 245 | break 246 | if status == 'FAILED': 247 | raise Exception(f"任务失败: {task_data}") 248 | if time.time() - poll_start > max_wait_seconds: 249 | raise Exception("任务轮询超时,请稍后重试或降低并发") 250 | time.sleep(5) 251 | elif 'images' in submission_json and len(submission_json['images']) > 0: 252 | image_url = submission_json['images'][0]['url'] 253 | print(f"下载生成的图片...") 254 | else: 255 | raise Exception(f"未识别的API返回格式: {submission_json}") 256 | 
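            # `image_url` was resolved either from the async task result or
            # directly from the synchronous response above; download it within
            # the configured timeout.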
img_response = requests.get(image_url, timeout=config.get("image_download_timeout", 30))
257 |             if img_response.status_code != 200:
258 |                 raise Exception(f"图片下载失败: {img_response.status_code}")
259 |             pil_image = Image.open(BytesIO(img_response.content))
260 |             if pil_image.mode != 'RGB':
261 |                 pil_image = pil_image.convert('RGB')
262 |             image_np = np.array(pil_image).astype(np.float32) / 255.0
263 |             image_tensor = torch.from_numpy(image_np)[None,]
264 |             print(f"🎉 图片处理完成!")
265 |             return (image_tensor,)
266 |         except Exception as e:
267 |             print(f"Qwen-Image API调用失败: {str(e)}")
268 |             error_image = Image.new('RGB', (width, height), color='red')
269 |             error_np = np.array(error_image).astype(np.float32) / 255.0
270 |             error_tensor = torch.from_numpy(error_np)[None,]
271 |             return (error_tensor,)
272 | 
273 | class QwenImageEditNode:
274 |     def __init__(self):
275 |         pass
276 | 
277 |     @classmethod
278 |     def INPUT_TYPES(cls):
279 |         config = load_config()
280 |         saved_token = load_api_token()
281 |         return {
282 |             "required": {
283 |                 "image": ("IMAGE",),
284 |                 "prompt": ("STRING", {
285 |                     "multiline": True,
286 |                     "default": "修改图片中的内容"
287 |                 }),
288 |                 "api_token": ("STRING", {
289 |                     "default": saved_token,
290 |                     "placeholder": "请输入您的魔搭API Token"
291 |                 }),
292 |             },
293 |             "optional": {
294 |                 "image_2": ("IMAGE",),
295 |                 "image_3": ("IMAGE",),
296 |                 "model": ("STRING", {
297 |                     "default": "Qwen/Qwen-Image-Edit"
298 |                 }),
299 |                 "negative_prompt": ("STRING", {
300 |                     "multiline": True,
301 |                     "default": ""
302 |                 }),
303 |                 "width": ("INT", {
304 |                     "default": 512,
305 |                     "min": 64,
306 |                     "max": 1664,
307 |                     "step": 8
308 |                 }),
309 |                 "height": ("INT", {
310 |                     "default": 512,
311 |                     "min": 64,
312 |                     "max": 1664,
313 |                     "step": 8
314 |                 }),
315 |                 "steps": ("INT", {
316 |                     "default": 30,
317 |                     "min": 1,
318 |                     "max": 100,
319 |                     "step": 1
320 |                 }),
321 |                 "guidance": ("FLOAT", {
322 |                     "default": 3.5,
323 |                     "min": 1.5,
324 |                     "max": 20.0,
325 |                     "step": 0.1
326 |                 }),
327 |                 "seed": ("INT", {
328 |                     "default": -1,
329 |                     "min": -1,
330 |                     "max": 2147483647
331 |                 }),
332 |             }
333 |         }
334 | 
335 |     RETURN_TYPES = ("IMAGE",)
336 |     RETURN_NAMES = ("edited_image",)
337 |     FUNCTION = "edit_image"
338 |     CATEGORY = "QwenImage"
339 | 
340 |     def edit_image(self, image, prompt, api_token, model="Qwen/Qwen-Image-Edit", negative_prompt="",
341 |                    width=512, height=512, steps=30, guidance=3.5, seed=-1, image_2=None, image_3=None):
342 |         config = load_config()
343 |         if not api_token or api_token.strip() == "":
344 |             raise Exception("请输入有效的API Token")
345 |         saved_token = load_api_token()
346 |         if api_token != saved_token:
347 |             if save_api_token(api_token):
348 |                 print("API Token已自动保存")
349 |             else:
350 |                 print("API Token保存失败,但不影响当前使用")
351 | 
352 |         try:
353 |             # Helper: save one image to a temp file and upload it for a URL.
354 |             def upload_single_image(img_tensor, index):
355 |                 temp_path = None
356 |                 img_url = None
357 |                 try:
358 |                     # Save the image to a temporary file.
359 |                     temp_path = os.path.join(tempfile.gettempdir(), f"qwen_edit_temp_{index}_{int(time.time())}.jpg")
360 |                     if len(img_tensor.shape) == 4:
361 |                         img = img_tensor[0]
362 |                     else:
363 |                         img = img_tensor
364 | 
365 |                     i = 255. * img.cpu().numpy()
366 |                     img_pil = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8))
367 |                     img_pil.save(temp_path)
368 |                     print(f"图像{index}已保存到临时文件: {temp_path}")
369 | 
370 |                     # Upload the image to kefan.cn to obtain a hosted URL.
371 |                     upload_url = 'https://ai.kefan.cn/api/upload/local'
372 |                     with open(temp_path, 'rb') as img_file:
373 |                         files = {'file': img_file}
374 |                         upload_response = requests.post(
375 |                             upload_url,
376 |                             files=files,
377 |                             timeout=30
378 |                         )
379 |                     if upload_response.status_code == 200:
380 |                         upload_data = upload_response.json()
381 |                         if upload_data.get('success') == True and 'data' in upload_data:
382 |                             img_url = upload_data['data']
383 |                             print(f"图像{index}已上传成功,获取URL: {img_url}")
384 |                         else:
385 |                             print(f"图像{index}上传返回错误: {upload_response.text}")
386 |                     else:
387 |                         print(f"图像{index}上传失败: {upload_response.status_code}, {upload_response.text}")
388 |                 except Exception as e:
389 |                     print(f"图像{index}上传异常: {str(e)}")
390 | 
391 |                 return temp_path, img_url
392 | 
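393 |             # upload_single_image returns (temp_path, None) when the upload
394 |             # fails; in that case the code below falls back to inlining the
395 |             # image as a base64 data URL instead of a hosted URL.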
396 |             # Upload the main image.
397 |             temp_img_path, image_url = upload_single_image(image, 1)
398 |             temp_paths = [temp_img_path]
399 | 
400 |             # Upload the second image, if provided.
401 |             image_2_url = None
402 |             if image_2 is not None:
403 |                 temp_path_2, image_2_url = upload_single_image(image_2, 2)
404 |                 if temp_path_2:
405 |                     temp_paths.append(temp_path_2)
406 | 
407 |             # Upload the third image, if provided.
408 |             image_3_url = None
409 |             if image_3 is not None:
410 |                 temp_path_3, image_3_url = upload_single_image(image_3, 3)
411 |                 if temp_path_3:
412 |                     temp_paths.append(temp_path_3)
413 | 
414 |             # Collect every image reference; per the official docs, multi-image
415 |             # requests pass an image_url array.
416 |             image_urls = []
417 |             image_base64s = []
418 | 
419 |             if image_url:
420 |                 image_urls.append(image_url)
421 |             else:
422 |                 image_base64s.append(tensor_to_base64_url(image))
423 | 
424 |             # Add the second image.
425 |             if image_2 is not None:
426 |                 if image_2_url:
427 |                     image_urls.append(image_2_url)
428 |                     print(f"✅ 已添加第二张图片URL")
429 |                 else:
430 |                     image_base64s.append(tensor_to_base64_url(image_2))
431 |                     print(f"✅ 已添加第二张图片(base64)")
432 | 
433 |             # Add the third image.
434 |             if image_3 is not None:
435 |                 if image_3_url:
436 |                     image_urls.append(image_3_url)
437 |                     print(f"✅ 已添加第三张图片URL")
438 |                 else:
439 |                     image_base64s.append(tensor_to_base64_url(image_3))
440 |                     print(f"✅ 已添加第三张图片(base64)")
441 | 
442 |             # Build the payload.
443 |             payload = {
444 |                 'model': model,
445 |                 'prompt': prompt
446 |             }
447 | 
448 |             # Per the official docs, image_url is always an array, even for a single image.
449 |             if len(image_urls) > 0:
450 |                 payload['image_url'] = image_urls
451 |                 print(f"📸 使用URL模式,共{len(image_urls)}张图片: {image_urls}")
452 |             elif len(image_base64s) > 0:
453 |                 # The base64 path also uses an array.
454 |                 payload['image'] = image_base64s
455 |                 print(f"📸 使用Base64模式,共{len(image_base64s)}张图片")
456 | 
457 |             if negative_prompt.strip():
458 |                 payload['negative_prompt'] = negative_prompt
459 |                 print(f"🚫 负向提示词: {negative_prompt}")
460 | 
461 |             # Optional generation parameters; only send non-default values.
462 |             if width != 512 or height != 512:
463 |                 size = f"{width}x{height}"
464 |                 payload['size'] = size
465 |                 print(f"📏 图像尺寸: {size}")
466 | 
467 |             if steps != 30:
468 |                 payload['steps'] = steps
469 |                 print(f"采样步数: {steps}")
470 | 
471 |             if guidance != 3.5:
472 |                 payload['guidance'] = guidance
473 |                 print(f"🧭 引导系数: {guidance}")
474 | 
475 |             if seed != -1:
476 |                 payload['seed'] = seed
477 |                 print(f"🎲 随机种子: {seed}")
478 | 
479 |             headers = {
480 |                 'Authorization': f'Bearer {api_token}',
481 |                 'Content-Type': 'application/json',
482 |                 'X-ModelScope-Async-Mode': 'true'
483 |             }
484 | 
485 |             print(f"开始编辑图片...")
486 |             print(f"编辑提示: {prompt}")
487 | 
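488 |             # X-ModelScope-Async-Mode makes the endpoint return a task_id
489 |             # immediately; the result is then fetched by polling the tasks
490 |             # endpoint below.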
491 |             url = 'https://api-inference.modelscope.cn/v1/images/generations'
492 |             submission_response = requests.post(
493 |                 url,
494 |                 data=json.dumps(payload, ensure_ascii=False).encode('utf-8'),
495 |                 headers=headers,
496 |                 timeout=config.get("timeout", 60)
497 |             )
498 | 
499 |             if submission_response.status_code != 200:
500 |                 raise Exception(f"API请求失败: {submission_response.status_code}, {submission_response.text}")
501 | 
502 |             submission_json = submission_response.json()
503 |             result_image_url = None
504 | 
505 |             if 'task_id' in submission_json:
506 |                 task_id = submission_json['task_id']
507 |                 print(f"🕒 已提交任务,任务ID: {task_id},开始轮询...")
508 |                 poll_start = time.time()
509 |                 max_wait_seconds = max(60, config.get('timeout', 720))
510 | 
511 |                 while True:
512 |                     task_resp = requests.get(
513 |                         f"https://api-inference.modelscope.cn/v1/tasks/{task_id}",
514 |                         headers={
515 |                             'Authorization': f'Bearer {api_token}',
516 |                             'X-ModelScope-Task-Type': 'image_generation'
517 |                         },
518 |                         timeout=config.get("image_download_timeout", 120)
519 |                     )
520 | 
521 |                     if task_resp.status_code != 200:
522 |                         raise Exception(f"任务查询失败: {task_resp.status_code}, {task_resp.text}")
523 | 
524 |                     task_data = task_resp.json()
525 |                     status = task_data.get('task_status')
526 | 
527 |                     if status == 'SUCCEED':
528 |                         output_images = task_data.get('output_images') or []
529 |                         if not output_images:
530 |                             raise Exception("任务成功但未返回图片URL")
531 |                         result_image_url = output_images[0]
532 |                         print("任务完成,开始下载编辑后的图片...")
533 |                         break
534 | 
535 |                     if status == 'FAILED':
536 |                         error_message = task_data.get('errors', {}).get('message', '未知错误')
537 |                         error_code = task_data.get('errors', {}).get('code', '未知错误码')
538 |                         raise Exception(f"任务失败: 错误码 {error_code}, 错误信息: {error_message}")
539 | 
540 |                     if time.time() - poll_start > max_wait_seconds:
541 |                         raise Exception("任务轮询超时,请稍后重试或降低并发")
542 | 
543 |                     time.sleep(5)
544 |             else:
545 |                 raise Exception(f"未识别的API返回格式: {submission_json}")
546 | 
547 |             img_response = requests.get(result_image_url, timeout=config.get("image_download_timeout", 30))
548 |             if img_response.status_code != 200:
549 |                 raise Exception(f"图片下载失败: {img_response.status_code}")
550 | 
551 |             pil_image = Image.open(BytesIO(img_response.content))
552 |             if pil_image.mode != 'RGB':
553 |                 pil_image = pil_image.convert('RGB')
554 | 
555 |             image_np = np.array(pil_image).astype(np.float32) / 255.0
556 |             image_tensor = torch.from_numpy(image_np)[None,]
557 | 
558 |             # Clean up every temporary file.
559 |             for temp_path in temp_paths:
560 |                 if temp_path and os.path.exists(temp_path):
561 |                     try:
562 |                         os.remove(temp_path)
563 |                     except Exception:
564 |                         pass
565 | 
566 |             print(f"🎉 图片编辑完成!")
567 |             return (image_tensor,)
568 | 
569 |         except Exception as e:
570 |             print(f"Qwen-Image-Edit API调用失败: {str(e)}")
571 |             # Best-effort cleanup of any temp files created before the failure
572 |             # (`temp_paths` may not exist if it failed very early).
573 |             for temp_path in locals().get("temp_paths") or []:
574 |                 if temp_path and os.path.exists(temp_path):
575 |                     try:
576 |                         os.remove(temp_path)
577 |                     except Exception:
578 |                         pass
579 | 
580 |             # Fall back to the original input. It is already a batched
581 |             # (B, H, W, C) tensor, so return it unchanged rather than adding
582 |             # another dimension with unsqueeze(0).
583 |             return (image,)
584 | 
585 | NODE_CLASS_MAPPINGS = {
586 |     "QwenImageNode": QwenImageNode,
587 |     "QwenImageEditNode": QwenImageEditNode
588 | }
589 | 
590 | NODE_DISPLAY_NAME_MAPPINGS = {
591 |     "QwenImageNode": "Qwen-Image 生图节点",
592 |     "QwenImageEditNode": "Qwen-Image 图像编辑节点"
593 | }
--------------------------------------------------------------------------------
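A minimal offline sketch of the detection post-processing in qwen_detection_node.py, handy for sanity-checking the coordinate scaling without an API key. The import path is an assumption (it presumes the plugin directory is on PYTHONPATH and that parse_json passes plain JSON through unchanged):

    # offline_parse_demo.py -- illustrative sketch, not part of the plugin
    from qwen_detection_node import parse_boxes  # assumed import path

    # A canned reply in the format the detection prompt requests.
    reply = '[{"bbox_2d": [256, 256, 512, 512], "label": "logo"}]'

    # Scale from the assumed 1024x1024 model space to a 2048x1536 image.
    boxes = parse_boxes(reply, 2048, 1536, 1024, 1024)
    print(boxes)  # [{'score': 1.0, 'bbox': [512, 384, 1024, 768], 'label': 'logo'}]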