├── workflows
│   ├── 测试图1.png
│   ├── 测试图2.png
│   └── workflow.png
├── LICENSE.txt
├── README.md
└── __init__.py
/workflows/测试图1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cardenluo/easy_qwenEdit_2509/HEAD/workflows/测试图1.png
--------------------------------------------------------------------------------
/workflows/测试图2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cardenluo/easy_qwenEdit_2509/HEAD/workflows/测试图2.png
--------------------------------------------------------------------------------
/workflows/workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cardenluo/easy_qwenEdit_2509/HEAD/workflows/workflow.png
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 pythongosssss
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

## Plugin Introduction

This node adjusts the official size limits and adds size-consistency preprocessing, which minimizes offset between the reference images and the generated result. With properly normalized input sizes, zero offset is easy to achieve.

The "总控_QwenEditplus堆" node of the https://github.com/cardenluo/ComfyUI-Apt_Preset plugin has many zero-offset examples on Bilibili; this node applies the same principle in a non-pipeline form.

Demo: example workflows are attached in the `workflows` folder.

Update: three optional modes for automatically unifying image sizes.

`auto_resize` scaling modes: crop = center crop, pad = center pad with black, stretch = force stretch. A rough sketch of the dimension math behind each mode is shown below.
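
A minimal sketch in plain Python, mirroring the `_auto_resize` logic in `__init__.py` (the helper name `plan_resize` is made up for illustration), of how each mode maps a reference image onto the `latent_image` size:

```python
# Illustrative only: summarizes what each auto_resize mode does to a reference image
# of size (orig_w, orig_h) when the latent_image size is (target_w, target_h).
def plan_resize(orig_w, orig_h, target_w, target_h, mode="crop"):
    if mode == "crop":
        # Scale so the image covers the target, then center-crop the overflow.
        scale = max(target_w / orig_w, target_h / orig_h)
        return f"scale to {round(orig_w * scale)}x{round(orig_h * scale)}, center-crop to {target_w}x{target_h}"
    if mode == "pad":
        # Scale so the image fits inside the target, then fill the rest with black.
        scale = min(target_w / orig_w, target_h / orig_h)
        return f"scale to {round(orig_w * scale)}x{round(orig_h * scale)}, pad with black to {target_w}x{target_h}"
    # stretch: ignore the aspect ratio entirely.
    return f"stretch to {target_w}x{target_h}"

print(plan_resize(1024, 768, 768, 768, "crop"))  # scale to 1024x768, center-crop to 768x768
print(plan_resize(1024, 768, 768, 768, "pad"))   # scale to 768x576, pad with black to 768x768
```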
### Input Parameters

**Required parameters:**
- `clip`: CLIP model (text encoder)
- `vae`: VAE model

**Optional parameters:**
- `image1`: first reference image
- `image2`: second reference image
- `image3`: third reference image
- `auto_resize`: size-matching mode (`crop` / `pad` / `stretch`, default: `crop`)
- `vl_size`: vision input resolution; affects detail (default: 384, range: 64-2048, step: 64)
- `prompt`: text prompt (multi-line supported)
- `system_prompt`: system prompt guiding the feature description and edit logic (a default is provided)
- `latent_image`: size reference for the generated image (must be provided)
- `latent_mask`: mask for the generated image (optional)

### Output Parameters

- `positive`: positive conditioning
- `zero_negative`: zeroed negative conditioning
- `latent`: latent

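For orientation, here is a hypothetical sketch of calling the node programmatically; inside ComfyUI you would normally wire `Easy_QwenEdit2509` between the CLIP/VAE loaders and a sampler, and the variables `clip`, `vae`, `ref_image`, and `size_image` below are assumed to be loaded elsewhere:

```python
node = Easy_QwenEdit2509()
positive, zero_negative, latent = node.QWENencode(
    clip=clip,                 # text encoder from the Qwen checkpoint/CLIP loader
    vae=vae,                   # matching VAE
    image1=ref_image,          # reference image to edit
    latent_image=size_image,   # determines the output size (must be provided)
    auto_resize="crop",        # crop / pad / stretch
    vl_size=384,
    prompt="Replace the background with a sunny beach.",
)
# positive / zero_negative connect to the sampler's conditioning inputs;
# latent is its starting latent.
```
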
## Version History

- 2509: initial release

--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
# ComfyUI custom node: Qwen-Image-Edit (2509) conditioning helper with reference-image
# size matching to reduce offset between the inputs and the generated result.
import node_helpers
import comfy.utils
import math
import torch


class Easy_QwenEdit2509:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "clip": ("CLIP",),
                "vae": ("VAE",),
            },
            "optional": {
                "image1": ("IMAGE", ),
                "image2": ("IMAGE", ),
                "image3": ("IMAGE", ),
                "auto_resize": (["crop", "pad", "stretch"], {"default": "crop"}),
                "vl_size": ("INT", {"default": 384, "min": 64, "max": 2048, "step": 64}),
                "prompt": ("STRING", {"multiline": True, "default": ""}),
                "latent_image": ("IMAGE", ),
                "latent_mask": ("MASK", ),
                "system_prompt": ("STRING", {"multiline": False, "default": "Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate."}),
            }
        }

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT",)
    RETURN_NAMES = ("positive", "zero_negative", "latent",)
    FUNCTION = "QWENencode"
    CATEGORY = "conditioning"
    DESCRIPTION = """
vl_size: vision input resolution; affects detail.
latent_image: determines the size of the generated image.
latent_mask: mask for the generated image.
system_prompt: system prompt guiding how image features are described and modified (a basic default is provided).
auto_resize: size-matching mode (crop = center crop / pad = black padding / stretch = force stretch)."""

    def _process_image_channels(self, image):
        # Normalize to 3-channel RGB: composite RGBA onto a black background, then clamp to [0, 1].
        if image is None:
            return None
        if len(image.shape) == 4:
            b, h, w, c = image.shape
            if c == 4:
                rgb = image[..., :3]
                alpha = image[..., 3:4]
                black_bg = torch.zeros_like(rgb)
                image = rgb * alpha + black_bg * (1 - alpha)
                image = image[..., :3]
            elif c != 3:
                image = image[..., :3]
        elif len(image.shape) == 3:
            h, w, c = image.shape
            if c == 4:
                rgb = image[..., :3]
                alpha = image[..., 3:4]
                black_bg = torch.zeros_like(rgb)
                image = rgb * alpha + black_bg * (1 - alpha)
                image = image[..., :3]
            elif c != 3:
                image = image[..., :3]
        image = image.clamp(0.0, 1.0)
        return image

    def _auto_resize(self, image: torch.Tensor, target_h: int, target_w: int, auto_resize: str) -> torch.Tensor:
        batch, ch, orig_h, orig_w = image.shape

        # Enforce a minimum size of 32 px (to suit the VAE's 3x3 convolution kernels)
        target_h = max(target_h, 32)
        target_w = max(target_w, 32)
        orig_h = max(orig_h, 32)
        orig_w = max(orig_w, 32)

        if auto_resize == "crop":
            scale = max(target_w / orig_w, target_h / orig_h)
            new_w = int(orig_w * scale)
            new_h = int(orig_h * scale)
            # Make sure the scaled image covers the target so the crop never comes up short
            new_w = max(new_w, target_w)
            new_h = max(new_h, target_h)
            scaled = comfy.utils.common_upscale(image, new_w, new_h, "bicubic", "disabled")
            x_offset = (new_w - target_w) // 2
            y_offset = (new_h - target_h) // 2
            # Keep the cropped width/height at least 32 px
            crop_h = min(target_h, new_h - y_offset)
            crop_w = min(target_w, new_w - x_offset)
            crop_h = max(crop_h, 32)
            crop_w = max(crop_w, 32)
            result = scaled[:, :, y_offset:y_offset + crop_h, x_offset:x_offset + crop_w]

        elif auto_resize == "pad":
            scale = min(target_w / orig_w, target_h / orig_h)
            new_w = int(orig_w * scale)
            new_h = int(orig_h * scale)
            scaled = comfy.utils.common_upscale(image, new_w, new_h, "bicubic", "disabled")
            black_bg = torch.zeros((batch, ch, target_h, target_w), dtype=image.dtype, device=image.device)
            x_offset = (target_w - new_w) // 2
            y_offset = (target_h - new_h) // 2
            black_bg[:, :, y_offset:y_offset + new_h, x_offset:x_offset + new_w] = scaled
            result = black_bg

        elif auto_resize == "stretch":
            result = comfy.utils.common_upscale(image, target_w, target_h, "bicubic", "disabled")

        else:
            # Unknown mode: fall back to crop behavior
            scale = max(target_w / orig_w, target_h / orig_h)
            new_w = int(orig_w * scale)
            new_h = int(orig_h * scale)
            scaled = comfy.utils.common_upscale(image, new_w, new_h, "bicubic", "disabled")
            x_offset = (new_w - target_w) // 2
            y_offset = (new_h - target_h) // 2
            result = scaled[:, :, y_offset:y_offset + target_h, x_offset:x_offset + target_w]

        # Final size must be a multiple of 8 and at least 32 px
        final_w = max(32, (result.shape[3] // 8) * 8)
        final_h = max(32, (result.shape[2] // 8) * 8)

        if final_w != result.shape[3] or final_h != result.shape[2]:
            x_offset = (result.shape[3] - final_w) // 2
            y_offset = (result.shape[2] - final_h) // 2
            result = result[:, :, y_offset:y_offset + final_h, x_offset:x_offset + final_w]

        return result

    def QWENencode(self, prompt="", image1=None, image2=None, image3=None, vae=None, clip=None, vl_size=384,
                   latent_image=None, latent_mask=None, system_prompt="", auto_resize="crop"):

        if latent_image is None:
            raise ValueError("latent_image must be provided to determine the size of the generated image")

        image1 = self._process_image_channels(image1)
        image2 = self._process_image_channels(image2)
        image3 = self._process_image_channels(image3)
        orig_images = [image1, image2, image3]
        images_vl = []
        llama_template = self.get_system_prompt(system_prompt)
        image_prompt = ""

        # Downscale each reference image to roughly vl_size * vl_size pixels for the vision encoder
        for i, image in enumerate(orig_images):
            if image is not None:
                samples = image.movedim(-1, 1)
                current_total = samples.shape[3] * samples.shape[2]
                scale_by = math.sqrt(vl_size * vl_size / current_total) if current_total > 0 else 1.0
                width = max(64, round(samples.shape[3] * scale_by))
                height = max(64, round(samples.shape[2] * scale_by))
                s = comfy.utils.common_upscale(samples, width, height, "area", "disabled")
                images_vl.append(s.movedim(1, -1))
                image_prompt += f"Picture {i + 1}: <|vision_start|><|image_pad|><|vision_end|>"

        if latent_image is not None:
            latent_image = self._process_image_channels(latent_image)
            getsamples = latent_image.movedim(-1, 1)
            target_h, target_w = getsamples.shape[2], getsamples.shape[3]

            # Match every reference image to the latent_image size to minimize offset
            for i in range(3):
                if orig_images[i] is not None:
                    img_bchw = orig_images[i].movedim(-1, 1)
                    resized_img_bchw = self._auto_resize(img_bchw, target_h, target_w, auto_resize)
                    orig_images[i] = resized_img_bchw.movedim(1, -1)

        ref_latents = []
        for i, image in enumerate(orig_images):
            if image is not None and vae is not None:
                samples = image.movedim(-1, 1)
                # Force a minimum size of 32 px so the VAE convolutions do not fail
                orig_sample_h = max(samples.shape[2], 32)
                orig_sample_w = max(samples.shape[3], 32)
                if samples.shape[2] != orig_sample_h or samples.shape[3] != orig_sample_w:
                    samples = comfy.utils.common_upscale(samples, orig_sample_w, orig_sample_h, "bicubic", "disabled")
                # Round down to a multiple of 8, still keeping at least 32 px
                width = (orig_sample_w // 8) * 8
                height = (orig_sample_h // 8) * 8
                width = max(width, 32)
                height = max(height, 32)
                scaled_img = comfy.utils.common_upscale(samples, width, height, "bicubic", "disabled")
                ref_latents.append(vae.encode(scaled_img.movedim(1, -1)[:, :, :, :3]))

        tokens = clip.tokenize(image_prompt + prompt, images=images_vl, llama_template=llama_template)
        conditioning = clip.encode_from_tokens_scheduled(tokens)
        if len(ref_latents) > 0:
            conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": ref_latents}, append=True)
        positive = conditioning
        negative = self.zero_out(positive)

        latent = {"samples": torch.zeros(1, 4, 64, 64)}
        if latent_image is not None:
            positive, negative, latent = self.addConditioning(positive, negative, latent_image, vae, mask=latent_mask if latent_mask is not None else None)

        return (positive, negative, latent)

    def addConditioning(self, positive, negative, pixels, vae, mask=None):
        pixels = self._process_image_channels(pixels)
        x = (pixels.shape[1] // 8) * 8
        y = (pixels.shape[2] // 8) * 8
        orig_pixels = pixels
        pixels = orig_pixels.clone()

        if mask is not None:
            mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")
            if pixels.shape[1] != x or pixels.shape[2] != y:
                x_offset = (pixels.shape[1] % 8) // 2
                y_offset = (pixels.shape[2] % 8) // 2
                pixels = pixels[:, x_offset:x + x_offset, y_offset:y + y_offset, :]
                mask = mask[:, :, x_offset:x + x_offset, y_offset:y + y_offset]
            # Grey out the masked region before encoding the concat latent (inpaint-style conditioning)
            m = (1.0 - mask.round()).squeeze(1)
            for i in range(3):
                pixels[:, :, :, i] = pixels[:, :, :, i] * m + 0.5 * (1 - m)
            concat_latent = vae.encode(pixels)
            out_latent = {"samples": vae.encode(orig_pixels), "noise_mask": mask}
        else:
            if pixels.shape[1] != x or pixels.shape[2] != y:
                x_offset = (pixels.shape[1] % 8) // 2
                y_offset = (pixels.shape[2] % 8) // 2
                pixels = pixels[:, x_offset:x + x_offset, y_offset:y + y_offset, :]
            concat_latent = vae.encode(pixels)
            out_latent = {"samples": concat_latent}

        out = []
        for conditioning in [positive, negative]:
            c = node_helpers.conditioning_set_values(conditioning, {"concat_latent_image": concat_latent})
            if mask is not None:
                c = node_helpers.conditioning_set_values(c, {"concat_mask": mask})
            out.append(c)
        return (out[0], out[1], out_latent)

    def zero_out(self, conditioning):
        # Build a zeroed copy of the conditioning to use as the negative prompt
        c = []
        for t in conditioning:
            d = t[1].copy()
            pooled_output = d.get("pooled_output", None)
            if pooled_output is not None:
                d["pooled_output"] = torch.zeros_like(pooled_output)
            conditioning_lyrics = d.get("conditioning_lyrics", None)
            if conditioning_lyrics is not None:
                d["conditioning_lyrics"] = torch.zeros_like(conditioning_lyrics)
            n = [torch.zeros_like(t[0]), d]
            c.append(n)
        return c

    def get_system_prompt(self, instruction):
        # Wrap the instruction in the Qwen chat template; fall back to the default system prompt if empty
        template_prefix = "<|im_start|>system\n"
        template_suffix = "<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"
        if instruction == "":
            instruction_content = "Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate."
        else:
            if template_prefix in instruction:
                instruction = instruction.split(template_prefix)[1]
            if template_suffix in instruction:
                instruction = instruction.split(template_suffix)[0]
            if "{}" in instruction:
                instruction = instruction.replace("{}", "")
            instruction_content = instruction
        return template_prefix + instruction_content + template_suffix

NODE_CLASS_MAPPINGS = {
    "Easy_QwenEdit2509": Easy_QwenEdit2509,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "Easy_QwenEdit2509": "Easy_QwenEdit2509",
}

--------------------------------------------------------------------------------