├── workflows
│   ├── 测试图1.png
│   ├── 测试图2.png
│   └── workflow.png
├── LICENSE.txt
├── README.md
└── __init__.py
/workflows/测试图1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cardenluo/easy_qwenEdit_2509/HEAD/workflows/测试图1.png
--------------------------------------------------------------------------------
/workflows/测试图2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cardenluo/easy_qwenEdit_2509/HEAD/workflows/测试图2.png
--------------------------------------------------------------------------------
/workflows/workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cardenluo/easy_qwenEdit_2509/HEAD/workflows/workflow.png
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 pythongosssss
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

## Plugin Introduction

This node adjusts the official size limits and adds size-consistency preprocessing, which minimizes offset between the reference images and the generated result. With properly normalized input sizes, zero offset is easy to achieve.

The "总控_QwenEditplus堆" node of the https://github.com/cardenluo/ComfyUI-Apt_Preset plugin has many zero-offset examples on Bilibili; this node applies the same principle in a non-pipeline form.

Demo: example workflows are attached in the `workflows` folder.

Update: three optional modes for automatically unifying image sizes.

`auto_resize` scaling modes: crop = center crop, pad = center pad with black, stretch = force stretch. A rough sketch of the dimension math behind each mode is shown below.
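
A minimal sketch in plain Python, mirroring the `_auto_resize` logic in `__init__.py` (the helper name `plan_resize` is made up for illustration), of how each mode maps a reference image onto the `latent_image` size:

```python
# Illustrative only: summarizes what each auto_resize mode does to a reference image
# of size (orig_w, orig_h) when the latent_image size is (target_w, target_h).
def plan_resize(orig_w, orig_h, target_w, target_h, mode="crop"):
    if mode == "crop":
        # Scale so the image covers the target, then center-crop the overflow.
        scale = max(target_w / orig_w, target_h / orig_h)
        return f"scale to {round(orig_w * scale)}x{round(orig_h * scale)}, center-crop to {target_w}x{target_h}"
    if mode == "pad":
        # Scale so the image fits inside the target, then fill the rest with black.
        scale = min(target_w / orig_w, target_h / orig_h)
        return f"scale to {round(orig_w * scale)}x{round(orig_h * scale)}, pad with black to {target_w}x{target_h}"
    # stretch: ignore the aspect ratio entirely.
    return f"stretch to {target_w}x{target_h}"

print(plan_resize(1024, 768, 768, 768, "crop"))  # scale to 1024x768, center-crop to 768x768
print(plan_resize(1024, 768, 768, 768, "pad"))   # scale to 768x576, pad with black to 768x768
```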
### Input Parameters

**Required parameters:**
- `clip`: CLIP model (text encoder)
- `vae`: VAE model

**Optional parameters:**
- `image1`: first reference image
- `image2`: second reference image
- `image3`: third reference image
- `auto_resize`: size-matching mode (`crop` / `pad` / `stretch`, default: `crop`)
- `vl_size`: vision input resolution; affects detail (default: 384, range: 64-2048, step: 64)
- `prompt`: text prompt (multi-line supported)
- `system_prompt`: system prompt guiding the feature description and edit logic (a default is provided)
- `latent_image`: size reference for the generated image (must be provided)
- `latent_mask`: mask for the generated image (optional)

### Output Parameters

- `positive`: positive conditioning
- `zero_negative`: zeroed negative conditioning
- `latent`: latent

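For orientation, here is a hypothetical sketch of calling the node programmatically; inside ComfyUI you would normally wire `Easy_QwenEdit2509` between the CLIP/VAE loaders and a sampler, and the variables `clip`, `vae`, `ref_image`, and `size_image` below are assumed to be loaded elsewhere:

```python
node = Easy_QwenEdit2509()
positive, zero_negative, latent = node.QWENencode(
    clip=clip,                 # text encoder from the Qwen checkpoint/CLIP loader
    vae=vae,                   # matching VAE
    image1=ref_image,          # reference image to edit
    latent_image=size_image,   # determines the output size (must be provided)
    auto_resize="crop",        # crop / pad / stretch
    vl_size=384,
    prompt="Replace the background with a sunny beach.",
)
# positive / zero_negative connect to the sampler's conditioning inputs;
# latent is its starting latent.
```
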
## Version History

- 2509: initial release

--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
# ComfyUI custom node: Qwen-Image-Edit (2509) conditioning helper with reference-image
# size matching to reduce offset between the inputs and the generated result.
import node_helpers
import comfy.utils
import math
import torch


class Easy_QwenEdit2509:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "clip": ("CLIP",),
                "vae": ("VAE",),
            },
            "optional": {
                "image1": ("IMAGE", ),
                "image2": ("IMAGE", ),
                "image3": ("IMAGE", ),
                "auto_resize": (["crop", "pad", "stretch"], {"default": "crop"}),
                "vl_size": ("INT", {"default": 384, "min": 64, "max": 2048, "step": 64}),
                "prompt": ("STRING", {"multiline": True, "default": ""}),
                "latent_image": ("IMAGE", ),
                "latent_mask": ("MASK", ),
                "system_prompt": ("STRING", {"multiline": False, "default": "Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate."}),
            }
        }

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT",)
    RETURN_NAMES = ("positive", "zero_negative", "latent",)
    FUNCTION = "QWENencode"
    CATEGORY = "conditioning"
    DESCRIPTION = """
vl_size: vision input resolution; affects detail.
latent_image: determines the size of the generated image.
latent_mask: mask for the generated image.
system_prompt: system prompt guiding how image features are described and modified (a basic default is provided).
auto_resize: size-matching mode (crop = center crop / pad = black padding / stretch = force stretch)."""

    def _process_image_channels(self, image):
        # Normalize to 3-channel RGB: composite RGBA onto a black background, then clamp to [0, 1].
        if image is None:
            return None
        if len(image.shape) == 4:
            b, h, w, c = image.shape
            if c == 4:
                rgb = image[..., :3]
                alpha = image[..., 3:4]
                black_bg = torch.zeros_like(rgb)
                image = rgb * alpha + black_bg * (1 - alpha)
                image = image[..., :3]
            elif c != 3:
                image = image[..., :3]
        elif len(image.shape) == 3:
            h, w, c = image.shape
            if c == 4:
                rgb = image[..., :3]
                alpha = image[..., 3:4]
                black_bg = torch.zeros_like(rgb)
                image = rgb * alpha + black_bg * (1 - alpha)
                image = image[..., :3]
            elif c != 3:
                image = image[..., :3]
        image = image.clamp(0.0, 1.0)
        return image

    def _auto_resize(self, image: torch.Tensor, target_h: int, target_w: int, auto_resize: str) -> torch.Tensor:
        batch, ch, orig_h, orig_w = image.shape

        # Enforce a minimum size of 32 px (to suit the VAE's 3x3 convolution kernels)
        target_h = max(target_h, 32)
        target_w = max(target_w, 32)
        orig_h = max(orig_h, 32)
        orig_w = max(orig_w, 32)

        if auto_resize == "crop":
            scale = max(target_w / orig_w, target_h / orig_h)
            new_w = int(orig_w * scale)
            new_h = int(orig_h * scale)
            # Make sure the scaled image covers the target so the crop never comes up short
            new_w = max(new_w, target_w)
            new_h = max(new_h, target_h)
            scaled = comfy.utils.common_upscale(image, new_w, new_h, "bicubic", "disabled")
            x_offset = (new_w - target_w) // 2
            y_offset = (new_h - target_h) // 2
            # Keep the cropped width/height at least 32 px
            crop_h = min(target_h, new_h - y_offset)
            crop_w = min(target_w, new_w - x_offset)
            crop_h = max(crop_h, 32)
            crop_w = max(crop_w, 32)
            result = scaled[:, :, y_offset:y_offset + crop_h, x_offset:x_offset + crop_w]

        elif auto_resize == "pad":
            scale = min(target_w / orig_w, target_h / orig_h)
            new_w = int(orig_w * scale)
            new_h = int(orig_h * scale)
            scaled = comfy.utils.common_upscale(image, new_w, new_h, "bicubic", "disabled")
            black_bg = torch.zeros((batch, ch, target_h, target_w), dtype=image.dtype, device=image.device)
            x_offset = (target_w - new_w) // 2
            y_offset = (target_h - new_h) // 2
            black_bg[:, :, y_offset:y_offset + new_h, x_offset:x_offset + new_w] = scaled
            result = black_bg

        elif auto_resize == "stretch":
            result = comfy.utils.common_upscale(image, target_w, target_h, "bicubic", "disabled")

        else:
            # Unknown mode: fall back to crop behavior
            scale = max(target_w / orig_w, target_h / orig_h)
            new_w = int(orig_w * scale)
            new_h = int(orig_h * scale)
            scaled = comfy.utils.common_upscale(image, new_w, new_h, "bicubic", "disabled")
            x_offset = (new_w - target_w) // 2
            y_offset = (new_h - target_h) // 2
            result = scaled[:, :, y_offset:y_offset + target_h, x_offset:x_offset + target_w]

        # Final size must be a multiple of 8 and at least 32 px
        final_w = max(32, (result.shape[3] // 8) * 8)
        final_h = max(32, (result.shape[2] // 8) * 8)

        if final_w != result.shape[3] or final_h != result.shape[2]:
            x_offset = (result.shape[3] - final_w) // 2
            y_offset = (result.shape[2] - final_h) // 2
            result = result[:, :, y_offset:y_offset + final_h, x_offset:x_offset + final_w]

        return result

    def QWENencode(self, prompt="", image1=None, image2=None, image3=None, vae=None, clip=None, vl_size=384,
                   latent_image=None, latent_mask=None, system_prompt="", auto_resize="crop"):

        if latent_image is None:
            raise ValueError("latent_image must be provided to determine the size of the generated image")

        image1 = self._process_image_channels(image1)
        image2 = self._process_image_channels(image2)
        image3 = self._process_image_channels(image3)
        orig_images = [image1, image2, image3]
        images_vl = []
        llama_template = self.get_system_prompt(system_prompt)
        image_prompt = ""

        # Downscale each reference image to roughly vl_size * vl_size pixels for the vision encoder
        for i, image in enumerate(orig_images):
            if image is not None:
                samples = image.movedim(-1, 1)
                current_total = samples.shape[3] * samples.shape[2]
                scale_by = math.sqrt(vl_size * vl_size / current_total) if current_total > 0 else 1.0
                width = max(64, round(samples.shape[3] * scale_by))
                height = max(64, round(samples.shape[2] * scale_by))
                s = comfy.utils.common_upscale(samples, width, height, "area", "disabled")
                images_vl.append(s.movedim(1, -1))
                image_prompt += f"Picture {i + 1}: <|vision_start|><|image_pad|><|vision_end|>"

        if latent_image is not None:
            latent_image = self._process_image_channels(latent_image)
            getsamples = latent_image.movedim(-1, 1)
            target_h, target_w = getsamples.shape[2], getsamples.shape[3]

            # Match every reference image to the latent_image size to minimize offset
            for i in range(3):
                if orig_images[i] is not None:
                    img_bchw = orig_images[i].movedim(-1, 1)
                    resized_img_bchw = self._auto_resize(img_bchw, target_h, target_w, auto_resize)
                    orig_images[i] = resized_img_bchw.movedim(1, -1)

        ref_latents = []
        for i, image in enumerate(orig_images):
            if image is not None and vae is not None:
                samples = image.movedim(-1, 1)
                # Force a minimum size of 32 px so the VAE convolutions do not fail
                orig_sample_h = max(samples.shape[2], 32)
                orig_sample_w = max(samples.shape[3], 32)
                if samples.shape[2] != orig_sample_h or samples.shape[3] != orig_sample_w:
                    samples = comfy.utils.common_upscale(samples, orig_sample_w, orig_sample_h, "bicubic", "disabled")
                # Round down to a multiple of 8, still keeping at least 32 px
                width = (orig_sample_w // 8) * 8
                height = (orig_sample_h // 8) * 8
                width = max(width, 32)
                height = max(height, 32)
                scaled_img = comfy.utils.common_upscale(samples, width, height, "bicubic", "disabled")
                ref_latents.append(vae.encode(scaled_img.movedim(1, -1)[:, :, :, :3]))

        tokens = clip.tokenize(image_prompt + prompt, images=images_vl, llama_template=llama_template)
        conditioning = clip.encode_from_tokens_scheduled(tokens)
        if len(ref_latents) > 0:
            conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": ref_latents}, append=True)
        positive = conditioning
        negative = self.zero_out(positive)

        latent = {"samples": torch.zeros(1, 4, 64, 64)}
        if latent_image is not None:
            positive, negative, latent = self.addConditioning(positive, negative, latent_image, vae, mask=latent_mask if latent_mask is not None else None)

        return (positive, negative, latent)

    def addConditioning(self, positive, negative, pixels, vae, mask=None):
        pixels = self._process_image_channels(pixels)
        x = (pixels.shape[1] // 8) * 8
        y = (pixels.shape[2] // 8) * 8
        orig_pixels = pixels
        pixels = orig_pixels.clone()

        if mask is not None:
            mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")
            if pixels.shape[1] != x or pixels.shape[2] != y:
                x_offset = (pixels.shape[1] % 8) // 2
                y_offset = (pixels.shape[2] % 8) // 2
                pixels = pixels[:, x_offset:x + x_offset, y_offset:y + y_offset, :]
                mask = mask[:, :, x_offset:x + x_offset, y_offset:y + y_offset]
            # Grey out the masked region before encoding the concat latent (inpaint-style conditioning)
            m = (1.0 - mask.round()).squeeze(1)
            for i in range(3):
                pixels[:, :, :, i] = pixels[:, :, :, i] * m + 0.5 * (1 - m)
            concat_latent = vae.encode(pixels)
            out_latent = {"samples": vae.encode(orig_pixels), "noise_mask": mask}
        else:
            if pixels.shape[1] != x or pixels.shape[2] != y:
                x_offset = (pixels.shape[1] % 8) // 2
                y_offset = (pixels.shape[2] % 8) // 2
                pixels = pixels[:, x_offset:x + x_offset, y_offset:y + y_offset, :]
            concat_latent = vae.encode(pixels)
            out_latent = {"samples": concat_latent}

        out = []
        for conditioning in [positive, negative]:
            c = node_helpers.conditioning_set_values(conditioning, {"concat_latent_image": concat_latent})
            if mask is not None:
                c = node_helpers.conditioning_set_values(c, {"concat_mask": mask})
            out.append(c)
        return (out[0], out[1], out_latent)

    def zero_out(self, conditioning):
        # Build a zeroed copy of the conditioning to use as the negative prompt
        c = []
        for t in conditioning:
            d = t[1].copy()
            pooled_output = d.get("pooled_output", None)
            if pooled_output is not None:
                d["pooled_output"] = torch.zeros_like(pooled_output)
            conditioning_lyrics = d.get("conditioning_lyrics", None)
            if conditioning_lyrics is not None:
                d["conditioning_lyrics"] = torch.zeros_like(conditioning_lyrics)
            n = [torch.zeros_like(t[0]), d]
            c.append(n)
        return c

    def get_system_prompt(self, instruction):
        # Wrap the instruction in the Qwen chat template; fall back to the default system prompt if empty
        template_prefix = "<|im_start|>system\n"
        template_suffix = "<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"
        if instruction == "":
            instruction_content = "Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate."
        else:
            if template_prefix in instruction:
                instruction = instruction.split(template_prefix)[1]
            if template_suffix in instruction:
                instruction = instruction.split(template_suffix)[0]
            if "{}" in instruction:
                instruction = instruction.replace("{}", "")
            instruction_content = instruction
        return template_prefix + instruction_content + template_suffix

NODE_CLASS_MAPPINGS = {
    "Easy_QwenEdit2509": Easy_QwenEdit2509,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "Easy_QwenEdit2509": "Easy_QwenEdit2509",
}

--------------------------------------------------------------------------------