├── workflows ├── 测试图1.png ├── 测试图2.png └── workflow.png ├── LICENSE.txt ├── README.md └── __init__.py /workflows/测试图1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardenluo/easy_qwenEdit_2509/HEAD/workflows/测试图1.png -------------------------------------------------------------------------------- /workflows/测试图2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardenluo/easy_qwenEdit_2509/HEAD/workflows/测试图2.png -------------------------------------------------------------------------------- /workflows/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cardenluo/easy_qwenEdit_2509/HEAD/workflows/workflow.png -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 pythongosssss 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## 插件介绍 3 | 4 | 修改了官方的尺寸限制,同时做了尺寸一致性处理,可以最大程度减少偏移问题。如果尺寸前处理规范,可以很容易实现零偏移, 5 | 6 | https://github.com/cardenluo/ComfyUI-Apt_Preset 插件的"总控_QwenEditplus堆"在B站做很多无偏移的案例, 此为相同原理的非管线版本 7 | 8 | 9 | image 10 | 11 | 演示:附件有工作流 12 | 13 | image 14 | 15 | 16 | 更新:3种可选自动统一尺寸的方式 17 | 18 | auto resize 缩放模式(crop=中心裁剪,pad=中心黑色填充,stretch=强制拉伸) 19 | 20 | image 21 | 22 | image 23 | 24 | 25 | **可选参数:** 26 | - `image1`: 第一张参考图像 27 | - `image2`: 第二张参考图像 28 | - `image3`: 第三张参考图像 29 | - `vl_size`: 视觉尺寸,会影响细节(默认:384,范围:64-2048,步长:64) 30 | - `prompt`: 文本提示(多行支持) 31 | - `latent_image`: 生成图尺寸基准图(必填) 32 | - `latent_mask`: 生成图遮罩(可选) 33 | 34 | ### 输出参数 35 | 36 | - `positive`: 正条件 37 | - `zero_negative`: 零负条件 38 | - `latent`: 潜变量 39 | 40 | 41 | ## 版本历史 42 | 43 | - 2509: 初始版本 44 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import node_helpers 3 | import comfy.utils 4 | import math 5 | import torch 6 | import numpy as np 7 | from PIL import Image 8 | import json 9 | import os 10 | import copy 11 | import folder_paths 12 | import hashlib 13 | 14 | 15 | 16 | 17 | class Easy_QwenEdit2509: 18 | @classmethod 19 | def INPUT_TYPES(s): 20 | return { 21 | "required": { 22 | "clip": ("CLIP",), 23 | "vae": ("VAE",), 24 | }, 25 | "optional": { 26 | "image1": ("IMAGE", ), 27 | "image2": ("IMAGE", ), 28 | "image3": ("IMAGE", ), 29 | "auto_resize": (["crop", "pad", "stretch"], {"default": "crop"}), 30 | "vl_size": ("INT", {"default": 384, "min": 64, "max": 2048, "step": 64}), 31 | "prompt": ("STRING", {"multiline": True, "default": ""}), 32 | "latent_image": ("IMAGE", ), 33 | "latent_mask": ("MASK", ), 34 | 35 | "system_prompt": ("STRING", {"multiline": False, "default": "Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate."}), 36 | } 37 | } 38 | 39 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT",) 40 | RETURN_NAMES = ("positive", "zero_negative", "latent",) 41 | FUNCTION = "QWENencode" 42 | CATEGORY = "conditioning" 43 | DESCRIPTION = """ 44 | vl_size:视觉尺寸,会影响细节 45 | latent_image: 生成图尺寸。Generate the size of the figure. 46 | latent_mask: 生成图遮罩 47 | system_prompt:系统提示词,指导图像特征描述与修改逻辑(默认提供基础配置) 48 | auto_resize:尺寸适配模式(crop-中心裁剪/pad-黑色填充/stretch-强制拉伸)""" 49 | 50 | def _process_image_channels(self, image): 51 | if image is None: 52 | return None 53 | if len(image.shape) == 4: 54 | b, h, w, c = image.shape 55 | if c == 4: 56 | rgb = image[..., :3] 57 | alpha = image[..., 3:4] 58 | black_bg = torch.zeros_like(rgb) 59 | image = rgb * alpha + black_bg * (1 - alpha) 60 | image = image[..., :3] 61 | elif c != 3: 62 | image = image[..., :3] 63 | elif len(image.shape) == 3: 64 | h, w, c = image.shape 65 | if c == 4: 66 | rgb = image[..., :3] 67 | alpha = image[..., 3:4] 68 | black_bg = torch.zeros_like(rgb) 69 | image = rgb * alpha + black_bg * (1 - alpha) 70 | image = image[..., :3] 71 | elif c != 3: 72 | image = image[..., :3] 73 | image = image.clamp(0.0, 1.0) 74 | return image 75 | 76 | def _auto_resize(self, image: torch.Tensor, target_h: int, target_w: int, auto_resize: str) -> torch.Tensor: 77 | batch, ch, orig_h, orig_w = image.shape 78 | 79 | # 强制最小尺寸≥32(适配VAE 3×3卷积核) 80 | target_h = max(target_h, 32) 81 | target_w = max(target_w, 32) 82 | orig_h = max(orig_h, 32) 83 | orig_w = max(orig_w, 32) 84 | 85 | if auto_resize == "crop": 86 | scale = max(target_w / orig_w, target_h / orig_h) 87 | new_w = int(orig_w * scale) 88 | new_h = int(orig_h * scale) 89 | # 强制新尺寸≥目标尺寸,避免裁剪后不足 90 | new_w = max(new_w, target_w) 91 | new_h = max(new_h, target_h) 92 | scaled = comfy.utils.common_upscale(image, new_w, new_h, "bicubic", "disabled") 93 | x_offset = (new_w - target_w) // 2 94 | y_offset = (new_h - target_h) // 2 95 | # 裁剪后强制宽高≥32,避免过小 96 | crop_h = min(target_h, new_h - y_offset) 97 | crop_w = min(target_w, new_w - x_offset) 98 | crop_h = max(crop_h, 32) 99 | crop_w = max(crop_w, 32) 100 | result = scaled[:, :, y_offset:y_offset + crop_h, x_offset:x_offset + crop_w] 101 | 102 | elif auto_resize == "pad": 103 | scale = min(target_w / orig_w, target_h / orig_h) 104 | new_w = int(orig_w * scale) 105 | new_h = int(orig_h * scale) 106 | scaled = comfy.utils.common_upscale(image, new_w, new_h, "bicubic", "disabled") 107 | black_bg = torch.zeros((batch, ch, target_h, target_w), dtype=image.dtype, device=image.device) 108 | x_offset = (target_w - new_w) // 2 109 | y_offset = (target_h - new_h) // 2 110 | black_bg[:, :, y_offset:y_offset + new_h, x_offset:x_offset + new_w] = scaled 111 | result = black_bg 112 | 113 | elif auto_resize == "stretch": 114 | result = comfy.utils.common_upscale(image, target_w, target_h, "bicubic", "disabled") 115 | 116 | else: 117 | scale = max(target_w / orig_w, target_h / orig_h) 118 | new_w = int(orig_w * scale) 119 | new_h = int(orig_h * scale) 120 | scaled = comfy.utils.common_upscale(image, new_w, new_h, "bicubic", "disabled") 121 | x_offset = (new_w - target_w) // 2 122 | y_offset = (new_h - target_h) // 2 123 | result = scaled[:, :, y_offset:y_offset + target_h, x_offset:x_offset + target_w] 124 | 125 | # 最终尺寸确保是8的倍数且≥32 126 | final_w = max(32, (result.shape[3] // 8) * 8) 127 | final_h = max(32, (result.shape[2] // 8) * 8) 128 | 129 | if final_w != result.shape[3] or final_h != result.shape[2]: 130 | x_offset = (result.shape[3] - final_w) // 2 131 | y_offset = (result.shape[2] - final_h) // 2 132 | result = result[:, :, y_offset:y_offset + final_h, x_offset:x_offset + final_w] 133 | 134 | return result 135 | 136 | def QWENencode(self, prompt="", image1=None, image2=None, image3=None, vae=None, clip=None, vl_size=384, 137 | latent_image=None, latent_mask=None, system_prompt="", auto_resize="crop"): 138 | 139 | if latent_image is None: 140 | raise ValueError("latent_image Must be input to determine the size of the generated image;latent_image 必须输入以确定生成图像的尺寸") 141 | 142 | image1 = self._process_image_channels(image1) 143 | image2 = self._process_image_channels(image2) 144 | image3 = self._process_image_channels(image3) 145 | orig_images = [image1, image2, image3] 146 | images_vl = [] 147 | llama_template = self.get_system_prompt(system_prompt) 148 | image_prompt = "" 149 | 150 | for i, image in enumerate(orig_images): 151 | if image is not None: 152 | samples = image.movedim(-1, 1) 153 | current_total = samples.shape[3] * samples.shape[2] 154 | scale_by = math.sqrt(vl_size * vl_size / current_total) if current_total > 0 else 1.0 155 | width = max(64, round(samples.shape[3] * scale_by)) 156 | height = max(64, round(samples.shape[2] * scale_by)) 157 | s = comfy.utils.common_upscale(samples, width, height, "area", "disabled") 158 | images_vl.append(s.movedim(1, -1)) 159 | image_prompt += f"Picture {i + 1}: <|vision_start|><|image_pad|><|vision_end|>" 160 | 161 | if latent_image is not None: 162 | latent_image = self._process_image_channels(latent_image) 163 | getsamples = latent_image.movedim(-1, 1) 164 | target_h, target_w = getsamples.shape[2], getsamples.shape[3] 165 | 166 | for i in range(3): 167 | if orig_images[i] is not None: 168 | img_bchw = orig_images[i].movedim(-1, 1) 169 | resized_img_bchw = self._auto_resize(img_bchw, target_h, target_w, auto_resize) 170 | orig_images[i] = resized_img_bchw.movedim(1, -1) 171 | 172 | ref_latents = [] 173 | for i, image in enumerate(orig_images): 174 | if image is not None and vae is not None: 175 | samples = image.movedim(-1, 1) 176 | # 强制尺寸≥32,避免VAE卷积报错 177 | orig_sample_h = max(samples.shape[2], 32) 178 | orig_sample_w = max(samples.shape[3], 32) 179 | if samples.shape[2] != orig_sample_h or samples.shape[3] != orig_sample_w: 180 | samples = comfy.utils.common_upscale(samples, orig_sample_w, orig_sample_h, "bicubic", "disabled") 181 | # 计算8的倍数尺寸,仍强制≥32 182 | width = (orig_sample_w // 8) * 8 183 | height = (orig_sample_h // 8) * 8 184 | width = max(width, 32) 185 | height = max(height, 32) 186 | scaled_img = comfy.utils.common_upscale(samples, width, height, "bicubic", "disabled") 187 | ref_latents.append(vae.encode(scaled_img.movedim(1, -1)[:, :, :, :3])) 188 | 189 | tokens = clip.tokenize(image_prompt + prompt, images=images_vl, llama_template=llama_template) 190 | conditioning = clip.encode_from_tokens_scheduled(tokens) 191 | if len(ref_latents) > 0: 192 | conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_latents": ref_latents}, append=True) 193 | positive = conditioning 194 | negative = self.zero_out(positive) 195 | 196 | latent = {"samples": torch.zeros(1, 4, 64, 64)} 197 | if latent_image is not None: 198 | positive, negative, latent = self.addConditioning(positive, negative, latent_image, vae, mask=latent_mask if latent_mask is not None else None) 199 | 200 | return (positive, negative, latent) 201 | 202 | def addConditioning(self, positive, negative, pixels, vae, mask=None): 203 | pixels = self._process_image_channels(pixels) 204 | x = (pixels.shape[1] // 8) * 8 205 | y = (pixels.shape[2] // 8) * 8 206 | orig_pixels = pixels 207 | pixels = orig_pixels.clone() 208 | 209 | if mask is not None: 210 | mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear") 211 | if pixels.shape[1] != x or pixels.shape[2] != y: 212 | x_offset = (pixels.shape[1] % 8) // 2 213 | y_offset = (pixels.shape[2] % 8) // 2 214 | pixels = pixels[:, x_offset:x + x_offset, y_offset:y + y_offset, :] 215 | mask = mask[:, :, x_offset:x + x_offset, y_offset:y + y_offset] 216 | m = (1.0 - mask.round()).squeeze(1) 217 | for i in range(3): 218 | pixels[:, :, :, i] = pixels[:, :, :, i] * m + 0.5 * (1 - m) 219 | concat_latent = vae.encode(pixels) 220 | out_latent = {"samples": vae.encode(orig_pixels), "noise_mask": mask} 221 | else: 222 | if pixels.shape[1] != x or pixels.shape[2] != y: 223 | x_offset = (pixels.shape[1] % 8) // 2 224 | y_offset = (pixels.shape[2] % 8) // 2 225 | pixels = pixels[:, x_offset:x + x_offset, y_offset:y + y_offset, :] 226 | concat_latent = vae.encode(pixels) 227 | out_latent = {"samples": concat_latent} 228 | 229 | out = [] 230 | for conditioning in [positive, negative]: 231 | c = node_helpers.conditioning_set_values(conditioning, {"concat_latent_image": concat_latent}) 232 | if mask is not None: 233 | c = node_helpers.conditioning_set_values(c, {"concat_mask": mask}) 234 | out.append(c) 235 | return (out[0], out[1], out_latent) 236 | 237 | def zero_out(self, conditioning): 238 | c = [] 239 | for t in conditioning: 240 | d = t[1].copy() 241 | pooled_output = d.get("pooled_output", None) 242 | if pooled_output is not None: 243 | d["pooled_output"] = torch.zeros_like(pooled_output) 244 | conditioning_lyrics = d.get("conditioning_lyrics", None) 245 | if conditioning_lyrics is not None: 246 | d["conditioning_lyrics"] = torch.zeros_like(conditioning_lyrics) 247 | n = [torch.zeros_like(t[0]), d] 248 | c.append(n) 249 | return c 250 | 251 | def get_system_prompt(self, instruction): 252 | template_prefix = "<|im_start|>system\n" 253 | template_suffix = "<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n" 254 | if instruction == "": 255 | instruction_content = "Describe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate." 256 | else: 257 | if template_prefix in instruction: 258 | instruction = instruction.split(template_prefix)[1] 259 | if template_suffix in instruction: 260 | instruction = instruction.split(template_suffix)[0] 261 | if "{}" in instruction: 262 | instruction = instruction.replace("{}", "") 263 | instruction_content = instruction 264 | return template_prefix + instruction_content + template_suffix 265 | 266 | NODE_CLASS_MAPPINGS = { 267 | "Easy_QwenEdit2509": Easy_QwenEdit2509, 268 | } 269 | 270 | NODE_DISPLAY_NAME_MAPPINGS = { 271 | "Easy_QwenEdit2509": "Easy_QwenEdit2509", 272 | } 273 | 274 | 275 | 276 | 277 | 278 | 279 | --------------------------------------------------------------------------------