├── .github └── workflows │ └── publish.yml ├── .gitignore ├── README.md ├── __init__.py ├── attn_handler.py ├── diffusers_magic_clothing ├── MagicClothingDiffusionPipeline.py ├── attention_processor.py ├── garment_diffusion.py └── utils.py ├── diffusers_warp_nodes.py ├── example.json ├── ipadapter.json ├── nodes.py ├── pyproject.toml └── utils.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | paths: 9 | - "pyproject.toml" 10 | 11 | jobs: 12 | publish-node: 13 | name: Publish Custom Node to registry 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Check out code 17 | uses: actions/checkout@v4 18 | - name: Publish Custom Node 19 | uses: Comfy-Org/publish-node-action@main 20 | with: 21 | ## Add your own personal access token to your Github Repository secrets and reference it here. 22 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .DS_Store 3 | /*.log 4 | /conversion/ 5 | .vscode -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # comfyui-magic-clothing 2 | 3 | A native ComfyUI implementation of the [Magic Clothing](https://github.com/ShineChen1024/MagicClothing) project (rather than the diffusers version), which allows direct integration with modules such as IPAdapter 4 | 5 | ## Installation 6 | 7 | * Use `ComfyUI-Manager`, or clone this repository into `custom_nodes` 8 | * Download the models from [Hugging Face](https://huggingface.co/ShineChen1024/MagicClothing) and move them to the `comfyui/models/unet` folder 9 | 10 | ## For samples, please refer to [here](./example.json) 11 | ## For ipadapter samples, please refer to [here](./ipadapter.json) 12 | 13 | 14 | # Note 15 | 16 | * The success rate per generation is still low, and the method does not work well for dense patterns (for now, the [sigma] parameter serves as a temporary workaround for scaling the clothing-feature input in ComfyUI) 17 | * The hit rate of the current implementation is still not good enough, mainly because of how model.model_sampling.calculate_input is handled during the first UNet sampling pass. This is still under investigation; an initial version is released first. 18 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .nodes import NODE_CLASS_MAPPINGS as CM_O, NODE_DISPLAY_NAME_MAPPINGS as NM_O 2 | from .diffusers_warp_nodes import NODE_CLASS_MAPPINGS as CM_D, NODE_DISPLAY_NAME_MAPPINGS as NM_D 3 | import torch 4 | from .utils import pt_hash,pt_first_line 5 | 6 | torch.Tensor.__hash_log__ = pt_hash 7 | torch.Tensor.__fl_log__ = pt_first_line 8 | 9 | NODE_CLASS_MAPPINGS = { 10 | **CM_O, 11 | **CM_D 12 | } 13 | NODE_DISPLAY_NAME_MAPPINGS = { 14 | **NM_O, 15 | **NM_D 16 | } 17 | __all__ = ['NODE_CLASS_MAPPINGS', 'NODE_DISPLAY_NAME_MAPPINGS'] -------------------------------------------------------------------------------- /attn_handler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import Any 3 | from comfy import model_management 4 | from comfy.ldm.modules.attention import optimized_attention 5 | from .utils import clean_attn_stored_memory 6
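# Illustrative sketch (not part of the upstream file): the wrapper and patch
# classes in this module are meant to be registered through ComfyUI's
# ModelPatcher hooks. The actual wiring lives in nodes.py; the helper below
# only shows the assumed pattern, and the initial "attn_stored" keys are
# inferred from how the classes below read them.
def _example_register_patches(model, feature_guidance_scale=2.5, enable_feature_guidance=True):
    patched = model.clone()
    # transformer_options["attn_stored"] carries the cached garment features
    # between the reference pass and the generation pass.
    patched.model_options["transformer_options"]["attn_stored"] = {
        "data": {},
        "enable_feature_guidance": enable_feature_guidance,
        "feature_guidance_scale": feature_guidance_scale,
        "cond_or_uncond_out_cond": None,
        "cond_or_uncond_out_count": None,
    }
    patched.set_model_unet_function_wrapper(UnetFunctionWrapper())
    patched.set_model_sampler_cfg_function(SamplerCfgFunctionWrapper())
    # SaveAttnInputPatch would be applied to the reference (garment) model;
    # the generation model would use InputPatch / ReplacePatch instead.
    patched.set_model_attn1_patch(SaveAttnInputPatch())
    return patched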
| 7 | class SamplerCfgFunctionWrapper: 8 | 9 | def __call__(self, parameters) -> Any: 10 | cond = parameters["cond"] 11 | uncond = parameters["uncond"] 12 | input_x = parameters["input"] 13 | cond_scale = parameters["cond_scale"] 14 | model_options = parameters["model_options"] 15 | transformer_options = model_options["transformer_options"] 16 | if "attn_stored" in transformer_options: 17 | attn_stored = transformer_options["attn_stored"] 18 | feature_guidance_scale = attn_stored["feature_guidance_scale"] 19 | cond_or_uncond_out_cond = attn_stored["cond_or_uncond_out_cond"] 20 | cond_or_uncond_out_count = attn_stored["cond_or_uncond_out_count"] 21 | # clear memory 22 | clean_attn_stored_memory(attn_stored) 23 | if cond_or_uncond_out_cond is None: 24 | return uncond + (cond - uncond) * cond_scale 25 | else: 26 | cond = input_x - cond 27 | uncond = input_x - uncond 28 | cond_or_uncond_out_cond /= cond_or_uncond_out_count 29 | noise_pred = ( 30 | uncond 31 | + cond_scale * (cond - cond_or_uncond_out_cond) 32 | + feature_guidance_scale * 33 | (cond_or_uncond_out_cond - uncond) 34 | ) 35 | return input_x - noise_pred 36 | else: 37 | return uncond + (cond - uncond) * cond_scale 38 | 39 | 40 | class UnetFunctionWrapper: 41 | 42 | def _is_inject_batch_(self, model, input, inject_batch_count): 43 | free_memory = model_management.get_free_memory(input.device) 44 | input_shape = [input[0] + inject_batch_count] + list(input)[1:] 45 | return model.memory_required(input_shape) < free_memory 46 | 47 | def _reorganization_c_data_(self,c,key): 48 | if key in c: 49 | return self._chunk_data_(c[key]) 50 | return None,None 51 | 52 | def _chunk_data_(self,data): 53 | if data is None: 54 | return None,None 55 | return torch.chunk(data,data.shape[0]),[] 56 | 57 | def __call__(self, apply_model, parameters): 58 | input = parameters["input"] 59 | timestep = parameters["timestep"] 60 | c = parameters["c"] 61 | transformer_options = c["transformer_options"] 62 | if "attn_stored" in transformer_options: 63 | attn_stored = transformer_options["attn_stored"] 64 | enable_feature_guidance = attn_stored["enable_feature_guidance"] 65 | cond_or_uncond = parameters["cond_or_uncond"] 66 | cond_or_uncond_replenishment = [] 67 | cond_or_uncond_new = [] 68 | # 对传入参数进行调整,调整方式如下 69 | # A 对负向提示词,复制一份,这是为了计算出空数据的情况,插入的方式在前面 70 | # B 对正向忽略 71 | input_array = torch.chunk(input, input.shape[0]) 72 | timestep_array = torch.chunk(timestep, timestep.shape[0]) 73 | new_input_array = [] 74 | new_timestep = [] 75 | 76 | c_concat_data,c_concat_data_new = self._reorganization_c_data_(c,"c_concat") 77 | c_crossattn_data,c_crossattn_data_new = self._reorganization_c_data_(c,"c_crossattn") 78 | c_attn_stored_mult_data,_ = self._reorganization_c_data_(c,"c_attn_stored_mult") 79 | c_attn_stored_area_data = c["c_attn_stored_area"] if "c_attn_stored_area" in c else None 80 | c_attn_stored_control_data = c["c_attn_stored_control"] if "c_attn_stored_control" in c else None 81 | #移除因为注入增加的内容,后续已不再需要 82 | c["c_attn_stored_mult"] = None 83 | c["c_attn_stored_area"] = None 84 | c["c_attn_stored_control"] = None 85 | 86 | cond_or_uncond_extra_options = {} 87 | for i in range(len(input_array)): 88 | # 需注意,3月底comfyui更新,为了支持多conds实现,移除了cond本身的判定,这个值存的是index 89 | cond_flag = cond_or_uncond[i] 90 | new_input_array.append(input_array[i]) 91 | new_timestep.append(timestep_array[i]) 92 | if c_concat_data is not None: 93 | c_concat_data_new.append(c_concat_data[i]) 94 | if c_crossattn_data is not None: 95 | c_crossattn_data_new.append(c_crossattn_data[i]) 96 | 97 | 
cond_or_uncond_replenishment.append(1 if cond_flag == 1 else 0) 98 | cond_or_uncond_new.append(1 if cond_flag == 1 else 0) 99 | if enable_feature_guidance and cond_flag == 1: 100 | 101 | if c_attn_stored_mult_data is not None and c_attn_stored_area_data is not None: 102 | mult = c_attn_stored_mult_data[i] 103 | area = c_attn_stored_area_data[i] 104 | cond_or_uncond_extra_options[i+1] = { 105 | "mult": mult.squeeze(0), 106 | "area": area 107 | } 108 | # 注意,在启用特征引导的时候,需要增加一个负向空特征来处理,这个复制的负向特征是给后面计算空特征用的 109 | cond_or_uncond_replenishment.append(2) 110 | cond_or_uncond_new.append(1) 111 | new_input_array.append(input_array[i]) 112 | new_timestep.append(timestep_array[i]) 113 | if c_concat_data is not None: 114 | c_concat_data_new.append(c_concat_data[i]) 115 | if c_crossattn_data is not None: 116 | c_crossattn_data_new.append(c_crossattn_data[i]) 117 | input = torch.cat(new_input_array,) 118 | timestep = torch.cat(new_timestep,) 119 | if c_concat_data_new is not None: 120 | c["c_concat"] = torch.cat(c_concat_data_new,) 121 | if c_crossattn_data_new is not None: 122 | c["c_crossattn"] = torch.cat(c_crossattn_data_new,) 123 | if "out_cond_init" not in attn_stored: 124 | attn_stored["out_cond_init"] = torch.zeros_like(input_array[0]) 125 | if "out_count_init" not in attn_stored: 126 | attn_stored["out_count_init"] = torch.zeros_like(input_array[0] * 1e-37) 127 | if c_attn_stored_control_data is not None: 128 | c['control'] = c_attn_stored_control_data.get_control(input, timestep, c, len(cond_or_uncond_replenishment)) 129 | attn_stored["cond_or_uncond_replenishment"] = cond_or_uncond_replenishment 130 | attn_stored["cond_or_uncond_extra_options"] = cond_or_uncond_extra_options 131 | c["cond_or_uncond"] = cond_or_uncond_new 132 | c["transformer_options"]["cond_or_uncond"] = cond_or_uncond_new 133 | # 直接清理,节省内存 134 | del input_array 135 | del timestep_array 136 | del new_input_array 137 | del new_timestep 138 | del c_concat_data 139 | del c_concat_data_new 140 | del c_crossattn_data 141 | del c_crossattn_data_new 142 | del c_attn_stored_mult_data 143 | del c_attn_stored_area_data 144 | del cond_or_uncond_extra_options 145 | 146 | output = apply_model(input, timestep, **c) 147 | if "attn_stored" in transformer_options: 148 | attn_stored = transformer_options["attn_stored"] 149 | enable_feature_guidance = attn_stored["enable_feature_guidance"] 150 | 151 | cond_or_uncond_replenishment = attn_stored["cond_or_uncond_replenishment"] 152 | cond_or_uncond_extra_options = attn_stored["cond_or_uncond_extra_options"] 153 | pred_result = torch.chunk( 154 | output, len(cond_or_uncond_replenishment)) 155 | new_output = [] 156 | for i in range(len(cond_or_uncond_replenishment)): 157 | cond_flag = cond_or_uncond_replenishment[i] 158 | if cond_flag == 2: 159 | cond_or_uncond_extra_option = cond_or_uncond_extra_options[i] 160 | if "cond_or_uncond_out_cond" not in attn_stored: 161 | attn_stored["cond_or_uncond_out_cond"] = attn_stored["out_cond_init"] 162 | if "cond_or_uncond_out_count" not in attn_stored: 163 | attn_stored["cond_or_uncond_out_count"] = attn_stored["out_count_init"] 164 | mult = cond_or_uncond_extra_option["mult"] 165 | area = cond_or_uncond_extra_option["area"] 166 | if area is None: 167 | attn_stored["cond_or_uncond_out_cond"] += pred_result[i] * mult 168 | attn_stored["cond_or_uncond_out_count"] += mult 169 | else: 170 | out_c = attn_stored["cond_or_uncond_out_cond"] 171 | out_cts = attn_stored["cond_or_uncond_out_count"] 172 | dims = len(area) // 2 173 | for i in range(dims): 174 | out_c = 
out_c.narrow(i + 2, area[i + dims], area[i]) 175 | out_cts = out_cts.narrow(i + 2, area[i + dims], area[i]) 176 | out_c += pred_result[i] * mult 177 | out_cts += mult 178 | else: 179 | new_output.append(pred_result[i]) 180 | output = torch.cat(new_output) 181 | del new_output 182 | del pred_result 183 | return output 184 | 185 | 186 | class SaveAttnInputPatch: 187 | 188 | def __call__(self, q, k, v, extra_options): 189 | if "attn_stored" in extra_options: 190 | attn_stored = extra_options["attn_stored"] 191 | if attn_stored is None: 192 | return (q, k, v) 193 | attn_stored_data = attn_stored["data"] 194 | block_name = extra_options["block"][0] 195 | block_id = extra_options["block"][1] 196 | block_index = extra_options["block_index"] 197 | if block_name not in attn_stored_data: 198 | attn_stored_data[block_name] = {} 199 | if block_id not in attn_stored_data[block_name]: 200 | attn_stored_data[block_name][block_id] = {} 201 | attn_stored_data[block_name][block_id][block_index] = q 202 | return (q, k, v) 203 | 204 | 205 | def _check_(calc_sigmas,sigma): 206 | if calc_sigmas is None: 207 | return True 208 | for i in range(len(calc_sigmas)): 209 | if abs(calc_sigmas[i] - sigma.item()) < 0.000001: 210 | return True 211 | return False 212 | 213 | class InputPatch: 214 | 215 | def _calculate_input_(hideen_states, sigma): 216 | return hideen_states / (sigma ** 2 + 1) ** 0.5 217 | 218 | def __call__(self, q, k, v, extra_options): 219 | if "attn_stored" in extra_options: 220 | attn_stored = extra_options["attn_stored"] 221 | if attn_stored is None: 222 | return (q, k, v) 223 | attn_stored_data = attn_stored["data"] 224 | cond_or_uncond_replenishment = attn_stored["cond_or_uncond_replenishment"] 225 | block_name = extra_options["block"][0] 226 | block_id = extra_options["block"][1] 227 | block_index = extra_options["block_index"] 228 | sigma = extra_options["sigmas"] 229 | calc_sigmas = attn_stored.get("calc_sigmas",None) 230 | if _check_(calc_sigmas,sigma) and block_name in attn_stored_data and block_id in attn_stored_data[block_name] and block_index in attn_stored_data[block_name][block_id]: 231 | FLAG_OUT_CHANNEL = 2 232 | qEQk = q.shape[FLAG_OUT_CHANNEL] == k.shape[FLAG_OUT_CHANNEL] 233 | qEQv = q.shape[FLAG_OUT_CHANNEL] == v.shape[FLAG_OUT_CHANNEL] 234 | feature_hidden_states = attn_stored_data[block_name][block_id][block_index] 235 | # feature_hidden_states = self._calculate_input_(feature_hidden_states, sigma) 236 | if q.shape[1] != feature_hidden_states.shape[1]: 237 | clean_attn_stored_memory(attn_stored) 238 | raise ValueError( 239 | "Your featured image must be the same width and height as the image you want to generate!") 240 | feature_hidden_states = feature_hidden_states.to(q.dtype) 241 | combo_feature_hidden_states = [] 242 | for i in range(len(cond_or_uncond_replenishment)): 243 | cond_flag = cond_or_uncond_replenishment[i] 244 | if cond_flag == 0 or cond_flag == 2: 245 | combo_feature_hidden_states.append(feature_hidden_states) 246 | else: 247 | empty_feature = torch.zeros_like(feature_hidden_states) 248 | combo_feature_hidden_states.append(empty_feature) 249 | feature_hidden_states = torch.cat(combo_feature_hidden_states) 250 | q = torch.cat([q, feature_hidden_states], dim=1) 251 | return (q, q if qEQk else k, q if qEQv else v) 252 | return (q, k, v) 253 | 254 | 255 | class ReplacePatch: 256 | 257 | def __call__(self, q, k, v, extra_options): 258 | if extra_options is None: 259 | extra_options = {} 260 | n_heads = extra_options["n_heads"] 261 | q = optimized_attention(q, k, v, 
n_heads if n_heads is not None else 8) 262 | if "attn_stored" in extra_options: 263 | attn_stored = extra_options["attn_stored"] 264 | if attn_stored is None: 265 | return q 266 | sigma = extra_options["sigmas"] 267 | calc_sigmas = attn_stored.get("calc_sigmas",None) 268 | if _check_(calc_sigmas,sigma): 269 | q, _ = torch.chunk(q, 2, dim=1) # 抹除额外内容 270 | return q 271 | -------------------------------------------------------------------------------- /diffusers_magic_clothing/MagicClothingDiffusionPipeline.py: -------------------------------------------------------------------------------- 1 | from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import * 2 | import comfy.utils 3 | 4 | def prepare_callback(steps): 5 | pbar = comfy.utils.ProgressBar(steps) 6 | def callback(step, total_steps): 7 | pbar.update_absolute(step + 1, total_steps, None) 8 | return callback 9 | 10 | class MagicClothingDiffusionPipeline(StableDiffusionPipeline): 11 | def __call__( 12 | self, 13 | prompt: Union[str, List[str]] = None, 14 | height: Optional[int] = None, 15 | width: Optional[int] = None, 16 | num_inference_steps: int = 50, 17 | timesteps: List[int] = None, 18 | guidance_scale: float = 5., 19 | cloth_guidance_scale: float = 2.5, 20 | negative_prompt: Optional[Union[str, List[str]]] = None, 21 | num_images_per_prompt: Optional[int] = 1, 22 | eta: float = 0.0, 23 | generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, 24 | latents: Optional[torch.FloatTensor] = None, 25 | prompt_embeds: Optional[torch.FloatTensor] = None, 26 | negative_prompt_embeds: Optional[torch.FloatTensor] = None, 27 | ip_adapter_image: Optional[PipelineImageInput] = None, 28 | output_type: Optional[str] = "pil", 29 | return_dict: bool = True, 30 | cross_attention_kwargs: Optional[Dict[str, Any]] = None, 31 | guidance_rescale: float = 0.0, 32 | clip_skip: Optional[int] = None, 33 | callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None, 34 | callback_on_step_end_tensor_inputs: List[str] = ["latents"], 35 | **kwargs, 36 | ): 37 | r""" 38 | The call function to the pipeline for generation. 39 | 40 | Args: 41 | prompt (`str` or `List[str]`, *optional*): 42 | The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`. 43 | height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`): 44 | The height in pixels of the generated image. 45 | width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`): 46 | The width in pixels of the generated image. 47 | num_inference_steps (`int`, *optional*, defaults to 50): 48 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 49 | expense of slower inference. 50 | timesteps (`List[int]`, *optional*): 51 | Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument 52 | in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is 53 | passed will be used. Must be in descending order. 54 | guidance_scale (`float`, *optional*, defaults to 7.5): 55 | A higher guidance scale value encourages the model to generate images closely linked to the text 56 | `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`. 57 | negative_prompt (`str` or `List[str]`, *optional*): 58 | The prompt or prompts to guide what to not include in image generation. 
If not defined, you need to 59 | pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`). 60 | num_images_per_prompt (`int`, *optional*, defaults to 1): 61 | The number of images to generate per prompt. 62 | eta (`float`, *optional*, defaults to 0.0): 63 | Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies 64 | to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers. 65 | generator (`torch.Generator` or `List[torch.Generator]`, *optional*): 66 | A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make 67 | generation deterministic. 68 | latents (`torch.FloatTensor`, *optional*): 69 | Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image 70 | generation. Can be used to tweak the same generation with different prompts. If not provided, a latents 71 | tensor is generated by sampling using the supplied random `generator`. 72 | prompt_embeds (`torch.FloatTensor`, *optional*): 73 | Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not 74 | provided, text embeddings are generated from the `prompt` input argument. 75 | negative_prompt_embeds (`torch.FloatTensor`, *optional*): 76 | Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If 77 | not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument. 78 | ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters. 79 | output_type (`str`, *optional*, defaults to `"pil"`): 80 | The output format of the generated image. Choose between `PIL.Image` or `np.array`. 81 | return_dict (`bool`, *optional*, defaults to `True`): 82 | Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a 83 | plain tuple. 84 | cross_attention_kwargs (`dict`, *optional*): 85 | A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in 86 | [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). 87 | guidance_rescale (`float`, *optional*, defaults to 0.0): 88 | Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are 89 | Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when 90 | using zero terminal SNR. 91 | clip_skip (`int`, *optional*): 92 | Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that 93 | the output of the pre-final layer will be used for computing the prompt embeddings. 94 | callback_on_step_end (`Callable`, *optional*): 95 | A function that calls at the end of each denoising steps during the inference. The function is called 96 | with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, 97 | callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by 98 | `callback_on_step_end_tensor_inputs`. 99 | callback_on_step_end_tensor_inputs (`List`, *optional*): 100 | The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list 101 | will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the 102 | `._callback_tensor_inputs` attribute of your pipeline class. 
103 | 104 | Examples: 105 | 106 | Returns: 107 | [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: 108 | If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned, 109 | otherwise a `tuple` is returned where the first element is a list with the generated images and the 110 | second element is a list of `bool`s indicating whether the corresponding generated image contains 111 | "not-safe-for-work" (nsfw) content. 112 | """ 113 | 114 | callback = kwargs.pop("callback", None) 115 | callback_steps = kwargs.pop("callback_steps", None) 116 | 117 | if callback is not None: 118 | deprecate( 119 | "callback", 120 | "1.0.0", 121 | "Passing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`", 122 | ) 123 | if callback_steps is not None: 124 | deprecate( 125 | "callback_steps", 126 | "1.0.0", 127 | "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`", 128 | ) 129 | # to deal with lora scaling and other possible forward hooks 130 | 131 | self._guidance_scale = guidance_scale 132 | self._guidance_rescale = guidance_rescale 133 | self._clip_skip = clip_skip 134 | self._cross_attention_kwargs = cross_attention_kwargs 135 | self._interrupt = False 136 | 137 | # 2. Define call parameters 138 | batch_size = prompt_embeds.shape[0] 139 | 140 | device = self._execution_device 141 | 142 | # For classifier free guidance, we need to do two forward passes. 143 | # Here we concatenate the unconditional and text embeddings into a single batch 144 | # to avoid doing two forward passes 145 | if self.do_classifier_free_guidance: 146 | prompt_embeds = torch.cat([negative_prompt_embeds, negative_prompt_embeds, prompt_embeds]) 147 | 148 | 149 | # 4. Prepare timesteps 150 | timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps) 151 | 152 | 153 | # 5. Prepare latent variables 154 | num_channels_latents = self.unet.config.in_channels 155 | latents = self.prepare_latents( 156 | batch_size * num_images_per_prompt, 157 | num_channels_latents, 158 | height, 159 | width, 160 | prompt_embeds.dtype, 161 | device, 162 | generator, 163 | None, 164 | ) 165 | 166 | # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline 167 | extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) 168 | # 6.2 Optionally get Guidance Scale Embedding 169 | timestep_cond = None 170 | if self.unet.config.time_cond_proj_dim is not None: 171 | guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt) 172 | timestep_cond = self.get_guidance_scale_embedding( 173 | guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim 174 | ).to(device=device, dtype=latents.dtype) 175 | 176 | # 7. 
Denoising loop 177 | num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order 178 | self._num_timesteps = len(timesteps) 179 | comfyui_callback = prepare_callback(num_inference_steps) 180 | with self.progress_bar(total=num_inference_steps) as progress_bar: 181 | for i, t in enumerate(timesteps): 182 | if self.interrupt: 183 | continue 184 | 185 | # expand the latents if we are doing classifier free guidance 186 | latent_model_input = torch.cat([latents] * 3) if self.do_classifier_free_guidance else latents 187 | latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) 188 | # predict the noise residual 189 | noise_pred = self.unet( 190 | latent_model_input, 191 | t, 192 | encoder_hidden_states=prompt_embeds, 193 | timestep_cond=timestep_cond, 194 | cross_attention_kwargs=self.cross_attention_kwargs, 195 | return_dict=False, 196 | )[0] 197 | # perform guidance 198 | if self.do_classifier_free_guidance: 199 | noise_pred_uncond, noise_pred_cloth, noise_pred_text = noise_pred.chunk(3) 200 | noise_pred = ( 201 | noise_pred_uncond 202 | + guidance_scale * (noise_pred_text - noise_pred_cloth) 203 | + cloth_guidance_scale * (noise_pred_cloth - noise_pred_uncond) 204 | ) 205 | 206 | if self.do_classifier_free_guidance and self.guidance_rescale > 0.0: 207 | # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf 208 | noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale) 209 | 210 | # compute the previous noisy sample x_t -> x_t-1 211 | latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] 212 | if callback_on_step_end is not None: 213 | callback_kwargs = {} 214 | for k in callback_on_step_end_tensor_inputs: 215 | callback_kwargs[k] = locals()[k] 216 | callback_outputs = callback_on_step_end(self, i, t, callback_kwargs) 217 | 218 | latents = callback_outputs.pop("latents", latents) 219 | prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds) 220 | negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds) 221 | 222 | # call the callback, if provided 223 | if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): 224 | progress_bar.update() 225 | if callback is not None and i % callback_steps == 0: 226 | step_idx = i // getattr(self.scheduler, "order", 1) 227 | callback(step_idx, t, latents) 228 | 229 | if comfyui_callback is not None: 230 | comfyui_callback(i,num_inference_steps) 231 | 232 | # Offload all models 233 | self.maybe_free_model_hooks() 234 | latents = 1.0/0.18215 * latents 235 | return latents 236 | -------------------------------------------------------------------------------- /diffusers_magic_clothing/attention_processor.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | 3 | import torch 4 | from typing import Optional 5 | import torch.nn.functional as F 6 | from diffusers.utils import USE_PEFT_BACKEND 7 | import torch.nn as nn 8 | from diffusers.models.attention_processor import Attention 9 | 10 | Linear_Call_Needs_Extra_Args = False 11 | 12 | class AttnProcessor(nn.Module): 13 | r""" 14 | Default processor for performing attention-related computations. 
15 | """ 16 | 17 | def __init__(self): 18 | super().__init__() 19 | 20 | def __call__( 21 | self, 22 | attn: Attention, 23 | hidden_states: torch.FloatTensor, 24 | encoder_hidden_states: Optional[torch.FloatTensor] = None, 25 | attention_mask: Optional[torch.FloatTensor] = None, 26 | temb: Optional[torch.FloatTensor] = None, 27 | scale: float = 1.0, 28 | attn_store=None, 29 | do_classifier_free_guidance=None, 30 | enable_cloth_guidance=None 31 | ) -> torch.Tensor: 32 | residual = hidden_states 33 | 34 | args = () if USE_PEFT_BACKEND else (scale,) 35 | 36 | if attn.spatial_norm is not None: 37 | hidden_states = attn.spatial_norm(hidden_states, temb) 38 | 39 | input_ndim = hidden_states.ndim 40 | 41 | if input_ndim == 4: 42 | batch_size, channel, height, width = hidden_states.shape 43 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 44 | 45 | batch_size, sequence_length, _ = ( 46 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 47 | ) 48 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 49 | 50 | if attn.group_norm is not None: 51 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 52 | 53 | if Linear_Call_Needs_Extra_Args: 54 | query = attn.to_q(hidden_states, *args) 55 | else: 56 | query = attn.to_q(hidden_states) 57 | 58 | if encoder_hidden_states is None: 59 | encoder_hidden_states = hidden_states 60 | elif attn.norm_cross: 61 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 62 | 63 | 64 | if Linear_Call_Needs_Extra_Args: 65 | key = attn.to_k(encoder_hidden_states, *args) 66 | value = attn.to_v(encoder_hidden_states, *args) 67 | else: 68 | key = attn.to_k(encoder_hidden_states) 69 | value = attn.to_v(encoder_hidden_states) 70 | 71 | query = attn.head_to_batch_dim(query) 72 | key = attn.head_to_batch_dim(key) 73 | value = attn.head_to_batch_dim(value) 74 | 75 | attention_probs = attn.get_attention_scores(query, key, attention_mask) 76 | hidden_states = torch.bmm(attention_probs, value) 77 | hidden_states = attn.batch_to_head_dim(hidden_states) 78 | 79 | # linear proj 80 | if Linear_Call_Needs_Extra_Args: 81 | hidden_states = attn.to_out[0](hidden_states, *args) 82 | else: 83 | hidden_states = attn.to_out[0](hidden_states) 84 | # dropout 85 | hidden_states = attn.to_out[1](hidden_states) 86 | 87 | if input_ndim == 4: 88 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 89 | 90 | if attn.residual_connection: 91 | hidden_states = hidden_states + residual 92 | 93 | hidden_states = hidden_states / attn.rescale_output_factor 94 | 95 | return hidden_states 96 | 97 | 98 | class REFAttnProcessor(nn.Module): 99 | def __init__(self, name, type="read"): 100 | super().__init__() 101 | self.name = name 102 | self.type = type 103 | 104 | def __call__( 105 | self, 106 | attn: Attention, 107 | hidden_states: torch.FloatTensor, 108 | encoder_hidden_states: Optional[torch.FloatTensor] = None, 109 | attention_mask: Optional[torch.FloatTensor] = None, 110 | temb: Optional[torch.FloatTensor] = None, 111 | scale: float = 1.0, 112 | attn_store=None, 113 | do_classifier_free_guidance=None, 114 | enable_cloth_guidance=None 115 | ) -> torch.Tensor: 116 | if self.type == "read": 117 | attn_store[self.name] = hidden_states 118 | elif self.type == "write": 119 | ref_hidden_states = attn_store[self.name] 120 | if do_classifier_free_guidance: 121 | empty_copy = torch.zeros_like(ref_hidden_states) 
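                # Descriptive note: the zero tensor above stands in for the
                # "no garment" branch of classifier-free guidance. With cloth
                # guidance enabled the batch is ordered [uncond, cloth-only,
                # text+cloth], so the stored reference features are blanked for
                # the first chunk and reused for the other two before being
                # appended along the sequence dimension.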
122 | if enable_cloth_guidance: 123 | ref_hidden_states = torch.cat([empty_copy, ref_hidden_states, ref_hidden_states]) 124 | else: 125 | ref_hidden_states = torch.cat([empty_copy, ref_hidden_states]) 126 | hidden_states = torch.cat([hidden_states, ref_hidden_states], dim=1) 127 | else: 128 | raise ValueError("unsupport type") 129 | residual = hidden_states 130 | 131 | args = () if USE_PEFT_BACKEND else (scale,) 132 | 133 | if attn.spatial_norm is not None: 134 | hidden_states = attn.spatial_norm(hidden_states, temb) 135 | 136 | input_ndim = hidden_states.ndim 137 | 138 | if input_ndim == 4: 139 | batch_size, channel, height, width = hidden_states.shape 140 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 141 | 142 | batch_size, sequence_length, _ = ( 143 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 144 | ) 145 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 146 | 147 | if attn.group_norm is not None: 148 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 149 | 150 | if Linear_Call_Needs_Extra_Args: 151 | query = attn.to_q(hidden_states, *args) 152 | else: 153 | query = attn.to_q(hidden_states) 154 | 155 | if encoder_hidden_states is None: 156 | encoder_hidden_states = hidden_states 157 | elif attn.norm_cross: 158 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 159 | 160 | if Linear_Call_Needs_Extra_Args: 161 | key = attn.to_k(encoder_hidden_states, *args) 162 | value = attn.to_v(encoder_hidden_states, *args) 163 | else: 164 | key = attn.to_k(encoder_hidden_states) 165 | value = attn.to_v(encoder_hidden_states) 166 | 167 | query = attn.head_to_batch_dim(query) 168 | key = attn.head_to_batch_dim(key) 169 | value = attn.head_to_batch_dim(value) 170 | 171 | attention_probs = attn.get_attention_scores(query, key, attention_mask) 172 | hidden_states = torch.bmm(attention_probs, value) 173 | hidden_states = attn.batch_to_head_dim(hidden_states) 174 | 175 | if self.type == "write": 176 | hidden_states, _ = torch.chunk(hidden_states, 2, dim=1) 177 | 178 | # linear proj 179 | if Linear_Call_Needs_Extra_Args: 180 | hidden_states = attn.to_out[0](hidden_states, *args) 181 | else: 182 | hidden_states = attn.to_out[0](hidden_states) 183 | # dropout 184 | hidden_states = attn.to_out[1](hidden_states) 185 | 186 | if input_ndim == 4: 187 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 188 | 189 | if attn.residual_connection: 190 | hidden_states = hidden_states + residual 191 | 192 | hidden_states = hidden_states / attn.rescale_output_factor 193 | return hidden_states 194 | 195 | 196 | class AttnProcessor2_0(nn.Module): 197 | r""" 198 | Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). 
199 | """ 200 | 201 | def __init__(self): 202 | super().__init__() 203 | if not hasattr(F, "scaled_dot_product_attention"): 204 | raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") 205 | 206 | def __call__( 207 | self, 208 | attn: Attention, 209 | hidden_states: torch.FloatTensor, 210 | encoder_hidden_states: Optional[torch.FloatTensor] = None, 211 | attention_mask: Optional[torch.FloatTensor] = None, 212 | temb: Optional[torch.FloatTensor] = None, 213 | scale: float = 1.0, 214 | attn_store=None, 215 | do_classifier_free_guidance=None, 216 | enable_cloth_guidance=None 217 | ) -> torch.FloatTensor: 218 | residual = hidden_states 219 | if attn.spatial_norm is not None: 220 | hidden_states = attn.spatial_norm(hidden_states, temb) 221 | 222 | input_ndim = hidden_states.ndim 223 | 224 | if input_ndim == 4: 225 | batch_size, channel, height, width = hidden_states.shape 226 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 227 | 228 | batch_size, sequence_length, _ = ( 229 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 230 | ) 231 | 232 | if attention_mask is not None: 233 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 234 | # scaled_dot_product_attention expects attention_mask shape to be 235 | # (batch, heads, source_length, target_length) 236 | attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) 237 | 238 | if attn.group_norm is not None: 239 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 240 | 241 | args = () if USE_PEFT_BACKEND else (scale,) 242 | if Linear_Call_Needs_Extra_Args: 243 | query = attn.to_q(hidden_states, *args) 244 | else: 245 | query = attn.to_q(hidden_states) 246 | 247 | if encoder_hidden_states is None: 248 | encoder_hidden_states = hidden_states 249 | elif attn.norm_cross: 250 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 251 | 252 | if Linear_Call_Needs_Extra_Args: 253 | key = attn.to_k(encoder_hidden_states, *args) 254 | value = attn.to_v(encoder_hidden_states, *args) 255 | else: 256 | key = attn.to_k(encoder_hidden_states) 257 | value = attn.to_v(encoder_hidden_states) 258 | 259 | inner_dim = key.shape[-1] 260 | head_dim = inner_dim // attn.heads 261 | 262 | query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 263 | 264 | key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 265 | value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 266 | 267 | # the output of sdp = (batch, num_heads, seq_len, head_dim) 268 | # TODO: add support for attn.scale when we move to Torch 2.1 269 | hidden_states = F.scaled_dot_product_attention( 270 | query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False 271 | ) 272 | 273 | hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 274 | hidden_states = hidden_states.to(query.dtype) 275 | 276 | # linear proj 277 | if Linear_Call_Needs_Extra_Args: 278 | hidden_states = attn.to_out[0](hidden_states, *args) 279 | else: 280 | hidden_states = attn.to_out[0](hidden_states) 281 | # dropout 282 | hidden_states = attn.to_out[1](hidden_states) 283 | 284 | if input_ndim == 4: 285 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 286 | 287 | if attn.residual_connection: 288 | hidden_states = hidden_states + residual 289 | 
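        # rescale_output_factor defaults to 1.0 in diffusers' Attention, so the
        # division below is normally a no-op; it is kept for parity with the
        # stock AttnProcessor2_0 implementation.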
290 | hidden_states = hidden_states / attn.rescale_output_factor 291 | 292 | return hidden_states 293 | 294 | 295 | class REFAttnProcessor2_0(nn.Module): 296 | def __init__(self, name, type="read"): 297 | super().__init__() 298 | if not hasattr(F, "scaled_dot_product_attention"): 299 | raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") 300 | self.name = name 301 | self.type = type 302 | 303 | def __call__( 304 | self, 305 | attn: Attention, 306 | hidden_states: torch.FloatTensor, 307 | encoder_hidden_states: Optional[torch.FloatTensor] = None, 308 | attention_mask: Optional[torch.FloatTensor] = None, 309 | temb: Optional[torch.FloatTensor] = None, 310 | scale: float = 1.0, 311 | attn_store=None, 312 | do_classifier_free_guidance=False, 313 | enable_cloth_guidance=True 314 | ) -> torch.FloatTensor: 315 | if self.type == "read": 316 | attn_store[self.name] = hidden_states 317 | elif self.type == "write": 318 | ref_hidden_states = attn_store[self.name] 319 | if do_classifier_free_guidance: 320 | empty_copy = torch.zeros_like(ref_hidden_states) 321 | if enable_cloth_guidance: 322 | ref_hidden_states = torch.cat([empty_copy, ref_hidden_states, ref_hidden_states]) 323 | else: 324 | ref_hidden_states = torch.cat([empty_copy, ref_hidden_states]) 325 | hidden_states = torch.cat([hidden_states, ref_hidden_states], dim=1) 326 | else: 327 | raise ValueError("unsupport type") 328 | residual = hidden_states 329 | if attn.spatial_norm is not None: 330 | hidden_states = attn.spatial_norm(hidden_states, temb) 331 | 332 | input_ndim = hidden_states.ndim 333 | 334 | if input_ndim == 4: 335 | batch_size, channel, height, width = hidden_states.shape 336 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 337 | 338 | batch_size, sequence_length, _ = ( 339 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 340 | ) 341 | 342 | if attention_mask is not None: 343 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 344 | # scaled_dot_product_attention expects attention_mask shape to be 345 | # (batch, heads, source_length, target_length) 346 | attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) 347 | 348 | if attn.group_norm is not None: 349 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 350 | 351 | args = () if USE_PEFT_BACKEND else (scale,) 352 | if Linear_Call_Needs_Extra_Args: 353 | query = attn.to_q(hidden_states, *args) 354 | else: 355 | query = attn.to_q(hidden_states) 356 | 357 | if encoder_hidden_states is None: 358 | encoder_hidden_states = hidden_states 359 | elif attn.norm_cross: 360 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 361 | 362 | if Linear_Call_Needs_Extra_Args: 363 | key = attn.to_k(encoder_hidden_states, *args) 364 | value = attn.to_v(encoder_hidden_states, *args) 365 | else: 366 | key = attn.to_k(encoder_hidden_states) 367 | value = attn.to_v(encoder_hidden_states) 368 | 369 | inner_dim = key.shape[-1] 370 | head_dim = inner_dim // attn.heads 371 | 372 | query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 373 | 374 | key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 375 | value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 376 | 377 | # the output of sdp = (batch, num_heads, seq_len, head_dim) 378 | # TODO: add support for attn.scale when we move to Torch 2.1 
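        # Descriptive note: in "write" mode the query/key/value above were
        # projected from the concatenation of the UNet hidden states and the
        # stored garment features, so this single attention call lets every
        # image token attend to the garment tokens as well (self-attention
        # over the joined sequence).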
379 | hidden_states = F.scaled_dot_product_attention( 380 | query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False 381 | ) 382 | hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 383 | hidden_states = hidden_states.to(query.dtype) 384 | 385 | if self.type == "write": 386 | hidden_states, _ = torch.chunk(hidden_states, 2, dim=1) 387 | # linear proj 388 | if Linear_Call_Needs_Extra_Args: 389 | hidden_states = attn.to_out[0](hidden_states, *args) 390 | else: 391 | hidden_states = attn.to_out[0](hidden_states) 392 | # dropout 393 | hidden_states = attn.to_out[1](hidden_states) 394 | 395 | if input_ndim == 4: 396 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 397 | 398 | if attn.residual_connection: 399 | hidden_states = hidden_states + residual 400 | 401 | hidden_states = hidden_states / attn.rescale_output_factor 402 | return hidden_states 403 | 404 | 405 | class REFAnimateDiffAttnProcessor2_0(nn.Module): 406 | def __init__(self, cross_attention_dim, hidden_size, name): 407 | super().__init__() 408 | if not hasattr(F, "scaled_dot_product_attention"): 409 | raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") 410 | self.name = name 411 | self.scale = 1.0 412 | self.to_k_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 413 | self.to_v_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 414 | 415 | def __call__( 416 | self, 417 | attn: Attention, 418 | hidden_states: torch.FloatTensor, 419 | encoder_hidden_states: Optional[torch.FloatTensor] = None, 420 | attention_mask: Optional[torch.FloatTensor] = None, 421 | temb: Optional[torch.FloatTensor] = None, 422 | scale: float = 1.0, 423 | attn_store=None, 424 | do_classifier_free_guidance=False, 425 | ) -> torch.FloatTensor: 426 | ref_hidden_states = attn_store[self.name] 427 | if do_classifier_free_guidance: 428 | empty_copy = torch.zeros_like(ref_hidden_states) 429 | repeat_num = hidden_states.shape[0] // 3 430 | ref_hidden_states = torch.cat( 431 | [empty_copy.repeat(repeat_num, 1, 1), ref_hidden_states.repeat(repeat_num, 1, 1), 432 | ref_hidden_states.repeat(repeat_num, 1, 1)]) 433 | 434 | if hidden_states.shape[0] % ref_hidden_states.shape[0] != 0: 435 | raise ValueError("not evenly divisible") 436 | 437 | residual = hidden_states 438 | if attn.spatial_norm is not None: 439 | hidden_states = attn.spatial_norm(hidden_states, temb) 440 | 441 | input_ndim = hidden_states.ndim 442 | 443 | if input_ndim == 4: 444 | batch_size, channel, height, width = hidden_states.shape 445 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 446 | 447 | batch_size, sequence_length, _ = ( 448 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 449 | ) 450 | 451 | if attention_mask is not None: 452 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 453 | # scaled_dot_product_attention expects attention_mask shape to be 454 | # (batch, heads, source_length, target_length) 455 | attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) 456 | 457 | if attn.group_norm is not None: 458 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 459 | 460 | args = () if USE_PEFT_BACKEND else (scale,) 461 | if Linear_Call_Needs_Extra_Args: 462 | query = attn.to_q(hidden_states, *args) 463 | else: 464 
| query = attn.to_q(hidden_states) 465 | 466 | if encoder_hidden_states is None: 467 | encoder_hidden_states = hidden_states 468 | elif attn.norm_cross: 469 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 470 | 471 | if Linear_Call_Needs_Extra_Args: 472 | key = attn.to_k(encoder_hidden_states, *args) 473 | value = attn.to_v(encoder_hidden_states, *args) 474 | else: 475 | key = attn.to_k(encoder_hidden_states) 476 | value = attn.to_v(encoder_hidden_states) 477 | 478 | inner_dim = key.shape[-1] 479 | head_dim = inner_dim // attn.heads 480 | 481 | query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 482 | 483 | key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 484 | value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 485 | 486 | # the output of sdp = (batch, num_heads, seq_len, head_dim) 487 | # TODO: add support for attn.scale when we move to Torch 2.1 488 | hidden_states = F.scaled_dot_product_attention( 489 | query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False 490 | ) 491 | 492 | hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 493 | hidden_states = hidden_states.to(query.dtype) 494 | 495 | ref_key = self.to_k_ip(ref_hidden_states.float()).view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 496 | ref_value = self.to_v_ip(ref_hidden_states.float()).view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 497 | ref_hidden_states = F.scaled_dot_product_attention( 498 | query.float(), ref_key, ref_value, attn_mask=None, dropout_p=0.0, is_causal=False 499 | ) 500 | 501 | ref_hidden_states = ref_hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 502 | ref_hidden_states = ref_hidden_states.to(query.dtype) 503 | 504 | hidden_states = hidden_states + self.scale * ref_hidden_states 505 | # linear proj 506 | if Linear_Call_Needs_Extra_Args: 507 | hidden_states = attn.to_out[0](hidden_states, *args) 508 | else: 509 | hidden_states = attn.to_out[0](hidden_states) 510 | # dropout 511 | hidden_states = attn.to_out[1](hidden_states) 512 | 513 | if input_ndim == 4: 514 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 515 | 516 | if attn.residual_connection: 517 | hidden_states = hidden_states + residual 518 | 519 | hidden_states = hidden_states / attn.rescale_output_factor 520 | return hidden_states 521 | 522 | 523 | class IPAttnProcessor(nn.Module): 524 | 525 | def __init__(self, hidden_size, cross_attention_dim=None, scale=1.0, num_tokens=4): 526 | super().__init__() 527 | 528 | self.hidden_size = hidden_size 529 | self.cross_attention_dim = cross_attention_dim 530 | self.scale = scale 531 | self.num_tokens = num_tokens 532 | 533 | self.to_k_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 534 | self.to_v_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 535 | 536 | def __call__( 537 | self, 538 | attn, 539 | hidden_states, 540 | encoder_hidden_states=None, 541 | attention_mask=None, 542 | temb=None, 543 | scale: float = 1.0, 544 | attn_store=None, 545 | do_classifier_free_guidance=None, 546 | enable_cloth_guidance=None 547 | ): 548 | residual = hidden_states 549 | 550 | if attn.spatial_norm is not None: 551 | hidden_states = attn.spatial_norm(hidden_states, temb) 552 | 553 | input_ndim = hidden_states.ndim 554 | 555 | if input_ndim == 4: 556 | batch_size, channel, height, width = hidden_states.shape 557 | 
hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 558 | 559 | batch_size, sequence_length, _ = ( 560 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 561 | ) 562 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 563 | 564 | if attn.group_norm is not None: 565 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 566 | 567 | args = () if USE_PEFT_BACKEND else (scale,) 568 | if Linear_Call_Needs_Extra_Args: 569 | query = attn.to_q(hidden_states, *args) 570 | else: 571 | query = attn.to_q(hidden_states) 572 | 573 | if encoder_hidden_states is None: 574 | encoder_hidden_states = hidden_states 575 | else: 576 | # get encoder_hidden_states, ip_hidden_states 577 | end_pos = encoder_hidden_states.shape[1] - self.num_tokens 578 | encoder_hidden_states, ip_hidden_states = ( 579 | encoder_hidden_states[:, :end_pos, :], 580 | encoder_hidden_states[:, end_pos:, :], 581 | ) 582 | if attn.norm_cross: 583 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 584 | 585 | if Linear_Call_Needs_Extra_Args: 586 | key = attn.to_k(encoder_hidden_states, *args) 587 | value = attn.to_v(encoder_hidden_states, *args) 588 | else: 589 | key = attn.to_k(encoder_hidden_states) 590 | value = attn.to_v(encoder_hidden_states) 591 | 592 | query = attn.head_to_batch_dim(query) 593 | key = attn.head_to_batch_dim(key) 594 | value = attn.head_to_batch_dim(value) 595 | 596 | attention_probs = attn.get_attention_scores(query, key, attention_mask) 597 | hidden_states = torch.bmm(attention_probs, value) 598 | hidden_states = attn.batch_to_head_dim(hidden_states) 599 | 600 | # for ip-adapter 601 | ip_key = self.to_k_ip(ip_hidden_states) 602 | ip_value = self.to_v_ip(ip_hidden_states) 603 | 604 | ip_key = attn.head_to_batch_dim(ip_key) 605 | ip_value = attn.head_to_batch_dim(ip_value) 606 | 607 | ip_attention_probs = attn.get_attention_scores(query, ip_key, None) 608 | self.attn_map = ip_attention_probs 609 | ip_hidden_states = torch.bmm(ip_attention_probs, ip_value) 610 | ip_hidden_states = attn.batch_to_head_dim(ip_hidden_states) 611 | 612 | hidden_states = hidden_states + self.scale * ip_hidden_states 613 | 614 | # linear proj 615 | if Linear_Call_Needs_Extra_Args: 616 | hidden_states = attn.to_out[0](hidden_states, *args) 617 | else: 618 | hidden_states = attn.to_out[0](hidden_states) 619 | # dropout 620 | hidden_states = attn.to_out[1](hidden_states) 621 | 622 | if input_ndim == 4: 623 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 624 | 625 | if attn.residual_connection: 626 | hidden_states = hidden_states + residual 627 | 628 | hidden_states = hidden_states / attn.rescale_output_factor 629 | 630 | return hidden_states 631 | 632 | 633 | class IPAttnProcessor2_0(torch.nn.Module): 634 | 635 | def __init__(self, hidden_size, cross_attention_dim=None, scale=1.0, num_tokens=4): 636 | super().__init__() 637 | 638 | if not hasattr(F, "scaled_dot_product_attention"): 639 | raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") 640 | 641 | self.hidden_size = hidden_size 642 | self.cross_attention_dim = cross_attention_dim 643 | self.scale = scale 644 | self.num_tokens = num_tokens 645 | 646 | self.to_k_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 647 | self.to_v_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 648 | 
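    # Usage note (illustrative, not from the upstream file): this processor
    # expects `encoder_hidden_states` to be the text embeddings with the
    # IP-Adapter image tokens appended along dim=1, for example
    #
    #     encoder_hidden_states = torch.cat([prompt_embeds, image_prompt_embeds], dim=1)
    #
    # where `image_prompt_embeds` carries `num_tokens` tokens (4 for the
    # standard IP-Adapter projection, 16 for IP-Adapter Plus); __call__ splits
    # the two streams apart again using `self.num_tokens`.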
649 | def __call__( 650 | self, 651 | attn, 652 | hidden_states, 653 | encoder_hidden_states=None, 654 | attention_mask=None, 655 | temb=None, 656 | scale: float = 1.0, 657 | attn_store=None, 658 | do_classifier_free_guidance=None, 659 | enable_cloth_guidance=None 660 | ): 661 | residual = hidden_states 662 | 663 | if attn.spatial_norm is not None: 664 | hidden_states = attn.spatial_norm(hidden_states, temb) 665 | 666 | input_ndim = hidden_states.ndim 667 | 668 | if input_ndim == 4: 669 | batch_size, channel, height, width = hidden_states.shape 670 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 671 | 672 | batch_size, sequence_length, _ = ( 673 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 674 | ) 675 | 676 | if attention_mask is not None: 677 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 678 | # scaled_dot_product_attention expects attention_mask shape to be 679 | # (batch, heads, source_length, target_length) 680 | attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) 681 | 682 | if attn.group_norm is not None: 683 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 684 | 685 | args = () if USE_PEFT_BACKEND else (scale,) 686 | if Linear_Call_Needs_Extra_Args: 687 | query = attn.to_q(hidden_states, *args) 688 | else: 689 | query = attn.to_q(hidden_states) 690 | 691 | if encoder_hidden_states is None: 692 | encoder_hidden_states = hidden_states 693 | else: 694 | # get encoder_hidden_states, ip_hidden_states 695 | end_pos = encoder_hidden_states.shape[1] - self.num_tokens 696 | encoder_hidden_states, ip_hidden_states = ( 697 | encoder_hidden_states[:, :end_pos, :], 698 | encoder_hidden_states[:, end_pos:, :], 699 | ) 700 | if attn.norm_cross: 701 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 702 | 703 | if Linear_Call_Needs_Extra_Args: 704 | key = attn.to_k(encoder_hidden_states, *args) 705 | value = attn.to_v(encoder_hidden_states, *args) 706 | else: 707 | key = attn.to_k(encoder_hidden_states) 708 | value = attn.to_v(encoder_hidden_states) 709 | 710 | inner_dim = key.shape[-1] 711 | head_dim = inner_dim // attn.heads 712 | 713 | query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 714 | 715 | key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 716 | value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 717 | 718 | # the output of sdp = (batch, num_heads, seq_len, head_dim) 719 | # TODO: add support for attn.scale when we move to Torch 2.1 720 | hidden_states = F.scaled_dot_product_attention( 721 | query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False 722 | ) 723 | 724 | hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 725 | hidden_states = hidden_states.to(query.dtype) 726 | 727 | # for ip-adapter 728 | ip_key = self.to_k_ip(ip_hidden_states) 729 | ip_value = self.to_v_ip(ip_hidden_states) 730 | 731 | ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 732 | ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 733 | 734 | # the output of sdp = (batch, num_heads, seq_len, head_dim) 735 | # TODO: add support for attn.scale when we move to Torch 2.1 736 | ip_hidden_states = F.scaled_dot_product_attention( 737 | query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False 738 | ) 739 | 
with torch.no_grad(): 740 | self.attn_map = query @ ip_key.transpose(-2, -1).softmax(dim=-1) 741 | # print(self.attn_map.shape) 742 | 743 | ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 744 | ip_hidden_states = ip_hidden_states.to(query.dtype) 745 | 746 | hidden_states = hidden_states + self.scale * ip_hidden_states 747 | 748 | # linear proj 749 | if Linear_Call_Needs_Extra_Args: 750 | hidden_states = attn.to_out[0](hidden_states, *args) 751 | else: 752 | hidden_states = attn.to_out[0](hidden_states) 753 | # dropout 754 | hidden_states = attn.to_out[1](hidden_states) 755 | 756 | if input_ndim == 4: 757 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 758 | 759 | if attn.residual_connection: 760 | hidden_states = hidden_states + residual 761 | 762 | hidden_states = hidden_states / attn.rescale_output_factor 763 | 764 | return hidden_states 765 | -------------------------------------------------------------------------------- /diffusers_magic_clothing/garment_diffusion.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | from safetensors import safe_open 4 | from .utils import is_torch2_available 5 | from diffusers import UNet2DConditionModel 6 | 7 | if is_torch2_available(): 8 | from .attention_processor import REFAttnProcessor2_0 as REFAttnProcessor 9 | from .attention_processor import AttnProcessor2_0 as AttnProcessor 10 | from .attention_processor import REFAnimateDiffAttnProcessor2_0 as REFAnimateDiffAttnProcessor 11 | else: 12 | from .attention_processor import REFAttnProcessor, AttnProcessor 13 | import torch.nn.functional as F 14 | 15 | class ClothAdapter: 16 | def __init__(self, sd_pipe, ref_path): 17 | self.enable_cloth_guidance = True 18 | self.pipe = sd_pipe 19 | self.set_adapter(self.pipe.unet, "write") 20 | 21 | ref_unet = copy.deepcopy(sd_pipe.unet) 22 | if ref_unet.config.in_channels == 9: 23 | ref_unet.conv_in = torch.nn.Conv2d(4, 320, ref_unet.conv_in.kernel_size, ref_unet.conv_in.stride, ref_unet.conv_in.padding) 24 | ref_unet.register_to_config(in_channels=4) 25 | state_dict = {} 26 | with safe_open(ref_path, framework="pt", device="cpu") as f: 27 | for key in f.keys(): 28 | state_dict[key] = f.get_tensor(key) 29 | ref_unet.load_state_dict(state_dict, strict=False) 30 | 31 | self.ref_unet = ref_unet.to(self.pipe.device, dtype=self.pipe.dtype) 32 | self.set_adapter(self.ref_unet, "read") 33 | self.attn_store = {} 34 | 35 | def set_adapter(self, unet, type): 36 | attn_procs = {} 37 | for name in unet.attn_processors.keys(): 38 | if "attn1" in name: 39 | attn_procs[name] = REFAttnProcessor(name=name, type=type) 40 | else: 41 | attn_procs[name] = AttnProcessor() 42 | unet.set_attn_processor(attn_procs) 43 | 44 | def generate( 45 | self, 46 | cloth_latent, 47 | gen_latents, 48 | prompt_embeds_null, 49 | positive=None, 50 | negative=None, 51 | num_images_per_prompt=4, 52 | seed=-1, 53 | guidance_scale=7.5, 54 | cloth_guidance_scale=2.5, 55 | num_inference_steps=20, 56 | height=512, 57 | width=384, 58 | **kwargs, 59 | ): 60 | if gen_latents is not None: 61 | gen_latents = 0.18215 * gen_latents 62 | gen_latents=gen_latents.to(self.pipe.device,dtype=self.pipe.dtype) 63 | cloth_latent=cloth_latent.to(self.pipe.device,dtype=self.pipe.dtype) 64 | prompt_embeds_null = prompt_embeds_null.to(self.pipe.device,dtype=self.pipe.dtype) 65 | positive = positive.to(self.pipe.device,dtype=self.pipe.dtype) 66 | negative = 
negative.to(self.pipe.device,dtype=self.pipe.dtype) 67 | cloth_latent = 0.18215 * cloth_latent 68 | self.ref_unet(torch.cat([cloth_latent] * num_images_per_prompt), 0, torch.cat([prompt_embeds_null] * num_images_per_prompt), cross_attention_kwargs={"attn_store": self.attn_store}) 69 | 70 | 71 | self.generator = torch.Generator(self.pipe.device).manual_seed(seed) if seed is not None else None 72 | if self.enable_cloth_guidance: 73 | images = self.pipe( 74 | prompt_embeds=positive, 75 | negative_prompt_embeds=negative, 76 | guidance_scale=guidance_scale, 77 | cloth_guidance_scale=cloth_guidance_scale, 78 | num_inference_steps=num_inference_steps, 79 | latents = gen_latents, 80 | generator=self.generator, 81 | height=height, 82 | width=width, 83 | cross_attention_kwargs={"attn_store": self.attn_store, "do_classifier_free_guidance": guidance_scale > 1.0, "enable_cloth_guidance": self.enable_cloth_guidance}, 84 | **kwargs, 85 | ) 86 | else: 87 | images = self.pipe( 88 | prompt_embeds=positive, 89 | negative_prompt_embeds=negative, 90 | guidance_scale=guidance_scale, 91 | num_inference_steps=num_inference_steps, 92 | generator=self.generator, 93 | latents = gen_latents, 94 | height=height, 95 | width=width, 96 | cross_attention_kwargs={"attn_store": self.attn_store, "do_classifier_free_guidance": guidance_scale > 1.0, "enable_cloth_guidance": self.enable_cloth_guidance}, 97 | **kwargs, 98 | ) 99 | 100 | return images -------------------------------------------------------------------------------- /diffusers_magic_clothing/utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import numpy as np 3 | import PIL 4 | import torch 5 | 6 | 7 | def is_torch2_available(): 8 | return hasattr(F, "scaled_dot_product_attention") 9 | 10 | 11 | def prepare_image(image, height, width): 12 | if image is None: 13 | raise ValueError("`image` input cannot be undefined.") 14 | 15 | if isinstance(image, torch.Tensor): 16 | # Batch single image 17 | if image.ndim == 3: 18 | assert image.shape[0] == 3, "Image outside a batch should be of shape (3, H, W)" 19 | image = image.unsqueeze(0) 20 | 21 | # Check image is in [-1, 1] 22 | if image.min() < -1 or image.max() > 1: 23 | raise ValueError("Image should be in [-1, 1] range") 24 | 25 | # Image as float32 26 | image = image.to(dtype=torch.float32) 27 | else: 28 | # preprocess image 29 | if isinstance(image, (PIL.Image.Image, np.ndarray)): 30 | image = [image] 31 | if isinstance(image, list) and isinstance(image[0], PIL.Image.Image): 32 | # resize all images w.r.t passed height an width 33 | image = [i.resize((width, height), resample=PIL.Image.LANCZOS) for i in image] 34 | image = [np.array(i.convert("RGB"))[None, :] for i in image] 35 | image = np.concatenate(image, axis=0) 36 | elif isinstance(image, list) and isinstance(image[0], np.ndarray): 37 | image = np.concatenate([i[None, :] for i in image], axis=0) 38 | 39 | image = image.transpose(0, 3, 1, 2) 40 | image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0 41 | 42 | return image 43 | 44 | 45 | def prepare_mask(image, height, width): 46 | if image is None: 47 | raise ValueError("`image` input cannot be undefined.") 48 | 49 | if isinstance(image, torch.Tensor): 50 | # Batch single image 51 | if image.ndim == 3: 52 | assert image.shape[0] == 1, "Image outside a batch should be of shape (3, H, W)" 53 | image = image.unsqueeze(0) 54 | image = image.to(dtype=torch.float32) 55 | else: 56 | # preprocess image 57 | if 
isinstance(image, (PIL.Image.Image, np.ndarray)): 58 | image = [image] 59 | if isinstance(image, list) and isinstance(image[0], PIL.Image.Image): 60 | # resize all images w.r.t passed height an width 61 | image = [i.resize((width, height), resample=PIL.Image.NEAREST) for i in image] 62 | image = [np.array(i.convert("L"))[..., None] for i in image] 63 | image = np.stack(image, axis=0) 64 | elif isinstance(image, list) and isinstance(image[0], np.ndarray): 65 | image = np.stack([i[..., None] for i in image], axis=0) 66 | 67 | image = image.transpose(0, 3, 1, 2) 68 | image = torch.from_numpy(image).to(dtype=torch.float32) / 255. 69 | image[image > 0.5] = 1 70 | image[image <= 0.5] = 0 71 | 72 | return image 73 | -------------------------------------------------------------------------------- /diffusers_warp_nodes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import folder_paths 4 | from pathlib import Path 5 | 6 | from comfy import model_management 7 | 8 | from .diffusers_magic_clothing.garment_diffusion import ClothAdapter 9 | from .diffusers_magic_clothing.MagicClothingDiffusionPipeline import MagicClothingDiffusionPipeline 10 | from diffusers import ( 11 | AutoencoderKL, 12 | DDIMScheduler, 13 | DDPMScheduler, 14 | DEISMultistepScheduler, 15 | DPMSolverMultistepScheduler, 16 | DPMSolverSinglestepScheduler, 17 | EulerAncestralDiscreteScheduler, 18 | EulerDiscreteScheduler, 19 | HeunDiscreteScheduler, 20 | KDPM2AncestralDiscreteScheduler, 21 | KDPM2DiscreteScheduler, 22 | UniPCMultistepScheduler, 23 | ) 24 | 25 | SCHEDULERS = { 26 | 'DDIM' : DDIMScheduler, 27 | 'DDPM' : DDPMScheduler, 28 | 'DEISMultistep' : DEISMultistepScheduler, 29 | 'DPMSolverMultistep' : DPMSolverMultistepScheduler, 30 | 'DPMSolverSinglestep' : DPMSolverSinglestepScheduler, 31 | 'EulerAncestralDiscrete' : EulerAncestralDiscreteScheduler, 32 | 'EulerDiscrete' : EulerDiscreteScheduler, 33 | 'HeunDiscrete' : HeunDiscreteScheduler, 34 | 'KDPM2AncestralDiscrete' : KDPM2AncestralDiscreteScheduler, 35 | 'KDPM2Discrete' : KDPM2DiscreteScheduler, 36 | 'UniPCMultistep' : UniPCMultistepScheduler 37 | } 38 | 39 | class ChangePixelValueNormalization: 40 | @classmethod 41 | def INPUT_TYPES(s): 42 | return {"required": 43 | {"pixels": ("IMAGE", ), 44 | "mode": (["[0,1]=>[-1,1]", "[-1,1]=>[0,1]"],), 45 | } 46 | } 47 | RETURN_TYPES = ("IMAGE",) 48 | FUNCTION = "normalization" 49 | 50 | CATEGORY = "image" 51 | 52 | def normalization(self, pixels, mode): 53 | if mode == "[0,1]=>[-1,1]": 54 | pixels = (pixels * 255).round().clamp(min=0, max=255) / 127.5 - 1.0 55 | elif mode == "[-1,1]=>[0,1]": 56 | pixels = ((pixels+1) * 127.5).clamp(min=0, max=255) / 255.0 57 | else: 58 | pixels = pixels 59 | return (pixels,) 60 | 61 | 62 | class ChangePipelineDtypeAndDevice: 63 | @classmethod 64 | def INPUT_TYPES(s): 65 | return {"required": 66 | {"pipeline": ("PIPELINE", ), 67 | "dtype": (["default", "float32", "float16", "bfloat16"],), 68 | "device": (["default", "cpu", "cuda", "cuda:0", "cuda:1"],), 69 | } 70 | } 71 | RETURN_TYPES = ("PIPELINE",) 72 | FUNCTION = "change_dtype" 73 | 74 | CATEGORY = "pipeline" 75 | 76 | def change_dtype(self, pipeline, dtype="default", device="default"): 77 | if dtype == "float16": 78 | seleted_type = torch.float16 79 | elif dtype == "bfloat16": 80 | seleted_type = torch.bfloat16 81 | else: 82 | seleted_type = torch.float32 83 | if device == "default": 84 | seleted_device = model_management.get_torch_device() 85 | else: 86 | seleted_device = 
torch.device(device) 87 | pipeline = pipeline.to(seleted_device, dtype=seleted_type) 88 | pipeline.device = seleted_device 89 | pipeline.dtype = seleted_type 90 | return (pipeline,) 91 | 92 | 93 | class RunMagicClothingDiffusersModel: 94 | @classmethod 95 | def INPUT_TYPES(s): 96 | return {"required": {"cloth_image": ("IMAGE",), 97 | "magicClothingAdapter": ("MAGIC_CLOTHING_ADAPTER",), 98 | "positive": ("STRING", { 99 | "dynamicPrompts": False, 100 | "multiline": True, 101 | "default": "" 102 | }), 103 | "negative": ("STRING", { 104 | "dynamicPrompts": False, 105 | "multiline": True, 106 | "default": "" 107 | }), 108 | "height": ("INT", {"default": 768, "min": 0, "max": 2048}), 109 | "width": ("INT", {"default": 576, "min": 0, "max": 2048}), 110 | "batch_size": ("INT", {"default": 1, "min": 1, "max": 4}), 111 | "steps": ("INT", {"default": 25, "min": 0, "max": 100}), 112 | "cfg": ("FLOAT", {"default": 5, "min": 0.0, "max": 10.0, "step": 0.01}), 113 | "cloth_guidance_scale": ("FLOAT", {"default": 2.5, "min": 0.0, "max": 10.0, "step": 0.01}), 114 | "seed": ("INT", {"default": 1234, "min": 0, "max": 0xffffffffffffffff}), 115 | } 116 | } 117 | 118 | RETURN_TYPES = ("IMAGE",) 119 | FUNCTION = "run_model" 120 | 121 | CATEGORY = "loaders" 122 | 123 | def run_model(self, cloth_image, magicClothingAdapter, positive, negative, height, width, batch_size, steps, cfg, cloth_guidance_scale, seed,): 124 | cloth_image = (cloth_image * 255).round().clamp(min=0, 125 | max=255).to(dtype=torch.float32) / 127.5 - 1.0 126 | cloth_image = cloth_image.permute(0, 3, 1, 2) 127 | if not isinstance(magicClothingAdapter, ClothAdapter): 128 | # 如果发现不是正确的模型,就返回原始图片,不进行处理 129 | gen_image = cloth_image.permute(0, 2, 3, 1) 130 | gen_image = ((gen_image+1) * 127.5).clamp(min=0, 131 | max=255).to(dtype=torch.float32) / 255.0 132 | return (gen_image,) 133 | magicClothingAdapter.enable_cloth_guidance = True 134 | cloth_image = cloth_image.to( 135 | magicClothingAdapter.pipe.device, dtype=magicClothingAdapter.pipe.dtype) 136 | with torch.inference_mode(): 137 | prompt_embeds_null = magicClothingAdapter.pipe.encode_prompt( 138 | [""], device=magicClothingAdapter.pipe.device, num_images_per_prompt=1, do_classifier_free_guidance=False)[0] 139 | prompt_embeds, negative_prompt_embeds = magicClothingAdapter.pipe.encode_prompt( 140 | positive, 141 | magicClothingAdapter.pipe.device, 142 | batch_size, 143 | True, 144 | negative, 145 | prompt_embeds=None, 146 | negative_prompt_embeds=None, 147 | lora_scale=None, 148 | clip_skip=None, 149 | ) 150 | cloth_latent = magicClothingAdapter.pipe.vae.encode( 151 | cloth_image).latent_dist.mode() 152 | gen_image = magicClothingAdapter.generate(cloth_latent, None, prompt_embeds_null, prompt_embeds, negative_prompt_embeds, batch_size, seed, cfg, cloth_guidance_scale, steps, height, width) 153 | gen_image = magicClothingAdapter.pipe.vae.decode( 154 | gen_image, return_dict=False, generator=magicClothingAdapter.generator)[0] 155 | gen_image = gen_image.permute(0, 2, 3, 1) 156 | gen_image = ((gen_image+1) * 127.5).clamp(min=0, 157 | max=255).to(dtype=torch.float32) / 255.0 158 | return (gen_image,) 159 | 160 | 161 | class LoadMagicClothingPipelineWithPath: 162 | @classmethod 163 | def INPUT_TYPES(cls): 164 | paths = [] 165 | my_path = os.path.dirname(__file__) 166 | my_pipeline_path = os.path.join(my_path, "conversion") 167 | for search_path in folder_paths.get_folder_paths("diffusers"): 168 | if os.path.exists(search_path): 169 | client_paths = next(os.walk(search_path))[1] 170 | client_paths = 
["diffusers/" + item for item in client_paths] 171 | paths += client_paths 172 | if os.path.exists(my_pipeline_path): 173 | client_paths = next(os.walk(my_pipeline_path))[1] 174 | client_paths = ["conversion/" + item for item in client_paths] 175 | paths += client_paths 176 | return {"required": {"model_path": (paths,), 177 | "dtype": (["default", "float32", "float16", "bfloat16"],), 178 | "device": (["default", "cpu", "cuda", "cuda:0", "cuda:1"],), }} 179 | RETURN_TYPES = ("PIPELINE", "AUTOENCODER", "SCHEDULER",) 180 | FUNCTION = "load_checkpoint" 181 | 182 | CATEGORY = "Diffusers" 183 | 184 | def load_checkpoint(self, model_path,dtype,device): 185 | if dtype == "float16": 186 | seleted_type = torch.float16 187 | elif dtype == "bfloat16": 188 | seleted_type = torch.bfloat16 189 | else: 190 | seleted_type = torch.float32 191 | if device == "default": 192 | seleted_device = model_management.get_torch_device() 193 | else: 194 | seleted_device = torch.device(device) 195 | 196 | if model_path.startswith("conversion/"): 197 | model_path = model_path.replace("conversion/", "") 198 | my_path = os.path.dirname(__file__) 199 | my_pipeline_path = os.path.join(my_path, "conversion") 200 | model_real_path = os.path.join(my_pipeline_path, model_path) 201 | model_real_dir = my_pipeline_path 202 | elif model_path.startswith("diffusers/"): 203 | model_path = model_path.replace("diffusers/", "") 204 | diffusers_path = folder_paths.get_folder_paths("diffusers")[0] 205 | model_real_path = os.path.join(diffusers_path, model_path) 206 | model_real_dir = diffusers_path 207 | else: 208 | raise ValueError("未选择模型") 209 | 210 | pipe = MagicClothingDiffusionPipeline.from_pretrained( 211 | pretrained_model_name_or_path=model_real_path, 212 | torch_dtype=seleted_type, 213 | cache_dir=model_real_dir, 214 | ) 215 | pipe.to(seleted_device, dtype=seleted_type) 216 | return ((pipe, model_real_path), pipe.vae, pipe.scheduler) 217 | 218 | class LoadMagicClothingPipelinWithConversion: 219 | # code base from https://github.com/Limitex/ComfyUI-Diffusers.git 220 | 221 | @classmethod 222 | def INPUT_TYPES(s): 223 | return {"required": {"ckpt_name": (folder_paths.get_filename_list("checkpoints"), ), 224 | "dtype": (["default", "float32", "float16", "bfloat16"],), 225 | "device": (["default", "cpu", "cuda", "cuda:0", "cuda:1"],), }} 226 | 227 | RETURN_TYPES = ("PIPELINE", "AUTOENCODER", "SCHEDULER",) 228 | 229 | FUNCTION = "create_pipeline" 230 | 231 | CATEGORY = "Diffusers" 232 | 233 | def create_pipeline(self, ckpt_name,dtype,device): 234 | if dtype == "float16": 235 | seleted_type = torch.float16 236 | elif dtype == "bfloat16": 237 | seleted_type = torch.bfloat16 238 | else: 239 | seleted_type = torch.float32 240 | if device == "default": 241 | seleted_device = model_management.get_torch_device() 242 | else: 243 | seleted_device = torch.device(device) 244 | my_path = os.path.dirname(__file__) 245 | my_pipeline_path = os.path.join(my_path, "conversion") 246 | if not os.path.exists(my_pipeline_path): 247 | os.makedirs(my_pipeline_path) 248 | real_ckpt_name = Path(ckpt_name).stem 249 | real_ckpt_name = real_ckpt_name +"_"+str(seleted_type) 250 | real_ckpt_name = real_ckpt_name.replace(" ", "_").replace(".", "_").replace("/", "_") 251 | ckpt_conversion_path = os.path.join(my_pipeline_path, real_ckpt_name) 252 | if not os.path.exists(ckpt_conversion_path): 253 | # 不存在,则进行转换 254 | MagicClothingDiffusionPipeline.from_single_file( 255 | pretrained_model_link_or_path=folder_paths.get_full_path("checkpoints", ckpt_name), 256 | 
torch_dtype=seleted_type, 257 | cache_dir=my_pipeline_path, 258 | ).save_pretrained(ckpt_conversion_path, safe_serialization=True) 259 | 260 | pipe = MagicClothingDiffusionPipeline.from_pretrained( 261 | pretrained_model_name_or_path=ckpt_conversion_path, 262 | torch_dtype=seleted_type, 263 | cache_dir=my_pipeline_path, 264 | ) 265 | pipe.to(seleted_device, dtype=seleted_type) 266 | return ((pipe, ckpt_conversion_path), pipe.vae, pipe.scheduler) 267 | 268 | 269 | 270 | class DiffusersSchedulerLoader: 271 | # code copy from https://github.com/Limitex/ComfyUI-Diffusers.git 272 | 273 | @classmethod 274 | def INPUT_TYPES(s): 275 | return { 276 | "required": { 277 | "pipeline": ("PIPELINE", ), 278 | "scheduler_name": (list(SCHEDULERS.keys()), ), 279 | } 280 | } 281 | 282 | RETURN_TYPES = ("SCHEDULER",) 283 | 284 | FUNCTION = "load_scheduler" 285 | 286 | CATEGORY = "Diffusers" 287 | 288 | def load_scheduler(self, pipeline, scheduler_name): 289 | my_path = os.path.dirname(__file__) 290 | my_pipeline_path = os.path.join(my_path, "conversion") 291 | if not os.path.exists(my_pipeline_path): 292 | os.makedirs(my_pipeline_path) 293 | scheduler = SCHEDULERS[scheduler_name].from_pretrained( 294 | pretrained_model_name_or_path=pipeline[1], 295 | torch_dtype=pipeline[0].dtype, 296 | cache_dir=my_pipeline_path, 297 | subfolder='scheduler' 298 | ) 299 | return (scheduler,) 300 | 301 | class DiffusersModelMakeup: 302 | # code copy from https://github.com/Limitex/ComfyUI-Diffusers.git 303 | @classmethod 304 | def INPUT_TYPES(s): 305 | return { 306 | "required": { 307 | "pipeline": ("PIPELINE", ), 308 | "scheduler": ("SCHEDULER", ), 309 | "autoencoder": ("AUTOENCODER", ), 310 | }, 311 | } 312 | 313 | RETURN_TYPES = ("MAKED_PIPELINE",) 314 | 315 | FUNCTION = "makeup_pipeline" 316 | 317 | CATEGORY = "Diffusers" 318 | 319 | def makeup_pipeline(self, pipeline, scheduler, autoencoder): 320 | pipeline = pipeline[0] 321 | autoencoder.to(pipeline.device, dtype=pipeline.dtype) 322 | pipeline.vae = autoencoder 323 | pipeline.scheduler = scheduler 324 | pipeline.safety_checker = None if pipeline.safety_checker is None else lambda images, **kwargs: (images, [False]) 325 | pipeline.enable_attention_slicing() 326 | return (pipeline,) 327 | 328 | class LoadMagicClothingAdapter: 329 | @classmethod 330 | def INPUT_TYPES(s): 331 | return {"required": 332 | {"magicClothingUnet": (folder_paths.get_filename_list("unet"), ), 333 | "pipeline": ("MAKED_PIPELINE", ), 334 | }, 335 | } 336 | 337 | RETURN_TYPES = ("MAGIC_CLOTHING_ADAPTER",) 338 | RETURN_NAMES = ("MagicClothingAdapter",) 339 | FUNCTION = "load_model" 340 | 341 | CATEGORY = "loaders" 342 | 343 | def load_model(self, magicClothingUnet, pipeline): 344 | unet_path = folder_paths.get_full_path("unet", magicClothingUnet) 345 | full_model = ClothAdapter(pipeline, unet_path) 346 | return (full_model,) 347 | 348 | 349 | NODE_CLASS_MAPPINGS = { 350 | "Diffusers Model Makeup &MC": DiffusersModelMakeup, 351 | "Diffusers Scheduler Loader &MC": DiffusersSchedulerLoader, 352 | "Change Pixel Value Normalization": ChangePixelValueNormalization, 353 | "Change Pipeline Dtype And Device": ChangePipelineDtypeAndDevice, 354 | "Load Magic Clothing Pipeline With Path": LoadMagicClothingPipelineWithPath, 355 | "Load Magic Clothing Pipeline": LoadMagicClothingPipelinWithConversion, 356 | "Load Magic Clothing Adapter": LoadMagicClothingAdapter, 357 | "RUN Magic Clothing Diffusers Model": RunMagicClothingDiffusersModel, 358 | } 359 | 360 | NODE_DISPLAY_NAME_MAPPINGS = { 361 | "Diffusers Model Makeup 
&MC": "Diffusers Model Makeup &MC", 362 | "Diffusers Scheduler Loader &MC": "Diffusers Scheduler Loader &MC", 363 | "Change Pipeline Dtype And Device": "Change Pipeline Dtype And Device", 364 | "Change Pixel Value Normalization": "Change Pixel Value Normalization", 365 | "Load Magic Clothing Pipeline With Path":"Load Magic Clothing Pipeline With Path&Diffusers", 366 | "Load Magic Clothing Pipeline":"Load Magic Clothing Pipeline&Diffusers", 367 | "Load Magic Clothing Adapter": "Load Magic Clothing Adapter &Diffusers", 368 | "RUN Magic Clothing Adapter": "RUN Magic Clothing Adapter &Diffusers", 369 | } 370 | -------------------------------------------------------------------------------- /example.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 104, 3 | "last_link_id": 233, 4 | "nodes": [ 5 | { 6 | "id": 19, 7 | "type": "LoadImage", 8 | "pos": [ 9 | 220, 10 | 170 11 | ], 12 | "size": { 13 | "0": 315, 14 | "1": 314 15 | }, 16 | "flags": {}, 17 | "order": 0, 18 | "mode": 0, 19 | "outputs": [ 20 | { 21 | "name": "IMAGE", 22 | "type": "IMAGE", 23 | "links": [ 24 | 228, 25 | 230 26 | ], 27 | "shape": 3, 28 | "slot_index": 0 29 | }, 30 | { 31 | "name": "MASK", 32 | "type": "MASK", 33 | "links": null, 34 | "shape": 3 35 | } 36 | ], 37 | "properties": { 38 | "Node name for S&R": "LoadImage" 39 | }, 40 | "widgets_values": [ 41 | "ComfyUI_00002_.png", 42 | "image" 43 | ] 44 | }, 45 | { 46 | "id": 101, 47 | "type": "BiRefNet", 48 | "pos": [ 49 | 550, 50 | 300 51 | ], 52 | "size": { 53 | "0": 315, 54 | "1": 58 55 | }, 56 | "flags": {}, 57 | "order": 3, 58 | "mode": 0, 59 | "inputs": [ 60 | { 61 | "name": "image", 62 | "type": "IMAGE", 63 | "link": 228 64 | } 65 | ], 66 | "outputs": [ 67 | { 68 | "name": "mask", 69 | "type": "MASK", 70 | "links": [ 71 | 229 72 | ], 73 | "shape": 3, 74 | "slot_index": 0 75 | } 76 | ], 77 | "properties": { 78 | "Node name for S&R": "BiRefNet" 79 | }, 80 | "widgets_values": [ 81 | "cuda:0" 82 | ] 83 | }, 84 | { 85 | "id": 102, 86 | "type": "Image Adaptive Crop With Mask", 87 | "pos": [ 88 | 880, 89 | 170 90 | ], 91 | "size": { 92 | "0": 315, 93 | "1": 126 94 | }, 95 | "flags": {}, 96 | "order": 7, 97 | "mode": 0, 98 | "inputs": [ 99 | { 100 | "name": "image", 101 | "type": "IMAGE", 102 | "link": 230 103 | }, 104 | { 105 | "name": "mask", 106 | "type": "MASK", 107 | "link": 229 108 | } 109 | ], 110 | "outputs": [ 111 | { 112 | "name": "image", 113 | "type": "IMAGE", 114 | "links": [ 115 | 231, 116 | 232 117 | ], 118 | "shape": 3, 119 | "slot_index": 0 120 | } 121 | ], 122 | "properties": { 123 | "Node name for S&R": "Image Adaptive Crop With Mask" 124 | }, 125 | "widgets_values": [ 126 | 576, 127 | 768, 128 | 50 129 | ] 130 | }, 131 | { 132 | "id": 42, 133 | "type": "VAEEncode", 134 | "pos": [ 135 | 1240, 136 | 170 137 | ], 138 | "size": { 139 | "0": 210, 140 | "1": 46 141 | }, 142 | "flags": {}, 143 | "order": 8, 144 | "mode": 0, 145 | "inputs": [ 146 | { 147 | "name": "pixels", 148 | "type": "IMAGE", 149 | "link": 231 150 | }, 151 | { 152 | "name": "vae", 153 | "type": "VAE", 154 | "link": 217 155 | } 156 | ], 157 | "outputs": [ 158 | { 159 | "name": "LATENT", 160 | "type": "LATENT", 161 | "links": [ 162 | 222 163 | ], 164 | "shape": 3, 165 | "slot_index": 0 166 | } 167 | ], 168 | "properties": { 169 | "Node name for S&R": "VAEEncode" 170 | } 171 | }, 172 | { 173 | "id": 67, 174 | "type": "CLIPTextEncode", 175 | "pos": [ 176 | 890, 177 | 780 178 | ], 179 | "size": { 180 | "0": 210, 181 | "1": 76 182 | }, 183 | 
"flags": {}, 184 | "order": 6, 185 | "mode": 0, 186 | "inputs": [ 187 | { 188 | "name": "clip", 189 | "type": "CLIP", 190 | "link": 166 191 | } 192 | ], 193 | "outputs": [ 194 | { 195 | "name": "CONDITIONING", 196 | "type": "CONDITIONING", 197 | "links": [ 198 | 141 199 | ], 200 | "shape": 3, 201 | "slot_index": 0 202 | } 203 | ], 204 | "properties": { 205 | "Node name for S&R": "CLIPTextEncode" 206 | }, 207 | "widgets_values": [ 208 | "bare, monochrome, lowres, bad anatomy, worst quality, low quality" 209 | ] 210 | }, 211 | { 212 | "id": 89, 213 | "type": "EmptyLatentImage", 214 | "pos": [ 215 | 1540, 216 | 580 217 | ], 218 | "size": { 219 | "0": 315, 220 | "1": 106 221 | }, 222 | "flags": {}, 223 | "order": 1, 224 | "mode": 0, 225 | "outputs": [ 226 | { 227 | "name": "LATENT", 228 | "type": "LATENT", 229 | "links": [ 230 | 192 231 | ], 232 | "shape": 3, 233 | "slot_index": 0 234 | } 235 | ], 236 | "properties": { 237 | "Node name for S&R": "EmptyLatentImage" 238 | }, 239 | "widgets_values": [ 240 | 576, 241 | 768, 242 | 1 243 | ] 244 | }, 245 | { 246 | "id": 35, 247 | "type": "VAEDecode", 248 | "pos": [ 249 | 1890, 250 | 730 251 | ], 252 | "size": { 253 | "0": 210, 254 | "1": 46 255 | }, 256 | "flags": {}, 257 | "order": 12, 258 | "mode": 0, 259 | "inputs": [ 260 | { 261 | "name": "samples", 262 | "type": "LATENT", 263 | "link": 170 264 | }, 265 | { 266 | "name": "vae", 267 | "type": "VAE", 268 | "link": 225 269 | } 270 | ], 271 | "outputs": [ 272 | { 273 | "name": "IMAGE", 274 | "type": "IMAGE", 275 | "links": [ 276 | 227 277 | ], 278 | "shape": 3, 279 | "slot_index": 0 280 | } 281 | ], 282 | "properties": { 283 | "Node name for S&R": "VAEDecode" 284 | } 285 | }, 286 | { 287 | "id": 100, 288 | "type": "PreviewImage", 289 | "pos": [ 290 | 1760, 291 | 880 292 | ], 293 | "size": { 294 | "0": 210, 295 | "1": 246 296 | }, 297 | "flags": {}, 298 | "order": 13, 299 | "mode": 0, 300 | "inputs": [ 301 | { 302 | "name": "images", 303 | "type": "IMAGE", 304 | "link": 227 305 | } 306 | ], 307 | "properties": { 308 | "Node name for S&R": "PreviewImage" 309 | } 310 | }, 311 | { 312 | "id": 103, 313 | "type": "PreviewImage", 314 | "pos": [ 315 | 1510, 316 | 880 317 | ], 318 | "size": { 319 | "0": 210, 320 | "1": 246 321 | }, 322 | "flags": {}, 323 | "order": 9, 324 | "mode": 0, 325 | "inputs": [ 326 | { 327 | "name": "images", 328 | "type": "IMAGE", 329 | "link": 232 330 | } 331 | ], 332 | "properties": { 333 | "Node name for S&R": "PreviewImage" 334 | } 335 | }, 336 | { 337 | "id": 76, 338 | "type": "Load Magic Clothing Model", 339 | "pos": [ 340 | 218, 341 | 651 342 | ], 343 | "size": { 344 | "0": 380.4000244140625, 345 | "1": 78 346 | }, 347 | "flags": {}, 348 | "order": 4, 349 | "mode": 0, 350 | "inputs": [ 351 | { 352 | "name": "sourceModel", 353 | "type": "MODEL", 354 | "link": 167 355 | } 356 | ], 357 | "outputs": [ 358 | { 359 | "name": "sourceModel", 360 | "type": "MODEL", 361 | "links": [ 362 | 223 363 | ], 364 | "shape": 3, 365 | "slot_index": 0 366 | }, 367 | { 368 | "name": "magicClothingModel", 369 | "type": "MODEL", 370 | "links": [ 371 | 224 372 | ], 373 | "shape": 3, 374 | "slot_index": 1 375 | } 376 | ], 377 | "properties": { 378 | "Node name for S&R": "Load Magic Clothing Model" 379 | }, 380 | "widgets_values": [ 381 | "oms_diffusion_768_200000.safetensors" 382 | ] 383 | }, 384 | { 385 | "id": 80, 386 | "type": "CheckpointLoaderSimple", 387 | "pos": [ 388 | 217, 389 | 929 390 | ], 391 | "size": { 392 | "0": 315, 393 | "1": 98 394 | }, 395 | "flags": {}, 396 | "order": 2, 397 | 
"mode": 0, 398 | "outputs": [ 399 | { 400 | "name": "MODEL", 401 | "type": "MODEL", 402 | "links": [ 403 | 167 404 | ], 405 | "shape": 3, 406 | "slot_index": 0 407 | }, 408 | { 409 | "name": "CLIP", 410 | "type": "CLIP", 411 | "links": [ 412 | 165, 413 | 166, 414 | 226 415 | ], 416 | "shape": 3, 417 | "slot_index": 1 418 | }, 419 | { 420 | "name": "VAE", 421 | "type": "VAE", 422 | "links": [ 423 | 217, 424 | 225 425 | ], 426 | "shape": 3, 427 | "slot_index": 2 428 | } 429 | ], 430 | "properties": { 431 | "Node name for S&R": "CheckpointLoaderSimple" 432 | }, 433 | "widgets_values": [ 434 | "Realistic_Vision_V4.0_fp16-no-ema.safetensors" 435 | ] 436 | }, 437 | { 438 | "id": 66, 439 | "type": "CLIPTextEncode", 440 | "pos": [ 441 | 890, 442 | 650 443 | ], 444 | "size": { 445 | "0": 210, 446 | "1": 76 447 | }, 448 | "flags": {}, 449 | "order": 5, 450 | "mode": 0, 451 | "inputs": [ 452 | { 453 | "name": "clip", 454 | "type": "CLIP", 455 | "link": 165 456 | } 457 | ], 458 | "outputs": [ 459 | { 460 | "name": "CONDITIONING", 461 | "type": "CONDITIONING", 462 | "links": [ 463 | 140 464 | ], 465 | "shape": 3, 466 | "slot_index": 0 467 | } 468 | ], 469 | "properties": { 470 | "Node name for S&R": "CLIPTextEncode" 471 | }, 472 | "widgets_values": [ 473 | "a photography of a model,best quality, high quality" 474 | ] 475 | }, 476 | { 477 | "id": 98, 478 | "type": "Add Magic Clothing Attention", 479 | "pos": [ 480 | 1540, 481 | 181 482 | ], 483 | "size": { 484 | "0": 315, 485 | "1": 166 486 | }, 487 | "flags": {}, 488 | "order": 10, 489 | "mode": 0, 490 | "inputs": [ 491 | { 492 | "name": "sourceModel", 493 | "type": "MODEL", 494 | "link": 223 495 | }, 496 | { 497 | "name": "magicClothingModel", 498 | "type": "MODEL", 499 | "link": 224 500 | }, 501 | { 502 | "name": "clip", 503 | "type": "CLIP", 504 | "link": 226 505 | }, 506 | { 507 | "name": "feature_image", 508 | "type": "LATENT", 509 | "link": 222 510 | } 511 | ], 512 | "outputs": [ 513 | { 514 | "name": "MODEL", 515 | "type": "MODEL", 516 | "links": [ 517 | 221 518 | ], 519 | "shape": 3, 520 | "slot_index": 0 521 | } 522 | ], 523 | "properties": { 524 | "Node name for S&R": "Add Magic Clothing Attention" 525 | }, 526 | "widgets_values": [ 527 | true, 528 | 2.5, 529 | 0.71 530 | ] 531 | }, 532 | { 533 | "id": 77, 534 | "type": "KSampler", 535 | "pos": [ 536 | 1926, 537 | 183 538 | ], 539 | "size": { 540 | "0": 315, 541 | "1": 262 542 | }, 543 | "flags": {}, 544 | "order": 11, 545 | "mode": 0, 546 | "inputs": [ 547 | { 548 | "name": "model", 549 | "type": "MODEL", 550 | "link": 221 551 | }, 552 | { 553 | "name": "positive", 554 | "type": "CONDITIONING", 555 | "link": 140 556 | }, 557 | { 558 | "name": "negative", 559 | "type": "CONDITIONING", 560 | "link": 141 561 | }, 562 | { 563 | "name": "latent_image", 564 | "type": "LATENT", 565 | "link": 192 566 | } 567 | ], 568 | "outputs": [ 569 | { 570 | "name": "LATENT", 571 | "type": "LATENT", 572 | "links": [ 573 | 170 574 | ], 575 | "shape": 3, 576 | "slot_index": 0 577 | } 578 | ], 579 | "properties": { 580 | "Node name for S&R": "KSampler" 581 | }, 582 | "widgets_values": [ 583 | 834064559708728, 584 | "fixed", 585 | 20, 586 | 5, 587 | "uni_pc", 588 | "normal", 589 | 1 590 | ] 591 | } 592 | ], 593 | "links": [ 594 | [ 595 | 140, 596 | 66, 597 | 0, 598 | 77, 599 | 1, 600 | "CONDITIONING" 601 | ], 602 | [ 603 | 141, 604 | 67, 605 | 0, 606 | 77, 607 | 2, 608 | "CONDITIONING" 609 | ], 610 | [ 611 | 165, 612 | 80, 613 | 1, 614 | 66, 615 | 0, 616 | "CLIP" 617 | ], 618 | [ 619 | 166, 620 | 80, 621 | 1, 622 | 
67, 623 | 0, 624 | "CLIP" 625 | ], 626 | [ 627 | 167, 628 | 80, 629 | 0, 630 | 76, 631 | 0, 632 | "MODEL" 633 | ], 634 | [ 635 | 170, 636 | 77, 637 | 0, 638 | 35, 639 | 0, 640 | "LATENT" 641 | ], 642 | [ 643 | 192, 644 | 89, 645 | 0, 646 | 77, 647 | 3, 648 | "LATENT" 649 | ], 650 | [ 651 | 217, 652 | 80, 653 | 2, 654 | 42, 655 | 1, 656 | "VAE" 657 | ], 658 | [ 659 | 221, 660 | 98, 661 | 0, 662 | 77, 663 | 0, 664 | "MODEL" 665 | ], 666 | [ 667 | 222, 668 | 42, 669 | 0, 670 | 98, 671 | 3, 672 | "LATENT" 673 | ], 674 | [ 675 | 223, 676 | 76, 677 | 0, 678 | 98, 679 | 0, 680 | "MODEL" 681 | ], 682 | [ 683 | 224, 684 | 76, 685 | 1, 686 | 98, 687 | 1, 688 | "MODEL" 689 | ], 690 | [ 691 | 225, 692 | 80, 693 | 2, 694 | 35, 695 | 1, 696 | "VAE" 697 | ], 698 | [ 699 | 226, 700 | 80, 701 | 1, 702 | 98, 703 | 2, 704 | "CLIP" 705 | ], 706 | [ 707 | 227, 708 | 35, 709 | 0, 710 | 100, 711 | 0, 712 | "IMAGE" 713 | ], 714 | [ 715 | 228, 716 | 19, 717 | 0, 718 | 101, 719 | 0, 720 | "IMAGE" 721 | ], 722 | [ 723 | 229, 724 | 101, 725 | 0, 726 | 102, 727 | 1, 728 | "MASK" 729 | ], 730 | [ 731 | 230, 732 | 19, 733 | 0, 734 | 102, 735 | 0, 736 | "IMAGE" 737 | ], 738 | [ 739 | 231, 740 | 102, 741 | 0, 742 | 42, 743 | 0, 744 | "IMAGE" 745 | ], 746 | [ 747 | 232, 748 | 102, 749 | 0, 750 | 103, 751 | 0, 752 | "IMAGE" 753 | ] 754 | ], 755 | "groups": [], 756 | "config": {}, 757 | "extra": {}, 758 | "version": 0.4 759 | } -------------------------------------------------------------------------------- /ipadapter.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 110, 3 | "last_link_id": 250, 4 | "nodes": [ 5 | { 6 | "id": 101, 7 | "type": "BiRefNet", 8 | "pos": [ 9 | 550, 10 | 300 11 | ], 12 | "size": { 13 | "0": 315, 14 | "1": 58 15 | }, 16 | "flags": {}, 17 | "order": 6, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "image", 22 | "type": "IMAGE", 23 | "link": 228 24 | } 25 | ], 26 | "outputs": [ 27 | { 28 | "name": "mask", 29 | "type": "MASK", 30 | "links": [ 31 | 229 32 | ], 33 | "shape": 3, 34 | "slot_index": 0 35 | } 36 | ], 37 | "properties": { 38 | "Node name for S&R": "BiRefNet" 39 | }, 40 | "widgets_values": [ 41 | "cuda:0" 42 | ] 43 | }, 44 | { 45 | "id": 67, 46 | "type": "CLIPTextEncode", 47 | "pos": [ 48 | 890, 49 | 780 50 | ], 51 | "size": { 52 | "0": 210, 53 | "1": 76 54 | }, 55 | "flags": {}, 56 | "order": 5, 57 | "mode": 0, 58 | "inputs": [ 59 | { 60 | "name": "clip", 61 | "type": "CLIP", 62 | "link": 166 63 | } 64 | ], 65 | "outputs": [ 66 | { 67 | "name": "CONDITIONING", 68 | "type": "CONDITIONING", 69 | "links": [ 70 | 141 71 | ], 72 | "shape": 3, 73 | "slot_index": 0 74 | } 75 | ], 76 | "properties": { 77 | "Node name for S&R": "CLIPTextEncode" 78 | }, 79 | "widgets_values": [ 80 | "bare, monochrome, lowres, bad anatomy, worst quality, low quality" 81 | ] 82 | }, 83 | { 84 | "id": 35, 85 | "type": "VAEDecode", 86 | "pos": [ 87 | 1890, 88 | 730 89 | ], 90 | "size": { 91 | "0": 210, 92 | "1": 46 93 | }, 94 | "flags": {}, 95 | "order": 14, 96 | "mode": 0, 97 | "inputs": [ 98 | { 99 | "name": "samples", 100 | "type": "LATENT", 101 | "link": 170 102 | }, 103 | { 104 | "name": "vae", 105 | "type": "VAE", 106 | "link": 225 107 | } 108 | ], 109 | "outputs": [ 110 | { 111 | "name": "IMAGE", 112 | "type": "IMAGE", 113 | "links": [ 114 | 227 115 | ], 116 | "shape": 3, 117 | "slot_index": 0 118 | } 119 | ], 120 | "properties": { 121 | "Node name for S&R": "VAEDecode" 122 | } 123 | }, 124 | { 125 | "id": 100, 126 | "type": "PreviewImage", 127 | 
"pos": [ 128 | 1760, 129 | 880 130 | ], 131 | "size": { 132 | "0": 210, 133 | "1": 246 134 | }, 135 | "flags": {}, 136 | "order": 15, 137 | "mode": 0, 138 | "inputs": [ 139 | { 140 | "name": "images", 141 | "type": "IMAGE", 142 | "link": 227 143 | } 144 | ], 145 | "properties": { 146 | "Node name for S&R": "PreviewImage" 147 | } 148 | }, 149 | { 150 | "id": 103, 151 | "type": "PreviewImage", 152 | "pos": [ 153 | 1510, 154 | 880 155 | ], 156 | "size": { 157 | "0": 210, 158 | "1": 246 159 | }, 160 | "flags": {}, 161 | "order": 9, 162 | "mode": 0, 163 | "inputs": [ 164 | { 165 | "name": "images", 166 | "type": "IMAGE", 167 | "link": 232 168 | } 169 | ], 170 | "properties": { 171 | "Node name for S&R": "PreviewImage" 172 | } 173 | }, 174 | { 175 | "id": 66, 176 | "type": "CLIPTextEncode", 177 | "pos": [ 178 | 890, 179 | 650 180 | ], 181 | "size": { 182 | "0": 210, 183 | "1": 76 184 | }, 185 | "flags": {}, 186 | "order": 4, 187 | "mode": 0, 188 | "inputs": [ 189 | { 190 | "name": "clip", 191 | "type": "CLIP", 192 | "link": 165 193 | } 194 | ], 195 | "outputs": [ 196 | { 197 | "name": "CONDITIONING", 198 | "type": "CONDITIONING", 199 | "links": [ 200 | 140 201 | ], 202 | "shape": 3, 203 | "slot_index": 0 204 | } 205 | ], 206 | "properties": { 207 | "Node name for S&R": "CLIPTextEncode" 208 | }, 209 | "widgets_values": [ 210 | "a photography of a model,best quality, high quality" 211 | ] 212 | }, 213 | { 214 | "id": 77, 215 | "type": "KSampler", 216 | "pos": [ 217 | 1926, 218 | 183 219 | ], 220 | "size": { 221 | "0": 315, 222 | "1": 262 223 | }, 224 | "flags": {}, 225 | "order": 13, 226 | "mode": 0, 227 | "inputs": [ 228 | { 229 | "name": "model", 230 | "type": "MODEL", 231 | "link": 250 232 | }, 233 | { 234 | "name": "positive", 235 | "type": "CONDITIONING", 236 | "link": 140 237 | }, 238 | { 239 | "name": "negative", 240 | "type": "CONDITIONING", 241 | "link": 141 242 | }, 243 | { 244 | "name": "latent_image", 245 | "type": "LATENT", 246 | "link": 192 247 | } 248 | ], 249 | "outputs": [ 250 | { 251 | "name": "LATENT", 252 | "type": "LATENT", 253 | "links": [ 254 | 170 255 | ], 256 | "shape": 3, 257 | "slot_index": 0 258 | } 259 | ], 260 | "properties": { 261 | "Node name for S&R": "KSampler" 262 | }, 263 | "widgets_values": [ 264 | 834064559708728, 265 | "fixed", 266 | 20, 267 | 5, 268 | "uni_pc", 269 | "normal", 270 | 1 271 | ] 272 | }, 273 | { 274 | "id": 107, 275 | "type": "IPAdapterUnifiedLoader", 276 | "pos": [ 277 | 360, 278 | 1075 279 | ], 280 | "size": { 281 | "0": 315, 282 | "1": 78 283 | }, 284 | "flags": {}, 285 | "order": 3, 286 | "mode": 0, 287 | "inputs": [ 288 | { 289 | "name": "model", 290 | "type": "MODEL", 291 | "link": 248 292 | }, 293 | { 294 | "name": "ipadapter", 295 | "type": "IPADAPTER", 296 | "link": null 297 | } 298 | ], 299 | "outputs": [ 300 | { 301 | "name": "model", 302 | "type": "MODEL", 303 | "links": [ 304 | 245 305 | ], 306 | "shape": 3, 307 | "slot_index": 0 308 | }, 309 | { 310 | "name": "ipadapter", 311 | "type": "IPADAPTER", 312 | "links": [ 313 | 243 314 | ], 315 | "shape": 3, 316 | "slot_index": 1 317 | } 318 | ], 319 | "properties": { 320 | "Node name for S&R": "IPAdapterUnifiedLoader" 321 | }, 322 | "widgets_values": [ 323 | "STANDARD (medium strength)" 324 | ] 325 | }, 326 | { 327 | "id": 42, 328 | "type": "VAEEncode", 329 | "pos": [ 330 | 1240, 331 | 170 332 | ], 333 | "size": { 334 | "0": 210, 335 | "1": 46 336 | }, 337 | "flags": {}, 338 | "order": 8, 339 | "mode": 0, 340 | "inputs": [ 341 | { 342 | "name": "pixels", 343 | "type": "IMAGE", 344 | 
"link": 231 345 | }, 346 | { 347 | "name": "vae", 348 | "type": "VAE", 349 | "link": 217 350 | } 351 | ], 352 | "outputs": [ 353 | { 354 | "name": "LATENT", 355 | "type": "LATENT", 356 | "links": [ 357 | 222 358 | ], 359 | "shape": 3, 360 | "slot_index": 0 361 | } 362 | ], 363 | "properties": { 364 | "Node name for S&R": "VAEEncode" 365 | } 366 | }, 367 | { 368 | "id": 80, 369 | "type": "CheckpointLoaderSimple", 370 | "pos": [ 371 | 217, 372 | 929 373 | ], 374 | "size": { 375 | "0": 315, 376 | "1": 98 377 | }, 378 | "flags": {}, 379 | "order": 1, 380 | "mode": 0, 381 | "outputs": [ 382 | { 383 | "name": "MODEL", 384 | "type": "MODEL", 385 | "links": [ 386 | 248 387 | ], 388 | "shape": 3, 389 | "slot_index": 0 390 | }, 391 | { 392 | "name": "CLIP", 393 | "type": "CLIP", 394 | "links": [ 395 | 165, 396 | 166, 397 | 226 398 | ], 399 | "shape": 3, 400 | "slot_index": 1 401 | }, 402 | { 403 | "name": "VAE", 404 | "type": "VAE", 405 | "links": [ 406 | 217, 407 | 225 408 | ], 409 | "shape": 3, 410 | "slot_index": 2 411 | } 412 | ], 413 | "properties": { 414 | "Node name for S&R": "CheckpointLoaderSimple" 415 | }, 416 | "widgets_values": [ 417 | "Realistic_Vision_V4.0_fp16-no-ema.safetensors" 418 | ] 419 | }, 420 | { 421 | "id": 110, 422 | "type": "IPAdapter", 423 | "pos": [ 424 | 785, 425 | 1068 426 | ], 427 | "size": { 428 | "0": 315, 429 | "1": 190 430 | }, 431 | "flags": {}, 432 | "order": 10, 433 | "mode": 0, 434 | "inputs": [ 435 | { 436 | "name": "model", 437 | "type": "MODEL", 438 | "link": 245 439 | }, 440 | { 441 | "name": "ipadapter", 442 | "type": "IPADAPTER", 443 | "link": 243 444 | }, 445 | { 446 | "name": "image", 447 | "type": "IMAGE", 448 | "link": 247 449 | }, 450 | { 451 | "name": "attn_mask", 452 | "type": "MASK", 453 | "link": null 454 | } 455 | ], 456 | "outputs": [ 457 | { 458 | "name": "MODEL", 459 | "type": "MODEL", 460 | "links": [ 461 | 249 462 | ], 463 | "shape": 3, 464 | "slot_index": 0 465 | } 466 | ], 467 | "properties": { 468 | "Node name for S&R": "IPAdapter" 469 | }, 470 | "widgets_values": [ 471 | 1, 472 | 0, 473 | 1, 474 | "standard" 475 | ] 476 | }, 477 | { 478 | "id": 76, 479 | "type": "Load Magic Clothing Model", 480 | "pos": [ 481 | 218, 482 | 651 483 | ], 484 | "size": { 485 | "0": 380.4000244140625, 486 | "1": 78 487 | }, 488 | "flags": {}, 489 | "order": 11, 490 | "mode": 0, 491 | "inputs": [ 492 | { 493 | "name": "sourceModel", 494 | "type": "MODEL", 495 | "link": 249 496 | } 497 | ], 498 | "outputs": [ 499 | { 500 | "name": "sourceModel", 501 | "type": "MODEL", 502 | "links": [ 503 | 223 504 | ], 505 | "shape": 3, 506 | "slot_index": 0 507 | }, 508 | { 509 | "name": "magicClothingModel", 510 | "type": "MODEL", 511 | "links": [ 512 | 224 513 | ], 514 | "shape": 3, 515 | "slot_index": 1 516 | } 517 | ], 518 | "properties": { 519 | "Node name for S&R": "Load Magic Clothing Model" 520 | }, 521 | "widgets_values": [ 522 | "oms_diffusion_768_200000.safetensors" 523 | ] 524 | }, 525 | { 526 | "id": 19, 527 | "type": "LoadImage", 528 | "pos": [ 529 | 220, 530 | 170 531 | ], 532 | "size": { 533 | "0": 315, 534 | "1": 314 535 | }, 536 | "flags": {}, 537 | "order": 2, 538 | "mode": 0, 539 | "outputs": [ 540 | { 541 | "name": "IMAGE", 542 | "type": "IMAGE", 543 | "links": [ 544 | 228, 545 | 230 546 | ], 547 | "shape": 3, 548 | "slot_index": 0 549 | }, 550 | { 551 | "name": "MASK", 552 | "type": "MASK", 553 | "links": null, 554 | "shape": 3 555 | } 556 | ], 557 | "properties": { 558 | "Node name for S&R": "LoadImage" 559 | }, 560 | "widgets_values": [ 561 | 
"ComfyUI_00002_.png", 562 | "image" 563 | ] 564 | }, 565 | { 566 | "id": 98, 567 | "type": "Add Magic Clothing Attention", 568 | "pos": [ 569 | 1492, 570 | 164 571 | ], 572 | "size": { 573 | "0": 315, 574 | "1": 166 575 | }, 576 | "flags": {}, 577 | "order": 12, 578 | "mode": 0, 579 | "inputs": [ 580 | { 581 | "name": "sourceModel", 582 | "type": "MODEL", 583 | "link": 223 584 | }, 585 | { 586 | "name": "magicClothingModel", 587 | "type": "MODEL", 588 | "link": 224 589 | }, 590 | { 591 | "name": "clip", 592 | "type": "CLIP", 593 | "link": 226 594 | }, 595 | { 596 | "name": "feature_image", 597 | "type": "LATENT", 598 | "link": 222 599 | } 600 | ], 601 | "outputs": [ 602 | { 603 | "name": "MODEL", 604 | "type": "MODEL", 605 | "links": [ 606 | 250 607 | ], 608 | "shape": 3, 609 | "slot_index": 0 610 | } 611 | ], 612 | "properties": { 613 | "Node name for S&R": "Add Magic Clothing Attention" 614 | }, 615 | "widgets_values": [ 616 | true, 617 | 2.5 618 | ] 619 | }, 620 | { 621 | "id": 102, 622 | "type": "Image Adaptive Crop With Mask", 623 | "pos": [ 624 | 880, 625 | 170 626 | ], 627 | "size": { 628 | "0": 315, 629 | "1": 126 630 | }, 631 | "flags": {}, 632 | "order": 7, 633 | "mode": 0, 634 | "inputs": [ 635 | { 636 | "name": "image", 637 | "type": "IMAGE", 638 | "link": 230 639 | }, 640 | { 641 | "name": "mask", 642 | "type": "MASK", 643 | "link": 229 644 | } 645 | ], 646 | "outputs": [ 647 | { 648 | "name": "image", 649 | "type": "IMAGE", 650 | "links": [ 651 | 231, 652 | 232, 653 | 247 654 | ], 655 | "shape": 3, 656 | "slot_index": 0 657 | } 658 | ], 659 | "properties": { 660 | "Node name for S&R": "Image Adaptive Crop With Mask" 661 | }, 662 | "widgets_values": [ 663 | 512, 664 | 512, 665 | 8 666 | ] 667 | }, 668 | { 669 | "id": 89, 670 | "type": "EmptyLatentImage", 671 | "pos": [ 672 | 1540, 673 | 580 674 | ], 675 | "size": { 676 | "0": 315, 677 | "1": 106 678 | }, 679 | "flags": {}, 680 | "order": 0, 681 | "mode": 0, 682 | "outputs": [ 683 | { 684 | "name": "LATENT", 685 | "type": "LATENT", 686 | "links": [ 687 | 192 688 | ], 689 | "shape": 3, 690 | "slot_index": 0 691 | } 692 | ], 693 | "properties": { 694 | "Node name for S&R": "EmptyLatentImage" 695 | }, 696 | "widgets_values": [ 697 | 512, 698 | 512, 699 | 1 700 | ] 701 | } 702 | ], 703 | "links": [ 704 | [ 705 | 140, 706 | 66, 707 | 0, 708 | 77, 709 | 1, 710 | "CONDITIONING" 711 | ], 712 | [ 713 | 141, 714 | 67, 715 | 0, 716 | 77, 717 | 2, 718 | "CONDITIONING" 719 | ], 720 | [ 721 | 165, 722 | 80, 723 | 1, 724 | 66, 725 | 0, 726 | "CLIP" 727 | ], 728 | [ 729 | 166, 730 | 80, 731 | 1, 732 | 67, 733 | 0, 734 | "CLIP" 735 | ], 736 | [ 737 | 170, 738 | 77, 739 | 0, 740 | 35, 741 | 0, 742 | "LATENT" 743 | ], 744 | [ 745 | 192, 746 | 89, 747 | 0, 748 | 77, 749 | 3, 750 | "LATENT" 751 | ], 752 | [ 753 | 217, 754 | 80, 755 | 2, 756 | 42, 757 | 1, 758 | "VAE" 759 | ], 760 | [ 761 | 222, 762 | 42, 763 | 0, 764 | 98, 765 | 3, 766 | "LATENT" 767 | ], 768 | [ 769 | 223, 770 | 76, 771 | 0, 772 | 98, 773 | 0, 774 | "MODEL" 775 | ], 776 | [ 777 | 224, 778 | 76, 779 | 1, 780 | 98, 781 | 1, 782 | "MODEL" 783 | ], 784 | [ 785 | 225, 786 | 80, 787 | 2, 788 | 35, 789 | 1, 790 | "VAE" 791 | ], 792 | [ 793 | 226, 794 | 80, 795 | 1, 796 | 98, 797 | 2, 798 | "CLIP" 799 | ], 800 | [ 801 | 227, 802 | 35, 803 | 0, 804 | 100, 805 | 0, 806 | "IMAGE" 807 | ], 808 | [ 809 | 228, 810 | 19, 811 | 0, 812 | 101, 813 | 0, 814 | "IMAGE" 815 | ], 816 | [ 817 | 229, 818 | 101, 819 | 0, 820 | 102, 821 | 1, 822 | "MASK" 823 | ], 824 | [ 825 | 230, 826 | 19, 827 | 0, 828 | 
102, 829 | 0, 830 | "IMAGE" 831 | ], 832 | [ 833 | 231, 834 | 102, 835 | 0, 836 | 42, 837 | 0, 838 | "IMAGE" 839 | ], 840 | [ 841 | 232, 842 | 102, 843 | 0, 844 | 103, 845 | 0, 846 | "IMAGE" 847 | ], 848 | [ 849 | 243, 850 | 107, 851 | 1, 852 | 110, 853 | 1, 854 | "IPADAPTER" 855 | ], 856 | [ 857 | 245, 858 | 107, 859 | 0, 860 | 110, 861 | 0, 862 | "MODEL" 863 | ], 864 | [ 865 | 247, 866 | 102, 867 | 0, 868 | 110, 869 | 2, 870 | "IMAGE" 871 | ], 872 | [ 873 | 248, 874 | 80, 875 | 0, 876 | 107, 877 | 0, 878 | "MODEL" 879 | ], 880 | [ 881 | 249, 882 | 110, 883 | 0, 884 | 76, 885 | 0, 886 | "MODEL" 887 | ], 888 | [ 889 | 250, 890 | 98, 891 | 0, 892 | 77, 893 | 0, 894 | "MODEL" 895 | ] 896 | ], 897 | "groups": [], 898 | "config": {}, 899 | "extra": { 900 | "workspace_info": { 901 | "id": "uYaY8JVGliGxX8okXvJP4", 902 | "name": "Untitled Flow", 903 | "saveLock": false, 904 | "cloudID": null, 905 | "coverMediaPath": null 906 | }, 907 | "ds": { 908 | "scale": 1.1, 909 | "offset": { 910 | "0": -231.94835430034072, 911 | "1": -63.33645446524065 912 | } 913 | } 914 | }, 915 | "version": 0.4 916 | } -------------------------------------------------------------------------------- /nodes.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | import folder_paths 4 | 5 | import comfy.model_patcher 6 | import comfy.ldm.models.autoencoder 7 | import comfy.utils 8 | import comfy.sample 9 | import comfy.samplers 10 | import comfy.sampler_helpers 11 | 12 | from .utils import pt_hash 13 | from comfy import model_management 14 | from .attn_handler import SaveAttnInputPatch, InputPatch, ReplacePatch, UnetFunctionWrapper, SamplerCfgFunctionWrapper 15 | 16 | class AttnStoredExtra: 17 | def __init__(self,extra,type=1) -> None: 18 | self.type = type 19 | if type == 1: 20 | self.data = extra.unsqueeze(0) 21 | else: 22 | self.data = extra 23 | 24 | def can_concat(self,other): 25 | return True 26 | 27 | def concat(self, extras): 28 | if self.type == 1: 29 | out = [self.data] 30 | for x in extras: 31 | out.append(x.data) 32 | return torch.cat(out) 33 | elif self.type == 2: 34 | out = [self.data] 35 | for x in extras: 36 | out.append(x.data) 37 | return out 38 | else: 39 | if self.data is not None: 40 | return self.data 41 | else: 42 | for x in extras: 43 | if x.data is not None: 44 | return x.data 45 | return None 46 | 47 | class LoadMagicClothingModel: 48 | @classmethod 49 | def INPUT_TYPES(s): 50 | return {"required": 51 | {"sourceModel": ("MODEL",), 52 | "magicClothingUnet": (folder_paths.get_filename_list("unet"), ), 53 | } 54 | } 55 | RETURN_TYPES = ("MODEL", "MODEL") 56 | RETURN_NAMES = ("sourceModel", "magicClothingModel") 57 | FUNCTION = "load_unet" 58 | 59 | CATEGORY = "loaders" 60 | 61 | def load_unet(self, sourceModel, magicClothingUnet): 62 | unet_path = folder_paths.get_full_path("unet", magicClothingUnet) 63 | unet_state_dict = comfy.utils.load_torch_file(unet_path) 64 | model_config = copy.deepcopy(sourceModel.model.model_config) 65 | if model_config.unet_config["in_channels"] == 9: 66 | model_config.unet_config["in_channels"] = 4 67 | model_config.unet_config["model_channels"] = 320 68 | 69 | source_state_dict = sourceModel.model.diffusion_model.state_dict() 70 | 71 | diffusers_keys = comfy.utils.unet_to_diffusers(model_config.unet_config) 72 | 73 | new_sd = {} 74 | for k in diffusers_keys: 75 | ldm_k = diffusers_keys[k] 76 | if k in unet_state_dict: 77 | new_sd[diffusers_keys[k]] = unet_state_dict.pop(k) 78 | elif ldm_k in 
source_state_dict: 79 | new_sd[ldm_k] = source_state_dict[ldm_k] 80 | 81 | parameters = comfy.utils.calculate_parameters(new_sd) 82 | 83 | load_device = model_management.get_torch_device() 84 | offload_device = model_management.unet_offload_device() 85 | unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=model_config.supported_inference_dtypes) 86 | manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device, model_config.supported_inference_dtypes) 87 | model_config.set_inference_dtype(unet_dtype, manual_cast_dtype) 88 | model = model_config.get_model(new_sd, "") 89 | model = model.to(offload_device) 90 | model.load_model_weights(new_sd, "") 91 | left_over = unet_state_dict.keys() 92 | if len(left_over) > 0: 93 | print("left over keys in unet: {}".format(left_over)) 94 | model_patcher = comfy.model_patcher.ModelPatcher(model, load_device=load_device, offload_device=offload_device) 95 | return (sourceModel,model_patcher) 96 | 97 | 98 | class AddMagicClothingAttention: 99 | @classmethod 100 | def INPUT_TYPES(s): 101 | return {"required": 102 | {"sourceModel": ("MODEL",), 103 | "magicClothingModel": ("MODEL",), 104 | "clip": ("CLIP", ), 105 | "enable_feature_guidance": ("BOOLEAN", {"default": True}), 106 | "feature_image": ("LATENT", ), 107 | "feature_guidance_scale": ("FLOAT", {"default": 2.5, "min": 0.0, "max": 10.0, "step": 0.1, "round": 0.01}), 108 | # "sigma": ("FLOAT", {"default": 0.71, "min": 0.0, "max": 3.0, "step": 0.01, "round": 0.01}), 109 | # "sampler_name": (comfy.samplers.KSampler.SAMPLERS, ), 110 | # "scheduler": (comfy.samplers.KSampler.SCHEDULERS, ), 111 | # "sigma": ("FLOAT", {"default": 0, "min": 0.0, "max": 100.0, "step": 0.05}), 112 | # "start_step":("INT", {"default": 0, "min": 0, "max": 100, "step": 1}), 113 | # "end_step":("INT", {"default": 100, "min": 0, "max": 100, "step": 1}), 114 | # "steps": ("INT", {"default": 20, "min": 1, "max": 100, "step": 1}), 115 | } 116 | } 117 | RETURN_TYPES = ("MODEL",) 118 | RETURN_NAMES = ("MODEL",) 119 | 120 | FUNCTION = "add_features" 121 | 122 | CATEGORY = "model_patches" 123 | 124 | def add_features(self, sourceModel,magicClothingModel, clip,enable_feature_guidance ,feature_image,feature_guidance_scale, 125 | # sigma,sampler_name,scheduler,start_step=0,end_step = 100,steps = 20, 126 | ): 127 | attn_stored = self.calculate_features_zj(magicClothingModel,clip, feature_image) 128 | attn_stored["enable_feature_guidance"] = enable_feature_guidance 129 | attn_stored["feature_guidance_scale"] = feature_guidance_scale 130 | attn_stored_data = attn_stored["data"] 131 | sourceModel = sourceModel.clone() 132 | sourceModel.set_model_unet_function_wrapper(UnetFunctionWrapper()) 133 | sourceModel.set_model_sampler_cfg_function(SamplerCfgFunctionWrapper()) 134 | sourceModel.set_model_attn1_patch(InputPatch()) 135 | for block_name in attn_stored_data.keys(): 136 | for block_number in attn_stored_data[block_name].keys(): 137 | for attention_index in attn_stored_data[block_name][block_number].keys(): 138 | sourceModel.set_model_attn1_replace(ReplacePatch(), block_name, block_number, attention_index) 139 | self.inject_comfyui() 140 | sourceModel.model_options["transformer_options"]["attn_stored"] = attn_stored 141 | return (sourceModel,) 142 | 143 | def inject_comfyui(self): 144 | old_get_area_and_mult = comfy.samplers.get_area_and_mult 145 | def get_area_and_mult(self, *args, **kwargs): 146 | result = old_get_area_and_mult(self, *args, **kwargs) 147 | mult = result[1] 148 | conditioning = result[2] 
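# area (result[3]) and control (result[4]) are unpacked next; mult, area and control are
# then wrapped in AttnStoredExtra (types 1/2/3) and attached to the conditioning dict, so
# that ComfyUI's cond batching can carry them along via can_concat/concat and the
# attention patches can read them back during sampling.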
149 | area = result[3] 150 | control = result[4] 151 | conditioning["c_attn_stored_mult"] = AttnStoredExtra(mult, 1) 152 | conditioning["c_attn_stored_area"] = AttnStoredExtra(area, 2) 153 | conditioning["c_attn_stored_control"] = AttnStoredExtra(control, 3) 154 | return result 155 | comfy.samplers.get_area_and_mult = get_area_and_mult 156 | 157 | def calculate_features(self,magicClothingModel, source_clip,feature_image,sigma =None,start_step =None,end_step =None,steps =None,scheduler =None,sampler_name =None): 158 | magicClothingModel.set_model_attn1_patch(SaveAttnInputPatch()) 159 | attn_stored = {} 160 | attn_stored["data"] = {} 161 | magicClothingModel.model_options["transformer_options"]["attn_stored"] = attn_stored 162 | 163 | latent_image = feature_image["samples"] 164 | if latent_image.shape[0] > 1: 165 | latent_image = torch.chunk(latent_image, latent_image.shape[0])[0] 166 | noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") 167 | noise = noise+0 168 | disable_pbar = not comfy.utils.PROGRESS_BAR_ENABLED 169 | positive_tokens = source_clip.tokenize("") 170 | positive_cond, positive_pooled = source_clip.encode_from_tokens( 171 | positive_tokens, return_pooled=True) 172 | positive = [[positive_cond, {"pooled_output": positive_pooled}]] 173 | negative = [] 174 | dtype = magicClothingModel.model.get_dtype() 175 | latent_image = latent_image.to(magicClothingModel.load_device).to(dtype) 176 | noise = noise.to(magicClothingModel.load_device).to(dtype) 177 | sigmas = torch.tensor([1,0]) 178 | samples = comfy.sample.sample(magicClothingModel, noise, 1, 1, "uni_pc", "karras", 179 | positive, negative, latent_image, denoise=1.0, 180 | disable_noise=False, start_step=None, 181 | last_step=None, force_full_denoise=False,sigmas=sigmas, 182 | noise_mask=None, callback=None, disable_pbar=disable_pbar, seed=41) 183 | del positive_cond 184 | del positive_pooled 185 | del positive_tokens 186 | latent_image = feature_image["samples"].to(model_management.unet_offload_device()) 187 | return attn_stored 188 | 189 | def _calculate_sigmas(self,steps,model_sampling,scheduler,sampler_name): 190 | sigmas = None 191 | 192 | discard_penultimate_sigma = False 193 | if sampler_name in comfy.samplers.KSampler.DISCARD_PENULTIMATE_SIGMA_SAMPLERS: 194 | steps += 1 195 | discard_penultimate_sigma = True 196 | 197 | sigmas = comfy.samplers.calculate_sigmas(model_sampling,scheduler, steps) 198 | 199 | if discard_penultimate_sigma: 200 | sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) 201 | return sigmas 202 | 203 | def calculate_features_zj(self,magicClothingModel, source_clip,feature_image,sigma = 0,start_step =None,end_step =None,steps =None,scheduler =None,sampler_name =None): 204 | magicClothingModel.set_model_attn1_patch(SaveAttnInputPatch()) 205 | attn_stored = {} 206 | attn_stored["data"] = {} 207 | magicClothingModel.model_options["transformer_options"]["attn_stored"] = attn_stored 208 | 209 | latent_image = feature_image["samples"] 210 | if latent_image.shape[0] > 1: 211 | latent_image = torch.chunk(latent_image, latent_image.shape[0])[0] 212 | positive_tokens = source_clip.tokenize("") 213 | positive_cond, positive_pooled = source_clip.encode_from_tokens(positive_tokens, return_pooled=True) 214 | dtype = magicClothingModel.model.get_dtype() 215 | 216 | latent_image = magicClothingModel.model.process_latent_in(latent_image).to(magicClothingModel.load_device) 217 | context = positive_cond.to(magicClothingModel.load_device).to(dtype) 218 | # sigmas = 
self._calculate_sigmas(steps,magicClothingModel.model.model_sampling,scheduler,sampler_name) 219 | # sigmas = sigmas.to(magicClothingModel.load_device) 220 | # start_step = max(0, min(start_step, steps)) 221 | # end_step = max(0, min(end_step, steps)) 222 | # calc_steps = sigmas[start_step:end_step] 223 | # calc_sigmas = [calc_steps[i].item() for i in range(calc_steps.shape[0])] 224 | # attn_stored["calc_sigmas"] = calc_sigmas 225 | # real_sigma = sigmas[0].expand((latent_image.shape[0])) 226 | # real_sigma = (real_sigma*0+sigma).to(dtype) 227 | real_sigma = torch.tensor([sigma], dtype=dtype).to(magicClothingModel.load_device) 228 | timestep = real_sigma * 0 229 | latent_image=latent_image.to(magicClothingModel.load_device).to(dtype) 230 | # xc = magicClothingModel.model.model_sampling.calculate_input(real_sigma, latent_image).to(dtype) 231 | model_management.load_model_gpu(magicClothingModel) 232 | magicClothingModel.model.diffusion_model(latent_image, timestep, context=context, control=None, transformer_options=magicClothingModel.model_options["transformer_options"]) 233 | comfy.sampler_helpers.cleanup_models({}, [magicClothingModel]) 234 | return attn_stored 235 | 236 | NODE_CLASS_MAPPINGS = { 237 | "Load Magic Clothing Model": LoadMagicClothingModel, 238 | "Add Magic Clothing Attention": AddMagicClothingAttention, 239 | } 240 | 241 | NODE_DISPLAY_NAME_MAPPINGS = { 242 | "Load Magic Clothing Model": "Load Magic Clothing Model", 243 | "Add Magic Clothing Attention": "Add Magic Clothing Attention", 244 | } 245 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "comfyui-magic-clothing" 3 | description = "The comfyui supported version of the [a/Magic Clothing](https://github.com/ShineChen1024/MagicClothing) project, not the diffusers version, allows direct integration with modules such as ipadapter" 4 | version = "1.0.0" 5 | license = "LICENSE" 6 | 7 | [project.urls] 8 | Repository = "https://github.com/longgui0318/comfyui-magic-clothing" 9 | # Used by Comfy Registry https://comfyregistry.org 10 | 11 | [tool.comfy] 12 | PublisherId = "longgui0318" 13 | DisplayName = "comfyui-magic-clothing" 14 | Icon = "" 15 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | def handle_block_info(block_key, detection_unet_diffusers_keys, type="attn1"): 4 | block_weight_key = block_key[:block_key.find(type)+len(type)] 5 | real_key = None 6 | for __key in detection_unet_diffusers_keys: 7 | if block_weight_key in __key: 8 | real_key = detection_unet_diffusers_keys[__key] 9 | break 10 | if real_key is None: 11 | return (None, None, None) 12 | block_level = real_key.split(".") 13 | if block_level[0] == "input_blocks": 14 | block_name = "input" 15 | block_number = int(block_level[1]) 16 | elif block_level[0] == "middle_block": 17 | block_name = "middle" 18 | block_number = int(block_level[1]) 19 | elif block_level[0] == "output_blocks": 20 | block_name = "output" 21 | block_number = int(block_level[1]) 22 | else: 23 | block_name = None 24 | block_number = 0 25 | attention_index = 0 26 | for i, v in enumerate(block_level): 27 | if v == "transformer_blocks": 28 | attention_index = int(block_level[i+1]) 29 | break 30 | return (block_name, block_number, attention_index) 31 | 32 | def save_attn(value, attn_store, 
block_name, block_number, attention_index): 33 | if attn_store is None: 34 | return 35 | if block_name not in attn_store: 36 | attn_store[block_name] = {} 37 | if block_number not in attn_store[block_name]: 38 | attn_store[block_name][block_number] = {} 39 | attn_store[block_name][block_number][attention_index] = value 40 | 41 | def clean_attn_stored_memory(attn_stored): 42 | del_key_if_exists(attn_stored,"cond_or_uncond_out_cond") 43 | del_key_if_exists(attn_stored,"cond_or_uncond_out_count") 44 | del_key_if_exists(attn_stored,"input_x_extra_options") 45 | del_key_if_exists(attn_stored,"out_cond_init") 46 | del_key_if_exists(attn_stored,"out_count_init") 47 | del_key_if_exists(attn_stored,"cond_or_uncond_replenishment") 48 | del_key_if_exists(attn_stored,"cond_or_uncond_extra_options") 49 | 50 | def del_key_if_exists(obj,key): 51 | if key in obj: 52 | del obj[key] 53 | 54 | 55 | def pt_hash(self,key=None): 56 | if True: 57 | return "" 58 | data = self.cpu().numpy() 59 | if not data.flags['C_CONTIGUOUS']: 60 | data = data.copy(order='C') 61 | has_object = hashlib.sha256(data) 62 | has_value = has_object.hexdigest() 63 | del has_object 64 | del data 65 | if key is not None: 66 | print(f"Debug Test: {key}====={has_value}") 67 | return has_value 68 | 69 | def pt_first_line(self,key=None): 70 | if False: 71 | return 72 | first_line = self 73 | while first_line.dim() > 1: 74 | first_line = first_line[0] 75 | if first_line.dim() <= 1 and key is not None: 76 | print(f"Debug: {key}====={first_line}") --------------------------------------------------------------------------------
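A closing note on the helpers in utils.py above: handle_block_info resolves a diffusers-style attention key to ComfyUI's (block_name, block_number, attention_index) coordinates by looking the key up in the mapping returned by comfy.utils.unet_to_diffusers and parsing the resulting input_blocks/middle_block/output_blocks path, and save_attn files a captured value under those coordinates. The following is a minimal sketch of how the two fit together; the key string, the model_config and captured_value names, and the expected ("input", 1, 0) result are illustrative assumptions for an SD1.5 UNet, not something taken from this repository:

    import comfy.utils
    from .utils import handle_block_info, save_attn  # the helpers defined above

    # model_config: a ComfyUI model config (assumed available); captured_value: any tensor to store
    diffusers_keys = comfy.utils.unet_to_diffusers(model_config.unet_config)  # diffusers key -> ldm key
    attn_store = {}
    block_key = "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight"
    block_name, block_number, attention_index = handle_block_info(block_key, diffusers_keys)
    # for an SD1.5 UNet this is expected to resolve to ("input", 1, 0)
    save_attn(captured_value, attn_store, block_name, block_number, attention_index)
    # attn_store now holds {"input": {1: {0: captured_value}}}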