├── .github └── workflows │ └── publish.yml ├── .gitignore ├── README.md ├── __init__.py ├── attn_handler.py ├── diffusers_magic_clothing ├── MagicClothingDiffusionPipeline.py ├── attention_processor.py ├── garment_diffusion.py └── utils.py ├── diffusers_warp_nodes.py ├── example.json ├── ipadapter.json ├── nodes.py ├── pyproject.toml └── utils.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | paths: 9 | - "pyproject.toml" 10 | 11 | jobs: 12 | publish-node: 13 | name: Publish Custom Node to registry 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Check out code 17 | uses: actions/checkout@v4 18 | - name: Publish Custom Node 19 | uses: Comfy-Org/publish-node-action@main 20 | with: 21 | ## Add your own personal access token to your Github Repository secrets and reference it here. 22 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .DS_Store 3 | /*.log 4 | /conversion/ 5 | .vscode -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # comfyui-magic-clothing 2 | 3 | A native ComfyUI implementation of the [Magic Clothing](https://github.com/ShineChen1024/MagicClothing) project (rather than the diffusers version), which allows direct integration with modules such as IPAdapter 4 | 5 | ## Installation 6 | 7 | * Use `ComfyUI-Manager`, or clone this repository into `custom_nodes` 8 | * Download the models from [Hugging Face](https://huggingface.co/ShineChen1024/MagicClothing) and move them to the `comfyui/models/unet` folder 9 | 10 | ## For samples, please refer to [here](./example.json) 11 | ## For ipadapter samples, please refer to [here](./ipadapter.json) 12 | 13 | 14 | # Note 15 | 16 | * The success rate per generation is still low, and the method does not work well for dense patterns (for now, the [sigma] parameter serves as a temporary workaround for scaling the clothing-feature input in ComfyUI) 17 | * The hit rate of the current implementation is still not good enough, mainly because of how model.model_sampling.calculate_input is handled during the first UNet sampling pass. This is still under investigation; an initial version is released first. 18 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .nodes import NODE_CLASS_MAPPINGS as CM_O, NODE_DISPLAY_NAME_MAPPINGS as NM_O 2 | from .diffusers_warp_nodes import NODE_CLASS_MAPPINGS as CM_D, NODE_DISPLAY_NAME_MAPPINGS as NM_D 3 | import torch 4 | from .utils import pt_hash,pt_first_line 5 | 6 | torch.Tensor.__hash_log__ = pt_hash 7 | torch.Tensor.__fl_log__ = pt_first_line 8 | 9 | NODE_CLASS_MAPPINGS = { 10 | **CM_O, 11 | **CM_D 12 | } 13 | NODE_DISPLAY_NAME_MAPPINGS = { 14 | **NM_O, 15 | **NM_D 16 | } 17 | __all__ = ['NODE_CLASS_MAPPINGS', 'NODE_DISPLAY_NAME_MAPPINGS'] -------------------------------------------------------------------------------- /attn_handler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import Any 3 | from comfy import model_management 4 | from comfy.ldm.modules.attention import optimized_attention 5 | from .utils import clean_attn_stored_memory 6
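# Illustrative sketch (not part of the upstream file): the wrapper and patch
# classes in this module are meant to be registered through ComfyUI's
# ModelPatcher hooks. The actual wiring lives in nodes.py; the helper below
# only shows the assumed pattern, and the initial "attn_stored" keys are
# inferred from how the classes below read them.
def _example_register_patches(model, feature_guidance_scale=2.5, enable_feature_guidance=True):
    patched = model.clone()
    # transformer_options["attn_stored"] carries the cached garment features
    # between the reference pass and the generation pass.
    patched.model_options["transformer_options"]["attn_stored"] = {
        "data": {},
        "enable_feature_guidance": enable_feature_guidance,
        "feature_guidance_scale": feature_guidance_scale,
        "cond_or_uncond_out_cond": None,
        "cond_or_uncond_out_count": None,
    }
    patched.set_model_unet_function_wrapper(UnetFunctionWrapper())
    patched.set_model_sampler_cfg_function(SamplerCfgFunctionWrapper())
    # SaveAttnInputPatch would be applied to the reference (garment) model;
    # the generation model would use InputPatch / ReplacePatch instead.
    patched.set_model_attn1_patch(SaveAttnInputPatch())
    return patched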
| 7 | class SamplerCfgFunctionWrapper: 8 | 9 | def __call__(self, parameters) -> Any: 10 | cond = parameters["cond"] 11 | uncond = parameters["uncond"] 12 | input_x = parameters["input"] 13 | cond_scale = parameters["cond_scale"] 14 | model_options = parameters["model_options"] 15 | transformer_options = model_options["transformer_options"] 16 | if "attn_stored" in transformer_options: 17 | attn_stored = transformer_options["attn_stored"] 18 | feature_guidance_scale = attn_stored["feature_guidance_scale"] 19 | cond_or_uncond_out_cond = attn_stored["cond_or_uncond_out_cond"] 20 | cond_or_uncond_out_count = attn_stored["cond_or_uncond_out_count"] 21 | # clear memory 22 | clean_attn_stored_memory(attn_stored) 23 | if cond_or_uncond_out_cond is None: 24 | return uncond + (cond - uncond) * cond_scale 25 | else: 26 | cond = input_x - cond 27 | uncond = input_x - uncond 28 | cond_or_uncond_out_cond /= cond_or_uncond_out_count 29 | noise_pred = ( 30 | uncond 31 | + cond_scale * (cond - cond_or_uncond_out_cond) 32 | + feature_guidance_scale * 33 | (cond_or_uncond_out_cond - uncond) 34 | ) 35 | return input_x - noise_pred 36 | else: 37 | return uncond + (cond - uncond) * cond_scale 38 | 39 | 40 | class UnetFunctionWrapper: 41 | 42 | def _is_inject_batch_(self, model, input, inject_batch_count): 43 | free_memory = model_management.get_free_memory(input.device) 44 | input_shape = [input[0] + inject_batch_count] + list(input)[1:] 45 | return model.memory_required(input_shape) < free_memory 46 | 47 | def _reorganization_c_data_(self,c,key): 48 | if key in c: 49 | return self._chunk_data_(c[key]) 50 | return None,None 51 | 52 | def _chunk_data_(self,data): 53 | if data is None: 54 | return None,None 55 | return torch.chunk(data,data.shape[0]),[] 56 | 57 | def __call__(self, apply_model, parameters): 58 | input = parameters["input"] 59 | timestep = parameters["timestep"] 60 | c = parameters["c"] 61 | transformer_options = c["transformer_options"] 62 | if "attn_stored" in transformer_options: 63 | attn_stored = transformer_options["attn_stored"] 64 | enable_feature_guidance = attn_stored["enable_feature_guidance"] 65 | cond_or_uncond = parameters["cond_or_uncond"] 66 | cond_or_uncond_replenishment = [] 67 | cond_or_uncond_new = [] 68 | # 对传入参数进行调整,调整方式如下 69 | # A 对负向提示词,复制一份,这是为了计算出空数据的情况,插入的方式在前面 70 | # B 对正向忽略 71 | input_array = torch.chunk(input, input.shape[0]) 72 | timestep_array = torch.chunk(timestep, timestep.shape[0]) 73 | new_input_array = [] 74 | new_timestep = [] 75 | 76 | c_concat_data,c_concat_data_new = self._reorganization_c_data_(c,"c_concat") 77 | c_crossattn_data,c_crossattn_data_new = self._reorganization_c_data_(c,"c_crossattn") 78 | c_attn_stored_mult_data,_ = self._reorganization_c_data_(c,"c_attn_stored_mult") 79 | c_attn_stored_area_data = c["c_attn_stored_area"] if "c_attn_stored_area" in c else None 80 | c_attn_stored_control_data = c["c_attn_stored_control"] if "c_attn_stored_control" in c else None 81 | #移除因为注入增加的内容,后续已不再需要 82 | c["c_attn_stored_mult"] = None 83 | c["c_attn_stored_area"] = None 84 | c["c_attn_stored_control"] = None 85 | 86 | cond_or_uncond_extra_options = {} 87 | for i in range(len(input_array)): 88 | # 需注意,3月底comfyui更新,为了支持多conds实现,移除了cond本身的判定,这个值存的是index 89 | cond_flag = cond_or_uncond[i] 90 | new_input_array.append(input_array[i]) 91 | new_timestep.append(timestep_array[i]) 92 | if c_concat_data is not None: 93 | c_concat_data_new.append(c_concat_data[i]) 94 | if c_crossattn_data is not None: 95 | c_crossattn_data_new.append(c_crossattn_data[i]) 96 | 97 | 
cond_or_uncond_replenishment.append(1 if cond_flag == 1 else 0) 98 | cond_or_uncond_new.append(1 if cond_flag == 1 else 0) 99 | if enable_feature_guidance and cond_flag == 1: 100 | 101 | if c_attn_stored_mult_data is not None and c_attn_stored_area_data is not None: 102 | mult = c_attn_stored_mult_data[i] 103 | area = c_attn_stored_area_data[i] 104 | cond_or_uncond_extra_options[i+1] = { 105 | "mult": mult.squeeze(0), 106 | "area": area 107 | } 108 | # 注意,在启用特征引导的时候,需要增加一个负向空特征来处理,这个复制的负向特征是给后面计算空特征用的 109 | cond_or_uncond_replenishment.append(2) 110 | cond_or_uncond_new.append(1) 111 | new_input_array.append(input_array[i]) 112 | new_timestep.append(timestep_array[i]) 113 | if c_concat_data is not None: 114 | c_concat_data_new.append(c_concat_data[i]) 115 | if c_crossattn_data is not None: 116 | c_crossattn_data_new.append(c_crossattn_data[i]) 117 | input = torch.cat(new_input_array,) 118 | timestep = torch.cat(new_timestep,) 119 | if c_concat_data_new is not None: 120 | c["c_concat"] = torch.cat(c_concat_data_new,) 121 | if c_crossattn_data_new is not None: 122 | c["c_crossattn"] = torch.cat(c_crossattn_data_new,) 123 | if "out_cond_init" not in attn_stored: 124 | attn_stored["out_cond_init"] = torch.zeros_like(input_array[0]) 125 | if "out_count_init" not in attn_stored: 126 | attn_stored["out_count_init"] = torch.zeros_like(input_array[0] * 1e-37) 127 | if c_attn_stored_control_data is not None: 128 | c['control'] = c_attn_stored_control_data.get_control(input, timestep, c, len(cond_or_uncond_replenishment)) 129 | attn_stored["cond_or_uncond_replenishment"] = cond_or_uncond_replenishment 130 | attn_stored["cond_or_uncond_extra_options"] = cond_or_uncond_extra_options 131 | c["cond_or_uncond"] = cond_or_uncond_new 132 | c["transformer_options"]["cond_or_uncond"] = cond_or_uncond_new 133 | # 直接清理,节省内存 134 | del input_array 135 | del timestep_array 136 | del new_input_array 137 | del new_timestep 138 | del c_concat_data 139 | del c_concat_data_new 140 | del c_crossattn_data 141 | del c_crossattn_data_new 142 | del c_attn_stored_mult_data 143 | del c_attn_stored_area_data 144 | del cond_or_uncond_extra_options 145 | 146 | output = apply_model(input, timestep, **c) 147 | if "attn_stored" in transformer_options: 148 | attn_stored = transformer_options["attn_stored"] 149 | enable_feature_guidance = attn_stored["enable_feature_guidance"] 150 | 151 | cond_or_uncond_replenishment = attn_stored["cond_or_uncond_replenishment"] 152 | cond_or_uncond_extra_options = attn_stored["cond_or_uncond_extra_options"] 153 | pred_result = torch.chunk( 154 | output, len(cond_or_uncond_replenishment)) 155 | new_output = [] 156 | for i in range(len(cond_or_uncond_replenishment)): 157 | cond_flag = cond_or_uncond_replenishment[i] 158 | if cond_flag == 2: 159 | cond_or_uncond_extra_option = cond_or_uncond_extra_options[i] 160 | if "cond_or_uncond_out_cond" not in attn_stored: 161 | attn_stored["cond_or_uncond_out_cond"] = attn_stored["out_cond_init"] 162 | if "cond_or_uncond_out_count" not in attn_stored: 163 | attn_stored["cond_or_uncond_out_count"] = attn_stored["out_count_init"] 164 | mult = cond_or_uncond_extra_option["mult"] 165 | area = cond_or_uncond_extra_option["area"] 166 | if area is None: 167 | attn_stored["cond_or_uncond_out_cond"] += pred_result[i] * mult 168 | attn_stored["cond_or_uncond_out_count"] += mult 169 | else: 170 | out_c = attn_stored["cond_or_uncond_out_cond"] 171 | out_cts = attn_stored["cond_or_uncond_out_count"] 172 | dims = len(area) // 2 173 | for i in range(dims): 174 | out_c = 
out_c.narrow(i + 2, area[i + dims], area[i]) 175 | out_cts = out_cts.narrow(i + 2, area[i + dims], area[i]) 176 | out_c += pred_result[i] * mult 177 | out_cts += mult 178 | else: 179 | new_output.append(pred_result[i]) 180 | output = torch.cat(new_output) 181 | del new_output 182 | del pred_result 183 | return output 184 | 185 | 186 | class SaveAttnInputPatch: 187 | 188 | def __call__(self, q, k, v, extra_options): 189 | if "attn_stored" in extra_options: 190 | attn_stored = extra_options["attn_stored"] 191 | if attn_stored is None: 192 | return (q, k, v) 193 | attn_stored_data = attn_stored["data"] 194 | block_name = extra_options["block"][0] 195 | block_id = extra_options["block"][1] 196 | block_index = extra_options["block_index"] 197 | if block_name not in attn_stored_data: 198 | attn_stored_data[block_name] = {} 199 | if block_id not in attn_stored_data[block_name]: 200 | attn_stored_data[block_name][block_id] = {} 201 | attn_stored_data[block_name][block_id][block_index] = q 202 | return (q, k, v) 203 | 204 | 205 | def _check_(calc_sigmas,sigma): 206 | if calc_sigmas is None: 207 | return True 208 | for i in range(len(calc_sigmas)): 209 | if abs(calc_sigmas[i] - sigma.item()) < 0.000001: 210 | return True 211 | return False 212 | 213 | class InputPatch: 214 | 215 | def _calculate_input_(hideen_states, sigma): 216 | return hideen_states / (sigma ** 2 + 1) ** 0.5 217 | 218 | def __call__(self, q, k, v, extra_options): 219 | if "attn_stored" in extra_options: 220 | attn_stored = extra_options["attn_stored"] 221 | if attn_stored is None: 222 | return (q, k, v) 223 | attn_stored_data = attn_stored["data"] 224 | cond_or_uncond_replenishment = attn_stored["cond_or_uncond_replenishment"] 225 | block_name = extra_options["block"][0] 226 | block_id = extra_options["block"][1] 227 | block_index = extra_options["block_index"] 228 | sigma = extra_options["sigmas"] 229 | calc_sigmas = attn_stored.get("calc_sigmas",None) 230 | if _check_(calc_sigmas,sigma) and block_name in attn_stored_data and block_id in attn_stored_data[block_name] and block_index in attn_stored_data[block_name][block_id]: 231 | FLAG_OUT_CHANNEL = 2 232 | qEQk = q.shape[FLAG_OUT_CHANNEL] == k.shape[FLAG_OUT_CHANNEL] 233 | qEQv = q.shape[FLAG_OUT_CHANNEL] == v.shape[FLAG_OUT_CHANNEL] 234 | feature_hidden_states = attn_stored_data[block_name][block_id][block_index] 235 | # feature_hidden_states = self._calculate_input_(feature_hidden_states, sigma) 236 | if q.shape[1] != feature_hidden_states.shape[1]: 237 | clean_attn_stored_memory(attn_stored) 238 | raise ValueError( 239 | "Your featured image must be the same width and height as the image you want to generate!") 240 | feature_hidden_states = feature_hidden_states.to(q.dtype) 241 | combo_feature_hidden_states = [] 242 | for i in range(len(cond_or_uncond_replenishment)): 243 | cond_flag = cond_or_uncond_replenishment[i] 244 | if cond_flag == 0 or cond_flag == 2: 245 | combo_feature_hidden_states.append(feature_hidden_states) 246 | else: 247 | empty_feature = torch.zeros_like(feature_hidden_states) 248 | combo_feature_hidden_states.append(empty_feature) 249 | feature_hidden_states = torch.cat(combo_feature_hidden_states) 250 | q = torch.cat([q, feature_hidden_states], dim=1) 251 | return (q, q if qEQk else k, q if qEQv else v) 252 | return (q, k, v) 253 | 254 | 255 | class ReplacePatch: 256 | 257 | def __call__(self, q, k, v, extra_options): 258 | if extra_options is None: 259 | extra_options = {} 260 | n_heads = extra_options["n_heads"] 261 | q = optimized_attention(q, k, v, 
n_heads if n_heads is not None else 8) 262 | if "attn_stored" in extra_options: 263 | attn_stored = extra_options["attn_stored"] 264 | if attn_stored is None: 265 | return q 266 | sigma = extra_options["sigmas"] 267 | calc_sigmas = attn_stored.get("calc_sigmas",None) 268 | if _check_(calc_sigmas,sigma): 269 | q, _ = torch.chunk(q, 2, dim=1) # 抹除额外内容 270 | return q 271 | -------------------------------------------------------------------------------- /diffusers_magic_clothing/MagicClothingDiffusionPipeline.py: -------------------------------------------------------------------------------- 1 | from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import * 2 | import comfy.utils 3 | 4 | def prepare_callback(steps): 5 | pbar = comfy.utils.ProgressBar(steps) 6 | def callback(step, total_steps): 7 | pbar.update_absolute(step + 1, total_steps, None) 8 | return callback 9 | 10 | class MagicClothingDiffusionPipeline(StableDiffusionPipeline): 11 | def __call__( 12 | self, 13 | prompt: Union[str, List[str]] = None, 14 | height: Optional[int] = None, 15 | width: Optional[int] = None, 16 | num_inference_steps: int = 50, 17 | timesteps: List[int] = None, 18 | guidance_scale: float = 5., 19 | cloth_guidance_scale: float = 2.5, 20 | negative_prompt: Optional[Union[str, List[str]]] = None, 21 | num_images_per_prompt: Optional[int] = 1, 22 | eta: float = 0.0, 23 | generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, 24 | latents: Optional[torch.FloatTensor] = None, 25 | prompt_embeds: Optional[torch.FloatTensor] = None, 26 | negative_prompt_embeds: Optional[torch.FloatTensor] = None, 27 | ip_adapter_image: Optional[PipelineImageInput] = None, 28 | output_type: Optional[str] = "pil", 29 | return_dict: bool = True, 30 | cross_attention_kwargs: Optional[Dict[str, Any]] = None, 31 | guidance_rescale: float = 0.0, 32 | clip_skip: Optional[int] = None, 33 | callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None, 34 | callback_on_step_end_tensor_inputs: List[str] = ["latents"], 35 | **kwargs, 36 | ): 37 | r""" 38 | The call function to the pipeline for generation. 39 | 40 | Args: 41 | prompt (`str` or `List[str]`, *optional*): 42 | The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`. 43 | height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`): 44 | The height in pixels of the generated image. 45 | width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`): 46 | The width in pixels of the generated image. 47 | num_inference_steps (`int`, *optional*, defaults to 50): 48 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 49 | expense of slower inference. 50 | timesteps (`List[int]`, *optional*): 51 | Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument 52 | in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is 53 | passed will be used. Must be in descending order. 54 | guidance_scale (`float`, *optional*, defaults to 7.5): 55 | A higher guidance scale value encourages the model to generate images closely linked to the text 56 | `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`. 57 | negative_prompt (`str` or `List[str]`, *optional*): 58 | The prompt or prompts to guide what to not include in image generation. 
If not defined, you need to 59 | pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`). 60 | num_images_per_prompt (`int`, *optional*, defaults to 1): 61 | The number of images to generate per prompt. 62 | eta (`float`, *optional*, defaults to 0.0): 63 | Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies 64 | to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers. 65 | generator (`torch.Generator` or `List[torch.Generator]`, *optional*): 66 | A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make 67 | generation deterministic. 68 | latents (`torch.FloatTensor`, *optional*): 69 | Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image 70 | generation. Can be used to tweak the same generation with different prompts. If not provided, a latents 71 | tensor is generated by sampling using the supplied random `generator`. 72 | prompt_embeds (`torch.FloatTensor`, *optional*): 73 | Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not 74 | provided, text embeddings are generated from the `prompt` input argument. 75 | negative_prompt_embeds (`torch.FloatTensor`, *optional*): 76 | Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If 77 | not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument. 78 | ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters. 79 | output_type (`str`, *optional*, defaults to `"pil"`): 80 | The output format of the generated image. Choose between `PIL.Image` or `np.array`. 81 | return_dict (`bool`, *optional*, defaults to `True`): 82 | Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a 83 | plain tuple. 84 | cross_attention_kwargs (`dict`, *optional*): 85 | A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in 86 | [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py). 87 | guidance_rescale (`float`, *optional*, defaults to 0.0): 88 | Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are 89 | Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when 90 | using zero terminal SNR. 91 | clip_skip (`int`, *optional*): 92 | Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that 93 | the output of the pre-final layer will be used for computing the prompt embeddings. 94 | callback_on_step_end (`Callable`, *optional*): 95 | A function that calls at the end of each denoising steps during the inference. The function is called 96 | with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, 97 | callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by 98 | `callback_on_step_end_tensor_inputs`. 99 | callback_on_step_end_tensor_inputs (`List`, *optional*): 100 | The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list 101 | will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the 102 | `._callback_tensor_inputs` attribute of your pipeline class. 
103 | 104 | Examples: 105 | 106 | Returns: 107 | [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: 108 | If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned, 109 | otherwise a `tuple` is returned where the first element is a list with the generated images and the 110 | second element is a list of `bool`s indicating whether the corresponding generated image contains 111 | "not-safe-for-work" (nsfw) content. 112 | """ 113 | 114 | callback = kwargs.pop("callback", None) 115 | callback_steps = kwargs.pop("callback_steps", None) 116 | 117 | if callback is not None: 118 | deprecate( 119 | "callback", 120 | "1.0.0", 121 | "Passing `callback` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`", 122 | ) 123 | if callback_steps is not None: 124 | deprecate( 125 | "callback_steps", 126 | "1.0.0", 127 | "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider using `callback_on_step_end`", 128 | ) 129 | # to deal with lora scaling and other possible forward hooks 130 | 131 | self._guidance_scale = guidance_scale 132 | self._guidance_rescale = guidance_rescale 133 | self._clip_skip = clip_skip 134 | self._cross_attention_kwargs = cross_attention_kwargs 135 | self._interrupt = False 136 | 137 | # 2. Define call parameters 138 | batch_size = prompt_embeds.shape[0] 139 | 140 | device = self._execution_device 141 | 142 | # For classifier free guidance, we need to do two forward passes. 143 | # Here we concatenate the unconditional and text embeddings into a single batch 144 | # to avoid doing two forward passes 145 | if self.do_classifier_free_guidance: 146 | prompt_embeds = torch.cat([negative_prompt_embeds, negative_prompt_embeds, prompt_embeds]) 147 | 148 | 149 | # 4. Prepare timesteps 150 | timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps) 151 | 152 | 153 | # 5. Prepare latent variables 154 | num_channels_latents = self.unet.config.in_channels 155 | latents = self.prepare_latents( 156 | batch_size * num_images_per_prompt, 157 | num_channels_latents, 158 | height, 159 | width, 160 | prompt_embeds.dtype, 161 | device, 162 | generator, 163 | None, 164 | ) 165 | 166 | # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline 167 | extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) 168 | # 6.2 Optionally get Guidance Scale Embedding 169 | timestep_cond = None 170 | if self.unet.config.time_cond_proj_dim is not None: 171 | guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt) 172 | timestep_cond = self.get_guidance_scale_embedding( 173 | guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim 174 | ).to(device=device, dtype=latents.dtype) 175 | 176 | # 7. 
Denoising loop 177 | num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order 178 | self._num_timesteps = len(timesteps) 179 | comfyui_callback = prepare_callback(num_inference_steps) 180 | with self.progress_bar(total=num_inference_steps) as progress_bar: 181 | for i, t in enumerate(timesteps): 182 | if self.interrupt: 183 | continue 184 | 185 | # expand the latents if we are doing classifier free guidance 186 | latent_model_input = torch.cat([latents] * 3) if self.do_classifier_free_guidance else latents 187 | latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) 188 | # predict the noise residual 189 | noise_pred = self.unet( 190 | latent_model_input, 191 | t, 192 | encoder_hidden_states=prompt_embeds, 193 | timestep_cond=timestep_cond, 194 | cross_attention_kwargs=self.cross_attention_kwargs, 195 | return_dict=False, 196 | )[0] 197 | # perform guidance 198 | if self.do_classifier_free_guidance: 199 | noise_pred_uncond, noise_pred_cloth, noise_pred_text = noise_pred.chunk(3) 200 | noise_pred = ( 201 | noise_pred_uncond 202 | + guidance_scale * (noise_pred_text - noise_pred_cloth) 203 | + cloth_guidance_scale * (noise_pred_cloth - noise_pred_uncond) 204 | ) 205 | 206 | if self.do_classifier_free_guidance and self.guidance_rescale > 0.0: 207 | # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf 208 | noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale) 209 | 210 | # compute the previous noisy sample x_t -> x_t-1 211 | latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] 212 | if callback_on_step_end is not None: 213 | callback_kwargs = {} 214 | for k in callback_on_step_end_tensor_inputs: 215 | callback_kwargs[k] = locals()[k] 216 | callback_outputs = callback_on_step_end(self, i, t, callback_kwargs) 217 | 218 | latents = callback_outputs.pop("latents", latents) 219 | prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds) 220 | negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds) 221 | 222 | # call the callback, if provided 223 | if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): 224 | progress_bar.update() 225 | if callback is not None and i % callback_steps == 0: 226 | step_idx = i // getattr(self.scheduler, "order", 1) 227 | callback(step_idx, t, latents) 228 | 229 | if comfyui_callback is not None: 230 | comfyui_callback(i,num_inference_steps) 231 | 232 | # Offload all models 233 | self.maybe_free_model_hooks() 234 | latents = 1.0/0.18215 * latents 235 | return latents 236 | -------------------------------------------------------------------------------- /diffusers_magic_clothing/attention_processor.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | 3 | import torch 4 | from typing import Optional 5 | import torch.nn.functional as F 6 | from diffusers.utils import USE_PEFT_BACKEND 7 | import torch.nn as nn 8 | from diffusers.models.attention_processor import Attention 9 | 10 | Linear_Call_Needs_Extra_Args = False 11 | 12 | class AttnProcessor(nn.Module): 13 | r""" 14 | Default processor for performing attention-related computations. 
15 | """ 16 | 17 | def __init__(self): 18 | super().__init__() 19 | 20 | def __call__( 21 | self, 22 | attn: Attention, 23 | hidden_states: torch.FloatTensor, 24 | encoder_hidden_states: Optional[torch.FloatTensor] = None, 25 | attention_mask: Optional[torch.FloatTensor] = None, 26 | temb: Optional[torch.FloatTensor] = None, 27 | scale: float = 1.0, 28 | attn_store=None, 29 | do_classifier_free_guidance=None, 30 | enable_cloth_guidance=None 31 | ) -> torch.Tensor: 32 | residual = hidden_states 33 | 34 | args = () if USE_PEFT_BACKEND else (scale,) 35 | 36 | if attn.spatial_norm is not None: 37 | hidden_states = attn.spatial_norm(hidden_states, temb) 38 | 39 | input_ndim = hidden_states.ndim 40 | 41 | if input_ndim == 4: 42 | batch_size, channel, height, width = hidden_states.shape 43 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 44 | 45 | batch_size, sequence_length, _ = ( 46 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 47 | ) 48 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 49 | 50 | if attn.group_norm is not None: 51 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 52 | 53 | if Linear_Call_Needs_Extra_Args: 54 | query = attn.to_q(hidden_states, *args) 55 | else: 56 | query = attn.to_q(hidden_states) 57 | 58 | if encoder_hidden_states is None: 59 | encoder_hidden_states = hidden_states 60 | elif attn.norm_cross: 61 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 62 | 63 | 64 | if Linear_Call_Needs_Extra_Args: 65 | key = attn.to_k(encoder_hidden_states, *args) 66 | value = attn.to_v(encoder_hidden_states, *args) 67 | else: 68 | key = attn.to_k(encoder_hidden_states) 69 | value = attn.to_v(encoder_hidden_states) 70 | 71 | query = attn.head_to_batch_dim(query) 72 | key = attn.head_to_batch_dim(key) 73 | value = attn.head_to_batch_dim(value) 74 | 75 | attention_probs = attn.get_attention_scores(query, key, attention_mask) 76 | hidden_states = torch.bmm(attention_probs, value) 77 | hidden_states = attn.batch_to_head_dim(hidden_states) 78 | 79 | # linear proj 80 | if Linear_Call_Needs_Extra_Args: 81 | hidden_states = attn.to_out[0](hidden_states, *args) 82 | else: 83 | hidden_states = attn.to_out[0](hidden_states) 84 | # dropout 85 | hidden_states = attn.to_out[1](hidden_states) 86 | 87 | if input_ndim == 4: 88 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 89 | 90 | if attn.residual_connection: 91 | hidden_states = hidden_states + residual 92 | 93 | hidden_states = hidden_states / attn.rescale_output_factor 94 | 95 | return hidden_states 96 | 97 | 98 | class REFAttnProcessor(nn.Module): 99 | def __init__(self, name, type="read"): 100 | super().__init__() 101 | self.name = name 102 | self.type = type 103 | 104 | def __call__( 105 | self, 106 | attn: Attention, 107 | hidden_states: torch.FloatTensor, 108 | encoder_hidden_states: Optional[torch.FloatTensor] = None, 109 | attention_mask: Optional[torch.FloatTensor] = None, 110 | temb: Optional[torch.FloatTensor] = None, 111 | scale: float = 1.0, 112 | attn_store=None, 113 | do_classifier_free_guidance=None, 114 | enable_cloth_guidance=None 115 | ) -> torch.Tensor: 116 | if self.type == "read": 117 | attn_store[self.name] = hidden_states 118 | elif self.type == "write": 119 | ref_hidden_states = attn_store[self.name] 120 | if do_classifier_free_guidance: 121 | empty_copy = torch.zeros_like(ref_hidden_states) 
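                # Descriptive note: the zero tensor above stands in for the
                # "no garment" branch of classifier-free guidance. With cloth
                # guidance enabled the batch is ordered [uncond, cloth-only,
                # text+cloth], so the stored reference features are blanked for
                # the first chunk and reused for the other two before being
                # appended along the sequence dimension.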
122 | if enable_cloth_guidance: 123 | ref_hidden_states = torch.cat([empty_copy, ref_hidden_states, ref_hidden_states]) 124 | else: 125 | ref_hidden_states = torch.cat([empty_copy, ref_hidden_states]) 126 | hidden_states = torch.cat([hidden_states, ref_hidden_states], dim=1) 127 | else: 128 | raise ValueError("unsupport type") 129 | residual = hidden_states 130 | 131 | args = () if USE_PEFT_BACKEND else (scale,) 132 | 133 | if attn.spatial_norm is not None: 134 | hidden_states = attn.spatial_norm(hidden_states, temb) 135 | 136 | input_ndim = hidden_states.ndim 137 | 138 | if input_ndim == 4: 139 | batch_size, channel, height, width = hidden_states.shape 140 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 141 | 142 | batch_size, sequence_length, _ = ( 143 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 144 | ) 145 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 146 | 147 | if attn.group_norm is not None: 148 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 149 | 150 | if Linear_Call_Needs_Extra_Args: 151 | query = attn.to_q(hidden_states, *args) 152 | else: 153 | query = attn.to_q(hidden_states) 154 | 155 | if encoder_hidden_states is None: 156 | encoder_hidden_states = hidden_states 157 | elif attn.norm_cross: 158 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 159 | 160 | if Linear_Call_Needs_Extra_Args: 161 | key = attn.to_k(encoder_hidden_states, *args) 162 | value = attn.to_v(encoder_hidden_states, *args) 163 | else: 164 | key = attn.to_k(encoder_hidden_states) 165 | value = attn.to_v(encoder_hidden_states) 166 | 167 | query = attn.head_to_batch_dim(query) 168 | key = attn.head_to_batch_dim(key) 169 | value = attn.head_to_batch_dim(value) 170 | 171 | attention_probs = attn.get_attention_scores(query, key, attention_mask) 172 | hidden_states = torch.bmm(attention_probs, value) 173 | hidden_states = attn.batch_to_head_dim(hidden_states) 174 | 175 | if self.type == "write": 176 | hidden_states, _ = torch.chunk(hidden_states, 2, dim=1) 177 | 178 | # linear proj 179 | if Linear_Call_Needs_Extra_Args: 180 | hidden_states = attn.to_out[0](hidden_states, *args) 181 | else: 182 | hidden_states = attn.to_out[0](hidden_states) 183 | # dropout 184 | hidden_states = attn.to_out[1](hidden_states) 185 | 186 | if input_ndim == 4: 187 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 188 | 189 | if attn.residual_connection: 190 | hidden_states = hidden_states + residual 191 | 192 | hidden_states = hidden_states / attn.rescale_output_factor 193 | return hidden_states 194 | 195 | 196 | class AttnProcessor2_0(nn.Module): 197 | r""" 198 | Processor for implementing scaled dot-product attention (enabled by default if you're using PyTorch 2.0). 
199 | """ 200 | 201 | def __init__(self): 202 | super().__init__() 203 | if not hasattr(F, "scaled_dot_product_attention"): 204 | raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") 205 | 206 | def __call__( 207 | self, 208 | attn: Attention, 209 | hidden_states: torch.FloatTensor, 210 | encoder_hidden_states: Optional[torch.FloatTensor] = None, 211 | attention_mask: Optional[torch.FloatTensor] = None, 212 | temb: Optional[torch.FloatTensor] = None, 213 | scale: float = 1.0, 214 | attn_store=None, 215 | do_classifier_free_guidance=None, 216 | enable_cloth_guidance=None 217 | ) -> torch.FloatTensor: 218 | residual = hidden_states 219 | if attn.spatial_norm is not None: 220 | hidden_states = attn.spatial_norm(hidden_states, temb) 221 | 222 | input_ndim = hidden_states.ndim 223 | 224 | if input_ndim == 4: 225 | batch_size, channel, height, width = hidden_states.shape 226 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 227 | 228 | batch_size, sequence_length, _ = ( 229 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 230 | ) 231 | 232 | if attention_mask is not None: 233 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 234 | # scaled_dot_product_attention expects attention_mask shape to be 235 | # (batch, heads, source_length, target_length) 236 | attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) 237 | 238 | if attn.group_norm is not None: 239 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 240 | 241 | args = () if USE_PEFT_BACKEND else (scale,) 242 | if Linear_Call_Needs_Extra_Args: 243 | query = attn.to_q(hidden_states, *args) 244 | else: 245 | query = attn.to_q(hidden_states) 246 | 247 | if encoder_hidden_states is None: 248 | encoder_hidden_states = hidden_states 249 | elif attn.norm_cross: 250 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 251 | 252 | if Linear_Call_Needs_Extra_Args: 253 | key = attn.to_k(encoder_hidden_states, *args) 254 | value = attn.to_v(encoder_hidden_states, *args) 255 | else: 256 | key = attn.to_k(encoder_hidden_states) 257 | value = attn.to_v(encoder_hidden_states) 258 | 259 | inner_dim = key.shape[-1] 260 | head_dim = inner_dim // attn.heads 261 | 262 | query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 263 | 264 | key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 265 | value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 266 | 267 | # the output of sdp = (batch, num_heads, seq_len, head_dim) 268 | # TODO: add support for attn.scale when we move to Torch 2.1 269 | hidden_states = F.scaled_dot_product_attention( 270 | query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False 271 | ) 272 | 273 | hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 274 | hidden_states = hidden_states.to(query.dtype) 275 | 276 | # linear proj 277 | if Linear_Call_Needs_Extra_Args: 278 | hidden_states = attn.to_out[0](hidden_states, *args) 279 | else: 280 | hidden_states = attn.to_out[0](hidden_states) 281 | # dropout 282 | hidden_states = attn.to_out[1](hidden_states) 283 | 284 | if input_ndim == 4: 285 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 286 | 287 | if attn.residual_connection: 288 | hidden_states = hidden_states + residual 289 | 
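        # rescale_output_factor defaults to 1.0 in diffusers' Attention, so the
        # division below is normally a no-op; it is kept for parity with the
        # stock AttnProcessor2_0 implementation.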
290 | hidden_states = hidden_states / attn.rescale_output_factor 291 | 292 | return hidden_states 293 | 294 | 295 | class REFAttnProcessor2_0(nn.Module): 296 | def __init__(self, name, type="read"): 297 | super().__init__() 298 | if not hasattr(F, "scaled_dot_product_attention"): 299 | raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") 300 | self.name = name 301 | self.type = type 302 | 303 | def __call__( 304 | self, 305 | attn: Attention, 306 | hidden_states: torch.FloatTensor, 307 | encoder_hidden_states: Optional[torch.FloatTensor] = None, 308 | attention_mask: Optional[torch.FloatTensor] = None, 309 | temb: Optional[torch.FloatTensor] = None, 310 | scale: float = 1.0, 311 | attn_store=None, 312 | do_classifier_free_guidance=False, 313 | enable_cloth_guidance=True 314 | ) -> torch.FloatTensor: 315 | if self.type == "read": 316 | attn_store[self.name] = hidden_states 317 | elif self.type == "write": 318 | ref_hidden_states = attn_store[self.name] 319 | if do_classifier_free_guidance: 320 | empty_copy = torch.zeros_like(ref_hidden_states) 321 | if enable_cloth_guidance: 322 | ref_hidden_states = torch.cat([empty_copy, ref_hidden_states, ref_hidden_states]) 323 | else: 324 | ref_hidden_states = torch.cat([empty_copy, ref_hidden_states]) 325 | hidden_states = torch.cat([hidden_states, ref_hidden_states], dim=1) 326 | else: 327 | raise ValueError("unsupport type") 328 | residual = hidden_states 329 | if attn.spatial_norm is not None: 330 | hidden_states = attn.spatial_norm(hidden_states, temb) 331 | 332 | input_ndim = hidden_states.ndim 333 | 334 | if input_ndim == 4: 335 | batch_size, channel, height, width = hidden_states.shape 336 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 337 | 338 | batch_size, sequence_length, _ = ( 339 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 340 | ) 341 | 342 | if attention_mask is not None: 343 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 344 | # scaled_dot_product_attention expects attention_mask shape to be 345 | # (batch, heads, source_length, target_length) 346 | attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) 347 | 348 | if attn.group_norm is not None: 349 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 350 | 351 | args = () if USE_PEFT_BACKEND else (scale,) 352 | if Linear_Call_Needs_Extra_Args: 353 | query = attn.to_q(hidden_states, *args) 354 | else: 355 | query = attn.to_q(hidden_states) 356 | 357 | if encoder_hidden_states is None: 358 | encoder_hidden_states = hidden_states 359 | elif attn.norm_cross: 360 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 361 | 362 | if Linear_Call_Needs_Extra_Args: 363 | key = attn.to_k(encoder_hidden_states, *args) 364 | value = attn.to_v(encoder_hidden_states, *args) 365 | else: 366 | key = attn.to_k(encoder_hidden_states) 367 | value = attn.to_v(encoder_hidden_states) 368 | 369 | inner_dim = key.shape[-1] 370 | head_dim = inner_dim // attn.heads 371 | 372 | query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 373 | 374 | key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 375 | value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 376 | 377 | # the output of sdp = (batch, num_heads, seq_len, head_dim) 378 | # TODO: add support for attn.scale when we move to Torch 2.1 
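        # Descriptive note: in "write" mode the query/key/value above were
        # projected from the concatenation of the UNet hidden states and the
        # stored garment features, so this single attention call lets every
        # image token attend to the garment tokens as well (self-attention
        # over the joined sequence).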
379 | hidden_states = F.scaled_dot_product_attention( 380 | query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False 381 | ) 382 | hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 383 | hidden_states = hidden_states.to(query.dtype) 384 | 385 | if self.type == "write": 386 | hidden_states, _ = torch.chunk(hidden_states, 2, dim=1) 387 | # linear proj 388 | if Linear_Call_Needs_Extra_Args: 389 | hidden_states = attn.to_out[0](hidden_states, *args) 390 | else: 391 | hidden_states = attn.to_out[0](hidden_states) 392 | # dropout 393 | hidden_states = attn.to_out[1](hidden_states) 394 | 395 | if input_ndim == 4: 396 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 397 | 398 | if attn.residual_connection: 399 | hidden_states = hidden_states + residual 400 | 401 | hidden_states = hidden_states / attn.rescale_output_factor 402 | return hidden_states 403 | 404 | 405 | class REFAnimateDiffAttnProcessor2_0(nn.Module): 406 | def __init__(self, cross_attention_dim, hidden_size, name): 407 | super().__init__() 408 | if not hasattr(F, "scaled_dot_product_attention"): 409 | raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") 410 | self.name = name 411 | self.scale = 1.0 412 | self.to_k_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 413 | self.to_v_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 414 | 415 | def __call__( 416 | self, 417 | attn: Attention, 418 | hidden_states: torch.FloatTensor, 419 | encoder_hidden_states: Optional[torch.FloatTensor] = None, 420 | attention_mask: Optional[torch.FloatTensor] = None, 421 | temb: Optional[torch.FloatTensor] = None, 422 | scale: float = 1.0, 423 | attn_store=None, 424 | do_classifier_free_guidance=False, 425 | ) -> torch.FloatTensor: 426 | ref_hidden_states = attn_store[self.name] 427 | if do_classifier_free_guidance: 428 | empty_copy = torch.zeros_like(ref_hidden_states) 429 | repeat_num = hidden_states.shape[0] // 3 430 | ref_hidden_states = torch.cat( 431 | [empty_copy.repeat(repeat_num, 1, 1), ref_hidden_states.repeat(repeat_num, 1, 1), 432 | ref_hidden_states.repeat(repeat_num, 1, 1)]) 433 | 434 | if hidden_states.shape[0] % ref_hidden_states.shape[0] != 0: 435 | raise ValueError("not evenly divisible") 436 | 437 | residual = hidden_states 438 | if attn.spatial_norm is not None: 439 | hidden_states = attn.spatial_norm(hidden_states, temb) 440 | 441 | input_ndim = hidden_states.ndim 442 | 443 | if input_ndim == 4: 444 | batch_size, channel, height, width = hidden_states.shape 445 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 446 | 447 | batch_size, sequence_length, _ = ( 448 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 449 | ) 450 | 451 | if attention_mask is not None: 452 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 453 | # scaled_dot_product_attention expects attention_mask shape to be 454 | # (batch, heads, source_length, target_length) 455 | attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) 456 | 457 | if attn.group_norm is not None: 458 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 459 | 460 | args = () if USE_PEFT_BACKEND else (scale,) 461 | if Linear_Call_Needs_Extra_Args: 462 | query = attn.to_q(hidden_states, *args) 463 | else: 464 
| query = attn.to_q(hidden_states) 465 | 466 | if encoder_hidden_states is None: 467 | encoder_hidden_states = hidden_states 468 | elif attn.norm_cross: 469 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 470 | 471 | if Linear_Call_Needs_Extra_Args: 472 | key = attn.to_k(encoder_hidden_states, *args) 473 | value = attn.to_v(encoder_hidden_states, *args) 474 | else: 475 | key = attn.to_k(encoder_hidden_states) 476 | value = attn.to_v(encoder_hidden_states) 477 | 478 | inner_dim = key.shape[-1] 479 | head_dim = inner_dim // attn.heads 480 | 481 | query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 482 | 483 | key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 484 | value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 485 | 486 | # the output of sdp = (batch, num_heads, seq_len, head_dim) 487 | # TODO: add support for attn.scale when we move to Torch 2.1 488 | hidden_states = F.scaled_dot_product_attention( 489 | query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False 490 | ) 491 | 492 | hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 493 | hidden_states = hidden_states.to(query.dtype) 494 | 495 | ref_key = self.to_k_ip(ref_hidden_states.float()).view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 496 | ref_value = self.to_v_ip(ref_hidden_states.float()).view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 497 | ref_hidden_states = F.scaled_dot_product_attention( 498 | query.float(), ref_key, ref_value, attn_mask=None, dropout_p=0.0, is_causal=False 499 | ) 500 | 501 | ref_hidden_states = ref_hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 502 | ref_hidden_states = ref_hidden_states.to(query.dtype) 503 | 504 | hidden_states = hidden_states + self.scale * ref_hidden_states 505 | # linear proj 506 | if Linear_Call_Needs_Extra_Args: 507 | hidden_states = attn.to_out[0](hidden_states, *args) 508 | else: 509 | hidden_states = attn.to_out[0](hidden_states) 510 | # dropout 511 | hidden_states = attn.to_out[1](hidden_states) 512 | 513 | if input_ndim == 4: 514 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 515 | 516 | if attn.residual_connection: 517 | hidden_states = hidden_states + residual 518 | 519 | hidden_states = hidden_states / attn.rescale_output_factor 520 | return hidden_states 521 | 522 | 523 | class IPAttnProcessor(nn.Module): 524 | 525 | def __init__(self, hidden_size, cross_attention_dim=None, scale=1.0, num_tokens=4): 526 | super().__init__() 527 | 528 | self.hidden_size = hidden_size 529 | self.cross_attention_dim = cross_attention_dim 530 | self.scale = scale 531 | self.num_tokens = num_tokens 532 | 533 | self.to_k_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 534 | self.to_v_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 535 | 536 | def __call__( 537 | self, 538 | attn, 539 | hidden_states, 540 | encoder_hidden_states=None, 541 | attention_mask=None, 542 | temb=None, 543 | scale: float = 1.0, 544 | attn_store=None, 545 | do_classifier_free_guidance=None, 546 | enable_cloth_guidance=None 547 | ): 548 | residual = hidden_states 549 | 550 | if attn.spatial_norm is not None: 551 | hidden_states = attn.spatial_norm(hidden_states, temb) 552 | 553 | input_ndim = hidden_states.ndim 554 | 555 | if input_ndim == 4: 556 | batch_size, channel, height, width = hidden_states.shape 557 | 
hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 558 | 559 | batch_size, sequence_length, _ = ( 560 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 561 | ) 562 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 563 | 564 | if attn.group_norm is not None: 565 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 566 | 567 | args = () if USE_PEFT_BACKEND else (scale,) 568 | if Linear_Call_Needs_Extra_Args: 569 | query = attn.to_q(hidden_states, *args) 570 | else: 571 | query = attn.to_q(hidden_states) 572 | 573 | if encoder_hidden_states is None: 574 | encoder_hidden_states = hidden_states 575 | else: 576 | # get encoder_hidden_states, ip_hidden_states 577 | end_pos = encoder_hidden_states.shape[1] - self.num_tokens 578 | encoder_hidden_states, ip_hidden_states = ( 579 | encoder_hidden_states[:, :end_pos, :], 580 | encoder_hidden_states[:, end_pos:, :], 581 | ) 582 | if attn.norm_cross: 583 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 584 | 585 | if Linear_Call_Needs_Extra_Args: 586 | key = attn.to_k(encoder_hidden_states, *args) 587 | value = attn.to_v(encoder_hidden_states, *args) 588 | else: 589 | key = attn.to_k(encoder_hidden_states) 590 | value = attn.to_v(encoder_hidden_states) 591 | 592 | query = attn.head_to_batch_dim(query) 593 | key = attn.head_to_batch_dim(key) 594 | value = attn.head_to_batch_dim(value) 595 | 596 | attention_probs = attn.get_attention_scores(query, key, attention_mask) 597 | hidden_states = torch.bmm(attention_probs, value) 598 | hidden_states = attn.batch_to_head_dim(hidden_states) 599 | 600 | # for ip-adapter 601 | ip_key = self.to_k_ip(ip_hidden_states) 602 | ip_value = self.to_v_ip(ip_hidden_states) 603 | 604 | ip_key = attn.head_to_batch_dim(ip_key) 605 | ip_value = attn.head_to_batch_dim(ip_value) 606 | 607 | ip_attention_probs = attn.get_attention_scores(query, ip_key, None) 608 | self.attn_map = ip_attention_probs 609 | ip_hidden_states = torch.bmm(ip_attention_probs, ip_value) 610 | ip_hidden_states = attn.batch_to_head_dim(ip_hidden_states) 611 | 612 | hidden_states = hidden_states + self.scale * ip_hidden_states 613 | 614 | # linear proj 615 | if Linear_Call_Needs_Extra_Args: 616 | hidden_states = attn.to_out[0](hidden_states, *args) 617 | else: 618 | hidden_states = attn.to_out[0](hidden_states) 619 | # dropout 620 | hidden_states = attn.to_out[1](hidden_states) 621 | 622 | if input_ndim == 4: 623 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 624 | 625 | if attn.residual_connection: 626 | hidden_states = hidden_states + residual 627 | 628 | hidden_states = hidden_states / attn.rescale_output_factor 629 | 630 | return hidden_states 631 | 632 | 633 | class IPAttnProcessor2_0(torch.nn.Module): 634 | 635 | def __init__(self, hidden_size, cross_attention_dim=None, scale=1.0, num_tokens=4): 636 | super().__init__() 637 | 638 | if not hasattr(F, "scaled_dot_product_attention"): 639 | raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") 640 | 641 | self.hidden_size = hidden_size 642 | self.cross_attention_dim = cross_attention_dim 643 | self.scale = scale 644 | self.num_tokens = num_tokens 645 | 646 | self.to_k_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 647 | self.to_v_ip = nn.Linear(cross_attention_dim or hidden_size, hidden_size, bias=False) 648 | 
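    # Usage note (illustrative, not from the upstream file): this processor
    # expects `encoder_hidden_states` to be the text embeddings with the
    # IP-Adapter image tokens appended along dim=1, for example
    #
    #     encoder_hidden_states = torch.cat([prompt_embeds, image_prompt_embeds], dim=1)
    #
    # where `image_prompt_embeds` carries `num_tokens` tokens (4 for the
    # standard IP-Adapter projection, 16 for IP-Adapter Plus); __call__ splits
    # the two streams apart again using `self.num_tokens`.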
649 | def __call__( 650 | self, 651 | attn, 652 | hidden_states, 653 | encoder_hidden_states=None, 654 | attention_mask=None, 655 | temb=None, 656 | scale: float = 1.0, 657 | attn_store=None, 658 | do_classifier_free_guidance=None, 659 | enable_cloth_guidance=None 660 | ): 661 | residual = hidden_states 662 | 663 | if attn.spatial_norm is not None: 664 | hidden_states = attn.spatial_norm(hidden_states, temb) 665 | 666 | input_ndim = hidden_states.ndim 667 | 668 | if input_ndim == 4: 669 | batch_size, channel, height, width = hidden_states.shape 670 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 671 | 672 | batch_size, sequence_length, _ = ( 673 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 674 | ) 675 | 676 | if attention_mask is not None: 677 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 678 | # scaled_dot_product_attention expects attention_mask shape to be 679 | # (batch, heads, source_length, target_length) 680 | attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) 681 | 682 | if attn.group_norm is not None: 683 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 684 | 685 | args = () if USE_PEFT_BACKEND else (scale,) 686 | if Linear_Call_Needs_Extra_Args: 687 | query = attn.to_q(hidden_states, *args) 688 | else: 689 | query = attn.to_q(hidden_states) 690 | 691 | if encoder_hidden_states is None: 692 | encoder_hidden_states = hidden_states 693 | else: 694 | # get encoder_hidden_states, ip_hidden_states 695 | end_pos = encoder_hidden_states.shape[1] - self.num_tokens 696 | encoder_hidden_states, ip_hidden_states = ( 697 | encoder_hidden_states[:, :end_pos, :], 698 | encoder_hidden_states[:, end_pos:, :], 699 | ) 700 | if attn.norm_cross: 701 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 702 | 703 | if Linear_Call_Needs_Extra_Args: 704 | key = attn.to_k(encoder_hidden_states, *args) 705 | value = attn.to_v(encoder_hidden_states, *args) 706 | else: 707 | key = attn.to_k(encoder_hidden_states) 708 | value = attn.to_v(encoder_hidden_states) 709 | 710 | inner_dim = key.shape[-1] 711 | head_dim = inner_dim // attn.heads 712 | 713 | query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 714 | 715 | key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 716 | value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 717 | 718 | # the output of sdp = (batch, num_heads, seq_len, head_dim) 719 | # TODO: add support for attn.scale when we move to Torch 2.1 720 | hidden_states = F.scaled_dot_product_attention( 721 | query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False 722 | ) 723 | 724 | hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 725 | hidden_states = hidden_states.to(query.dtype) 726 | 727 | # for ip-adapter 728 | ip_key = self.to_k_ip(ip_hidden_states) 729 | ip_value = self.to_v_ip(ip_hidden_states) 730 | 731 | ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 732 | ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) 733 | 734 | # the output of sdp = (batch, num_heads, seq_len, head_dim) 735 | # TODO: add support for attn.scale when we move to Torch 2.1 736 | ip_hidden_states = F.scaled_dot_product_attention( 737 | query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False 738 | ) 739 | 
with torch.no_grad(): 740 | self.attn_map = query @ ip_key.transpose(-2, -1).softmax(dim=-1) 741 | # print(self.attn_map.shape) 742 | 743 | ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) 744 | ip_hidden_states = ip_hidden_states.to(query.dtype) 745 | 746 | hidden_states = hidden_states + self.scale * ip_hidden_states 747 | 748 | # linear proj 749 | if Linear_Call_Needs_Extra_Args: 750 | hidden_states = attn.to_out[0](hidden_states, *args) 751 | else: 752 | hidden_states = attn.to_out[0](hidden_states) 753 | # dropout 754 | hidden_states = attn.to_out[1](hidden_states) 755 | 756 | if input_ndim == 4: 757 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 758 | 759 | if attn.residual_connection: 760 | hidden_states = hidden_states + residual 761 | 762 | hidden_states = hidden_states / attn.rescale_output_factor 763 | 764 | return hidden_states 765 | -------------------------------------------------------------------------------- /diffusers_magic_clothing/garment_diffusion.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | from safetensors import safe_open 4 | from .utils import is_torch2_available 5 | from diffusers import UNet2DConditionModel 6 | 7 | if is_torch2_available(): 8 | from .attention_processor import REFAttnProcessor2_0 as REFAttnProcessor 9 | from .attention_processor import AttnProcessor2_0 as AttnProcessor 10 | from .attention_processor import REFAnimateDiffAttnProcessor2_0 as REFAnimateDiffAttnProcessor 11 | else: 12 | from .attention_processor import REFAttnProcessor, AttnProcessor 13 | import torch.nn.functional as F 14 | 15 | class ClothAdapter: 16 | def __init__(self, sd_pipe, ref_path): 17 | self.enable_cloth_guidance = True 18 | self.pipe = sd_pipe 19 | self.set_adapter(self.pipe.unet, "write") 20 | 21 | ref_unet = copy.deepcopy(sd_pipe.unet) 22 | if ref_unet.config.in_channels == 9: 23 | ref_unet.conv_in = torch.nn.Conv2d(4, 320, ref_unet.conv_in.kernel_size, ref_unet.conv_in.stride, ref_unet.conv_in.padding) 24 | ref_unet.register_to_config(in_channels=4) 25 | state_dict = {} 26 | with safe_open(ref_path, framework="pt", device="cpu") as f: 27 | for key in f.keys(): 28 | state_dict[key] = f.get_tensor(key) 29 | ref_unet.load_state_dict(state_dict, strict=False) 30 | 31 | self.ref_unet = ref_unet.to(self.pipe.device, dtype=self.pipe.dtype) 32 | self.set_adapter(self.ref_unet, "read") 33 | self.attn_store = {} 34 | 35 | def set_adapter(self, unet, type): 36 | attn_procs = {} 37 | for name in unet.attn_processors.keys(): 38 | if "attn1" in name: 39 | attn_procs[name] = REFAttnProcessor(name=name, type=type) 40 | else: 41 | attn_procs[name] = AttnProcessor() 42 | unet.set_attn_processor(attn_procs) 43 | 44 | def generate( 45 | self, 46 | cloth_latent, 47 | gen_latents, 48 | prompt_embeds_null, 49 | positive=None, 50 | negative=None, 51 | num_images_per_prompt=4, 52 | seed=-1, 53 | guidance_scale=7.5, 54 | cloth_guidance_scale=2.5, 55 | num_inference_steps=20, 56 | height=512, 57 | width=384, 58 | **kwargs, 59 | ): 60 | if gen_latents is not None: 61 | gen_latents = 0.18215 * gen_latents 62 | gen_latents=gen_latents.to(self.pipe.device,dtype=self.pipe.dtype) 63 | cloth_latent=cloth_latent.to(self.pipe.device,dtype=self.pipe.dtype) 64 | prompt_embeds_null = prompt_embeds_null.to(self.pipe.device,dtype=self.pipe.dtype) 65 | positive = positive.to(self.pipe.device,dtype=self.pipe.dtype) 66 | negative = 
negative.to(self.pipe.device,dtype=self.pipe.dtype) 67 | cloth_latent = 0.18215 * cloth_latent 68 | self.ref_unet(torch.cat([cloth_latent] * num_images_per_prompt), 0, torch.cat([prompt_embeds_null] * num_images_per_prompt), cross_attention_kwargs={"attn_store": self.attn_store}) 69 | 70 | 71 | self.generator = torch.Generator(self.pipe.device).manual_seed(seed) if seed is not None else None 72 | if self.enable_cloth_guidance: 73 | images = self.pipe( 74 | prompt_embeds=positive, 75 | negative_prompt_embeds=negative, 76 | guidance_scale=guidance_scale, 77 | cloth_guidance_scale=cloth_guidance_scale, 78 | num_inference_steps=num_inference_steps, 79 | latents = gen_latents, 80 | generator=self.generator, 81 | height=height, 82 | width=width, 83 | cross_attention_kwargs={"attn_store": self.attn_store, "do_classifier_free_guidance": guidance_scale > 1.0, "enable_cloth_guidance": self.enable_cloth_guidance}, 84 | **kwargs, 85 | ) 86 | else: 87 | images = self.pipe( 88 | prompt_embeds=positive, 89 | negative_prompt_embeds=negative, 90 | guidance_scale=guidance_scale, 91 | num_inference_steps=num_inference_steps, 92 | generator=self.generator, 93 | latents = gen_latents, 94 | height=height, 95 | width=width, 96 | cross_attention_kwargs={"attn_store": self.attn_store, "do_classifier_free_guidance": guidance_scale > 1.0, "enable_cloth_guidance": self.enable_cloth_guidance}, 97 | **kwargs, 98 | ) 99 | 100 | return images -------------------------------------------------------------------------------- /diffusers_magic_clothing/utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import numpy as np 3 | import PIL 4 | import torch 5 | 6 | 7 | def is_torch2_available(): 8 | return hasattr(F, "scaled_dot_product_attention") 9 | 10 | 11 | def prepare_image(image, height, width): 12 | if image is None: 13 | raise ValueError("`image` input cannot be undefined.") 14 | 15 | if isinstance(image, torch.Tensor): 16 | # Batch single image 17 | if image.ndim == 3: 18 | assert image.shape[0] == 3, "Image outside a batch should be of shape (3, H, W)" 19 | image = image.unsqueeze(0) 20 | 21 | # Check image is in [-1, 1] 22 | if image.min() < -1 or image.max() > 1: 23 | raise ValueError("Image should be in [-1, 1] range") 24 | 25 | # Image as float32 26 | image = image.to(dtype=torch.float32) 27 | else: 28 | # preprocess image 29 | if isinstance(image, (PIL.Image.Image, np.ndarray)): 30 | image = [image] 31 | if isinstance(image, list) and isinstance(image[0], PIL.Image.Image): 32 | # resize all images w.r.t passed height an width 33 | image = [i.resize((width, height), resample=PIL.Image.LANCZOS) for i in image] 34 | image = [np.array(i.convert("RGB"))[None, :] for i in image] 35 | image = np.concatenate(image, axis=0) 36 | elif isinstance(image, list) and isinstance(image[0], np.ndarray): 37 | image = np.concatenate([i[None, :] for i in image], axis=0) 38 | 39 | image = image.transpose(0, 3, 1, 2) 40 | image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0 41 | 42 | return image 43 | 44 | 45 | def prepare_mask(image, height, width): 46 | if image is None: 47 | raise ValueError("`image` input cannot be undefined.") 48 | 49 | if isinstance(image, torch.Tensor): 50 | # Batch single image 51 | if image.ndim == 3: 52 | assert image.shape[0] == 1, "Image outside a batch should be of shape (3, H, W)" 53 | image = image.unsqueeze(0) 54 | image = image.to(dtype=torch.float32) 55 | else: 56 | # preprocess image 57 | if 
isinstance(image, (PIL.Image.Image, np.ndarray)): 58 | image = [image] 59 | if isinstance(image, list) and isinstance(image[0], PIL.Image.Image): 60 | # resize all images w.r.t passed height an width 61 | image = [i.resize((width, height), resample=PIL.Image.NEAREST) for i in image] 62 | image = [np.array(i.convert("L"))[..., None] for i in image] 63 | image = np.stack(image, axis=0) 64 | elif isinstance(image, list) and isinstance(image[0], np.ndarray): 65 | image = np.stack([i[..., None] for i in image], axis=0) 66 | 67 | image = image.transpose(0, 3, 1, 2) 68 | image = torch.from_numpy(image).to(dtype=torch.float32) / 255. 69 | image[image > 0.5] = 1 70 | image[image <= 0.5] = 0 71 | 72 | return image 73 | -------------------------------------------------------------------------------- /diffusers_warp_nodes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import folder_paths 4 | from pathlib import Path 5 | 6 | from comfy import model_management 7 | 8 | from .diffusers_magic_clothing.garment_diffusion import ClothAdapter 9 | from .diffusers_magic_clothing.MagicClothingDiffusionPipeline import MagicClothingDiffusionPipeline 10 | from diffusers import ( 11 | AutoencoderKL, 12 | DDIMScheduler, 13 | DDPMScheduler, 14 | DEISMultistepScheduler, 15 | DPMSolverMultistepScheduler, 16 | DPMSolverSinglestepScheduler, 17 | EulerAncestralDiscreteScheduler, 18 | EulerDiscreteScheduler, 19 | HeunDiscreteScheduler, 20 | KDPM2AncestralDiscreteScheduler, 21 | KDPM2DiscreteScheduler, 22 | UniPCMultistepScheduler, 23 | ) 24 | 25 | SCHEDULERS = { 26 | 'DDIM' : DDIMScheduler, 27 | 'DDPM' : DDPMScheduler, 28 | 'DEISMultistep' : DEISMultistepScheduler, 29 | 'DPMSolverMultistep' : DPMSolverMultistepScheduler, 30 | 'DPMSolverSinglestep' : DPMSolverSinglestepScheduler, 31 | 'EulerAncestralDiscrete' : EulerAncestralDiscreteScheduler, 32 | 'EulerDiscrete' : EulerDiscreteScheduler, 33 | 'HeunDiscrete' : HeunDiscreteScheduler, 34 | 'KDPM2AncestralDiscrete' : KDPM2AncestralDiscreteScheduler, 35 | 'KDPM2Discrete' : KDPM2DiscreteScheduler, 36 | 'UniPCMultistep' : UniPCMultistepScheduler 37 | } 38 | 39 | class ChangePixelValueNormalization: 40 | @classmethod 41 | def INPUT_TYPES(s): 42 | return {"required": 43 | {"pixels": ("IMAGE", ), 44 | "mode": (["[0,1]=>[-1,1]", "[-1,1]=>[0,1]"],), 45 | } 46 | } 47 | RETURN_TYPES = ("IMAGE",) 48 | FUNCTION = "normalization" 49 | 50 | CATEGORY = "image" 51 | 52 | def normalization(self, pixels, mode): 53 | if mode == "[0,1]=>[-1,1]": 54 | pixels = (pixels * 255).round().clamp(min=0, max=255) / 127.5 - 1.0 55 | elif mode == "[-1,1]=>[0,1]": 56 | pixels = ((pixels+1) * 127.5).clamp(min=0, max=255) / 255.0 57 | else: 58 | pixels = pixels 59 | return (pixels,) 60 | 61 | 62 | class ChangePipelineDtypeAndDevice: 63 | @classmethod 64 | def INPUT_TYPES(s): 65 | return {"required": 66 | {"pipeline": ("PIPELINE", ), 67 | "dtype": (["default", "float32", "float16", "bfloat16"],), 68 | "device": (["default", "cpu", "cuda", "cuda:0", "cuda:1"],), 69 | } 70 | } 71 | RETURN_TYPES = ("PIPELINE",) 72 | FUNCTION = "change_dtype" 73 | 74 | CATEGORY = "pipeline" 75 | 76 | def change_dtype(self, pipeline, dtype="default", device="default"): 77 | if dtype == "float16": 78 | seleted_type = torch.float16 79 | elif dtype == "bfloat16": 80 | seleted_type = torch.bfloat16 81 | else: 82 | seleted_type = torch.float32 83 | if device == "default": 84 | seleted_device = model_management.get_torch_device() 85 | else: 86 | seleted_device = 
torch.device(device) 87 | pipeline = pipeline.to(seleted_device, dtype=seleted_type) 88 | pipeline.device = seleted_device 89 | pipeline.dtype = seleted_type 90 | return (pipeline,) 91 | 92 | 93 | class RunMagicClothingDiffusersModel: 94 | @classmethod 95 | def INPUT_TYPES(s): 96 | return {"required": {"cloth_image": ("IMAGE",), 97 | "magicClothingAdapter": ("MAGIC_CLOTHING_ADAPTER",), 98 | "positive": ("STRING", { 99 | "dynamicPrompts": False, 100 | "multiline": True, 101 | "default": "" 102 | }), 103 | "negative": ("STRING", { 104 | "dynamicPrompts": False, 105 | "multiline": True, 106 | "default": "" 107 | }), 108 | "height": ("INT", {"default": 768, "min": 0, "max": 2048}), 109 | "width": ("INT", {"default": 576, "min": 0, "max": 2048}), 110 | "batch_size": ("INT", {"default": 1, "min": 1, "max": 4}), 111 | "steps": ("INT", {"default": 25, "min": 0, "max": 100}), 112 | "cfg": ("FLOAT", {"default": 5, "min": 0.0, "max": 10.0, "step": 0.01}), 113 | "cloth_guidance_scale": ("FLOAT", {"default": 2.5, "min": 0.0, "max": 10.0, "step": 0.01}), 114 | "seed": ("INT", {"default": 1234, "min": 0, "max": 0xffffffffffffffff}), 115 | } 116 | } 117 | 118 | RETURN_TYPES = ("IMAGE",) 119 | FUNCTION = "run_model" 120 | 121 | CATEGORY = "loaders" 122 | 123 | def run_model(self, cloth_image, magicClothingAdapter, positive, negative, height, width, batch_size, steps, cfg, cloth_guidance_scale, seed,): 124 | cloth_image = (cloth_image * 255).round().clamp(min=0, 125 | max=255).to(dtype=torch.float32) / 127.5 - 1.0 126 | cloth_image = cloth_image.permute(0, 3, 1, 2) 127 | if not isinstance(magicClothingAdapter, ClothAdapter): 128 | # 如果发现不是正确的模型,就返回原始图片,不进行处理 129 | gen_image = cloth_image.permute(0, 2, 3, 1) 130 | gen_image = ((gen_image+1) * 127.5).clamp(min=0, 131 | max=255).to(dtype=torch.float32) / 255.0 132 | return (gen_image,) 133 | magicClothingAdapter.enable_cloth_guidance = True 134 | cloth_image = cloth_image.to( 135 | magicClothingAdapter.pipe.device, dtype=magicClothingAdapter.pipe.dtype) 136 | with torch.inference_mode(): 137 | prompt_embeds_null = magicClothingAdapter.pipe.encode_prompt( 138 | [""], device=magicClothingAdapter.pipe.device, num_images_per_prompt=1, do_classifier_free_guidance=False)[0] 139 | prompt_embeds, negative_prompt_embeds = magicClothingAdapter.pipe.encode_prompt( 140 | positive, 141 | magicClothingAdapter.pipe.device, 142 | batch_size, 143 | True, 144 | negative, 145 | prompt_embeds=None, 146 | negative_prompt_embeds=None, 147 | lora_scale=None, 148 | clip_skip=None, 149 | ) 150 | cloth_latent = magicClothingAdapter.pipe.vae.encode( 151 | cloth_image).latent_dist.mode() 152 | gen_image = magicClothingAdapter.generate(cloth_latent, None, prompt_embeds_null, prompt_embeds, negative_prompt_embeds, batch_size, seed, cfg, cloth_guidance_scale, steps, height, width) 153 | gen_image = magicClothingAdapter.pipe.vae.decode( 154 | gen_image, return_dict=False, generator=magicClothingAdapter.generator)[0] 155 | gen_image = gen_image.permute(0, 2, 3, 1) 156 | gen_image = ((gen_image+1) * 127.5).clamp(min=0, 157 | max=255).to(dtype=torch.float32) / 255.0 158 | return (gen_image,) 159 | 160 | 161 | class LoadMagicClothingPipelineWithPath: 162 | @classmethod 163 | def INPUT_TYPES(cls): 164 | paths = [] 165 | my_path = os.path.dirname(__file__) 166 | my_pipeline_path = os.path.join(my_path, "conversion") 167 | for search_path in folder_paths.get_folder_paths("diffusers"): 168 | if os.path.exists(search_path): 169 | client_paths = next(os.walk(search_path))[1] 170 | client_paths = 
["diffusers/" + item for item in client_paths] 171 | paths += client_paths 172 | if os.path.exists(my_pipeline_path): 173 | client_paths = next(os.walk(my_pipeline_path))[1] 174 | client_paths = ["conversion/" + item for item in client_paths] 175 | paths += client_paths 176 | return {"required": {"model_path": (paths,), 177 | "dtype": (["default", "float32", "float16", "bfloat16"],), 178 | "device": (["default", "cpu", "cuda", "cuda:0", "cuda:1"],), }} 179 | RETURN_TYPES = ("PIPELINE", "AUTOENCODER", "SCHEDULER",) 180 | FUNCTION = "load_checkpoint" 181 | 182 | CATEGORY = "Diffusers" 183 | 184 | def load_checkpoint(self, model_path,dtype,device): 185 | if dtype == "float16": 186 | seleted_type = torch.float16 187 | elif dtype == "bfloat16": 188 | seleted_type = torch.bfloat16 189 | else: 190 | seleted_type = torch.float32 191 | if device == "default": 192 | seleted_device = model_management.get_torch_device() 193 | else: 194 | seleted_device = torch.device(device) 195 | 196 | if model_path.startswith("conversion/"): 197 | model_path = model_path.replace("conversion/", "") 198 | my_path = os.path.dirname(__file__) 199 | my_pipeline_path = os.path.join(my_path, "conversion") 200 | model_real_path = os.path.join(my_pipeline_path, model_path) 201 | model_real_dir = my_pipeline_path 202 | elif model_path.startswith("diffusers/"): 203 | model_path = model_path.replace("diffusers/", "") 204 | diffusers_path = folder_paths.get_folder_paths("diffusers")[0] 205 | model_real_path = os.path.join(diffusers_path, model_path) 206 | model_real_dir = diffusers_path 207 | else: 208 | raise ValueError("未选择模型") 209 | 210 | pipe = MagicClothingDiffusionPipeline.from_pretrained( 211 | pretrained_model_name_or_path=model_real_path, 212 | torch_dtype=seleted_type, 213 | cache_dir=model_real_dir, 214 | ) 215 | pipe.to(seleted_device, dtype=seleted_type) 216 | return ((pipe, model_real_path), pipe.vae, pipe.scheduler) 217 | 218 | class LoadMagicClothingPipelinWithConversion: 219 | # code base from https://github.com/Limitex/ComfyUI-Diffusers.git 220 | 221 | @classmethod 222 | def INPUT_TYPES(s): 223 | return {"required": {"ckpt_name": (folder_paths.get_filename_list("checkpoints"), ), 224 | "dtype": (["default", "float32", "float16", "bfloat16"],), 225 | "device": (["default", "cpu", "cuda", "cuda:0", "cuda:1"],), }} 226 | 227 | RETURN_TYPES = ("PIPELINE", "AUTOENCODER", "SCHEDULER",) 228 | 229 | FUNCTION = "create_pipeline" 230 | 231 | CATEGORY = "Diffusers" 232 | 233 | def create_pipeline(self, ckpt_name,dtype,device): 234 | if dtype == "float16": 235 | seleted_type = torch.float16 236 | elif dtype == "bfloat16": 237 | seleted_type = torch.bfloat16 238 | else: 239 | seleted_type = torch.float32 240 | if device == "default": 241 | seleted_device = model_management.get_torch_device() 242 | else: 243 | seleted_device = torch.device(device) 244 | my_path = os.path.dirname(__file__) 245 | my_pipeline_path = os.path.join(my_path, "conversion") 246 | if not os.path.exists(my_pipeline_path): 247 | os.makedirs(my_pipeline_path) 248 | real_ckpt_name = Path(ckpt_name).stem 249 | real_ckpt_name = real_ckpt_name +"_"+str(seleted_type) 250 | real_ckpt_name = real_ckpt_name.replace(" ", "_").replace(".", "_").replace("/", "_") 251 | ckpt_conversion_path = os.path.join(my_pipeline_path, real_ckpt_name) 252 | if not os.path.exists(ckpt_conversion_path): 253 | # 不存在,则进行转换 254 | MagicClothingDiffusionPipeline.from_single_file( 255 | pretrained_model_link_or_path=folder_paths.get_full_path("checkpoints", ckpt_name), 256 | 
torch_dtype=seleted_type, 257 | cache_dir=my_pipeline_path, 258 | ).save_pretrained(ckpt_conversion_path, safe_serialization=True) 259 | 260 | pipe = MagicClothingDiffusionPipeline.from_pretrained( 261 | pretrained_model_name_or_path=ckpt_conversion_path, 262 | torch_dtype=seleted_type, 263 | cache_dir=my_pipeline_path, 264 | ) 265 | pipe.to(seleted_device, dtype=seleted_type) 266 | return ((pipe, ckpt_conversion_path), pipe.vae, pipe.scheduler) 267 | 268 | 269 | 270 | class DiffusersSchedulerLoader: 271 | # code copy from https://github.com/Limitex/ComfyUI-Diffusers.git 272 | 273 | @classmethod 274 | def INPUT_TYPES(s): 275 | return { 276 | "required": { 277 | "pipeline": ("PIPELINE", ), 278 | "scheduler_name": (list(SCHEDULERS.keys()), ), 279 | } 280 | } 281 | 282 | RETURN_TYPES = ("SCHEDULER",) 283 | 284 | FUNCTION = "load_scheduler" 285 | 286 | CATEGORY = "Diffusers" 287 | 288 | def load_scheduler(self, pipeline, scheduler_name): 289 | my_path = os.path.dirname(__file__) 290 | my_pipeline_path = os.path.join(my_path, "conversion") 291 | if not os.path.exists(my_pipeline_path): 292 | os.makedirs(my_pipeline_path) 293 | scheduler = SCHEDULERS[scheduler_name].from_pretrained( 294 | pretrained_model_name_or_path=pipeline[1], 295 | torch_dtype=pipeline[0].dtype, 296 | cache_dir=my_pipeline_path, 297 | subfolder='scheduler' 298 | ) 299 | return (scheduler,) 300 | 301 | class DiffusersModelMakeup: 302 | # code copy from https://github.com/Limitex/ComfyUI-Diffusers.git 303 | @classmethod 304 | def INPUT_TYPES(s): 305 | return { 306 | "required": { 307 | "pipeline": ("PIPELINE", ), 308 | "scheduler": ("SCHEDULER", ), 309 | "autoencoder": ("AUTOENCODER", ), 310 | }, 311 | } 312 | 313 | RETURN_TYPES = ("MAKED_PIPELINE",) 314 | 315 | FUNCTION = "makeup_pipeline" 316 | 317 | CATEGORY = "Diffusers" 318 | 319 | def makeup_pipeline(self, pipeline, scheduler, autoencoder): 320 | pipeline = pipeline[0] 321 | autoencoder.to(pipeline.device, dtype=pipeline.dtype) 322 | pipeline.vae = autoencoder 323 | pipeline.scheduler = scheduler 324 | pipeline.safety_checker = None if pipeline.safety_checker is None else lambda images, **kwargs: (images, [False]) 325 | pipeline.enable_attention_slicing() 326 | return (pipeline,) 327 | 328 | class LoadMagicClothingAdapter: 329 | @classmethod 330 | def INPUT_TYPES(s): 331 | return {"required": 332 | {"magicClothingUnet": (folder_paths.get_filename_list("unet"), ), 333 | "pipeline": ("MAKED_PIPELINE", ), 334 | }, 335 | } 336 | 337 | RETURN_TYPES = ("MAGIC_CLOTHING_ADAPTER",) 338 | RETURN_NAMES = ("MagicClothingAdapter",) 339 | FUNCTION = "load_model" 340 | 341 | CATEGORY = "loaders" 342 | 343 | def load_model(self, magicClothingUnet, pipeline): 344 | unet_path = folder_paths.get_full_path("unet", magicClothingUnet) 345 | full_model = ClothAdapter(pipeline, unet_path) 346 | return (full_model,) 347 | 348 | 349 | NODE_CLASS_MAPPINGS = { 350 | "Diffusers Model Makeup &MC": DiffusersModelMakeup, 351 | "Diffusers Scheduler Loader &MC": DiffusersSchedulerLoader, 352 | "Change Pixel Value Normalization": ChangePixelValueNormalization, 353 | "Change Pipeline Dtype And Device": ChangePipelineDtypeAndDevice, 354 | "Load Magic Clothing Pipeline With Path": LoadMagicClothingPipelineWithPath, 355 | "Load Magic Clothing Pipeline": LoadMagicClothingPipelinWithConversion, 356 | "Load Magic Clothing Adapter": LoadMagicClothingAdapter, 357 | "RUN Magic Clothing Diffusers Model": RunMagicClothingDiffusersModel, 358 | } 359 | 360 | NODE_DISPLAY_NAME_MAPPINGS = { 361 | "Diffusers Model Makeup 
&MC": "Diffusers Model Makeup &MC", 362 | "Diffusers Scheduler Loader &MC": "Diffusers Scheduler Loader &MC", 363 | "Change Pipeline Dtype And Device": "Change Pipeline Dtype And Device", 364 | "Change Pixel Value Normalization": "Change Pixel Value Normalization", 365 | "Load Magic Clothing Pipeline With Path":"Load Magic Clothing Pipeline With Path&Diffusers", 366 | "Load Magic Clothing Pipeline":"Load Magic Clothing Pipeline&Diffusers", 367 | "Load Magic Clothing Adapter": "Load Magic Clothing Adapter &Diffusers", 368 | "RUN Magic Clothing Adapter": "RUN Magic Clothing Adapter &Diffusers", 369 | } 370 | -------------------------------------------------------------------------------- /example.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 104, 3 | "last_link_id": 233, 4 | "nodes": [ 5 | { 6 | "id": 19, 7 | "type": "LoadImage", 8 | "pos": [ 9 | 220, 10 | 170 11 | ], 12 | "size": { 13 | "0": 315, 14 | "1": 314 15 | }, 16 | "flags": {}, 17 | "order": 0, 18 | "mode": 0, 19 | "outputs": [ 20 | { 21 | "name": "IMAGE", 22 | "type": "IMAGE", 23 | "links": [ 24 | 228, 25 | 230 26 | ], 27 | "shape": 3, 28 | "slot_index": 0 29 | }, 30 | { 31 | "name": "MASK", 32 | "type": "MASK", 33 | "links": null, 34 | "shape": 3 35 | } 36 | ], 37 | "properties": { 38 | "Node name for S&R": "LoadImage" 39 | }, 40 | "widgets_values": [ 41 | "ComfyUI_00002_.png", 42 | "image" 43 | ] 44 | }, 45 | { 46 | "id": 101, 47 | "type": "BiRefNet", 48 | "pos": [ 49 | 550, 50 | 300 51 | ], 52 | "size": { 53 | "0": 315, 54 | "1": 58 55 | }, 56 | "flags": {}, 57 | "order": 3, 58 | "mode": 0, 59 | "inputs": [ 60 | { 61 | "name": "image", 62 | "type": "IMAGE", 63 | "link": 228 64 | } 65 | ], 66 | "outputs": [ 67 | { 68 | "name": "mask", 69 | "type": "MASK", 70 | "links": [ 71 | 229 72 | ], 73 | "shape": 3, 74 | "slot_index": 0 75 | } 76 | ], 77 | "properties": { 78 | "Node name for S&R": "BiRefNet" 79 | }, 80 | "widgets_values": [ 81 | "cuda:0" 82 | ] 83 | }, 84 | { 85 | "id": 102, 86 | "type": "Image Adaptive Crop With Mask", 87 | "pos": [ 88 | 880, 89 | 170 90 | ], 91 | "size": { 92 | "0": 315, 93 | "1": 126 94 | }, 95 | "flags": {}, 96 | "order": 7, 97 | "mode": 0, 98 | "inputs": [ 99 | { 100 | "name": "image", 101 | "type": "IMAGE", 102 | "link": 230 103 | }, 104 | { 105 | "name": "mask", 106 | "type": "MASK", 107 | "link": 229 108 | } 109 | ], 110 | "outputs": [ 111 | { 112 | "name": "image", 113 | "type": "IMAGE", 114 | "links": [ 115 | 231, 116 | 232 117 | ], 118 | "shape": 3, 119 | "slot_index": 0 120 | } 121 | ], 122 | "properties": { 123 | "Node name for S&R": "Image Adaptive Crop With Mask" 124 | }, 125 | "widgets_values": [ 126 | 576, 127 | 768, 128 | 50 129 | ] 130 | }, 131 | { 132 | "id": 42, 133 | "type": "VAEEncode", 134 | "pos": [ 135 | 1240, 136 | 170 137 | ], 138 | "size": { 139 | "0": 210, 140 | "1": 46 141 | }, 142 | "flags": {}, 143 | "order": 8, 144 | "mode": 0, 145 | "inputs": [ 146 | { 147 | "name": "pixels", 148 | "type": "IMAGE", 149 | "link": 231 150 | }, 151 | { 152 | "name": "vae", 153 | "type": "VAE", 154 | "link": 217 155 | } 156 | ], 157 | "outputs": [ 158 | { 159 | "name": "LATENT", 160 | "type": "LATENT", 161 | "links": [ 162 | 222 163 | ], 164 | "shape": 3, 165 | "slot_index": 0 166 | } 167 | ], 168 | "properties": { 169 | "Node name for S&R": "VAEEncode" 170 | } 171 | }, 172 | { 173 | "id": 67, 174 | "type": "CLIPTextEncode", 175 | "pos": [ 176 | 890, 177 | 780 178 | ], 179 | "size": { 180 | "0": 210, 181 | "1": 76 182 | }, 183 | 
"flags": {}, 184 | "order": 6, 185 | "mode": 0, 186 | "inputs": [ 187 | { 188 | "name": "clip", 189 | "type": "CLIP", 190 | "link": 166 191 | } 192 | ], 193 | "outputs": [ 194 | { 195 | "name": "CONDITIONING", 196 | "type": "CONDITIONING", 197 | "links": [ 198 | 141 199 | ], 200 | "shape": 3, 201 | "slot_index": 0 202 | } 203 | ], 204 | "properties": { 205 | "Node name for S&R": "CLIPTextEncode" 206 | }, 207 | "widgets_values": [ 208 | "bare, monochrome, lowres, bad anatomy, worst quality, low quality" 209 | ] 210 | }, 211 | { 212 | "id": 89, 213 | "type": "EmptyLatentImage", 214 | "pos": [ 215 | 1540, 216 | 580 217 | ], 218 | "size": { 219 | "0": 315, 220 | "1": 106 221 | }, 222 | "flags": {}, 223 | "order": 1, 224 | "mode": 0, 225 | "outputs": [ 226 | { 227 | "name": "LATENT", 228 | "type": "LATENT", 229 | "links": [ 230 | 192 231 | ], 232 | "shape": 3, 233 | "slot_index": 0 234 | } 235 | ], 236 | "properties": { 237 | "Node name for S&R": "EmptyLatentImage" 238 | }, 239 | "widgets_values": [ 240 | 576, 241 | 768, 242 | 1 243 | ] 244 | }, 245 | { 246 | "id": 35, 247 | "type": "VAEDecode", 248 | "pos": [ 249 | 1890, 250 | 730 251 | ], 252 | "size": { 253 | "0": 210, 254 | "1": 46 255 | }, 256 | "flags": {}, 257 | "order": 12, 258 | "mode": 0, 259 | "inputs": [ 260 | { 261 | "name": "samples", 262 | "type": "LATENT", 263 | "link": 170 264 | }, 265 | { 266 | "name": "vae", 267 | "type": "VAE", 268 | "link": 225 269 | } 270 | ], 271 | "outputs": [ 272 | { 273 | "name": "IMAGE", 274 | "type": "IMAGE", 275 | "links": [ 276 | 227 277 | ], 278 | "shape": 3, 279 | "slot_index": 0 280 | } 281 | ], 282 | "properties": { 283 | "Node name for S&R": "VAEDecode" 284 | } 285 | }, 286 | { 287 | "id": 100, 288 | "type": "PreviewImage", 289 | "pos": [ 290 | 1760, 291 | 880 292 | ], 293 | "size": { 294 | "0": 210, 295 | "1": 246 296 | }, 297 | "flags": {}, 298 | "order": 13, 299 | "mode": 0, 300 | "inputs": [ 301 | { 302 | "name": "images", 303 | "type": "IMAGE", 304 | "link": 227 305 | } 306 | ], 307 | "properties": { 308 | "Node name for S&R": "PreviewImage" 309 | } 310 | }, 311 | { 312 | "id": 103, 313 | "type": "PreviewImage", 314 | "pos": [ 315 | 1510, 316 | 880 317 | ], 318 | "size": { 319 | "0": 210, 320 | "1": 246 321 | }, 322 | "flags": {}, 323 | "order": 9, 324 | "mode": 0, 325 | "inputs": [ 326 | { 327 | "name": "images", 328 | "type": "IMAGE", 329 | "link": 232 330 | } 331 | ], 332 | "properties": { 333 | "Node name for S&R": "PreviewImage" 334 | } 335 | }, 336 | { 337 | "id": 76, 338 | "type": "Load Magic Clothing Model", 339 | "pos": [ 340 | 218, 341 | 651 342 | ], 343 | "size": { 344 | "0": 380.4000244140625, 345 | "1": 78 346 | }, 347 | "flags": {}, 348 | "order": 4, 349 | "mode": 0, 350 | "inputs": [ 351 | { 352 | "name": "sourceModel", 353 | "type": "MODEL", 354 | "link": 167 355 | } 356 | ], 357 | "outputs": [ 358 | { 359 | "name": "sourceModel", 360 | "type": "MODEL", 361 | "links": [ 362 | 223 363 | ], 364 | "shape": 3, 365 | "slot_index": 0 366 | }, 367 | { 368 | "name": "magicClothingModel", 369 | "type": "MODEL", 370 | "links": [ 371 | 224 372 | ], 373 | "shape": 3, 374 | "slot_index": 1 375 | } 376 | ], 377 | "properties": { 378 | "Node name for S&R": "Load Magic Clothing Model" 379 | }, 380 | "widgets_values": [ 381 | "oms_diffusion_768_200000.safetensors" 382 | ] 383 | }, 384 | { 385 | "id": 80, 386 | "type": "CheckpointLoaderSimple", 387 | "pos": [ 388 | 217, 389 | 929 390 | ], 391 | "size": { 392 | "0": 315, 393 | "1": 98 394 | }, 395 | "flags": {}, 396 | "order": 2, 397 | 
"mode": 0, 398 | "outputs": [ 399 | { 400 | "name": "MODEL", 401 | "type": "MODEL", 402 | "links": [ 403 | 167 404 | ], 405 | "shape": 3, 406 | "slot_index": 0 407 | }, 408 | { 409 | "name": "CLIP", 410 | "type": "CLIP", 411 | "links": [ 412 | 165, 413 | 166, 414 | 226 415 | ], 416 | "shape": 3, 417 | "slot_index": 1 418 | }, 419 | { 420 | "name": "VAE", 421 | "type": "VAE", 422 | "links": [ 423 | 217, 424 | 225 425 | ], 426 | "shape": 3, 427 | "slot_index": 2 428 | } 429 | ], 430 | "properties": { 431 | "Node name for S&R": "CheckpointLoaderSimple" 432 | }, 433 | "widgets_values": [ 434 | "Realistic_Vision_V4.0_fp16-no-ema.safetensors" 435 | ] 436 | }, 437 | { 438 | "id": 66, 439 | "type": "CLIPTextEncode", 440 | "pos": [ 441 | 890, 442 | 650 443 | ], 444 | "size": { 445 | "0": 210, 446 | "1": 76 447 | }, 448 | "flags": {}, 449 | "order": 5, 450 | "mode": 0, 451 | "inputs": [ 452 | { 453 | "name": "clip", 454 | "type": "CLIP", 455 | "link": 165 456 | } 457 | ], 458 | "outputs": [ 459 | { 460 | "name": "CONDITIONING", 461 | "type": "CONDITIONING", 462 | "links": [ 463 | 140 464 | ], 465 | "shape": 3, 466 | "slot_index": 0 467 | } 468 | ], 469 | "properties": { 470 | "Node name for S&R": "CLIPTextEncode" 471 | }, 472 | "widgets_values": [ 473 | "a photography of a model,best quality, high quality" 474 | ] 475 | }, 476 | { 477 | "id": 98, 478 | "type": "Add Magic Clothing Attention", 479 | "pos": [ 480 | 1540, 481 | 181 482 | ], 483 | "size": { 484 | "0": 315, 485 | "1": 166 486 | }, 487 | "flags": {}, 488 | "order": 10, 489 | "mode": 0, 490 | "inputs": [ 491 | { 492 | "name": "sourceModel", 493 | "type": "MODEL", 494 | "link": 223 495 | }, 496 | { 497 | "name": "magicClothingModel", 498 | "type": "MODEL", 499 | "link": 224 500 | }, 501 | { 502 | "name": "clip", 503 | "type": "CLIP", 504 | "link": 226 505 | }, 506 | { 507 | "name": "feature_image", 508 | "type": "LATENT", 509 | "link": 222 510 | } 511 | ], 512 | "outputs": [ 513 | { 514 | "name": "MODEL", 515 | "type": "MODEL", 516 | "links": [ 517 | 221 518 | ], 519 | "shape": 3, 520 | "slot_index": 0 521 | } 522 | ], 523 | "properties": { 524 | "Node name for S&R": "Add Magic Clothing Attention" 525 | }, 526 | "widgets_values": [ 527 | true, 528 | 2.5, 529 | 0.71 530 | ] 531 | }, 532 | { 533 | "id": 77, 534 | "type": "KSampler", 535 | "pos": [ 536 | 1926, 537 | 183 538 | ], 539 | "size": { 540 | "0": 315, 541 | "1": 262 542 | }, 543 | "flags": {}, 544 | "order": 11, 545 | "mode": 0, 546 | "inputs": [ 547 | { 548 | "name": "model", 549 | "type": "MODEL", 550 | "link": 221 551 | }, 552 | { 553 | "name": "positive", 554 | "type": "CONDITIONING", 555 | "link": 140 556 | }, 557 | { 558 | "name": "negative", 559 | "type": "CONDITIONING", 560 | "link": 141 561 | }, 562 | { 563 | "name": "latent_image", 564 | "type": "LATENT", 565 | "link": 192 566 | } 567 | ], 568 | "outputs": [ 569 | { 570 | "name": "LATENT", 571 | "type": "LATENT", 572 | "links": [ 573 | 170 574 | ], 575 | "shape": 3, 576 | "slot_index": 0 577 | } 578 | ], 579 | "properties": { 580 | "Node name for S&R": "KSampler" 581 | }, 582 | "widgets_values": [ 583 | 834064559708728, 584 | "fixed", 585 | 20, 586 | 5, 587 | "uni_pc", 588 | "normal", 589 | 1 590 | ] 591 | } 592 | ], 593 | "links": [ 594 | [ 595 | 140, 596 | 66, 597 | 0, 598 | 77, 599 | 1, 600 | "CONDITIONING" 601 | ], 602 | [ 603 | 141, 604 | 67, 605 | 0, 606 | 77, 607 | 2, 608 | "CONDITIONING" 609 | ], 610 | [ 611 | 165, 612 | 80, 613 | 1, 614 | 66, 615 | 0, 616 | "CLIP" 617 | ], 618 | [ 619 | 166, 620 | 80, 621 | 1, 622 | 
67, 623 | 0, 624 | "CLIP" 625 | ], 626 | [ 627 | 167, 628 | 80, 629 | 0, 630 | 76, 631 | 0, 632 | "MODEL" 633 | ], 634 | [ 635 | 170, 636 | 77, 637 | 0, 638 | 35, 639 | 0, 640 | "LATENT" 641 | ], 642 | [ 643 | 192, 644 | 89, 645 | 0, 646 | 77, 647 | 3, 648 | "LATENT" 649 | ], 650 | [ 651 | 217, 652 | 80, 653 | 2, 654 | 42, 655 | 1, 656 | "VAE" 657 | ], 658 | [ 659 | 221, 660 | 98, 661 | 0, 662 | 77, 663 | 0, 664 | "MODEL" 665 | ], 666 | [ 667 | 222, 668 | 42, 669 | 0, 670 | 98, 671 | 3, 672 | "LATENT" 673 | ], 674 | [ 675 | 223, 676 | 76, 677 | 0, 678 | 98, 679 | 0, 680 | "MODEL" 681 | ], 682 | [ 683 | 224, 684 | 76, 685 | 1, 686 | 98, 687 | 1, 688 | "MODEL" 689 | ], 690 | [ 691 | 225, 692 | 80, 693 | 2, 694 | 35, 695 | 1, 696 | "VAE" 697 | ], 698 | [ 699 | 226, 700 | 80, 701 | 1, 702 | 98, 703 | 2, 704 | "CLIP" 705 | ], 706 | [ 707 | 227, 708 | 35, 709 | 0, 710 | 100, 711 | 0, 712 | "IMAGE" 713 | ], 714 | [ 715 | 228, 716 | 19, 717 | 0, 718 | 101, 719 | 0, 720 | "IMAGE" 721 | ], 722 | [ 723 | 229, 724 | 101, 725 | 0, 726 | 102, 727 | 1, 728 | "MASK" 729 | ], 730 | [ 731 | 230, 732 | 19, 733 | 0, 734 | 102, 735 | 0, 736 | "IMAGE" 737 | ], 738 | [ 739 | 231, 740 | 102, 741 | 0, 742 | 42, 743 | 0, 744 | "IMAGE" 745 | ], 746 | [ 747 | 232, 748 | 102, 749 | 0, 750 | 103, 751 | 0, 752 | "IMAGE" 753 | ] 754 | ], 755 | "groups": [], 756 | "config": {}, 757 | "extra": {}, 758 | "version": 0.4 759 | } -------------------------------------------------------------------------------- /ipadapter.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 110, 3 | "last_link_id": 250, 4 | "nodes": [ 5 | { 6 | "id": 101, 7 | "type": "BiRefNet", 8 | "pos": [ 9 | 550, 10 | 300 11 | ], 12 | "size": { 13 | "0": 315, 14 | "1": 58 15 | }, 16 | "flags": {}, 17 | "order": 6, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "image", 22 | "type": "IMAGE", 23 | "link": 228 24 | } 25 | ], 26 | "outputs": [ 27 | { 28 | "name": "mask", 29 | "type": "MASK", 30 | "links": [ 31 | 229 32 | ], 33 | "shape": 3, 34 | "slot_index": 0 35 | } 36 | ], 37 | "properties": { 38 | "Node name for S&R": "BiRefNet" 39 | }, 40 | "widgets_values": [ 41 | "cuda:0" 42 | ] 43 | }, 44 | { 45 | "id": 67, 46 | "type": "CLIPTextEncode", 47 | "pos": [ 48 | 890, 49 | 780 50 | ], 51 | "size": { 52 | "0": 210, 53 | "1": 76 54 | }, 55 | "flags": {}, 56 | "order": 5, 57 | "mode": 0, 58 | "inputs": [ 59 | { 60 | "name": "clip", 61 | "type": "CLIP", 62 | "link": 166 63 | } 64 | ], 65 | "outputs": [ 66 | { 67 | "name": "CONDITIONING", 68 | "type": "CONDITIONING", 69 | "links": [ 70 | 141 71 | ], 72 | "shape": 3, 73 | "slot_index": 0 74 | } 75 | ], 76 | "properties": { 77 | "Node name for S&R": "CLIPTextEncode" 78 | }, 79 | "widgets_values": [ 80 | "bare, monochrome, lowres, bad anatomy, worst quality, low quality" 81 | ] 82 | }, 83 | { 84 | "id": 35, 85 | "type": "VAEDecode", 86 | "pos": [ 87 | 1890, 88 | 730 89 | ], 90 | "size": { 91 | "0": 210, 92 | "1": 46 93 | }, 94 | "flags": {}, 95 | "order": 14, 96 | "mode": 0, 97 | "inputs": [ 98 | { 99 | "name": "samples", 100 | "type": "LATENT", 101 | "link": 170 102 | }, 103 | { 104 | "name": "vae", 105 | "type": "VAE", 106 | "link": 225 107 | } 108 | ], 109 | "outputs": [ 110 | { 111 | "name": "IMAGE", 112 | "type": "IMAGE", 113 | "links": [ 114 | 227 115 | ], 116 | "shape": 3, 117 | "slot_index": 0 118 | } 119 | ], 120 | "properties": { 121 | "Node name for S&R": "VAEDecode" 122 | } 123 | }, 124 | { 125 | "id": 100, 126 | "type": "PreviewImage", 127 | 
"pos": [ 128 | 1760, 129 | 880 130 | ], 131 | "size": { 132 | "0": 210, 133 | "1": 246 134 | }, 135 | "flags": {}, 136 | "order": 15, 137 | "mode": 0, 138 | "inputs": [ 139 | { 140 | "name": "images", 141 | "type": "IMAGE", 142 | "link": 227 143 | } 144 | ], 145 | "properties": { 146 | "Node name for S&R": "PreviewImage" 147 | } 148 | }, 149 | { 150 | "id": 103, 151 | "type": "PreviewImage", 152 | "pos": [ 153 | 1510, 154 | 880 155 | ], 156 | "size": { 157 | "0": 210, 158 | "1": 246 159 | }, 160 | "flags": {}, 161 | "order": 9, 162 | "mode": 0, 163 | "inputs": [ 164 | { 165 | "name": "images", 166 | "type": "IMAGE", 167 | "link": 232 168 | } 169 | ], 170 | "properties": { 171 | "Node name for S&R": "PreviewImage" 172 | } 173 | }, 174 | { 175 | "id": 66, 176 | "type": "CLIPTextEncode", 177 | "pos": [ 178 | 890, 179 | 650 180 | ], 181 | "size": { 182 | "0": 210, 183 | "1": 76 184 | }, 185 | "flags": {}, 186 | "order": 4, 187 | "mode": 0, 188 | "inputs": [ 189 | { 190 | "name": "clip", 191 | "type": "CLIP", 192 | "link": 165 193 | } 194 | ], 195 | "outputs": [ 196 | { 197 | "name": "CONDITIONING", 198 | "type": "CONDITIONING", 199 | "links": [ 200 | 140 201 | ], 202 | "shape": 3, 203 | "slot_index": 0 204 | } 205 | ], 206 | "properties": { 207 | "Node name for S&R": "CLIPTextEncode" 208 | }, 209 | "widgets_values": [ 210 | "a photography of a model,best quality, high quality" 211 | ] 212 | }, 213 | { 214 | "id": 77, 215 | "type": "KSampler", 216 | "pos": [ 217 | 1926, 218 | 183 219 | ], 220 | "size": { 221 | "0": 315, 222 | "1": 262 223 | }, 224 | "flags": {}, 225 | "order": 13, 226 | "mode": 0, 227 | "inputs": [ 228 | { 229 | "name": "model", 230 | "type": "MODEL", 231 | "link": 250 232 | }, 233 | { 234 | "name": "positive", 235 | "type": "CONDITIONING", 236 | "link": 140 237 | }, 238 | { 239 | "name": "negative", 240 | "type": "CONDITIONING", 241 | "link": 141 242 | }, 243 | { 244 | "name": "latent_image", 245 | "type": "LATENT", 246 | "link": 192 247 | } 248 | ], 249 | "outputs": [ 250 | { 251 | "name": "LATENT", 252 | "type": "LATENT", 253 | "links": [ 254 | 170 255 | ], 256 | "shape": 3, 257 | "slot_index": 0 258 | } 259 | ], 260 | "properties": { 261 | "Node name for S&R": "KSampler" 262 | }, 263 | "widgets_values": [ 264 | 834064559708728, 265 | "fixed", 266 | 20, 267 | 5, 268 | "uni_pc", 269 | "normal", 270 | 1 271 | ] 272 | }, 273 | { 274 | "id": 107, 275 | "type": "IPAdapterUnifiedLoader", 276 | "pos": [ 277 | 360, 278 | 1075 279 | ], 280 | "size": { 281 | "0": 315, 282 | "1": 78 283 | }, 284 | "flags": {}, 285 | "order": 3, 286 | "mode": 0, 287 | "inputs": [ 288 | { 289 | "name": "model", 290 | "type": "MODEL", 291 | "link": 248 292 | }, 293 | { 294 | "name": "ipadapter", 295 | "type": "IPADAPTER", 296 | "link": null 297 | } 298 | ], 299 | "outputs": [ 300 | { 301 | "name": "model", 302 | "type": "MODEL", 303 | "links": [ 304 | 245 305 | ], 306 | "shape": 3, 307 | "slot_index": 0 308 | }, 309 | { 310 | "name": "ipadapter", 311 | "type": "IPADAPTER", 312 | "links": [ 313 | 243 314 | ], 315 | "shape": 3, 316 | "slot_index": 1 317 | } 318 | ], 319 | "properties": { 320 | "Node name for S&R": "IPAdapterUnifiedLoader" 321 | }, 322 | "widgets_values": [ 323 | "STANDARD (medium strength)" 324 | ] 325 | }, 326 | { 327 | "id": 42, 328 | "type": "VAEEncode", 329 | "pos": [ 330 | 1240, 331 | 170 332 | ], 333 | "size": { 334 | "0": 210, 335 | "1": 46 336 | }, 337 | "flags": {}, 338 | "order": 8, 339 | "mode": 0, 340 | "inputs": [ 341 | { 342 | "name": "pixels", 343 | "type": "IMAGE", 344 | 
"link": 231 345 | }, 346 | { 347 | "name": "vae", 348 | "type": "VAE", 349 | "link": 217 350 | } 351 | ], 352 | "outputs": [ 353 | { 354 | "name": "LATENT", 355 | "type": "LATENT", 356 | "links": [ 357 | 222 358 | ], 359 | "shape": 3, 360 | "slot_index": 0 361 | } 362 | ], 363 | "properties": { 364 | "Node name for S&R": "VAEEncode" 365 | } 366 | }, 367 | { 368 | "id": 80, 369 | "type": "CheckpointLoaderSimple", 370 | "pos": [ 371 | 217, 372 | 929 373 | ], 374 | "size": { 375 | "0": 315, 376 | "1": 98 377 | }, 378 | "flags": {}, 379 | "order": 1, 380 | "mode": 0, 381 | "outputs": [ 382 | { 383 | "name": "MODEL", 384 | "type": "MODEL", 385 | "links": [ 386 | 248 387 | ], 388 | "shape": 3, 389 | "slot_index": 0 390 | }, 391 | { 392 | "name": "CLIP", 393 | "type": "CLIP", 394 | "links": [ 395 | 165, 396 | 166, 397 | 226 398 | ], 399 | "shape": 3, 400 | "slot_index": 1 401 | }, 402 | { 403 | "name": "VAE", 404 | "type": "VAE", 405 | "links": [ 406 | 217, 407 | 225 408 | ], 409 | "shape": 3, 410 | "slot_index": 2 411 | } 412 | ], 413 | "properties": { 414 | "Node name for S&R": "CheckpointLoaderSimple" 415 | }, 416 | "widgets_values": [ 417 | "Realistic_Vision_V4.0_fp16-no-ema.safetensors" 418 | ] 419 | }, 420 | { 421 | "id": 110, 422 | "type": "IPAdapter", 423 | "pos": [ 424 | 785, 425 | 1068 426 | ], 427 | "size": { 428 | "0": 315, 429 | "1": 190 430 | }, 431 | "flags": {}, 432 | "order": 10, 433 | "mode": 0, 434 | "inputs": [ 435 | { 436 | "name": "model", 437 | "type": "MODEL", 438 | "link": 245 439 | }, 440 | { 441 | "name": "ipadapter", 442 | "type": "IPADAPTER", 443 | "link": 243 444 | }, 445 | { 446 | "name": "image", 447 | "type": "IMAGE", 448 | "link": 247 449 | }, 450 | { 451 | "name": "attn_mask", 452 | "type": "MASK", 453 | "link": null 454 | } 455 | ], 456 | "outputs": [ 457 | { 458 | "name": "MODEL", 459 | "type": "MODEL", 460 | "links": [ 461 | 249 462 | ], 463 | "shape": 3, 464 | "slot_index": 0 465 | } 466 | ], 467 | "properties": { 468 | "Node name for S&R": "IPAdapter" 469 | }, 470 | "widgets_values": [ 471 | 1, 472 | 0, 473 | 1, 474 | "standard" 475 | ] 476 | }, 477 | { 478 | "id": 76, 479 | "type": "Load Magic Clothing Model", 480 | "pos": [ 481 | 218, 482 | 651 483 | ], 484 | "size": { 485 | "0": 380.4000244140625, 486 | "1": 78 487 | }, 488 | "flags": {}, 489 | "order": 11, 490 | "mode": 0, 491 | "inputs": [ 492 | { 493 | "name": "sourceModel", 494 | "type": "MODEL", 495 | "link": 249 496 | } 497 | ], 498 | "outputs": [ 499 | { 500 | "name": "sourceModel", 501 | "type": "MODEL", 502 | "links": [ 503 | 223 504 | ], 505 | "shape": 3, 506 | "slot_index": 0 507 | }, 508 | { 509 | "name": "magicClothingModel", 510 | "type": "MODEL", 511 | "links": [ 512 | 224 513 | ], 514 | "shape": 3, 515 | "slot_index": 1 516 | } 517 | ], 518 | "properties": { 519 | "Node name for S&R": "Load Magic Clothing Model" 520 | }, 521 | "widgets_values": [ 522 | "oms_diffusion_768_200000.safetensors" 523 | ] 524 | }, 525 | { 526 | "id": 19, 527 | "type": "LoadImage", 528 | "pos": [ 529 | 220, 530 | 170 531 | ], 532 | "size": { 533 | "0": 315, 534 | "1": 314 535 | }, 536 | "flags": {}, 537 | "order": 2, 538 | "mode": 0, 539 | "outputs": [ 540 | { 541 | "name": "IMAGE", 542 | "type": "IMAGE", 543 | "links": [ 544 | 228, 545 | 230 546 | ], 547 | "shape": 3, 548 | "slot_index": 0 549 | }, 550 | { 551 | "name": "MASK", 552 | "type": "MASK", 553 | "links": null, 554 | "shape": 3 555 | } 556 | ], 557 | "properties": { 558 | "Node name for S&R": "LoadImage" 559 | }, 560 | "widgets_values": [ 561 | 
"ComfyUI_00002_.png", 562 | "image" 563 | ] 564 | }, 565 | { 566 | "id": 98, 567 | "type": "Add Magic Clothing Attention", 568 | "pos": [ 569 | 1492, 570 | 164 571 | ], 572 | "size": { 573 | "0": 315, 574 | "1": 166 575 | }, 576 | "flags": {}, 577 | "order": 12, 578 | "mode": 0, 579 | "inputs": [ 580 | { 581 | "name": "sourceModel", 582 | "type": "MODEL", 583 | "link": 223 584 | }, 585 | { 586 | "name": "magicClothingModel", 587 | "type": "MODEL", 588 | "link": 224 589 | }, 590 | { 591 | "name": "clip", 592 | "type": "CLIP", 593 | "link": 226 594 | }, 595 | { 596 | "name": "feature_image", 597 | "type": "LATENT", 598 | "link": 222 599 | } 600 | ], 601 | "outputs": [ 602 | { 603 | "name": "MODEL", 604 | "type": "MODEL", 605 | "links": [ 606 | 250 607 | ], 608 | "shape": 3, 609 | "slot_index": 0 610 | } 611 | ], 612 | "properties": { 613 | "Node name for S&R": "Add Magic Clothing Attention" 614 | }, 615 | "widgets_values": [ 616 | true, 617 | 2.5 618 | ] 619 | }, 620 | { 621 | "id": 102, 622 | "type": "Image Adaptive Crop With Mask", 623 | "pos": [ 624 | 880, 625 | 170 626 | ], 627 | "size": { 628 | "0": 315, 629 | "1": 126 630 | }, 631 | "flags": {}, 632 | "order": 7, 633 | "mode": 0, 634 | "inputs": [ 635 | { 636 | "name": "image", 637 | "type": "IMAGE", 638 | "link": 230 639 | }, 640 | { 641 | "name": "mask", 642 | "type": "MASK", 643 | "link": 229 644 | } 645 | ], 646 | "outputs": [ 647 | { 648 | "name": "image", 649 | "type": "IMAGE", 650 | "links": [ 651 | 231, 652 | 232, 653 | 247 654 | ], 655 | "shape": 3, 656 | "slot_index": 0 657 | } 658 | ], 659 | "properties": { 660 | "Node name for S&R": "Image Adaptive Crop With Mask" 661 | }, 662 | "widgets_values": [ 663 | 512, 664 | 512, 665 | 8 666 | ] 667 | }, 668 | { 669 | "id": 89, 670 | "type": "EmptyLatentImage", 671 | "pos": [ 672 | 1540, 673 | 580 674 | ], 675 | "size": { 676 | "0": 315, 677 | "1": 106 678 | }, 679 | "flags": {}, 680 | "order": 0, 681 | "mode": 0, 682 | "outputs": [ 683 | { 684 | "name": "LATENT", 685 | "type": "LATENT", 686 | "links": [ 687 | 192 688 | ], 689 | "shape": 3, 690 | "slot_index": 0 691 | } 692 | ], 693 | "properties": { 694 | "Node name for S&R": "EmptyLatentImage" 695 | }, 696 | "widgets_values": [ 697 | 512, 698 | 512, 699 | 1 700 | ] 701 | } 702 | ], 703 | "links": [ 704 | [ 705 | 140, 706 | 66, 707 | 0, 708 | 77, 709 | 1, 710 | "CONDITIONING" 711 | ], 712 | [ 713 | 141, 714 | 67, 715 | 0, 716 | 77, 717 | 2, 718 | "CONDITIONING" 719 | ], 720 | [ 721 | 165, 722 | 80, 723 | 1, 724 | 66, 725 | 0, 726 | "CLIP" 727 | ], 728 | [ 729 | 166, 730 | 80, 731 | 1, 732 | 67, 733 | 0, 734 | "CLIP" 735 | ], 736 | [ 737 | 170, 738 | 77, 739 | 0, 740 | 35, 741 | 0, 742 | "LATENT" 743 | ], 744 | [ 745 | 192, 746 | 89, 747 | 0, 748 | 77, 749 | 3, 750 | "LATENT" 751 | ], 752 | [ 753 | 217, 754 | 80, 755 | 2, 756 | 42, 757 | 1, 758 | "VAE" 759 | ], 760 | [ 761 | 222, 762 | 42, 763 | 0, 764 | 98, 765 | 3, 766 | "LATENT" 767 | ], 768 | [ 769 | 223, 770 | 76, 771 | 0, 772 | 98, 773 | 0, 774 | "MODEL" 775 | ], 776 | [ 777 | 224, 778 | 76, 779 | 1, 780 | 98, 781 | 1, 782 | "MODEL" 783 | ], 784 | [ 785 | 225, 786 | 80, 787 | 2, 788 | 35, 789 | 1, 790 | "VAE" 791 | ], 792 | [ 793 | 226, 794 | 80, 795 | 1, 796 | 98, 797 | 2, 798 | "CLIP" 799 | ], 800 | [ 801 | 227, 802 | 35, 803 | 0, 804 | 100, 805 | 0, 806 | "IMAGE" 807 | ], 808 | [ 809 | 228, 810 | 19, 811 | 0, 812 | 101, 813 | 0, 814 | "IMAGE" 815 | ], 816 | [ 817 | 229, 818 | 101, 819 | 0, 820 | 102, 821 | 1, 822 | "MASK" 823 | ], 824 | [ 825 | 230, 826 | 19, 827 | 0, 828 | 
102, 829 | 0, 830 | "IMAGE" 831 | ], 832 | [ 833 | 231, 834 | 102, 835 | 0, 836 | 42, 837 | 0, 838 | "IMAGE" 839 | ], 840 | [ 841 | 232, 842 | 102, 843 | 0, 844 | 103, 845 | 0, 846 | "IMAGE" 847 | ], 848 | [ 849 | 243, 850 | 107, 851 | 1, 852 | 110, 853 | 1, 854 | "IPADAPTER" 855 | ], 856 | [ 857 | 245, 858 | 107, 859 | 0, 860 | 110, 861 | 0, 862 | "MODEL" 863 | ], 864 | [ 865 | 247, 866 | 102, 867 | 0, 868 | 110, 869 | 2, 870 | "IMAGE" 871 | ], 872 | [ 873 | 248, 874 | 80, 875 | 0, 876 | 107, 877 | 0, 878 | "MODEL" 879 | ], 880 | [ 881 | 249, 882 | 110, 883 | 0, 884 | 76, 885 | 0, 886 | "MODEL" 887 | ], 888 | [ 889 | 250, 890 | 98, 891 | 0, 892 | 77, 893 | 0, 894 | "MODEL" 895 | ] 896 | ], 897 | "groups": [], 898 | "config": {}, 899 | "extra": { 900 | "workspace_info": { 901 | "id": "uYaY8JVGliGxX8okXvJP4", 902 | "name": "Untitled Flow", 903 | "saveLock": false, 904 | "cloudID": null, 905 | "coverMediaPath": null 906 | }, 907 | "ds": { 908 | "scale": 1.1, 909 | "offset": { 910 | "0": -231.94835430034072, 911 | "1": -63.33645446524065 912 | } 913 | } 914 | }, 915 | "version": 0.4 916 | } -------------------------------------------------------------------------------- /nodes.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | import folder_paths 4 | 5 | import comfy.model_patcher 6 | import comfy.ldm.models.autoencoder 7 | import comfy.utils 8 | import comfy.sample 9 | import comfy.samplers 10 | import comfy.sampler_helpers 11 | 12 | from .utils import pt_hash 13 | from comfy import model_management 14 | from .attn_handler import SaveAttnInputPatch, InputPatch, ReplacePatch, UnetFunctionWrapper, SamplerCfgFunctionWrapper 15 | 16 | class AttnStoredExtra: 17 | def __init__(self,extra,type=1) -> None: 18 | self.type = type 19 | if type == 1: 20 | self.data = extra.unsqueeze(0) 21 | else: 22 | self.data = extra 23 | 24 | def can_concat(self,other): 25 | return True 26 | 27 | def concat(self, extras): 28 | if self.type == 1: 29 | out = [self.data] 30 | for x in extras: 31 | out.append(x.data) 32 | return torch.cat(out) 33 | elif self.type == 2: 34 | out = [self.data] 35 | for x in extras: 36 | out.append(x.data) 37 | return out 38 | else: 39 | if self.data is not None: 40 | return self.data 41 | else: 42 | for x in extras: 43 | if x.data is not None: 44 | return x.data 45 | return None 46 | 47 | class LoadMagicClothingModel: 48 | @classmethod 49 | def INPUT_TYPES(s): 50 | return {"required": 51 | {"sourceModel": ("MODEL",), 52 | "magicClothingUnet": (folder_paths.get_filename_list("unet"), ), 53 | } 54 | } 55 | RETURN_TYPES = ("MODEL", "MODEL") 56 | RETURN_NAMES = ("sourceModel", "magicClothingModel") 57 | FUNCTION = "load_unet" 58 | 59 | CATEGORY = "loaders" 60 | 61 | def load_unet(self, sourceModel, magicClothingUnet): 62 | unet_path = folder_paths.get_full_path("unet", magicClothingUnet) 63 | unet_state_dict = comfy.utils.load_torch_file(unet_path) 64 | model_config = copy.deepcopy(sourceModel.model.model_config) 65 | if model_config.unet_config["in_channels"] == 9: 66 | model_config.unet_config["in_channels"] = 4 67 | model_config.unet_config["model_channels"] = 320 68 | 69 | source_state_dict = sourceModel.model.diffusion_model.state_dict() 70 | 71 | diffusers_keys = comfy.utils.unet_to_diffusers(model_config.unet_config) 72 | 73 | new_sd = {} 74 | for k in diffusers_keys: 75 | ldm_k = diffusers_keys[k] 76 | if k in unet_state_dict: 77 | new_sd[diffusers_keys[k]] = unet_state_dict.pop(k) 78 | elif ldm_k in 
source_state_dict: 79 | new_sd[ldm_k] = source_state_dict[ldm_k] 80 | 81 | parameters = comfy.utils.calculate_parameters(new_sd) 82 | 83 | load_device = model_management.get_torch_device() 84 | offload_device = model_management.unet_offload_device() 85 | unet_dtype = model_management.unet_dtype(model_params=parameters, supported_dtypes=model_config.supported_inference_dtypes) 86 | manual_cast_dtype = model_management.unet_manual_cast(unet_dtype, load_device, model_config.supported_inference_dtypes) 87 | model_config.set_inference_dtype(unet_dtype, manual_cast_dtype) 88 | model = model_config.get_model(new_sd, "") 89 | model = model.to(offload_device) 90 | model.load_model_weights(new_sd, "") 91 | left_over = unet_state_dict.keys() 92 | if len(left_over) > 0: 93 | print("left over keys in unet: {}".format(left_over)) 94 | model_patcher = comfy.model_patcher.ModelPatcher(model, load_device=load_device, offload_device=offload_device) 95 | return (sourceModel,model_patcher) 96 | 97 | 98 | class AddMagicClothingAttention: 99 | @classmethod 100 | def INPUT_TYPES(s): 101 | return {"required": 102 | {"sourceModel": ("MODEL",), 103 | "magicClothingModel": ("MODEL",), 104 | "clip": ("CLIP", ), 105 | "enable_feature_guidance": ("BOOLEAN", {"default": True}), 106 | "feature_image": ("LATENT", ), 107 | "feature_guidance_scale": ("FLOAT", {"default": 2.5, "min": 0.0, "max": 10.0, "step": 0.1, "round": 0.01}), 108 | # "sigma": ("FLOAT", {"default": 0.71, "min": 0.0, "max": 3.0, "step": 0.01, "round": 0.01}), 109 | # "sampler_name": (comfy.samplers.KSampler.SAMPLERS, ), 110 | # "scheduler": (comfy.samplers.KSampler.SCHEDULERS, ), 111 | # "sigma": ("FLOAT", {"default": 0, "min": 0.0, "max": 100.0, "step": 0.05}), 112 | # "start_step":("INT", {"default": 0, "min": 0, "max": 100, "step": 1}), 113 | # "end_step":("INT", {"default": 100, "min": 0, "max": 100, "step": 1}), 114 | # "steps": ("INT", {"default": 20, "min": 1, "max": 100, "step": 1}), 115 | } 116 | } 117 | RETURN_TYPES = ("MODEL",) 118 | RETURN_NAMES = ("MODEL",) 119 | 120 | FUNCTION = "add_features" 121 | 122 | CATEGORY = "model_patches" 123 | 124 | def add_features(self, sourceModel,magicClothingModel, clip,enable_feature_guidance ,feature_image,feature_guidance_scale, 125 | # sigma,sampler_name,scheduler,start_step=0,end_step = 100,steps = 20, 126 | ): 127 | attn_stored = self.calculate_features_zj(magicClothingModel,clip, feature_image) 128 | attn_stored["enable_feature_guidance"] = enable_feature_guidance 129 | attn_stored["feature_guidance_scale"] = feature_guidance_scale 130 | attn_stored_data = attn_stored["data"] 131 | sourceModel = sourceModel.clone() 132 | sourceModel.set_model_unet_function_wrapper(UnetFunctionWrapper()) 133 | sourceModel.set_model_sampler_cfg_function(SamplerCfgFunctionWrapper()) 134 | sourceModel.set_model_attn1_patch(InputPatch()) 135 | for block_name in attn_stored_data.keys(): 136 | for block_number in attn_stored_data[block_name].keys(): 137 | for attention_index in attn_stored_data[block_name][block_number].keys(): 138 | sourceModel.set_model_attn1_replace(ReplacePatch(), block_name, block_number, attention_index) 139 | self.inject_comfyui() 140 | sourceModel.model_options["transformer_options"]["attn_stored"] = attn_stored 141 | return (sourceModel,) 142 | 143 | def inject_comfyui(self): 144 | old_get_area_and_mult = comfy.samplers.get_area_and_mult 145 | def get_area_and_mult(self, *args, **kwargs): 146 | result = old_get_area_and_mult(self, *args, **kwargs) 147 | mult = result[1] 148 | conditioning = result[2] 
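# area (result[3]) and control (result[4]) are unpacked next; mult, area and control are
# then wrapped in AttnStoredExtra (types 1/2/3) and attached to the conditioning dict, so
# that ComfyUI's cond batching can carry them along via can_concat/concat and the
# attention patches can read them back during sampling.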
149 | area = result[3] 150 | control = result[4] 151 | conditioning["c_attn_stored_mult"] = AttnStoredExtra(mult, 1) 152 | conditioning["c_attn_stored_area"] = AttnStoredExtra(area, 2) 153 | conditioning["c_attn_stored_control"] = AttnStoredExtra(control, 3) 154 | return result 155 | comfy.samplers.get_area_and_mult = get_area_and_mult 156 | 157 | def calculate_features(self,magicClothingModel, source_clip,feature_image,sigma =None,start_step =None,end_step =None,steps =None,scheduler =None,sampler_name =None): 158 | magicClothingModel.set_model_attn1_patch(SaveAttnInputPatch()) 159 | attn_stored = {} 160 | attn_stored["data"] = {} 161 | magicClothingModel.model_options["transformer_options"]["attn_stored"] = attn_stored 162 | 163 | latent_image = feature_image["samples"] 164 | if latent_image.shape[0] > 1: 165 | latent_image = torch.chunk(latent_image, latent_image.shape[0])[0] 166 | noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") 167 | noise = noise+0 168 | disable_pbar = not comfy.utils.PROGRESS_BAR_ENABLED 169 | positive_tokens = source_clip.tokenize("") 170 | positive_cond, positive_pooled = source_clip.encode_from_tokens( 171 | positive_tokens, return_pooled=True) 172 | positive = [[positive_cond, {"pooled_output": positive_pooled}]] 173 | negative = [] 174 | dtype = magicClothingModel.model.get_dtype() 175 | latent_image = latent_image.to(magicClothingModel.load_device).to(dtype) 176 | noise = noise.to(magicClothingModel.load_device).to(dtype) 177 | sigmas = torch.tensor([1,0]) 178 | samples = comfy.sample.sample(magicClothingModel, noise, 1, 1, "uni_pc", "karras", 179 | positive, negative, latent_image, denoise=1.0, 180 | disable_noise=False, start_step=None, 181 | last_step=None, force_full_denoise=False,sigmas=sigmas, 182 | noise_mask=None, callback=None, disable_pbar=disable_pbar, seed=41) 183 | del positive_cond 184 | del positive_pooled 185 | del positive_tokens 186 | latent_image = feature_image["samples"].to(model_management.unet_offload_device()) 187 | return attn_stored 188 | 189 | def _calculate_sigmas(self,steps,model_sampling,scheduler,sampler_name): 190 | sigmas = None 191 | 192 | discard_penultimate_sigma = False 193 | if sampler_name in comfy.samplers.KSampler.DISCARD_PENULTIMATE_SIGMA_SAMPLERS: 194 | steps += 1 195 | discard_penultimate_sigma = True 196 | 197 | sigmas = comfy.samplers.calculate_sigmas(model_sampling,scheduler, steps) 198 | 199 | if discard_penultimate_sigma: 200 | sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) 201 | return sigmas 202 | 203 | def calculate_features_zj(self,magicClothingModel, source_clip,feature_image,sigma = 0,start_step =None,end_step =None,steps =None,scheduler =None,sampler_name =None): 204 | magicClothingModel.set_model_attn1_patch(SaveAttnInputPatch()) 205 | attn_stored = {} 206 | attn_stored["data"] = {} 207 | magicClothingModel.model_options["transformer_options"]["attn_stored"] = attn_stored 208 | 209 | latent_image = feature_image["samples"] 210 | if latent_image.shape[0] > 1: 211 | latent_image = torch.chunk(latent_image, latent_image.shape[0])[0] 212 | positive_tokens = source_clip.tokenize("") 213 | positive_cond, positive_pooled = source_clip.encode_from_tokens(positive_tokens, return_pooled=True) 214 | dtype = magicClothingModel.model.get_dtype() 215 | 216 | latent_image = magicClothingModel.model.process_latent_in(latent_image).to(magicClothingModel.load_device) 217 | context = positive_cond.to(magicClothingModel.load_device).to(dtype) 218 | # sigmas = 
self._calculate_sigmas(steps,magicClothingModel.model.model_sampling,scheduler,sampler_name) 219 | # sigmas = sigmas.to(magicClothingModel.load_device) 220 | # start_step = max(0, min(start_step, steps)) 221 | # end_step = max(0, min(end_step, steps)) 222 | # calc_steps = sigmas[start_step:end_step] 223 | # calc_sigmas = [calc_steps[i].item() for i in range(calc_steps.shape[0])] 224 | # attn_stored["calc_sigmas"] = calc_sigmas 225 | # real_sigma = sigmas[0].expand((latent_image.shape[0])) 226 | # real_sigma = (real_sigma*0+sigma).to(dtype) 227 | real_sigma = torch.tensor([sigma], dtype=dtype).to(magicClothingModel.load_device) 228 | timestep = real_sigma * 0 229 | latent_image=latent_image.to(magicClothingModel.load_device).to(dtype) 230 | # xc = magicClothingModel.model.model_sampling.calculate_input(real_sigma, latent_image).to(dtype) 231 | model_management.load_model_gpu(magicClothingModel) 232 | magicClothingModel.model.diffusion_model(latent_image, timestep, context=context, control=None, transformer_options=magicClothingModel.model_options["transformer_options"]) 233 | comfy.sampler_helpers.cleanup_models({}, [magicClothingModel]) 234 | return attn_stored 235 | 236 | NODE_CLASS_MAPPINGS = { 237 | "Load Magic Clothing Model": LoadMagicClothingModel, 238 | "Add Magic Clothing Attention": AddMagicClothingAttention, 239 | } 240 | 241 | NODE_DISPLAY_NAME_MAPPINGS = { 242 | "Load Magic Clothing Model": "Load Magic Clothing Model", 243 | "Add Magic Clothing Attention": "Add Magic Clothing Attention", 244 | } 245 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "comfyui-magic-clothing" 3 | description = "The comfyui supported version of the [a/Magic Clothing](https://github.com/ShineChen1024/MagicClothing) project, not the diffusers version, allows direct integration with modules such as ipadapter" 4 | version = "1.0.0" 5 | license = "LICENSE" 6 | 7 | [project.urls] 8 | Repository = "https://github.com/longgui0318/comfyui-magic-clothing" 9 | # Used by Comfy Registry https://comfyregistry.org 10 | 11 | [tool.comfy] 12 | PublisherId = "longgui0318" 13 | DisplayName = "comfyui-magic-clothing" 14 | Icon = "" 15 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | def handle_block_info(block_key, detection_unet_diffusers_keys, type="attn1"): 4 | block_weight_key = block_key[:block_key.find(type)+len(type)] 5 | real_key = None 6 | for __key in detection_unet_diffusers_keys: 7 | if block_weight_key in __key: 8 | real_key = detection_unet_diffusers_keys[__key] 9 | break 10 | if real_key is None: 11 | return (None, None, None) 12 | block_level = real_key.split(".") 13 | if block_level[0] == "input_blocks": 14 | block_name = "input" 15 | block_number = int(block_level[1]) 16 | elif block_level[0] == "middle_block": 17 | block_name = "middle" 18 | block_number = int(block_level[1]) 19 | elif block_level[0] == "output_blocks": 20 | block_name = "output" 21 | block_number = int(block_level[1]) 22 | else: 23 | block_name = None 24 | block_number = 0 25 | attention_index = 0 26 | for i, v in enumerate(block_level): 27 | if v == "transformer_blocks": 28 | attention_index = int(block_level[i+1]) 29 | break 30 | return (block_name, block_number, attention_index) 31 | 32 | def save_attn(value, attn_store, 
block_name, block_number, attention_index): 33 | if attn_store is None: 34 | return 35 | if block_name not in attn_store: 36 | attn_store[block_name] = {} 37 | if block_number not in attn_store[block_name]: 38 | attn_store[block_name][block_number] = {} 39 | attn_store[block_name][block_number][attention_index] = value 40 | 41 | def clean_attn_stored_memory(attn_stored): 42 | del_key_if_exists(attn_stored,"cond_or_uncond_out_cond") 43 | del_key_if_exists(attn_stored,"cond_or_uncond_out_count") 44 | del_key_if_exists(attn_stored,"input_x_extra_options") 45 | del_key_if_exists(attn_stored,"out_cond_init") 46 | del_key_if_exists(attn_stored,"out_count_init") 47 | del_key_if_exists(attn_stored,"cond_or_uncond_replenishment") 48 | del_key_if_exists(attn_stored,"cond_or_uncond_extra_options") 49 | 50 | def del_key_if_exists(obj,key): 51 | if key in obj: 52 | del obj[key] 53 | 54 | 55 | def pt_hash(self,key=None): 56 | if True: 57 | return "" 58 | data = self.cpu().numpy() 59 | if not data.flags['C_CONTIGUOUS']: 60 | data = data.copy(order='C') 61 | has_object = hashlib.sha256(data) 62 | has_value = has_object.hexdigest() 63 | del has_object 64 | del data 65 | if key is not None: 66 | print(f"Debug Test: {key}====={has_value}") 67 | return has_value 68 | 69 | def pt_first_line(self,key=None): 70 | if False: 71 | return 72 | first_line = self 73 | while first_line.dim() > 1: 74 | first_line = first_line[0] 75 | if first_line.dim() <= 1 and key is not None: 76 | print(f"Debug: {key}====={first_line}") --------------------------------------------------------------------------------
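A closing note on the helpers in utils.py above: handle_block_info resolves a diffusers-style attention key to ComfyUI's (block_name, block_number, attention_index) coordinates by looking the key up in the mapping returned by comfy.utils.unet_to_diffusers and parsing the resulting input_blocks/middle_block/output_blocks path, and save_attn files a captured value under those coordinates. The following is a minimal sketch of how the two fit together; the key string, the model_config and captured_value names, and the expected ("input", 1, 0) result are illustrative assumptions for an SD1.5 UNet, not something taken from this repository:

    import comfy.utils
    from .utils import handle_block_info, save_attn  # the helpers defined above

    # model_config: a ComfyUI model config (assumed available); captured_value: any tensor to store
    diffusers_keys = comfy.utils.unet_to_diffusers(model_config.unet_config)  # diffusers key -> ldm key
    attn_store = {}
    block_key = "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight"
    block_name, block_number, attention_index = handle_block_info(block_key, diffusers_keys)
    # for an SD1.5 UNet this is expected to resolve to ("input", 1, 0)
    save_attn(captured_value, attn_store, block_name, block_number, attention_index)
    # attn_store now holds {"input": {1: {0: captured_value}}}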