├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── __init__.py
├── configs
│   ├── text_encoder_config.json
│   ├── tokenizer
│   │   ├── config.json
│   │   ├── merges.txt
│   │   ├── preprocessor_config.json
│   │   ├── special_tokens_map.json
│   │   ├── tokenizer.json
│   │   ├── tokenizer_config.json
│   │   └── vocab.json
│   ├── tokenizer_config.json
│   └── v1-inference.yaml
├── examples
│   ├── IC-Light_example_spotlight_01.json
│   └── iclight_spotlight_batch_example.json
├── hidiffusion
│   ├── __init__.py
│   ├── hidiffusion.py
│   ├── sd_module_key
│   │   ├── sd15_module_key.txt
│   │   └── sdxl_module_key.txt
│   └── utils.py
├── nodes.py
├── requirements.txt
└── scheduling_tcd.py
/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | checkpoints/ 3 | *.py[cod] 4 | *$py.class 5 | *.egg-info 6 | .pytest_cache -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI wrapper nodes for IC-Light 2 | 3 | # UPDATE: 4 | Moving my efforts to a more native implementation: https://github.com/kijai/ComfyUI-IC-Light 5 | 6 | ## Unfinished; development has stopped 7 | Original repo: https://github.com/lllyasviel/IC-Light/ 8 | 9 | Models: https://huggingface.co/lllyasviel/ic-light/tree/main 10 | 11 | The model files go into `ComfyUI/models/unet`. 12 | 13 | ![image](https://github.com/kijai/ComfyUI-IC-Light-Wrapper/assets/40791699/9687a243-d7af-4b08-99e9-d260f1859584) 14 | 15 | 16 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS 2 | 3 | __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] -------------------------------------------------------------------------------- /configs/text_encoder_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "openai/clip-vit-large-patch14", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "quick_gelu", 11 | "hidden_size": 768, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 3072, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 12, 19 | "num_hidden_layers": 12, 20 | "pad_token_id": 1, 21 | "projection_dim": 768, 22 | "torch_dtype": "float32", 23 | "transformers_version": "4.22.0.dev0", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /configs/tokenizer/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "clip-vit-large-patch14/", 3 | "architectures": [ 4 | "CLIPModel" 5 | ], 6 | "initializer_factor": 1.0, 7 | "logit_scale_init_value": 2.6592, 8 | "model_type": "clip", 9 | "projection_dim": 768, 10 | "text_config": { 11 | "_name_or_path": "", 12 | "add_cross_attention": false, 13 | "architectures": null, 14 | "attention_dropout": 0.0, 15 | "bad_words_ids": null, 16 | "bos_token_id": 0, 17 | "chunk_size_feed_forward": 0, 18 | "cross_attention_hidden_size": null, 19 | "decoder_start_token_id": null, 20 | "diversity_penalty": 0.0, 21 | "do_sample": false, 22 | "dropout": 0.0, 23 | "early_stopping": false, 24 | "encoder_no_repeat_ngram_size": 0, 25 | "eos_token_id": 2, 26 | "finetuning_task": null, 27 | "forced_bos_token_id": null, 28 | "forced_eos_token_id": null, 29 | "hidden_act": "quick_gelu", 30 | "hidden_size": 768, 31 | "id2label": { 32 | "0": "LABEL_0", 33 | "1": "LABEL_1" 34 | }, 35 | "initializer_factor": 1.0, 36 | "initializer_range": 0.02, 37 | "intermediate_size": 3072, 38 | "is_decoder": false, 39 | "is_encoder_decoder": false, 40 | "label2id": { 41 | "LABEL_0": 0, 42 | "LABEL_1": 1 43 | }, 44 | "layer_norm_eps": 1e-05, 45 | "length_penalty": 1.0, 46 | "max_length": 20, 47 | "max_position_embeddings": 77, 48 | "min_length": 0, 49 | "model_type": "clip_text_model", 50 | "no_repeat_ngram_size": 0, 51 | "num_attention_heads": 12, 52 | "num_beam_groups": 1, 53 | "num_beams": 1, 54 | 
"num_hidden_layers": 12, 55 | "num_return_sequences": 1, 56 | "output_attentions": false, 57 | "output_hidden_states": false, 58 | "output_scores": false, 59 | "pad_token_id": 1, 60 | "prefix": null, 61 | "problem_type": null, 62 | "projection_dim" : 768, 63 | "pruned_heads": {}, 64 | "remove_invalid_values": false, 65 | "repetition_penalty": 1.0, 66 | "return_dict": true, 67 | "return_dict_in_generate": false, 68 | "sep_token_id": null, 69 | "task_specific_params": null, 70 | "temperature": 1.0, 71 | "tie_encoder_decoder": false, 72 | "tie_word_embeddings": true, 73 | "tokenizer_class": null, 74 | "top_k": 50, 75 | "top_p": 1.0, 76 | "torch_dtype": null, 77 | "torchscript": false, 78 | "transformers_version": "4.16.0.dev0", 79 | "use_bfloat16": false, 80 | "vocab_size": 49408 81 | }, 82 | "text_config_dict": { 83 | "hidden_size": 768, 84 | "intermediate_size": 3072, 85 | "num_attention_heads": 12, 86 | "num_hidden_layers": 12, 87 | "projection_dim": 768 88 | }, 89 | "torch_dtype": "float32", 90 | "transformers_version": null, 91 | "vision_config": { 92 | "_name_or_path": "", 93 | "add_cross_attention": false, 94 | "architectures": null, 95 | "attention_dropout": 0.0, 96 | "bad_words_ids": null, 97 | "bos_token_id": null, 98 | "chunk_size_feed_forward": 0, 99 | "cross_attention_hidden_size": null, 100 | "decoder_start_token_id": null, 101 | "diversity_penalty": 0.0, 102 | "do_sample": false, 103 | "dropout": 0.0, 104 | "early_stopping": false, 105 | "encoder_no_repeat_ngram_size": 0, 106 | "eos_token_id": null, 107 | "finetuning_task": null, 108 | "forced_bos_token_id": null, 109 | "forced_eos_token_id": null, 110 | "hidden_act": "quick_gelu", 111 | "hidden_size": 1024, 112 | "id2label": { 113 | "0": "LABEL_0", 114 | "1": "LABEL_1" 115 | }, 116 | "image_size": 224, 117 | "initializer_factor": 1.0, 118 | "initializer_range": 0.02, 119 | "intermediate_size": 4096, 120 | "is_decoder": false, 121 | "is_encoder_decoder": false, 122 | "label2id": { 123 | "LABEL_0": 0, 124 | "LABEL_1": 1 125 | }, 126 | "layer_norm_eps": 1e-05, 127 | "length_penalty": 1.0, 128 | "max_length": 20, 129 | "min_length": 0, 130 | "model_type": "clip_vision_model", 131 | "no_repeat_ngram_size": 0, 132 | "num_attention_heads": 16, 133 | "num_beam_groups": 1, 134 | "num_beams": 1, 135 | "num_hidden_layers": 24, 136 | "num_return_sequences": 1, 137 | "output_attentions": false, 138 | "output_hidden_states": false, 139 | "output_scores": false, 140 | "pad_token_id": null, 141 | "patch_size": 14, 142 | "prefix": null, 143 | "problem_type": null, 144 | "projection_dim" : 768, 145 | "pruned_heads": {}, 146 | "remove_invalid_values": false, 147 | "repetition_penalty": 1.0, 148 | "return_dict": true, 149 | "return_dict_in_generate": false, 150 | "sep_token_id": null, 151 | "task_specific_params": null, 152 | "temperature": 1.0, 153 | "tie_encoder_decoder": false, 154 | "tie_word_embeddings": true, 155 | "tokenizer_class": null, 156 | "top_k": 50, 157 | "top_p": 1.0, 158 | "torch_dtype": null, 159 | "torchscript": false, 160 | "transformers_version": "4.16.0.dev0", 161 | "use_bfloat16": false 162 | }, 163 | "vision_config_dict": { 164 | "hidden_size": 1024, 165 | "intermediate_size": 4096, 166 | "num_attention_heads": 16, 167 | "num_hidden_layers": 24, 168 | "patch_size": 14, 169 | "projection_dim": 768 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /configs/tokenizer/preprocessor_config.json: -------------------------------------------------------------------------------- 1 
| { 2 | "crop_size": 224, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.48145466, 9 | 0.4578275, 10 | 0.40821073 11 | ], 12 | "image_std": [ 13 | 0.26862954, 14 | 0.26130258, 15 | 0.27577711 16 | ], 17 | "resample": 3, 18 | "size": 224 19 | } 20 | -------------------------------------------------------------------------------- /configs/tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<|endoftext|>"} -------------------------------------------------------------------------------- /configs/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "unk_token": { 3 | "content": "<|endoftext|>", 4 | "single_word": false, 5 | "lstrip": false, 6 | "rstrip": false, 7 | "normalized": true, 8 | "__type": "AddedToken" 9 | }, 10 | "bos_token": { 11 | "content": "<|startoftext|>", 12 | "single_word": false, 13 | "lstrip": false, 14 | "rstrip": false, 15 | "normalized": true, 16 | "__type": "AddedToken" 17 | }, 18 | "eos_token": { 19 | "content": "<|endoftext|>", 20 | "single_word": false, 21 | "lstrip": false, 22 | "rstrip": false, 23 | "normalized": true, 24 | "__type": "AddedToken" 25 | }, 26 | "pad_token": "<|endoftext|>", 27 | "add_prefix_space": false, 28 | "errors": "replace", 29 | "do_lower_case": true, 30 | "name_or_path": "openai/clip-vit-base-patch32", 31 | "model_max_length": 77, 32 | "special_tokens_map_file": "./special_tokens_map.json", 33 | "tokenizer_class": "CLIPTokenizer" 34 | } 35 | -------------------------------------------------------------------------------- /configs/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": { 4 | "__type": "AddedToken", 5 | "content": "<|startoftext|>", 6 | "lstrip": false, 7 | "normalized": true, 8 | "rstrip": false, 9 | "single_word": false 10 | }, 11 | "do_lower_case": true, 12 | "eos_token": { 13 | "__type": "AddedToken", 14 | "content": "<|endoftext|>", 15 | "lstrip": false, 16 | "normalized": true, 17 | "rstrip": false, 18 | "single_word": false 19 | }, 20 | "errors": "replace", 21 | "model_max_length": 77, 22 | "name_or_path": "openai/clip-vit-large-patch14", 23 | "pad_token": "<|endoftext|>", 24 | "special_tokens_map_file": "./special_tokens_map.json", 25 | "tokenizer_class": "CLIPTokenizer", 26 | "unk_token": { 27 | "__type": "AddedToken", 28 | "content": "<|endoftext|>", 29 | "lstrip": false, 30 | "normalized": true, 31 | "rstrip": false, 32 | "single_word": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /configs/v1-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | 
cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. ] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | -------------------------------------------------------------------------------- /examples/IC-Light_example_spotlight_01.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 36, 3 | "last_link_id": 63, 4 | "nodes": [ 5 | { 6 | "id": 5, 7 | "type": "diffusers_model_loader", 8 | "pos": [ 9 | 996, 10 | 292 11 | ], 12 | "size": { 13 | "0": 267, 14 | "1": 66 15 | }, 16 | "flags": {}, 17 | "order": 3, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "model", 22 | "type": "MODEL", 23 | "link": 47, 24 | "slot_index": 0 25 | }, 26 | { 27 | "name": "clip", 28 | "type": "CLIP", 29 | "link": 2 30 | }, 31 | { 32 | "name": "vae", 33 | "type": "VAE", 34 | "link": 3 35 | } 36 | ], 37 | "outputs": [ 38 | { 39 | "name": "diffusers_model", 40 | "type": "DIFFUSERSMODEL", 41 | "links": [ 42 | 26 43 | ], 44 | "shape": 3, 45 | "slot_index": 0 46 | } 47 | ], 48 | "properties": { 49 | "Node name for S&R": "diffusers_model_loader" 50 | } 51 | }, 52 | { 53 | "id": 22, 54 | "type": "LoadICLightUnetDiffusers", 55 | "pos": [ 56 | 1343, 57 | 301 58 | ], 59 | "size": { 60 | "0": 447, 61 | "1": 58 62 | }, 63 | "flags": {}, 64 | "order": 6, 65 | "mode": 0, 66 | "inputs": [ 67 | { 68 | "name": "diffusersmodel", 69 | "type": "DIFFUSERSMODEL", 70 | "link": 26 71 | } 72 | ], 73 | "outputs": [ 74 | { 75 | "name": "DIFFUSERSMODEL", 76 | "type": "DIFFUSERSMODEL", 77 | "links": [ 78 | 58 79 | ], 80 | "shape": 3, 81 | "slot_index": 0 82 | } 83 | ], 84 | "properties": { 85 | "Node name for S&R": "LoadICLightUnetDiffusers" 86 | }, 87 | "widgets_values": [ 88 | "iclight_sd15_fc.safetensors" 89 | ] 90 | }, 91 | { 92 | "id": 8, 93 | "type": "VAEEncode", 94 | "pos": [ 95 | 1022, 96 | 469 97 | ], 98 | "size": { 99 | "0": 210, 100 | "1": 46 101 | }, 102 | "flags": {}, 103 | "order": 4, 104 | "mode": 0, 105 | "inputs": [ 106 | { 107 | "name": "pixels", 108 | "type": "IMAGE", 109 | "link": 41, 110 | "slot_index": 0 111 | }, 112 | { 113 | "name": "vae", 114 | "type": "VAE", 115 | "link": 8 116 | } 
117 | ], 118 | "outputs": [ 119 | { 120 | "name": "LATENT", 121 | "type": "LATENT", 122 | "links": [ 123 | 59 124 | ], 125 | "shape": 3, 126 | "slot_index": 0 127 | } 128 | ], 129 | "properties": { 130 | "Node name for S&R": "VAEEncode" 131 | } 132 | }, 133 | { 134 | "id": 25, 135 | "type": "VAEEncode", 136 | "pos": [ 137 | 1055, 138 | 649 139 | ], 140 | "size": { 141 | "0": 210, 142 | "1": 46 143 | }, 144 | "flags": {}, 145 | "order": 10, 146 | "mode": 0, 147 | "inputs": [ 148 | { 149 | "name": "pixels", 150 | "type": "IMAGE", 151 | "link": 56, 152 | "slot_index": 0 153 | }, 154 | { 155 | "name": "vae", 156 | "type": "VAE", 157 | "link": 32, 158 | "slot_index": 1 159 | } 160 | ], 161 | "outputs": [ 162 | { 163 | "name": "LATENT", 164 | "type": "LATENT", 165 | "links": [ 166 | 60 167 | ], 168 | "shape": 3, 169 | "slot_index": 0 170 | } 171 | ], 172 | "properties": { 173 | "Node name for S&R": "VAEEncode" 174 | } 175 | }, 176 | { 177 | "id": 34, 178 | "type": "GrowMaskWithBlur", 179 | "pos": [ 180 | 620, 181 | 1030 182 | ], 183 | "size": { 184 | "0": 315, 185 | "1": 246 186 | }, 187 | "flags": {}, 188 | "order": 7, 189 | "mode": 0, 190 | "inputs": [ 191 | { 192 | "name": "mask", 193 | "type": "MASK", 194 | "link": 52 195 | } 196 | ], 197 | "outputs": [ 198 | { 199 | "name": "mask", 200 | "type": "MASK", 201 | "links": [ 202 | 53 203 | ], 204 | "shape": 3, 205 | "slot_index": 0 206 | }, 207 | { 208 | "name": "mask_inverted", 209 | "type": "MASK", 210 | "links": null, 211 | "shape": 3 212 | } 213 | ], 214 | "properties": { 215 | "Node name for S&R": "GrowMaskWithBlur" 216 | }, 217 | "widgets_values": [ 218 | 0, 219 | 0, 220 | true, 221 | false, 222 | 59.400000000000006, 223 | 1, 224 | 1, 225 | false 226 | ] 227 | }, 228 | { 229 | "id": 9, 230 | "type": "LoadImage", 231 | "pos": [ 232 | 104, 233 | 474 234 | ], 235 | "size": { 236 | "0": 315, 237 | "1": 314 238 | }, 239 | "flags": {}, 240 | "order": 0, 241 | "mode": 0, 242 | "outputs": [ 243 | { 244 | "name": "IMAGE", 245 | "type": "IMAGE", 246 | "links": [ 247 | 40 248 | ], 249 | "shape": 3, 250 | "slot_index": 0 251 | }, 252 | { 253 | "name": "MASK", 254 | "type": "MASK", 255 | "links": null, 256 | "shape": 3 257 | } 258 | ], 259 | "properties": { 260 | "Node name for S&R": "LoadImage" 261 | }, 262 | "widgets_values": [ 263 | "oldman.jpg", 264 | "image" 265 | ] 266 | }, 267 | { 268 | "id": 28, 269 | "type": "ImageResize+", 270 | "pos": [ 271 | 482, 272 | 479 273 | ], 274 | "size": { 275 | "0": 315, 276 | "1": 218 277 | }, 278 | "flags": {}, 279 | "order": 2, 280 | "mode": 0, 281 | "inputs": [ 282 | { 283 | "name": "image", 284 | "type": "IMAGE", 285 | "link": 40 286 | } 287 | ], 288 | "outputs": [ 289 | { 290 | "name": "IMAGE", 291 | "type": "IMAGE", 292 | "links": [ 293 | 41 294 | ], 295 | "shape": 3, 296 | "slot_index": 0 297 | }, 298 | { 299 | "name": "width", 300 | "type": "INT", 301 | "links": [ 302 | 54 303 | ], 304 | "shape": 3 305 | }, 306 | { 307 | "name": "height", 308 | "type": "INT", 309 | "links": [ 310 | 55 311 | ], 312 | "shape": 3 313 | } 314 | ], 315 | "properties": { 316 | "Node name for S&R": "ImageResize+" 317 | }, 318 | "widgets_values": [ 319 | 512, 320 | 512, 321 | "nearest", 322 | false, 323 | "always", 324 | 8 325 | ] 326 | }, 327 | { 328 | "id": 30, 329 | "type": "MaskToImage", 330 | "pos": [ 331 | 663, 332 | 806 333 | ], 334 | "size": { 335 | "0": 210, 336 | "1": 26 337 | }, 338 | "flags": {}, 339 | "order": 9, 340 | "mode": 0, 341 | "inputs": [ 342 | { 343 | "name": "mask", 344 | "type": "MASK", 345 | "link": 50 
346 | } 347 | ], 348 | "outputs": [ 349 | { 350 | "name": "IMAGE", 351 | "type": "IMAGE", 352 | "links": [ 353 | 56, 354 | 57 355 | ], 356 | "shape": 3, 357 | "slot_index": 0 358 | } 359 | ], 360 | "properties": { 361 | "Node name for S&R": "MaskToImage" 362 | } 363 | }, 364 | { 365 | "id": 29, 366 | "type": "CreateShapeMask", 367 | "pos": [ 368 | 262, 369 | 912 370 | ], 371 | "size": { 372 | "0": 315, 373 | "1": 270 374 | }, 375 | "flags": {}, 376 | "order": 5, 377 | "mode": 0, 378 | "inputs": [ 379 | { 380 | "name": "frame_width", 381 | "type": "INT", 382 | "link": 54, 383 | "widget": { 384 | "name": "frame_width" 385 | }, 386 | "slot_index": 0 387 | }, 388 | { 389 | "name": "frame_height", 390 | "type": "INT", 391 | "link": 55, 392 | "widget": { 393 | "name": "frame_height" 394 | }, 395 | "slot_index": 1 396 | } 397 | ], 398 | "outputs": [ 399 | { 400 | "name": "mask", 401 | "type": "MASK", 402 | "links": [ 403 | 52 404 | ], 405 | "shape": 3, 406 | "slot_index": 0 407 | }, 408 | { 409 | "name": "mask_inverted", 410 | "type": "MASK", 411 | "links": null, 412 | "shape": 3 413 | } 414 | ], 415 | "properties": { 416 | "Node name for S&R": "CreateShapeMask" 417 | }, 418 | "widgets_values": [ 419 | "circle", 420 | 1, 421 | 256, 422 | 256, 423 | 0, 424 | 512, 425 | 512, 426 | 256, 427 | 256 428 | ] 429 | }, 430 | { 431 | "id": 32, 432 | "type": "RemapMaskRange", 433 | "pos": [ 434 | 610, 435 | 890 436 | ], 437 | "size": { 438 | "0": 315, 439 | "1": 82 440 | }, 441 | "flags": {}, 442 | "order": 8, 443 | "mode": 0, 444 | "inputs": [ 445 | { 446 | "name": "mask", 447 | "type": "MASK", 448 | "link": 53 449 | } 450 | ], 451 | "outputs": [ 452 | { 453 | "name": "mask", 454 | "type": "MASK", 455 | "links": [ 456 | 50 457 | ], 458 | "shape": 3, 459 | "slot_index": 0 460 | } 461 | ], 462 | "properties": { 463 | "Node name for S&R": "RemapMaskRange" 464 | }, 465 | "widgets_values": [ 466 | 0, 467 | 0.9 468 | ] 469 | }, 470 | { 471 | "id": 33, 472 | "type": "PreviewImage", 473 | "pos": [ 474 | 989, 475 | 893 476 | ], 477 | "size": { 478 | "0": 342, 479 | "1": 347 480 | }, 481 | "flags": {}, 482 | "order": 11, 483 | "mode": 0, 484 | "inputs": [ 485 | { 486 | "name": "images", 487 | "type": "IMAGE", 488 | "link": 57 489 | } 490 | ], 491 | "properties": { 492 | "Node name for S&R": "PreviewImage" 493 | } 494 | }, 495 | { 496 | "id": 10, 497 | "type": "PreviewImage", 498 | "pos": [ 499 | 2076, 500 | 347 501 | ], 502 | "size": { 503 | "0": 526.656982421875, 504 | "1": 580.8809814453125 505 | }, 506 | "flags": {}, 507 | "order": 14, 508 | "mode": 0, 509 | "inputs": [ 510 | { 511 | "name": "images", 512 | "type": "IMAGE", 513 | "link": 63 514 | } 515 | ], 516 | "properties": { 517 | "Node name for S&R": "PreviewImage" 518 | } 519 | }, 520 | { 521 | "id": 35, 522 | "type": "iclight_diffusers_sampler", 523 | "pos": [ 524 | 1372, 525 | 434 526 | ], 527 | "size": { 528 | "0": 400, 529 | "1": 406 530 | }, 531 | "flags": {}, 532 | "order": 12, 533 | "mode": 0, 534 | "inputs": [ 535 | { 536 | "name": "diffusers_model", 537 | "type": "DIFFUSERSMODEL", 538 | "link": 58 539 | }, 540 | { 541 | "name": "latent", 542 | "type": "LATENT", 543 | "link": 59 544 | }, 545 | { 546 | "name": "bg_latent", 547 | "type": "LATENT", 548 | "link": 60 549 | } 550 | ], 551 | "outputs": [ 552 | { 553 | "name": "samples", 554 | "type": "LATENT", 555 | "links": [ 556 | 61 557 | ], 558 | "shape": 3, 559 | "slot_index": 0 560 | } 561 | ], 562 | "properties": { 563 | "Node name for S&R": "iclight_diffusers_sampler" 564 | }, 565 | 
"widgets_values": [ 566 | 512, 567 | 512, 568 | 25, 569 | 2, 570 | 0.9, 571 | 819223299872531, 572 | "randomize", 573 | "DPMSolverMultistepScheduler", 574 | "positive", 575 | "negative", 576 | false, 577 | true 578 | ] 579 | }, 580 | { 581 | "id": 36, 582 | "type": "VAEDecode", 583 | "pos": [ 584 | 1817, 585 | 427 586 | ], 587 | "size": { 588 | "0": 210, 589 | "1": 46 590 | }, 591 | "flags": {}, 592 | "order": 13, 593 | "mode": 0, 594 | "inputs": [ 595 | { 596 | "name": "samples", 597 | "type": "LATENT", 598 | "link": 61 599 | }, 600 | { 601 | "name": "vae", 602 | "type": "VAE", 603 | "link": 62, 604 | "slot_index": 1 605 | } 606 | ], 607 | "outputs": [ 608 | { 609 | "name": "IMAGE", 610 | "type": "IMAGE", 611 | "links": [ 612 | 63 613 | ], 614 | "shape": 3, 615 | "slot_index": 0 616 | } 617 | ], 618 | "properties": { 619 | "Node name for S&R": "VAEDecode" 620 | } 621 | }, 622 | { 623 | "id": 6, 624 | "type": "CheckpointLoaderSimple", 625 | "pos": [ 626 | 563, 627 | 285 628 | ], 629 | "size": { 630 | "0": 315, 631 | "1": 98 632 | }, 633 | "flags": {}, 634 | "order": 1, 635 | "mode": 0, 636 | "outputs": [ 637 | { 638 | "name": "MODEL", 639 | "type": "MODEL", 640 | "links": [ 641 | 47 642 | ], 643 | "shape": 3, 644 | "slot_index": 0 645 | }, 646 | { 647 | "name": "CLIP", 648 | "type": "CLIP", 649 | "links": [ 650 | 2 651 | ], 652 | "shape": 3, 653 | "slot_index": 1 654 | }, 655 | { 656 | "name": "VAE", 657 | "type": "VAE", 658 | "links": [ 659 | 3, 660 | 8, 661 | 32, 662 | 62 663 | ], 664 | "shape": 3, 665 | "slot_index": 2 666 | } 667 | ], 668 | "properties": { 669 | "Node name for S&R": "CheckpointLoaderSimple" 670 | }, 671 | "widgets_values": [ 672 | "1_5\\photon_v1.safetensors" 673 | ] 674 | } 675 | ], 676 | "links": [ 677 | [ 678 | 2, 679 | 6, 680 | 1, 681 | 5, 682 | 1, 683 | "CLIP" 684 | ], 685 | [ 686 | 3, 687 | 6, 688 | 2, 689 | 5, 690 | 2, 691 | "VAE" 692 | ], 693 | [ 694 | 8, 695 | 6, 696 | 2, 697 | 8, 698 | 1, 699 | "VAE" 700 | ], 701 | [ 702 | 26, 703 | 5, 704 | 0, 705 | 22, 706 | 0, 707 | "DIFFUSERSMODEL" 708 | ], 709 | [ 710 | 32, 711 | 6, 712 | 2, 713 | 25, 714 | 1, 715 | "VAE" 716 | ], 717 | [ 718 | 40, 719 | 9, 720 | 0, 721 | 28, 722 | 0, 723 | "IMAGE" 724 | ], 725 | [ 726 | 41, 727 | 28, 728 | 0, 729 | 8, 730 | 0, 731 | "IMAGE" 732 | ], 733 | [ 734 | 47, 735 | 6, 736 | 0, 737 | 5, 738 | 0, 739 | "MODEL" 740 | ], 741 | [ 742 | 50, 743 | 32, 744 | 0, 745 | 30, 746 | 0, 747 | "MASK" 748 | ], 749 | [ 750 | 52, 751 | 29, 752 | 0, 753 | 34, 754 | 0, 755 | "MASK" 756 | ], 757 | [ 758 | 53, 759 | 34, 760 | 0, 761 | 32, 762 | 0, 763 | "MASK" 764 | ], 765 | [ 766 | 54, 767 | 28, 768 | 1, 769 | 29, 770 | 0, 771 | "INT" 772 | ], 773 | [ 774 | 55, 775 | 28, 776 | 2, 777 | 29, 778 | 1, 779 | "INT" 780 | ], 781 | [ 782 | 56, 783 | 30, 784 | 0, 785 | 25, 786 | 0, 787 | "IMAGE" 788 | ], 789 | [ 790 | 57, 791 | 30, 792 | 0, 793 | 33, 794 | 0, 795 | "IMAGE" 796 | ], 797 | [ 798 | 58, 799 | 22, 800 | 0, 801 | 35, 802 | 0, 803 | "DIFFUSERSMODEL" 804 | ], 805 | [ 806 | 59, 807 | 8, 808 | 0, 809 | 35, 810 | 1, 811 | "LATENT" 812 | ], 813 | [ 814 | 60, 815 | 25, 816 | 0, 817 | 35, 818 | 2, 819 | "LATENT" 820 | ], 821 | [ 822 | 61, 823 | 35, 824 | 0, 825 | 36, 826 | 0, 827 | "LATENT" 828 | ], 829 | [ 830 | 62, 831 | 6, 832 | 2, 833 | 36, 834 | 1, 835 | "VAE" 836 | ], 837 | [ 838 | 63, 839 | 36, 840 | 0, 841 | 10, 842 | 0, 843 | "IMAGE" 844 | ] 845 | ], 846 | "groups": [], 847 | "config": {}, 848 | "extra": {}, 849 | "version": 0.4 850 | } 
-------------------------------------------------------------------------------- /examples/iclight_spotlight_batch_example.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 43, 3 | "last_link_id": 76, 4 | "nodes": [ 5 | { 6 | "id": 5, 7 | "type": "diffusers_model_loader", 8 | "pos": [ 9 | 996, 10 | 292 11 | ], 12 | "size": { 13 | "0": 267, 14 | "1": 66 15 | }, 16 | "flags": {}, 17 | "order": 4, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "model", 22 | "type": "MODEL", 23 | "link": 47, 24 | "slot_index": 0 25 | }, 26 | { 27 | "name": "clip", 28 | "type": "CLIP", 29 | "link": 2 30 | }, 31 | { 32 | "name": "vae", 33 | "type": "VAE", 34 | "link": 3 35 | } 36 | ], 37 | "outputs": [ 38 | { 39 | "name": "diffusers_model", 40 | "type": "DIFFUSERSMODEL", 41 | "links": [ 42 | 26 43 | ], 44 | "shape": 3, 45 | "slot_index": 0 46 | } 47 | ], 48 | "properties": { 49 | "Node name for S&R": "diffusers_model_loader" 50 | } 51 | }, 52 | { 53 | "id": 22, 54 | "type": "LoadICLightUnetDiffusers", 55 | "pos": [ 56 | 1343, 57 | 301 58 | ], 59 | "size": { 60 | "0": 447, 61 | "1": 58 62 | }, 63 | "flags": {}, 64 | "order": 7, 65 | "mode": 0, 66 | "inputs": [ 67 | { 68 | "name": "diffusersmodel", 69 | "type": "DIFFUSERSMODEL", 70 | "link": 26 71 | } 72 | ], 73 | "outputs": [ 74 | { 75 | "name": "DIFFUSERSMODEL", 76 | "type": "DIFFUSERSMODEL", 77 | "links": [ 78 | 70 79 | ], 80 | "shape": 3, 81 | "slot_index": 0 82 | } 83 | ], 84 | "properties": { 85 | "Node name for S&R": "LoadICLightUnetDiffusers" 86 | }, 87 | "widgets_values": [ 88 | "iclight_sd15_fc.safetensors" 89 | ] 90 | }, 91 | { 92 | "id": 8, 93 | "type": "VAEEncode", 94 | "pos": [ 95 | 1022, 96 | 469 97 | ], 98 | "size": { 99 | "0": 210, 100 | "1": 46 101 | }, 102 | "flags": {}, 103 | "order": 9, 104 | "mode": 0, 105 | "inputs": [ 106 | { 107 | "name": "pixels", 108 | "type": "IMAGE", 109 | "link": 59, 110 | "slot_index": 0 111 | }, 112 | { 113 | "name": "vae", 114 | "type": "VAE", 115 | "link": 8 116 | } 117 | ], 118 | "outputs": [ 119 | { 120 | "name": "LATENT", 121 | "type": "LATENT", 122 | "links": [ 123 | 71 124 | ], 125 | "shape": 3, 126 | "slot_index": 0 127 | } 128 | ], 129 | "properties": { 130 | "Node name for S&R": "VAEEncode" 131 | } 132 | }, 133 | { 134 | "id": 9, 135 | "type": "LoadImage", 136 | "pos": [ 137 | -80, 138 | 470 139 | ], 140 | "size": { 141 | "0": 315, 142 | "1": 314 143 | }, 144 | "flags": {}, 145 | "order": 0, 146 | "mode": 0, 147 | "outputs": [ 148 | { 149 | "name": "IMAGE", 150 | "type": "IMAGE", 151 | "links": [ 152 | 40 153 | ], 154 | "shape": 3, 155 | "slot_index": 0 156 | }, 157 | { 158 | "name": "MASK", 159 | "type": "MASK", 160 | "links": null, 161 | "shape": 3 162 | } 163 | ], 164 | "properties": { 165 | "Node name for S&R": "LoadImage" 166 | }, 167 | "widgets_values": [ 168 | "oldman.jpg", 169 | "image" 170 | ] 171 | }, 172 | { 173 | "id": 28, 174 | "type": "ImageResize+", 175 | "pos": [ 176 | 290, 177 | 480 178 | ], 179 | "size": { 180 | "0": 315, 181 | "1": 218 182 | }, 183 | "flags": {}, 184 | "order": 3, 185 | "mode": 0, 186 | "inputs": [ 187 | { 188 | "name": "image", 189 | "type": "IMAGE", 190 | "link": 40 191 | } 192 | ], 193 | "outputs": [ 194 | { 195 | "name": "IMAGE", 196 | "type": "IMAGE", 197 | "links": [ 198 | 58 199 | ], 200 | "shape": 3, 201 | "slot_index": 0 202 | }, 203 | { 204 | "name": "width", 205 | "type": "INT", 206 | "links": [], 207 | "shape": 3 208 | }, 209 | { 210 | "name": "height", 211 | "type": "INT", 212 | "links": 
[], 213 | "shape": 3 214 | } 215 | ], 216 | "properties": { 217 | "Node name for S&R": "ImageResize+" 218 | }, 219 | "widgets_values": [ 220 | 512, 221 | 512, 222 | "nearest", 223 | false, 224 | "always", 225 | 8 226 | ] 227 | }, 228 | { 229 | "id": 6, 230 | "type": "CheckpointLoaderSimple", 231 | "pos": [ 232 | 563, 233 | 285 234 | ], 235 | "size": { 236 | "0": 315, 237 | "1": 98 238 | }, 239 | "flags": {}, 240 | "order": 1, 241 | "mode": 0, 242 | "outputs": [ 243 | { 244 | "name": "MODEL", 245 | "type": "MODEL", 246 | "links": [ 247 | 47 248 | ], 249 | "shape": 3, 250 | "slot_index": 0 251 | }, 252 | { 253 | "name": "CLIP", 254 | "type": "CLIP", 255 | "links": [ 256 | 2 257 | ], 258 | "shape": 3, 259 | "slot_index": 1 260 | }, 261 | { 262 | "name": "VAE", 263 | "type": "VAE", 264 | "links": [ 265 | 3, 266 | 8, 267 | 32, 268 | 74 269 | ], 270 | "shape": 3, 271 | "slot_index": 2 272 | } 273 | ], 274 | "properties": { 275 | "Node name for S&R": "CheckpointLoaderSimple" 276 | }, 277 | "widgets_values": [ 278 | "1_5\\photon_v1.safetensors" 279 | ] 280 | }, 281 | { 282 | "id": 25, 283 | "type": "VAEEncode", 284 | "pos": [ 285 | 1028, 286 | 597 287 | ], 288 | "size": { 289 | "0": 210, 290 | "1": 46 291 | }, 292 | "flags": {}, 293 | "order": 12, 294 | "mode": 0, 295 | "inputs": [ 296 | { 297 | "name": "pixels", 298 | "type": "IMAGE", 299 | "link": 56, 300 | "slot_index": 0 301 | }, 302 | { 303 | "name": "vae", 304 | "type": "VAE", 305 | "link": 32, 306 | "slot_index": 1 307 | } 308 | ], 309 | "outputs": [ 310 | { 311 | "name": "LATENT", 312 | "type": "LATENT", 313 | "links": [ 314 | 72 315 | ], 316 | "shape": 3, 317 | "slot_index": 0 318 | } 319 | ], 320 | "properties": { 321 | "Node name for S&R": "VAEEncode" 322 | } 323 | }, 324 | { 325 | "id": 32, 326 | "type": "RemapMaskRange", 327 | "pos": [ 328 | 610, 329 | 890 330 | ], 331 | "size": { 332 | "0": 315, 333 | "1": 82 334 | }, 335 | "flags": {}, 336 | "order": 10, 337 | "mode": 0, 338 | "inputs": [ 339 | { 340 | "name": "mask", 341 | "type": "MASK", 342 | "link": 53 343 | } 344 | ], 345 | "outputs": [ 346 | { 347 | "name": "mask", 348 | "type": "MASK", 349 | "links": [ 350 | 50 351 | ], 352 | "shape": 3, 353 | "slot_index": 0 354 | } 355 | ], 356 | "properties": { 357 | "Node name for S&R": "RemapMaskRange" 358 | }, 359 | "widgets_values": [ 360 | 0, 361 | 1 362 | ] 363 | }, 364 | { 365 | "id": 34, 366 | "type": "GrowMaskWithBlur", 367 | "pos": [ 368 | 620, 369 | 1030 370 | ], 371 | "size": { 372 | "0": 315, 373 | "1": 246 374 | }, 375 | "flags": {}, 376 | "order": 8, 377 | "mode": 0, 378 | "inputs": [ 379 | { 380 | "name": "mask", 381 | "type": "MASK", 382 | "link": 62 383 | } 384 | ], 385 | "outputs": [ 386 | { 387 | "name": "mask", 388 | "type": "MASK", 389 | "links": [ 390 | 53 391 | ], 392 | "shape": 3, 393 | "slot_index": 0 394 | }, 395 | { 396 | "name": "mask_inverted", 397 | "type": "MASK", 398 | "links": null, 399 | "shape": 3 400 | } 401 | ], 402 | "properties": { 403 | "Node name for S&R": "GrowMaskWithBlur" 404 | }, 405 | "widgets_values": [ 406 | 0, 407 | 0, 408 | true, 409 | false, 410 | 48.6, 411 | 1, 412 | 1, 413 | false 414 | ] 415 | }, 416 | { 417 | "id": 30, 418 | "type": "MaskToImage", 419 | "pos": [ 420 | 663, 421 | 806 422 | ], 423 | "size": { 424 | "0": 210, 425 | "1": 26 426 | }, 427 | "flags": {}, 428 | "order": 11, 429 | "mode": 0, 430 | "inputs": [ 431 | { 432 | "name": "mask", 433 | "type": "MASK", 434 | "link": 50 435 | } 436 | ], 437 | "outputs": [ 438 | { 439 | "name": "IMAGE", 440 | "type": "IMAGE", 441 | 
"links": [ 442 | 56, 443 | 63 444 | ], 445 | "shape": 3, 446 | "slot_index": 0 447 | } 448 | ], 449 | "properties": { 450 | "Node name for S&R": "MaskToImage" 451 | } 452 | }, 453 | { 454 | "id": 40, 455 | "type": "ImageConcanate", 456 | "pos": [ 457 | 1106, 458 | 1035 459 | ], 460 | "size": { 461 | "0": 315, 462 | "1": 102 463 | }, 464 | "flags": {}, 465 | "order": 16, 466 | "mode": 0, 467 | "inputs": [ 468 | { 469 | "name": "image1", 470 | "type": "IMAGE", 471 | "link": 63 472 | }, 473 | { 474 | "name": "image2", 475 | "type": "IMAGE", 476 | "link": 76 477 | } 478 | ], 479 | "outputs": [ 480 | { 481 | "name": "IMAGE", 482 | "type": "IMAGE", 483 | "links": [ 484 | 65 485 | ], 486 | "shape": 3, 487 | "slot_index": 0 488 | } 489 | ], 490 | "properties": { 491 | "Node name for S&R": "ImageConcanate" 492 | }, 493 | "widgets_values": [ 494 | "right", 495 | false 496 | ] 497 | }, 498 | { 499 | "id": 37, 500 | "type": "VHS_VideoCombine", 501 | "pos": [ 502 | 1461, 503 | 963 504 | ], 505 | "size": [ 506 | 640.8259887695312, 507 | 614.4129943847656 508 | ], 509 | "flags": {}, 510 | "order": 17, 511 | "mode": 0, 512 | "inputs": [ 513 | { 514 | "name": "images", 515 | "type": "IMAGE", 516 | "link": 65 517 | }, 518 | { 519 | "name": "audio", 520 | "type": "VHS_AUDIO", 521 | "link": null 522 | }, 523 | { 524 | "name": "batch_manager", 525 | "type": "VHS_BatchManager", 526 | "link": null 527 | } 528 | ], 529 | "outputs": [ 530 | { 531 | "name": "Filenames", 532 | "type": "VHS_FILENAMES", 533 | "links": null, 534 | "shape": 3 535 | } 536 | ], 537 | "properties": { 538 | "Node name for S&R": "VHS_VideoCombine" 539 | }, 540 | "widgets_values": { 541 | "frame_rate": 8, 542 | "loop_count": 0, 543 | "filename_prefix": "AnimateDiff", 544 | "format": "video/h264-mp4", 545 | "pix_fmt": "yuv420p", 546 | "crf": 19, 547 | "save_metadata": true, 548 | "pingpong": true, 549 | "save_output": false, 550 | "videopreview": { 551 | "hidden": false, 552 | "paused": false, 553 | "params": { 554 | "filename": "AnimateDiff_00003.mp4", 555 | "subfolder": "", 556 | "type": "temp", 557 | "format": "video/h264-mp4" 558 | } 559 | } 560 | } 561 | }, 562 | { 563 | "id": 10, 564 | "type": "PreviewImage", 565 | "pos": [ 566 | 2185, 567 | 293 568 | ], 569 | "size": { 570 | "0": 526.656982421875, 571 | "1": 580.8809814453125 572 | }, 573 | "flags": {}, 574 | "order": 15, 575 | "mode": 0, 576 | "inputs": [ 577 | { 578 | "name": "images", 579 | "type": "IMAGE", 580 | "link": 75 581 | } 582 | ], 583 | "properties": { 584 | "Node name for S&R": "PreviewImage" 585 | } 586 | }, 587 | { 588 | "id": 35, 589 | "type": "VHS_DuplicateImages", 590 | "pos": [ 591 | 644, 592 | 480 593 | ], 594 | "size": [ 595 | 289.9799993896486, 596 | 54 597 | ], 598 | "flags": {}, 599 | "order": 6, 600 | "mode": 0, 601 | "inputs": [ 602 | { 603 | "name": "images", 604 | "type": "IMAGE", 605 | "link": 58, 606 | "slot_index": 0 607 | }, 608 | { 609 | "name": "multiply_by", 610 | "type": "INT", 611 | "link": 66, 612 | "widget": { 613 | "name": "multiply_by" 614 | } 615 | } 616 | ], 617 | "outputs": [ 618 | { 619 | "name": "IMAGE", 620 | "type": "IMAGE", 621 | "links": [ 622 | 59 623 | ], 624 | "shape": 3, 625 | "slot_index": 0 626 | }, 627 | { 628 | "name": "count", 629 | "type": "INT", 630 | "links": null, 631 | "shape": 3 632 | } 633 | ], 634 | "properties": { 635 | "Node name for S&R": "VHS_DuplicateImages" 636 | }, 637 | "widgets_values": { 638 | "multiply_by": 32 639 | } 640 | }, 641 | { 642 | "id": 42, 643 | "type": "iclight_diffusers_sampler", 644 | "pos": [ 
645 | 1372, 646 | 434 647 | ], 648 | "size": { 649 | "0": 400, 650 | "1": 406 651 | }, 652 | "flags": {}, 653 | "order": 13, 654 | "mode": 0, 655 | "inputs": [ 656 | { 657 | "name": "diffusers_model", 658 | "type": "DIFFUSERSMODEL", 659 | "link": 70 660 | }, 661 | { 662 | "name": "latent", 663 | "type": "LATENT", 664 | "link": 71 665 | }, 666 | { 667 | "name": "bg_latent", 668 | "type": "LATENT", 669 | "link": 72 670 | } 671 | ], 672 | "outputs": [ 673 | { 674 | "name": "samples", 675 | "type": "LATENT", 676 | "links": [ 677 | 73 678 | ], 679 | "shape": 3, 680 | "slot_index": 0 681 | } 682 | ], 683 | "properties": { 684 | "Node name for S&R": "iclight_diffusers_sampler" 685 | }, 686 | "widgets_values": [ 687 | 512, 688 | 512, 689 | 25, 690 | 2, 691 | 0.9, 692 | 908247852472673, 693 | "randomize", 694 | "DPMSolverMultistepScheduler", 695 | "spotlight", 696 | "bad quality", 697 | false, 698 | true 699 | ] 700 | }, 701 | { 702 | "id": 38, 703 | "type": "SplineEditor", 704 | "pos": [ 705 | -29, 706 | 893 707 | ], 708 | "size": { 709 | "0": 550, 710 | "1": 920 711 | }, 712 | "flags": {}, 713 | "order": 2, 714 | "mode": 0, 715 | "outputs": [ 716 | { 717 | "name": "mask", 718 | "type": "MASK", 719 | "links": null, 720 | "shape": 3 721 | }, 722 | { 723 | "name": "coord_str", 724 | "type": "STRING", 725 | "links": [ 726 | 61 727 | ], 728 | "shape": 3 729 | }, 730 | { 731 | "name": "float", 732 | "type": "FLOAT", 733 | "links": null, 734 | "shape": 3 735 | }, 736 | { 737 | "name": "count", 738 | "type": "INT", 739 | "links": [ 740 | 66 741 | ], 742 | "shape": 3, 743 | "slot_index": 3 744 | } 745 | ], 746 | "properties": { 747 | "Node name for S&R": "SplineEditor", 748 | "points": "SplineEditor" 749 | }, 750 | "widgets_values": [ 751 | "[{\"x\":121.00000000000001,\"y\":447.70000000000005},{\"x\":412.61000000000007,\"y\":419.87000000000006},{\"x\":141.57000000000002,\"y\":278.3},{\"x\":262.07390000000004,\"y\":84.91780000000003},{\"x\":405.35,\"y\":256.52000000000004},{\"x\":107.69000000000001,\"y\":416.24000000000007}]", 752 | 
"[{\"x\":121,\"y\":447.70001220703125},{\"x\":168.39727783203125,\"y\":447.5857238769531},{\"x\":215.79052734375,\"y\":446.96728515625},{\"x\":263.1689453125,\"y\":445.6412658691406},{\"x\":310.5037841796875,\"y\":443.2364501953125},{\"x\":357.69171142578125,\"y\":438.8589782714844},{\"x\":403.7711181640625,\"y\":428.43505859375},{\"x\":389.68817138671875,\"y\":396.6990051269531},{\"x\":345.1974792480469,\"y\":380.45477294921875},{\"x\":299.83392333984375,\"y\":366.7195129394531},{\"x\":254.7096710205078,\"y\":352.2271728515625},{\"x\":210.65040588378906,\"y\":334.8033447265625},{\"x\":169.68922424316406,\"y\":311.13885498046875},{\"x\":140.11099243164062,\"y\":274.8868713378906},{\"x\":133.01222229003906,\"y\":228.4459686279297},{\"x\":144.95265197753906,\"y\":182.82485961914062},{\"x\":169.87940979003906,\"y\":142.66854858398438},{\"x\":203.91860961914062,\"y\":109.85896301269531},{\"x\":245.73171997070312,\"y\":88.0564956665039},{\"x\":292.4783935546875,\"y\":88.1949462890625},{\"x\":335.642578125,\"y\":107.3690414428711},{\"x\":372.6259765625,\"y\":136.83396911621094},{\"x\":400.75732421875,\"y\":174.7657928466797},{\"x\":413.12457275390625,\"y\":220.08438110351562},{\"x\":400.2603454589844,\"y\":264.89129638671875},{\"x\":364.9109802246094,\"y\":296.1686096191406},{\"x\":324.4112243652344,\"y\":320.7547912597656},{\"x\":282.25390625,\"y\":342.40655517578125},{\"x\":239.25035095214844,\"y\":362.3324279785156},{\"x\":195.72447204589844,\"y\":381.0921936035156},{\"x\":151.8393096923828,\"y\":398.99676513671875},{\"x\":107.69000244140625,\"y\":416.239990234375}]", 753 | 512, 754 | 512, 755 | 32, 756 | "path", 757 | "cardinal", 758 | 0.49666656494140643, 759 | 1, 760 | "list", 761 | 0, 762 | 1, 763 | null, 764 | null 765 | ] 766 | }, 767 | { 768 | "id": 43, 769 | "type": "VAEDecode", 770 | "pos": [ 771 | 1858, 772 | 437 773 | ], 774 | "size": { 775 | "0": 210, 776 | "1": 46 777 | }, 778 | "flags": {}, 779 | "order": 14, 780 | "mode": 0, 781 | "inputs": [ 782 | { 783 | "name": "samples", 784 | "type": "LATENT", 785 | "link": 73 786 | }, 787 | { 788 | "name": "vae", 789 | "type": "VAE", 790 | "link": 74, 791 | "slot_index": 1 792 | } 793 | ], 794 | "outputs": [ 795 | { 796 | "name": "IMAGE", 797 | "type": "IMAGE", 798 | "links": [ 799 | 75, 800 | 76 801 | ], 802 | "shape": 3, 803 | "slot_index": 0 804 | } 805 | ], 806 | "properties": { 807 | "Node name for S&R": "VAEDecode" 808 | } 809 | }, 810 | { 811 | "id": 39, 812 | "type": "CreateShapeMaskOnPath", 813 | "pos": [ 814 | 627, 815 | 1343 816 | ], 817 | "size": { 818 | "0": 315, 819 | "1": 222 820 | }, 821 | "flags": {}, 822 | "order": 5, 823 | "mode": 0, 824 | "inputs": [ 825 | { 826 | "name": "coordinates", 827 | "type": "STRING", 828 | "link": 61, 829 | "widget": { 830 | "name": "coordinates" 831 | }, 832 | "slot_index": 0 833 | }, 834 | { 835 | "name": "size_multiplier", 836 | "type": "FLOAT", 837 | "link": null, 838 | "widget": { 839 | "name": "size_multiplier" 840 | } 841 | } 842 | ], 843 | "outputs": [ 844 | { 845 | "name": "mask", 846 | "type": "MASK", 847 | "links": [ 848 | 62 849 | ], 850 | "shape": 3, 851 | "slot_index": 0 852 | }, 853 | { 854 | "name": "mask_inverted", 855 | "type": "MASK", 856 | "links": null, 857 | "shape": 3 858 | } 859 | ], 860 | "properties": { 861 | "Node name for S&R": "CreateShapeMaskOnPath" 862 | }, 863 | "widgets_values": [ 864 | "circle", 865 | "", 866 | 512, 867 | 512, 868 | 128, 869 | 128, 870 | [ 871 | 1 872 | ] 873 | ] 874 | } 875 | ], 876 | "links": [ 877 | [ 878 | 2, 879 | 6, 880 | 1, 881 | 5, 
882 | 1, 883 | "CLIP" 884 | ], 885 | [ 886 | 3, 887 | 6, 888 | 2, 889 | 5, 890 | 2, 891 | "VAE" 892 | ], 893 | [ 894 | 8, 895 | 6, 896 | 2, 897 | 8, 898 | 1, 899 | "VAE" 900 | ], 901 | [ 902 | 26, 903 | 5, 904 | 0, 905 | 22, 906 | 0, 907 | "DIFFUSERSMODEL" 908 | ], 909 | [ 910 | 32, 911 | 6, 912 | 2, 913 | 25, 914 | 1, 915 | "VAE" 916 | ], 917 | [ 918 | 40, 919 | 9, 920 | 0, 921 | 28, 922 | 0, 923 | "IMAGE" 924 | ], 925 | [ 926 | 47, 927 | 6, 928 | 0, 929 | 5, 930 | 0, 931 | "MODEL" 932 | ], 933 | [ 934 | 50, 935 | 32, 936 | 0, 937 | 30, 938 | 0, 939 | "MASK" 940 | ], 941 | [ 942 | 53, 943 | 34, 944 | 0, 945 | 32, 946 | 0, 947 | "MASK" 948 | ], 949 | [ 950 | 56, 951 | 30, 952 | 0, 953 | 25, 954 | 0, 955 | "IMAGE" 956 | ], 957 | [ 958 | 58, 959 | 28, 960 | 0, 961 | 35, 962 | 0, 963 | "IMAGE" 964 | ], 965 | [ 966 | 59, 967 | 35, 968 | 0, 969 | 8, 970 | 0, 971 | "IMAGE" 972 | ], 973 | [ 974 | 61, 975 | 38, 976 | 1, 977 | 39, 978 | 0, 979 | "STRING" 980 | ], 981 | [ 982 | 62, 983 | 39, 984 | 0, 985 | 34, 986 | 0, 987 | "MASK" 988 | ], 989 | [ 990 | 63, 991 | 30, 992 | 0, 993 | 40, 994 | 0, 995 | "IMAGE" 996 | ], 997 | [ 998 | 65, 999 | 40, 1000 | 0, 1001 | 37, 1002 | 0, 1003 | "IMAGE" 1004 | ], 1005 | [ 1006 | 66, 1007 | 38, 1008 | 3, 1009 | 35, 1010 | 1, 1011 | "INT" 1012 | ], 1013 | [ 1014 | 70, 1015 | 22, 1016 | 0, 1017 | 42, 1018 | 0, 1019 | "DIFFUSERSMODEL" 1020 | ], 1021 | [ 1022 | 71, 1023 | 8, 1024 | 0, 1025 | 42, 1026 | 1, 1027 | "LATENT" 1028 | ], 1029 | [ 1030 | 72, 1031 | 25, 1032 | 0, 1033 | 42, 1034 | 2, 1035 | "LATENT" 1036 | ], 1037 | [ 1038 | 73, 1039 | 42, 1040 | 0, 1041 | 43, 1042 | 0, 1043 | "LATENT" 1044 | ], 1045 | [ 1046 | 74, 1047 | 6, 1048 | 2, 1049 | 43, 1050 | 1, 1051 | "VAE" 1052 | ], 1053 | [ 1054 | 75, 1055 | 43, 1056 | 0, 1057 | 10, 1058 | 0, 1059 | "IMAGE" 1060 | ], 1061 | [ 1062 | 76, 1063 | 43, 1064 | 0, 1065 | 40, 1066 | 1, 1067 | "IMAGE" 1068 | ] 1069 | ], 1070 | "groups": [], 1071 | "config": {}, 1072 | "extra": {}, 1073 | "version": 0.4 1074 | } -------------------------------------------------------------------------------- /hidiffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .hidiffusion import apply_hidiffusion, remove_hidiffusion 2 | 3 | __all__ = ["apply_hidiffusion", "remove_hidiffusion"] 4 | -------------------------------------------------------------------------------- /hidiffusion/sd_module_key/sd15_module_key.txt: -------------------------------------------------------------------------------- 1 | conv_in 2 | time_proj 3 | time_embedding 4 | time_embedding.linear_1 5 | time_embedding.act 6 | time_embedding.linear_2 7 | down_blocks 8 | down_blocks.0 9 | down_blocks.0.attentions 10 | down_blocks.0.attentions.0 11 | down_blocks.0.attentions.0.norm 12 | down_blocks.0.attentions.0.proj_in 13 | down_blocks.0.attentions.0.transformer_blocks 14 | down_blocks.0.attentions.0.transformer_blocks.0 15 | down_blocks.0.attentions.0.transformer_blocks.0.norm1 16 | down_blocks.0.attentions.0.transformer_blocks.0.attn1 17 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q 18 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k 19 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v 20 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out 21 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0 22 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.1 23 | down_blocks.0.attentions.0.transformer_blocks.0.norm2 24 | 
down_blocks.0.attentions.0.transformer_blocks.0.attn2 25 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q 26 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k 27 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v 28 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out 29 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0 30 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.1 31 | down_blocks.0.attentions.0.transformer_blocks.0.norm3 32 | down_blocks.0.attentions.0.transformer_blocks.0.ff 33 | down_blocks.0.attentions.0.transformer_blocks.0.ff.net 34 | down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0 35 | down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj 36 | down_blocks.0.attentions.0.transformer_blocks.0.ff.net.1 37 | down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2 38 | down_blocks.0.attentions.0.proj_out 39 | down_blocks.0.attentions.1 40 | down_blocks.0.attentions.1.norm 41 | down_blocks.0.attentions.1.proj_in 42 | down_blocks.0.attentions.1.transformer_blocks 43 | down_blocks.0.attentions.1.transformer_blocks.0 44 | down_blocks.0.attentions.1.transformer_blocks.0.norm1 45 | down_blocks.0.attentions.1.transformer_blocks.0.attn1 46 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q 47 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k 48 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v 49 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out 50 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0 51 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.1 52 | down_blocks.0.attentions.1.transformer_blocks.0.norm2 53 | down_blocks.0.attentions.1.transformer_blocks.0.attn2 54 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q 55 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k 56 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v 57 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out 58 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0 59 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.1 60 | down_blocks.0.attentions.1.transformer_blocks.0.norm3 61 | down_blocks.0.attentions.1.transformer_blocks.0.ff 62 | down_blocks.0.attentions.1.transformer_blocks.0.ff.net 63 | down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0 64 | down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj 65 | down_blocks.0.attentions.1.transformer_blocks.0.ff.net.1 66 | down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2 67 | down_blocks.0.attentions.1.proj_out 68 | down_blocks.0.resnets 69 | down_blocks.0.resnets.0 70 | down_blocks.0.resnets.0.norm1 71 | down_blocks.0.resnets.0.conv1 72 | down_blocks.0.resnets.0.time_emb_proj 73 | down_blocks.0.resnets.0.norm2 74 | down_blocks.0.resnets.0.dropout 75 | down_blocks.0.resnets.0.conv2 76 | down_blocks.0.resnets.1 77 | down_blocks.0.resnets.1.norm1 78 | down_blocks.0.resnets.1.conv1 79 | down_blocks.0.resnets.1.time_emb_proj 80 | down_blocks.0.resnets.1.norm2 81 | down_blocks.0.resnets.1.dropout 82 | down_blocks.0.resnets.1.conv2 83 | down_blocks.0.downsamplers 84 | down_blocks.0.downsamplers.0 85 | down_blocks.0.downsamplers.0.conv 86 | down_blocks.1 87 | down_blocks.1.attentions 88 | down_blocks.1.attentions.0 89 | down_blocks.1.attentions.0.norm 90 | down_blocks.1.attentions.0.proj_in 91 | down_blocks.1.attentions.0.transformer_blocks 92 | down_blocks.1.attentions.0.transformer_blocks.0 93 | 
down_blocks.1.attentions.0.transformer_blocks.0.norm1 94 | down_blocks.1.attentions.0.transformer_blocks.0.attn1 95 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q 96 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k 97 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v 98 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out 99 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0 100 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.1 101 | down_blocks.1.attentions.0.transformer_blocks.0.norm2 102 | down_blocks.1.attentions.0.transformer_blocks.0.attn2 103 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q 104 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k 105 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v 106 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out 107 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0 108 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.1 109 | down_blocks.1.attentions.0.transformer_blocks.0.norm3 110 | down_blocks.1.attentions.0.transformer_blocks.0.ff 111 | down_blocks.1.attentions.0.transformer_blocks.0.ff.net 112 | down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0 113 | down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj 114 | down_blocks.1.attentions.0.transformer_blocks.0.ff.net.1 115 | down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2 116 | down_blocks.1.attentions.0.proj_out 117 | down_blocks.1.attentions.1 118 | down_blocks.1.attentions.1.norm 119 | down_blocks.1.attentions.1.proj_in 120 | down_blocks.1.attentions.1.transformer_blocks 121 | down_blocks.1.attentions.1.transformer_blocks.0 122 | down_blocks.1.attentions.1.transformer_blocks.0.norm1 123 | down_blocks.1.attentions.1.transformer_blocks.0.attn1 124 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q 125 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k 126 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v 127 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out 128 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0 129 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.1 130 | down_blocks.1.attentions.1.transformer_blocks.0.norm2 131 | down_blocks.1.attentions.1.transformer_blocks.0.attn2 132 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q 133 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k 134 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v 135 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out 136 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0 137 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.1 138 | down_blocks.1.attentions.1.transformer_blocks.0.norm3 139 | down_blocks.1.attentions.1.transformer_blocks.0.ff 140 | down_blocks.1.attentions.1.transformer_blocks.0.ff.net 141 | down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0 142 | down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj 143 | down_blocks.1.attentions.1.transformer_blocks.0.ff.net.1 144 | down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2 145 | down_blocks.1.attentions.1.proj_out 146 | down_blocks.1.resnets 147 | down_blocks.1.resnets.0 148 | down_blocks.1.resnets.0.norm1 149 | down_blocks.1.resnets.0.conv1 150 | down_blocks.1.resnets.0.time_emb_proj 151 | down_blocks.1.resnets.0.norm2 152 | down_blocks.1.resnets.0.dropout 153 | down_blocks.1.resnets.0.conv2 154 | 
down_blocks.1.resnets.0.conv_shortcut 155 | down_blocks.1.resnets.1 156 | down_blocks.1.resnets.1.norm1 157 | down_blocks.1.resnets.1.conv1 158 | down_blocks.1.resnets.1.time_emb_proj 159 | down_blocks.1.resnets.1.norm2 160 | down_blocks.1.resnets.1.dropout 161 | down_blocks.1.resnets.1.conv2 162 | down_blocks.1.downsamplers 163 | down_blocks.1.downsamplers.0 164 | down_blocks.1.downsamplers.0.conv 165 | down_blocks.2 166 | down_blocks.2.attentions 167 | down_blocks.2.attentions.0 168 | down_blocks.2.attentions.0.norm 169 | down_blocks.2.attentions.0.proj_in 170 | down_blocks.2.attentions.0.transformer_blocks 171 | down_blocks.2.attentions.0.transformer_blocks.0 172 | down_blocks.2.attentions.0.transformer_blocks.0.norm1 173 | down_blocks.2.attentions.0.transformer_blocks.0.attn1 174 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q 175 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k 176 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v 177 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out 178 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0 179 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.1 180 | down_blocks.2.attentions.0.transformer_blocks.0.norm2 181 | down_blocks.2.attentions.0.transformer_blocks.0.attn2 182 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q 183 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k 184 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v 185 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out 186 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0 187 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.1 188 | down_blocks.2.attentions.0.transformer_blocks.0.norm3 189 | down_blocks.2.attentions.0.transformer_blocks.0.ff 190 | down_blocks.2.attentions.0.transformer_blocks.0.ff.net 191 | down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0 192 | down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj 193 | down_blocks.2.attentions.0.transformer_blocks.0.ff.net.1 194 | down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2 195 | down_blocks.2.attentions.0.proj_out 196 | down_blocks.2.attentions.1 197 | down_blocks.2.attentions.1.norm 198 | down_blocks.2.attentions.1.proj_in 199 | down_blocks.2.attentions.1.transformer_blocks 200 | down_blocks.2.attentions.1.transformer_blocks.0 201 | down_blocks.2.attentions.1.transformer_blocks.0.norm1 202 | down_blocks.2.attentions.1.transformer_blocks.0.attn1 203 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q 204 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k 205 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v 206 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out 207 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0 208 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.1 209 | down_blocks.2.attentions.1.transformer_blocks.0.norm2 210 | down_blocks.2.attentions.1.transformer_blocks.0.attn2 211 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q 212 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k 213 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v 214 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out 215 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0 216 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.1 217 | down_blocks.2.attentions.1.transformer_blocks.0.norm3 218 | 
down_blocks.2.attentions.1.transformer_blocks.0.ff 219 | down_blocks.2.attentions.1.transformer_blocks.0.ff.net 220 | down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0 221 | down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj 222 | down_blocks.2.attentions.1.transformer_blocks.0.ff.net.1 223 | down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2 224 | down_blocks.2.attentions.1.proj_out 225 | down_blocks.2.resnets 226 | down_blocks.2.resnets.0 227 | down_blocks.2.resnets.0.norm1 228 | down_blocks.2.resnets.0.conv1 229 | down_blocks.2.resnets.0.time_emb_proj 230 | down_blocks.2.resnets.0.norm2 231 | down_blocks.2.resnets.0.dropout 232 | down_blocks.2.resnets.0.conv2 233 | down_blocks.2.resnets.0.conv_shortcut 234 | down_blocks.2.resnets.1 235 | down_blocks.2.resnets.1.norm1 236 | down_blocks.2.resnets.1.conv1 237 | down_blocks.2.resnets.1.time_emb_proj 238 | down_blocks.2.resnets.1.norm2 239 | down_blocks.2.resnets.1.dropout 240 | down_blocks.2.resnets.1.conv2 241 | down_blocks.2.downsamplers 242 | down_blocks.2.downsamplers.0 243 | down_blocks.2.downsamplers.0.conv 244 | down_blocks.3 245 | down_blocks.3.resnets 246 | down_blocks.3.resnets.0 247 | down_blocks.3.resnets.0.norm1 248 | down_blocks.3.resnets.0.conv1 249 | down_blocks.3.resnets.0.time_emb_proj 250 | down_blocks.3.resnets.0.norm2 251 | down_blocks.3.resnets.0.dropout 252 | down_blocks.3.resnets.0.conv2 253 | down_blocks.3.resnets.1 254 | down_blocks.3.resnets.1.norm1 255 | down_blocks.3.resnets.1.conv1 256 | down_blocks.3.resnets.1.time_emb_proj 257 | down_blocks.3.resnets.1.norm2 258 | down_blocks.3.resnets.1.dropout 259 | down_blocks.3.resnets.1.conv2 260 | up_blocks 261 | up_blocks.0 262 | up_blocks.0.resnets 263 | up_blocks.0.resnets.0 264 | up_blocks.0.resnets.0.norm1 265 | up_blocks.0.resnets.0.conv1 266 | up_blocks.0.resnets.0.time_emb_proj 267 | up_blocks.0.resnets.0.norm2 268 | up_blocks.0.resnets.0.dropout 269 | up_blocks.0.resnets.0.conv2 270 | up_blocks.0.resnets.0.conv_shortcut 271 | up_blocks.0.resnets.1 272 | up_blocks.0.resnets.1.norm1 273 | up_blocks.0.resnets.1.conv1 274 | up_blocks.0.resnets.1.time_emb_proj 275 | up_blocks.0.resnets.1.norm2 276 | up_blocks.0.resnets.1.dropout 277 | up_blocks.0.resnets.1.conv2 278 | up_blocks.0.resnets.1.conv_shortcut 279 | up_blocks.0.resnets.2 280 | up_blocks.0.resnets.2.norm1 281 | up_blocks.0.resnets.2.conv1 282 | up_blocks.0.resnets.2.time_emb_proj 283 | up_blocks.0.resnets.2.norm2 284 | up_blocks.0.resnets.2.dropout 285 | up_blocks.0.resnets.2.conv2 286 | up_blocks.0.resnets.2.conv_shortcut 287 | up_blocks.0.upsamplers 288 | up_blocks.0.upsamplers.0 289 | up_blocks.0.upsamplers.0.conv 290 | up_blocks.1 291 | up_blocks.1.attentions 292 | up_blocks.1.attentions.0 293 | up_blocks.1.attentions.0.norm 294 | up_blocks.1.attentions.0.proj_in 295 | up_blocks.1.attentions.0.transformer_blocks 296 | up_blocks.1.attentions.0.transformer_blocks.0 297 | up_blocks.1.attentions.0.transformer_blocks.0.norm1 298 | up_blocks.1.attentions.0.transformer_blocks.0.attn1 299 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q 300 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k 301 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v 302 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out 303 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0 304 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.1 305 | up_blocks.1.attentions.0.transformer_blocks.0.norm2 306 | up_blocks.1.attentions.0.transformer_blocks.0.attn2 307 | 
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q 308 | up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k 309 | up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v 310 | up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out 311 | up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0 312 | up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.1 313 | up_blocks.1.attentions.0.transformer_blocks.0.norm3 314 | up_blocks.1.attentions.0.transformer_blocks.0.ff 315 | up_blocks.1.attentions.0.transformer_blocks.0.ff.net 316 | up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0 317 | up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj 318 | up_blocks.1.attentions.0.transformer_blocks.0.ff.net.1 319 | up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2 320 | up_blocks.1.attentions.0.proj_out 321 | up_blocks.1.attentions.1 322 | up_blocks.1.attentions.1.norm 323 | up_blocks.1.attentions.1.proj_in 324 | up_blocks.1.attentions.1.transformer_blocks 325 | up_blocks.1.attentions.1.transformer_blocks.0 326 | up_blocks.1.attentions.1.transformer_blocks.0.norm1 327 | up_blocks.1.attentions.1.transformer_blocks.0.attn1 328 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q 329 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k 330 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v 331 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out 332 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0 333 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.1 334 | up_blocks.1.attentions.1.transformer_blocks.0.norm2 335 | up_blocks.1.attentions.1.transformer_blocks.0.attn2 336 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q 337 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k 338 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v 339 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out 340 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0 341 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.1 342 | up_blocks.1.attentions.1.transformer_blocks.0.norm3 343 | up_blocks.1.attentions.1.transformer_blocks.0.ff 344 | up_blocks.1.attentions.1.transformer_blocks.0.ff.net 345 | up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0 346 | up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj 347 | up_blocks.1.attentions.1.transformer_blocks.0.ff.net.1 348 | up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2 349 | up_blocks.1.attentions.1.proj_out 350 | up_blocks.1.attentions.2 351 | up_blocks.1.attentions.2.norm 352 | up_blocks.1.attentions.2.proj_in 353 | up_blocks.1.attentions.2.transformer_blocks 354 | up_blocks.1.attentions.2.transformer_blocks.0 355 | up_blocks.1.attentions.2.transformer_blocks.0.norm1 356 | up_blocks.1.attentions.2.transformer_blocks.0.attn1 357 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q 358 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k 359 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v 360 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out 361 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0 362 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.1 363 | up_blocks.1.attentions.2.transformer_blocks.0.norm2 364 | up_blocks.1.attentions.2.transformer_blocks.0.attn2 365 | up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q 366 | up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k 367 | up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v 368 | 
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out 369 | up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0 370 | up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.1 371 | up_blocks.1.attentions.2.transformer_blocks.0.norm3 372 | up_blocks.1.attentions.2.transformer_blocks.0.ff 373 | up_blocks.1.attentions.2.transformer_blocks.0.ff.net 374 | up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0 375 | up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj 376 | up_blocks.1.attentions.2.transformer_blocks.0.ff.net.1 377 | up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2 378 | up_blocks.1.attentions.2.proj_out 379 | up_blocks.1.resnets 380 | up_blocks.1.resnets.0 381 | up_blocks.1.resnets.0.norm1 382 | up_blocks.1.resnets.0.conv1 383 | up_blocks.1.resnets.0.time_emb_proj 384 | up_blocks.1.resnets.0.norm2 385 | up_blocks.1.resnets.0.dropout 386 | up_blocks.1.resnets.0.conv2 387 | up_blocks.1.resnets.0.conv_shortcut 388 | up_blocks.1.resnets.1 389 | up_blocks.1.resnets.1.norm1 390 | up_blocks.1.resnets.1.conv1 391 | up_blocks.1.resnets.1.time_emb_proj 392 | up_blocks.1.resnets.1.norm2 393 | up_blocks.1.resnets.1.dropout 394 | up_blocks.1.resnets.1.conv2 395 | up_blocks.1.resnets.1.conv_shortcut 396 | up_blocks.1.resnets.2 397 | up_blocks.1.resnets.2.norm1 398 | up_blocks.1.resnets.2.conv1 399 | up_blocks.1.resnets.2.time_emb_proj 400 | up_blocks.1.resnets.2.norm2 401 | up_blocks.1.resnets.2.dropout 402 | up_blocks.1.resnets.2.conv2 403 | up_blocks.1.resnets.2.conv_shortcut 404 | up_blocks.1.upsamplers 405 | up_blocks.1.upsamplers.0 406 | up_blocks.1.upsamplers.0.conv 407 | up_blocks.2 408 | up_blocks.2.attentions 409 | up_blocks.2.attentions.0 410 | up_blocks.2.attentions.0.norm 411 | up_blocks.2.attentions.0.proj_in 412 | up_blocks.2.attentions.0.transformer_blocks 413 | up_blocks.2.attentions.0.transformer_blocks.0 414 | up_blocks.2.attentions.0.transformer_blocks.0.norm1 415 | up_blocks.2.attentions.0.transformer_blocks.0.attn1 416 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q 417 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k 418 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v 419 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out 420 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0 421 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.1 422 | up_blocks.2.attentions.0.transformer_blocks.0.norm2 423 | up_blocks.2.attentions.0.transformer_blocks.0.attn2 424 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q 425 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k 426 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v 427 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out 428 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0 429 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.1 430 | up_blocks.2.attentions.0.transformer_blocks.0.norm3 431 | up_blocks.2.attentions.0.transformer_blocks.0.ff 432 | up_blocks.2.attentions.0.transformer_blocks.0.ff.net 433 | up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0 434 | up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj 435 | up_blocks.2.attentions.0.transformer_blocks.0.ff.net.1 436 | up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2 437 | up_blocks.2.attentions.0.proj_out 438 | up_blocks.2.attentions.1 439 | up_blocks.2.attentions.1.norm 440 | up_blocks.2.attentions.1.proj_in 441 | up_blocks.2.attentions.1.transformer_blocks 442 | 
up_blocks.2.attentions.1.transformer_blocks.0 443 | up_blocks.2.attentions.1.transformer_blocks.0.norm1 444 | up_blocks.2.attentions.1.transformer_blocks.0.attn1 445 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q 446 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k 447 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v 448 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out 449 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0 450 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.1 451 | up_blocks.2.attentions.1.transformer_blocks.0.norm2 452 | up_blocks.2.attentions.1.transformer_blocks.0.attn2 453 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q 454 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k 455 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v 456 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out 457 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0 458 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.1 459 | up_blocks.2.attentions.1.transformer_blocks.0.norm3 460 | up_blocks.2.attentions.1.transformer_blocks.0.ff 461 | up_blocks.2.attentions.1.transformer_blocks.0.ff.net 462 | up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0 463 | up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj 464 | up_blocks.2.attentions.1.transformer_blocks.0.ff.net.1 465 | up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2 466 | up_blocks.2.attentions.1.proj_out 467 | up_blocks.2.attentions.2 468 | up_blocks.2.attentions.2.norm 469 | up_blocks.2.attentions.2.proj_in 470 | up_blocks.2.attentions.2.transformer_blocks 471 | up_blocks.2.attentions.2.transformer_blocks.0 472 | up_blocks.2.attentions.2.transformer_blocks.0.norm1 473 | up_blocks.2.attentions.2.transformer_blocks.0.attn1 474 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q 475 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k 476 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v 477 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out 478 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0 479 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.1 480 | up_blocks.2.attentions.2.transformer_blocks.0.norm2 481 | up_blocks.2.attentions.2.transformer_blocks.0.attn2 482 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q 483 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k 484 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v 485 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out 486 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0 487 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.1 488 | up_blocks.2.attentions.2.transformer_blocks.0.norm3 489 | up_blocks.2.attentions.2.transformer_blocks.0.ff 490 | up_blocks.2.attentions.2.transformer_blocks.0.ff.net 491 | up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0 492 | up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj 493 | up_blocks.2.attentions.2.transformer_blocks.0.ff.net.1 494 | up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2 495 | up_blocks.2.attentions.2.proj_out 496 | up_blocks.2.resnets 497 | up_blocks.2.resnets.0 498 | up_blocks.2.resnets.0.norm1 499 | up_blocks.2.resnets.0.conv1 500 | up_blocks.2.resnets.0.time_emb_proj 501 | up_blocks.2.resnets.0.norm2 502 | up_blocks.2.resnets.0.dropout 503 | up_blocks.2.resnets.0.conv2 504 | up_blocks.2.resnets.0.conv_shortcut 505 | up_blocks.2.resnets.1 506 | 
up_blocks.2.resnets.1.norm1 507 | up_blocks.2.resnets.1.conv1 508 | up_blocks.2.resnets.1.time_emb_proj 509 | up_blocks.2.resnets.1.norm2 510 | up_blocks.2.resnets.1.dropout 511 | up_blocks.2.resnets.1.conv2 512 | up_blocks.2.resnets.1.conv_shortcut 513 | up_blocks.2.resnets.2 514 | up_blocks.2.resnets.2.norm1 515 | up_blocks.2.resnets.2.conv1 516 | up_blocks.2.resnets.2.time_emb_proj 517 | up_blocks.2.resnets.2.norm2 518 | up_blocks.2.resnets.2.dropout 519 | up_blocks.2.resnets.2.conv2 520 | up_blocks.2.resnets.2.conv_shortcut 521 | up_blocks.2.upsamplers 522 | up_blocks.2.upsamplers.0 523 | up_blocks.2.upsamplers.0.conv 524 | up_blocks.3 525 | up_blocks.3.attentions 526 | up_blocks.3.attentions.0 527 | up_blocks.3.attentions.0.norm 528 | up_blocks.3.attentions.0.proj_in 529 | up_blocks.3.attentions.0.transformer_blocks 530 | up_blocks.3.attentions.0.transformer_blocks.0 531 | up_blocks.3.attentions.0.transformer_blocks.0.norm1 532 | up_blocks.3.attentions.0.transformer_blocks.0.attn1 533 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q 534 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_k 535 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_v 536 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out 537 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0 538 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.1 539 | up_blocks.3.attentions.0.transformer_blocks.0.norm2 540 | up_blocks.3.attentions.0.transformer_blocks.0.attn2 541 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_q 542 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_k 543 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_v 544 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out 545 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0 546 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.1 547 | up_blocks.3.attentions.0.transformer_blocks.0.norm3 548 | up_blocks.3.attentions.0.transformer_blocks.0.ff 549 | up_blocks.3.attentions.0.transformer_blocks.0.ff.net 550 | up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0 551 | up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj 552 | up_blocks.3.attentions.0.transformer_blocks.0.ff.net.1 553 | up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2 554 | up_blocks.3.attentions.0.proj_out 555 | up_blocks.3.attentions.1 556 | up_blocks.3.attentions.1.norm 557 | up_blocks.3.attentions.1.proj_in 558 | up_blocks.3.attentions.1.transformer_blocks 559 | up_blocks.3.attentions.1.transformer_blocks.0 560 | up_blocks.3.attentions.1.transformer_blocks.0.norm1 561 | up_blocks.3.attentions.1.transformer_blocks.0.attn1 562 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q 563 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_k 564 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_v 565 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out 566 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0 567 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.1 568 | up_blocks.3.attentions.1.transformer_blocks.0.norm2 569 | up_blocks.3.attentions.1.transformer_blocks.0.attn2 570 | up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_q 571 | up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_k 572 | up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_v 573 | up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out 574 | up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0 575 | 
up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.1 576 | up_blocks.3.attentions.1.transformer_blocks.0.norm3 577 | up_blocks.3.attentions.1.transformer_blocks.0.ff 578 | up_blocks.3.attentions.1.transformer_blocks.0.ff.net 579 | up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0 580 | up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj 581 | up_blocks.3.attentions.1.transformer_blocks.0.ff.net.1 582 | up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2 583 | up_blocks.3.attentions.1.proj_out 584 | up_blocks.3.attentions.2 585 | up_blocks.3.attentions.2.norm 586 | up_blocks.3.attentions.2.proj_in 587 | up_blocks.3.attentions.2.transformer_blocks 588 | up_blocks.3.attentions.2.transformer_blocks.0 589 | up_blocks.3.attentions.2.transformer_blocks.0.norm1 590 | up_blocks.3.attentions.2.transformer_blocks.0.attn1 591 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q 592 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_k 593 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_v 594 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out 595 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0 596 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.1 597 | up_blocks.3.attentions.2.transformer_blocks.0.norm2 598 | up_blocks.3.attentions.2.transformer_blocks.0.attn2 599 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_q 600 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_k 601 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_v 602 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out 603 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0 604 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.1 605 | up_blocks.3.attentions.2.transformer_blocks.0.norm3 606 | up_blocks.3.attentions.2.transformer_blocks.0.ff 607 | up_blocks.3.attentions.2.transformer_blocks.0.ff.net 608 | up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0 609 | up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj 610 | up_blocks.3.attentions.2.transformer_blocks.0.ff.net.1 611 | up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2 612 | up_blocks.3.attentions.2.proj_out 613 | up_blocks.3.resnets 614 | up_blocks.3.resnets.0 615 | up_blocks.3.resnets.0.norm1 616 | up_blocks.3.resnets.0.conv1 617 | up_blocks.3.resnets.0.time_emb_proj 618 | up_blocks.3.resnets.0.norm2 619 | up_blocks.3.resnets.0.dropout 620 | up_blocks.3.resnets.0.conv2 621 | up_blocks.3.resnets.0.conv_shortcut 622 | up_blocks.3.resnets.1 623 | up_blocks.3.resnets.1.norm1 624 | up_blocks.3.resnets.1.conv1 625 | up_blocks.3.resnets.1.time_emb_proj 626 | up_blocks.3.resnets.1.norm2 627 | up_blocks.3.resnets.1.dropout 628 | up_blocks.3.resnets.1.conv2 629 | up_blocks.3.resnets.1.conv_shortcut 630 | up_blocks.3.resnets.2 631 | up_blocks.3.resnets.2.norm1 632 | up_blocks.3.resnets.2.conv1 633 | up_blocks.3.resnets.2.time_emb_proj 634 | up_blocks.3.resnets.2.norm2 635 | up_blocks.3.resnets.2.dropout 636 | up_blocks.3.resnets.2.conv2 637 | up_blocks.3.resnets.2.conv_shortcut 638 | mid_block 639 | mid_block.attentions 640 | mid_block.attentions.0 641 | mid_block.attentions.0.norm 642 | mid_block.attentions.0.proj_in 643 | mid_block.attentions.0.transformer_blocks 644 | mid_block.attentions.0.transformer_blocks.0 645 | mid_block.attentions.0.transformer_blocks.0.norm1 646 | mid_block.attentions.0.transformer_blocks.0.attn1 647 | mid_block.attentions.0.transformer_blocks.0.attn1.to_q 648 | mid_block.attentions.0.transformer_blocks.0.attn1.to_k 
649 | mid_block.attentions.0.transformer_blocks.0.attn1.to_v
650 | mid_block.attentions.0.transformer_blocks.0.attn1.to_out
651 | mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0
652 | mid_block.attentions.0.transformer_blocks.0.attn1.to_out.1
653 | mid_block.attentions.0.transformer_blocks.0.norm2
654 | mid_block.attentions.0.transformer_blocks.0.attn2
655 | mid_block.attentions.0.transformer_blocks.0.attn2.to_q
656 | mid_block.attentions.0.transformer_blocks.0.attn2.to_k
657 | mid_block.attentions.0.transformer_blocks.0.attn2.to_v
658 | mid_block.attentions.0.transformer_blocks.0.attn2.to_out
659 | mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0
660 | mid_block.attentions.0.transformer_blocks.0.attn2.to_out.1
661 | mid_block.attentions.0.transformer_blocks.0.norm3
662 | mid_block.attentions.0.transformer_blocks.0.ff
663 | mid_block.attentions.0.transformer_blocks.0.ff.net
664 | mid_block.attentions.0.transformer_blocks.0.ff.net.0
665 | mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj
666 | mid_block.attentions.0.transformer_blocks.0.ff.net.1
667 | mid_block.attentions.0.transformer_blocks.0.ff.net.2
668 | mid_block.attentions.0.proj_out
669 | mid_block.resnets
670 | mid_block.resnets.0
671 | mid_block.resnets.0.norm1
672 | mid_block.resnets.0.conv1
673 | mid_block.resnets.0.time_emb_proj
674 | mid_block.resnets.0.norm2
675 | mid_block.resnets.0.dropout
676 | mid_block.resnets.0.conv2
677 | mid_block.resnets.1
678 | mid_block.resnets.1.norm1
679 | mid_block.resnets.1.conv1
680 | mid_block.resnets.1.time_emb_proj
681 | mid_block.resnets.1.norm2
682 | mid_block.resnets.1.dropout
683 | mid_block.resnets.1.conv2
684 | conv_norm_out
685 | conv_out
686 | 
--------------------------------------------------------------------------------
/hidiffusion/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def isinstance_str(x: object, cls_name: str):
5 |     """
6 |     Checks whether x has any class *named* cls_name in its ancestry.
7 |     Doesn't require access to the class's implementation.
8 | 
9 |     Useful for patching!
10 |     """
11 | 
12 |     for _cls in x.__class__.__mro__:
13 |         if _cls.__name__ == cls_name:
14 |             return True
15 | 
16 |     return False
17 | 
18 | 
19 | def init_generator(device: torch.device, fallback: torch.Generator = None):
20 |     """
21 |     Forks the current default random generator for the given device.
22 | """ 23 | if device.type == "cpu": 24 | return torch.Generator(device="cpu").set_state(torch.get_rng_state()) 25 | elif device.type == "cuda": 26 | return torch.Generator(device=device).set_state(torch.cuda.get_rng_state()) 27 | else: 28 | if fallback is None: 29 | return init_generator(torch.device("cpu")) 30 | else: 31 | return fallback 32 | -------------------------------------------------------------------------------- /nodes.py: -------------------------------------------------------------------------------- 1 | import os 2 | from contextlib import nullcontext 3 | import torch 4 | try: 5 | from diffusers import ( 6 | DPMSolverMultistepScheduler, 7 | StableDiffusionPipeline, 8 | StableDiffusionImg2ImgPipeline, 9 | EulerDiscreteScheduler, 10 | EulerAncestralDiscreteScheduler, 11 | AutoencoderKL, 12 | UNet2DConditionModel, 13 | LCMScheduler, 14 | DDPMScheduler, 15 | DEISMultistepScheduler, 16 | PNDMScheduler, 17 | UniPCMultistepScheduler 18 | ) 19 | from diffusers.loaders.single_file_utils import ( 20 | convert_ldm_vae_checkpoint, 21 | convert_ldm_unet_checkpoint, 22 | create_vae_diffusers_config, 23 | create_unet_diffusers_config, 24 | create_text_encoder_from_ldm_clip_checkpoint 25 | ) 26 | except: 27 | raise ImportError("Diffusers version too old. Please update to 0.26.0 minimum.") 28 | from .scheduling_tcd import TCDScheduler 29 | from contextlib import nullcontext 30 | from diffusers.utils import is_accelerate_available 31 | if is_accelerate_available(): 32 | from accelerate import init_empty_weights 33 | from accelerate.utils import set_module_tensor_to_device 34 | 35 | from .hidiffusion import apply_hidiffusion, remove_hidiffusion 36 | 37 | from omegaconf import OmegaConf 38 | from transformers import CLIPTokenizer 39 | import comfy.model_management as mm 40 | import comfy.utils 41 | import folder_paths 42 | 43 | script_directory = os.path.dirname(os.path.abspath(__file__)) 44 | 45 | class diffusers_model_loader: 46 | @classmethod 47 | def INPUT_TYPES(s): 48 | return {"required": { 49 | "model": ("MODEL",), 50 | "clip": ("CLIP",), 51 | "vae": ("VAE",), 52 | }, 53 | } 54 | 55 | RETURN_TYPES = ("DIFFUSERSMODEL",) 56 | RETURN_NAMES = ("diffusers_model",) 57 | FUNCTION = "loadmodel" 58 | CATEGORY = "IC-Light-Wrapper" 59 | 60 | def loadmodel(self, model, clip, vae): 61 | mm.soft_empty_cache() 62 | dtype = mm.unet_dtype() 63 | vae_dtype = mm.vae_dtype() 64 | device = mm.get_torch_device() 65 | 66 | custom_config = { 67 | 'model': model, 68 | 'vae': vae, 69 | } 70 | if not hasattr(self, 'model') or self.model == None or custom_config != self.current_config: 71 | pbar = comfy.utils.ProgressBar(5) 72 | self.current_config = custom_config 73 | # setup pretrained models 74 | original_config = OmegaConf.load(os.path.join(script_directory, f"configs/v1-inference.yaml")) 75 | 76 | print("loading ELLA") 77 | checkpoint_path = os.path.join(folder_paths.models_dir,'ella') 78 | ella_path = os.path.join(checkpoint_path, 'ella-sd1.5-tsc-t5xl.safetensors') 79 | if not os.path.exists(ella_path): 80 | from huggingface_hub import snapshot_download 81 | snapshot_download(repo_id="QQGYLab/ELLA", local_dir=checkpoint_path, local_dir_use_symlinks=False) 82 | 83 | with (init_empty_weights() if is_accelerate_available() else nullcontext()): 84 | converted_vae_config = create_vae_diffusers_config(original_config, image_size=512) 85 | new_vae = AutoencoderKL(**converted_vae_config) 86 | 87 | converted_unet_config = create_unet_diffusers_config(original_config, image_size=512) 88 | unet = 
UNet2DConditionModel(**converted_unet_config) 89 | 90 | clip_sd = None 91 | load_models = [model] 92 | load_models.append(clip.load_model()) 93 | clip_sd = clip.get_sd() 94 | comfy.model_management.load_models_gpu(load_models) 95 | sd = model.model.state_dict_for_saving(clip_sd, vae.get_sd(), None) 96 | 97 | converted_vae = convert_ldm_vae_checkpoint(sd, converted_vae_config) 98 | if is_accelerate_available(): 99 | for key in converted_vae: 100 | set_module_tensor_to_device(new_vae, key, device=device, dtype=dtype, value=converted_vae[key]) 101 | else: 102 | new_vae.load_state_dict(converted_vae, strict=False) 103 | del converted_vae 104 | pbar.update(1) 105 | 106 | converted_unet = convert_ldm_unet_checkpoint(sd, converted_unet_config) 107 | if is_accelerate_available(): 108 | for key in converted_unet: 109 | set_module_tensor_to_device(unet, key, device=device, dtype=dtype, value=converted_unet[key]) 110 | else: 111 | unet.load_state_dict(converted_unet, strict=False) 112 | del converted_unet 113 | 114 | pbar.update(1) 115 | # 3. text_model 116 | print("loading text model") 117 | text_encoder = create_text_encoder_from_ldm_clip_checkpoint("openai/clip-vit-large-patch14",sd) 118 | scheduler_config = { 119 | 'num_train_timesteps': 1000, 120 | 'beta_start': 0.00085, 121 | 'beta_end': 0.012, 122 | 'beta_schedule': "scaled_linear", 123 | 'steps_offset': 1 124 | } 125 | # 4. tokenizer 126 | tokenizer_path = os.path.join(script_directory, "configs/tokenizer") 127 | tokenizer = CLIPTokenizer.from_pretrained(tokenizer_path) 128 | 129 | scheduler=DPMSolverMultistepScheduler(**scheduler_config) 130 | pbar.update(1) 131 | del sd 132 | 133 | pbar.update(1) 134 | 135 | print("creating pipeline") 136 | self.pipe = StableDiffusionImg2ImgPipeline( 137 | unet=unet, 138 | vae=new_vae, 139 | text_encoder=text_encoder, 140 | tokenizer=tokenizer, 141 | scheduler=scheduler, 142 | safety_checker=None, 143 | feature_extractor=None, 144 | requires_safety_checker=False, 145 | image_encoder=None 146 | ) 147 | print("pipeline created") 148 | pbar.update(1) 149 | #self.pipe.enable_model_cpu_offload() 150 | diffusers_model = { 151 | 'pipe': self.pipe, 152 | } 153 | 154 | return (diffusers_model,) 155 | 156 | class LoadICLightUnetDiffusers: 157 | @classmethod 158 | def INPUT_TYPES(s): 159 | return { 160 | "required": { 161 | "diffusersmodel": ("DIFFUSERSMODEL",), 162 | "model_path": (folder_paths.get_filename_list("unet"), ) 163 | } 164 | } 165 | 166 | RETURN_TYPES = ("DIFFUSERSMODEL",) 167 | FUNCTION = "load" 168 | CATEGORY = "IC-Light-Wrapper" 169 | 170 | def load(self, diffusersmodel, model_path): 171 | unet = diffusersmodel["pipe"].unet 172 | device = mm.get_torch_device() 173 | 174 | unet_original_forward = unet.forward 175 | 176 | new_conv_in = torch.nn.Conv2d(8, unet.conv_in.out_channels, unet.conv_in.kernel_size, unet.conv_in.stride, unet.conv_in.padding) 177 | new_conv_in.weight.zero_() 178 | new_conv_in.weight[:, :4, :, :].copy_(unet.conv_in.weight) 179 | new_conv_in.bias = unet.conv_in.bias 180 | unet.conv_in = new_conv_in 181 | 182 | def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs): 183 | c_concat = kwargs['cross_attention_kwargs']['concat_conds'].to(sample) 184 | c_concat = torch.cat([c_concat] * (sample.shape[0] // c_concat.shape[0]), dim=0) 185 | new_sample = torch.cat([sample, c_concat], dim=1) 186 | kwargs['cross_attention_kwargs'] = {} 187 | return unet_original_forward(new_sample, timestep, encoder_hidden_states, **kwargs) 188 | 189 | unet.forward = hooked_unet_forward 190 | 
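        # In brief: conv_in was widened above from 4 to 8 input channels so the UNet
        # can take the noisy latent concatenated with the IC-Light conditioning
        # latent supplied through cross_attention_kwargs['concat_conds']. The new
        # channels start zeroed, so the base model behaves as before until the
        # IC-Light weights are merged in below. (The reference IC-Light
        # implementation does this surgery under torch.no_grad(); the same guard
        # would be needed here if autograd were enabled in the host process.)
        # The checkpoint loaded below is an additive offset, not a full state dict:
        # each tensor is added to the matching base UNet weight,
        # sd_merged[k] = sd_origin[k] + sd_offset[k].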
191 |         model_full_path = folder_paths.get_full_path("unet", model_path)
192 |         if not os.path.exists(model_full_path):
193 |             raise FileNotFoundError(f"Invalid model path: {model_full_path}")
194 |         else:
195 |             print("LoadICLightUnetDiffusers: loading IC-Light UNet weights")
196 |             from comfy.utils import load_torch_file
197 |             sd_offset = load_torch_file(model_full_path, device=mm.get_torch_device())
198 |             sd_origin = unet.state_dict()
199 | 
200 |             sd_merged = {k: sd_origin[k].to(device) + sd_offset[k].to(device) for k in sd_origin.keys()}
201 |             unet.load_state_dict(sd_merged, strict=True)
202 |             del sd_offset, sd_origin, sd_merged
203 | 
204 |         return (diffusersmodel,)
205 | 
206 | class iclight_diffusers_sampler:
207 |     @classmethod
208 |     def INPUT_TYPES(s):
209 |         return {"required": {
210 |             "diffusers_model": ("DIFFUSERSMODEL",),
211 |             "latent": ("LATENT",),
212 |             "width": ("INT", {"default": 512, "min": 64, "max": 2048, "step": 64}),
213 |             "height": ("INT", {"default": 512, "min": 64, "max": 2048, "step": 64}),
214 |             "steps": ("INT", {"default": 25, "min": 1, "max": 200, "step": 1}),
215 |             "guidance_scale": ("FLOAT", {"default": 2.0, "min": 1.01, "max": 20.0, "step": 0.01}),
216 |             "denoise_strength": ("FLOAT", {"default": 0.9, "min": 0.01, "max": 1.0, "step": 0.01}),
217 |             "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
218 |             "scheduler": (
219 |                 [
220 |                     'DPMSolverMultistepScheduler',
221 |                     'DPMSolverMultistepScheduler_SDE_karras',
222 |                     'DDPMScheduler',
223 |                     'LCMScheduler',
224 |                     'PNDMScheduler',
225 |                     'DEISMultistepScheduler',
226 |                     'EulerDiscreteScheduler',
227 |                     'EulerAncestralDiscreteScheduler',
228 |                     'UniPCMultistepScheduler',
229 |                     'TCDScheduler'
230 |                 ], {
231 |                     "default": 'DPMSolverMultistepScheduler'
232 |                 }),
233 |             "prompt": ("STRING", {"default": "positive", "multiline": True}),
234 |             "n_prompt": ("STRING", {"default": "negative", "multiline": True}),
235 |             "hidiffusion": ("BOOLEAN", {"default": False}),
236 |             },
237 |             "optional": {
238 |                 "bg_latent": ("LATENT",),
239 |                 "fixed_seed": ("BOOLEAN", {"default": True}),
240 |             }
241 |         }
242 | 
243 |     RETURN_TYPES = ("LATENT",)
244 |     RETURN_NAMES = ("samples",)
245 |     FUNCTION = "process"
246 |     CATEGORY = "IC-Light-Wrapper"
247 | 
248 |     def process(self, latent, diffusers_model, width, height, steps, guidance_scale, denoise_strength, seed, scheduler, prompt, n_prompt, hidiffusion, bg_latent=None, fixed_seed=True):
249 |         device = mm.get_torch_device()
250 |         mm.unload_all_models()
251 |         mm.soft_empty_cache()
252 |         dtype = mm.unet_dtype()
253 |         pipe = diffusers_model['pipe']
254 |         pipe.to(device, dtype=dtype)
255 |         scale_factor = pipe.vae.config.scaling_factor
256 | 
257 |         scheduler_config = {
258 |             'num_train_timesteps': 1000,
259 |             'beta_start': 0.00085,
260 |             'beta_end': 0.012,
261 |             'beta_schedule': "scaled_linear",
262 |             'steps_offset': 1,
263 |         }
264 |         if scheduler == 'DPMSolverMultistepScheduler':
265 |             noise_scheduler = DPMSolverMultistepScheduler(**scheduler_config)
266 |         elif scheduler == 'DPMSolverMultistepScheduler_SDE_karras':
267 |             scheduler_config.update({"algorithm_type": "sde-dpmsolver++"})
268 |             scheduler_config.update({"use_karras_sigmas": True})
269 |             noise_scheduler = DPMSolverMultistepScheduler(**scheduler_config)
270 |         elif scheduler == 'DDPMScheduler':
271 |             noise_scheduler = DDPMScheduler(**scheduler_config)
272 |         elif scheduler == 'LCMScheduler':
273 |             noise_scheduler = LCMScheduler(**scheduler_config)
274 |         elif scheduler == 'PNDMScheduler':
275 |             scheduler_config.update({"set_alpha_to_one": False})
276 |             scheduler_config.update({"trained_betas": None})
277 |             noise_scheduler = PNDMScheduler(**scheduler_config)
278 |         elif scheduler == 'DEISMultistepScheduler':
279 |             noise_scheduler = DEISMultistepScheduler(**scheduler_config)
280 |         elif scheduler == 'EulerDiscreteScheduler':
281 |             noise_scheduler = EulerDiscreteScheduler(**scheduler_config)
282 |         elif scheduler == 'EulerAncestralDiscreteScheduler':
283 |             noise_scheduler = EulerAncestralDiscreteScheduler(**scheduler_config)
284 |         elif scheduler == 'UniPCMultistepScheduler':
285 |             noise_scheduler = UniPCMultistepScheduler(**scheduler_config)
286 |         elif scheduler == 'TCDScheduler':
287 |             noise_scheduler = TCDScheduler(**scheduler_config)
288 | 
289 |         pipe.scheduler = noise_scheduler
290 |         if hidiffusion:
291 |             apply_hidiffusion(pipe)
292 |         else:
293 |             remove_hidiffusion(pipe)
294 | 
295 |         if bg_latent is not None:
296 |             bg_latent = bg_latent["samples"]
297 |             bg_latent = bg_latent * pipe.vae.config.scaling_factor
298 |         else:
299 |             bg_latent = None
300 | 
301 |         concat_conds = latent["samples"]
302 |         concat_conds = concat_conds * pipe.vae.config.scaling_factor
303 |         B, C, H, W = latent["samples"].shape
304 |         prompt_list = []
305 |         prompt_list.append(prompt)
306 |         if len(prompt_list) < B:
307 |             prompt_list += [prompt_list[-1]] * (B - len(prompt_list))
308 | 
309 |         n_prompt_list = []
310 |         n_prompt_list.append(n_prompt)
311 |         if len(n_prompt_list) < B:
312 |             n_prompt_list += [n_prompt_list[-1]] * (B - len(n_prompt_list))
313 | 
314 | 
315 |         if fixed_seed:
316 |             generator = [torch.Generator(device=device).manual_seed(seed) for _ in range(B)]
317 |         else:
318 |             generator = [torch.Generator(device=device).manual_seed(i) for i in range(B)]
319 | 
320 |         pbar = comfy.utils.ProgressBar(steps)
321 |         def progress_counter_callback(pipeline, step, timestep, callback_kwargs):
322 |             pbar.update(1)
323 |             return callback_kwargs or {}
324 | 
325 |         autocast_condition = (dtype != torch.float32) and not mm.is_device_mps(device)
326 |         with torch.autocast(mm.get_autocast_device(device), dtype=dtype) if autocast_condition else nullcontext():
327 | 
328 |             images = pipe(
329 |                 image=bg_latent,
330 |                 prompt=prompt_list,
331 |                 strength=denoise_strength,
332 |                 negative_prompt=n_prompt_list,
333 |                 prompt_embeds=None,
334 |                 negative_prompt_embeds=None,
335 |                 guidance_scale=guidance_scale,
336 |                 num_inference_steps=int(round(steps / denoise_strength)),
337 |                 height=height,
338 |                 width=width,
339 |                 cross_attention_kwargs={'concat_conds': concat_conds},
340 |                 generator=generator,
341 |                 output_type="latent",
342 |                 callback_on_step_end=progress_counter_callback,
343 |                 #callback_on_step_end_tensor_inputs=["latents", "prompt_embeds", "negative_prompt_embeds"],
344 |             ).images
345 |         images = images / scale_factor
346 |         #image_out = images.permute(0, 2, 3, 1).cpu().float()
347 |         return ({"samples": images},)
348 | 
349 | NODE_CLASS_MAPPINGS = {
350 |     "diffusers_model_loader": diffusers_model_loader,
351 |     "LoadICLightUnetDiffusers": LoadICLightUnetDiffusers,
352 |     "iclight_diffusers_sampler": iclight_diffusers_sampler
353 | }
354 | NODE_DISPLAY_NAME_MAPPINGS = {
355 |     "diffusers_model_loader": "Diffusers Model Loader",
356 |     "LoadICLightUnetDiffusers": "LoadICLightUnetDiffusers",
357 |     "iclight_diffusers_sampler": "IC-Light Diffusers Sampler"
358 | }
359 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | diffusers>=0.27.2
2 | accelerate>=0.29.2
3 | omegaconf
4
| -------------------------------------------------------------------------------- /scheduling_tcd.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Stanford University Team and The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion 16 | # and https://github.com/hojonathanho/diffusion 17 | 18 | import math 19 | from dataclasses import dataclass 20 | from typing import List, Optional, Tuple, Union 21 | 22 | import numpy as np 23 | import torch 24 | 25 | from diffusers.configuration_utils import ConfigMixin, register_to_config 26 | from diffusers.schedulers.scheduling_utils import SchedulerMixin 27 | from diffusers.utils import BaseOutput, logging 28 | from diffusers.utils.torch_utils import randn_tensor 29 | 30 | 31 | logger = logging.get_logger(__name__) # pylint: disable=invalid-name 32 | 33 | 34 | @dataclass 35 | class TCDSchedulerOutput(BaseOutput): 36 | """ 37 | Output class for the scheduler's `step` function output. 38 | 39 | Args: 40 | prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): 41 | Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the 42 | denoising loop. 43 | pred_noised_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): 44 | The predicted noised sample `(x_{s})` based on the model output from the current timestep. 45 | """ 46 | 47 | prev_sample: torch.FloatTensor 48 | pred_noised_sample: Optional[torch.FloatTensor] = None 49 | 50 | 51 | # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar 52 | def betas_for_alpha_bar( 53 | num_diffusion_timesteps, 54 | max_beta=0.999, 55 | alpha_transform_type="cosine", 56 | ): 57 | """ 58 | Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of 59 | (1-beta) over time from t = [0,1]. 60 | 61 | Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up 62 | to that part of the diffusion process. 63 | 64 | 65 | Args: 66 | num_diffusion_timesteps (`int`): the number of betas to produce. 67 | max_beta (`float`): the maximum beta to use; use values lower than 1 to 68 | prevent singularities. 69 | alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. 
70 |             Choose from `cosine` or `exp`
71 | 
72 |     Returns:
73 |         betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
74 |     """
75 |     if alpha_transform_type == "cosine":
76 | 
77 |         def alpha_bar_fn(t):
78 |             return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
79 | 
80 |     elif alpha_transform_type == "exp":
81 | 
82 |         def alpha_bar_fn(t):
83 |             return math.exp(t * -12.0)
84 | 
85 |     else:
86 |         raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")
87 | 
88 |     betas = []
89 |     for i in range(num_diffusion_timesteps):
90 |         t1 = i / num_diffusion_timesteps
91 |         t2 = (i + 1) / num_diffusion_timesteps
92 |         betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
93 |     return torch.tensor(betas, dtype=torch.float32)
94 | 
95 | 
96 | # Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
97 | def rescale_zero_terminal_snr(betas: torch.FloatTensor) -> torch.FloatTensor:
98 |     """
99 |     Rescales betas to have zero terminal SNR, based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
100 | 
101 | 
102 |     Args:
103 |         betas (`torch.FloatTensor`):
104 |             the betas that the scheduler is being initialized with.
105 | 
106 |     Returns:
107 |         `torch.FloatTensor`: rescaled betas with zero terminal SNR
108 |     """
109 |     # Convert betas to alphas_bar_sqrt
110 |     alphas = 1.0 - betas
111 |     alphas_cumprod = torch.cumprod(alphas, dim=0)
112 |     alphas_bar_sqrt = alphas_cumprod.sqrt()
113 | 
114 |     # Store old values.
115 |     alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
116 |     alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
117 | 
118 |     # Shift so the last timestep is zero.
119 |     alphas_bar_sqrt -= alphas_bar_sqrt_T
120 | 
121 |     # Scale so the first timestep is back to the old value.
122 |     alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
123 | 
124 |     # Convert alphas_bar_sqrt to betas
125 |     alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
126 |     alphas = alphas_bar[1:] / alphas_bar[:-1]  # Revert cumprod
127 |     alphas = torch.cat([alphas_bar[0:1], alphas])
128 |     betas = 1 - alphas
129 | 
130 |     return betas
131 | 
132 | 
133 | class TCDScheduler(SchedulerMixin, ConfigMixin):
134 |     """
135 |     `TCDScheduler` incorporates the `Strategic Stochastic Sampling` introduced by the paper `Trajectory Consistency
136 |     Distillation`, extending the original Multistep Consistency Sampling to enable unrestricted trajectory traversal.
137 | 
138 |     This code is based on the official repo of TCD (https://github.com/jabir-zheng/TCD).
139 | 
140 |     This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. [`~ConfigMixin`] takes care of storing all config
141 |     attributes that are passed in the scheduler's `__init__` function, such as `num_train_timesteps`. They can be
142 |     accessed via `scheduler.config.num_train_timesteps`. [`SchedulerMixin`] provides general loading and saving
143 |     functionality via the [`SchedulerMixin.save_pretrained`] and [`~SchedulerMixin.from_pretrained`] functions.
144 | 
145 |     Args:
146 |         num_train_timesteps (`int`, defaults to 1000):
147 |             The number of diffusion steps to train the model.
148 |         beta_start (`float`, defaults to 0.00085):
149 |             The starting `beta` value of inference.
150 |         beta_end (`float`, defaults to 0.012):
151 |             The final `beta` value.
152 |         beta_schedule (`str`, defaults to `"scaled_linear"`):
153 |             The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
154 |             `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
155 |         trained_betas (`np.ndarray`, *optional*):
156 |             Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
157 |         original_inference_steps (`int`, *optional*, defaults to 50):
158 |             The default number of inference steps used to generate a linearly-spaced timestep schedule, from which we
159 |             will ultimately take `num_inference_steps` evenly spaced timesteps to form the final timestep schedule.
160 |         clip_sample (`bool`, defaults to `False`):
161 |             Clip the predicted sample for numerical stability.
162 |         clip_sample_range (`float`, defaults to 1.0):
163 |             The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
164 |         set_alpha_to_one (`bool`, defaults to `True`):
165 |             Each diffusion step uses the alphas product value at that step and at the previous one. For the final step
166 |             there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
167 |             otherwise it uses the alpha value at step 0.
168 |         steps_offset (`int`, defaults to 0):
169 |             An offset added to the inference steps, as required by some model families.
170 |         prediction_type (`str`, defaults to `epsilon`, *optional*):
171 |             Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
172 |             `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
173 |             Video](https://imagen.research.google/video/paper.pdf) paper).
174 |         thresholding (`bool`, defaults to `False`):
175 |             Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
176 |             as Stable Diffusion.
177 |         dynamic_thresholding_ratio (`float`, defaults to 0.995):
178 |             The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
179 |         sample_max_value (`float`, defaults to 1.0):
180 |             The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
181 |         timestep_spacing (`str`, defaults to `"leading"`):
182 |             The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
183 |             Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
184 |         timestep_scaling (`float`, defaults to 10.0):
185 |             The factor the timesteps will be multiplied by when calculating the consistency model boundary conditions
186 |             `c_skip` and `c_out`. Increasing this will decrease the approximation error (although the approximation
187 |             error at the default of `10.0` is already pretty small).
188 |         rescale_betas_zero_snr (`bool`, defaults to `False`):
189 |             Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
190 |             dark samples instead of limiting it to samples with medium brightness. Loosely related to
191 |             [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
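
    Example (an illustrative sketch: it assumes a TCD-distilled checkpoint or TCD
    LoRA is loaded, since the scheduler alone does not make a base model few-step
    capable, and it assumes `step` accepts `eta` as in the upstream TCD scheduler;
    the repo id below is only a placeholder for an SD1.5-family checkpoint):

    ```py
    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")
    # Reuse the pipeline's existing schedule configuration.
    pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
    # `eta` (gamma in the TCD paper) in [0, 1] sets the stochasticity of
    # Strategic Stochastic Sampling; 0.3 is the upstream default.
    image = pipe("a photo of a cat", num_inference_steps=4, eta=0.3).images[0]
    ```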
192 |     """
193 | 
194 |     order = 1
195 | 
196 |     @register_to_config
197 |     def __init__(
198 |         self,
199 |         num_train_timesteps: int = 1000,
200 |         beta_start: float = 0.00085,
201 |         beta_end: float = 0.012,
202 |         beta_schedule: str = "scaled_linear",
203 |         trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
204 |         original_inference_steps: int = 50,
205 |         clip_sample: bool = False,
206 |         clip_sample_range: float = 1.0,
207 |         set_alpha_to_one: bool = True,
208 |         steps_offset: int = 0,
209 |         prediction_type: str = "epsilon",
210 |         thresholding: bool = False,
211 |         dynamic_thresholding_ratio: float = 0.995,
212 |         sample_max_value: float = 1.0,
213 |         timestep_spacing: str = "leading",
214 |         timestep_scaling: float = 10.0,
215 |         rescale_betas_zero_snr: bool = False,
216 |     ):
217 |         if trained_betas is not None:
218 |             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
219 |         elif beta_schedule == "linear":
220 |             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
221 |         elif beta_schedule == "scaled_linear":
222 |             # this schedule is very specific to the latent diffusion model.
223 |             self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
224 |         elif beta_schedule == "squaredcos_cap_v2":
225 |             # Glide cosine schedule
226 |             self.betas = betas_for_alpha_bar(num_train_timesteps)
227 |         else:
228 |             raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")
229 | 
230 |         # Rescale for zero SNR
231 |         if rescale_betas_zero_snr:
232 |             self.betas = rescale_zero_terminal_snr(self.betas)
233 | 
234 |         self.alphas = 1.0 - self.betas
235 |         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
236 | 
237 |         # At every step in ddim, we are looking into the previous alphas_cumprod
238 |         # For the final step, there is no previous alphas_cumprod because we are already at 0
239 |         # `set_alpha_to_one` decides whether we set this parameter simply to one or
240 |         # whether we use the alpha product at the first timestep (`alphas_cumprod[0]`) instead.
241 |         self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0]
242 | 
243 |         # standard deviation of the initial noise distribution
244 |         self.init_noise_sigma = 1.0
245 | 
246 |         # setable values
247 |         self.num_inference_steps = None
248 |         self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
249 |         self.custom_timesteps = False
250 | 
251 |         self._step_index = None
252 |         self._begin_index = None
253 | 
254 |     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep
255 |     def index_for_timestep(self, timestep, schedule_timesteps=None):
256 |         if schedule_timesteps is None:
257 |             schedule_timesteps = self.timesteps
258 | 
259 |         indices = (schedule_timesteps == timestep).nonzero()
260 | 
261 |         # The sigma index that is taken for the **very** first `step`
262 |         # is always the second index (or the last index if there is only 1)
263 |         # This way we can ensure we don't accidentally skip a sigma in
264 |         # case we start in the middle of the denoising schedule (e.g.
for image-to-image)
265 |         pos = 1 if len(indices) > 1 else 0
266 | 
267 |         return indices[pos].item()
268 | 
269 |     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
270 |     def _init_step_index(self, timestep):
271 |         if self.begin_index is None:
272 |             if isinstance(timestep, torch.Tensor):
273 |                 timestep = timestep.to(self.timesteps.device)
274 |             self._step_index = self.index_for_timestep(timestep)
275 |         else:
276 |             self._step_index = self._begin_index
277 | 
278 |     @property
279 |     def step_index(self):
280 |         return self._step_index
281 | 
282 |     @property
283 |     def begin_index(self):
284 |         """
285 |         The index of the first timestep. It should be set by the pipeline with the `set_begin_index` method.
286 |         """
287 |         return self._begin_index
288 | 
289 |     # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index
290 |     def set_begin_index(self, begin_index: int = 0):
291 |         """
292 |         Sets the begin index for the scheduler. This function should be run by the pipeline before inference.
293 | 
294 |         Args:
295 |             begin_index (`int`):
296 |                 The begin index for the scheduler.
297 |         """
298 |         self._begin_index = begin_index
299 | 
300 |     def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
301 |         """
302 |         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
303 |         current timestep.
304 | 
305 |         Args:
306 |             sample (`torch.FloatTensor`):
307 |                 The input sample.
308 |             timestep (`int`, *optional*):
309 |                 The current timestep in the diffusion chain.
310 | 
311 |         Returns:
312 |             `torch.FloatTensor`:
313 |                 A scaled input sample.
314 |         """
315 |         return sample
316 | 
317 |     # Copied from diffusers.schedulers.scheduling_ddim.DDIMScheduler._get_variance
318 |     def _get_variance(self, timestep, prev_timestep):
319 |         alpha_prod_t = self.alphas_cumprod[timestep]
320 |         alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
321 |         beta_prod_t = 1 - alpha_prod_t
322 |         beta_prod_t_prev = 1 - alpha_prod_t_prev
323 | 
324 |         variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev)
325 | 
326 |         return variance
327 | 
328 |     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
329 |     def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
330 |         """
331 |         "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
332 |         prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
333 |         s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
334 |         pixels from saturation at each step. We find that dynamic thresholding results in significantly better
335 |         photorealism as well as better image-text alignment, especially when using very large guidance weights."
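        (Illustrative numbers, not from the paper: with `dynamic_thresholding_ratio=0.995`, if the
        99.5th-percentile absolute value of the x_0 prediction is s = 2.4, the prediction is clamped to
        [-2.4, 2.4] and divided by 2.4, mapping it back into [-1, 1]; when s <= 1, the `min=1` clamp in the
        code below reduces this to plain clipping to [-1, 1].)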
336 | 
337 |         https://arxiv.org/abs/2205.11487
338 |         """
339 |         dtype = sample.dtype
340 |         batch_size, channels, *remaining_dims = sample.shape
341 | 
342 |         if dtype not in (torch.float32, torch.float64):
343 |             sample = sample.float()  # upcast for quantile calculation, and clamp not implemented for cpu half
344 | 
345 |         # Flatten sample to do the quantile calculation along each image
346 |         sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
347 | 
348 |         abs_sample = sample.abs()  # "a certain percentile absolute pixel value"
349 | 
350 |         s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
351 |         s = torch.clamp(
352 |             s, min=1, max=self.config.sample_max_value
353 |         )  # When clamped to min=1, equivalent to standard clipping to [-1, 1]
354 |         s = s.unsqueeze(1)  # (batch_size, 1) because clamp will broadcast along dim=0
355 |         sample = torch.clamp(sample, -s, s) / s  # "we threshold xt0 to the range [-s, s] and then divide by s"
356 | 
357 |         sample = sample.reshape(batch_size, channels, *remaining_dims)
358 |         sample = sample.to(dtype)
359 | 
360 |         return sample
361 | 
362 |     def set_timesteps(
363 |         self,
364 |         num_inference_steps: Optional[int] = None,
365 |         device: Union[str, torch.device] = None,
366 |         original_inference_steps: Optional[int] = None,
367 |         timesteps: Optional[List[int]] = None,
368 |         strength: float = 1.0,
369 |     ):
370 |         """
371 |         Sets the discrete timesteps used for the diffusion chain (to be run before inference).
372 | 
373 |         Args:
374 |             num_inference_steps (`int`, *optional*):
375 |                 The number of diffusion steps used when generating samples with a pre-trained model. If used,
376 |                 `timesteps` must be `None`.
377 |             device (`str` or `torch.device`, *optional*):
378 |                 The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
379 |             original_inference_steps (`int`, *optional*):
380 |                 The original number of inference steps, which will be used to generate a linearly-spaced timestep
381 |                 schedule (which is different from the standard `diffusers` implementation). We will then take
382 |                 `num_inference_steps` timesteps from this schedule, evenly spaced in terms of indices, and use that as
383 |                 our final timestep schedule. If not set, this will default to the `original_inference_steps` attribute.
384 |             timesteps (`List[int]`, *optional*):
385 |                 Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
386 |                 timestep spacing strategy of equal spacing between timesteps on the training/distillation timestep
387 |                 schedule is used. If `timesteps` is passed, `num_inference_steps` must be `None`.
388 |             strength (`float`, *optional*, defaults to 1.0):
389 |                 Used to determine the number of timesteps used for inference when using img2img, inpaint, etc.
390 |         """
391 |         # 0. Check inputs
392 |         if num_inference_steps is None and timesteps is None:
393 |             raise ValueError("Must pass exactly one of `num_inference_steps` or `timesteps`.")
394 | 
395 |         if num_inference_steps is not None and timesteps is not None:
396 |             raise ValueError("Can only pass one of `num_inference_steps` or `timesteps`.")
397 | 
398 |         # 1. Calculate the TCD original training/distillation timestep schedule.
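        # (Illustration with the default config, assumed values for clarity: num_train_timesteps=1000
        # and original_inference_steps=50 give k = 1000 // 50 = 20, so at strength=1.0 the schedule
        # built below is np.arange(1, 51) * 20 - 1, i.e. [19, 39, 59, ..., 999].)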
399 |         original_steps = (
400 |             original_inference_steps if original_inference_steps is not None else self.config.original_inference_steps
401 |         )
402 | 
403 |         if original_inference_steps is None:
404 |             # default option, timesteps align with discrete inference steps
405 |             if original_steps > self.config.num_train_timesteps:
406 |                 raise ValueError(
407 |                     f"`original_steps`: {original_steps} cannot be larger than `self.config.num_train_timesteps`:"
408 |                     f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
409 |                     f" maximal {self.config.num_train_timesteps} timesteps."
410 |                 )
411 |             # TCD Timesteps Setting
412 |             # The skipping step parameter k from the paper.
413 |             k = self.config.num_train_timesteps // original_steps
414 |             # TCD Training/Distillation Steps Schedule
415 |             tcd_origin_timesteps = np.asarray(list(range(1, int(original_steps * strength) + 1))) * k - 1
416 |         else:
417 |             # customised option, sampled timesteps can be any arbitrary value
418 |             tcd_origin_timesteps = np.asarray(list(range(0, int(self.config.num_train_timesteps * strength))))
419 | 
420 |         # 2. Calculate the TCD inference timestep schedule.
421 |         if timesteps is not None:
422 |             # 2.1 Handle custom timestep schedules.
423 |             train_timesteps = set(tcd_origin_timesteps)
424 |             non_train_timesteps = []
425 |             for i in range(1, len(timesteps)):
426 |                 if timesteps[i] >= timesteps[i - 1]:
427 |                     raise ValueError("`timesteps` must be in descending order.")
428 | 
429 |                 if timesteps[i] not in train_timesteps:
430 |                     non_train_timesteps.append(timesteps[i])
431 | 
432 |             if timesteps[0] >= self.config.num_train_timesteps:
433 |                 raise ValueError(
434 |                     f"`timesteps` must start before `self.config.num_train_timesteps`:"
435 |                     f" {self.config.num_train_timesteps}."
436 |                 )
437 | 
438 |             # Raise warning if timestep schedule does not start with self.config.num_train_timesteps - 1
439 |             if strength == 1.0 and timesteps[0] != self.config.num_train_timesteps - 1:
440 |                 logger.warning(
441 |                     f"The first timestep on the custom timestep schedule is {timesteps[0]}, not"
442 |                     f" `self.config.num_train_timesteps - 1`: {self.config.num_train_timesteps - 1}. You may get"
443 |                     f" unexpected results when using this timestep schedule."
444 |                 )
445 | 
446 |             # Raise warning if custom timestep schedule contains timesteps not on original timestep schedule
447 |             if non_train_timesteps:
448 |                 logger.warning(
449 |                     f"The custom timestep schedule contains the following timesteps which are not on the original"
450 |                     f" training/distillation timestep schedule: {non_train_timesteps}. You may get unexpected results"
451 |                     f" when using this timestep schedule."
452 |                 )
453 | 
454 |             # Raise warning if custom timestep schedule is longer than original_steps
455 |             if original_steps is not None:
456 |                 if len(timesteps) > original_steps:
457 |                     logger.warning(
458 |                         f"The number of timesteps in the custom timestep schedule is {len(timesteps)}, which exceeds"
459 |                         f" the length of the timestep schedule used for training: {original_steps}. You may get some"
460 |                         f" unexpected results when using this timestep schedule."
461 |                     )
462 |             else:
463 |                 if len(timesteps) > self.config.num_train_timesteps:
464 |                     logger.warning(
465 |                         f"The number of timesteps in the custom timestep schedule is {len(timesteps)}, which exceeds"
466 |                         f" the length of the timestep schedule used for training: {self.config.num_train_timesteps}. You may get some"
467 |                         f" unexpected results when using this timestep schedule."
468 |                 )
469 | 
470 |             timesteps = np.array(timesteps, dtype=np.int64)
471 |             self.num_inference_steps = len(timesteps)
472 |             self.custom_timesteps = True
473 | 
474 |             # Apply strength (e.g. for img2img pipelines) (see StableDiffusionImg2ImgPipeline.get_timesteps)
475 |             init_timestep = min(int(self.num_inference_steps * strength), self.num_inference_steps)
476 |             t_start = max(self.num_inference_steps - init_timestep, 0)
477 |             timesteps = timesteps[t_start * self.order :]
478 |             # TODO: also reset self.num_inference_steps?
479 |         else:
480 |             # 2.2 Create the "standard" TCD inference timestep schedule.
481 |             if num_inference_steps > self.config.num_train_timesteps:
482 |                 raise ValueError(
483 |                     f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.num_train_timesteps`:"
484 |                     f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
485 |                     f" maximal {self.config.num_train_timesteps} timesteps."
486 |                 )
487 | 
488 |             if original_steps is not None:
489 |                 skipping_step = len(tcd_origin_timesteps) // num_inference_steps
490 | 
491 |                 if skipping_step < 1:
492 |                     raise ValueError(
493 |                         f"The combination of `original_steps x strength`: {original_steps} x {strength} is smaller than `num_inference_steps`: {num_inference_steps}. Make sure to either reduce `num_inference_steps` to a value smaller than {int(original_steps * strength)} or increase `strength` to a value higher than {float(num_inference_steps / original_steps)}."
494 |                     )
495 | 
496 |             self.num_inference_steps = num_inference_steps
497 | 
498 |             if original_steps is not None:
499 |                 if num_inference_steps > original_steps:
500 |                     raise ValueError(
501 |                         f"`num_inference_steps`: {num_inference_steps} cannot be larger than `original_inference_steps`:"
502 |                         f" {original_steps} because the final timestep schedule will be a subset of the"
503 |                         f" `original_inference_steps`-sized initial timestep schedule."
504 |                     )
505 |             else:
506 |                 if num_inference_steps > self.config.num_train_timesteps:
507 |                     raise ValueError(
508 |                         f"`num_inference_steps`: {num_inference_steps} cannot be larger than `num_train_timesteps`:"
509 |                         f" {self.config.num_train_timesteps} because the final timestep schedule will be a subset of the"
510 |                         f" `num_train_timesteps`-sized initial timestep schedule."
511 |                     )
512 | 
513 |             # TCD Inference Steps Schedule
514 |             tcd_origin_timesteps = tcd_origin_timesteps[::-1].copy()
515 |             # Select (approximately) evenly spaced indices from tcd_origin_timesteps.
516 |             inference_indices = np.linspace(0, len(tcd_origin_timesteps), num=num_inference_steps, endpoint=False)
517 |             inference_indices = np.floor(inference_indices).astype(np.int64)
518 |             timesteps = tcd_origin_timesteps[inference_indices]
519 | 
520 |         self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.long)
521 | 
522 |         self._step_index = None
523 |         self._begin_index = None
524 | 
525 |     def step(
526 |         self,
527 |         model_output: torch.FloatTensor,
528 |         timestep: int,
529 |         sample: torch.FloatTensor,
530 |         eta: float = 0.3,
531 |         generator: Optional[torch.Generator] = None,
532 |         return_dict: bool = True,
533 |     ) -> Union[TCDSchedulerOutput, Tuple]:
534 |         """
535 |         Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
536 |         process from the learned model outputs (most often the predicted noise).
537 | 
538 |         Args:
539 |             model_output (`torch.FloatTensor`):
540 |                 The direct output from the learned diffusion model.
541 |             timestep (`int`):
542 |                 The current discrete timestep in the diffusion chain.
543 |             sample (`torch.FloatTensor`):
544 |                 A current instance of a sample created by the diffusion process.
545 |             eta (`float`):
546 |                 A stochastic parameter (referred to as `gamma` in the paper) used to control the stochasticity in every
547 |                 step. When eta = 0, it represents deterministic sampling, whereas eta = 1 indicates full stochastic
548 |                 sampling.
549 |             generator (`torch.Generator`, *optional*):
550 |                 A random number generator.
551 |             return_dict (`bool`, *optional*, defaults to `True`):
552 |                 Whether or not to return a [`~schedulers.scheduling_tcd.TCDSchedulerOutput`] or `tuple`.
553 |         Returns:
554 |             [`~schedulers.scheduling_tcd.TCDSchedulerOutput`] or `tuple`:
555 |                 If return_dict is `True`, [`~schedulers.scheduling_tcd.TCDSchedulerOutput`] is returned, otherwise a
556 |                 tuple is returned where the first element is the sample tensor.
557 |         """
558 |         if self.num_inference_steps is None:
559 |             raise ValueError(
560 |                 "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
561 |             )
562 | 
563 |         if self.step_index is None:
564 |             self._init_step_index(timestep)
565 | 
566 |         assert 0 <= eta <= 1.0, "`eta` (gamma in the paper) must be in [0, 1]."
567 | 
568 |         # 1. get previous step value
569 |         prev_step_index = self.step_index + 1
570 |         if prev_step_index < len(self.timesteps):
571 |             prev_timestep = self.timesteps[prev_step_index]
572 |         else:
573 |             prev_timestep = torch.tensor(0)
574 | 
575 |         timestep_s = torch.floor((1 - eta) * prev_timestep).to(dtype=torch.long)
576 | 
577 |         # 2. compute alphas, betas
578 |         alpha_prod_t = self.alphas_cumprod[timestep]
579 |         beta_prod_t = 1 - alpha_prod_t
580 | 
581 |         alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
582 | 
583 |         alpha_prod_s = self.alphas_cumprod[timestep_s]
584 |         beta_prod_s = 1 - alpha_prod_s
585 | 
586 |         # 3. Compute the predicted noised sample x_s based on the model parameterization
587 |         if self.config.prediction_type == "epsilon":  # noise-prediction
588 |             pred_original_sample = (sample - beta_prod_t.sqrt() * model_output) / alpha_prod_t.sqrt()
589 |             pred_epsilon = model_output
590 |             pred_noised_sample = alpha_prod_s.sqrt() * pred_original_sample + beta_prod_s.sqrt() * pred_epsilon
591 |         elif self.config.prediction_type == "sample":  # x-prediction
592 |             pred_original_sample = model_output
593 |             pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
594 |             pred_noised_sample = alpha_prod_s.sqrt() * pred_original_sample + beta_prod_s.sqrt() * pred_epsilon
595 |         elif self.config.prediction_type == "v_prediction":  # v-prediction
596 |             pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output
597 |             pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample
598 |             pred_noised_sample = alpha_prod_s.sqrt() * pred_original_sample + beta_prod_s.sqrt() * pred_epsilon
599 |         else:
600 |             raise ValueError(
601 |                 f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or"
602 |                 " `v_prediction` for `TCDScheduler`."
603 |             )
604 | 
605 |         # 4. Sample and inject noise z ~ N(0, I) for MultiStep Inference
606 |         # Noise is not used on the final timestep of the timestep schedule.
607 |         # This also means that noise is not used for one-step sampling.
608 |         # Eta (referred to as "gamma" in the paper) was introduced to control the stochasticity in every step.
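        # (Worked illustration with assumed values: eta = 0.3 and prev_timestep = 500 give
        # timestep_s = floor(0.7 * 500) = 350 above, so the sample is carried deterministically
        # down to t=350 and then diffused back up to t=500 with fresh Gaussian noise below.)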
609 |         # When eta = 0, it represents deterministic sampling, whereas eta = 1 indicates full stochastic sampling.
610 |         if eta > 0:
611 |             if self.step_index != self.num_inference_steps - 1:
612 |                 noise = randn_tensor(
613 |                     model_output.shape, generator=generator, device=model_output.device, dtype=pred_noised_sample.dtype
614 |                 )
615 |                 prev_sample = (alpha_prod_t_prev / alpha_prod_s).sqrt() * pred_noised_sample + (
616 |                     1 - alpha_prod_t_prev / alpha_prod_s
617 |                 ).sqrt() * noise
618 |             else:
619 |                 prev_sample = pred_noised_sample
620 |         else:
621 |             prev_sample = pred_noised_sample
622 | 
623 |         # upon completion, increase the step index by one
624 |         self._step_index += 1
625 | 
626 |         if not return_dict:
627 |             return (prev_sample, pred_noised_sample)
628 | 
629 |         return TCDSchedulerOutput(prev_sample=prev_sample, pred_noised_sample=pred_noised_sample)
630 | 
631 |     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
632 |     def add_noise(
633 |         self,
634 |         original_samples: torch.FloatTensor,
635 |         noise: torch.FloatTensor,
636 |         timesteps: torch.IntTensor,
637 |     ) -> torch.FloatTensor:
638 |         # Make sure alphas_cumprod and timesteps have the same device and dtype as original_samples
639 |         # Move self.alphas_cumprod to the target device to avoid redundant CPU-to-GPU data movement
640 |         # for the subsequent add_noise calls
641 |         self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
642 |         alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
643 |         timesteps = timesteps.to(original_samples.device)
644 | 
645 |         sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
646 |         sqrt_alpha_prod = sqrt_alpha_prod.flatten()
647 |         while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
648 |             sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
649 | 
650 |         sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
651 |         sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
652 |         while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
653 |             sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
654 | 
655 |         noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
656 |         return noisy_samples
657 | 
658 |     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
659 |     def get_velocity(
660 |         self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
661 |     ) -> torch.FloatTensor:
662 |         # Make sure alphas_cumprod and timesteps have the same device and dtype as sample
663 |         self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
664 |         alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
665 |         timesteps = timesteps.to(sample.device)
666 | 
667 |         sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
668 |         sqrt_alpha_prod = sqrt_alpha_prod.flatten()
669 |         while len(sqrt_alpha_prod.shape) < len(sample.shape):
670 |             sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
671 | 
672 |         sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
673 |         sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
674 |         while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape):
675 |             sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
676 | 
677 |         velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
678 |         return velocity
679 | 
680 |     def __len__(self):
681 |         return self.config.num_train_timesteps
682 | 
683 |     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep
684 |     def previous_timestep(self,
timestep): 685 | if self.custom_timesteps: 686 | index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0] 687 | if index == self.timesteps.shape[0] - 1: 688 | prev_t = torch.tensor(-1) 689 | else: 690 | prev_t = self.timesteps[index + 1] 691 | else: 692 | num_inference_steps = ( 693 | self.num_inference_steps if self.num_inference_steps else self.config.num_train_timesteps 694 | ) 695 | prev_t = timestep - self.config.num_train_timesteps // num_inference_steps 696 | 697 | return prev_t 698 | --------------------------------------------------------------------------------