├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── __init__.py
├── configs
│   ├── text_encoder_config.json
│   ├── tokenizer
│   │   ├── config.json
│   │   ├── merges.txt
│   │   ├── preprocessor_config.json
│   │   ├── special_tokens_map.json
│   │   ├── tokenizer.json
│   │   ├── tokenizer_config.json
│   │   └── vocab.json
│   ├── tokenizer_config.json
│   └── v1-inference.yaml
├── examples
│   ├── IC-Light_example_spotlight_01.json
│   └── iclight_spotlight_batch_example.json
├── hidiffusion
│   ├── __init__.py
│   ├── hidiffusion.py
│   ├── sd_module_key
│   │   ├── sd15_module_key.txt
│   │   └── sdxl_module_key.txt
│   └── utils.py
├── nodes.py
├── requirements.txt
└── scheduling_tcd.py
/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | checkpoints/ 3 | *.py[cod] 4 | *$py.class 5 | *.egg-info 6 | .pytest_cache -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI wrapper nodes for IC-Light 2 | 3 | # UPDATE: 4 | Moving my efforts to a more native implementation: https://github.com/kijai/ComfyUI-IC-Light 5 | 6 | ## Unfinished; development has stopped 7 | Original repo: https://github.com/lllyasviel/IC-Light/ 8 | 9 | Models: https://huggingface.co/lllyasviel/ic-light/tree/main 10 | 11 | The model files go into `ComfyUI/models/unet`. 12 | 13 | ![image](https://github.com/kijai/ComfyUI-IC-Light-Wrapper/assets/40791699/9687a243-d7af-4b08-99e9-d260f1859584) 14 | 15 | 16 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS 2 | 3 | __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] -------------------------------------------------------------------------------- /configs/text_encoder_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "openai/clip-vit-large-patch14", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "quick_gelu", 11 | "hidden_size": 768, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 3072, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 12, 19 | "num_hidden_layers": 12, 20 | "pad_token_id": 1, 21 | "projection_dim": 768, 22 | "torch_dtype": "float32", 23 | "transformers_version": "4.22.0.dev0", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /configs/tokenizer/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "clip-vit-large-patch14/", 3 | "architectures": [ 4 | "CLIPModel" 5 | ], 6 | "initializer_factor": 1.0, 7 | "logit_scale_init_value": 2.6592, 8 | "model_type": "clip", 9 | "projection_dim": 768, 10 | "text_config": { 11 | "_name_or_path": "", 12 | "add_cross_attention": false, 13 | "architectures": null, 14 | "attention_dropout": 0.0, 15 | "bad_words_ids": null, 16 | "bos_token_id": 0, 17 | "chunk_size_feed_forward": 0, 18 | "cross_attention_hidden_size": null, 19 | "decoder_start_token_id": null, 20 | "diversity_penalty": 0.0, 21 | "do_sample": false, 22 | "dropout": 0.0, 23 | "early_stopping": false, 24 | "encoder_no_repeat_ngram_size": 0, 25 | "eos_token_id": 2, 26 | "finetuning_task": null, 27 | "forced_bos_token_id": null, 28 | "forced_eos_token_id": null, 29 | "hidden_act": "quick_gelu", 30 | "hidden_size": 768, 31 | "id2label": { 32 | "0": "LABEL_0", 33 | "1": "LABEL_1" 34 | }, 35 | "initializer_factor": 1.0, 36 | "initializer_range": 0.02, 37 | "intermediate_size": 3072, 38 | "is_decoder": false, 39 | "is_encoder_decoder": false, 40 | "label2id": { 41 | "LABEL_0": 0, 42 | "LABEL_1": 1 43 | }, 44 | "layer_norm_eps": 1e-05, 45 | "length_penalty": 1.0, 46 | "max_length": 20, 47 | "max_position_embeddings": 77, 48 | "min_length": 0, 49 | "model_type": "clip_text_model", 50 | "no_repeat_ngram_size": 0, 51 | "num_attention_heads": 12, 52 | "num_beam_groups": 1, 53 | "num_beams": 1, 54 | 
"num_hidden_layers": 12, 55 | "num_return_sequences": 1, 56 | "output_attentions": false, 57 | "output_hidden_states": false, 58 | "output_scores": false, 59 | "pad_token_id": 1, 60 | "prefix": null, 61 | "problem_type": null, 62 | "projection_dim" : 768, 63 | "pruned_heads": {}, 64 | "remove_invalid_values": false, 65 | "repetition_penalty": 1.0, 66 | "return_dict": true, 67 | "return_dict_in_generate": false, 68 | "sep_token_id": null, 69 | "task_specific_params": null, 70 | "temperature": 1.0, 71 | "tie_encoder_decoder": false, 72 | "tie_word_embeddings": true, 73 | "tokenizer_class": null, 74 | "top_k": 50, 75 | "top_p": 1.0, 76 | "torch_dtype": null, 77 | "torchscript": false, 78 | "transformers_version": "4.16.0.dev0", 79 | "use_bfloat16": false, 80 | "vocab_size": 49408 81 | }, 82 | "text_config_dict": { 83 | "hidden_size": 768, 84 | "intermediate_size": 3072, 85 | "num_attention_heads": 12, 86 | "num_hidden_layers": 12, 87 | "projection_dim": 768 88 | }, 89 | "torch_dtype": "float32", 90 | "transformers_version": null, 91 | "vision_config": { 92 | "_name_or_path": "", 93 | "add_cross_attention": false, 94 | "architectures": null, 95 | "attention_dropout": 0.0, 96 | "bad_words_ids": null, 97 | "bos_token_id": null, 98 | "chunk_size_feed_forward": 0, 99 | "cross_attention_hidden_size": null, 100 | "decoder_start_token_id": null, 101 | "diversity_penalty": 0.0, 102 | "do_sample": false, 103 | "dropout": 0.0, 104 | "early_stopping": false, 105 | "encoder_no_repeat_ngram_size": 0, 106 | "eos_token_id": null, 107 | "finetuning_task": null, 108 | "forced_bos_token_id": null, 109 | "forced_eos_token_id": null, 110 | "hidden_act": "quick_gelu", 111 | "hidden_size": 1024, 112 | "id2label": { 113 | "0": "LABEL_0", 114 | "1": "LABEL_1" 115 | }, 116 | "image_size": 224, 117 | "initializer_factor": 1.0, 118 | "initializer_range": 0.02, 119 | "intermediate_size": 4096, 120 | "is_decoder": false, 121 | "is_encoder_decoder": false, 122 | "label2id": { 123 | "LABEL_0": 0, 124 | "LABEL_1": 1 125 | }, 126 | "layer_norm_eps": 1e-05, 127 | "length_penalty": 1.0, 128 | "max_length": 20, 129 | "min_length": 0, 130 | "model_type": "clip_vision_model", 131 | "no_repeat_ngram_size": 0, 132 | "num_attention_heads": 16, 133 | "num_beam_groups": 1, 134 | "num_beams": 1, 135 | "num_hidden_layers": 24, 136 | "num_return_sequences": 1, 137 | "output_attentions": false, 138 | "output_hidden_states": false, 139 | "output_scores": false, 140 | "pad_token_id": null, 141 | "patch_size": 14, 142 | "prefix": null, 143 | "problem_type": null, 144 | "projection_dim" : 768, 145 | "pruned_heads": {}, 146 | "remove_invalid_values": false, 147 | "repetition_penalty": 1.0, 148 | "return_dict": true, 149 | "return_dict_in_generate": false, 150 | "sep_token_id": null, 151 | "task_specific_params": null, 152 | "temperature": 1.0, 153 | "tie_encoder_decoder": false, 154 | "tie_word_embeddings": true, 155 | "tokenizer_class": null, 156 | "top_k": 50, 157 | "top_p": 1.0, 158 | "torch_dtype": null, 159 | "torchscript": false, 160 | "transformers_version": "4.16.0.dev0", 161 | "use_bfloat16": false 162 | }, 163 | "vision_config_dict": { 164 | "hidden_size": 1024, 165 | "intermediate_size": 4096, 166 | "num_attention_heads": 16, 167 | "num_hidden_layers": 24, 168 | "patch_size": 14, 169 | "projection_dim": 768 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /configs/tokenizer/preprocessor_config.json: -------------------------------------------------------------------------------- 1 
| { 2 | "crop_size": 224, 3 | "do_center_crop": true, 4 | "do_normalize": true, 5 | "do_resize": true, 6 | "feature_extractor_type": "CLIPFeatureExtractor", 7 | "image_mean": [ 8 | 0.48145466, 9 | 0.4578275, 10 | 0.40821073 11 | ], 12 | "image_std": [ 13 | 0.26862954, 14 | 0.26130258, 15 | 0.27577711 16 | ], 17 | "resample": 3, 18 | "size": 224 19 | } 20 | -------------------------------------------------------------------------------- /configs/tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | {"bos_token": {"content": "<|startoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<|endoftext|>"} -------------------------------------------------------------------------------- /configs/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "unk_token": { 3 | "content": "<|endoftext|>", 4 | "single_word": false, 5 | "lstrip": false, 6 | "rstrip": false, 7 | "normalized": true, 8 | "__type": "AddedToken" 9 | }, 10 | "bos_token": { 11 | "content": "<|startoftext|>", 12 | "single_word": false, 13 | "lstrip": false, 14 | "rstrip": false, 15 | "normalized": true, 16 | "__type": "AddedToken" 17 | }, 18 | "eos_token": { 19 | "content": "<|endoftext|>", 20 | "single_word": false, 21 | "lstrip": false, 22 | "rstrip": false, 23 | "normalized": true, 24 | "__type": "AddedToken" 25 | }, 26 | "pad_token": "<|endoftext|>", 27 | "add_prefix_space": false, 28 | "errors": "replace", 29 | "do_lower_case": true, 30 | "name_or_path": "openai/clip-vit-base-patch32", 31 | "model_max_length": 77, 32 | "special_tokens_map_file": "./special_tokens_map.json", 33 | "tokenizer_class": "CLIPTokenizer" 34 | } 35 | -------------------------------------------------------------------------------- /configs/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": { 4 | "__type": "AddedToken", 5 | "content": "<|startoftext|>", 6 | "lstrip": false, 7 | "normalized": true, 8 | "rstrip": false, 9 | "single_word": false 10 | }, 11 | "do_lower_case": true, 12 | "eos_token": { 13 | "__type": "AddedToken", 14 | "content": "<|endoftext|>", 15 | "lstrip": false, 16 | "normalized": true, 17 | "rstrip": false, 18 | "single_word": false 19 | }, 20 | "errors": "replace", 21 | "model_max_length": 77, 22 | "name_or_path": "openai/clip-vit-large-patch14", 23 | "pad_token": "<|endoftext|>", 24 | "special_tokens_map_file": "./special_tokens_map.json", 25 | "tokenizer_class": "CLIPTokenizer", 26 | "unk_token": { 27 | "__type": "AddedToken", 28 | "content": "<|endoftext|>", 29 | "lstrip": false, 30 | "normalized": true, 31 | "rstrip": false, 32 | "single_word": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /configs/v1-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | 
cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. ] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | -------------------------------------------------------------------------------- /examples/IC-Light_example_spotlight_01.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 36, 3 | "last_link_id": 63, 4 | "nodes": [ 5 | { 6 | "id": 5, 7 | "type": "diffusers_model_loader", 8 | "pos": [ 9 | 996, 10 | 292 11 | ], 12 | "size": { 13 | "0": 267, 14 | "1": 66 15 | }, 16 | "flags": {}, 17 | "order": 3, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "model", 22 | "type": "MODEL", 23 | "link": 47, 24 | "slot_index": 0 25 | }, 26 | { 27 | "name": "clip", 28 | "type": "CLIP", 29 | "link": 2 30 | }, 31 | { 32 | "name": "vae", 33 | "type": "VAE", 34 | "link": 3 35 | } 36 | ], 37 | "outputs": [ 38 | { 39 | "name": "diffusers_model", 40 | "type": "DIFFUSERSMODEL", 41 | "links": [ 42 | 26 43 | ], 44 | "shape": 3, 45 | "slot_index": 0 46 | } 47 | ], 48 | "properties": { 49 | "Node name for S&R": "diffusers_model_loader" 50 | } 51 | }, 52 | { 53 | "id": 22, 54 | "type": "LoadICLightUnetDiffusers", 55 | "pos": [ 56 | 1343, 57 | 301 58 | ], 59 | "size": { 60 | "0": 447, 61 | "1": 58 62 | }, 63 | "flags": {}, 64 | "order": 6, 65 | "mode": 0, 66 | "inputs": [ 67 | { 68 | "name": "diffusersmodel", 69 | "type": "DIFFUSERSMODEL", 70 | "link": 26 71 | } 72 | ], 73 | "outputs": [ 74 | { 75 | "name": "DIFFUSERSMODEL", 76 | "type": "DIFFUSERSMODEL", 77 | "links": [ 78 | 58 79 | ], 80 | "shape": 3, 81 | "slot_index": 0 82 | } 83 | ], 84 | "properties": { 85 | "Node name for S&R": "LoadICLightUnetDiffusers" 86 | }, 87 | "widgets_values": [ 88 | "iclight_sd15_fc.safetensors" 89 | ] 90 | }, 91 | { 92 | "id": 8, 93 | "type": "VAEEncode", 94 | "pos": [ 95 | 1022, 96 | 469 97 | ], 98 | "size": { 99 | "0": 210, 100 | "1": 46 101 | }, 102 | "flags": {}, 103 | "order": 4, 104 | "mode": 0, 105 | "inputs": [ 106 | { 107 | "name": "pixels", 108 | "type": "IMAGE", 109 | "link": 41, 110 | "slot_index": 0 111 | }, 112 | { 113 | "name": "vae", 114 | "type": "VAE", 115 | "link": 8 116 | } 
117 | ], 118 | "outputs": [ 119 | { 120 | "name": "LATENT", 121 | "type": "LATENT", 122 | "links": [ 123 | 59 124 | ], 125 | "shape": 3, 126 | "slot_index": 0 127 | } 128 | ], 129 | "properties": { 130 | "Node name for S&R": "VAEEncode" 131 | } 132 | }, 133 | { 134 | "id": 25, 135 | "type": "VAEEncode", 136 | "pos": [ 137 | 1055, 138 | 649 139 | ], 140 | "size": { 141 | "0": 210, 142 | "1": 46 143 | }, 144 | "flags": {}, 145 | "order": 10, 146 | "mode": 0, 147 | "inputs": [ 148 | { 149 | "name": "pixels", 150 | "type": "IMAGE", 151 | "link": 56, 152 | "slot_index": 0 153 | }, 154 | { 155 | "name": "vae", 156 | "type": "VAE", 157 | "link": 32, 158 | "slot_index": 1 159 | } 160 | ], 161 | "outputs": [ 162 | { 163 | "name": "LATENT", 164 | "type": "LATENT", 165 | "links": [ 166 | 60 167 | ], 168 | "shape": 3, 169 | "slot_index": 0 170 | } 171 | ], 172 | "properties": { 173 | "Node name for S&R": "VAEEncode" 174 | } 175 | }, 176 | { 177 | "id": 34, 178 | "type": "GrowMaskWithBlur", 179 | "pos": [ 180 | 620, 181 | 1030 182 | ], 183 | "size": { 184 | "0": 315, 185 | "1": 246 186 | }, 187 | "flags": {}, 188 | "order": 7, 189 | "mode": 0, 190 | "inputs": [ 191 | { 192 | "name": "mask", 193 | "type": "MASK", 194 | "link": 52 195 | } 196 | ], 197 | "outputs": [ 198 | { 199 | "name": "mask", 200 | "type": "MASK", 201 | "links": [ 202 | 53 203 | ], 204 | "shape": 3, 205 | "slot_index": 0 206 | }, 207 | { 208 | "name": "mask_inverted", 209 | "type": "MASK", 210 | "links": null, 211 | "shape": 3 212 | } 213 | ], 214 | "properties": { 215 | "Node name for S&R": "GrowMaskWithBlur" 216 | }, 217 | "widgets_values": [ 218 | 0, 219 | 0, 220 | true, 221 | false, 222 | 59.400000000000006, 223 | 1, 224 | 1, 225 | false 226 | ] 227 | }, 228 | { 229 | "id": 9, 230 | "type": "LoadImage", 231 | "pos": [ 232 | 104, 233 | 474 234 | ], 235 | "size": { 236 | "0": 315, 237 | "1": 314 238 | }, 239 | "flags": {}, 240 | "order": 0, 241 | "mode": 0, 242 | "outputs": [ 243 | { 244 | "name": "IMAGE", 245 | "type": "IMAGE", 246 | "links": [ 247 | 40 248 | ], 249 | "shape": 3, 250 | "slot_index": 0 251 | }, 252 | { 253 | "name": "MASK", 254 | "type": "MASK", 255 | "links": null, 256 | "shape": 3 257 | } 258 | ], 259 | "properties": { 260 | "Node name for S&R": "LoadImage" 261 | }, 262 | "widgets_values": [ 263 | "oldman.jpg", 264 | "image" 265 | ] 266 | }, 267 | { 268 | "id": 28, 269 | "type": "ImageResize+", 270 | "pos": [ 271 | 482, 272 | 479 273 | ], 274 | "size": { 275 | "0": 315, 276 | "1": 218 277 | }, 278 | "flags": {}, 279 | "order": 2, 280 | "mode": 0, 281 | "inputs": [ 282 | { 283 | "name": "image", 284 | "type": "IMAGE", 285 | "link": 40 286 | } 287 | ], 288 | "outputs": [ 289 | { 290 | "name": "IMAGE", 291 | "type": "IMAGE", 292 | "links": [ 293 | 41 294 | ], 295 | "shape": 3, 296 | "slot_index": 0 297 | }, 298 | { 299 | "name": "width", 300 | "type": "INT", 301 | "links": [ 302 | 54 303 | ], 304 | "shape": 3 305 | }, 306 | { 307 | "name": "height", 308 | "type": "INT", 309 | "links": [ 310 | 55 311 | ], 312 | "shape": 3 313 | } 314 | ], 315 | "properties": { 316 | "Node name for S&R": "ImageResize+" 317 | }, 318 | "widgets_values": [ 319 | 512, 320 | 512, 321 | "nearest", 322 | false, 323 | "always", 324 | 8 325 | ] 326 | }, 327 | { 328 | "id": 30, 329 | "type": "MaskToImage", 330 | "pos": [ 331 | 663, 332 | 806 333 | ], 334 | "size": { 335 | "0": 210, 336 | "1": 26 337 | }, 338 | "flags": {}, 339 | "order": 9, 340 | "mode": 0, 341 | "inputs": [ 342 | { 343 | "name": "mask", 344 | "type": "MASK", 345 | "link": 50 
346 | } 347 | ], 348 | "outputs": [ 349 | { 350 | "name": "IMAGE", 351 | "type": "IMAGE", 352 | "links": [ 353 | 56, 354 | 57 355 | ], 356 | "shape": 3, 357 | "slot_index": 0 358 | } 359 | ], 360 | "properties": { 361 | "Node name for S&R": "MaskToImage" 362 | } 363 | }, 364 | { 365 | "id": 29, 366 | "type": "CreateShapeMask", 367 | "pos": [ 368 | 262, 369 | 912 370 | ], 371 | "size": { 372 | "0": 315, 373 | "1": 270 374 | }, 375 | "flags": {}, 376 | "order": 5, 377 | "mode": 0, 378 | "inputs": [ 379 | { 380 | "name": "frame_width", 381 | "type": "INT", 382 | "link": 54, 383 | "widget": { 384 | "name": "frame_width" 385 | }, 386 | "slot_index": 0 387 | }, 388 | { 389 | "name": "frame_height", 390 | "type": "INT", 391 | "link": 55, 392 | "widget": { 393 | "name": "frame_height" 394 | }, 395 | "slot_index": 1 396 | } 397 | ], 398 | "outputs": [ 399 | { 400 | "name": "mask", 401 | "type": "MASK", 402 | "links": [ 403 | 52 404 | ], 405 | "shape": 3, 406 | "slot_index": 0 407 | }, 408 | { 409 | "name": "mask_inverted", 410 | "type": "MASK", 411 | "links": null, 412 | "shape": 3 413 | } 414 | ], 415 | "properties": { 416 | "Node name for S&R": "CreateShapeMask" 417 | }, 418 | "widgets_values": [ 419 | "circle", 420 | 1, 421 | 256, 422 | 256, 423 | 0, 424 | 512, 425 | 512, 426 | 256, 427 | 256 428 | ] 429 | }, 430 | { 431 | "id": 32, 432 | "type": "RemapMaskRange", 433 | "pos": [ 434 | 610, 435 | 890 436 | ], 437 | "size": { 438 | "0": 315, 439 | "1": 82 440 | }, 441 | "flags": {}, 442 | "order": 8, 443 | "mode": 0, 444 | "inputs": [ 445 | { 446 | "name": "mask", 447 | "type": "MASK", 448 | "link": 53 449 | } 450 | ], 451 | "outputs": [ 452 | { 453 | "name": "mask", 454 | "type": "MASK", 455 | "links": [ 456 | 50 457 | ], 458 | "shape": 3, 459 | "slot_index": 0 460 | } 461 | ], 462 | "properties": { 463 | "Node name for S&R": "RemapMaskRange" 464 | }, 465 | "widgets_values": [ 466 | 0, 467 | 0.9 468 | ] 469 | }, 470 | { 471 | "id": 33, 472 | "type": "PreviewImage", 473 | "pos": [ 474 | 989, 475 | 893 476 | ], 477 | "size": { 478 | "0": 342, 479 | "1": 347 480 | }, 481 | "flags": {}, 482 | "order": 11, 483 | "mode": 0, 484 | "inputs": [ 485 | { 486 | "name": "images", 487 | "type": "IMAGE", 488 | "link": 57 489 | } 490 | ], 491 | "properties": { 492 | "Node name for S&R": "PreviewImage" 493 | } 494 | }, 495 | { 496 | "id": 10, 497 | "type": "PreviewImage", 498 | "pos": [ 499 | 2076, 500 | 347 501 | ], 502 | "size": { 503 | "0": 526.656982421875, 504 | "1": 580.8809814453125 505 | }, 506 | "flags": {}, 507 | "order": 14, 508 | "mode": 0, 509 | "inputs": [ 510 | { 511 | "name": "images", 512 | "type": "IMAGE", 513 | "link": 63 514 | } 515 | ], 516 | "properties": { 517 | "Node name for S&R": "PreviewImage" 518 | } 519 | }, 520 | { 521 | "id": 35, 522 | "type": "iclight_diffusers_sampler", 523 | "pos": [ 524 | 1372, 525 | 434 526 | ], 527 | "size": { 528 | "0": 400, 529 | "1": 406 530 | }, 531 | "flags": {}, 532 | "order": 12, 533 | "mode": 0, 534 | "inputs": [ 535 | { 536 | "name": "diffusers_model", 537 | "type": "DIFFUSERSMODEL", 538 | "link": 58 539 | }, 540 | { 541 | "name": "latent", 542 | "type": "LATENT", 543 | "link": 59 544 | }, 545 | { 546 | "name": "bg_latent", 547 | "type": "LATENT", 548 | "link": 60 549 | } 550 | ], 551 | "outputs": [ 552 | { 553 | "name": "samples", 554 | "type": "LATENT", 555 | "links": [ 556 | 61 557 | ], 558 | "shape": 3, 559 | "slot_index": 0 560 | } 561 | ], 562 | "properties": { 563 | "Node name for S&R": "iclight_diffusers_sampler" 564 | }, 565 | 
"widgets_values": [ 566 | 512, 567 | 512, 568 | 25, 569 | 2, 570 | 0.9, 571 | 819223299872531, 572 | "randomize", 573 | "DPMSolverMultistepScheduler", 574 | "positive", 575 | "negative", 576 | false, 577 | true 578 | ] 579 | }, 580 | { 581 | "id": 36, 582 | "type": "VAEDecode", 583 | "pos": [ 584 | 1817, 585 | 427 586 | ], 587 | "size": { 588 | "0": 210, 589 | "1": 46 590 | }, 591 | "flags": {}, 592 | "order": 13, 593 | "mode": 0, 594 | "inputs": [ 595 | { 596 | "name": "samples", 597 | "type": "LATENT", 598 | "link": 61 599 | }, 600 | { 601 | "name": "vae", 602 | "type": "VAE", 603 | "link": 62, 604 | "slot_index": 1 605 | } 606 | ], 607 | "outputs": [ 608 | { 609 | "name": "IMAGE", 610 | "type": "IMAGE", 611 | "links": [ 612 | 63 613 | ], 614 | "shape": 3, 615 | "slot_index": 0 616 | } 617 | ], 618 | "properties": { 619 | "Node name for S&R": "VAEDecode" 620 | } 621 | }, 622 | { 623 | "id": 6, 624 | "type": "CheckpointLoaderSimple", 625 | "pos": [ 626 | 563, 627 | 285 628 | ], 629 | "size": { 630 | "0": 315, 631 | "1": 98 632 | }, 633 | "flags": {}, 634 | "order": 1, 635 | "mode": 0, 636 | "outputs": [ 637 | { 638 | "name": "MODEL", 639 | "type": "MODEL", 640 | "links": [ 641 | 47 642 | ], 643 | "shape": 3, 644 | "slot_index": 0 645 | }, 646 | { 647 | "name": "CLIP", 648 | "type": "CLIP", 649 | "links": [ 650 | 2 651 | ], 652 | "shape": 3, 653 | "slot_index": 1 654 | }, 655 | { 656 | "name": "VAE", 657 | "type": "VAE", 658 | "links": [ 659 | 3, 660 | 8, 661 | 32, 662 | 62 663 | ], 664 | "shape": 3, 665 | "slot_index": 2 666 | } 667 | ], 668 | "properties": { 669 | "Node name for S&R": "CheckpointLoaderSimple" 670 | }, 671 | "widgets_values": [ 672 | "1_5\\photon_v1.safetensors" 673 | ] 674 | } 675 | ], 676 | "links": [ 677 | [ 678 | 2, 679 | 6, 680 | 1, 681 | 5, 682 | 1, 683 | "CLIP" 684 | ], 685 | [ 686 | 3, 687 | 6, 688 | 2, 689 | 5, 690 | 2, 691 | "VAE" 692 | ], 693 | [ 694 | 8, 695 | 6, 696 | 2, 697 | 8, 698 | 1, 699 | "VAE" 700 | ], 701 | [ 702 | 26, 703 | 5, 704 | 0, 705 | 22, 706 | 0, 707 | "DIFFUSERSMODEL" 708 | ], 709 | [ 710 | 32, 711 | 6, 712 | 2, 713 | 25, 714 | 1, 715 | "VAE" 716 | ], 717 | [ 718 | 40, 719 | 9, 720 | 0, 721 | 28, 722 | 0, 723 | "IMAGE" 724 | ], 725 | [ 726 | 41, 727 | 28, 728 | 0, 729 | 8, 730 | 0, 731 | "IMAGE" 732 | ], 733 | [ 734 | 47, 735 | 6, 736 | 0, 737 | 5, 738 | 0, 739 | "MODEL" 740 | ], 741 | [ 742 | 50, 743 | 32, 744 | 0, 745 | 30, 746 | 0, 747 | "MASK" 748 | ], 749 | [ 750 | 52, 751 | 29, 752 | 0, 753 | 34, 754 | 0, 755 | "MASK" 756 | ], 757 | [ 758 | 53, 759 | 34, 760 | 0, 761 | 32, 762 | 0, 763 | "MASK" 764 | ], 765 | [ 766 | 54, 767 | 28, 768 | 1, 769 | 29, 770 | 0, 771 | "INT" 772 | ], 773 | [ 774 | 55, 775 | 28, 776 | 2, 777 | 29, 778 | 1, 779 | "INT" 780 | ], 781 | [ 782 | 56, 783 | 30, 784 | 0, 785 | 25, 786 | 0, 787 | "IMAGE" 788 | ], 789 | [ 790 | 57, 791 | 30, 792 | 0, 793 | 33, 794 | 0, 795 | "IMAGE" 796 | ], 797 | [ 798 | 58, 799 | 22, 800 | 0, 801 | 35, 802 | 0, 803 | "DIFFUSERSMODEL" 804 | ], 805 | [ 806 | 59, 807 | 8, 808 | 0, 809 | 35, 810 | 1, 811 | "LATENT" 812 | ], 813 | [ 814 | 60, 815 | 25, 816 | 0, 817 | 35, 818 | 2, 819 | "LATENT" 820 | ], 821 | [ 822 | 61, 823 | 35, 824 | 0, 825 | 36, 826 | 0, 827 | "LATENT" 828 | ], 829 | [ 830 | 62, 831 | 6, 832 | 2, 833 | 36, 834 | 1, 835 | "VAE" 836 | ], 837 | [ 838 | 63, 839 | 36, 840 | 0, 841 | 10, 842 | 0, 843 | "IMAGE" 844 | ] 845 | ], 846 | "groups": [], 847 | "config": {}, 848 | "extra": {}, 849 | "version": 0.4 850 | } 
-------------------------------------------------------------------------------- /examples/iclight_spotlight_batch_example.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 43, 3 | "last_link_id": 76, 4 | "nodes": [ 5 | { 6 | "id": 5, 7 | "type": "diffusers_model_loader", 8 | "pos": [ 9 | 996, 10 | 292 11 | ], 12 | "size": { 13 | "0": 267, 14 | "1": 66 15 | }, 16 | "flags": {}, 17 | "order": 4, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "model", 22 | "type": "MODEL", 23 | "link": 47, 24 | "slot_index": 0 25 | }, 26 | { 27 | "name": "clip", 28 | "type": "CLIP", 29 | "link": 2 30 | }, 31 | { 32 | "name": "vae", 33 | "type": "VAE", 34 | "link": 3 35 | } 36 | ], 37 | "outputs": [ 38 | { 39 | "name": "diffusers_model", 40 | "type": "DIFFUSERSMODEL", 41 | "links": [ 42 | 26 43 | ], 44 | "shape": 3, 45 | "slot_index": 0 46 | } 47 | ], 48 | "properties": { 49 | "Node name for S&R": "diffusers_model_loader" 50 | } 51 | }, 52 | { 53 | "id": 22, 54 | "type": "LoadICLightUnetDiffusers", 55 | "pos": [ 56 | 1343, 57 | 301 58 | ], 59 | "size": { 60 | "0": 447, 61 | "1": 58 62 | }, 63 | "flags": {}, 64 | "order": 7, 65 | "mode": 0, 66 | "inputs": [ 67 | { 68 | "name": "diffusersmodel", 69 | "type": "DIFFUSERSMODEL", 70 | "link": 26 71 | } 72 | ], 73 | "outputs": [ 74 | { 75 | "name": "DIFFUSERSMODEL", 76 | "type": "DIFFUSERSMODEL", 77 | "links": [ 78 | 70 79 | ], 80 | "shape": 3, 81 | "slot_index": 0 82 | } 83 | ], 84 | "properties": { 85 | "Node name for S&R": "LoadICLightUnetDiffusers" 86 | }, 87 | "widgets_values": [ 88 | "iclight_sd15_fc.safetensors" 89 | ] 90 | }, 91 | { 92 | "id": 8, 93 | "type": "VAEEncode", 94 | "pos": [ 95 | 1022, 96 | 469 97 | ], 98 | "size": { 99 | "0": 210, 100 | "1": 46 101 | }, 102 | "flags": {}, 103 | "order": 9, 104 | "mode": 0, 105 | "inputs": [ 106 | { 107 | "name": "pixels", 108 | "type": "IMAGE", 109 | "link": 59, 110 | "slot_index": 0 111 | }, 112 | { 113 | "name": "vae", 114 | "type": "VAE", 115 | "link": 8 116 | } 117 | ], 118 | "outputs": [ 119 | { 120 | "name": "LATENT", 121 | "type": "LATENT", 122 | "links": [ 123 | 71 124 | ], 125 | "shape": 3, 126 | "slot_index": 0 127 | } 128 | ], 129 | "properties": { 130 | "Node name for S&R": "VAEEncode" 131 | } 132 | }, 133 | { 134 | "id": 9, 135 | "type": "LoadImage", 136 | "pos": [ 137 | -80, 138 | 470 139 | ], 140 | "size": { 141 | "0": 315, 142 | "1": 314 143 | }, 144 | "flags": {}, 145 | "order": 0, 146 | "mode": 0, 147 | "outputs": [ 148 | { 149 | "name": "IMAGE", 150 | "type": "IMAGE", 151 | "links": [ 152 | 40 153 | ], 154 | "shape": 3, 155 | "slot_index": 0 156 | }, 157 | { 158 | "name": "MASK", 159 | "type": "MASK", 160 | "links": null, 161 | "shape": 3 162 | } 163 | ], 164 | "properties": { 165 | "Node name for S&R": "LoadImage" 166 | }, 167 | "widgets_values": [ 168 | "oldman.jpg", 169 | "image" 170 | ] 171 | }, 172 | { 173 | "id": 28, 174 | "type": "ImageResize+", 175 | "pos": [ 176 | 290, 177 | 480 178 | ], 179 | "size": { 180 | "0": 315, 181 | "1": 218 182 | }, 183 | "flags": {}, 184 | "order": 3, 185 | "mode": 0, 186 | "inputs": [ 187 | { 188 | "name": "image", 189 | "type": "IMAGE", 190 | "link": 40 191 | } 192 | ], 193 | "outputs": [ 194 | { 195 | "name": "IMAGE", 196 | "type": "IMAGE", 197 | "links": [ 198 | 58 199 | ], 200 | "shape": 3, 201 | "slot_index": 0 202 | }, 203 | { 204 | "name": "width", 205 | "type": "INT", 206 | "links": [], 207 | "shape": 3 208 | }, 209 | { 210 | "name": "height", 211 | "type": "INT", 212 | "links": 
[], 213 | "shape": 3 214 | } 215 | ], 216 | "properties": { 217 | "Node name for S&R": "ImageResize+" 218 | }, 219 | "widgets_values": [ 220 | 512, 221 | 512, 222 | "nearest", 223 | false, 224 | "always", 225 | 8 226 | ] 227 | }, 228 | { 229 | "id": 6, 230 | "type": "CheckpointLoaderSimple", 231 | "pos": [ 232 | 563, 233 | 285 234 | ], 235 | "size": { 236 | "0": 315, 237 | "1": 98 238 | }, 239 | "flags": {}, 240 | "order": 1, 241 | "mode": 0, 242 | "outputs": [ 243 | { 244 | "name": "MODEL", 245 | "type": "MODEL", 246 | "links": [ 247 | 47 248 | ], 249 | "shape": 3, 250 | "slot_index": 0 251 | }, 252 | { 253 | "name": "CLIP", 254 | "type": "CLIP", 255 | "links": [ 256 | 2 257 | ], 258 | "shape": 3, 259 | "slot_index": 1 260 | }, 261 | { 262 | "name": "VAE", 263 | "type": "VAE", 264 | "links": [ 265 | 3, 266 | 8, 267 | 32, 268 | 74 269 | ], 270 | "shape": 3, 271 | "slot_index": 2 272 | } 273 | ], 274 | "properties": { 275 | "Node name for S&R": "CheckpointLoaderSimple" 276 | }, 277 | "widgets_values": [ 278 | "1_5\\photon_v1.safetensors" 279 | ] 280 | }, 281 | { 282 | "id": 25, 283 | "type": "VAEEncode", 284 | "pos": [ 285 | 1028, 286 | 597 287 | ], 288 | "size": { 289 | "0": 210, 290 | "1": 46 291 | }, 292 | "flags": {}, 293 | "order": 12, 294 | "mode": 0, 295 | "inputs": [ 296 | { 297 | "name": "pixels", 298 | "type": "IMAGE", 299 | "link": 56, 300 | "slot_index": 0 301 | }, 302 | { 303 | "name": "vae", 304 | "type": "VAE", 305 | "link": 32, 306 | "slot_index": 1 307 | } 308 | ], 309 | "outputs": [ 310 | { 311 | "name": "LATENT", 312 | "type": "LATENT", 313 | "links": [ 314 | 72 315 | ], 316 | "shape": 3, 317 | "slot_index": 0 318 | } 319 | ], 320 | "properties": { 321 | "Node name for S&R": "VAEEncode" 322 | } 323 | }, 324 | { 325 | "id": 32, 326 | "type": "RemapMaskRange", 327 | "pos": [ 328 | 610, 329 | 890 330 | ], 331 | "size": { 332 | "0": 315, 333 | "1": 82 334 | }, 335 | "flags": {}, 336 | "order": 10, 337 | "mode": 0, 338 | "inputs": [ 339 | { 340 | "name": "mask", 341 | "type": "MASK", 342 | "link": 53 343 | } 344 | ], 345 | "outputs": [ 346 | { 347 | "name": "mask", 348 | "type": "MASK", 349 | "links": [ 350 | 50 351 | ], 352 | "shape": 3, 353 | "slot_index": 0 354 | } 355 | ], 356 | "properties": { 357 | "Node name for S&R": "RemapMaskRange" 358 | }, 359 | "widgets_values": [ 360 | 0, 361 | 1 362 | ] 363 | }, 364 | { 365 | "id": 34, 366 | "type": "GrowMaskWithBlur", 367 | "pos": [ 368 | 620, 369 | 1030 370 | ], 371 | "size": { 372 | "0": 315, 373 | "1": 246 374 | }, 375 | "flags": {}, 376 | "order": 8, 377 | "mode": 0, 378 | "inputs": [ 379 | { 380 | "name": "mask", 381 | "type": "MASK", 382 | "link": 62 383 | } 384 | ], 385 | "outputs": [ 386 | { 387 | "name": "mask", 388 | "type": "MASK", 389 | "links": [ 390 | 53 391 | ], 392 | "shape": 3, 393 | "slot_index": 0 394 | }, 395 | { 396 | "name": "mask_inverted", 397 | "type": "MASK", 398 | "links": null, 399 | "shape": 3 400 | } 401 | ], 402 | "properties": { 403 | "Node name for S&R": "GrowMaskWithBlur" 404 | }, 405 | "widgets_values": [ 406 | 0, 407 | 0, 408 | true, 409 | false, 410 | 48.6, 411 | 1, 412 | 1, 413 | false 414 | ] 415 | }, 416 | { 417 | "id": 30, 418 | "type": "MaskToImage", 419 | "pos": [ 420 | 663, 421 | 806 422 | ], 423 | "size": { 424 | "0": 210, 425 | "1": 26 426 | }, 427 | "flags": {}, 428 | "order": 11, 429 | "mode": 0, 430 | "inputs": [ 431 | { 432 | "name": "mask", 433 | "type": "MASK", 434 | "link": 50 435 | } 436 | ], 437 | "outputs": [ 438 | { 439 | "name": "IMAGE", 440 | "type": "IMAGE", 441 | 
"links": [ 442 | 56, 443 | 63 444 | ], 445 | "shape": 3, 446 | "slot_index": 0 447 | } 448 | ], 449 | "properties": { 450 | "Node name for S&R": "MaskToImage" 451 | } 452 | }, 453 | { 454 | "id": 40, 455 | "type": "ImageConcanate", 456 | "pos": [ 457 | 1106, 458 | 1035 459 | ], 460 | "size": { 461 | "0": 315, 462 | "1": 102 463 | }, 464 | "flags": {}, 465 | "order": 16, 466 | "mode": 0, 467 | "inputs": [ 468 | { 469 | "name": "image1", 470 | "type": "IMAGE", 471 | "link": 63 472 | }, 473 | { 474 | "name": "image2", 475 | "type": "IMAGE", 476 | "link": 76 477 | } 478 | ], 479 | "outputs": [ 480 | { 481 | "name": "IMAGE", 482 | "type": "IMAGE", 483 | "links": [ 484 | 65 485 | ], 486 | "shape": 3, 487 | "slot_index": 0 488 | } 489 | ], 490 | "properties": { 491 | "Node name for S&R": "ImageConcanate" 492 | }, 493 | "widgets_values": [ 494 | "right", 495 | false 496 | ] 497 | }, 498 | { 499 | "id": 37, 500 | "type": "VHS_VideoCombine", 501 | "pos": [ 502 | 1461, 503 | 963 504 | ], 505 | "size": [ 506 | 640.8259887695312, 507 | 614.4129943847656 508 | ], 509 | "flags": {}, 510 | "order": 17, 511 | "mode": 0, 512 | "inputs": [ 513 | { 514 | "name": "images", 515 | "type": "IMAGE", 516 | "link": 65 517 | }, 518 | { 519 | "name": "audio", 520 | "type": "VHS_AUDIO", 521 | "link": null 522 | }, 523 | { 524 | "name": "batch_manager", 525 | "type": "VHS_BatchManager", 526 | "link": null 527 | } 528 | ], 529 | "outputs": [ 530 | { 531 | "name": "Filenames", 532 | "type": "VHS_FILENAMES", 533 | "links": null, 534 | "shape": 3 535 | } 536 | ], 537 | "properties": { 538 | "Node name for S&R": "VHS_VideoCombine" 539 | }, 540 | "widgets_values": { 541 | "frame_rate": 8, 542 | "loop_count": 0, 543 | "filename_prefix": "AnimateDiff", 544 | "format": "video/h264-mp4", 545 | "pix_fmt": "yuv420p", 546 | "crf": 19, 547 | "save_metadata": true, 548 | "pingpong": true, 549 | "save_output": false, 550 | "videopreview": { 551 | "hidden": false, 552 | "paused": false, 553 | "params": { 554 | "filename": "AnimateDiff_00003.mp4", 555 | "subfolder": "", 556 | "type": "temp", 557 | "format": "video/h264-mp4" 558 | } 559 | } 560 | } 561 | }, 562 | { 563 | "id": 10, 564 | "type": "PreviewImage", 565 | "pos": [ 566 | 2185, 567 | 293 568 | ], 569 | "size": { 570 | "0": 526.656982421875, 571 | "1": 580.8809814453125 572 | }, 573 | "flags": {}, 574 | "order": 15, 575 | "mode": 0, 576 | "inputs": [ 577 | { 578 | "name": "images", 579 | "type": "IMAGE", 580 | "link": 75 581 | } 582 | ], 583 | "properties": { 584 | "Node name for S&R": "PreviewImage" 585 | } 586 | }, 587 | { 588 | "id": 35, 589 | "type": "VHS_DuplicateImages", 590 | "pos": [ 591 | 644, 592 | 480 593 | ], 594 | "size": [ 595 | 289.9799993896486, 596 | 54 597 | ], 598 | "flags": {}, 599 | "order": 6, 600 | "mode": 0, 601 | "inputs": [ 602 | { 603 | "name": "images", 604 | "type": "IMAGE", 605 | "link": 58, 606 | "slot_index": 0 607 | }, 608 | { 609 | "name": "multiply_by", 610 | "type": "INT", 611 | "link": 66, 612 | "widget": { 613 | "name": "multiply_by" 614 | } 615 | } 616 | ], 617 | "outputs": [ 618 | { 619 | "name": "IMAGE", 620 | "type": "IMAGE", 621 | "links": [ 622 | 59 623 | ], 624 | "shape": 3, 625 | "slot_index": 0 626 | }, 627 | { 628 | "name": "count", 629 | "type": "INT", 630 | "links": null, 631 | "shape": 3 632 | } 633 | ], 634 | "properties": { 635 | "Node name for S&R": "VHS_DuplicateImages" 636 | }, 637 | "widgets_values": { 638 | "multiply_by": 32 639 | } 640 | }, 641 | { 642 | "id": 42, 643 | "type": "iclight_diffusers_sampler", 644 | "pos": [ 
645 | 1372, 646 | 434 647 | ], 648 | "size": { 649 | "0": 400, 650 | "1": 406 651 | }, 652 | "flags": {}, 653 | "order": 13, 654 | "mode": 0, 655 | "inputs": [ 656 | { 657 | "name": "diffusers_model", 658 | "type": "DIFFUSERSMODEL", 659 | "link": 70 660 | }, 661 | { 662 | "name": "latent", 663 | "type": "LATENT", 664 | "link": 71 665 | }, 666 | { 667 | "name": "bg_latent", 668 | "type": "LATENT", 669 | "link": 72 670 | } 671 | ], 672 | "outputs": [ 673 | { 674 | "name": "samples", 675 | "type": "LATENT", 676 | "links": [ 677 | 73 678 | ], 679 | "shape": 3, 680 | "slot_index": 0 681 | } 682 | ], 683 | "properties": { 684 | "Node name for S&R": "iclight_diffusers_sampler" 685 | }, 686 | "widgets_values": [ 687 | 512, 688 | 512, 689 | 25, 690 | 2, 691 | 0.9, 692 | 908247852472673, 693 | "randomize", 694 | "DPMSolverMultistepScheduler", 695 | "spotlight", 696 | "bad quality", 697 | false, 698 | true 699 | ] 700 | }, 701 | { 702 | "id": 38, 703 | "type": "SplineEditor", 704 | "pos": [ 705 | -29, 706 | 893 707 | ], 708 | "size": { 709 | "0": 550, 710 | "1": 920 711 | }, 712 | "flags": {}, 713 | "order": 2, 714 | "mode": 0, 715 | "outputs": [ 716 | { 717 | "name": "mask", 718 | "type": "MASK", 719 | "links": null, 720 | "shape": 3 721 | }, 722 | { 723 | "name": "coord_str", 724 | "type": "STRING", 725 | "links": [ 726 | 61 727 | ], 728 | "shape": 3 729 | }, 730 | { 731 | "name": "float", 732 | "type": "FLOAT", 733 | "links": null, 734 | "shape": 3 735 | }, 736 | { 737 | "name": "count", 738 | "type": "INT", 739 | "links": [ 740 | 66 741 | ], 742 | "shape": 3, 743 | "slot_index": 3 744 | } 745 | ], 746 | "properties": { 747 | "Node name for S&R": "SplineEditor", 748 | "points": "SplineEditor" 749 | }, 750 | "widgets_values": [ 751 | "[{\"x\":121.00000000000001,\"y\":447.70000000000005},{\"x\":412.61000000000007,\"y\":419.87000000000006},{\"x\":141.57000000000002,\"y\":278.3},{\"x\":262.07390000000004,\"y\":84.91780000000003},{\"x\":405.35,\"y\":256.52000000000004},{\"x\":107.69000000000001,\"y\":416.24000000000007}]", 752 | 
"[{\"x\":121,\"y\":447.70001220703125},{\"x\":168.39727783203125,\"y\":447.5857238769531},{\"x\":215.79052734375,\"y\":446.96728515625},{\"x\":263.1689453125,\"y\":445.6412658691406},{\"x\":310.5037841796875,\"y\":443.2364501953125},{\"x\":357.69171142578125,\"y\":438.8589782714844},{\"x\":403.7711181640625,\"y\":428.43505859375},{\"x\":389.68817138671875,\"y\":396.6990051269531},{\"x\":345.1974792480469,\"y\":380.45477294921875},{\"x\":299.83392333984375,\"y\":366.7195129394531},{\"x\":254.7096710205078,\"y\":352.2271728515625},{\"x\":210.65040588378906,\"y\":334.8033447265625},{\"x\":169.68922424316406,\"y\":311.13885498046875},{\"x\":140.11099243164062,\"y\":274.8868713378906},{\"x\":133.01222229003906,\"y\":228.4459686279297},{\"x\":144.95265197753906,\"y\":182.82485961914062},{\"x\":169.87940979003906,\"y\":142.66854858398438},{\"x\":203.91860961914062,\"y\":109.85896301269531},{\"x\":245.73171997070312,\"y\":88.0564956665039},{\"x\":292.4783935546875,\"y\":88.1949462890625},{\"x\":335.642578125,\"y\":107.3690414428711},{\"x\":372.6259765625,\"y\":136.83396911621094},{\"x\":400.75732421875,\"y\":174.7657928466797},{\"x\":413.12457275390625,\"y\":220.08438110351562},{\"x\":400.2603454589844,\"y\":264.89129638671875},{\"x\":364.9109802246094,\"y\":296.1686096191406},{\"x\":324.4112243652344,\"y\":320.7547912597656},{\"x\":282.25390625,\"y\":342.40655517578125},{\"x\":239.25035095214844,\"y\":362.3324279785156},{\"x\":195.72447204589844,\"y\":381.0921936035156},{\"x\":151.8393096923828,\"y\":398.99676513671875},{\"x\":107.69000244140625,\"y\":416.239990234375}]", 753 | 512, 754 | 512, 755 | 32, 756 | "path", 757 | "cardinal", 758 | 0.49666656494140643, 759 | 1, 760 | "list", 761 | 0, 762 | 1, 763 | null, 764 | null 765 | ] 766 | }, 767 | { 768 | "id": 43, 769 | "type": "VAEDecode", 770 | "pos": [ 771 | 1858, 772 | 437 773 | ], 774 | "size": { 775 | "0": 210, 776 | "1": 46 777 | }, 778 | "flags": {}, 779 | "order": 14, 780 | "mode": 0, 781 | "inputs": [ 782 | { 783 | "name": "samples", 784 | "type": "LATENT", 785 | "link": 73 786 | }, 787 | { 788 | "name": "vae", 789 | "type": "VAE", 790 | "link": 74, 791 | "slot_index": 1 792 | } 793 | ], 794 | "outputs": [ 795 | { 796 | "name": "IMAGE", 797 | "type": "IMAGE", 798 | "links": [ 799 | 75, 800 | 76 801 | ], 802 | "shape": 3, 803 | "slot_index": 0 804 | } 805 | ], 806 | "properties": { 807 | "Node name for S&R": "VAEDecode" 808 | } 809 | }, 810 | { 811 | "id": 39, 812 | "type": "CreateShapeMaskOnPath", 813 | "pos": [ 814 | 627, 815 | 1343 816 | ], 817 | "size": { 818 | "0": 315, 819 | "1": 222 820 | }, 821 | "flags": {}, 822 | "order": 5, 823 | "mode": 0, 824 | "inputs": [ 825 | { 826 | "name": "coordinates", 827 | "type": "STRING", 828 | "link": 61, 829 | "widget": { 830 | "name": "coordinates" 831 | }, 832 | "slot_index": 0 833 | }, 834 | { 835 | "name": "size_multiplier", 836 | "type": "FLOAT", 837 | "link": null, 838 | "widget": { 839 | "name": "size_multiplier" 840 | } 841 | } 842 | ], 843 | "outputs": [ 844 | { 845 | "name": "mask", 846 | "type": "MASK", 847 | "links": [ 848 | 62 849 | ], 850 | "shape": 3, 851 | "slot_index": 0 852 | }, 853 | { 854 | "name": "mask_inverted", 855 | "type": "MASK", 856 | "links": null, 857 | "shape": 3 858 | } 859 | ], 860 | "properties": { 861 | "Node name for S&R": "CreateShapeMaskOnPath" 862 | }, 863 | "widgets_values": [ 864 | "circle", 865 | "", 866 | 512, 867 | 512, 868 | 128, 869 | 128, 870 | [ 871 | 1 872 | ] 873 | ] 874 | } 875 | ], 876 | "links": [ 877 | [ 878 | 2, 879 | 6, 880 | 1, 881 | 5, 
882 | 1, 883 | "CLIP" 884 | ], 885 | [ 886 | 3, 887 | 6, 888 | 2, 889 | 5, 890 | 2, 891 | "VAE" 892 | ], 893 | [ 894 | 8, 895 | 6, 896 | 2, 897 | 8, 898 | 1, 899 | "VAE" 900 | ], 901 | [ 902 | 26, 903 | 5, 904 | 0, 905 | 22, 906 | 0, 907 | "DIFFUSERSMODEL" 908 | ], 909 | [ 910 | 32, 911 | 6, 912 | 2, 913 | 25, 914 | 1, 915 | "VAE" 916 | ], 917 | [ 918 | 40, 919 | 9, 920 | 0, 921 | 28, 922 | 0, 923 | "IMAGE" 924 | ], 925 | [ 926 | 47, 927 | 6, 928 | 0, 929 | 5, 930 | 0, 931 | "MODEL" 932 | ], 933 | [ 934 | 50, 935 | 32, 936 | 0, 937 | 30, 938 | 0, 939 | "MASK" 940 | ], 941 | [ 942 | 53, 943 | 34, 944 | 0, 945 | 32, 946 | 0, 947 | "MASK" 948 | ], 949 | [ 950 | 56, 951 | 30, 952 | 0, 953 | 25, 954 | 0, 955 | "IMAGE" 956 | ], 957 | [ 958 | 58, 959 | 28, 960 | 0, 961 | 35, 962 | 0, 963 | "IMAGE" 964 | ], 965 | [ 966 | 59, 967 | 35, 968 | 0, 969 | 8, 970 | 0, 971 | "IMAGE" 972 | ], 973 | [ 974 | 61, 975 | 38, 976 | 1, 977 | 39, 978 | 0, 979 | "STRING" 980 | ], 981 | [ 982 | 62, 983 | 39, 984 | 0, 985 | 34, 986 | 0, 987 | "MASK" 988 | ], 989 | [ 990 | 63, 991 | 30, 992 | 0, 993 | 40, 994 | 0, 995 | "IMAGE" 996 | ], 997 | [ 998 | 65, 999 | 40, 1000 | 0, 1001 | 37, 1002 | 0, 1003 | "IMAGE" 1004 | ], 1005 | [ 1006 | 66, 1007 | 38, 1008 | 3, 1009 | 35, 1010 | 1, 1011 | "INT" 1012 | ], 1013 | [ 1014 | 70, 1015 | 22, 1016 | 0, 1017 | 42, 1018 | 0, 1019 | "DIFFUSERSMODEL" 1020 | ], 1021 | [ 1022 | 71, 1023 | 8, 1024 | 0, 1025 | 42, 1026 | 1, 1027 | "LATENT" 1028 | ], 1029 | [ 1030 | 72, 1031 | 25, 1032 | 0, 1033 | 42, 1034 | 2, 1035 | "LATENT" 1036 | ], 1037 | [ 1038 | 73, 1039 | 42, 1040 | 0, 1041 | 43, 1042 | 0, 1043 | "LATENT" 1044 | ], 1045 | [ 1046 | 74, 1047 | 6, 1048 | 2, 1049 | 43, 1050 | 1, 1051 | "VAE" 1052 | ], 1053 | [ 1054 | 75, 1055 | 43, 1056 | 0, 1057 | 10, 1058 | 0, 1059 | "IMAGE" 1060 | ], 1061 | [ 1062 | 76, 1063 | 43, 1064 | 0, 1065 | 40, 1066 | 1, 1067 | "IMAGE" 1068 | ] 1069 | ], 1070 | "groups": [], 1071 | "config": {}, 1072 | "extra": {}, 1073 | "version": 0.4 1074 | } -------------------------------------------------------------------------------- /hidiffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .hidiffusion import apply_hidiffusion, remove_hidiffusion 2 | 3 | __all__ = ["apply_hidiffusion", "remove_hidiffusion"] 4 | -------------------------------------------------------------------------------- /hidiffusion/sd_module_key/sd15_module_key.txt: -------------------------------------------------------------------------------- 1 | conv_in 2 | time_proj 3 | time_embedding 4 | time_embedding.linear_1 5 | time_embedding.act 6 | time_embedding.linear_2 7 | down_blocks 8 | down_blocks.0 9 | down_blocks.0.attentions 10 | down_blocks.0.attentions.0 11 | down_blocks.0.attentions.0.norm 12 | down_blocks.0.attentions.0.proj_in 13 | down_blocks.0.attentions.0.transformer_blocks 14 | down_blocks.0.attentions.0.transformer_blocks.0 15 | down_blocks.0.attentions.0.transformer_blocks.0.norm1 16 | down_blocks.0.attentions.0.transformer_blocks.0.attn1 17 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q 18 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k 19 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v 20 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out 21 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0 22 | down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.1 23 | down_blocks.0.attentions.0.transformer_blocks.0.norm2 24 | 
down_blocks.0.attentions.0.transformer_blocks.0.attn2 25 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q 26 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k 27 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v 28 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out 29 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0 30 | down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.1 31 | down_blocks.0.attentions.0.transformer_blocks.0.norm3 32 | down_blocks.0.attentions.0.transformer_blocks.0.ff 33 | down_blocks.0.attentions.0.transformer_blocks.0.ff.net 34 | down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0 35 | down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj 36 | down_blocks.0.attentions.0.transformer_blocks.0.ff.net.1 37 | down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2 38 | down_blocks.0.attentions.0.proj_out 39 | down_blocks.0.attentions.1 40 | down_blocks.0.attentions.1.norm 41 | down_blocks.0.attentions.1.proj_in 42 | down_blocks.0.attentions.1.transformer_blocks 43 | down_blocks.0.attentions.1.transformer_blocks.0 44 | down_blocks.0.attentions.1.transformer_blocks.0.norm1 45 | down_blocks.0.attentions.1.transformer_blocks.0.attn1 46 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q 47 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k 48 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v 49 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out 50 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0 51 | down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.1 52 | down_blocks.0.attentions.1.transformer_blocks.0.norm2 53 | down_blocks.0.attentions.1.transformer_blocks.0.attn2 54 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q 55 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k 56 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v 57 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out 58 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0 59 | down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.1 60 | down_blocks.0.attentions.1.transformer_blocks.0.norm3 61 | down_blocks.0.attentions.1.transformer_blocks.0.ff 62 | down_blocks.0.attentions.1.transformer_blocks.0.ff.net 63 | down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0 64 | down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj 65 | down_blocks.0.attentions.1.transformer_blocks.0.ff.net.1 66 | down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2 67 | down_blocks.0.attentions.1.proj_out 68 | down_blocks.0.resnets 69 | down_blocks.0.resnets.0 70 | down_blocks.0.resnets.0.norm1 71 | down_blocks.0.resnets.0.conv1 72 | down_blocks.0.resnets.0.time_emb_proj 73 | down_blocks.0.resnets.0.norm2 74 | down_blocks.0.resnets.0.dropout 75 | down_blocks.0.resnets.0.conv2 76 | down_blocks.0.resnets.1 77 | down_blocks.0.resnets.1.norm1 78 | down_blocks.0.resnets.1.conv1 79 | down_blocks.0.resnets.1.time_emb_proj 80 | down_blocks.0.resnets.1.norm2 81 | down_blocks.0.resnets.1.dropout 82 | down_blocks.0.resnets.1.conv2 83 | down_blocks.0.downsamplers 84 | down_blocks.0.downsamplers.0 85 | down_blocks.0.downsamplers.0.conv 86 | down_blocks.1 87 | down_blocks.1.attentions 88 | down_blocks.1.attentions.0 89 | down_blocks.1.attentions.0.norm 90 | down_blocks.1.attentions.0.proj_in 91 | down_blocks.1.attentions.0.transformer_blocks 92 | down_blocks.1.attentions.0.transformer_blocks.0 93 | 
down_blocks.1.attentions.0.transformer_blocks.0.norm1 94 | down_blocks.1.attentions.0.transformer_blocks.0.attn1 95 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q 96 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k 97 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v 98 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out 99 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0 100 | down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.1 101 | down_blocks.1.attentions.0.transformer_blocks.0.norm2 102 | down_blocks.1.attentions.0.transformer_blocks.0.attn2 103 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q 104 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k 105 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v 106 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out 107 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0 108 | down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.1 109 | down_blocks.1.attentions.0.transformer_blocks.0.norm3 110 | down_blocks.1.attentions.0.transformer_blocks.0.ff 111 | down_blocks.1.attentions.0.transformer_blocks.0.ff.net 112 | down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0 113 | down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj 114 | down_blocks.1.attentions.0.transformer_blocks.0.ff.net.1 115 | down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2 116 | down_blocks.1.attentions.0.proj_out 117 | down_blocks.1.attentions.1 118 | down_blocks.1.attentions.1.norm 119 | down_blocks.1.attentions.1.proj_in 120 | down_blocks.1.attentions.1.transformer_blocks 121 | down_blocks.1.attentions.1.transformer_blocks.0 122 | down_blocks.1.attentions.1.transformer_blocks.0.norm1 123 | down_blocks.1.attentions.1.transformer_blocks.0.attn1 124 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q 125 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k 126 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v 127 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out 128 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0 129 | down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.1 130 | down_blocks.1.attentions.1.transformer_blocks.0.norm2 131 | down_blocks.1.attentions.1.transformer_blocks.0.attn2 132 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q 133 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k 134 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v 135 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out 136 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0 137 | down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.1 138 | down_blocks.1.attentions.1.transformer_blocks.0.norm3 139 | down_blocks.1.attentions.1.transformer_blocks.0.ff 140 | down_blocks.1.attentions.1.transformer_blocks.0.ff.net 141 | down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0 142 | down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj 143 | down_blocks.1.attentions.1.transformer_blocks.0.ff.net.1 144 | down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2 145 | down_blocks.1.attentions.1.proj_out 146 | down_blocks.1.resnets 147 | down_blocks.1.resnets.0 148 | down_blocks.1.resnets.0.norm1 149 | down_blocks.1.resnets.0.conv1 150 | down_blocks.1.resnets.0.time_emb_proj 151 | down_blocks.1.resnets.0.norm2 152 | down_blocks.1.resnets.0.dropout 153 | down_blocks.1.resnets.0.conv2 154 | 
down_blocks.1.resnets.0.conv_shortcut 155 | down_blocks.1.resnets.1 156 | down_blocks.1.resnets.1.norm1 157 | down_blocks.1.resnets.1.conv1 158 | down_blocks.1.resnets.1.time_emb_proj 159 | down_blocks.1.resnets.1.norm2 160 | down_blocks.1.resnets.1.dropout 161 | down_blocks.1.resnets.1.conv2 162 | down_blocks.1.downsamplers 163 | down_blocks.1.downsamplers.0 164 | down_blocks.1.downsamplers.0.conv 165 | down_blocks.2 166 | down_blocks.2.attentions 167 | down_blocks.2.attentions.0 168 | down_blocks.2.attentions.0.norm 169 | down_blocks.2.attentions.0.proj_in 170 | down_blocks.2.attentions.0.transformer_blocks 171 | down_blocks.2.attentions.0.transformer_blocks.0 172 | down_blocks.2.attentions.0.transformer_blocks.0.norm1 173 | down_blocks.2.attentions.0.transformer_blocks.0.attn1 174 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q 175 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k 176 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v 177 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out 178 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0 179 | down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.1 180 | down_blocks.2.attentions.0.transformer_blocks.0.norm2 181 | down_blocks.2.attentions.0.transformer_blocks.0.attn2 182 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q 183 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k 184 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v 185 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out 186 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0 187 | down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.1 188 | down_blocks.2.attentions.0.transformer_blocks.0.norm3 189 | down_blocks.2.attentions.0.transformer_blocks.0.ff 190 | down_blocks.2.attentions.0.transformer_blocks.0.ff.net 191 | down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0 192 | down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj 193 | down_blocks.2.attentions.0.transformer_blocks.0.ff.net.1 194 | down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2 195 | down_blocks.2.attentions.0.proj_out 196 | down_blocks.2.attentions.1 197 | down_blocks.2.attentions.1.norm 198 | down_blocks.2.attentions.1.proj_in 199 | down_blocks.2.attentions.1.transformer_blocks 200 | down_blocks.2.attentions.1.transformer_blocks.0 201 | down_blocks.2.attentions.1.transformer_blocks.0.norm1 202 | down_blocks.2.attentions.1.transformer_blocks.0.attn1 203 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q 204 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k 205 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v 206 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out 207 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0 208 | down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.1 209 | down_blocks.2.attentions.1.transformer_blocks.0.norm2 210 | down_blocks.2.attentions.1.transformer_blocks.0.attn2 211 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q 212 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k 213 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v 214 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out 215 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0 216 | down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.1 217 | down_blocks.2.attentions.1.transformer_blocks.0.norm3 218 | 
down_blocks.2.attentions.1.transformer_blocks.0.ff 219 | down_blocks.2.attentions.1.transformer_blocks.0.ff.net 220 | down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0 221 | down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj 222 | down_blocks.2.attentions.1.transformer_blocks.0.ff.net.1 223 | down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2 224 | down_blocks.2.attentions.1.proj_out 225 | down_blocks.2.resnets 226 | down_blocks.2.resnets.0 227 | down_blocks.2.resnets.0.norm1 228 | down_blocks.2.resnets.0.conv1 229 | down_blocks.2.resnets.0.time_emb_proj 230 | down_blocks.2.resnets.0.norm2 231 | down_blocks.2.resnets.0.dropout 232 | down_blocks.2.resnets.0.conv2 233 | down_blocks.2.resnets.0.conv_shortcut 234 | down_blocks.2.resnets.1 235 | down_blocks.2.resnets.1.norm1 236 | down_blocks.2.resnets.1.conv1 237 | down_blocks.2.resnets.1.time_emb_proj 238 | down_blocks.2.resnets.1.norm2 239 | down_blocks.2.resnets.1.dropout 240 | down_blocks.2.resnets.1.conv2 241 | down_blocks.2.downsamplers 242 | down_blocks.2.downsamplers.0 243 | down_blocks.2.downsamplers.0.conv 244 | down_blocks.3 245 | down_blocks.3.resnets 246 | down_blocks.3.resnets.0 247 | down_blocks.3.resnets.0.norm1 248 | down_blocks.3.resnets.0.conv1 249 | down_blocks.3.resnets.0.time_emb_proj 250 | down_blocks.3.resnets.0.norm2 251 | down_blocks.3.resnets.0.dropout 252 | down_blocks.3.resnets.0.conv2 253 | down_blocks.3.resnets.1 254 | down_blocks.3.resnets.1.norm1 255 | down_blocks.3.resnets.1.conv1 256 | down_blocks.3.resnets.1.time_emb_proj 257 | down_blocks.3.resnets.1.norm2 258 | down_blocks.3.resnets.1.dropout 259 | down_blocks.3.resnets.1.conv2 260 | up_blocks 261 | up_blocks.0 262 | up_blocks.0.resnets 263 | up_blocks.0.resnets.0 264 | up_blocks.0.resnets.0.norm1 265 | up_blocks.0.resnets.0.conv1 266 | up_blocks.0.resnets.0.time_emb_proj 267 | up_blocks.0.resnets.0.norm2 268 | up_blocks.0.resnets.0.dropout 269 | up_blocks.0.resnets.0.conv2 270 | up_blocks.0.resnets.0.conv_shortcut 271 | up_blocks.0.resnets.1 272 | up_blocks.0.resnets.1.norm1 273 | up_blocks.0.resnets.1.conv1 274 | up_blocks.0.resnets.1.time_emb_proj 275 | up_blocks.0.resnets.1.norm2 276 | up_blocks.0.resnets.1.dropout 277 | up_blocks.0.resnets.1.conv2 278 | up_blocks.0.resnets.1.conv_shortcut 279 | up_blocks.0.resnets.2 280 | up_blocks.0.resnets.2.norm1 281 | up_blocks.0.resnets.2.conv1 282 | up_blocks.0.resnets.2.time_emb_proj 283 | up_blocks.0.resnets.2.norm2 284 | up_blocks.0.resnets.2.dropout 285 | up_blocks.0.resnets.2.conv2 286 | up_blocks.0.resnets.2.conv_shortcut 287 | up_blocks.0.upsamplers 288 | up_blocks.0.upsamplers.0 289 | up_blocks.0.upsamplers.0.conv 290 | up_blocks.1 291 | up_blocks.1.attentions 292 | up_blocks.1.attentions.0 293 | up_blocks.1.attentions.0.norm 294 | up_blocks.1.attentions.0.proj_in 295 | up_blocks.1.attentions.0.transformer_blocks 296 | up_blocks.1.attentions.0.transformer_blocks.0 297 | up_blocks.1.attentions.0.transformer_blocks.0.norm1 298 | up_blocks.1.attentions.0.transformer_blocks.0.attn1 299 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q 300 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k 301 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v 302 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out 303 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0 304 | up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.1 305 | up_blocks.1.attentions.0.transformer_blocks.0.norm2 306 | up_blocks.1.attentions.0.transformer_blocks.0.attn2 307 | 
up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q 308 | up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k 309 | up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v 310 | up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out 311 | up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0 312 | up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.1 313 | up_blocks.1.attentions.0.transformer_blocks.0.norm3 314 | up_blocks.1.attentions.0.transformer_blocks.0.ff 315 | up_blocks.1.attentions.0.transformer_blocks.0.ff.net 316 | up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0 317 | up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj 318 | up_blocks.1.attentions.0.transformer_blocks.0.ff.net.1 319 | up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2 320 | up_blocks.1.attentions.0.proj_out 321 | up_blocks.1.attentions.1 322 | up_blocks.1.attentions.1.norm 323 | up_blocks.1.attentions.1.proj_in 324 | up_blocks.1.attentions.1.transformer_blocks 325 | up_blocks.1.attentions.1.transformer_blocks.0 326 | up_blocks.1.attentions.1.transformer_blocks.0.norm1 327 | up_blocks.1.attentions.1.transformer_blocks.0.attn1 328 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q 329 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k 330 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v 331 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out 332 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0 333 | up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.1 334 | up_blocks.1.attentions.1.transformer_blocks.0.norm2 335 | up_blocks.1.attentions.1.transformer_blocks.0.attn2 336 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q 337 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k 338 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v 339 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out 340 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0 341 | up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.1 342 | up_blocks.1.attentions.1.transformer_blocks.0.norm3 343 | up_blocks.1.attentions.1.transformer_blocks.0.ff 344 | up_blocks.1.attentions.1.transformer_blocks.0.ff.net 345 | up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0 346 | up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj 347 | up_blocks.1.attentions.1.transformer_blocks.0.ff.net.1 348 | up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2 349 | up_blocks.1.attentions.1.proj_out 350 | up_blocks.1.attentions.2 351 | up_blocks.1.attentions.2.norm 352 | up_blocks.1.attentions.2.proj_in 353 | up_blocks.1.attentions.2.transformer_blocks 354 | up_blocks.1.attentions.2.transformer_blocks.0 355 | up_blocks.1.attentions.2.transformer_blocks.0.norm1 356 | up_blocks.1.attentions.2.transformer_blocks.0.attn1 357 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q 358 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k 359 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v 360 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out 361 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0 362 | up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.1 363 | up_blocks.1.attentions.2.transformer_blocks.0.norm2 364 | up_blocks.1.attentions.2.transformer_blocks.0.attn2 365 | up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q 366 | up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k 367 | up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v 368 | 
up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out 369 | up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0 370 | up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.1 371 | up_blocks.1.attentions.2.transformer_blocks.0.norm3 372 | up_blocks.1.attentions.2.transformer_blocks.0.ff 373 | up_blocks.1.attentions.2.transformer_blocks.0.ff.net 374 | up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0 375 | up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj 376 | up_blocks.1.attentions.2.transformer_blocks.0.ff.net.1 377 | up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2 378 | up_blocks.1.attentions.2.proj_out 379 | up_blocks.1.resnets 380 | up_blocks.1.resnets.0 381 | up_blocks.1.resnets.0.norm1 382 | up_blocks.1.resnets.0.conv1 383 | up_blocks.1.resnets.0.time_emb_proj 384 | up_blocks.1.resnets.0.norm2 385 | up_blocks.1.resnets.0.dropout 386 | up_blocks.1.resnets.0.conv2 387 | up_blocks.1.resnets.0.conv_shortcut 388 | up_blocks.1.resnets.1 389 | up_blocks.1.resnets.1.norm1 390 | up_blocks.1.resnets.1.conv1 391 | up_blocks.1.resnets.1.time_emb_proj 392 | up_blocks.1.resnets.1.norm2 393 | up_blocks.1.resnets.1.dropout 394 | up_blocks.1.resnets.1.conv2 395 | up_blocks.1.resnets.1.conv_shortcut 396 | up_blocks.1.resnets.2 397 | up_blocks.1.resnets.2.norm1 398 | up_blocks.1.resnets.2.conv1 399 | up_blocks.1.resnets.2.time_emb_proj 400 | up_blocks.1.resnets.2.norm2 401 | up_blocks.1.resnets.2.dropout 402 | up_blocks.1.resnets.2.conv2 403 | up_blocks.1.resnets.2.conv_shortcut 404 | up_blocks.1.upsamplers 405 | up_blocks.1.upsamplers.0 406 | up_blocks.1.upsamplers.0.conv 407 | up_blocks.2 408 | up_blocks.2.attentions 409 | up_blocks.2.attentions.0 410 | up_blocks.2.attentions.0.norm 411 | up_blocks.2.attentions.0.proj_in 412 | up_blocks.2.attentions.0.transformer_blocks 413 | up_blocks.2.attentions.0.transformer_blocks.0 414 | up_blocks.2.attentions.0.transformer_blocks.0.norm1 415 | up_blocks.2.attentions.0.transformer_blocks.0.attn1 416 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q 417 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k 418 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v 419 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out 420 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0 421 | up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.1 422 | up_blocks.2.attentions.0.transformer_blocks.0.norm2 423 | up_blocks.2.attentions.0.transformer_blocks.0.attn2 424 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q 425 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k 426 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v 427 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out 428 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0 429 | up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.1 430 | up_blocks.2.attentions.0.transformer_blocks.0.norm3 431 | up_blocks.2.attentions.0.transformer_blocks.0.ff 432 | up_blocks.2.attentions.0.transformer_blocks.0.ff.net 433 | up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0 434 | up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj 435 | up_blocks.2.attentions.0.transformer_blocks.0.ff.net.1 436 | up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2 437 | up_blocks.2.attentions.0.proj_out 438 | up_blocks.2.attentions.1 439 | up_blocks.2.attentions.1.norm 440 | up_blocks.2.attentions.1.proj_in 441 | up_blocks.2.attentions.1.transformer_blocks 442 | 
up_blocks.2.attentions.1.transformer_blocks.0 443 | up_blocks.2.attentions.1.transformer_blocks.0.norm1 444 | up_blocks.2.attentions.1.transformer_blocks.0.attn1 445 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q 446 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k 447 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v 448 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out 449 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0 450 | up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.1 451 | up_blocks.2.attentions.1.transformer_blocks.0.norm2 452 | up_blocks.2.attentions.1.transformer_blocks.0.attn2 453 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q 454 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k 455 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v 456 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out 457 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0 458 | up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.1 459 | up_blocks.2.attentions.1.transformer_blocks.0.norm3 460 | up_blocks.2.attentions.1.transformer_blocks.0.ff 461 | up_blocks.2.attentions.1.transformer_blocks.0.ff.net 462 | up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0 463 | up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj 464 | up_blocks.2.attentions.1.transformer_blocks.0.ff.net.1 465 | up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2 466 | up_blocks.2.attentions.1.proj_out 467 | up_blocks.2.attentions.2 468 | up_blocks.2.attentions.2.norm 469 | up_blocks.2.attentions.2.proj_in 470 | up_blocks.2.attentions.2.transformer_blocks 471 | up_blocks.2.attentions.2.transformer_blocks.0 472 | up_blocks.2.attentions.2.transformer_blocks.0.norm1 473 | up_blocks.2.attentions.2.transformer_blocks.0.attn1 474 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q 475 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k 476 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v 477 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out 478 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0 479 | up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.1 480 | up_blocks.2.attentions.2.transformer_blocks.0.norm2 481 | up_blocks.2.attentions.2.transformer_blocks.0.attn2 482 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q 483 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k 484 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v 485 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out 486 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0 487 | up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.1 488 | up_blocks.2.attentions.2.transformer_blocks.0.norm3 489 | up_blocks.2.attentions.2.transformer_blocks.0.ff 490 | up_blocks.2.attentions.2.transformer_blocks.0.ff.net 491 | up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0 492 | up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj 493 | up_blocks.2.attentions.2.transformer_blocks.0.ff.net.1 494 | up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2 495 | up_blocks.2.attentions.2.proj_out 496 | up_blocks.2.resnets 497 | up_blocks.2.resnets.0 498 | up_blocks.2.resnets.0.norm1 499 | up_blocks.2.resnets.0.conv1 500 | up_blocks.2.resnets.0.time_emb_proj 501 | up_blocks.2.resnets.0.norm2 502 | up_blocks.2.resnets.0.dropout 503 | up_blocks.2.resnets.0.conv2 504 | up_blocks.2.resnets.0.conv_shortcut 505 | up_blocks.2.resnets.1 506 | 
up_blocks.2.resnets.1.norm1 507 | up_blocks.2.resnets.1.conv1 508 | up_blocks.2.resnets.1.time_emb_proj 509 | up_blocks.2.resnets.1.norm2 510 | up_blocks.2.resnets.1.dropout 511 | up_blocks.2.resnets.1.conv2 512 | up_blocks.2.resnets.1.conv_shortcut 513 | up_blocks.2.resnets.2 514 | up_blocks.2.resnets.2.norm1 515 | up_blocks.2.resnets.2.conv1 516 | up_blocks.2.resnets.2.time_emb_proj 517 | up_blocks.2.resnets.2.norm2 518 | up_blocks.2.resnets.2.dropout 519 | up_blocks.2.resnets.2.conv2 520 | up_blocks.2.resnets.2.conv_shortcut 521 | up_blocks.2.upsamplers 522 | up_blocks.2.upsamplers.0 523 | up_blocks.2.upsamplers.0.conv 524 | up_blocks.3 525 | up_blocks.3.attentions 526 | up_blocks.3.attentions.0 527 | up_blocks.3.attentions.0.norm 528 | up_blocks.3.attentions.0.proj_in 529 | up_blocks.3.attentions.0.transformer_blocks 530 | up_blocks.3.attentions.0.transformer_blocks.0 531 | up_blocks.3.attentions.0.transformer_blocks.0.norm1 532 | up_blocks.3.attentions.0.transformer_blocks.0.attn1 533 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q 534 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_k 535 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_v 536 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out 537 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0 538 | up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.1 539 | up_blocks.3.attentions.0.transformer_blocks.0.norm2 540 | up_blocks.3.attentions.0.transformer_blocks.0.attn2 541 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_q 542 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_k 543 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_v 544 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out 545 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0 546 | up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.1 547 | up_blocks.3.attentions.0.transformer_blocks.0.norm3 548 | up_blocks.3.attentions.0.transformer_blocks.0.ff 549 | up_blocks.3.attentions.0.transformer_blocks.0.ff.net 550 | up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0 551 | up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj 552 | up_blocks.3.attentions.0.transformer_blocks.0.ff.net.1 553 | up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2 554 | up_blocks.3.attentions.0.proj_out 555 | up_blocks.3.attentions.1 556 | up_blocks.3.attentions.1.norm 557 | up_blocks.3.attentions.1.proj_in 558 | up_blocks.3.attentions.1.transformer_blocks 559 | up_blocks.3.attentions.1.transformer_blocks.0 560 | up_blocks.3.attentions.1.transformer_blocks.0.norm1 561 | up_blocks.3.attentions.1.transformer_blocks.0.attn1 562 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q 563 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_k 564 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_v 565 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out 566 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0 567 | up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.1 568 | up_blocks.3.attentions.1.transformer_blocks.0.norm2 569 | up_blocks.3.attentions.1.transformer_blocks.0.attn2 570 | up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_q 571 | up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_k 572 | up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_v 573 | up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out 574 | up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0 575 | 
up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.1 576 | up_blocks.3.attentions.1.transformer_blocks.0.norm3 577 | up_blocks.3.attentions.1.transformer_blocks.0.ff 578 | up_blocks.3.attentions.1.transformer_blocks.0.ff.net 579 | up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0 580 | up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj 581 | up_blocks.3.attentions.1.transformer_blocks.0.ff.net.1 582 | up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2 583 | up_blocks.3.attentions.1.proj_out 584 | up_blocks.3.attentions.2 585 | up_blocks.3.attentions.2.norm 586 | up_blocks.3.attentions.2.proj_in 587 | up_blocks.3.attentions.2.transformer_blocks 588 | up_blocks.3.attentions.2.transformer_blocks.0 589 | up_blocks.3.attentions.2.transformer_blocks.0.norm1 590 | up_blocks.3.attentions.2.transformer_blocks.0.attn1 591 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q 592 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_k 593 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_v 594 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out 595 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0 596 | up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.1 597 | up_blocks.3.attentions.2.transformer_blocks.0.norm2 598 | up_blocks.3.attentions.2.transformer_blocks.0.attn2 599 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_q 600 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_k 601 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_v 602 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out 603 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0 604 | up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.1 605 | up_blocks.3.attentions.2.transformer_blocks.0.norm3 606 | up_blocks.3.attentions.2.transformer_blocks.0.ff 607 | up_blocks.3.attentions.2.transformer_blocks.0.ff.net 608 | up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0 609 | up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj 610 | up_blocks.3.attentions.2.transformer_blocks.0.ff.net.1 611 | up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2 612 | up_blocks.3.attentions.2.proj_out 613 | up_blocks.3.resnets 614 | up_blocks.3.resnets.0 615 | up_blocks.3.resnets.0.norm1 616 | up_blocks.3.resnets.0.conv1 617 | up_blocks.3.resnets.0.time_emb_proj 618 | up_blocks.3.resnets.0.norm2 619 | up_blocks.3.resnets.0.dropout 620 | up_blocks.3.resnets.0.conv2 621 | up_blocks.3.resnets.0.conv_shortcut 622 | up_blocks.3.resnets.1 623 | up_blocks.3.resnets.1.norm1 624 | up_blocks.3.resnets.1.conv1 625 | up_blocks.3.resnets.1.time_emb_proj 626 | up_blocks.3.resnets.1.norm2 627 | up_blocks.3.resnets.1.dropout 628 | up_blocks.3.resnets.1.conv2 629 | up_blocks.3.resnets.1.conv_shortcut 630 | up_blocks.3.resnets.2 631 | up_blocks.3.resnets.2.norm1 632 | up_blocks.3.resnets.2.conv1 633 | up_blocks.3.resnets.2.time_emb_proj 634 | up_blocks.3.resnets.2.norm2 635 | up_blocks.3.resnets.2.dropout 636 | up_blocks.3.resnets.2.conv2 637 | up_blocks.3.resnets.2.conv_shortcut 638 | mid_block 639 | mid_block.attentions 640 | mid_block.attentions.0 641 | mid_block.attentions.0.norm 642 | mid_block.attentions.0.proj_in 643 | mid_block.attentions.0.transformer_blocks 644 | mid_block.attentions.0.transformer_blocks.0 645 | mid_block.attentions.0.transformer_blocks.0.norm1 646 | mid_block.attentions.0.transformer_blocks.0.attn1 647 | mid_block.attentions.0.transformer_blocks.0.attn1.to_q 648 | mid_block.attentions.0.transformer_blocks.0.attn1.to_k 
649 | mid_block.attentions.0.transformer_blocks.0.attn1.to_v
650 | mid_block.attentions.0.transformer_blocks.0.attn1.to_out
651 | mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0
652 | mid_block.attentions.0.transformer_blocks.0.attn1.to_out.1
653 | mid_block.attentions.0.transformer_blocks.0.norm2
654 | mid_block.attentions.0.transformer_blocks.0.attn2
655 | mid_block.attentions.0.transformer_blocks.0.attn2.to_q
656 | mid_block.attentions.0.transformer_blocks.0.attn2.to_k
657 | mid_block.attentions.0.transformer_blocks.0.attn2.to_v
658 | mid_block.attentions.0.transformer_blocks.0.attn2.to_out
659 | mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0
660 | mid_block.attentions.0.transformer_blocks.0.attn2.to_out.1
661 | mid_block.attentions.0.transformer_blocks.0.norm3
662 | mid_block.attentions.0.transformer_blocks.0.ff
663 | mid_block.attentions.0.transformer_blocks.0.ff.net
664 | mid_block.attentions.0.transformer_blocks.0.ff.net.0
665 | mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj
666 | mid_block.attentions.0.transformer_blocks.0.ff.net.1
667 | mid_block.attentions.0.transformer_blocks.0.ff.net.2
668 | mid_block.attentions.0.proj_out
669 | mid_block.resnets
670 | mid_block.resnets.0
671 | mid_block.resnets.0.norm1
672 | mid_block.resnets.0.conv1
673 | mid_block.resnets.0.time_emb_proj
674 | mid_block.resnets.0.norm2
675 | mid_block.resnets.0.dropout
676 | mid_block.resnets.0.conv2
677 | mid_block.resnets.1
678 | mid_block.resnets.1.norm1
679 | mid_block.resnets.1.conv1
680 | mid_block.resnets.1.time_emb_proj
681 | mid_block.resnets.1.norm2
682 | mid_block.resnets.1.dropout
683 | mid_block.resnets.1.conv2
684 | conv_norm_out
685 | conv_out
686 | 
--------------------------------------------------------------------------------
/hidiffusion/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def isinstance_str(x: object, cls_name: str):
5 |     """
6 |     Checks whether x has any class *named* cls_name in its ancestry.
7 |     Doesn't require access to the class's implementation.
8 | 
9 |     Useful for patching!
10 |     """
11 | 
12 |     for _cls in x.__class__.__mro__:
13 |         if _cls.__name__ == cls_name:
14 |             return True
15 | 
16 |     return False
17 | 
18 | 
19 | def init_generator(device: torch.device, fallback: torch.Generator = None):
20 |     """
21 |     Forks the current default random generator for the given device.
22 | """ 23 | if device.type == "cpu": 24 | return torch.Generator(device="cpu").set_state(torch.get_rng_state()) 25 | elif device.type == "cuda": 26 | return torch.Generator(device=device).set_state(torch.cuda.get_rng_state()) 27 | else: 28 | if fallback is None: 29 | return init_generator(torch.device("cpu")) 30 | else: 31 | return fallback 32 | -------------------------------------------------------------------------------- /nodes.py: -------------------------------------------------------------------------------- 1 | import os 2 | from contextlib import nullcontext 3 | import torch 4 | try: 5 | from diffusers import ( 6 | DPMSolverMultistepScheduler, 7 | StableDiffusionPipeline, 8 | StableDiffusionImg2ImgPipeline, 9 | EulerDiscreteScheduler, 10 | EulerAncestralDiscreteScheduler, 11 | AutoencoderKL, 12 | UNet2DConditionModel, 13 | LCMScheduler, 14 | DDPMScheduler, 15 | DEISMultistepScheduler, 16 | PNDMScheduler, 17 | UniPCMultistepScheduler 18 | ) 19 | from diffusers.loaders.single_file_utils import ( 20 | convert_ldm_vae_checkpoint, 21 | convert_ldm_unet_checkpoint, 22 | create_vae_diffusers_config, 23 | create_unet_diffusers_config, 24 | create_text_encoder_from_ldm_clip_checkpoint 25 | ) 26 | except: 27 | raise ImportError("Diffusers version too old. Please update to 0.26.0 minimum.") 28 | from .scheduling_tcd import TCDScheduler 29 | from contextlib import nullcontext 30 | from diffusers.utils import is_accelerate_available 31 | if is_accelerate_available(): 32 | from accelerate import init_empty_weights 33 | from accelerate.utils import set_module_tensor_to_device 34 | 35 | from .hidiffusion import apply_hidiffusion, remove_hidiffusion 36 | 37 | from omegaconf import OmegaConf 38 | from transformers import CLIPTokenizer 39 | import comfy.model_management as mm 40 | import comfy.utils 41 | import folder_paths 42 | 43 | script_directory = os.path.dirname(os.path.abspath(__file__)) 44 | 45 | class diffusers_model_loader: 46 | @classmethod 47 | def INPUT_TYPES(s): 48 | return {"required": { 49 | "model": ("MODEL",), 50 | "clip": ("CLIP",), 51 | "vae": ("VAE",), 52 | }, 53 | } 54 | 55 | RETURN_TYPES = ("DIFFUSERSMODEL",) 56 | RETURN_NAMES = ("diffusers_model",) 57 | FUNCTION = "loadmodel" 58 | CATEGORY = "IC-Light-Wrapper" 59 | 60 | def loadmodel(self, model, clip, vae): 61 | mm.soft_empty_cache() 62 | dtype = mm.unet_dtype() 63 | vae_dtype = mm.vae_dtype() 64 | device = mm.get_torch_device() 65 | 66 | custom_config = { 67 | 'model': model, 68 | 'vae': vae, 69 | } 70 | if not hasattr(self, 'model') or self.model == None or custom_config != self.current_config: 71 | pbar = comfy.utils.ProgressBar(5) 72 | self.current_config = custom_config 73 | # setup pretrained models 74 | original_config = OmegaConf.load(os.path.join(script_directory, f"configs/v1-inference.yaml")) 75 | 76 | print("loading ELLA") 77 | checkpoint_path = os.path.join(folder_paths.models_dir,'ella') 78 | ella_path = os.path.join(checkpoint_path, 'ella-sd1.5-tsc-t5xl.safetensors') 79 | if not os.path.exists(ella_path): 80 | from huggingface_hub import snapshot_download 81 | snapshot_download(repo_id="QQGYLab/ELLA", local_dir=checkpoint_path, local_dir_use_symlinks=False) 82 | 83 | with (init_empty_weights() if is_accelerate_available() else nullcontext()): 84 | converted_vae_config = create_vae_diffusers_config(original_config, image_size=512) 85 | new_vae = AutoencoderKL(**converted_vae_config) 86 | 87 | converted_unet_config = create_unet_diffusers_config(original_config, image_size=512) 88 | unet = 
UNet2DConditionModel(**converted_unet_config) 89 | 90 | clip_sd = None 91 | load_models = [model] 92 | load_models.append(clip.load_model()) 93 | clip_sd = clip.get_sd() 94 | comfy.model_management.load_models_gpu(load_models) 95 | sd = model.model.state_dict_for_saving(clip_sd, vae.get_sd(), None) 96 | 97 | converted_vae = convert_ldm_vae_checkpoint(sd, converted_vae_config) 98 | if is_accelerate_available(): 99 | for key in converted_vae: 100 | set_module_tensor_to_device(new_vae, key, device=device, dtype=dtype, value=converted_vae[key]) 101 | else: 102 | new_vae.load_state_dict(converted_vae, strict=False) 103 | del converted_vae 104 | pbar.update(1) 105 | 106 | converted_unet = convert_ldm_unet_checkpoint(sd, converted_unet_config) 107 | if is_accelerate_available(): 108 | for key in converted_unet: 109 | set_module_tensor_to_device(unet, key, device=device, dtype=dtype, value=converted_unet[key]) 110 | else: 111 | unet.load_state_dict(converted_unet, strict=False) 112 | del converted_unet 113 | 114 | pbar.update(1) 115 | # 3. text_model 116 | print("loading text model") 117 | text_encoder = create_text_encoder_from_ldm_clip_checkpoint("openai/clip-vit-large-patch14",sd) 118 | scheduler_config = { 119 | 'num_train_timesteps': 1000, 120 | 'beta_start': 0.00085, 121 | 'beta_end': 0.012, 122 | 'beta_schedule': "scaled_linear", 123 | 'steps_offset': 1 124 | } 125 | # 4. tokenizer 126 | tokenizer_path = os.path.join(script_directory, "configs/tokenizer") 127 | tokenizer = CLIPTokenizer.from_pretrained(tokenizer_path) 128 | 129 | scheduler=DPMSolverMultistepScheduler(**scheduler_config) 130 | pbar.update(1) 131 | del sd 132 | 133 | pbar.update(1) 134 | 135 | print("creating pipeline") 136 | self.pipe = StableDiffusionImg2ImgPipeline( 137 | unet=unet, 138 | vae=new_vae, 139 | text_encoder=text_encoder, 140 | tokenizer=tokenizer, 141 | scheduler=scheduler, 142 | safety_checker=None, 143 | feature_extractor=None, 144 | requires_safety_checker=False, 145 | image_encoder=None 146 | ) 147 | print("pipeline created") 148 | pbar.update(1) 149 | #self.pipe.enable_model_cpu_offload() 150 | diffusers_model = { 151 | 'pipe': self.pipe, 152 | } 153 | 154 | return (diffusers_model,) 155 | 156 | class LoadICLightUnetDiffusers: 157 | @classmethod 158 | def INPUT_TYPES(s): 159 | return { 160 | "required": { 161 | "diffusersmodel": ("DIFFUSERSMODEL",), 162 | "model_path": (folder_paths.get_filename_list("unet"), ) 163 | } 164 | } 165 | 166 | RETURN_TYPES = ("DIFFUSERSMODEL",) 167 | FUNCTION = "load" 168 | CATEGORY = "IC-Light-Wrapper" 169 | 170 | def load(self, diffusersmodel, model_path): 171 | unet = diffusersmodel["pipe"].unet 172 | device = mm.get_torch_device() 173 | 174 | unet_original_forward = unet.forward 175 | 176 | new_conv_in = torch.nn.Conv2d(8, unet.conv_in.out_channels, unet.conv_in.kernel_size, unet.conv_in.stride, unet.conv_in.padding) 177 | new_conv_in.weight.zero_() 178 | new_conv_in.weight[:, :4, :, :].copy_(unet.conv_in.weight) 179 | new_conv_in.bias = unet.conv_in.bias 180 | unet.conv_in = new_conv_in 181 | 182 | def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs): 183 | c_concat = kwargs['cross_attention_kwargs']['concat_conds'].to(sample) 184 | c_concat = torch.cat([c_concat] * (sample.shape[0] // c_concat.shape[0]), dim=0) 185 | new_sample = torch.cat([sample, c_concat], dim=1) 186 | kwargs['cross_attention_kwargs'] = {} 187 | return unet_original_forward(new_sample, timestep, encoder_hidden_states, **kwargs) 188 | 189 | unet.forward = hooked_unet_forward 190 | 
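        # In brief: conv_in was widened above from 4 to 8 input channels so the UNet
        # can take the noisy latent concatenated with the IC-Light conditioning
        # latent supplied through cross_attention_kwargs['concat_conds']. The new
        # channels start zeroed, so the base model behaves as before until the
        # IC-Light weights are merged in below. (The reference IC-Light
        # implementation does this surgery under torch.no_grad(); the same guard
        # would be needed here if autograd were enabled in the host process.)
        # The checkpoint loaded below is an additive offset, not a full state dict:
        # each tensor is added to the matching base UNet weight,
        # sd_merged[k] = sd_origin[k] + sd_offset[k].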
191 |         model_full_path = folder_paths.get_full_path("unet", model_path)
192 |         if not os.path.exists(model_full_path):
193 |             raise FileNotFoundError(f"Invalid model path: {model_full_path}")
194 |         else:
195 |             print("LoadICLightUnetDiffusers: loading IC-Light UNet weights")
196 |             from comfy.utils import load_torch_file
197 |             sd_offset = load_torch_file(model_full_path, device=mm.get_torch_device())
198 |             sd_origin = unet.state_dict()
199 | 
200 |             sd_merged = {k: sd_origin[k].to(device) + sd_offset[k].to(device) for k in sd_origin.keys()}
201 |             unet.load_state_dict(sd_merged, strict=True)
202 |             del sd_offset, sd_origin, sd_merged
203 | 
204 |         return (diffusersmodel,)
205 | 
206 | class iclight_diffusers_sampler:
207 |     @classmethod
208 |     def INPUT_TYPES(s):
209 |         return {"required": {
210 |             "diffusers_model": ("DIFFUSERSMODEL",),
211 |             "latent": ("LATENT",),
212 |             "width": ("INT", {"default": 512, "min": 64, "max": 2048, "step": 64}),
213 |             "height": ("INT", {"default": 512, "min": 64, "max": 2048, "step": 64}),
214 |             "steps": ("INT", {"default": 25, "min": 1, "max": 200, "step": 1}),
215 |             "guidance_scale": ("FLOAT", {"default": 2.0, "min": 1.01, "max": 20.0, "step": 0.01}),
216 |             "denoise_strength": ("FLOAT", {"default": 0.9, "min": 0.01, "max": 1.0, "step": 0.01}),
217 |             "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
218 |             "scheduler": (
219 |                 [
220 |                     'DPMSolverMultistepScheduler',
221 |                     'DPMSolverMultistepScheduler_SDE_karras',
222 |                     'DDPMScheduler',
223 |                     'LCMScheduler',
224 |                     'PNDMScheduler',
225 |                     'DEISMultistepScheduler',
226 |                     'EulerDiscreteScheduler',
227 |                     'EulerAncestralDiscreteScheduler',
228 |                     'UniPCMultistepScheduler',
229 |                     'TCDScheduler'
230 |                 ], {
231 |                     "default": 'DPMSolverMultistepScheduler'
232 |                 }),
233 |             "prompt": ("STRING", {"default": "positive", "multiline": True}),
234 |             "n_prompt": ("STRING", {"default": "negative", "multiline": True}),
235 |             "hidiffusion": ("BOOLEAN", {"default": False}),
236 |             },
237 |             "optional": {
238 |                 "bg_latent": ("LATENT",),
239 |                 "fixed_seed": ("BOOLEAN", {"default": True}),
240 |             }
241 |         }
242 | 
243 |     RETURN_TYPES = ("LATENT",)
244 |     RETURN_NAMES = ("samples",)
245 |     FUNCTION = "process"
246 |     CATEGORY = "IC-Light-Wrapper"
247 | 
248 |     def process(self, latent, diffusers_model, width, height, steps, guidance_scale, denoise_strength, seed, scheduler, prompt, n_prompt, hidiffusion, bg_latent=None, fixed_seed=True):
249 |         device = mm.get_torch_device()
250 |         mm.unload_all_models()
251 |         mm.soft_empty_cache()
252 |         dtype = mm.unet_dtype()
253 |         pipe = diffusers_model['pipe']
254 |         pipe.to(device, dtype=dtype)
255 |         scale_factor = pipe.vae.config.scaling_factor
256 | 
257 |         scheduler_config = {
258 |             'num_train_timesteps': 1000,
259 |             'beta_start': 0.00085,
260 |             'beta_end': 0.012,
261 |             'beta_schedule': "scaled_linear",
262 |             'steps_offset': 1,
263 |         }
264 |         if scheduler == 'DPMSolverMultistepScheduler':
265 |             noise_scheduler = DPMSolverMultistepScheduler(**scheduler_config)
266 |         elif scheduler == 'DPMSolverMultistepScheduler_SDE_karras':
267 |             scheduler_config.update({"algorithm_type": "sde-dpmsolver++"})
268 |             scheduler_config.update({"use_karras_sigmas": True})
269 |             noise_scheduler = DPMSolverMultistepScheduler(**scheduler_config)
270 |         elif scheduler == 'DDPMScheduler':
271 |             noise_scheduler = DDPMScheduler(**scheduler_config)
272 |         elif scheduler == 'LCMScheduler':
273 |             noise_scheduler = LCMScheduler(**scheduler_config)
274 |         elif scheduler == 'PNDMScheduler':
275 |             scheduler_config.update({"set_alpha_to_one": False})
276 |             scheduler_config.update({"trained_betas": None})
277 |             noise_scheduler = PNDMScheduler(**scheduler_config)
278 |         elif scheduler == 'DEISMultistepScheduler':
279 |             noise_scheduler = DEISMultistepScheduler(**scheduler_config)
280 |         elif scheduler == 'EulerDiscreteScheduler':
281 |             noise_scheduler = EulerDiscreteScheduler(**scheduler_config)
282 |         elif scheduler == 'EulerAncestralDiscreteScheduler':
283 |             noise_scheduler = EulerAncestralDiscreteScheduler(**scheduler_config)
284 |         elif scheduler == 'UniPCMultistepScheduler':
285 |             noise_scheduler = UniPCMultistepScheduler(**scheduler_config)
286 |         elif scheduler == 'TCDScheduler':
287 |             noise_scheduler = TCDScheduler(**scheduler_config)
288 | 
289 |         pipe.scheduler = noise_scheduler
290 |         if hidiffusion:
291 |             apply_hidiffusion(pipe)
292 |         else:
293 |             remove_hidiffusion(pipe)
294 | 
295 |         if bg_latent is not None:
296 |             bg_latent = bg_latent["samples"]
297 |             bg_latent = bg_latent * pipe.vae.config.scaling_factor
298 |         else:
299 |             bg_latent = None
300 | 
301 |         concat_conds = latent["samples"]
302 |         concat_conds = concat_conds * pipe.vae.config.scaling_factor
303 |         B, C, H, W = latent["samples"].shape
304 |         prompt_list = []
305 |         prompt_list.append(prompt)
306 |         if len(prompt_list) < B:
307 |             prompt_list += [prompt_list[-1]] * (B - len(prompt_list))
308 | 
309 |         n_prompt_list = []
310 |         n_prompt_list.append(n_prompt)
311 |         if len(n_prompt_list) < B:
312 |             n_prompt_list += [n_prompt_list[-1]] * (B - len(n_prompt_list))
313 | 
314 | 
315 |         if fixed_seed:
316 |             generator = [torch.Generator(device=device).manual_seed(seed) for _ in range(B)]
317 |         else:
318 |             generator = [torch.Generator(device=device).manual_seed(i) for i in range(B)]
319 | 
320 |         pbar = comfy.utils.ProgressBar(steps)
321 |         def progress_counter_callback(pipeline, step, timestep, callback_kwargs):
322 |             pbar.update(1)
323 |             return callback_kwargs or {}
324 | 
325 |         autocast_condition = (dtype != torch.float32) and not mm.is_device_mps(device)
326 |         with torch.autocast(mm.get_autocast_device(device), dtype=dtype) if autocast_condition else nullcontext():
327 | 
328 |             images = pipe(
329 |                 image=bg_latent,
330 |                 prompt=prompt_list,
331 |                 strength=denoise_strength,
332 |                 negative_prompt=n_prompt_list,
333 |                 prompt_embeds=None,
334 |                 negative_prompt_embeds=None,
335 |                 guidance_scale=guidance_scale,
336 |                 num_inference_steps=int(round(steps / denoise_strength)),
337 |                 height=height,
338 |                 width=width,
339 |                 cross_attention_kwargs={'concat_conds': concat_conds},
340 |                 generator=generator,
341 |                 output_type="latent",
342 |                 callback_on_step_end=progress_counter_callback,
343 |                 #callback_on_step_end_tensor_inputs=["latents", "prompt_embeds", "negative_prompt_embeds"],
344 |             ).images
345 |         images = images / scale_factor
346 |         #image_out = images.permute(0, 2, 3, 1).cpu().float()
347 |         return ({"samples": images},)
348 | 
349 | NODE_CLASS_MAPPINGS = {
350 |     "diffusers_model_loader": diffusers_model_loader,
351 |     "LoadICLightUnetDiffusers": LoadICLightUnetDiffusers,
352 |     "iclight_diffusers_sampler": iclight_diffusers_sampler
353 | }
354 | NODE_DISPLAY_NAME_MAPPINGS = {
355 |     "diffusers_model_loader": "Diffusers Model Loader",
356 |     "LoadICLightUnetDiffusers": "LoadICLightUnetDiffusers",
357 |     "iclight_diffusers_sampler": "IC-Light Diffusers Sampler"
358 | }
359 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | diffusers>=0.27.2
2 | accelerate>=0.29.2
3 | omegaconf
4
| -------------------------------------------------------------------------------- /scheduling_tcd.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 Stanford University Team and The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion 16 | # and https://github.com/hojonathanho/diffusion 17 | 18 | import math 19 | from dataclasses import dataclass 20 | from typing import List, Optional, Tuple, Union 21 | 22 | import numpy as np 23 | import torch 24 | 25 | from diffusers.configuration_utils import ConfigMixin, register_to_config 26 | from diffusers.schedulers.scheduling_utils import SchedulerMixin 27 | from diffusers.utils import BaseOutput, logging 28 | from diffusers.utils.torch_utils import randn_tensor 29 | 30 | 31 | logger = logging.get_logger(__name__) # pylint: disable=invalid-name 32 | 33 | 34 | @dataclass 35 | class TCDSchedulerOutput(BaseOutput): 36 | """ 37 | Output class for the scheduler's `step` function output. 38 | 39 | Args: 40 | prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): 41 | Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the 42 | denoising loop. 43 | pred_noised_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): 44 | The predicted noised sample `(x_{s})` based on the model output from the current timestep. 45 | """ 46 | 47 | prev_sample: torch.FloatTensor 48 | pred_noised_sample: Optional[torch.FloatTensor] = None 49 | 50 | 51 | # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar 52 | def betas_for_alpha_bar( 53 | num_diffusion_timesteps, 54 | max_beta=0.999, 55 | alpha_transform_type="cosine", 56 | ): 57 | """ 58 | Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of 59 | (1-beta) over time from t = [0,1]. 60 | 61 | Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up 62 | to that part of the diffusion process. 63 | 64 | 65 | Args: 66 | num_diffusion_timesteps (`int`): the number of betas to produce. 67 | max_beta (`float`): the maximum beta to use; use values lower than 1 to 68 | prevent singularities. 69 | alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. 
70 |             Choose from `cosine` or `exp`
71 | 
72 |     Returns:
73 |         betas (`torch.Tensor`): the betas used by the scheduler to step the model outputs
74 |     """
75 |     if alpha_transform_type == "cosine":
76 | 
77 |         def alpha_bar_fn(t):
78 |             return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
79 | 
80 |     elif alpha_transform_type == "exp":
81 | 
82 |         def alpha_bar_fn(t):
83 |             return math.exp(t * -12.0)
84 | 
85 |     else:
86 |         raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")
87 | 
88 |     betas = []
89 |     for i in range(num_diffusion_timesteps):
90 |         t1 = i / num_diffusion_timesteps
91 |         t2 = (i + 1) / num_diffusion_timesteps
92 |         betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
93 |     return torch.tensor(betas, dtype=torch.float32)
94 | 
95 | 
96 | # Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
97 | def rescale_zero_terminal_snr(betas: torch.FloatTensor) -> torch.FloatTensor:
98 |     """
99 |     Rescales betas to have zero terminal SNR, based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
100 | 
101 | 
102 |     Args:
103 |         betas (`torch.FloatTensor`):
104 |             the betas that the scheduler is being initialized with.
105 | 
106 |     Returns:
107 |         `torch.FloatTensor`: rescaled betas with zero terminal SNR
108 |     """
109 |     # Convert betas to alphas_bar_sqrt
110 |     alphas = 1.0 - betas
111 |     alphas_cumprod = torch.cumprod(alphas, dim=0)
112 |     alphas_bar_sqrt = alphas_cumprod.sqrt()
113 | 
114 |     # Store old values.
115 |     alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
116 |     alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
117 | 
118 |     # Shift so the last timestep is zero.
119 |     alphas_bar_sqrt -= alphas_bar_sqrt_T
120 | 
121 |     # Scale so the first timestep is back to the old value.
122 |     alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
123 | 
124 |     # Convert alphas_bar_sqrt to betas
125 |     alphas_bar = alphas_bar_sqrt**2  # Revert sqrt
126 |     alphas = alphas_bar[1:] / alphas_bar[:-1]  # Revert cumprod
127 |     alphas = torch.cat([alphas_bar[0:1], alphas])
128 |     betas = 1 - alphas
129 | 
130 |     return betas
131 | 
132 | 
133 | class TCDScheduler(SchedulerMixin, ConfigMixin):
134 |     """
135 |     `TCDScheduler` incorporates the `Strategic Stochastic Sampling` introduced by the paper `Trajectory Consistency
136 |     Distillation`, extending the original Multistep Consistency Sampling to enable unrestricted trajectory traversal.
137 | 
138 |     This code is based on the official repo of TCD (https://github.com/jabir-zheng/TCD).
139 | 
140 |     This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. [`~ConfigMixin`] takes care of storing all config
141 |     attributes that are passed in the scheduler's `__init__` function, such as `num_train_timesteps`. They can be
142 |     accessed via `scheduler.config.num_train_timesteps`. [`SchedulerMixin`] provides general loading and saving
143 |     functionality via the [`SchedulerMixin.save_pretrained`] and [`~SchedulerMixin.from_pretrained`] functions.
144 | 
145 |     Args:
146 |         num_train_timesteps (`int`, defaults to 1000):
147 |             The number of diffusion steps to train the model.
148 |         beta_start (`float`, defaults to 0.00085):
149 |             The starting `beta` value of inference.
150 |         beta_end (`float`, defaults to 0.012):
151 |             The final `beta` value.
152 |         beta_schedule (`str`, defaults to `"scaled_linear"`):
153 |             The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
154 |             `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
155 |         trained_betas (`np.ndarray`, *optional*):
156 |             Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
157 |         original_inference_steps (`int`, *optional*, defaults to 50):
158 |             The default number of inference steps used to generate a linearly-spaced timestep schedule, from which we
159 |             will ultimately take `num_inference_steps` evenly spaced timesteps to form the final timestep schedule.
160 |         clip_sample (`bool`, defaults to `False`):
161 |             Clip the predicted sample for numerical stability.
162 |         clip_sample_range (`float`, defaults to 1.0):
163 |             The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
164 |         set_alpha_to_one (`bool`, defaults to `True`):
165 |             Each diffusion step uses the alphas product value at that step and at the previous one. For the final step
166 |             there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
167 |             otherwise it uses the alpha value at step 0.
168 |         steps_offset (`int`, defaults to 0):
169 |             An offset added to the inference steps, as required by some model families.
170 |         prediction_type (`str`, defaults to `epsilon`, *optional*):
171 |             Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
172 |             `sample` (directly predicts the noisy sample) or `v_prediction` (see section 2.4 of [Imagen
173 |             Video](https://imagen.research.google/video/paper.pdf) paper).
174 |         thresholding (`bool`, defaults to `False`):
175 |             Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
176 |             as Stable Diffusion.
177 |         dynamic_thresholding_ratio (`float`, defaults to 0.995):
178 |             The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
179 |         sample_max_value (`float`, defaults to 1.0):
180 |             The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
181 |         timestep_spacing (`str`, defaults to `"leading"`):
182 |             The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
183 |             Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
184 |         timestep_scaling (`float`, defaults to 10.0):
185 |             The factor the timesteps will be multiplied by when calculating the consistency model boundary conditions
186 |             `c_skip` and `c_out`. Increasing this will decrease the approximation error (although the approximation
187 |             error at the default of `10.0` is already pretty small).
188 |         rescale_betas_zero_snr (`bool`, defaults to `False`):
189 |             Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
190 |             dark samples instead of limiting it to samples with medium brightness. Loosely related to
191 |             [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
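
    Example (an illustrative sketch: it assumes a TCD-distilled checkpoint or TCD
    LoRA is loaded, since the scheduler alone does not make a base model few-step
    capable, and it assumes `step` accepts `eta` as in the upstream TCD scheduler;
    the repo id below is only a placeholder for an SD1.5-family checkpoint):

    ```py
    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")
    # Reuse the pipeline's existing schedule configuration.
    pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
    # `eta` (gamma in the TCD paper) in [0, 1] sets the stochasticity of
    # Strategic Stochastic Sampling; 0.3 is the upstream default.
    image = pipe("a photo of a cat", num_inference_steps=4, eta=0.3).images[0]
    ```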
192 |     """
193 | 
194 |     order = 1
195 | 
196 |     @register_to_config
197 |     def __init__(
198 |         self,
199 |         num_train_timesteps: int = 1000,
200 |         beta_start: float = 0.00085,
201 |         beta_end: float = 0.012,
202 |         beta_schedule: str = "scaled_linear",
203 |         trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
204 |         original_inference_steps: int = 50,
205 |         clip_sample: bool = False,
206 |         clip_sample_range: float = 1.0,
207 |         set_alpha_to_one: bool = True,
208 |         steps_offset: int = 0,
209 |         prediction_type: str = "epsilon",
210 |         thresholding: bool = False,
211 |         dynamic_thresholding_ratio: float = 0.995,
212 |         sample_max_value: float = 1.0,
213 |         timestep_spacing: str = "leading",
214 |         timestep_scaling: float = 10.0,
215 |         rescale_betas_zero_snr: bool = False,
216 |     ):
217 |         if trained_betas is not None:
218 |             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
219 |         elif beta_schedule == "linear":
220 |             self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
221 |         elif beta_schedule == "scaled_linear":
222 |             # this schedule is very specific to the latent diffusion model.
223 |             self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
224 |         elif beta_schedule == "squaredcos_cap_v2":
225 |             # Glide cosine schedule
226 |             self.betas = betas_for_alpha_bar(num_train_timesteps)
227 |         else:
228 |             raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")
229 | 
230 |         # Rescale for zero SNR
231 |         if rescale_betas_zero_snr:
232 |             self.betas = rescale_zero_terminal_snr(self.betas)
233 | 
234 |         self.alphas = 1.0 - self.betas
235 |         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
236 | 
237 |         # At every step in ddim, we are looking into the previous alphas_cumprod
238 |         # For the final step, there is no previous alphas_cumprod because we are already at 0
239 |         # `set_alpha_to_one` decides whether we set this parameter simply to one or
240 |         # whether we use the alpha product at the first timestep (`alphas_cumprod[0]`) instead.
241 |         self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0]
242 | 
243 |         # standard deviation of the initial noise distribution
244 |         self.init_noise_sigma = 1.0
245 | 
246 |         # setable values
247 |         self.num_inference_steps = None
248 |         self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
249 |         self.custom_timesteps = False
250 | 
251 |         self._step_index = None
252 |         self._begin_index = None
253 | 
254 |     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep
255 |     def index_for_timestep(self, timestep, schedule_timesteps=None):
256 |         if schedule_timesteps is None:
257 |             schedule_timesteps = self.timesteps
258 | 
259 |         indices = (schedule_timesteps == timestep).nonzero()
260 | 
261 |         # The sigma index that is taken for the **very** first `step`
262 |         # is always the second index (or the last index if there is only 1)
263 |         # This way we can ensure we don't accidentally skip a sigma in
264 |         # case we start in the middle of the denoising schedule (e.g.
for image-to-image)
265 |         pos = 1 if len(indices) > 1 else 0
266 | 
267 |         return indices[pos].item()
268 | 
269 |     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
270 |     def _init_step_index(self, timestep):
271 |         if self.begin_index is None:
272 |             if isinstance(timestep, torch.Tensor):
273 |                 timestep = timestep.to(self.timesteps.device)
274 |             self._step_index = self.index_for_timestep(timestep)
275 |         else:
276 |             self._step_index = self._begin_index
277 | 
278 |     @property
279 |     def step_index(self):
280 |         return self._step_index
281 | 
282 |     @property
283 |     def begin_index(self):
284 |         """
285 |         The index of the first timestep. It should be set by the pipeline with the `set_begin_index` method.
286 |         """
287 |         return self._begin_index
288 | 
289 |     # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index
290 |     def set_begin_index(self, begin_index: int = 0):
291 |         """
292 |         Sets the begin index for the scheduler. This function should be run by the pipeline before inference.
293 | 
294 |         Args:
295 |             begin_index (`int`):
296 |                 The begin index for the scheduler.
297 |         """
298 |         self._begin_index = begin_index
299 | 
300 |     def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
301 |         """
302 |         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
303 |         current timestep.
304 | 
305 |         Args:
306 |             sample (`torch.FloatTensor`):
307 |                 The input sample.
308 |             timestep (`int`, *optional*):
309 |                 The current timestep in the diffusion chain.
310 | 
311 |         Returns:
312 |             `torch.FloatTensor`:
313 |                 A scaled input sample.
314 |         """
315 |         return sample
316 | 
317 |     # Copied from diffusers.schedulers.scheduling_ddim.DDIMScheduler._get_variance
318 |     def _get_variance(self, timestep, prev_timestep):
319 |         alpha_prod_t = self.alphas_cumprod[timestep]
320 |         alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
321 |         beta_prod_t = 1 - alpha_prod_t
322 |         beta_prod_t_prev = 1 - alpha_prod_t_prev
323 | 
324 |         variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev)
325 | 
326 |         return variance
327 | 
328 |     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
329 |     def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
330 |         """
331 |         "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
332 |         prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
333 |         s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
334 |         pixels from saturation at each step. We find that dynamic thresholding results in significantly better
335 |         photorealism as well as better image-text alignment, especially when using very large guidance weights."
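        (Illustrative numbers, not from the paper: with `dynamic_thresholding_ratio=0.995`, if the
        99.5th-percentile absolute value of the x_0 prediction is s = 2.4, the prediction is clamped to
        [-2.4, 2.4] and divided by 2.4, mapping it back into [-1, 1]; when s <= 1, the `min=1` clamp in the
        code below reduces this to plain clipping to [-1, 1].)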
336 | 
337 |         https://arxiv.org/abs/2205.11487
338 |         """
339 |         dtype = sample.dtype
340 |         batch_size, channels, *remaining_dims = sample.shape
341 | 
342 |         if dtype not in (torch.float32, torch.float64):
343 |             sample = sample.float()  # upcast for quantile calculation, and clamp not implemented for cpu half
344 | 
345 |         # Flatten sample to do the quantile calculation along each image
346 |         sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
347 | 
348 |         abs_sample = sample.abs()  # "a certain percentile absolute pixel value"
349 | 
350 |         s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
351 |         s = torch.clamp(
352 |             s, min=1, max=self.config.sample_max_value
353 |         )  # When clamped to min=1, equivalent to standard clipping to [-1, 1]
354 |         s = s.unsqueeze(1)  # (batch_size, 1) because clamp will broadcast along dim=0
355 |         sample = torch.clamp(sample, -s, s) / s  # "we threshold xt0 to the range [-s, s] and then divide by s"
356 | 
357 |         sample = sample.reshape(batch_size, channels, *remaining_dims)
358 |         sample = sample.to(dtype)
359 | 
360 |         return sample
361 | 
362 |     def set_timesteps(
363 |         self,
364 |         num_inference_steps: Optional[int] = None,
365 |         device: Union[str, torch.device] = None,
366 |         original_inference_steps: Optional[int] = None,
367 |         timesteps: Optional[List[int]] = None,
368 |         strength: float = 1.0,
369 |     ):
370 |         """
371 |         Sets the discrete timesteps used for the diffusion chain (to be run before inference).
372 | 
373 |         Args:
374 |             num_inference_steps (`int`, *optional*):
375 |                 The number of diffusion steps used when generating samples with a pre-trained model. If used,
376 |                 `timesteps` must be `None`.
377 |             device (`str` or `torch.device`, *optional*):
378 |                 The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
379 |             original_inference_steps (`int`, *optional*):
380 |                 The original number of inference steps, which will be used to generate a linearly-spaced timestep
381 |                 schedule (which is different from the standard `diffusers` implementation). We will then take
382 |                 `num_inference_steps` timesteps from this schedule, evenly spaced in terms of indices, and use that as
383 |                 our final timestep schedule. If not set, this will default to the `original_inference_steps` attribute.
384 |             timesteps (`List[int]`, *optional*):
385 |                 Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
386 |                 timestep spacing strategy of equal spacing between timesteps on the training/distillation timestep
387 |                 schedule is used. If `timesteps` is passed, `num_inference_steps` must be `None`.
388 |             strength (`float`, *optional*, defaults to 1.0):
389 |                 Used to determine the number of timesteps used for inference when using img2img, inpaint, etc.
390 |         """
391 |         # 0. Check inputs
392 |         if num_inference_steps is None and timesteps is None:
393 |             raise ValueError("Must pass exactly one of `num_inference_steps` or `timesteps`.")
394 | 
395 |         if num_inference_steps is not None and timesteps is not None:
396 |             raise ValueError("Can only pass one of `num_inference_steps` or `timesteps`.")
397 | 
398 |         # 1. Calculate the TCD original training/distillation timestep schedule.
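        # (Illustration with the default config, assumed values for clarity: num_train_timesteps=1000
        # and original_inference_steps=50 give k = 1000 // 50 = 20, so at strength=1.0 the schedule
        # built below is np.arange(1, 51) * 20 - 1, i.e. [19, 39, 59, ..., 999].)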
399 |         original_steps = (
400 |             original_inference_steps if original_inference_steps is not None else self.config.original_inference_steps
401 |         )
402 | 
403 |         if original_inference_steps is None:
404 |             # default option, timesteps align with discrete inference steps
405 |             if original_steps > self.config.num_train_timesteps:
406 |                 raise ValueError(
407 |                     f"`original_steps`: {original_steps} cannot be larger than `self.config.num_train_timesteps`:"
408 |                     f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
409 |                     f" maximal {self.config.num_train_timesteps} timesteps."
410 |                 )
411 |             # TCD Timesteps Setting
412 |             # The skipping step parameter k from the paper.
413 |             k = self.config.num_train_timesteps // original_steps
414 |             # TCD Training/Distillation Steps Schedule
415 |             tcd_origin_timesteps = np.asarray(list(range(1, int(original_steps * strength) + 1))) * k - 1
416 |         else:
417 |             # customised option, sampled timesteps can be any arbitrary value
418 |             tcd_origin_timesteps = np.asarray(list(range(0, int(self.config.num_train_timesteps * strength))))
419 | 
420 |         # 2. Calculate the TCD inference timestep schedule.
421 |         if timesteps is not None:
422 |             # 2.1 Handle custom timestep schedules.
423 |             train_timesteps = set(tcd_origin_timesteps)
424 |             non_train_timesteps = []
425 |             for i in range(1, len(timesteps)):
426 |                 if timesteps[i] >= timesteps[i - 1]:
427 |                     raise ValueError("`timesteps` must be in descending order.")
428 | 
429 |                 if timesteps[i] not in train_timesteps:
430 |                     non_train_timesteps.append(timesteps[i])
431 | 
432 |             if timesteps[0] >= self.config.num_train_timesteps:
433 |                 raise ValueError(
434 |                     f"`timesteps` must start before `self.config.num_train_timesteps`:"
435 |                     f" {self.config.num_train_timesteps}."
436 |                 )
437 | 
438 |             # Raise warning if timestep schedule does not start with self.config.num_train_timesteps - 1
439 |             if strength == 1.0 and timesteps[0] != self.config.num_train_timesteps - 1:
440 |                 logger.warning(
441 |                     f"The first timestep on the custom timestep schedule is {timesteps[0]}, not"
442 |                     f" `self.config.num_train_timesteps - 1`: {self.config.num_train_timesteps - 1}. You may get"
443 |                     f" unexpected results when using this timestep schedule."
444 |                 )
445 | 
446 |             # Raise warning if custom timestep schedule contains timesteps not on original timestep schedule
447 |             if non_train_timesteps:
448 |                 logger.warning(
449 |                     f"The custom timestep schedule contains the following timesteps which are not on the original"
450 |                     f" training/distillation timestep schedule: {non_train_timesteps}. You may get unexpected results"
451 |                     f" when using this timestep schedule."
452 |                 )
453 | 
454 |             # Raise warning if custom timestep schedule is longer than original_steps
455 |             if original_steps is not None:
456 |                 if len(timesteps) > original_steps:
457 |                     logger.warning(
458 |                         f"The number of timesteps in the custom timestep schedule is {len(timesteps)}, which exceeds"
459 |                         f" the length of the timestep schedule used for training: {original_steps}. You may get some"
460 |                         f" unexpected results when using this timestep schedule."
461 |                     )
462 |             else:
463 |                 if len(timesteps) > self.config.num_train_timesteps:
464 |                     logger.warning(
465 |                         f"The number of timesteps in the custom timestep schedule is {len(timesteps)}, which exceeds"
466 |                         f" the length of the timestep schedule used for training: {self.config.num_train_timesteps}. You may get some"
467 |                         f" unexpected results when using this timestep schedule."
468 |                 )
469 | 
470 |             timesteps = np.array(timesteps, dtype=np.int64)
471 |             self.num_inference_steps = len(timesteps)
472 |             self.custom_timesteps = True
473 | 
474 |             # Apply strength (e.g. for img2img pipelines) (see StableDiffusionImg2ImgPipeline.get_timesteps)
475 |             init_timestep = min(int(self.num_inference_steps * strength), self.num_inference_steps)
476 |             t_start = max(self.num_inference_steps - init_timestep, 0)
477 |             timesteps = timesteps[t_start * self.order :]
478 |             # TODO: also reset self.num_inference_steps?
479 |         else:
480 |             # 2.2 Create the "standard" TCD inference timestep schedule.
481 |             if num_inference_steps > self.config.num_train_timesteps:
482 |                 raise ValueError(
483 |                     f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.num_train_timesteps`:"
484 |                     f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
485 |                     f" maximal {self.config.num_train_timesteps} timesteps."
486 |                 )
487 | 
488 |             if original_steps is not None:
489 |                 skipping_step = len(tcd_origin_timesteps) // num_inference_steps
490 | 
491 |                 if skipping_step < 1:
492 |                     raise ValueError(
493 |                         f"The combination of `original_steps x strength`: {original_steps} x {strength} is smaller than `num_inference_steps`: {num_inference_steps}. Make sure to either reduce `num_inference_steps` to a value smaller than {int(original_steps * strength)} or increase `strength` to a value higher than {float(num_inference_steps / original_steps)}."
494 |                     )
495 | 
496 |             self.num_inference_steps = num_inference_steps
497 | 
498 |             if original_steps is not None:
499 |                 if num_inference_steps > original_steps:
500 |                     raise ValueError(
501 |                         f"`num_inference_steps`: {num_inference_steps} cannot be larger than `original_inference_steps`:"
502 |                         f" {original_steps} because the final timestep schedule will be a subset of the"
503 |                         f" `original_inference_steps`-sized initial timestep schedule."
504 |                     )
505 |             else:
506 |                 if num_inference_steps > self.config.num_train_timesteps:
507 |                     raise ValueError(
508 |                         f"`num_inference_steps`: {num_inference_steps} cannot be larger than `num_train_timesteps`:"
509 |                         f" {self.config.num_train_timesteps} because the final timestep schedule will be a subset of the"
510 |                         f" `num_train_timesteps`-sized initial timestep schedule."
511 |                     )
512 | 
513 |             # TCD Inference Steps Schedule
514 |             tcd_origin_timesteps = tcd_origin_timesteps[::-1].copy()
515 |             # Select (approximately) evenly spaced indices from tcd_origin_timesteps.
516 |             inference_indices = np.linspace(0, len(tcd_origin_timesteps), num=num_inference_steps, endpoint=False)
517 |             inference_indices = np.floor(inference_indices).astype(np.int64)
518 |             timesteps = tcd_origin_timesteps[inference_indices]
519 | 
520 |         self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.long)
521 | 
522 |         self._step_index = None
523 |         self._begin_index = None
524 | 
525 |     def step(
526 |         self,
527 |         model_output: torch.FloatTensor,
528 |         timestep: int,
529 |         sample: torch.FloatTensor,
530 |         eta: float = 0.3,
531 |         generator: Optional[torch.Generator] = None,
532 |         return_dict: bool = True,
533 |     ) -> Union[TCDSchedulerOutput, Tuple]:
534 |         """
535 |         Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
536 |         process from the learned model outputs (most often the predicted noise).
537 | 
538 |         Args:
539 |             model_output (`torch.FloatTensor`):
540 |                 The direct output from the learned diffusion model.
541 |             timestep (`int`):
542 |                 The current discrete timestep in the diffusion chain.
543 |             sample (`torch.FloatTensor`):
544 |                 A current instance of a sample created by the diffusion process.
545 |             eta (`float`):
546 |                 A stochastic parameter (referred to as `gamma` in the paper) used to control the stochasticity in every
547 |                 step. When eta = 0, it represents deterministic sampling, whereas eta = 1 indicates full stochastic
548 |                 sampling.
549 |             generator (`torch.Generator`, *optional*):
550 |                 A random number generator.
551 |             return_dict (`bool`, *optional*, defaults to `True`):
552 |                 Whether or not to return a [`~schedulers.scheduling_tcd.TCDSchedulerOutput`] or `tuple`.
553 |         Returns:
554 |             [`~schedulers.scheduling_tcd.TCDSchedulerOutput`] or `tuple`:
555 |                 If return_dict is `True`, [`~schedulers.scheduling_tcd.TCDSchedulerOutput`] is returned, otherwise a
556 |                 tuple is returned where the first element is the sample tensor.
557 |         """
558 |         if self.num_inference_steps is None:
559 |             raise ValueError(
560 |                 "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
561 |             )
562 | 
563 |         if self.step_index is None:
564 |             self._init_step_index(timestep)
565 | 
566 |         assert 0 <= eta <= 1.0, "`eta` (gamma in the paper) must be in [0, 1]."
567 | 
568 |         # 1. get previous step value
569 |         prev_step_index = self.step_index + 1
570 |         if prev_step_index < len(self.timesteps):
571 |             prev_timestep = self.timesteps[prev_step_index]
572 |         else:
573 |             prev_timestep = torch.tensor(0)
574 | 
575 |         timestep_s = torch.floor((1 - eta) * prev_timestep).to(dtype=torch.long)
576 | 
577 |         # 2. compute alphas, betas
578 |         alpha_prod_t = self.alphas_cumprod[timestep]
579 |         beta_prod_t = 1 - alpha_prod_t
580 | 
581 |         alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
582 | 
583 |         alpha_prod_s = self.alphas_cumprod[timestep_s]
584 |         beta_prod_s = 1 - alpha_prod_s
585 | 
586 |         # 3. Compute the predicted noised sample x_s based on the model parameterization
587 |         if self.config.prediction_type == "epsilon":  # noise-prediction
588 |             pred_original_sample = (sample - beta_prod_t.sqrt() * model_output) / alpha_prod_t.sqrt()
589 |             pred_epsilon = model_output
590 |             pred_noised_sample = alpha_prod_s.sqrt() * pred_original_sample + beta_prod_s.sqrt() * pred_epsilon
591 |         elif self.config.prediction_type == "sample":  # x-prediction
592 |             pred_original_sample = model_output
593 |             pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
594 |             pred_noised_sample = alpha_prod_s.sqrt() * pred_original_sample + beta_prod_s.sqrt() * pred_epsilon
595 |         elif self.config.prediction_type == "v_prediction":  # v-prediction
596 |             pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output
597 |             pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample
598 |             pred_noised_sample = alpha_prod_s.sqrt() * pred_original_sample + beta_prod_s.sqrt() * pred_epsilon
599 |         else:
600 |             raise ValueError(
601 |                 f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or"
602 |                 " `v_prediction` for `TCDScheduler`."
603 |             )
604 | 
605 |         # 4. Sample and inject noise z ~ N(0, I) for MultiStep Inference
606 |         # Noise is not used on the final timestep of the timestep schedule.
607 |         # This also means that noise is not used for one-step sampling.
608 |         # Eta (referred to as "gamma" in the paper) was introduced to control the stochasticity in every step.
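        # (Worked illustration with assumed values: eta = 0.3 and prev_timestep = 500 give
        # timestep_s = floor(0.7 * 500) = 350 above, so the sample is carried deterministically
        # down to t=350 and then diffused back up to t=500 with fresh Gaussian noise below.)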
609 |         # When eta = 0, it represents deterministic sampling, whereas eta = 1 indicates full stochastic sampling.
610 |         if eta > 0:
611 |             if self.step_index != self.num_inference_steps - 1:
612 |                 noise = randn_tensor(
613 |                     model_output.shape, generator=generator, device=model_output.device, dtype=pred_noised_sample.dtype
614 |                 )
615 |                 prev_sample = (alpha_prod_t_prev / alpha_prod_s).sqrt() * pred_noised_sample + (
616 |                     1 - alpha_prod_t_prev / alpha_prod_s
617 |                 ).sqrt() * noise
618 |             else:
619 |                 prev_sample = pred_noised_sample
620 |         else:
621 |             prev_sample = pred_noised_sample
622 | 
623 |         # upon completion, increase the step index by one
624 |         self._step_index += 1
625 | 
626 |         if not return_dict:
627 |             return (prev_sample, pred_noised_sample)
628 | 
629 |         return TCDSchedulerOutput(prev_sample=prev_sample, pred_noised_sample=pred_noised_sample)
630 | 
631 |     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
632 |     def add_noise(
633 |         self,
634 |         original_samples: torch.FloatTensor,
635 |         noise: torch.FloatTensor,
636 |         timesteps: torch.IntTensor,
637 |     ) -> torch.FloatTensor:
638 |         # Make sure alphas_cumprod and timesteps have the same device and dtype as original_samples
639 |         # Move self.alphas_cumprod to the target device to avoid redundant CPU-to-GPU data movement
640 |         # for the subsequent add_noise calls
641 |         self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
642 |         alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
643 |         timesteps = timesteps.to(original_samples.device)
644 | 
645 |         sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
646 |         sqrt_alpha_prod = sqrt_alpha_prod.flatten()
647 |         while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
648 |             sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
649 | 
650 |         sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
651 |         sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
652 |         while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
653 |             sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
654 | 
655 |         noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
656 |         return noisy_samples
657 | 
658 |     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
659 |     def get_velocity(
660 |         self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
661 |     ) -> torch.FloatTensor:
662 |         # Make sure alphas_cumprod and timesteps have the same device and dtype as sample
663 |         self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
664 |         alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
665 |         timesteps = timesteps.to(sample.device)
666 | 
667 |         sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
668 |         sqrt_alpha_prod = sqrt_alpha_prod.flatten()
669 |         while len(sqrt_alpha_prod.shape) < len(sample.shape):
670 |             sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
671 | 
672 |         sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
673 |         sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
674 |         while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape):
675 |             sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
676 | 
677 |         velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
678 |         return velocity
679 | 
680 |     def __len__(self):
681 |         return self.config.num_train_timesteps
682 | 
683 |     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep
684 |     def previous_timestep(self,
timestep): 685 | if self.custom_timesteps: 686 | index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0] 687 | if index == self.timesteps.shape[0] - 1: 688 | prev_t = torch.tensor(-1) 689 | else: 690 | prev_t = self.timesteps[index + 1] 691 | else: 692 | num_inference_steps = ( 693 | self.num_inference_steps if self.num_inference_steps else self.config.num_train_timesteps 694 | ) 695 | prev_t = timestep - self.config.num_train_timesteps // num_inference_steps 696 | 697 | return prev_t 698 | --------------------------------------------------------------------------------