├── .gitignore
├── LICENSE
├── README.md
├── app.py
├── assets
│   └── looseControl_teaser.png
├── cross_frame_attention.py
├── loose_controlnet_example
│   ├── comfyui_workflow.json
│   ├── comfyui_workflow_lcm.json
│   └── depth.jpeg
├── loosecontrol.py
└── weight_fusion.py

/.gitignore:
--------------------------------------------------------------------------------
1 | env/
2 | 
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 | 
8 | # C extensions
9 | *.so
10 | 
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 | 
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 | 
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 | 
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .nox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | *.py,cover
52 | .hypothesis/
53 | .pytest_cache/
54 | cover/
55 | 
56 | # Translations
57 | *.mo
58 | *.pot
59 | 
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 | db.sqlite3
64 | db.sqlite3-journal
65 | 
66 | # Flask stuff:
67 | instance/
68 | .webassets-cache
69 | 
70 | # Scrapy stuff:
71 | .scrapy
72 | 
73 | # Sphinx documentation
74 | docs/_build/
75 | 
76 | # PyBuilder
77 | .pybuilder/
78 | target/
79 | 
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 | 
83 | # IPython
84 | profile_default/
85 | ipython_config.py
86 | 
87 | # pyenv
88 | # For a library or package, you might want to ignore these files since the code is
89 | # intended to run in multiple environments; otherwise, check them in:
90 | # .python-version
91 | 
92 | # pipenv
93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
96 | # install all needed dependencies.
97 | #Pipfile.lock
98 | 
99 | # poetry
100 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101 | # This is especially recommended for binary packages to ensure reproducibility, and is more
102 | # commonly ignored for libraries.
103 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104 | #poetry.lock
105 | 
106 | # pdm
107 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108 | #pdm.lock
109 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110 | # in version control.
111 | # https://pdm.fming.dev/#use-with-ide
112 | .pdm.toml
113 | 
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 | 
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 | 
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 | 
137 | # Rope project settings
138 | .ropeproject
139 | 
140 | # mkdocs documentation
141 | /site
142 | 
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 | 
148 | # Pyre type checker
149 | .pyre/
150 | 
151 | # pytype static type analyzer
152 | .pytype/
153 | 
154 | # Cython debug symbols
155 | cython_debug/
156 | 
157 | # PyCharm
158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | # and can be added to the global gitignore or merged into this file. For a more nuclear
161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2023 Shariq F. Bhat
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LooseControlNet: Fused ControlNet Weights from LooseControl
2 | 
3 | [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
4 | 
5 | ## How it works
6 | 
7 | In LooseControl, the authors trained a LoRA of `ControlNet-depth`, but few libraries or frameworks currently support *LoRA of
8 | ControlNet*, so they patched `ControlNetModel` of `diffusers` with `UNet2DConditionLoadersMixin`.
9 | 
10 | However, that code cannot run in frameworks like A1111's WebUI or ComfyUI, so we fused the weights
11 | of `ControlNet-depth` and `LooseControl` to make it work in any framework. For details, please refer to [the script](./weight_fusion.py).
12 | 
13 | > *Important Note:*
14 | > The authors of LooseControl did more than just train a LoRA. Let's not forget that. Please refer to the original paper and code for more
15 | > details.
16 | 
17 | ## Usage
18 | 
19 | Download the fused ControlNet weights from [huggingface](https://huggingface.co/AIRDGempoll/LooseControlNet) and use them
20 | anywhere you can use `ControlNet-depth` (e.g. A1111's WebUI or ComfyUI) to loosely control image generation with depth
21 | images.
22 | 
23 | The [example folder](./loose_controlnet_example) contains a simple ComfyUI workflow for using LooseControlNet.
24 | 
25 | ## Contributing
26 | 
27 | If you like this project, you can contribute by:
28 | 
29 | * Upvoting this [issue](https://github.com/huggingface/diffusers/issues/6354) in the `diffusers` repo, or making a PR to
30 |   resolve it.
31 | * Bringing the consistency mechanisms devised in LooseControl to frameworks like A1111's WebUI or ComfyUI.
32 | * Bringing box editors to frameworks like A1111's WebUI or ComfyUI.
33 | * Training a better LooseControlNet.
34 | 
35 | ## Licenses
36 | 
37 | The extra code we add is released under the MIT License, following the original MIT License of LooseControl. The fused
38 | weights are released under the Apache 2.0 License, following the Apache 2.0 License of ControlNet.
39 | 
40 | ## References
41 | 
42 | ### LooseControl
43 | 
44 | LooseControl is presented in:
45 | > #### [LooseControl: Lifting ControlNet for Generalized Depth Conditioning](#)
46 | > ##### [Shariq Farooq Bhat](https://shariqfarooq123.github.io), [Niloy J. Mitra](http://www0.cs.ucl.ac.uk/staff/n.mitra/), [Peter Wonka](http://peterwonka.net/)
47 | >
48 | 
49 | [[Project Page]](https://shariqfarooq123.github.io/loose-control/) [[Paper]](https://arxiv.org/abs/2312.03079) [[Demo 🤗]](https://huggingface.co/spaces/shariqfarooq/LooseControl) [[Weights (3D Box Control)]](https://huggingface.co/shariqfarooq/loose-control-3dbox)
50 | 
51 | ![teaser](assets/looseControl_teaser.png)
52 | 
53 | #### Citation
54 | 
55 | ```bibtex
56 | @misc{bhat2023loosecontrol,
57 |     title={LooseControl: Lifting ControlNet for Generalized Depth Conditioning},
58 |     author={Shariq Farooq Bhat and Niloy J. Mitra and Peter Wonka},
59 |     year={2023},
60 |     eprint={2312.03079},
61 |     archivePrefix={arXiv},
62 |     primaryClass={cs.CV}
63 | }
64 | ```
65 | 
66 | ### ControlNet
67 | 
68 | Please refer to its official [repository](https://github.com/lllyasviel/ControlNet) for more details.
69 | 
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | from dataclasses import dataclass
3 | import PIL
4 | import PIL.Image
5 | 
6 | import torch
7 | import numpy as np
8 | from gradio_editor3d import Editor3D as g3deditor
9 | import copy
10 | from loosecontrol import LooseControlNet
11 | 
12 | 
13 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
14 | cn = LooseControlNet()
15 | cn.pipe = cn.pipe.to(torch_device=device, torch_dtype=torch.float16)
16 | 
17 | # Need to figure out a better way to do this per user, making 'cf attention' act like a state per user.
18 | # For now, we just copy the model.
19 | cn_with_cf = copy.deepcopy(cn)
20 | cn_with_cf.set_cf_attention()
21 | 
22 | 
23 | @dataclass
24 | class FixedInputs:
25 |     prompt: str
26 |     seed: int
27 |     depth: PIL.Image.Image
28 | 
29 | 
30 | negative_prompt = "blurry, text, caption, lowquality, lowresolution, low res, grainy, ugly"
31 | def depth2image(prompt, seed, depth):
32 |     seed = int(seed)
33 |     gen = cn(prompt, control_image=depth, controlnet_conditioning_scale=1.0, generator=torch.Generator().manual_seed(seed), num_inference_steps=20, negative_prompt=negative_prompt)
34 |     return gen
35 | 
36 | def edit_previous(prompt, seed, depth, fixed_inputs):
37 |     seed = int(seed)
38 |     control_image = [fixed_inputs.depth, depth]
39 |     prompt = [fixed_inputs.prompt, prompt]
40 |     neg_prompt = [negative_prompt, negative_prompt]
41 |     generator = [torch.Generator().manual_seed(fixed_inputs.seed), torch.Generator().manual_seed(seed)]
42 |     gen = cn_with_cf(prompt, control_image=control_image, controlnet_conditioning_scale=1.0, generator=generator, num_inference_steps=20, negative_prompt=neg_prompt)[-1]
43 |     return gen
44 | 
45 | def run(prompt, seed, depth, should_edit, fixed_inputs):
46 |     depth = depth.convert("RGB")
47 |     # all values below [3,3,3] in depth should actually be set to [255,255,255]
48 |     # This is due to the nature of the training data and is experimental right now.
49 |     # Not in use for now.
50 |     # depth = np.array(depth)
51 |     # depth[depth < 3] = 255
52 |     # depth = PIL.Image.fromarray(depth)
53 | 
54 |     fixed_inputs = fixed_inputs[0]
55 |     if should_edit and fixed_inputs is not None:
56 |         return edit_previous(prompt, seed, depth, fixed_inputs)
57 |     else:
58 |         return depth2image(prompt, seed, depth)
59 | 
60 | def handle_edit_change(edit, prompt, seed, image_input, fixed_inputs):
61 |     if edit:
62 |         fixed_inputs[0] = FixedInputs(prompt, int(seed), image_input)
63 |     else:
64 |         fixed_inputs[0] = None
65 |     return fixed_inputs
66 | 
67 | 
68 | css = """
69 | 
70 | #image_output {
71 |     width: 512px;
72 |     height: 512px;
73 | }"""
74 | 
75 | 
76 | main_description = """
77 | # LooseControl
78 | 
79 | This is the official demo for the paper [LooseControl: Lifting ControlNet for Generalized Depth Conditioning](https://shariqfarooq123.github.io/loose-control/).
80 | Our 3D Box Editing allows users to interactively edit the 3D boxes representing objects in the scene. Users can change the position, size, and orientation of 3D boxes, allowing them to quickly create and edit scenes to their liking in a 3D-aware manner.
81 | Best viewed on desktop.
82 | """
83 | 
84 | instructions_editor3d = """
85 | ## Instructions for Editor3D UI
86 | - Use 'WASD' keys to move the camera.
87 | - Click on an object to select it.
88 | - Use the sliders to change the position, size, and orientation of the selected object. Sliders support click and drag for faster editing.
89 | - Use the 'Add Box', 'Delete', and 'Duplicate' buttons to add, delete, and duplicate objects.
90 | - Delete and Duplicate buttons work on the selected object. Duplicate creates a copy and selects it.
91 | - Use the 'Toggle Mode' button to switch between "normal" and "depth" mode. The final image sent to the model should be in "depth" mode.
92 | - Use the 'Render' button to render the scene and send it to the model for generation.
93 | 
94 | ### Lock style checkbox - Fixes the style of the latest generated image.
95 | This allows users to edit the 3D boxes without changing the style of the generated image.
This is useful when the user is satisfied with the style/content of the generated image and wants to edit the 3D boxes without changing the overall essence of the scene. 96 | It can be used to create stop motion videos like those shown [here](https://shariqfarooq123.github.io/loose-control/). 97 | 98 | """ 99 | 100 | 101 | 102 | with gr.Blocks(css=css) as demo: 103 | gr.Markdown(main_description) 104 | 105 | fixed_inputs = gr.State([None]) 106 | with gr.Row(): 107 | prompt = gr.Textbox(label="Prompt", placeholder="Write your prompt", elem_id="input") 108 | seed = gr.Textbox(value=42, label="Seed", elem_id="seed") 109 | should_edit = gr.Checkbox(label="Lock style", elem_id="edit") 110 | 111 | with gr.Row(): 112 | image_input = g3deditor(elem_id="image_input") 113 | 114 | with gr.Row(): 115 | image_output = gr.Image(elem_id="image_output", type='pil') 116 | 117 | should_edit.change(fn=handle_edit_change, inputs=[should_edit, prompt, seed, image_input, fixed_inputs], outputs=[fixed_inputs]) 118 | image_input.change(fn=run, inputs=[prompt, seed, image_input, should_edit, fixed_inputs], outputs=[image_output]) 119 | with gr.Accordion("Instructions"): 120 | gr.Markdown(instructions_editor3d) 121 | 122 | demo.queue().launch() 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /assets/looseControl_teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GempollAI/LooseControlNet/d9e7989e454f546cbc43df806cf04b0b6b696d05/assets/looseControl_teaser.png -------------------------------------------------------------------------------- /cross_frame_attention.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/Picsart-AI-Research/Text2Video-Zero 2 | import torch 3 | from einops import rearrange 4 | 5 | class CrossFrameAttnProcessor: 6 | def __init__(self, unet_chunk_size=2): 7 | self.unet_chunk_size = unet_chunk_size 8 | 9 | def __call__( 10 | self, 11 | attn, 12 | hidden_states, 13 | encoder_hidden_states=None, 14 | attention_mask=None, **kwargs): 15 | batch_size, sequence_length, _ = hidden_states.shape 16 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 17 | query = attn.to_q(hidden_states) 18 | 19 | is_cross_attention = encoder_hidden_states is not None 20 | if encoder_hidden_states is None: 21 | encoder_hidden_states = hidden_states 22 | elif attn.norm_cross: 23 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 24 | key = attn.to_k(encoder_hidden_states) 25 | value = attn.to_v(encoder_hidden_states) 26 | # Sparse Attention 27 | if not is_cross_attention: 28 | video_length = key.size()[0] // self.unet_chunk_size 29 | # print("Video length is", video_length) 30 | # former_frame_index = torch.arange(video_length) - 1 31 | # former_frame_index[0] = 0 32 | former_frame_index = [0] * video_length 33 | key = rearrange(key, "(b f) d c -> b f d c", f=video_length) 34 | key = key[:, former_frame_index] 35 | key = rearrange(key, "b f d c -> (b f) d c") 36 | value = rearrange(value, "(b f) d c -> b f d c", f=video_length) 37 | value = value[:, former_frame_index] 38 | value = rearrange(value, "b f d c -> (b f) d c") 39 | 40 | query = attn.head_to_batch_dim(query) 41 | key = attn.head_to_batch_dim(key) 42 | value = attn.head_to_batch_dim(value) 43 | 44 | attention_probs = attn.get_attention_scores(query, key, attention_mask) 45 | hidden_states = 
torch.bmm(attention_probs, value) 46 | hidden_states = attn.batch_to_head_dim(hidden_states) 47 | 48 | # linear proj 49 | hidden_states = attn.to_out[0](hidden_states) 50 | # dropout 51 | hidden_states = attn.to_out[1](hidden_states) 52 | 53 | return hidden_states 54 | 55 | 56 | 57 | class AttnProcessorX: 58 | r""" 59 | Default processor for performing attention-related computations. 60 | """ 61 | 62 | def __call__( 63 | self, 64 | attn, 65 | hidden_states, 66 | encoder_hidden_states=None, 67 | attention_mask=None, 68 | temb=None, 69 | scale=1.0, 70 | ): 71 | residual = hidden_states 72 | 73 | if attn.spatial_norm is not None: 74 | hidden_states = attn.spatial_norm(hidden_states, temb) 75 | 76 | input_ndim = hidden_states.ndim 77 | 78 | if input_ndim == 4: 79 | batch_size, channel, height, width = hidden_states.shape 80 | hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2) 81 | 82 | batch_size, sequence_length, _ = ( 83 | hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape 84 | ) 85 | attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) 86 | 87 | if attn.group_norm is not None: 88 | hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2) 89 | 90 | query = attn.to_q(hidden_states, scale=scale) 91 | 92 | if encoder_hidden_states is None: 93 | encoder_hidden_states = hidden_states 94 | elif attn.norm_cross: 95 | encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states) 96 | 97 | key = attn.to_k(encoder_hidden_states, scale=scale) 98 | value = attn.to_v(encoder_hidden_states, scale=scale) 99 | 100 | query = attn.head_to_batch_dim(query) 101 | key = attn.head_to_batch_dim(key) 102 | value = attn.head_to_batch_dim(value) 103 | 104 | attention_probs = attn.get_attention_scores(query, key, attention_mask) 105 | hidden_states = torch.bmm(attention_probs, value) 106 | hidden_states = attn.batch_to_head_dim(hidden_states) 107 | 108 | # linear proj 109 | hidden_states = attn.to_out[0](hidden_states, scale=scale) 110 | # dropout 111 | hidden_states = attn.to_out[1](hidden_states) 112 | 113 | if input_ndim == 4: 114 | hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) 115 | 116 | if attn.residual_connection: 117 | hidden_states = hidden_states + residual 118 | 119 | hidden_states = hidden_states / attn.rescale_output_factor 120 | 121 | return hidden_states 122 | -------------------------------------------------------------------------------- /loose_controlnet_example/comfyui_workflow.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 17, 3 | "last_link_id": 16, 4 | "nodes": [ 5 | { 6 | "id": 4, 7 | "type": "CheckpointLoaderSimple", 8 | "pos": [ 9 | -67, 10 | 31 11 | ], 12 | "size": { 13 | "0": 315, 14 | "1": 98 15 | }, 16 | "flags": {}, 17 | "order": 0, 18 | "mode": 0, 19 | "outputs": [ 20 | { 21 | "name": "MODEL", 22 | "type": "MODEL", 23 | "links": [ 24 | 1 25 | ], 26 | "slot_index": 0, 27 | "label": "MODEL" 28 | }, 29 | { 30 | "name": "CLIP", 31 | "type": "CLIP", 32 | "links": [ 33 | 3, 34 | 5 35 | ], 36 | "slot_index": 1, 37 | "label": "CLIP" 38 | }, 39 | { 40 | "name": "VAE", 41 | "type": "VAE", 42 | "links": [ 43 | 8 44 | ], 45 | "slot_index": 2, 46 | "label": "VAE" 47 | } 48 | ], 49 | "properties": { 50 | "Node name for S&R": "CheckpointLoaderSimple" 51 | }, 52 | "widgets_values": [ 53 | "revAnimated_v11.safetensors" 54 | ] 55 | }, 56 | { 57 | 
"id": 16, 58 | "type": "LoadImage", 59 | "pos": [ 60 | -91, 61 | 688 62 | ], 63 | "size": [ 64 | 315, 65 | 314 66 | ], 67 | "flags": {}, 68 | "order": 1, 69 | "mode": 0, 70 | "outputs": [ 71 | { 72 | "name": "IMAGE", 73 | "type": "IMAGE", 74 | "links": [ 75 | 14 76 | ], 77 | "shape": 3, 78 | "label": "IMAGE", 79 | "slot_index": 0 80 | }, 81 | { 82 | "name": "MASK", 83 | "type": "MASK", 84 | "links": null, 85 | "shape": 3, 86 | "label": "MASK" 87 | } 88 | ], 89 | "properties": { 90 | "Node name for S&R": "LoadImage" 91 | }, 92 | "widgets_values": [ 93 | "depth.jpeg", 94 | "image" 95 | ] 96 | }, 97 | { 98 | "id": 6, 99 | "type": "CLIPTextEncode", 100 | "pos": [ 101 | 384, 102 | -28 103 | ], 104 | "size": { 105 | "0": 422.84503173828125, 106 | "1": 164.31304931640625 107 | }, 108 | "flags": {}, 109 | "order": 4, 110 | "mode": 0, 111 | "inputs": [ 112 | { 113 | "name": "clip", 114 | "type": "CLIP", 115 | "link": 3, 116 | "label": "clip" 117 | } 118 | ], 119 | "outputs": [ 120 | { 121 | "name": "CONDITIONING", 122 | "type": "CONDITIONING", 123 | "links": [ 124 | 10 125 | ], 126 | "slot_index": 0, 127 | "label": "CONDITIONING" 128 | } 129 | ], 130 | "properties": { 131 | "Node name for S&R": "CLIPTextEncode" 132 | }, 133 | "widgets_values": [ 134 | "Sofa in a living room, masterpiece, photorealistic, 8k" 135 | ] 136 | }, 137 | { 138 | "id": 5, 139 | "type": "EmptyLatentImage", 140 | "pos": [ 141 | 424, 142 | 484 143 | ], 144 | "size": { 145 | "0": 315, 146 | "1": 106 147 | }, 148 | "flags": {}, 149 | "order": 2, 150 | "mode": 0, 151 | "outputs": [ 152 | { 153 | "name": "LATENT", 154 | "type": "LATENT", 155 | "links": [ 156 | 2 157 | ], 158 | "slot_index": 0, 159 | "label": "LATENT" 160 | } 161 | ], 162 | "properties": { 163 | "Node name for S&R": "EmptyLatentImage" 164 | }, 165 | "widgets_values": [ 166 | 960, 167 | 544, 168 | 1 169 | ] 170 | }, 171 | { 172 | "id": 3, 173 | "type": "KSampler", 174 | "pos": [ 175 | 1337, 176 | 284 177 | ], 178 | "size": { 179 | "0": 315, 180 | "1": 262 181 | }, 182 | "flags": {}, 183 | "order": 7, 184 | "mode": 0, 185 | "inputs": [ 186 | { 187 | "name": "model", 188 | "type": "MODEL", 189 | "link": 1, 190 | "label": "model" 191 | }, 192 | { 193 | "name": "positive", 194 | "type": "CONDITIONING", 195 | "link": 15, 196 | "label": "positive" 197 | }, 198 | { 199 | "name": "negative", 200 | "type": "CONDITIONING", 201 | "link": 6, 202 | "label": "negative" 203 | }, 204 | { 205 | "name": "latent_image", 206 | "type": "LATENT", 207 | "link": 2, 208 | "label": "latent_image" 209 | } 210 | ], 211 | "outputs": [ 212 | { 213 | "name": "LATENT", 214 | "type": "LATENT", 215 | "links": [ 216 | 7 217 | ], 218 | "slot_index": 0, 219 | "label": "LATENT" 220 | } 221 | ], 222 | "properties": { 223 | "Node name for S&R": "KSampler" 224 | }, 225 | "widgets_values": [ 226 | 485278465394722, 227 | "randomize", 228 | 30, 229 | 7, 230 | "euler", 231 | "normal", 232 | 1 233 | ] 234 | }, 235 | { 236 | "id": 8, 237 | "type": "VAEDecode", 238 | "pos": [ 239 | 1687, 240 | 42 241 | ], 242 | "size": { 243 | "0": 210, 244 | "1": 46 245 | }, 246 | "flags": {}, 247 | "order": 8, 248 | "mode": 0, 249 | "inputs": [ 250 | { 251 | "name": "samples", 252 | "type": "LATENT", 253 | "link": 7, 254 | "label": "samples" 255 | }, 256 | { 257 | "name": "vae", 258 | "type": "VAE", 259 | "link": 8, 260 | "label": "vae" 261 | } 262 | ], 263 | "outputs": [ 264 | { 265 | "name": "IMAGE", 266 | "type": "IMAGE", 267 | "links": [ 268 | 16 269 | ], 270 | "slot_index": 0, 271 | "label": "IMAGE" 272 | } 273 | ], 274 | 
"properties": { 275 | "Node name for S&R": "VAEDecode" 276 | } 277 | }, 278 | { 279 | "id": 12, 280 | "type": "ControlNetApply", 281 | "pos": [ 282 | 943, 283 | 677 284 | ], 285 | "size": { 286 | "0": 317.4000244140625, 287 | "1": 98 288 | }, 289 | "flags": {}, 290 | "order": 6, 291 | "mode": 0, 292 | "inputs": [ 293 | { 294 | "name": "conditioning", 295 | "type": "CONDITIONING", 296 | "link": 10, 297 | "label": "conditioning" 298 | }, 299 | { 300 | "name": "control_net", 301 | "type": "CONTROL_NET", 302 | "link": 13, 303 | "label": "control_net" 304 | }, 305 | { 306 | "name": "image", 307 | "type": "IMAGE", 308 | "link": 14, 309 | "label": "image", 310 | "slot_index": 2 311 | } 312 | ], 313 | "outputs": [ 314 | { 315 | "name": "CONDITIONING", 316 | "type": "CONDITIONING", 317 | "links": [ 318 | 15 319 | ], 320 | "shape": 3, 321 | "label": "CONDITIONING", 322 | "slot_index": 0 323 | } 324 | ], 325 | "properties": { 326 | "Node name for S&R": "ControlNetApply" 327 | }, 328 | "widgets_values": [ 329 | 0.8 330 | ] 331 | }, 332 | { 333 | "id": 17, 334 | "type": "PreviewImage", 335 | "pos": [ 336 | 2095, 337 | 42 338 | ], 339 | "size": [ 340 | 210, 341 | 246 342 | ], 343 | "flags": {}, 344 | "order": 9, 345 | "mode": 0, 346 | "inputs": [ 347 | { 348 | "name": "images", 349 | "type": "IMAGE", 350 | "link": 16, 351 | "label": "images" 352 | } 353 | ], 354 | "properties": { 355 | "Node name for S&R": "PreviewImage" 356 | } 357 | }, 358 | { 359 | "id": 15, 360 | "type": "ControlNetLoader", 361 | "pos": [ 362 | -94, 363 | 574 364 | ], 365 | "size": { 366 | "0": 315, 367 | "1": 58 368 | }, 369 | "flags": {}, 370 | "order": 3, 371 | "mode": 0, 372 | "outputs": [ 373 | { 374 | "name": "CONTROL_NET", 375 | "type": "CONTROL_NET", 376 | "links": [ 377 | 13 378 | ], 379 | "shape": 3, 380 | "label": "CONTROL_NET", 381 | "slot_index": 0 382 | } 383 | ], 384 | "properties": { 385 | "Node name for S&R": "ControlNetLoader" 386 | }, 387 | "widgets_values": [ 388 | "loose_controlnet.safetensors" 389 | ] 390 | }, 391 | { 392 | "id": 7, 393 | "type": "CLIPTextEncode", 394 | "pos": [ 395 | 395, 396 | 224 397 | ], 398 | "size": { 399 | "0": 425.27801513671875, 400 | "1": 180.6060791015625 401 | }, 402 | "flags": {}, 403 | "order": 5, 404 | "mode": 0, 405 | "inputs": [ 406 | { 407 | "name": "clip", 408 | "type": "CLIP", 409 | "link": 5, 410 | "label": "clip" 411 | } 412 | ], 413 | "outputs": [ 414 | { 415 | "name": "CONDITIONING", 416 | "type": "CONDITIONING", 417 | "links": [ 418 | 6 419 | ], 420 | "slot_index": 0, 421 | "label": "CONDITIONING" 422 | } 423 | ], 424 | "properties": { 425 | "Node name for S&R": "CLIPTextEncode" 426 | }, 427 | "widgets_values": [ 428 | "text, watermark, blur" 429 | ] 430 | } 431 | ], 432 | "links": [ 433 | [ 434 | 1, 435 | 4, 436 | 0, 437 | 3, 438 | 0, 439 | "MODEL" 440 | ], 441 | [ 442 | 2, 443 | 5, 444 | 0, 445 | 3, 446 | 3, 447 | "LATENT" 448 | ], 449 | [ 450 | 3, 451 | 4, 452 | 1, 453 | 6, 454 | 0, 455 | "CLIP" 456 | ], 457 | [ 458 | 5, 459 | 4, 460 | 1, 461 | 7, 462 | 0, 463 | "CLIP" 464 | ], 465 | [ 466 | 6, 467 | 7, 468 | 0, 469 | 3, 470 | 2, 471 | "CONDITIONING" 472 | ], 473 | [ 474 | 7, 475 | 3, 476 | 0, 477 | 8, 478 | 0, 479 | "LATENT" 480 | ], 481 | [ 482 | 8, 483 | 4, 484 | 2, 485 | 8, 486 | 1, 487 | "VAE" 488 | ], 489 | [ 490 | 10, 491 | 6, 492 | 0, 493 | 12, 494 | 0, 495 | "CONDITIONING" 496 | ], 497 | [ 498 | 13, 499 | 15, 500 | 0, 501 | 12, 502 | 1, 503 | "CONTROL_NET" 504 | ], 505 | [ 506 | 14, 507 | 16, 508 | 0, 509 | 12, 510 | 2, 511 | "IMAGE" 512 | ], 513 | [ 514 | 
15, 515 | 12, 516 | 0, 517 | 3, 518 | 1, 519 | "CONDITIONING" 520 | ], 521 | [ 522 | 16, 523 | 8, 524 | 0, 525 | 17, 526 | 0, 527 | "IMAGE" 528 | ] 529 | ], 530 | "groups": [], 531 | "config": {}, 532 | "extra": {}, 533 | "version": 0.4 534 | } -------------------------------------------------------------------------------- /loose_controlnet_example/comfyui_workflow_lcm.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 18, 3 | "last_link_id": 21, 4 | "nodes": [ 5 | { 6 | "id": 16, 7 | "type": "LoadImage", 8 | "pos": [ 9 | -91, 10 | 688 11 | ], 12 | "size": [ 13 | 315, 14 | 314 15 | ], 16 | "flags": {}, 17 | "order": 0, 18 | "mode": 0, 19 | "outputs": [ 20 | { 21 | "name": "IMAGE", 22 | "type": "IMAGE", 23 | "links": [ 24 | 14 25 | ], 26 | "shape": 3, 27 | "label": "IMAGE", 28 | "slot_index": 0 29 | }, 30 | { 31 | "name": "MASK", 32 | "type": "MASK", 33 | "links": null, 34 | "shape": 3, 35 | "label": "MASK" 36 | } 37 | ], 38 | "properties": { 39 | "Node name for S&R": "LoadImage" 40 | }, 41 | "widgets_values": [ 42 | "depth.jpeg", 43 | "image" 44 | ] 45 | }, 46 | { 47 | "id": 6, 48 | "type": "CLIPTextEncode", 49 | "pos": [ 50 | 384, 51 | -28 52 | ], 53 | "size": { 54 | "0": 422.84503173828125, 55 | "1": 164.31304931640625 56 | }, 57 | "flags": {}, 58 | "order": 5, 59 | "mode": 0, 60 | "inputs": [ 61 | { 62 | "name": "clip", 63 | "type": "CLIP", 64 | "link": 19, 65 | "label": "clip" 66 | } 67 | ], 68 | "outputs": [ 69 | { 70 | "name": "CONDITIONING", 71 | "type": "CONDITIONING", 72 | "links": [ 73 | 10 74 | ], 75 | "slot_index": 0, 76 | "label": "CONDITIONING" 77 | } 78 | ], 79 | "properties": { 80 | "Node name for S&R": "CLIPTextEncode" 81 | }, 82 | "widgets_values": [ 83 | "Sofa in a living room, masterpiece, photorealistic, 8k" 84 | ] 85 | }, 86 | { 87 | "id": 5, 88 | "type": "EmptyLatentImage", 89 | "pos": [ 90 | 424, 91 | 484 92 | ], 93 | "size": { 94 | "0": 315, 95 | "1": 106 96 | }, 97 | "flags": {}, 98 | "order": 1, 99 | "mode": 0, 100 | "outputs": [ 101 | { 102 | "name": "LATENT", 103 | "type": "LATENT", 104 | "links": [ 105 | 2 106 | ], 107 | "slot_index": 0, 108 | "label": "LATENT" 109 | } 110 | ], 111 | "properties": { 112 | "Node name for S&R": "EmptyLatentImage" 113 | }, 114 | "widgets_values": [ 115 | 960, 116 | 544, 117 | 1 118 | ] 119 | }, 120 | { 121 | "id": 15, 122 | "type": "ControlNetLoader", 123 | "pos": [ 124 | -94, 125 | 574 126 | ], 127 | "size": { 128 | "0": 315, 129 | "1": 58 130 | }, 131 | "flags": {}, 132 | "order": 2, 133 | "mode": 0, 134 | "outputs": [ 135 | { 136 | "name": "CONTROL_NET", 137 | "type": "CONTROL_NET", 138 | "links": [ 139 | 13 140 | ], 141 | "shape": 3, 142 | "label": "CONTROL_NET", 143 | "slot_index": 0 144 | } 145 | ], 146 | "properties": { 147 | "Node name for S&R": "ControlNetLoader" 148 | }, 149 | "widgets_values": [ 150 | "loose_controlnet.safetensors" 151 | ] 152 | }, 153 | { 154 | "id": 7, 155 | "type": "CLIPTextEncode", 156 | "pos": [ 157 | 395, 158 | 224 159 | ], 160 | "size": { 161 | "0": 425.27801513671875, 162 | "1": 180.6060791015625 163 | }, 164 | "flags": {}, 165 | "order": 6, 166 | "mode": 0, 167 | "inputs": [ 168 | { 169 | "name": "clip", 170 | "type": "CLIP", 171 | "link": 20, 172 | "label": "clip" 173 | } 174 | ], 175 | "outputs": [ 176 | { 177 | "name": "CONDITIONING", 178 | "type": "CONDITIONING", 179 | "links": [ 180 | 6 181 | ], 182 | "slot_index": 0, 183 | "label": "CONDITIONING" 184 | } 185 | ], 186 | "properties": { 187 | "Node name for S&R": 
"CLIPTextEncode" 188 | }, 189 | "widgets_values": [ 190 | "text, watermark, blur" 191 | ] 192 | }, 193 | { 194 | "id": 8, 195 | "type": "VAEDecode", 196 | "pos": [ 197 | 1724, 198 | 81 199 | ], 200 | "size": { 201 | "0": 210, 202 | "1": 46 203 | }, 204 | "flags": {}, 205 | "order": 9, 206 | "mode": 0, 207 | "inputs": [ 208 | { 209 | "name": "samples", 210 | "type": "LATENT", 211 | "link": 7, 212 | "label": "samples" 213 | }, 214 | { 215 | "name": "vae", 216 | "type": "VAE", 217 | "link": 8, 218 | "label": "vae" 219 | } 220 | ], 221 | "outputs": [ 222 | { 223 | "name": "IMAGE", 224 | "type": "IMAGE", 225 | "links": [ 226 | 16 227 | ], 228 | "slot_index": 0, 229 | "label": "IMAGE" 230 | } 231 | ], 232 | "properties": { 233 | "Node name for S&R": "VAEDecode" 234 | } 235 | }, 236 | { 237 | "id": 3, 238 | "type": "KSampler", 239 | "pos": [ 240 | 1387, 241 | 286 242 | ], 243 | "size": { 244 | "0": 315, 245 | "1": 262 246 | }, 247 | "flags": {}, 248 | "order": 8, 249 | "mode": 0, 250 | "inputs": [ 251 | { 252 | "name": "model", 253 | "type": "MODEL", 254 | "link": 21, 255 | "label": "model" 256 | }, 257 | { 258 | "name": "positive", 259 | "type": "CONDITIONING", 260 | "link": 15, 261 | "label": "positive" 262 | }, 263 | { 264 | "name": "negative", 265 | "type": "CONDITIONING", 266 | "link": 6, 267 | "label": "negative" 268 | }, 269 | { 270 | "name": "latent_image", 271 | "type": "LATENT", 272 | "link": 2, 273 | "label": "latent_image" 274 | } 275 | ], 276 | "outputs": [ 277 | { 278 | "name": "LATENT", 279 | "type": "LATENT", 280 | "links": [ 281 | 7 282 | ], 283 | "slot_index": 0, 284 | "label": "LATENT" 285 | } 286 | ], 287 | "properties": { 288 | "Node name for S&R": "KSampler" 289 | }, 290 | "widgets_values": [ 291 | 63675398705672, 292 | "randomize", 293 | 5, 294 | 1.5, 295 | "lcm", 296 | "normal", 297 | 1 298 | ] 299 | }, 300 | { 301 | "id": 17, 302 | "type": "PreviewImage", 303 | "pos": [ 304 | 2154, 305 | 89 306 | ], 307 | "size": [ 308 | 210, 309 | 246 310 | ], 311 | "flags": {}, 312 | "order": 10, 313 | "mode": 0, 314 | "inputs": [ 315 | { 316 | "name": "images", 317 | "type": "IMAGE", 318 | "link": 16, 319 | "label": "images" 320 | } 321 | ], 322 | "properties": { 323 | "Node name for S&R": "PreviewImage" 324 | } 325 | }, 326 | { 327 | "id": 18, 328 | "type": "LoraLoader", 329 | "pos": [ 330 | -297, 331 | -3 332 | ], 333 | "size": { 334 | "0": 315, 335 | "1": 126 336 | }, 337 | "flags": {}, 338 | "order": 4, 339 | "mode": 0, 340 | "inputs": [ 341 | { 342 | "name": "model", 343 | "type": "MODEL", 344 | "link": 17, 345 | "label": "model" 346 | }, 347 | { 348 | "name": "clip", 349 | "type": "CLIP", 350 | "link": 18, 351 | "label": "clip" 352 | } 353 | ], 354 | "outputs": [ 355 | { 356 | "name": "MODEL", 357 | "type": "MODEL", 358 | "links": [ 359 | 21 360 | ], 361 | "shape": 3, 362 | "label": "MODEL", 363 | "slot_index": 0 364 | }, 365 | { 366 | "name": "CLIP", 367 | "type": "CLIP", 368 | "links": [ 369 | 19, 370 | 20 371 | ], 372 | "shape": 3, 373 | "label": "CLIP", 374 | "slot_index": 1 375 | } 376 | ], 377 | "properties": { 378 | "Node name for S&R": "LoraLoader" 379 | }, 380 | "widgets_values": [ 381 | "LCM_LoRA_Weights_SD15.safetensors", 382 | 1, 383 | 1 384 | ] 385 | }, 386 | { 387 | "id": 4, 388 | "type": "CheckpointLoaderSimple", 389 | "pos": [ 390 | -763, 391 | -6 392 | ], 393 | "size": { 394 | "0": 315, 395 | "1": 98 396 | }, 397 | "flags": {}, 398 | "order": 3, 399 | "mode": 0, 400 | "outputs": [ 401 | { 402 | "name": "MODEL", 403 | "type": "MODEL", 404 | "links": [ 405 | 17 406 
| ], 407 | "slot_index": 0, 408 | "label": "MODEL" 409 | }, 410 | { 411 | "name": "CLIP", 412 | "type": "CLIP", 413 | "links": [ 414 | 18 415 | ], 416 | "slot_index": 1, 417 | "label": "CLIP" 418 | }, 419 | { 420 | "name": "VAE", 421 | "type": "VAE", 422 | "links": [ 423 | 8 424 | ], 425 | "slot_index": 2, 426 | "label": "VAE" 427 | } 428 | ], 429 | "properties": { 430 | "Node name for S&R": "CheckpointLoaderSimple" 431 | }, 432 | "widgets_values": [ 433 | "revAnimated_v11.safetensors" 434 | ] 435 | }, 436 | { 437 | "id": 12, 438 | "type": "ControlNetApply", 439 | "pos": [ 440 | 943, 441 | 677 442 | ], 443 | "size": { 444 | "0": 317.4000244140625, 445 | "1": 98 446 | }, 447 | "flags": {}, 448 | "order": 7, 449 | "mode": 0, 450 | "inputs": [ 451 | { 452 | "name": "conditioning", 453 | "type": "CONDITIONING", 454 | "link": 10, 455 | "label": "conditioning" 456 | }, 457 | { 458 | "name": "control_net", 459 | "type": "CONTROL_NET", 460 | "link": 13, 461 | "label": "control_net" 462 | }, 463 | { 464 | "name": "image", 465 | "type": "IMAGE", 466 | "link": 14, 467 | "label": "image", 468 | "slot_index": 2 469 | } 470 | ], 471 | "outputs": [ 472 | { 473 | "name": "CONDITIONING", 474 | "type": "CONDITIONING", 475 | "links": [ 476 | 15 477 | ], 478 | "shape": 3, 479 | "label": "CONDITIONING", 480 | "slot_index": 0 481 | } 482 | ], 483 | "properties": { 484 | "Node name for S&R": "ControlNetApply" 485 | }, 486 | "widgets_values": [ 487 | 0.7000000000000001 488 | ] 489 | } 490 | ], 491 | "links": [ 492 | [ 493 | 2, 494 | 5, 495 | 0, 496 | 3, 497 | 3, 498 | "LATENT" 499 | ], 500 | [ 501 | 6, 502 | 7, 503 | 0, 504 | 3, 505 | 2, 506 | "CONDITIONING" 507 | ], 508 | [ 509 | 7, 510 | 3, 511 | 0, 512 | 8, 513 | 0, 514 | "LATENT" 515 | ], 516 | [ 517 | 8, 518 | 4, 519 | 2, 520 | 8, 521 | 1, 522 | "VAE" 523 | ], 524 | [ 525 | 10, 526 | 6, 527 | 0, 528 | 12, 529 | 0, 530 | "CONDITIONING" 531 | ], 532 | [ 533 | 13, 534 | 15, 535 | 0, 536 | 12, 537 | 1, 538 | "CONTROL_NET" 539 | ], 540 | [ 541 | 14, 542 | 16, 543 | 0, 544 | 12, 545 | 2, 546 | "IMAGE" 547 | ], 548 | [ 549 | 15, 550 | 12, 551 | 0, 552 | 3, 553 | 1, 554 | "CONDITIONING" 555 | ], 556 | [ 557 | 16, 558 | 8, 559 | 0, 560 | 17, 561 | 0, 562 | "IMAGE" 563 | ], 564 | [ 565 | 17, 566 | 4, 567 | 0, 568 | 18, 569 | 0, 570 | "MODEL" 571 | ], 572 | [ 573 | 18, 574 | 4, 575 | 1, 576 | 18, 577 | 1, 578 | "CLIP" 579 | ], 580 | [ 581 | 19, 582 | 18, 583 | 1, 584 | 6, 585 | 0, 586 | "CLIP" 587 | ], 588 | [ 589 | 20, 590 | 18, 591 | 1, 592 | 7, 593 | 0, 594 | "CLIP" 595 | ], 596 | [ 597 | 21, 598 | 18, 599 | 0, 600 | 3, 601 | 0, 602 | "MODEL" 603 | ] 604 | ], 605 | "groups": [], 606 | "config": {}, 607 | "extra": {}, 608 | "version": 0.4 609 | } -------------------------------------------------------------------------------- /loose_controlnet_example/depth.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GempollAI/LooseControlNet/d9e7989e454f546cbc43df806cf04b0b6b696d05/loose_controlnet_example/depth.jpeg -------------------------------------------------------------------------------- /loosecontrol.py: -------------------------------------------------------------------------------- 1 | from diffusers import ( 2 | ControlNetModel, 3 | StableDiffusionControlNetPipeline, 4 | UniPCMultistepScheduler, 5 | ) 6 | import torch 7 | import PIL 8 | import PIL.Image 9 | from diffusers.loaders import UNet2DConditionLoadersMixin 10 | from typing import Dict 11 | from diffusers.models.attention_processor 
import AttentionProcessor, AttnProcessor 12 | import functools 13 | from cross_frame_attention import CrossFrameAttnProcessor 14 | 15 | TEXT_ENCODER_NAME = "text_encoder" 16 | UNET_NAME = "unet" 17 | NEGATIVE_PROMPT = "blurry, text, caption, lowquality, lowresolution, low res, grainy, ugly" 18 | 19 | def attach_loaders_mixin(model): 20 | # hacky way to make ControlNet work with LoRA. This may not be required in future versions of diffusers. 21 | model.text_encoder_name = TEXT_ENCODER_NAME 22 | model.unet_name = UNET_NAME 23 | r""" 24 | Attach the [`UNet2DConditionLoadersMixin`] to a model. This will add the 25 | all the methods from the mixin 'UNet2DConditionLoadersMixin' to the model. 26 | """ 27 | # mixin_instance = UNet2DConditionLoadersMixin() 28 | for attr_name, attr_value in vars(UNet2DConditionLoadersMixin).items(): 29 | # print(attr_name) 30 | if callable(attr_value): 31 | # setattr(model, attr_name, functools.partialmethod(attr_value, model).__get__(model, model.__class__)) 32 | setattr(model, attr_name, functools.partial(attr_value, model)) 33 | return model 34 | 35 | def set_attn_processor(module, processor, _remove_lora=False): 36 | def fn_recursive_attn_processor(name: str, module: torch.nn.Module, processor): 37 | if hasattr(module, "set_processor"): 38 | if not isinstance(processor, dict): 39 | module.set_processor(processor, _remove_lora=_remove_lora) 40 | else: 41 | module.set_processor(processor.pop(f"{name}.processor"), _remove_lora=_remove_lora) 42 | 43 | for sub_name, child in module.named_children(): 44 | fn_recursive_attn_processor(f"{name}.{sub_name}", child, processor) 45 | 46 | for name, module in module.named_children(): 47 | fn_recursive_attn_processor(name, module, processor) 48 | 49 | 50 | 51 | class ControlNetX(ControlNetModel, UNet2DConditionLoadersMixin): 52 | # Copied from diffusers.models.unet_2d_condition.UNet2DConditionModel.attn_processors 53 | # This may not be required in future versions of diffusers. 54 | @property 55 | def attn_processors(self) -> Dict[str, AttentionProcessor]: 56 | r""" 57 | Returns: 58 | `dict` of attention processors: A dictionary containing all attention processors used in the model with 59 | indexed by its weight name. 
60 | """ 61 | # set recursively 62 | processors = {} 63 | 64 | def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]): 65 | if hasattr(module, "get_processor"): 66 | processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True) 67 | 68 | for sub_name, child in module.named_children(): 69 | fn_recursive_add_processors(f"{name}.{sub_name}", child, processors) 70 | 71 | return processors 72 | 73 | for name, module in self.named_children(): 74 | fn_recursive_add_processors(name, module, processors) 75 | 76 | return processors 77 | 78 | class ControlNetPipeline: 79 | def __init__(self, checkpoint="lllyasviel/control_v11f1p_sd15_depth", sd_checkpoint="runwayml/stable-diffusion-v1-5") -> None: 80 | controlnet = ControlNetX.from_pretrained(checkpoint) 81 | self.pipe = StableDiffusionControlNetPipeline.from_pretrained( 82 | sd_checkpoint, controlnet=controlnet, requires_safety_checker=False, safety_checker=None, 83 | torch_dtype=torch.float16) 84 | self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config) 85 | 86 | @torch.no_grad() 87 | def __call__(self, 88 | prompt: str="", 89 | height=512, 90 | width=512, 91 | control_image=None, 92 | controlnet_conditioning_scale=1.0, 93 | num_inference_steps: int=20, 94 | **kwargs) -> PIL.Image.Image: 95 | 96 | out = self.pipe(prompt, control_image, 97 | height=height, width=width, 98 | num_inference_steps=num_inference_steps, 99 | controlnet_conditioning_scale=controlnet_conditioning_scale, 100 | **kwargs).images 101 | 102 | return out[0] if len(out) == 1 else out 103 | 104 | def to(self, *args, **kwargs): 105 | self.pipe.to(*args, **kwargs) 106 | return self 107 | 108 | 109 | class LooseControlNet(ControlNetPipeline): 110 | def __init__(self, loose_control_weights="shariqfarooq/loose-control-3dbox", cn_checkpoint="lllyasviel/control_v11f1p_sd15_depth", sd_checkpoint="runwayml/stable-diffusion-v1-5") -> None: 111 | super().__init__(cn_checkpoint, sd_checkpoint) 112 | self.pipe.controlnet = attach_loaders_mixin(self.pipe.controlnet) 113 | self.pipe.controlnet.load_attn_procs(loose_control_weights) 114 | 115 | def set_normal_attention(self): 116 | self.pipe.unet.set_attn_processor(AttnProcessor()) 117 | 118 | def set_cf_attention(self, _remove_lora=False): 119 | for upblocks in self.pipe.unet.up_blocks[-2:]: 120 | set_attn_processor(upblocks, CrossFrameAttnProcessor(), _remove_lora=_remove_lora) 121 | 122 | def edit(self, depth, depth_edit, prompt, prompt_edit=None, seed=42, seed_edit=None, negative_prompt=NEGATIVE_PROMPT, controlnet_conditioning_scale=1.0, num_inference_steps=20, **kwargs): 123 | if prompt_edit is None: 124 | prompt_edit = prompt 125 | 126 | if seed_edit is None: 127 | seed_edit = seed 128 | 129 | seed = int(seed) 130 | seed_edit = int(seed_edit) 131 | control_image = [depth, depth_edit] 132 | prompt = [prompt, prompt_edit] 133 | generator = [torch.Generator().manual_seed(seed), torch.Generator().manual_seed(seed_edit)] 134 | gen = self.pipe(prompt, control_image=control_image, controlnet_conditioning_scale=controlnet_conditioning_scale, generator=generator, num_inference_steps=num_inference_steps, negative_prompt=negative_prompt, **kwargs)[-1] 135 | return gen -------------------------------------------------------------------------------- /weight_fusion.py: -------------------------------------------------------------------------------- 1 | from loosecontrol import LooseControlNet 2 | 3 | FUSION_SCALE = 1.0 4 | USE_CUDA = True 5 | 
USE_HUGGINGFACE_WEIGHTS = True 6 | 7 | if __name__ == "__main__": 8 | print(f""" 9 | Fusing weights with configs: 10 | FUSION_SCALE: {FUSION_SCALE} 11 | USE_CUDA: {USE_CUDA} 12 | USE_HUGGINGFACE_WEIGHTS: {USE_HUGGINGFACE_WEIGHTS} 13 | 14 | You can modify these in this script. 15 | """) 16 | 17 | if USE_HUGGINGFACE_WEIGHTS: 18 | lcn = LooseControlNet("shariqfarooq/loose-control-3dbox") 19 | else: 20 | # Modify below to use your pre-downloaded weights 21 | lcn = LooseControlNet(loose_control_weights="..", cn_checkpoint="..", sd_checkpoint="..") 22 | 23 | if USE_CUDA: 24 | lcn = lcn.to("cuda") 25 | 26 | lcn.pipe.controlnet.fuse_lora(lora_scale=FUSION_SCALE) 27 | lcn.pipe.controlnet.save_pretrained("./fused_weights") 28 | print("Done! Saved to ./fused_weights") 29 | --------------------------------------------------------------------------------
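
The fusion performed above relies on `fuse_lora`, which folds each LoRA update back into the base ControlNet weights so that no LoRA machinery is needed at inference time. The sketch below is only a conceptual illustration of that idea for a single linear layer, not the actual `diffusers` implementation (which handles per-module bookkeeping, scaling, and dtype details); the tensor shapes and names here are illustrative.

```python
# Conceptual sketch of LoRA fusion for one linear layer: W_fused = W + scale * (up @ down).
# Names and shapes are illustrative; the real fuse_lora walks every LoRA-equipped module.
import torch

def fuse_lora_layer(base_weight: torch.Tensor,
                    lora_down: torch.Tensor,  # (rank, in_features)
                    lora_up: torch.Tensor,    # (out_features, rank)
                    scale: float = 1.0) -> torch.Tensor:
    """Return a weight matrix with the low-rank update folded in."""
    return base_weight + scale * (lora_up @ lora_down)

# Tiny self-check with random tensors standing in for ControlNet attention weights.
W = torch.randn(320, 320)
down, up = torch.randn(4, 320), torch.randn(320, 4)
W_fused = fuse_lora_layer(W, down, up, scale=1.0)
x = torch.randn(1, 320)
assert torch.allclose(x @ W_fused.T, x @ W.T + (x @ down.T) @ up.T, atol=1e-4)
```

Once the update is folded in, the fused checkpoint behaves like a plain depth ControlNet, which is why it can be loaded by frameworks that only support standard ControlNet weights.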
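
For readers not using ComfyUI, here is a minimal sketch of driving the fused weights with plain `diffusers`, mirroring the README's Usage section and the example workflow above. It assumes the Hugging Face repo linked in the README (`AIRDGempoll/LooseControlNet`) can be loaded directly with `ControlNetModel.from_pretrained`; the prompt is taken from the example workflow, while the output filename, seed, and sampler settings are arbitrary.

```python
# Minimal sketch: use the fused LooseControlNet like any depth ControlNet in diffusers.
# Assumes the fused weights load via ControlNetModel.from_pretrained from the repo id in the README.
import torch
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline, UniPCMultistepScheduler

controlnet = ControlNetModel.from_pretrained("AIRDGempoll/LooseControlNet", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    safety_checker=None,
    torch_dtype=torch.float16,
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")

# A boxy depth map (e.g. the bundled example) serves as the "loose" condition.
depth = Image.open("loose_controlnet_example/depth.jpeg").convert("RGB")
image = pipe(
    "Sofa in a living room, masterpiece, photorealistic, 8k",
    image=depth,
    controlnet_conditioning_scale=1.0,
    num_inference_steps=20,
    generator=torch.Generator().manual_seed(42),
).images[0]
image.save("loose_control_result.png")
```

As in the ComfyUI workflow, the depth image only needs to sketch rough boxes; the model treats it as loose guidance rather than an exact depth map.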