├── .github └── workflows │ └── publish_action.yaml ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── densediffusion_node.py ├── enums.py ├── examples └── densediffusion_compare.json └── pyproject.toml /.github/workflows/publish_action.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "pyproject.toml" 9 | 10 | permissions: 11 | issues: write 12 | 13 | jobs: 14 | publish-node: 15 | name: Publish Custom Node to registry 16 | runs-on: ubuntu-latest 17 | if: ${{ github.repository_owner == 'huchenlei' }} 18 | steps: 19 | - name: Check out code 20 | uses: actions/checkout@v4 21 | - name: Publish Custom Node 22 | uses: Comfy-Org/publish-node-action@v1 23 | with: 24 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Update 2 | 3 | 2024/12/27 Changed to allow multiple masks to be entered in batches. 4 | 5 | 2024/12/26 If there is no mask input, a solid mask is used instead. 6 | 7 | 8 | 9 | # ComfyUI_densediffusion 10 | DenseDiffusion custom node for ComfyUI. Implements the [DenseDiffusion](https://github.com/naver-ai/DenseDiffusion)-like method for regional prompt used in [Omost](https://github.com/lllyasviel/Omost) project. 11 | 12 | ## What this repo implements 13 | Normal attention calculation can be written as `y=softmax(q@k)@v`. DenseDiffusion introduces the method of attention manipulation on `q@k`, which makes the expression look like `y=softmax(modify(q@k))@v`. 14 | The original DenseDiffusion's implementation does not perform very well according to my testing so here I only implemented the version used in Omost repo. Refer to https://github.com/lllyasviel/Omost#regional-prompter for other regional prompt methods. 
def get_sd_version(model: ModelPatcher) -> StableDiffusionVersion:
    """Return the Stable Diffusion version family of the wrapped model.

    ``model.model`` is checked against the SDXL model classes exposed by
    ``comfy.model_base``. Anything that is not one of those classes is
    reported as ``StableDiffusionVersion.SD1x``.

    TODO: Make this a part of comfy.model_patcher.ModelPatcher
    """
    sdxl_model_types = (
        comfy.model_base.SDXL,
        comfy.model_base.SDXLRefiner,
        comfy.model_base.SDXL_instructpix2pix,
    )
    if isinstance(model.model, sdxl_model_types):
        return StableDiffusionVersion.SDXL
    return StableDiffusionVersion.SD1x
class OmostDenseDiffusionCrossAttention(torch.nn.Module):
    """Cross-attention replacement implementing Omost-style regional prompting.

    Computes ``y = softmax(modify(q @ k)) @ v`` where ``modify`` rescales and
    masks the attention scores so that the tokens of each regional prompt are
    only attended to by the query positions covered by that prompt's mask.
    """

    @staticmethod
    def calc_mask_cond(
        q: torch.Tensor,
        extra_options: dict,
    ) -> tuple[torch.Tensor | None, torch.Tensor | None]:
        """Build the boolean mask and the score scale applied to ``q @ k``.

        Args:
            q: Query tensor as reshaped by ``forward``:
                ``[batch, heads, H*W, dim_head]``.
            extra_options: Options dict. Reads ``"cond_or_uncond"`` (list of
                0 = cond / 1 = uncond, length 1 or 2), the optional
                ``"dense_diffusion_cond"`` list and, when regional conds are
                present, ``"original_shape"`` (its last two entries are used
                as latent height and width).

        Returns:
            ``(mask_bool, mask_scale)`` with shapes
            ``[batch, heads, H*W, tokens]`` and ``[batch, heads, 1, tokens]``,
            or ``(None, None)`` when there is no regional conditioning or the
            batch contains no conditional part.
        """
        dd_conds: list[DenseDiffusionConditioning] = extra_options.get(
            "dense_diffusion_cond", []
        )

        cond_or_uncond: list[Literal[0, 1]] = extra_options["cond_or_uncond"]
        assert len(cond_or_uncond) in (1, 2)

        batch_size = q.size(0)
        cond_batch_size = (
            batch_size // len(cond_or_uncond) if 0 in cond_or_uncond else 0
        )
        uncond_batch_size = batch_size - cond_batch_size

        if not dd_conds or cond_batch_size == 0:
            return None, None

        _, _, latent_height, latent_width = extra_options["original_shape"]
        H, W = OmostDenseDiffusionCrossAttention.calc_hidden_state_shape(
            q.size(2), latent_height, latent_width
        )

        masks = []
        for dd_cond in dd_conds:
            mask = dd_cond.mask
            if mask.dim() == 2:  # [H, W] -> [1, H, W]
                mask = mask.unsqueeze(0)

            # Make the mask batch match the conditional batch: a single mask
            # is broadcast, surplus masks are dropped, and a shorter batch is
            # tiled cyclically (previously a shorter batch was left as-is and
            # caused a shape mismatch further down).
            num_masks = mask.size(0)
            if num_masks != cond_batch_size:
                repeats = math.ceil(cond_batch_size / num_masks)
                mask = mask.repeat(repeats, 1, 1)[:cond_batch_size]

            # Resize the mask to the hidden-state resolution of this layer.
            m = torch.nn.functional.interpolate(
                mask.unsqueeze(1),  # [B, 1, H, W]
                (H, W),
                mode="nearest-exact",
            ).flatten(1)  # [B, H*W]

            # One mask column per text token of this regional prompt.
            m = m.unsqueeze(2).repeat(1, 1, dd_cond.cond.size(1))  # [B, H*W, tokens]
            masks.append(m)

        # Same order as the concatenated text embeddings (token axis).
        masks = torch.cat(masks, dim=2)

        mask_bool = masks > 0.5
        # Scale each token's scores by (total area / masked area).
        mask_scale = (H * W) / torch.sum(masks, dim=1, keepdim=True)

        # Add the head axis expected by the attention computation.
        mask_bool = mask_bool.unsqueeze(1).repeat(1, q.size(1), 1, 1)
        mask_scale = mask_scale.unsqueeze(1).repeat(1, q.size(1), 1, 1)

        # The unconditional half of the batch is left untouched:
        # all-True mask and a scale of 1.
        if uncond_batch_size > 0:
            assert len(cond_or_uncond) == 2
            uncond_first = cond_or_uncond.index(1) == 0

            uncond_mask_bool = torch.ones_like(mask_bool[:uncond_batch_size])
            uncond_mask_scale = torch.ones_like(mask_scale[:uncond_batch_size])

            if uncond_first:
                mask_bool = torch.cat([uncond_mask_bool, mask_bool], dim=0)
                mask_scale = torch.cat([uncond_mask_scale, mask_scale], dim=0)
            else:
                mask_bool = torch.cat([mask_bool, uncond_mask_bool], dim=0)
                mask_scale = torch.cat([mask_scale, uncond_mask_scale], dim=0)

        return mask_bool, mask_scale

    @staticmethod
    def calc_hidden_state_shape(
        sequence_length: int, H: int, W: int
    ) -> tuple[int, int]:
        """Estimate the 2D shape of a flattened hidden state.

        Returns ``(mask_h, mask_w)`` whose aspect ratio follows ``W / H`` and
        whose product approximates ``sequence_length`` (both values are
        rounded to the nearest integer).
        """
        ratio = W / H
        mask_h = int(round(math.sqrt(sequence_length / ratio)))
        mask_w = int(round(math.sqrt(sequence_length * ratio)))
        return mask_h, mask_w

    @staticmethod
    def scaled_dot_product_attention(
        query: torch.Tensor,
        key: torch.Tensor,
        value: torch.Tensor,
        mask_bool: torch.Tensor | None = None,
        mask_scale: torch.Tensor | None = None,
    ) -> torch.Tensor:
        """Scaled dot product attention with score modification.

        ``mask_scale`` (if given) multiplies ``q @ k`` and ``mask_bool`` (if
        given) sets the scores at its False positions to ``-inf``; both are
        applied before the softmax over the key axis.
        """
        scale_factor = 1 / math.sqrt(query.size(-1))
        attn_weight = query @ key.transpose(-2, -1) * scale_factor

        if mask_scale is not None:
            attn_weight = attn_weight * mask_scale
        if mask_bool is not None:
            attn_weight.masked_fill_(mask_bool.logical_not(), float("-inf"))

        attn_weight = torch.softmax(attn_weight, dim=-1)
        return attn_weight @ value

    def forward(
        self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, extra_options: dict
    ):
        """
        y=softmax(modify(q@k))@v
        where modify() is a complicated non-linear function with many normalizations
        and tricks to change the score's distributions.

        This function implements the `modify` function used in Omost, instead
        of the original DenseDiffusion repo.

        https://github.com/lllyasviel/Omost/blob/731e74922fc6be91171688574d07624f93d3b658/lib_omost/pipeline.py#L129-L173

        ``q``, ``k`` and ``v`` are ``[batch, sequence, heads * dim_head]``;
        the head count is read from ``extra_options["n_heads"]``. The result
        has the same layout as ``q``.
        """
        heads: int = extra_options["n_heads"]

        b, _, dim_head = q.shape
        dim_head //= heads
        # [b, seq, heads * dim_head] -> [b, heads, seq, dim_head]
        q, k, v = map(
            lambda t: t.view(b, -1, heads, dim_head).transpose(1, 2),
            (q, k, v),
        )
        mask_bool, mask_scale = self.calc_mask_cond(q, extra_options)
        out = self.scaled_dot_product_attention(
            q, k, v, mask_bool=mask_bool, mask_scale=mask_scale
        )
        # Back to [b, seq, heads * dim_head].
        out = out.transpose(1, 2).reshape(b, -1, heads * dim_head)
        return out
class DenseDiffusionApplyNode:
    """ComfyUI node that installs the DenseDiffusion cross-attention patch."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"model": ("MODEL",)}}

    RETURN_TYPES = ("MODEL", "CONDITIONING")
    FUNCTION = "apply"
    CATEGORY = "DenseDiffusion"
    DESCRIPTION = "Apply DenseDiffusion model."

    def apply(self, model: ModelPatcher) -> tuple[ModelPatcher, ComfyUIConditioning]:
        """Patch a clone of ``model`` and build the combined conditioning.

        Every cross-attention transformer listed for the detected model
        version gets ``OmostDenseDiffusionCrossAttention`` as its attn2
        replacement. The regional conds stored under
        ``transformer_options["dense_diffusion_cond"]`` have their masks moved
        to ``model.load_device`` with ``model.model_dtype()``.

        Returns:
            ``(patched_model, conditioning)``. The conditioning concatenates
            the text embeddings of all regional conds along dim 1 and uses
            the pooled output of the first one.

        Raises:
            AssertionError: If no DenseDiffusion conditioning was added.
        """
        work_model: ModelPatcher = model.clone()

        # TODO: Make patching cross-attn easier/more composable with Unified ModelPatcher.
        # Current approach does not compose with IPAdapter.
        sd_version: StableDiffusionVersion = get_sd_version(work_model)
        input_ids, output_ids, middle_ids = sd_version.transformer_ids
        for transformer_id in itertools.chain(input_ids, output_ids, middle_ids):
            work_model.set_model_attn2_replace(
                OmostDenseDiffusionCrossAttention(),
                block_name=transformer_id.block_type.value,
                number=transformer_id.block_id,
                # transformer_index param here specifies the depth index of the transformer
                transformer_index=transformer_id.block_index,
            )

        dd_conds: list[DenseDiffusionConditioning] = work_model.model_options[
            "transformer_options"
        ].get("dense_diffusion_cond", [])
        assert dd_conds, "No DenseDiffusion conditioning found!"

        # Move all mask to unet device to avoid move during inference.
        for dd_cond in dd_conds:
            dd_cond.mask = dd_cond.mask.to(model.load_device, model.model_dtype())

        cond = [
            [
                # cond
                torch.cat([dd_cond.cond for dd_cond in dd_conds], dim=1),
                # pooled_output
                {"pooled_output": dd_conds[0].pooled_output},
            ]
        ]
        return (work_model, cond)


class DenseDiffusionAddCondNode:
    """ComfyUI node that registers one regional prompt on the model."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": ("MODEL",),
                "conditioning": ("CONDITIONING",),
                "strength": (
                    "FLOAT",
                    {"default": 1.0, "min": 0.0, "max": 2.0, "step": 0.01},
                ),
            },
            "optional": {
                "mask": ("MASK",),
            },
        }

    RETURN_TYPES = ("MODEL",)
    FUNCTION = "append"
    CATEGORY = "DenseDiffusion"
    DESCRIPTION = "Set a regional prompt for DenseDiffusion."

    def append(
        self,
        model: ModelPatcher,
        conditioning: ComfyUIConditioning,
        strength: float,
        mask: torch.Tensor | None = None,
    ) -> tuple[ModelPatcher]:
        """Return a clone of ``model`` with one more regional cond attached.

        Args:
            model: Model to clone; the input model is not modified here.
            conditioning: Must contain exactly one ``(cond, extra_fields)``
                entry whose ``extra_fields`` dict has a ``"pooled_output"``.
            strength: Multiplier applied to the mask values.
            mask: Region mask. When omitted, a solid all-ones mask of shape
                ``(1, 512, 512)`` is used instead.
        """
        work_model: ModelPatcher = model.clone()
        dd_conds = work_model.model_options["transformer_options"].setdefault(
            "dense_diffusion_cond", []
        )
        assert len(conditioning) == 1
        cond, extra_fields = conditioning[0]
        assert isinstance(extra_fields, dict)
        assert "pooled_output" in extra_fields

        # Identity check: `mask == None` on a tensor is an equality
        # comparison, not a test for a missing argument.
        if mask is None:
            mask = torch.full((1, 512, 512), 1.0, dtype=torch.float32, device="cpu")

        dd_conds.append(
            DenseDiffusionConditioning(
                cond=cond,
                mask=mask.squeeze() * strength,
                pooled_output=extra_fields["pooled_output"],
            )
        )
        return (work_model,)


NODE_CLASS_MAPPINGS = {
    "DenseDiffusionApplyNode": DenseDiffusionApplyNode,
    "DenseDiffusionAddCondNode": DenseDiffusionAddCondNode,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "DenseDiffusionApplyNode": "DenseDiffusion Apply",
    "DenseDiffusionAddCondNode": "DenseDiffusion Add Cond",
}
class StableDiffusionVersion(Enum):
    """The version family of a Stable Diffusion model."""

    UNKNOWN = 0
    SD1x = 1
    SD2x = 2
    SDXL = 3

    @staticmethod
    def detect_from_model_name(model_name: str) -> "StableDiffusionVersion":
        """Guess the Stable Diffusion version from a model file name.

        Matching is case-insensitive and purely name-based, so the result
        may be wrong; only inspecting the file content would be reliable.
        Checks run in order SD1.x, SD2.x, SDXL; the first hit wins.

        Args:
            model_name: Checkpoint name, e.g. ``"v1-5-sd15.ckpt"``.

        Returns:
            The detected version, or ``UNKNOWN`` when nothing matches.
        """
        # Lower-case once so every check below is case-insensitive
        # (the SD2.x check used to run on the raw name and missed "SD21").
        name = model_name.lower()

        if any(f"sd{v}" in name for v in ("14", "15", "16")):
            return StableDiffusionVersion.SD1x

        if "sd21" in name or "2.1" in name:
            return StableDiffusionVersion.SD2x

        if "xl" in name:
            return StableDiffusionVersion.SDXL

        return StableDiffusionVersion.UNKNOWN

    def encoder_block_num(self) -> int:
        """Return 9 for SDXL and 12 for every other version."""
        if self is StableDiffusionVersion.SDXL:
            return 9
        return 12

    def controlnet_layer_num(self) -> int:
        """Return ``encoder_block_num() + 1``."""
        return self.encoder_block_num() + 1

    @property
    def transformer_block_num(self) -> int:
        """Number of blocks that have cross-attention transformers in the UNet.

        11 for SDXL, 16 for every other version.
        """
        if self is StableDiffusionVersion.SDXL:
            return 11
        return 16

    @property
    @lru_cache(maxsize=None)
    def transformer_ids(self) -> "TransformerIDResult":
        """IDs of the transformers that have cross attention, grouped by block type.

        ``transformer_index`` is assigned in the order input -> middle ->
        output. The result is cached per enum member, so every caller gets
        the same ``TransformerIDResult`` object and must not mutate its lists.
        """
        if self is not StableDiffusionVersion.SDXL:
            # SD1.x / SD2.x / UNKNOWN: one transformer (block_index 0) per block.
            transformer_index = 0
            input_ids = []
            for block_id in [1, 2, 4, 5, 7, 8]:
                input_ids.append(
                    TransformerID(UnetBlockType.INPUT, block_id, 0, transformer_index)
                )
                transformer_index += 1

            middle_id = TransformerID(UnetBlockType.MIDDLE, 0, 0, transformer_index)
            transformer_index += 1

            output_ids = []
            for block_id in [3, 4, 5, 6, 7, 8, 9, 10, 11]:
                # Collect into output_ids; these used to be appended to
                # input_ids, leaving output_ids permanently empty.
                output_ids.append(
                    TransformerID(UnetBlockType.OUTPUT, block_id, 0, transformer_index)
                )
                transformer_index += 1
            return TransformerIDResult(input_ids, output_ids, [middle_id])

        # SDXL: a block holds several transformers (transformer_depth).
        transformer_index = 0
        input_ids = []
        for block_id in [4, 5, 7, 8]:
            depth = 2 if block_id in [4, 5] else 10  # transformer_depth
            for index in range(depth):
                input_ids.append(
                    TransformerID(
                        UnetBlockType.INPUT, block_id, index, transformer_index
                    )
                )
                transformer_index += 1

        # NOTE(review): all ten middle transformers share a single
        # transformer_index, unlike the input/output transformers which get
        # one each. Kept as-is because consumers may rely on the current
        # numbering -- confirm whether this is intentional.
        middle_ids = [
            TransformerID(UnetBlockType.MIDDLE, 0, index, transformer_index)
            for index in range(10)
        ]
        transformer_index += 1

        output_ids = []
        for block_id in range(6):
            depth = 2 if block_id in [3, 4, 5] else 10  # transformer_depth
            for index in range(depth):
                output_ids.append(
                    TransformerID(
                        UnetBlockType.OUTPUT, block_id, index, transformer_index
                    )
                )
                transformer_index += 1
        return TransformerIDResult(input_ids, output_ids, middle_ids)

    def is_compatible_with(self, other: "StableDiffusionVersion") -> bool:
        """Return whether two versions can be used together.

        Incompatible only when exactly one of the two versions is SDXL and
        neither is ``UNKNOWN``.
        """
        pair = [self, other]
        if any(v == StableDiffusionVersion.UNKNOWN for v in pair):
            return True
        return sum(v == StableDiffusionVersion.SDXL for v in pair) != 1
218.0396270751953, 102 | "1": 49.7765007019043 103 | }, 104 | "flags": {}, 105 | "order": 20, 106 | "mode": 0, 107 | "inputs": [ 108 | { 109 | "name": "model", 110 | "type": "MODEL", 111 | "link": 26 112 | } 113 | ], 114 | "outputs": [ 115 | { 116 | "name": "MODEL", 117 | "type": "MODEL", 118 | "links": [ 119 | 27 120 | ], 121 | "shape": 3, 122 | "slot_index": 0 123 | }, 124 | { 125 | "name": "CONDITIONING", 126 | "type": "CONDITIONING", 127 | "links": [ 128 | 28 129 | ], 130 | "shape": 3, 131 | "slot_index": 1 132 | } 133 | ], 134 | "properties": { 135 | "Node name for S&R": "DenseDiffusionApplyNode" 136 | } 137 | }, 138 | { 139 | "id": 14, 140 | "type": "MaskFromRGBCMYBW+", 141 | "pos": [ 142 | -16, 143 | 218 144 | ], 145 | "size": { 146 | "0": 315, 147 | "1": 294 148 | }, 149 | "flags": {}, 150 | "order": 4, 151 | "mode": 0, 152 | "inputs": [ 153 | { 154 | "name": "image", 155 | "type": "IMAGE", 156 | "link": 15 157 | } 158 | ], 159 | "outputs": [ 160 | { 161 | "name": "red", 162 | "type": "MASK", 163 | "links": [ 164 | 16, 165 | 72 166 | ], 167 | "shape": 3, 168 | "slot_index": 0 169 | }, 170 | { 171 | "name": "green", 172 | "type": "MASK", 173 | "links": [ 174 | 21, 175 | 53 176 | ], 177 | "shape": 3, 178 | "slot_index": 1 179 | }, 180 | { 181 | "name": "blue", 182 | "type": "MASK", 183 | "links": null, 184 | "shape": 3 185 | }, 186 | { 187 | "name": "cyan", 188 | "type": "MASK", 189 | "links": null, 190 | "shape": 3 191 | }, 192 | { 193 | "name": "magenta", 194 | "type": "MASK", 195 | "links": null, 196 | "shape": 3 197 | }, 198 | { 199 | "name": "yellow", 200 | "type": "MASK", 201 | "links": null, 202 | "shape": 3 203 | }, 204 | { 205 | "name": "black", 206 | "type": "MASK", 207 | "links": [], 208 | "shape": 3 209 | }, 210 | { 211 | "name": "white", 212 | "type": "MASK", 213 | "links": null, 214 | "shape": 3 215 | } 216 | ], 217 | "properties": { 218 | "Node name for S&R": "MaskFromRGBCMYBW+" 219 | }, 220 | "widgets_values": [ 221 | 0.15, 222 | 0.15, 223 | 
0.15, 224 | 0, 225 | false 226 | ] 227 | }, 228 | { 229 | "id": 19, 230 | "type": "DenseDiffusionAddCondNode", 231 | "pos": [ 232 | 168.4942647623998, 233 | -399.14986919536267 234 | ], 235 | "size": { 236 | "0": 315, 237 | "1": 98 238 | }, 239 | "flags": {}, 240 | "order": 11, 241 | "mode": 0, 242 | "inputs": [ 243 | { 244 | "name": "model", 245 | "type": "MODEL", 246 | "link": 30 247 | }, 248 | { 249 | "name": "conditioning", 250 | "type": "CONDITIONING", 251 | "link": 33, 252 | "slot_index": 1 253 | }, 254 | { 255 | "name": "mask", 256 | "type": "MASK", 257 | "link": 35, 258 | "slot_index": 2 259 | } 260 | ], 261 | "outputs": [ 262 | { 263 | "name": "MODEL", 264 | "type": "MODEL", 265 | "links": [ 266 | 29 267 | ], 268 | "shape": 3, 269 | "slot_index": 0 270 | } 271 | ], 272 | "properties": { 273 | "Node name for S&R": "DenseDiffusionAddCondNode" 274 | }, 275 | "widgets_values": [ 276 | 1 277 | ] 278 | }, 279 | { 280 | "id": 11, 281 | "type": "DenseDiffusionAddCondNode", 282 | "pos": [ 283 | 564.4942647623997, 284 | -396.14986919536267 285 | ], 286 | "size": { 287 | "0": 315, 288 | "1": 98 289 | }, 290 | "flags": {}, 291 | "order": 14, 292 | "mode": 0, 293 | "inputs": [ 294 | { 295 | "name": "model", 296 | "type": "MODEL", 297 | "link": 29 298 | }, 299 | { 300 | "name": "conditioning", 301 | "type": "CONDITIONING", 302 | "link": 14 303 | }, 304 | { 305 | "name": "mask", 306 | "type": "MASK", 307 | "link": 16 308 | } 309 | ], 310 | "outputs": [ 311 | { 312 | "name": "MODEL", 313 | "type": "MODEL", 314 | "links": [ 315 | 20 316 | ], 317 | "shape": 3, 318 | "slot_index": 0 319 | } 320 | ], 321 | "properties": { 322 | "Node name for S&R": "DenseDiffusionAddCondNode" 323 | }, 324 | "widgets_values": [ 325 | 1 326 | ] 327 | }, 328 | { 329 | "id": 7, 330 | "type": "CLIPTextEncode", 331 | "pos": [ 332 | 416, 333 | 356 334 | ], 335 | "size": { 336 | "0": 425.27801513671875, 337 | "1": 180.6060791015625 338 | }, 339 | "flags": {}, 340 | "order": 5, 341 | "mode": 0, 342 | 
"inputs": [ 343 | { 344 | "name": "clip", 345 | "type": "CLIP", 346 | "link": 5 347 | } 348 | ], 349 | "outputs": [ 350 | { 351 | "name": "CONDITIONING", 352 | "type": "CONDITIONING", 353 | "links": [ 354 | 6, 355 | 40, 356 | 60 357 | ], 358 | "slot_index": 0 359 | } 360 | ], 361 | "properties": { 362 | "Node name for S&R": "CLIPTextEncode" 363 | }, 364 | "widgets_values": [ 365 | "text, watermark, worst quality, low res" 366 | ] 367 | }, 368 | { 369 | "id": 25, 370 | "type": "VAEDecode", 371 | "pos": [ 372 | 2000, 373 | 380 374 | ], 375 | "size": { 376 | "0": 210, 377 | "1": 46 378 | }, 379 | "flags": {}, 380 | "order": 15, 381 | "mode": 0, 382 | "inputs": [ 383 | { 384 | "name": "samples", 385 | "type": "LATENT", 386 | "link": 42 387 | }, 388 | { 389 | "name": "vae", 390 | "type": "VAE", 391 | "link": 43 392 | } 393 | ], 394 | "outputs": [ 395 | { 396 | "name": "IMAGE", 397 | "type": "IMAGE", 398 | "links": [ 399 | 44 400 | ], 401 | "slot_index": 0 402 | } 403 | ], 404 | "properties": { 405 | "Node name for S&R": "VAEDecode" 406 | } 407 | }, 408 | { 409 | "id": 4, 410 | "type": "CheckpointLoaderSimple", 411 | "pos": [ 412 | -17, 413 | 623 414 | ], 415 | "size": { 416 | "0": 315, 417 | "1": 98 418 | }, 419 | "flags": {}, 420 | "order": 1, 421 | "mode": 0, 422 | "outputs": [ 423 | { 424 | "name": "MODEL", 425 | "type": "MODEL", 426 | "links": [ 427 | 30, 428 | 47, 429 | 58 430 | ], 431 | "slot_index": 0 432 | }, 433 | { 434 | "name": "CLIP", 435 | "type": "CLIP", 436 | "links": [ 437 | 5, 438 | 23, 439 | 24, 440 | 31 441 | ], 442 | "slot_index": 1 443 | }, 444 | { 445 | "name": "VAE", 446 | "type": "VAE", 447 | "links": [ 448 | 8, 449 | 43, 450 | 63 451 | ], 452 | "slot_index": 2 453 | } 454 | ], 455 | "properties": { 456 | "Node name for S&R": "CheckpointLoaderSimple" 457 | }, 458 | "widgets_values": [ 459 | "animagine-xl-2.0.safetensors" 460 | ] 461 | }, 462 | { 463 | "id": 20, 464 | "type": "CLIPTextEncode", 465 | "pos": [ 466 | 64.4942647623998, 467 | 
-234.14986919536267 468 | ], 469 | "size": { 470 | "0": 400, 471 | "1": 200 472 | }, 473 | "flags": {}, 474 | "order": 8, 475 | "mode": 0, 476 | "inputs": [ 477 | { 478 | "name": "clip", 479 | "type": "CLIP", 480 | "link": 31, 481 | "slot_index": 0 482 | } 483 | ], 484 | "outputs": [ 485 | { 486 | "name": "CONDITIONING", 487 | "type": "CONDITIONING", 488 | "links": [ 489 | 33, 490 | 45, 491 | 48 492 | ], 493 | "shape": 3, 494 | "slot_index": 0 495 | } 496 | ], 497 | "properties": { 498 | "Node name for S&R": "CLIPTextEncode" 499 | }, 500 | "widgets_values": [ 501 | "(Sunshine, beach:0.6), masterpiece, best quality,\n\n1girl, watermelon on table" 502 | ] 503 | }, 504 | { 505 | "id": 12, 506 | "type": "CLIPTextEncode", 507 | "pos": [ 508 | 496.4942647623997, 509 | -237.14986919536267 510 | ], 511 | "size": { 512 | "0": 400, 513 | "1": 200 514 | }, 515 | "flags": {}, 516 | "order": 7, 517 | "mode": 0, 518 | "inputs": [ 519 | { 520 | "name": "clip", 521 | "type": "CLIP", 522 | "link": 24, 523 | "slot_index": 0 524 | } 525 | ], 526 | "outputs": [ 527 | { 528 | "name": "CONDITIONING", 529 | "type": "CONDITIONING", 530 | "links": [ 531 | 14, 532 | 50 533 | ], 534 | "shape": 3, 535 | "slot_index": 0 536 | } 537 | ], 538 | "properties": { 539 | "Node name for S&R": "CLIPTextEncode" 540 | }, 541 | "widgets_values": [ 542 | "1girl, long hair," 543 | ] 544 | }, 545 | { 546 | "id": 24, 547 | "type": "KSampler", 548 | "pos": [ 549 | 1650, 550 | 370 551 | ], 552 | "size": { 553 | "0": 315, 554 | "1": 262 555 | }, 556 | "flags": {}, 557 | "order": 12, 558 | "mode": 0, 559 | "inputs": [ 560 | { 561 | "name": "model", 562 | "type": "MODEL", 563 | "link": 47 564 | }, 565 | { 566 | "name": "positive", 567 | "type": "CONDITIONING", 568 | "link": 45 569 | }, 570 | { 571 | "name": "negative", 572 | "type": "CONDITIONING", 573 | "link": 40 574 | }, 575 | { 576 | "name": "latent_image", 577 | "type": "LATENT", 578 | "link": 41 579 | } 580 | ], 581 | "outputs": [ 582 | { 583 | "name": 
"LATENT", 584 | "type": "LATENT", 585 | "links": [ 586 | 42 587 | ], 588 | "slot_index": 0 589 | } 590 | ], 591 | "properties": { 592 | "Node name for S&R": "KSampler" 593 | }, 594 | "widgets_values": [ 595 | 497887434749534, 596 | "fixed", 597 | 20, 598 | 8, 599 | "euler", 600 | "normal", 601 | 1 602 | ] 603 | }, 604 | { 605 | "id": 17, 606 | "type": "CLIPTextEncode", 607 | "pos": [ 608 | 927.4942647623983, 609 | -237.14986919536267 610 | ], 611 | "size": { 612 | "0": 400, 613 | "1": 200 614 | }, 615 | "flags": {}, 616 | "order": 6, 617 | "mode": 0, 618 | "inputs": [ 619 | { 620 | "name": "clip", 621 | "type": "CLIP", 622 | "link": 23, 623 | "slot_index": 0 624 | } 625 | ], 626 | "outputs": [ 627 | { 628 | "name": "CONDITIONING", 629 | "type": "CONDITIONING", 630 | "links": [ 631 | 18, 632 | 52 633 | ], 634 | "shape": 3, 635 | "slot_index": 0 636 | } 637 | ], 638 | "properties": { 639 | "Node name for S&R": "CLIPTextEncode" 640 | }, 641 | "widgets_values": [ 642 | "watermelon" 643 | ] 644 | }, 645 | { 646 | "id": 28, 647 | "type": "ConditioningSetMask", 648 | "pos": [ 649 | 591.4340233055909, 650 | -918.5021301420154 651 | ], 652 | "size": { 653 | "0": 317.4000244140625, 654 | "1": 102 655 | }, 656 | "flags": {}, 657 | "order": 10, 658 | "mode": 0, 659 | "inputs": [ 660 | { 661 | "name": "conditioning", 662 | "type": "CONDITIONING", 663 | "link": 50 664 | }, 665 | { 666 | "name": "mask", 667 | "type": "MASK", 668 | "link": 72, 669 | "slot_index": 1 670 | } 671 | ], 672 | "outputs": [ 673 | { 674 | "name": "CONDITIONING", 675 | "type": "CONDITIONING", 676 | "links": [ 677 | 76 678 | ], 679 | "shape": 3, 680 | "slot_index": 0 681 | } 682 | ], 683 | "properties": { 684 | "Node name for S&R": "ConditioningSetMask" 685 | }, 686 | "widgets_values": [ 687 | 1, 688 | "default" 689 | ] 690 | }, 691 | { 692 | "id": 3, 693 | "type": "KSampler", 694 | "pos": [ 695 | 1636.4942647623986, 696 | -107.14986919536271 697 | ], 698 | "size": { 699 | "0": 315, 700 | "1": 262 701 | }, 
702 | "flags": {}, 703 | "order": 22, 704 | "mode": 0, 705 | "inputs": [ 706 | { 707 | "name": "model", 708 | "type": "MODEL", 709 | "link": 27 710 | }, 711 | { 712 | "name": "positive", 713 | "type": "CONDITIONING", 714 | "link": 28 715 | }, 716 | { 717 | "name": "negative", 718 | "type": "CONDITIONING", 719 | "link": 6 720 | }, 721 | { 722 | "name": "latent_image", 723 | "type": "LATENT", 724 | "link": 2 725 | } 726 | ], 727 | "outputs": [ 728 | { 729 | "name": "LATENT", 730 | "type": "LATENT", 731 | "links": [ 732 | 7 733 | ], 734 | "slot_index": 0 735 | } 736 | ], 737 | "properties": { 738 | "Node name for S&R": "KSampler" 739 | }, 740 | "widgets_values": [ 741 | 497887434749534, 742 | "fixed", 743 | 20, 744 | 8, 745 | "euler", 746 | "normal", 747 | 1 748 | ] 749 | }, 750 | { 751 | "id": 35, 752 | "type": "ConditioningCombine", 753 | "pos": [ 754 | 671.4340233055909, 755 | -1108.5021301420156 756 | ], 757 | "size": { 758 | "0": 342.5999755859375, 759 | "1": 46 760 | }, 761 | "flags": {}, 762 | "order": 16, 763 | "mode": 0, 764 | "inputs": [ 765 | { 766 | "name": "conditioning_1", 767 | "type": "CONDITIONING", 768 | "link": 82, 769 | "slot_index": 0 770 | }, 771 | { 772 | "name": "conditioning_2", 773 | "type": "CONDITIONING", 774 | "link": 76 775 | } 776 | ], 777 | "outputs": [ 778 | { 779 | "name": "CONDITIONING", 780 | "type": "CONDITIONING", 781 | "links": [ 782 | 75 783 | ], 784 | "shape": 3, 785 | "slot_index": 0 786 | } 787 | ], 788 | "properties": { 789 | "Node name for S&R": "ConditioningCombine" 790 | } 791 | }, 792 | { 793 | "id": 36, 794 | "type": "ConditioningCombine", 795 | "pos": [ 796 | 1101.434023305593, 797 | -1108.5021301420156 798 | ], 799 | "size": { 800 | "0": 342.5999755859375, 801 | "1": 46 802 | }, 803 | "flags": {}, 804 | "order": 19, 805 | "mode": 0, 806 | "inputs": [ 807 | { 808 | "name": "conditioning_1", 809 | "type": "CONDITIONING", 810 | "link": 75 811 | }, 812 | { 813 | "name": "conditioning_2", 814 | "type": "CONDITIONING", 815 
| "link": 78, 816 | "slot_index": 1 817 | } 818 | ], 819 | "outputs": [ 820 | { 821 | "name": "CONDITIONING", 822 | "type": "CONDITIONING", 823 | "links": [ 824 | 84 825 | ], 826 | "shape": 3, 827 | "slot_index": 0 828 | } 829 | ], 830 | "properties": { 831 | "Node name for S&R": "ConditioningCombine" 832 | } 833 | }, 834 | { 835 | "id": 27, 836 | "type": "ConditioningSetMask", 837 | "pos": [ 838 | 191.43402330559087, 839 | -918.5021301420154 840 | ], 841 | "size": { 842 | "0": 317.4000244140625, 843 | "1": 102 844 | }, 845 | "flags": {}, 846 | "order": 13, 847 | "mode": 0, 848 | "inputs": [ 849 | { 850 | "name": "conditioning", 851 | "type": "CONDITIONING", 852 | "link": 48 853 | }, 854 | { 855 | "name": "mask", 856 | "type": "MASK", 857 | "link": 49, 858 | "slot_index": 1 859 | } 860 | ], 861 | "outputs": [ 862 | { 863 | "name": "CONDITIONING", 864 | "type": "CONDITIONING", 865 | "links": [ 866 | 82 867 | ], 868 | "shape": 3, 869 | "slot_index": 0 870 | } 871 | ], 872 | "properties": { 873 | "Node name for S&R": "ConditioningSetMask" 874 | }, 875 | "widgets_values": [ 876 | 0.3, 877 | "default" 878 | ] 879 | }, 880 | { 881 | "id": 32, 882 | "type": "KSampler", 883 | "pos": [ 884 | 1461.4340233055932, 885 | -918.5021301420154 886 | ], 887 | "size": { 888 | "0": 315, 889 | "1": 262 890 | }, 891 | "flags": {}, 892 | "order": 21, 893 | "mode": 0, 894 | "inputs": [ 895 | { 896 | "name": "model", 897 | "type": "MODEL", 898 | "link": 58 899 | }, 900 | { 901 | "name": "positive", 902 | "type": "CONDITIONING", 903 | "link": 84 904 | }, 905 | { 906 | "name": "negative", 907 | "type": "CONDITIONING", 908 | "link": 60 909 | }, 910 | { 911 | "name": "latent_image", 912 | "type": "LATENT", 913 | "link": 61 914 | } 915 | ], 916 | "outputs": [ 917 | { 918 | "name": "LATENT", 919 | "type": "LATENT", 920 | "links": [ 921 | 62 922 | ], 923 | "slot_index": 0 924 | } 925 | ], 926 | "properties": { 927 | "Node name for S&R": "KSampler" 928 | }, 929 | "widgets_values": [ 930 | 
497887434749534, 931 | "fixed", 932 | 20, 933 | 8, 934 | "euler", 935 | "normal", 936 | 1 937 | ] 938 | }, 939 | { 940 | "id": 33, 941 | "type": "VAEDecode", 942 | "pos": [ 943 | 1831.4340233055932, 944 | -908.5021301420154 945 | ], 946 | "size": { 947 | "0": 210, 948 | "1": 46 949 | }, 950 | "flags": {}, 951 | "order": 23, 952 | "mode": 0, 953 | "inputs": [ 954 | { 955 | "name": "samples", 956 | "type": "LATENT", 957 | "link": 62 958 | }, 959 | { 960 | "name": "vae", 961 | "type": "VAE", 962 | "link": 63 963 | } 964 | ], 965 | "outputs": [ 966 | { 967 | "name": "IMAGE", 968 | "type": "IMAGE", 969 | "links": [ 970 | 64 971 | ], 972 | "slot_index": 0 973 | } 974 | ], 975 | "properties": { 976 | "Node name for S&R": "VAEDecode" 977 | } 978 | }, 979 | { 980 | "id": 29, 981 | "type": "ConditioningSetMask", 982 | "pos": [ 983 | 991.4340233055909, 984 | -918.5021301420154 985 | ], 986 | "size": { 987 | "0": 317.4000244140625, 988 | "1": 102 989 | }, 990 | "flags": {}, 991 | "order": 9, 992 | "mode": 0, 993 | "inputs": [ 994 | { 995 | "name": "conditioning", 996 | "type": "CONDITIONING", 997 | "link": 52, 998 | "slot_index": 0 999 | }, 1000 | { 1001 | "name": "mask", 1002 | "type": "MASK", 1003 | "link": 53, 1004 | "slot_index": 1 1005 | } 1006 | ], 1007 | "outputs": [ 1008 | { 1009 | "name": "CONDITIONING", 1010 | "type": "CONDITIONING", 1011 | "links": [ 1012 | 78 1013 | ], 1014 | "shape": 3, 1015 | "slot_index": 0 1016 | } 1017 | ], 1018 | "properties": { 1019 | "Node name for S&R": "ConditioningSetMask" 1020 | }, 1021 | "widgets_values": [ 1022 | 1, 1023 | "default" 1024 | ] 1025 | }, 1026 | { 1027 | "id": 5, 1028 | "type": "EmptyLatentImage", 1029 | "pos": [ 1030 | 473, 1031 | 609 1032 | ], 1033 | "size": { 1034 | "0": 315, 1035 | "1": 106 1036 | }, 1037 | "flags": {}, 1038 | "order": 2, 1039 | "mode": 0, 1040 | "outputs": [ 1041 | { 1042 | "name": "LATENT", 1043 | "type": "LATENT", 1044 | "links": [ 1045 | 2, 1046 | 41, 1047 | 61 1048 | ], 1049 | "slot_index": 0 
1050 | } 1051 | ], 1052 | "properties": { 1053 | "Node name for S&R": "EmptyLatentImage" 1054 | }, 1055 | "widgets_values": [ 1056 | 1024, 1057 | 768, 1058 | 1 1059 | ] 1060 | }, 1061 | { 1062 | "id": 34, 1063 | "type": "SaveImage", 1064 | "pos": [ 1065 | 2130.5951554622643, 1066 | -1077.5021301420156 1067 | ], 1068 | "size": { 1069 | "0": 479.31683349609375, 1070 | "1": 444.45648193359375 1071 | }, 1072 | "flags": {}, 1073 | "order": 25, 1074 | "mode": 0, 1075 | "inputs": [ 1076 | { 1077 | "name": "images", 1078 | "type": "IMAGE", 1079 | "link": 64 1080 | } 1081 | ], 1082 | "properties": { 1083 | "Node name for S&R": "SaveImage" 1084 | }, 1085 | "widgets_values": [ 1086 | "ComfyUI" 1087 | ] 1088 | }, 1089 | { 1090 | "id": 8, 1091 | "type": "VAEDecode", 1092 | "pos": [ 1093 | 1984, 1094 | -107 1095 | ], 1096 | "size": { 1097 | "0": 210, 1098 | "1": 46 1099 | }, 1100 | "flags": {}, 1101 | "order": 24, 1102 | "mode": 0, 1103 | "inputs": [ 1104 | { 1105 | "name": "samples", 1106 | "type": "LATENT", 1107 | "link": 7 1108 | }, 1109 | { 1110 | "name": "vae", 1111 | "type": "VAE", 1112 | "link": 8 1113 | } 1114 | ], 1115 | "outputs": [ 1116 | { 1117 | "name": "IMAGE", 1118 | "type": "IMAGE", 1119 | "links": [ 1120 | 9 1121 | ], 1122 | "slot_index": 0 1123 | } 1124 | ], 1125 | "properties": { 1126 | "Node name for S&R": "VAEDecode" 1127 | } 1128 | }, 1129 | { 1130 | "id": 9, 1131 | "type": "SaveImage", 1132 | "pos": [ 1133 | 2264, 1134 | -364 1135 | ], 1136 | "size": { 1137 | "0": 479.31683349609375, 1138 | "1": 444.45648193359375 1139 | }, 1140 | "flags": {}, 1141 | "order": 26, 1142 | "mode": 0, 1143 | "inputs": [ 1144 | { 1145 | "name": "images", 1146 | "type": "IMAGE", 1147 | "link": 9 1148 | } 1149 | ], 1150 | "properties": { 1151 | "Node name for S&R": "SaveImage" 1152 | }, 1153 | "widgets_values": [ 1154 | "ComfyUI" 1155 | ] 1156 | }, 1157 | { 1158 | "id": 18, 1159 | "type": "SolidMask", 1160 | "pos": [ 1161 | -802, 1162 | 235 1163 | ], 1164 | "size": { 1165 | "0": 
315, 1166 | "1": 106 1167 | }, 1168 | "flags": {}, 1169 | "order": 3, 1170 | "mode": 0, 1171 | "outputs": [ 1172 | { 1173 | "name": "MASK", 1174 | "type": "MASK", 1175 | "links": [ 1176 | 35, 1177 | 49 1178 | ], 1179 | "shape": 3, 1180 | "slot_index": 0 1181 | } 1182 | ], 1183 | "properties": { 1184 | "Node name for S&R": "SolidMask" 1185 | }, 1186 | "widgets_values": [ 1187 | 1, 1188 | 512, 1189 | 512 1190 | ] 1191 | }, 1192 | { 1193 | "id": 26, 1194 | "type": "SaveImage", 1195 | "pos": [ 1196 | 2244, 1197 | 382 1198 | ], 1199 | "size": { 1200 | "0": 479.31683349609375, 1201 | "1": 444.45648193359375 1202 | }, 1203 | "flags": {}, 1204 | "order": 18, 1205 | "mode": 0, 1206 | "inputs": [ 1207 | { 1208 | "name": "images", 1209 | "type": "IMAGE", 1210 | "link": 44 1211 | } 1212 | ], 1213 | "properties": { 1214 | "Node name for S&R": "SaveImage" 1215 | }, 1216 | "widgets_values": [ 1217 | "ComfyUI" 1218 | ] 1219 | } 1220 | ], 1221 | "links": [ 1222 | [ 1223 | 2, 1224 | 5, 1225 | 0, 1226 | 3, 1227 | 3, 1228 | "LATENT" 1229 | ], 1230 | [ 1231 | 5, 1232 | 4, 1233 | 1, 1234 | 7, 1235 | 0, 1236 | "CLIP" 1237 | ], 1238 | [ 1239 | 6, 1240 | 7, 1241 | 0, 1242 | 3, 1243 | 2, 1244 | "CONDITIONING" 1245 | ], 1246 | [ 1247 | 7, 1248 | 3, 1249 | 0, 1250 | 8, 1251 | 0, 1252 | "LATENT" 1253 | ], 1254 | [ 1255 | 8, 1256 | 4, 1257 | 2, 1258 | 8, 1259 | 1, 1260 | "VAE" 1261 | ], 1262 | [ 1263 | 9, 1264 | 8, 1265 | 0, 1266 | 9, 1267 | 0, 1268 | "IMAGE" 1269 | ], 1270 | [ 1271 | 14, 1272 | 12, 1273 | 0, 1274 | 11, 1275 | 1, 1276 | "CONDITIONING" 1277 | ], 1278 | [ 1279 | 15, 1280 | 15, 1281 | 0, 1282 | 14, 1283 | 0, 1284 | "IMAGE" 1285 | ], 1286 | [ 1287 | 16, 1288 | 14, 1289 | 0, 1290 | 11, 1291 | 2, 1292 | "MASK" 1293 | ], 1294 | [ 1295 | 18, 1296 | 17, 1297 | 0, 1298 | 16, 1299 | 1, 1300 | "CONDITIONING" 1301 | ], 1302 | [ 1303 | 20, 1304 | 11, 1305 | 0, 1306 | 16, 1307 | 0, 1308 | "MODEL" 1309 | ], 1310 | [ 1311 | 21, 1312 | 14, 1313 | 1, 1314 | 16, 1315 | 2, 1316 | "MASK" 1317 | ], 
1318 | [ 1319 | 23, 1320 | 4, 1321 | 1, 1322 | 17, 1323 | 0, 1324 | "CLIP" 1325 | ], 1326 | [ 1327 | 24, 1328 | 4, 1329 | 1, 1330 | 12, 1331 | 0, 1332 | "CLIP" 1333 | ], 1334 | [ 1335 | 26, 1336 | 16, 1337 | 0, 1338 | 10, 1339 | 0, 1340 | "MODEL" 1341 | ], 1342 | [ 1343 | 27, 1344 | 10, 1345 | 0, 1346 | 3, 1347 | 0, 1348 | "MODEL" 1349 | ], 1350 | [ 1351 | 28, 1352 | 10, 1353 | 1, 1354 | 3, 1355 | 1, 1356 | "CONDITIONING" 1357 | ], 1358 | [ 1359 | 29, 1360 | 19, 1361 | 0, 1362 | 11, 1363 | 0, 1364 | "MODEL" 1365 | ], 1366 | [ 1367 | 30, 1368 | 4, 1369 | 0, 1370 | 19, 1371 | 0, 1372 | "MODEL" 1373 | ], 1374 | [ 1375 | 31, 1376 | 4, 1377 | 1, 1378 | 20, 1379 | 0, 1380 | "CLIP" 1381 | ], 1382 | [ 1383 | 33, 1384 | 20, 1385 | 0, 1386 | 19, 1387 | 1, 1388 | "CONDITIONING" 1389 | ], 1390 | [ 1391 | 35, 1392 | 18, 1393 | 0, 1394 | 19, 1395 | 2, 1396 | "MASK" 1397 | ], 1398 | [ 1399 | 40, 1400 | 7, 1401 | 0, 1402 | 24, 1403 | 2, 1404 | "CONDITIONING" 1405 | ], 1406 | [ 1407 | 41, 1408 | 5, 1409 | 0, 1410 | 24, 1411 | 3, 1412 | "LATENT" 1413 | ], 1414 | [ 1415 | 42, 1416 | 24, 1417 | 0, 1418 | 25, 1419 | 0, 1420 | "LATENT" 1421 | ], 1422 | [ 1423 | 43, 1424 | 4, 1425 | 2, 1426 | 25, 1427 | 1, 1428 | "VAE" 1429 | ], 1430 | [ 1431 | 44, 1432 | 25, 1433 | 0, 1434 | 26, 1435 | 0, 1436 | "IMAGE" 1437 | ], 1438 | [ 1439 | 45, 1440 | 20, 1441 | 0, 1442 | 24, 1443 | 1, 1444 | "CONDITIONING" 1445 | ], 1446 | [ 1447 | 47, 1448 | 4, 1449 | 0, 1450 | 24, 1451 | 0, 1452 | "MODEL" 1453 | ], 1454 | [ 1455 | 48, 1456 | 20, 1457 | 0, 1458 | 27, 1459 | 0, 1460 | "CONDITIONING" 1461 | ], 1462 | [ 1463 | 49, 1464 | 18, 1465 | 0, 1466 | 27, 1467 | 1, 1468 | "MASK" 1469 | ], 1470 | [ 1471 | 50, 1472 | 12, 1473 | 0, 1474 | 28, 1475 | 0, 1476 | "CONDITIONING" 1477 | ], 1478 | [ 1479 | 52, 1480 | 17, 1481 | 0, 1482 | 29, 1483 | 0, 1484 | "CONDITIONING" 1485 | ], 1486 | [ 1487 | 53, 1488 | 14, 1489 | 1, 1490 | 29, 1491 | 1, 1492 | "MASK" 1493 | ], 1494 | [ 1495 | 58, 1496 | 4, 1497 | 0, 1498 | 32, 
1499 | 0, 1500 | "MODEL" 1501 | ], 1502 | [ 1503 | 60, 1504 | 7, 1505 | 0, 1506 | 32, 1507 | 2, 1508 | "CONDITIONING" 1509 | ], 1510 | [ 1511 | 61, 1512 | 5, 1513 | 0, 1514 | 32, 1515 | 3, 1516 | "LATENT" 1517 | ], 1518 | [ 1519 | 62, 1520 | 32, 1521 | 0, 1522 | 33, 1523 | 0, 1524 | "LATENT" 1525 | ], 1526 | [ 1527 | 63, 1528 | 4, 1529 | 2, 1530 | 33, 1531 | 1, 1532 | "VAE" 1533 | ], 1534 | [ 1535 | 64, 1536 | 33, 1537 | 0, 1538 | 34, 1539 | 0, 1540 | "IMAGE" 1541 | ], 1542 | [ 1543 | 72, 1544 | 14, 1545 | 0, 1546 | 28, 1547 | 1, 1548 | "MASK" 1549 | ], 1550 | [ 1551 | 75, 1552 | 35, 1553 | 0, 1554 | 36, 1555 | 0, 1556 | "CONDITIONING" 1557 | ], 1558 | [ 1559 | 76, 1560 | 28, 1561 | 0, 1562 | 35, 1563 | 1, 1564 | "CONDITIONING" 1565 | ], 1566 | [ 1567 | 78, 1568 | 29, 1569 | 0, 1570 | 36, 1571 | 1, 1572 | "CONDITIONING" 1573 | ], 1574 | [ 1575 | 82, 1576 | 27, 1577 | 0, 1578 | 35, 1579 | 0, 1580 | "CONDITIONING" 1581 | ], 1582 | [ 1583 | 84, 1584 | 36, 1585 | 0, 1586 | 32, 1587 | 1, 1588 | "CONDITIONING" 1589 | ] 1590 | ], 1591 | "groups": [ 1592 | { 1593 | "title": "ComfyUI Area Cond", 1594 | "bounding": [ 1595 | -325, 1596 | -1195, 1597 | 3052, 1598 | 596 1599 | ], 1600 | "color": "#3f789e", 1601 | "font_size": 24 1602 | }, 1603 | { 1604 | "title": "Omost DenseDiffusion", 1605 | "bounding": [ 1606 | -318, 1607 | -485, 1608 | 3077, 1609 | 651 1610 | ], 1611 | "color": "#b06634", 1612 | "font_size": 24 1613 | } 1614 | ], 1615 | "config": {}, 1616 | "extra": { 1617 | "ds": { 1618 | "scale": 0.6303940863128572, 1619 | "offset": [ 1620 | 772.6396882786081, 1621 | 1198.3530998374802 1622 | ] 1623 | }, 1624 | "info": { 1625 | "name": "workflow", 1626 | "author": "", 1627 | "description": "", 1628 | "version": "1", 1629 | "created": "2024-06-05T16:06:42.169Z", 1630 | "modified": "2024-06-06T03:05:01.076Z", 1631 | "software": "ComfyUI" 1632 | } 1633 | }, 1634 | "version": 0.4 1635 | } -------------------------------------------------------------------------------- 
/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "comfyui_densediffusion" 3 | description = "Regional prompt with attention score manipulation" 4 | version = "1.0.2" 5 | license = { file = "LICENSE" } 6 | 7 | [project.urls] 8 | Repository = "https://github.com/huchenlei/ComfyUI_densediffusion" 9 | # Used by Comfy Registry https://comfyregistry.org 10 | 11 | [tool.comfy] 12 | PublisherId = "huchenlei" 13 | DisplayName = "ComfyUI_densediffusion" 14 | Icon = "" 15 | --------------------------------------------------------------------------------