├── .github
    └── workflows
    │   └── publish.yml
├── LICENSE
├── README.md
├── __init__.py
├── assets
    ├── comfyui_i2mv.png
    ├── comfyui_i2mv_lora.png
    ├── comfyui_i2mv_view_selector.png
    ├── comfyui_ldm_vae.png
    ├── comfyui_model_makeup.png
    ├── comfyui_t2mv.png
    ├── comfyui_t2mv_controlnet.png
    ├── comfyui_t2mv_lora.png
    └── demo
    │   └── scribbles
    │       ├── scribble_0.png
    │       ├── scribble_1.png
    │       ├── scribble_2.png
    │       ├── scribble_3.png
    │       ├── scribble_4.png
    │       └── scribble_5.png
├── cache
    └── stable-diffusion-v1-inference.yaml
├── mvadapter
    ├── __init__.py
    ├── loaders
    │   ├── __init__.py
    │   └── custom_adapter.py
    ├── models
    │   ├── __init__.py
    │   └── attention_processor.py
    ├── pipelines
    │   ├── pipeline_mvadapter_i2mv_sd.py
    │   ├── pipeline_mvadapter_i2mv_sdxl.py
    │   ├── pipeline_mvadapter_t2mv_sd.py
    │   └── pipeline_mvadapter_t2mv_sdxl.py
    ├── schedulers
    │   ├── scheduler_utils.py
    │   └── scheduling_shift_snr.py
    └── utils
    │   ├── __init__.py
    │   ├── camera.py
    │   ├── geometry.py
    │   └── saving.py
├── nodes.py
├── pyproject.toml
├── requirements.txt
├── utils.py
└── workflows
    ├── i2mv_sdxl_diffusers.json
    ├── i2mv_sdxl_ldm.json
    ├── i2mv_sdxl_ldm_lora.json
    ├── i2mv_sdxl_ldm_view_selector.json
    ├── t2mv_sdxl_diffusers.json
    ├── t2mv_sdxl_ldm.json
    ├── t2mv_sdxl_ldm_controlnet.json
    └── t2mv_sdxl_ldm_lora.json


/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to Comfy registry
 2 | on:
 3 |   workflow_dispatch:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |     paths:
 8 |       - "pyproject.toml"
 9 | 
10 | permissions:
11 |   issues: write
12 | 
13 | jobs:
14 |   publish-node:
15 |     name: Publish Custom Node to registry
16 |     runs-on: ubuntu-latest
17 |     if: ${{ github.repository_owner == 'huanngzh' }}
18 |     steps:
19 |       - name: Check out code
20 |         uses: actions/checkout@v4
21 |       - name: Publish Custom Node
22 |         uses: Comfy-Org/publish-node-action@v1
23 |         with:
24 |           ## Add your own personal access token to your GitHub repository secrets and reference it here.
25 |           personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }}
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ComfyUI-MVAdapter
 2 | 
 3 | This extension integrates [MV-Adapter](https://github.com/huanngzh/MV-Adapter) into ComfyUI, allowing users to generate multi-view consistent images from text prompts or single images directly within the ComfyUI interface.
 4 | 
 5 | ## 🔥 Feature Updates
 6 | 
 7 | * [2025-01-15] Support selection of generated perspectives, such as generating only 2 views (front&back) [See [here](#view-selection)]
 8 | * [2024-12-25] Support integration with ControlNet, for applications like scribble to multi-view images [See [here](#with-controlnet)]
 9 | * [2024-12-09] Support integration with SDXL LoRA [See [here](#with-lora)]
10 | * [2024-12-02] Generate multi-view consistent images from text prompts or a single image
11 | 
12 | ## Installation
13 | 
14 | ### From Source
15 | 
16 | * Clone or download this repository into your `ComfyUI/custom_nodes/` directory.
17 | * Install the required dependencies by running `pip install -r requirements.txt`.
18 | 
19 | ## Notes
20 | 
21 | ### Workflows
22 | 
 23 | We provide example workflows in the `workflows` directory.
24 | 
 25 | Note that our code depends on diffusers and automatically downloads the model weights from Hugging Face to the Hugging Face cache directory on first use. The `ckpt_name` in the node corresponds to the model name on Hugging Face, such as `stabilityai/stable-diffusion-xl-base-1.0`.
26 | 
 27 | We also provide `Ldm**Loader` nodes to support loading text-to-image models in `ldm` format. Please see the workflow files whose names contain `_ldm`.
28 | 
29 | ### GPU Memory
30 | 
31 | If your GPU resources are limited, we recommend using the following configuration:
32 | 
33 | * Use [madebyollin/sdxl-vae-fp16-fix](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix) as VAE. If using ldm-format pipeline, remember to set `upcast_fp32` to `False`.
34 | 
35 | ![upcast_fp32_to_false](assets/comfyui_ldm_vae.png)
36 | 
37 | * Set `enable_vae_slicing` in the Diffusers Model Makeup node to `True`.
38 | 
39 | ![enable_vae_slicing](assets/comfyui_model_makeup.png)
40 | 
 41 | However, since SDXL is used as the base model, it still requires about 13 GB to 14 GB of GPU memory.
42 | 
43 | ## Usage
44 | 
45 | ### Text to Multi-view Images
46 | 
47 | #### With SDXL or other base models
48 | 
49 | ![comfyui_t2mv](assets/comfyui_t2mv.png)
50 | 
51 | * `workflows/t2mv_sdxl_diffusers.json` for loading diffusers-format models
52 | * `workflows/t2mv_sdxl_ldm.json` for loading ldm-format models
53 | 
54 | #### With LoRA
55 | 
56 | ![comfyui_t2mv_lora](assets/comfyui_t2mv_lora.png)
57 | 
58 | `workflows/t2mv_sdxl_ldm_lora.json` for loading ldm-format models with LoRA for text-to-multi-view generation
59 | 
60 | #### With ControlNet
61 | 
62 | ![comfyui_t2mv_controlnet](assets/comfyui_t2mv_controlnet.png)
63 | 
64 | `workflows/t2mv_sdxl_ldm_controlnet.json` for loading diffusers-format controlnets for text-scribble-to-multi-view generation
65 | 
66 | ### Image to Multi-view Images
67 | 
68 | #### With SDXL or other base models
69 | 
70 | ![comfyui_i2mv](assets/comfyui_i2mv.png)
71 | 
72 | * `workflows/i2mv_sdxl_diffusers.json` for loading diffusers-format models
73 | * `workflows/i2mv_sdxl_ldm.json` for loading ldm-format models
74 | 
75 | #### With LoRA
76 | 
77 | ![comfyui_i2mv_lora](assets/comfyui_i2mv_lora.png)
78 | 
79 | `workflows/i2mv_sdxl_ldm_lora.json` for loading ldm-format models with LoRA for image-to-multi-view generation
80 | 
81 | #### View Selection
82 | 
83 | ![comfyui_i2mv_pair_views](assets/comfyui_i2mv_view_selector.png)
84 | 
85 | `workflows/i2mv_sdxl_ldm_view_selector.json` for loading ldm-format models and selecting specific views to generate
86 | 
 87 | The key is to replace the `adapter_name` in `Diffusers Model Makeup` with `mvadapter_i2mv_sdxl_beta.safetensors` and add a `View Selector` node to choose which views you want to generate. Based on rough testing, the beta model is better at generating 2 views (front & back), 3 views (front & right & back), or 4 views (front & right & back & left). Note that the attribute `num_views` is not used and can be ignored.
88 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from .nodes import *
2 | 
3 | __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
4 | 


--------------------------------------------------------------------------------
/assets/comfyui_i2mv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/comfyui_i2mv.png


--------------------------------------------------------------------------------
/assets/comfyui_i2mv_lora.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/comfyui_i2mv_lora.png


--------------------------------------------------------------------------------
/assets/comfyui_i2mv_view_selector.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/comfyui_i2mv_view_selector.png


--------------------------------------------------------------------------------
/assets/comfyui_ldm_vae.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/comfyui_ldm_vae.png


--------------------------------------------------------------------------------
/assets/comfyui_model_makeup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/comfyui_model_makeup.png


--------------------------------------------------------------------------------
/assets/comfyui_t2mv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/comfyui_t2mv.png


--------------------------------------------------------------------------------
/assets/comfyui_t2mv_controlnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/comfyui_t2mv_controlnet.png


--------------------------------------------------------------------------------
/assets/comfyui_t2mv_lora.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/comfyui_t2mv_lora.png


--------------------------------------------------------------------------------
/assets/demo/scribbles/scribble_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/demo/scribbles/scribble_0.png


--------------------------------------------------------------------------------
/assets/demo/scribbles/scribble_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/demo/scribbles/scribble_1.png


--------------------------------------------------------------------------------
/assets/demo/scribbles/scribble_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/demo/scribbles/scribble_2.png


--------------------------------------------------------------------------------
/assets/demo/scribbles/scribble_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/demo/scribbles/scribble_3.png


--------------------------------------------------------------------------------
/assets/demo/scribbles/scribble_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/demo/scribbles/scribble_4.png


--------------------------------------------------------------------------------
/assets/demo/scribbles/scribble_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/assets/demo/scribbles/scribble_5.png


--------------------------------------------------------------------------------
/cache/stable-diffusion-v1-inference.yaml:
--------------------------------------------------------------------------------
 1 | model:
 2 |   base_learning_rate: 1.0e-04
 3 |   target: ldm.models.diffusion.ddpm.LatentDiffusion
 4 |   params:
 5 |     linear_start: 0.00085
 6 |     linear_end: 0.0120
 7 |     num_timesteps_cond: 1
 8 |     log_every_t: 200
 9 |     timesteps: 1000
10 |     first_stage_key: "jpg"
11 |     cond_stage_key: "txt"
12 |     image_size: 64
13 |     channels: 4
14 |     cond_stage_trainable: false   # Note: different from the one we trained before
15 |     conditioning_key: crossattn
16 |     monitor: val/loss_simple_ema
17 |     scale_factor: 0.18215
18 |     use_ema: False
19 | 
20 |     scheduler_config: # 10000 warmup steps
21 |       target: ldm.lr_scheduler.LambdaLinearScheduler
22 |       params:
23 |         warm_up_steps: [ 10000 ]
24 |         cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
25 |         f_start: [ 1.e-6 ]
26 |         f_max: [ 1. ]
27 |         f_min: [ 1. ]
28 | 
29 |     unet_config:
30 |       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
31 |       params:
32 |         image_size: 32 # unused
33 |         in_channels: 4
34 |         out_channels: 4
35 |         model_channels: 320
36 |         attention_resolutions: [ 4, 2, 1 ]
37 |         num_res_blocks: 2
38 |         channel_mult: [ 1, 2, 4, 4 ]
39 |         num_heads: 8
40 |         use_spatial_transformer: True
41 |         transformer_depth: 1
42 |         context_dim: 768
43 |         use_checkpoint: True
44 |         legacy: False
45 | 
46 |     first_stage_config:
47 |       target: ldm.models.autoencoder.AutoencoderKL
48 |       params:
49 |         embed_dim: 4
50 |         monitor: val/rec_loss
51 |         ddconfig:
52 |           double_z: true
53 |           z_channels: 4
54 |           resolution: 256
55 |           in_channels: 3
56 |           out_ch: 3
57 |           ch: 128
58 |           ch_mult:
59 |           - 1
60 |           - 2
61 |           - 4
62 |           - 4
63 |           num_res_blocks: 2
64 |           attn_resolutions: []
65 |           dropout: 0.0
66 |         lossconfig:
67 |           target: torch.nn.Identity
68 | 
69 |     cond_stage_config:
70 |       target: ldm.modules.encoders.modules.FrozenCLIPEmbedder


--------------------------------------------------------------------------------
/mvadapter/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/mvadapter/__init__.py


--------------------------------------------------------------------------------
/mvadapter/loaders/__init__.py:
--------------------------------------------------------------------------------
1 | from .custom_adapter import CustomAdapterMixin
2 | 


--------------------------------------------------------------------------------
/mvadapter/loaders/custom_adapter.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Dict, Optional, Union
 3 | 
 4 | import safetensors
 5 | import torch
 6 | from diffusers.utils import _get_model_file, logging
 7 | from safetensors import safe_open
 8 | 
 9 | logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
10 | 
11 | 
class CustomAdapterMixin:
    """Mixin providing init / load / save entry points for custom adapter weights.

    The public methods only deal with locating, reading and writing the weight
    file. The adapter-specific work is delegated to three hooks that a
    subclass must implement: ``_init_custom_adapter``, ``_load_custom_adapter``
    and ``_save_custom_adapter``. Each hook raises ``NotImplementedError`` here.
    """

    def init_custom_adapter(self, *args, **kwargs):
        """Forward all arguments unchanged to the ``_init_custom_adapter`` hook."""
        self._init_custom_adapter(*args, **kwargs)

    def _init_custom_adapter(self, *args, **kwargs):
        """Hook: set up the adapter. Must be overridden by the subclass."""
        raise NotImplementedError

    def load_custom_adapter(
        self,
        pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]],
        weight_name: str,
        subfolder: Optional[str] = None,
        **kwargs,
    ):
        """Obtain an adapter state dict and pass it to ``_load_custom_adapter``.

        Args:
            pretrained_model_name_or_path_or_dict: Either an already-loaded
                state dict (used as-is), or a value handed to diffusers'
                ``_get_model_file`` to resolve the weight file.
            weight_name: File name of the weights. A ``.safetensors`` suffix
                selects the safetensors reader; anything else goes through
                ``torch.load``.
            subfolder: Forwarded to ``_get_model_file``.
            **kwargs: ``cache_dir``, ``force_download``, ``proxies``,
                ``local_files_only``, ``token`` and ``revision`` are forwarded
                to ``_get_model_file``; any other keyword is ignored.
        """
        # Pull out the options understood by ``_get_model_file``.
        cache_dir = kwargs.pop("cache_dir", None)
        force_download = kwargs.pop("force_download", False)
        proxies = kwargs.pop("proxies", None)
        local_files_only = kwargs.pop("local_files_only", None)
        token = kwargs.pop("token", None)
        revision = kwargs.pop("revision", None)

        user_agent = {
            "file_type": "attn_procs_weights",
            "framework": "pytorch",
        }

        if not isinstance(pretrained_model_name_or_path_or_dict, dict):
            model_file = _get_model_file(
                pretrained_model_name_or_path_or_dict,
                weights_name=weight_name,
                subfolder=subfolder,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                local_files_only=local_files_only,
                token=token,
                revision=revision,
                user_agent=user_agent,
            )
            if weight_name.endswith(".safetensors"):
                state_dict = {}
                with safe_open(model_file, framework="pt", device="cpu") as f:
                    for key in f.keys():
                        state_dict[key] = f.get_tensor(key)
            else:
                # NOTE(review): depending on the installed torch version,
                # ``torch.load`` may unpickle arbitrary objects. Only load
                # non-safetensors weights from sources you trust.
                state_dict = torch.load(model_file, map_location="cpu")
        else:
            state_dict = pretrained_model_name_or_path_or_dict

        self._load_custom_adapter(state_dict)

    def _load_custom_adapter(self, state_dict):
        """Hook: consume the loaded state dict. Must be overridden by the subclass."""
        raise NotImplementedError

    def save_custom_adapter(
        self,
        save_directory: Union[str, os.PathLike],
        weight_name: str,
        safe_serialization: bool = False,
        **kwargs,
    ):
        """Write the state dict returned by ``_save_custom_adapter`` to disk.

        Args:
            save_directory: Target directory. It is created (including missing
                parents) if it does not exist. If the path is an existing
                file, an error is logged and nothing is written.
            weight_name: File name to write inside ``save_directory``.
            safe_serialization: When true, write with
                ``safetensors.torch.save_file``; otherwise use ``torch.save``.
            **kwargs: Forwarded to the ``_save_custom_adapter`` hook.
        """
        if os.path.isfile(save_directory):
            logger.error(
                f"Provided path ({save_directory}) should be a directory, not a file"
            )
            return

        if safe_serialization:
            # A bare ``import safetensors`` does not load the ``torch``
            # submodule, so import the writer explicitly.
            from safetensors.torch import save_file

            def save_function(weights, filename):
                return save_file(weights, filename, metadata={"format": "pt"})

        else:
            save_function = torch.save

        # Collect the weights first so a failing hook leaves no empty directory.
        state_dict = self._save_custom_adapter(**kwargs)

        os.makedirs(save_directory, exist_ok=True)
        save_path = os.path.join(save_directory, weight_name)
        save_function(state_dict, save_path)
        logger.info(f"Custom adapter weights saved in {save_path}")

    def _save_custom_adapter(self, **kwargs):
        """Hook: return the state dict to save. Must be overridden by the subclass.

        Accepts ``**kwargs`` because ``save_custom_adapter`` forwards its extra
        keyword arguments here.
        """
        raise NotImplementedError
99 | 


--------------------------------------------------------------------------------
/mvadapter/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huanngzh/ComfyUI-MVAdapter/df94381ecb6db47d31748a229065d553c090775e/mvadapter/models/__init__.py


--------------------------------------------------------------------------------
/mvadapter/models/attention_processor.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from typing import Callable, List, Optional, Union
  3 | 
  4 | import torch
  5 | import torch.nn.functional as F
  6 | from diffusers.models.attention_processor import Attention
  7 | from diffusers.models.unets import UNet2DConditionModel
  8 | from diffusers.utils import deprecate, logging
  9 | from diffusers.utils.import_utils import is_torch_npu_available, is_xformers_available
 10 | from einops import rearrange
 11 | from torch import nn
 12 | 
 13 | 
 14 | def default_set_attn_proc_func(
 15 |     name: str,
 16 |     hidden_size: int,
 17 |     cross_attention_dim: Optional[int],
 18 |     ori_attn_proc: object,
 19 | ) -> object:
 20 |     return ori_attn_proc
 21 | 
 22 | 
 23 | def set_unet_2d_condition_attn_processor(
 24 |     unet: UNet2DConditionModel,
 25 |     set_self_attn_proc_func: Callable = default_set_attn_proc_func,
 26 |     set_cross_attn_proc_func: Callable = default_set_attn_proc_func,
 27 |     set_custom_attn_proc_func: Callable = default_set_attn_proc_func,
 28 |     set_self_attn_module_names: Optional[List[str]] = None,
 29 |     set_cross_attn_module_names: Optional[List[str]] = None,
 30 |     set_custom_attn_module_names: Optional[List[str]] = None,
 31 | ) -> None:
 32 |     do_set_processor = lambda name, module_names: (
 33 |         any([name.startswith(module_name) for module_name in module_names])
 34 |         if module_names is not None
 35 |         else True
 36 |     )  # prefix match
 37 | 
 38 |     attn_procs = {}
 39 |     for name, attn_processor in unet.attn_processors.items():
 40 |         # set attn_processor by default, if module_names is None
 41 |         set_self_attn_processor = do_set_processor(name, set_self_attn_module_names)
 42 |         set_cross_attn_processor = do_set_processor(name, set_cross_attn_module_names)
 43 |         set_custom_attn_processor = do_set_processor(name, set_custom_attn_module_names)
 44 | 
 45 |         if name.startswith("mid_block"):
 46 |             hidden_size = unet.config.block_out_channels[-1]
 47 |         elif name.startswith("up_blocks"):
 48 |             block_id = int(name[len("up_blocks.")])
 49 |             hidden_size = list(reversed(unet.config.block_out_channels))[block_id]
 50 |         elif name.startswith("down_blocks"):
 51 |             block_id = int(name[len("down_blocks.")])
 52 |             hidden_size = unet.config.block_out_channels[block_id]
 53 | 
 54 |         is_custom = "attn_mid_blocks" in name or "attn_post_blocks" in name
 55 |         if is_custom:
 56 |             attn_procs[name] = (
 57 |                 set_custom_attn_proc_func(name, hidden_size, None, attn_processor)
 58 |                 if set_custom_attn_processor
 59 |                 else attn_processor
 60 |             )
 61 |         else:
 62 |             cross_attention_dim = (
 63 |                 None
 64 |                 if name.endswith("attn1.processor")
 65 |                 else unet.config.cross_attention_dim
 66 |             )
 67 |             if cross_attention_dim is None or "motion_modules" in name:
 68 |                 # self attention
 69 |                 attn_procs[name] = (
 70 |                     set_self_attn_proc_func(
 71 |                         name, hidden_size, cross_attention_dim, attn_processor
 72 |                     )
 73 |                     if set_self_attn_processor
 74 |                     else attn_processor
 75 |                 )
 76 |             else:
 77 |                 # cross attention
 78 |                 attn_procs[name] = (
 79 |                     set_cross_attn_proc_func(
 80 |                         name, hidden_size, cross_attention_dim, attn_processor
 81 |                     )
 82 |                     if set_cross_attn_processor
 83 |                     else attn_processor
 84 |                 )
 85 | 
 86 |     unet.set_attn_processor(attn_procs)
 87 | 
 88 | 
class DecoupledMVRowSelfAttnProcessor2_0(torch.nn.Module):
    r"""
    Attention processor for Decoupled Row-wise Self-Attention and Image Cross-Attention for PyTorch 2.0.

    On top of the regular attention computed with the projections of the wrapped
    ``Attention`` module, this processor owns two optional, separately
    parameterised branches:

    * a multi-view branch (``to_*_mv``), created when ``use_mv`` is true, in which
      each image row attends to the same row of all ``num_views`` views;
    * a reference branch (``to_*_ref``), created when ``use_ref`` is true, which
      attends to hidden states looked up in ``ref_hidden_states`` by ``self.name``.

    The branch outputs are scaled by ``mv_scale`` / ``ref_scale`` and added to the
    regular attention output.
    """

    def __init__(
        self,
        query_dim: int,
        inner_dim: int,
        num_views: int = 1,
        name: Optional[str] = None,
        use_mv: bool = True,
        use_ref: bool = False,
    ):
        """Create the extra projection layers for the enabled branches.

        Args:
            query_dim: Input feature size of the q/k/v projections and output
                size of the output projections.
            inner_dim: Output feature size of the q/k/v projections.
            num_views: Number of views grouped along the batch dimension.
            name: Key under which this processor stores / looks up hidden states
                in ``cache_hidden_states`` / ``ref_hidden_states``.
            use_mv: Whether to create the multi-view branch.
            use_ref: Whether to create the reference branch.

        Raises:
            ImportError: If ``F.scaled_dot_product_attention`` is unavailable.
        """
        if not hasattr(F, "scaled_dot_product_attention"):
            raise ImportError(
                "DecoupledMVRowSelfAttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0."
            )

        super().__init__()

        self.num_views = num_views
        self.name = name  # NOTE: need for image cross-attention
        self.use_mv = use_mv
        self.use_ref = use_ref

        # Projections of the multi-view branch (q/k/v without bias, output
        # projection with bias followed by a no-op dropout).
        if self.use_mv:
            self.to_q_mv = nn.Linear(
                in_features=query_dim, out_features=inner_dim, bias=False
            )
            self.to_k_mv = nn.Linear(
                in_features=query_dim, out_features=inner_dim, bias=False
            )
            self.to_v_mv = nn.Linear(
                in_features=query_dim, out_features=inner_dim, bias=False
            )
            self.to_out_mv = nn.ModuleList(
                [
                    nn.Linear(in_features=inner_dim, out_features=query_dim, bias=True),
                    nn.Dropout(0.0),
                ]
            )

        # Projections of the reference branch, same layout as above.
        if self.use_ref:
            self.to_q_ref = nn.Linear(
                in_features=query_dim, out_features=inner_dim, bias=False
            )
            self.to_k_ref = nn.Linear(
                in_features=query_dim, out_features=inner_dim, bias=False
            )
            self.to_v_ref = nn.Linear(
                in_features=query_dim, out_features=inner_dim, bias=False
            )
            self.to_out_ref = nn.ModuleList(
                [
                    nn.Linear(in_features=inner_dim, out_features=query_dim, bias=True),
                    nn.Dropout(0.0),
                ]
            )

    def __call__(
        self,
        attn: Attention,
        hidden_states: torch.FloatTensor,
        encoder_hidden_states: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        temb: Optional[torch.FloatTensor] = None,
        mv_scale: float = 1.0,
        ref_hidden_states: Optional[torch.FloatTensor] = None,
        ref_scale: float = 1.0,
        cache_hidden_states: Optional[List[torch.FloatTensor]] = None,
        use_mv: bool = True,
        use_ref: bool = True,
        num_views: Optional[int] = None,
        *args,
        **kwargs,
    ) -> torch.FloatTensor:
        """
        Run regular attention plus the enabled decoupled branches.

        New args:
            mv_scale (float): scale for multi-view self-attention.
            ref_hidden_states: reference hidden states for image cross-attention.
                NOTE(review): despite the annotation it is indexed with
                ``self.name`` below, so a mapping keyed by processor name is expected.
            ref_scale (float): scale for image cross-attention.
            cache_hidden_states: container that receives a clone of the incoming
                hidden states under ``self.name`` (used for the reference unet).
                NOTE(review): annotated as a list but written to by name, so a
                mapping is expected.
            use_mv (bool): per-call switch for the multi-view branch; only effective
                if the branch was created in ``__init__``.
            use_ref (bool): per-call switch for the reference branch; only effective
                if the branch was created in ``__init__``.
            num_views (int): if given, overwrites ``self.num_views`` (persistently).

        Returns:
            The attention output, in the same layout as the input
            (4D inputs are restored to ``(batch, channel, height, width)``).
        """
        if len(args) > 0 or kwargs.get("scale", None) is not None:
            deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`."
            deprecate("scale", "1.0.0", deprecation_message)

        # A per-call value is stored on the instance and stays in effect for
        # later calls as well.
        if num_views is not None:
            self.num_views = num_views

        # NEW: cache hidden states for reference unet
        # (stored before any normalisation or projection is applied)
        if cache_hidden_states is not None:
            cache_hidden_states[self.name] = hidden_states.clone()

        # NEW: whether to use multi-view attention and image cross-attention
        # A branch runs only if it was built in __init__ AND requested for this call.
        use_mv = self.use_mv and use_mv
        use_ref = self.use_ref and use_ref

        residual = hidden_states
        if attn.spatial_norm is not None:
            hidden_states = attn.spatial_norm(hidden_states, temb)

        input_ndim = hidden_states.ndim

        # Flatten (batch, channel, height, width) into (batch, height * width, channel).
        if input_ndim == 4:
            batch_size, channel, height, width = hidden_states.shape
            hidden_states = hidden_states.view(
                batch_size, channel, height * width
            ).transpose(1, 2)

        # Batch size / sequence length come from the key/value source.
        batch_size, sequence_length, _ = (
            hidden_states.shape
            if encoder_hidden_states is None
            else encoder_hidden_states.shape
        )

        if attention_mask is not None:
            attention_mask = attn.prepare_attention_mask(
                attention_mask, sequence_length, batch_size
            )
            # scaled_dot_product_attention expects attention_mask shape to be
            # (batch, heads, source_length, target_length)
            attention_mask = attention_mask.view(
                batch_size, attn.heads, -1, attention_mask.shape[-1]
            )

        if attn.group_norm is not None:
            hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(
                1, 2
            )

        query = attn.to_q(hidden_states)

        # NEW: for decoupled multi-view attention
        # (branch queries are projected from the same normalised hidden states)
        if use_mv:
            query_mv = self.to_q_mv(hidden_states)

        # NEW: for decoupled reference cross attention
        if use_ref:
            query_ref = self.to_q_ref(hidden_states)

        # Without encoder states this is self-attention over hidden_states.
        if encoder_hidden_states is None:
            encoder_hidden_states = hidden_states
        elif attn.norm_cross:
            encoder_hidden_states = attn.norm_encoder_hidden_states(
                encoder_hidden_states
            )

        key = attn.to_k(encoder_hidden_states)
        value = attn.to_v(encoder_hidden_states)

        inner_dim = key.shape[-1]
        head_dim = inner_dim // attn.heads

        # Split heads: (batch, seq, heads * head_dim) -> (batch, heads, seq, head_dim).
        query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)

        key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
        value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)

        # the output of sdp = (batch, num_heads, seq_len, head_dim)
        # TODO: add support for attn.scale when we move to Torch 2.1
        hidden_states = F.scaled_dot_product_attention(
            query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
        )

        # Merge heads back: (batch, seq, heads * head_dim).
        hidden_states = hidden_states.transpose(1, 2).reshape(
            batch_size, -1, attn.heads * head_dim
        )
        hidden_states = hidden_states.to(query.dtype)

        ####### Decoupled multi-view self-attention ########
        if use_mv:
            key_mv = self.to_k_mv(encoder_hidden_states)
            value_mv = self.to_v_mv(encoder_hidden_states)

            query_mv = query_mv.view(batch_size, -1, attn.heads, head_dim)
            key_mv = key_mv.view(batch_size, -1, attn.heads, head_dim)
            value_mv = value_mv.view(batch_size, -1, attn.heads, head_dim)

            # The token sequence is treated as a square grid. Note that this
            # overwrites the height/width unpacked from a 4D input above, which
            # are reused for the final reshape.
            height = width = math.isqrt(sequence_length)

            # row self-attention
            # Queries: every row of every view becomes its own batch item with
            # `iw` tokens -> (b * nv * ih, heads, iw, head_dim).
            query_mv = rearrange(
                query_mv,
                "(b nv) (ih iw) h c -> (b nv ih) iw h c",
                nv=self.num_views,
                ih=height,
                iw=width,
            ).transpose(1, 2)
            # Keys: for each row, concatenate that row's tokens from all views
            # (nv * iw tokens per row).
            key_mv = rearrange(
                key_mv,
                "(b nv) (ih iw) h c -> b ih (nv iw) h c",
                nv=self.num_views,
                ih=height,
                iw=width,
            )
            # Repeat per view so the batch layout lines up with the queries
            # -> (b * nv * ih, heads, nv * iw, head_dim).
            key_mv = (
                key_mv.repeat_interleave(self.num_views, dim=0)
                .view(batch_size * height, -1, attn.heads, head_dim)
                .transpose(1, 2)
            )
            # Values follow exactly the same layout as the keys.
            value_mv = rearrange(
                value_mv,
                "(b nv) (ih iw) h c -> b ih (nv iw) h c",
                nv=self.num_views,
                ih=height,
                iw=width,
            )
            value_mv = (
                value_mv.repeat_interleave(self.num_views, dim=0)
                .view(batch_size * height, -1, attn.heads, head_dim)
                .transpose(1, 2)
            )

            # No attention mask is applied in this branch.
            hidden_states_mv = F.scaled_dot_product_attention(
                query_mv,
                key_mv,
                value_mv,
                dropout_p=0.0,
                is_causal=False,
            )
            # Back to the (batch, seq, heads * head_dim) layout of the main branch.
            hidden_states_mv = rearrange(
                hidden_states_mv,
                "(b nv ih) h iw c -> (b nv) (ih iw) (h c)",
                nv=self.num_views,
                ih=height,
            )
            hidden_states_mv = hidden_states_mv.to(query.dtype)

            # linear proj
            hidden_states_mv = self.to_out_mv[0](hidden_states_mv)
            # dropout
            hidden_states_mv = self.to_out_mv[1](hidden_states_mv)

        ####### Decoupled reference (image) cross-attention ########
        if use_ref:
            # Look up the reference states belonging to this processor by name.
            reference_hidden_states = ref_hidden_states[self.name]

            key_ref = self.to_k_ref(reference_hidden_states)
            value_ref = self.to_v_ref(reference_hidden_states)

            # The views below use batch_size, so the reference states must share
            # the batch size of the main input.
            query_ref = query_ref.view(batch_size, -1, attn.heads, head_dim).transpose(
                1, 2
            )
            key_ref = key_ref.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
            value_ref = value_ref.view(batch_size, -1, attn.heads, head_dim).transpose(
                1, 2
            )

            hidden_states_ref = F.scaled_dot_product_attention(
                query_ref, key_ref, value_ref, dropout_p=0.0, is_causal=False
            )

            hidden_states_ref = hidden_states_ref.transpose(1, 2).reshape(
                batch_size, -1, attn.heads * head_dim
            )
            hidden_states_ref = hidden_states_ref.to(query.dtype)

            # linear proj
            hidden_states_ref = self.to_out_ref[0](hidden_states_ref)
            # dropout
            hidden_states_ref = self.to_out_ref[1](hidden_states_ref)

        # linear proj
        hidden_states = attn.to_out[0](hidden_states)
        # dropout
        hidden_states = attn.to_out[1](hidden_states)

        # Add the scaled branch outputs to the regular attention output.
        if use_mv:
            hidden_states = hidden_states + hidden_states_mv * mv_scale

        if use_ref:
            hidden_states = hidden_states + hidden_states_ref * ref_scale

        # Restore the (batch, channel, height, width) layout of 4D inputs.
        if input_ndim == 4:
            hidden_states = hidden_states.transpose(-1, -2).reshape(
                batch_size, channel, height, width
            )

        if attn.residual_connection:
            hidden_states = hidden_states + residual

        hidden_states = hidden_states / attn.rescale_output_factor

        return hidden_states

    def set_num_views(self, num_views: int) -> None:
        """Set the number of views used to group the batch in the multi-view branch."""
        self.num_views = num_views
378 | 


--------------------------------------------------------------------------------
/mvadapter/schedulers/scheduler_utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def get_sigmas(noise_scheduler, timesteps, n_dim=4, dtype=torch.float32, device=None):
 5 |     sigmas = noise_scheduler.sigmas.to(device=device, dtype=dtype)
 6 |     schedule_timesteps = noise_scheduler.timesteps.to(device)
 7 |     timesteps = timesteps.to(device)
 8 |     step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps]
 9 |     sigma = sigmas[step_indices].flatten()
10 |     while len(sigma.shape) < n_dim:
11 |         sigma = sigma.unsqueeze(-1)
12 |     return sigma
13 | 
14 | 
def SNR_to_betas(snr):
    """
    Converts SNR to betas

    With ``snr = alpha_t^2 / (1 - alpha_t^2)`` the cumulative product of alphas
    is ``alpha_t^2 = snr / (1 + snr)``. Each per-step alpha is the ratio of two
    consecutive cumulative products (the first one is divided by 1), and
    ``beta = 1 - alpha``.
    """
    alpha_t = (snr / (1 + snr)).pow(0.5)
    cumulative = alpha_t.pow(2)
    # Cumulative products shifted right by one step, starting from 1.
    previous = torch.cat([torch.ones(1, device=snr.device), cumulative[:-1]])
    return 1 - cumulative / previous
29 | 
30 | 
def compute_snr(timesteps, noise_scheduler):
    """
    Computes SNR as per Min-SNR-Diffusion-Training/guided_diffusion/gaussian_diffusion.py at 521b624bd70c67cee4bdf49225915f5

    Returns ``(alpha / sigma) ** 2`` per timestep, where ``alpha`` is the square
    root of ``noise_scheduler.alphas_cumprod`` and ``sigma`` the square root of
    its complement, both gathered at ``timesteps`` as float32.
    """

    def gather(table):
        # Adapted from Min-SNR-Diffusion-Training/guided_diffusion/gaussian_diffusion.py at 521b624bd70c67cee4bdf49225915f5
        picked = table.to(device=timesteps.device)[timesteps].float()
        for _ in range(len(timesteps.shape) - len(picked.shape)):
            picked = picked[..., None]
        return picked.expand(timesteps.shape)

    cumprod = noise_scheduler.alphas_cumprod
    sqrt_cumprod = cumprod**0.5
    sqrt_complement = (1.0 - cumprod) ** 0.5

    alpha = gather(sqrt_cumprod)
    sigma = gather(sqrt_complement)

    # Compute SNR.
    return (alpha / sigma) ** 2
58 | 
59 | 
def compute_alpha(timesteps, noise_scheduler):
    """Return ``sqrt(alphas_cumprod)`` of ``noise_scheduler`` at ``timesteps``.

    The values are gathered on the device of ``timesteps``, converted to
    float32 and expanded to the shape of ``timesteps``.
    """
    sqrt_cumprod = noise_scheduler.alphas_cumprod**0.5
    alpha = sqrt_cumprod.to(device=timesteps.device)[timesteps].float()
    for _ in range(len(timesteps.shape) - len(alpha.shape)):
        alpha = alpha[..., None]
    return alpha.expand(timesteps.shape)
71 | 


--------------------------------------------------------------------------------
/mvadapter/schedulers/scheduling_shift_snr.py:
--------------------------------------------------------------------------------
  1 | from typing import Any
  2 | 
  3 | import torch
  4 | 
  5 | from .scheduler_utils import SNR_to_betas, compute_snr
  6 | 
  7 | 
  8 | class ShiftSNRScheduler:
  9 |     SHIFT_MODES = ["default", "interpolated"]
 10 | 
 11 |     def __init__(
 12 |         self,
 13 |         noise_scheduler: Any,
 14 |         timesteps: Any,
 15 |         shift_scale: float,
 16 |         scheduler_class: Any,
 17 |     ):
 18 |         self.noise_scheduler = noise_scheduler
 19 |         self.timesteps = timesteps
 20 |         self.shift_scale = shift_scale
 21 |         self.scheduler_class = scheduler_class
 22 | 
 23 |     def _get_shift_scheduler(self):
 24 |         """
 25 |         Prepare scheduler for shifted betas.
 26 | 
 27 |         :return: A scheduler object configured with shifted betas
 28 |         """
 29 |         snr = compute_snr(self.timesteps, self.noise_scheduler)
 30 |         shifted_betas = SNR_to_betas(snr / self.shift_scale)
 31 | 
 32 |         return self.scheduler_class.from_config(
 33 |             self.noise_scheduler.config, trained_betas=shifted_betas.numpy()
 34 |         )
 35 | 
 36 |     def _get_interpolated_shift_scheduler(self):
 37 |         """
 38 |         Prepare scheduler for shifted betas and interpolate with the original betas in log space.
 39 | 
 40 |         :return: A scheduler object configured with interpolated shifted betas
 41 |         """
 42 |         snr = compute_snr(self.timesteps, self.noise_scheduler)
 43 |         shifted_snr = snr / self.shift_scale
 44 | 
 45 |         weighting = self.timesteps.float() / (
 46 |             self.noise_scheduler.config.num_train_timesteps - 1
 47 |         )
 48 |         interpolated_snr = torch.exp(
 49 |             torch.log(snr) * (1 - weighting) + torch.log(shifted_snr) * weighting
 50 |         )
 51 | 
 52 |         shifted_betas = SNR_to_betas(interpolated_snr)
 53 | 
 54 |         return self.scheduler_class.from_config(
 55 |             self.noise_scheduler.config, trained_betas=shifted_betas.numpy()
 56 |         )
 57 | 
 58 |     @classmethod
 59 |     def from_scheduler(
 60 |         cls,
 61 |         noise_scheduler: Any,
 62 |         shift_mode: str = "default",
 63 |         timesteps: Any = None,
 64 |         shift_scale: float = 1.0,
 65 |         scheduler_class: Any = None,
 66 |     ):
 67 |         # Check input
 68 |         if timesteps is None:
 69 |             timesteps = torch.arange(0, noise_scheduler.config.num_train_timesteps)
 70 |         if scheduler_class is None:
 71 |             scheduler_class = noise_scheduler.__class__
 72 | 
 73 |         # Create scheduler
 74 |         shift_scheduler = cls(
 75 |             noise_scheduler=noise_scheduler,
 76 |             timesteps=timesteps,
 77 |             shift_scale=shift_scale,
 78 |             scheduler_class=scheduler_class,
 79 |         )
 80 | 
 81 |         if shift_mode == "default":
 82 |             return shift_scheduler._get_shift_scheduler()
 83 |         elif shift_mode == "interpolated":
 84 |             return shift_scheduler._get_interpolated_shift_scheduler()
 85 |         else:
 86 |             raise ValueError(f"Unknown shift_mode: {shift_mode}")
 87 | 
 88 | 
 89 | if __name__ == "__main__":
 90 |     """
 91 |     Compare the alpha values for different noise schedulers.
 92 |     """
 93 |     import matplotlib.pyplot as plt
 94 |     from diffusers import DDPMScheduler
 95 | 
 96 |     from .scheduler_utils import compute_alpha
 97 | 
 98 |     # Base
 99 |     timesteps = torch.arange(0, 1000)
100 |     noise_scheduler_base = DDPMScheduler.from_pretrained(
101 |         "runwayml/stable-diffusion-v1-5", subfolder="scheduler"
102 |     )
103 |     alpha = compute_alpha(timesteps, noise_scheduler_base)
104 |     plt.plot(timesteps.numpy(), alpha.numpy(), label="Base")
105 | 
106 |     # Kolors
107 |     num_train_timesteps_ = 1100
108 |     timesteps_ = torch.arange(0, num_train_timesteps_)
109 |     noise_kwargs = {"beta_end": 0.014, "num_train_timesteps": num_train_timesteps_}
110 |     noise_scheduler_kolors = DDPMScheduler.from_config(
111 |         noise_scheduler_base.config, **noise_kwargs
112 |     )
113 |     alpha = compute_alpha(timesteps_, noise_scheduler_kolors)
114 |     plt.plot(timesteps_.numpy(), alpha.numpy(), label="Kolors")
115 | 
116 |     # Shift betas
117 |     shift_scale = 8.0
118 |     noise_scheduler_shift = ShiftSNRScheduler.from_scheduler(
119 |         noise_scheduler_base, shift_mode="default", shift_scale=shift_scale
120 |     )
121 |     alpha = compute_alpha(timesteps, noise_scheduler_shift)
122 |     plt.plot(timesteps.numpy(), alpha.numpy(), label="Shift Noise (scale 8.0)")
123 | 
124 |     # Shift betas (interpolated)
125 |     noise_scheduler_inter = ShiftSNRScheduler.from_scheduler(
126 |         noise_scheduler_base, shift_mode="interpolated", shift_scale=shift_scale
127 |     )
128 |     alpha = compute_alpha(timesteps, noise_scheduler_inter)
129 |     plt.plot(timesteps.numpy(), alpha.numpy(), label="Interpolated (scale 8.0)")
130 | 
131 |     # ZeroSNR
132 |     noise_scheduler = DDPMScheduler.from_config(
133 |         noise_scheduler_base.config, rescale_betas_zero_snr=True
134 |     )
135 |     alpha = compute_alpha(timesteps, noise_scheduler)
136 |     plt.plot(timesteps.numpy(), alpha.numpy(), label="ZeroSNR")
137 | 
138 |     plt.legend()
139 |     plt.grid()
140 |     plt.savefig("check_alpha.png")
141 | 


--------------------------------------------------------------------------------
/mvadapter/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .camera import get_camera, get_orthogonal_camera
2 | from .geometry import get_plucker_embeds_from_cameras_ortho
3 | from .saving import make_image_grid, tensor_to_image
4 | 


--------------------------------------------------------------------------------
/mvadapter/utils/camera.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from dataclasses import dataclass
  3 | from typing import List, Optional, Union
  4 | 
  5 | import numpy as np
  6 | import torch
  7 | import torch.nn.functional as F
  8 | import trimesh
  9 | from PIL import Image
 10 | from torch import BoolTensor, FloatTensor
 11 | 
 12 | LIST_TYPE = Union[list, np.ndarray, torch.Tensor]
 13 | 
 14 | 
 15 | def list_to_pt(
 16 |     x: LIST_TYPE, dtype: Optional[torch.dtype] = None, device: Optional[str] = None
 17 | ) -> torch.Tensor:
 18 |     if isinstance(x, list) or isinstance(x, np.ndarray):
 19 |         return torch.tensor(x, dtype=dtype, device=device)
 20 |     return x.to(dtype=dtype)
 21 | 
 22 | 
 23 | def get_c2w(
 24 |     elevation_deg: LIST_TYPE,
 25 |     distance: LIST_TYPE,
 26 |     azimuth_deg: Optional[LIST_TYPE],
 27 |     num_views: Optional[int] = 1,
 28 |     device: Optional[str] = None,
 29 | ) -> torch.FloatTensor:
 30 |     if azimuth_deg is None:
 31 |         assert (
 32 |             num_views is not None
 33 |         ), "num_views must be provided if azimuth_deg is None."
 34 |         azimuth_deg = torch.linspace(
 35 |             0, 360, num_views + 1, dtype=torch.float32, device=device
 36 |         )[:-1]
 37 |     else:
 38 |         num_views = len(azimuth_deg)
 39 |     azimuth_deg = list_to_pt(azimuth_deg, dtype=torch.float32, device=device)
 40 |     elevation_deg = list_to_pt(elevation_deg, dtype=torch.float32, device=device)
 41 |     camera_distances = list_to_pt(distance, dtype=torch.float32, device=device)
 42 |     elevation = elevation_deg * math.pi / 180
 43 |     azimuth = azimuth_deg * math.pi / 180
 44 |     camera_positions = torch.stack(
 45 |         [
 46 |             camera_distances * torch.cos(elevation) * torch.cos(azimuth),
 47 |             camera_distances * torch.cos(elevation) * torch.sin(azimuth),
 48 |             camera_distances * torch.sin(elevation),
 49 |         ],
 50 |         dim=-1,
 51 |     )
 52 |     center = torch.zeros_like(camera_positions)
 53 |     up = torch.tensor([0, 0, 1], dtype=torch.float32, device=device)[None, :].repeat(
 54 |         num_views, 1
 55 |     )
 56 |     lookat = F.normalize(center - camera_positions, dim=-1)
 57 |     right = F.normalize(torch.cross(lookat, up, dim=-1), dim=-1)
 58 |     up = F.normalize(torch.cross(right, lookat, dim=-1), dim=-1)
 59 |     c2w3x4 = torch.cat(
 60 |         [torch.stack([right, up, -lookat], dim=-1), camera_positions[:, :, None]],
 61 |         dim=-1,
 62 |     )
 63 |     c2w = torch.cat([c2w3x4, torch.zeros_like(c2w3x4[:, :1])], dim=1)
 64 |     c2w[:, 3, 3] = 1.0
 65 |     return c2w
 66 | 
 67 | 
 68 | def get_projection_matrix(
 69 |     fovy_deg: LIST_TYPE,
 70 |     aspect_wh: float = 1.0,
 71 |     near: float = 0.1,
 72 |     far: float = 100.0,
 73 |     device: Optional[str] = None,
 74 | ) -> torch.FloatTensor:
 75 |     fovy_deg = list_to_pt(fovy_deg, dtype=torch.float32, device=device)
 76 |     batch_size = fovy_deg.shape[0]
 77 |     fovy = fovy_deg * math.pi / 180
 78 |     tan_half_fovy = torch.tan(fovy / 2)
 79 |     projection_matrix = torch.zeros(
 80 |         batch_size, 4, 4, dtype=torch.float32, device=device
 81 |     )
 82 |     projection_matrix[:, 0, 0] = 1 / (aspect_wh * tan_half_fovy)
 83 |     projection_matrix[:, 1, 1] = -1 / tan_half_fovy
 84 |     projection_matrix[:, 2, 2] = -(far + near) / (far - near)
 85 |     projection_matrix[:, 2, 3] = -2 * far * near / (far - near)
 86 |     projection_matrix[:, 3, 2] = -1
 87 |     return projection_matrix
 88 | 
 89 | 
 90 | def get_orthogonal_projection_matrix(
 91 |     batch_size: int,
 92 |     left: float,
 93 |     right: float,
 94 |     bottom: float,
 95 |     top: float,
 96 |     near: float = 0.1,
 97 |     far: float = 100.0,
 98 |     device: Optional[str] = None,
 99 | ) -> torch.FloatTensor:
100 |     projection_matrix = torch.zeros(
101 |         batch_size, 4, 4, dtype=torch.float32, device=device
102 |     )
103 |     projection_matrix[:, 0, 0] = 2 / (right - left)
104 |     projection_matrix[:, 1, 1] = -2 / (top - bottom)
105 |     projection_matrix[:, 2, 2] = -2 / (far - near)
106 |     projection_matrix[:, 0, 3] = -(right + left) / (right - left)
107 |     projection_matrix[:, 1, 3] = -(top + bottom) / (top - bottom)
108 |     projection_matrix[:, 2, 3] = -(far + near) / (far - near)
109 |     projection_matrix[:, 3, 3] = 1
110 |     return projection_matrix
111 | 
112 | 
@dataclass
class Camera:
    """Batch of camera matrices that can be indexed along the batch dimension.

    ``c2w`` and ``cam_pos`` may be ``None``; ``w2c``, ``proj_mtx`` and
    ``mvp_mtx`` are always present.
    """

    # Camera-to-world matrices, or None when only w2c is known.
    c2w: Optional[torch.FloatTensor]
    # World-to-camera matrices.
    w2c: torch.FloatTensor
    # Projection matrices.
    proj_mtx: torch.FloatTensor
    # Combined projection @ world-to-camera matrices.
    mvp_mtx: torch.FloatTensor
    # Camera positions, or None when only w2c is known.
    cam_pos: Optional[torch.FloatTensor]

    def __getitem__(self, index):
        """Return a ``Camera`` holding the selected batch entries.

        An integer index yields a batch of size one (the batch dimension is
        kept); a slice is applied as is.

        Raises:
            NotImplementedError: For any other index type.
        """
        if isinstance(index, int):
            # slice(-1, 0) would be empty, so the last element needs an open
            # upper bound.
            stop = None if index == -1 else index + 1
            sl = slice(index, stop)
        elif isinstance(index, slice):
            sl = index
        else:
            raise NotImplementedError

        return Camera(
            c2w=self.c2w[sl] if self.c2w is not None else None,
            w2c=self.w2c[sl],
            proj_mtx=self.proj_mtx[sl],
            mvp_mtx=self.mvp_mtx[sl],
            cam_pos=self.cam_pos[sl] if self.cam_pos is not None else None,
        )

    def to(self, device: Optional[str] = None):
        """Move all present tensors to ``device`` in place (returns ``None``)."""
        if self.c2w is not None:
            self.c2w = self.c2w.to(device)
        self.w2c = self.w2c.to(device)
        self.proj_mtx = self.proj_mtx.to(device)
        self.mvp_mtx = self.mvp_mtx.to(device)
        if self.cam_pos is not None:
            self.cam_pos = self.cam_pos.to(device)

    def __len__(self):
        """Return the batch size."""
        # w2c is the always-present field; c2w may be None.
        return self.w2c.shape[0]
148 | 
149 | 
def get_camera(
    elevation_deg: Optional[LIST_TYPE] = None,
    distance: Optional[LIST_TYPE] = None,
    fovy_deg: Optional[LIST_TYPE] = None,
    azimuth_deg: Optional[LIST_TYPE] = None,
    num_views: Optional[int] = 1,
    c2w: Optional[torch.FloatTensor] = None,
    w2c: Optional[torch.FloatTensor] = None,
    proj_mtx: Optional[torch.FloatTensor] = None,
    aspect_wh: float = 1.0,
    near: float = 0.1,
    far: float = 100.0,
    device: Optional[str] = None,
):
    """Assemble a ``Camera`` from explicit matrices or from spherical parameters.

    Resolution order for the view matrix:

    * ``w2c`` given: it is used directly; the returned camera has ``c2w`` and
      ``cam_pos`` set to ``None`` (a ``c2w`` argument is ignored in that case).
    * otherwise ``c2w`` is used, or built with ``get_c2w`` from
      ``elevation_deg`` / ``distance`` / ``azimuth_deg`` / ``num_views``, and
      inverted to obtain ``w2c``.

    When ``proj_mtx`` is ``None`` a perspective projection is built from
    ``fovy_deg``, ``aspect_wh``, ``near`` and ``far``.

    Returns:
        ``Camera`` with ``mvp_mtx = proj_mtx @ w2c``.
    """
    if w2c is not None:
        # Only the view matrix is known: pose and position stay unset.
        c2w = None
        cam_pos = None
    else:
        if c2w is None:
            c2w = get_c2w(elevation_deg, distance, azimuth_deg, num_views, device)
        # The translation column of the camera-to-world matrix is the position.
        cam_pos = c2w[:, :3, 3]
        w2c = torch.linalg.inv(c2w)

    if proj_mtx is None:
        proj_mtx = get_projection_matrix(
            fovy_deg, aspect_wh=aspect_wh, near=near, far=far, device=device
        )

    return Camera(
        c2w=c2w,
        w2c=w2c,
        proj_mtx=proj_mtx,
        mvp_mtx=proj_mtx @ w2c,
        cam_pos=cam_pos,
    )
180 | 
181 | 
def get_orthogonal_camera(
    elevation_deg: LIST_TYPE,
    distance: LIST_TYPE,
    left: float,
    right: float,
    bottom: float,
    top: float,
    azimuth_deg: Optional[LIST_TYPE] = None,
    num_views: Optional[int] = 1,
    near: float = 0.1,
    far: float = 100.0,
    device: Optional[str] = None,
):
    """Assemble a ``Camera`` that uses an orthographic projection.

    The camera-to-world matrices come from ``get_c2w`` (given
    ``elevation_deg``, ``distance``, ``azimuth_deg``, ``num_views`` and
    ``device``). The projection comes from
    ``get_orthogonal_projection_matrix`` with the ``left``/``right``/
    ``bottom``/``top``/``near``/``far`` bounds and one matrix per pose.

    Returns:
        Camera: holds ``c2w``, its inverse ``w2c``, ``proj_mtx``,
        ``mvp_mtx`` (``proj_mtx @ w2c``) and ``cam_pos`` (translation column
        of ``c2w``).
    """
    cam_to_world = get_c2w(elevation_deg, distance, azimuth_deg, num_views, device)
    positions = cam_to_world[:, :3, 3]
    world_to_cam = torch.linalg.inv(cam_to_world)

    projection = get_orthogonal_projection_matrix(
        batch_size=cam_to_world.shape[0],
        left=left,
        right=right,
        bottom=bottom,
        top=top,
        near=near,
        far=far,
        device=device,
    )

    return Camera(
        c2w=cam_to_world,
        w2c=world_to_cam,
        proj_mtx=projection,
        mvp_mtx=projection @ world_to_cam,
        cam_pos=positions,
    )
212 | 


--------------------------------------------------------------------------------
/mvadapter/utils/geometry.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Optional, Tuple
  2 | 
  3 | import numpy as np
  4 | import torch
  5 | from torch.nn import functional as F
  6 | 
  7 | 
  8 | def get_position_map_from_depth(depth, mask, intrinsics, extrinsics, image_wh=None):
  9 |     """Compute the position map from the depth map and the camera parameters for a batch of views.
 10 | 
 11 |     Args:
 12 |         depth (torch.Tensor): The depth maps with the shape (B, H, W, 1).
 13 |         mask (torch.Tensor): The masks with the shape (B, H, W, 1).
 14 |         intrinsics (torch.Tensor): The camera intrinsics matrices with the shape (B, 3, 3).
 15 |         extrinsics (torch.Tensor): The camera extrinsics matrices with the shape (B, 4, 4).
 16 |         image_wh (Tuple[int, int]): The image width and height.
 17 | 
 18 |     Returns:
 19 |         torch.Tensor: The position maps with the shape (B, H, W, 3).
 20 |     """
 21 |     if image_wh is None:
 22 |         image_wh = depth.shape[2], depth.shape[1]
 23 | 
 24 |     B, H, W, _ = depth.shape
 25 |     depth = depth.squeeze(-1)
 26 | 
 27 |     u_coord, v_coord = torch.meshgrid(
 28 |         torch.arange(image_wh[0]), torch.arange(image_wh[1]), indexing="xy"
 29 |     )
 30 |     u_coord = u_coord.type_as(depth).unsqueeze(0).expand(B, -1, -1)
 31 |     v_coord = v_coord.type_as(depth).unsqueeze(0).expand(B, -1, -1)
 32 | 
 33 |     # Compute the position map by back-projecting depth pixels to 3D space
 34 |     x = (
 35 |         (u_coord - intrinsics[:, 0, 2].unsqueeze(-1).unsqueeze(-1))
 36 |         * depth
 37 |         / intrinsics[:, 0, 0].unsqueeze(-1).unsqueeze(-1)
 38 |     )
 39 |     y = (
 40 |         (v_coord - intrinsics[:, 1, 2].unsqueeze(-1).unsqueeze(-1))
 41 |         * depth
 42 |         / intrinsics[:, 1, 1].unsqueeze(-1).unsqueeze(-1)
 43 |     )
 44 |     z = depth
 45 | 
 46 |     # Concatenate to form the 3D coordinates in the camera frame
 47 |     camera_coords = torch.stack([x, y, z], dim=-1)
 48 | 
 49 |     # Apply the extrinsic matrix to get coordinates in the world frame
 50 |     coords_homogeneous = torch.nn.functional.pad(
 51 |         camera_coords, (0, 1), "constant", 1.0
 52 |     )  # Add a homogeneous coordinate
 53 |     world_coords = torch.matmul(
 54 |         coords_homogeneous.view(B, -1, 4), extrinsics.transpose(1, 2)
 55 |     ).view(B, H, W, 4)
 56 | 
 57 |     # Apply the mask to the position map
 58 |     position_map = world_coords[..., :3] * mask
 59 | 
 60 |     return position_map
 61 | 
 62 | 
 63 | def get_position_map_from_depth_ortho(
 64 |     depth, mask, extrinsics, ortho_scale, image_wh=None
 65 | ):
 66 |     """Compute the position map from the depth map and the camera parameters for a batch of views
 67 |     using orthographic projection with a given ortho_scale.
 68 | 
 69 |     Args:
 70 |         depth (torch.Tensor): The depth maps with the shape (B, H, W, 1).
 71 |         mask (torch.Tensor): The masks with the shape (B, H, W, 1).
 72 |         extrinsics (torch.Tensor): The camera extrinsics matrices with the shape (B, 4, 4).
 73 |         ortho_scale (torch.Tensor): The scaling factor for the orthographic projection with the shape (B, 1, 1, 1).
 74 |         image_wh (Tuple[int, int]): Optional. The image width and height.
 75 | 
 76 |     Returns:
 77 |         torch.Tensor: The position maps with the shape (B, H, W, 3).
 78 |     """
 79 |     if image_wh is None:
 80 |         image_wh = depth.shape[2], depth.shape[1]
 81 | 
 82 |     B, H, W, _ = depth.shape
 83 |     depth = depth.squeeze(-1)
 84 | 
 85 |     # Generating grid of coordinates in the image space
 86 |     u_coord, v_coord = torch.meshgrid(
 87 |         torch.arange(0, image_wh[0]), torch.arange(0, image_wh[1]), indexing="xy"
 88 |     )
 89 |     u_coord = u_coord.type_as(depth).unsqueeze(0).expand(B, -1, -1)
 90 |     v_coord = v_coord.type_as(depth).unsqueeze(0).expand(B, -1, -1)
 91 | 
 92 |     # Compute the position map using orthographic projection with ortho_scale
 93 |     x = (u_coord - image_wh[0] / 2) / ortho_scale / image_wh[0]
 94 |     y = (v_coord - image_wh[1] / 2) / ortho_scale / image_wh[1]
 95 |     z = depth
 96 | 
 97 |     # Concatenate to form the 3D coordinates in the camera frame
 98 |     camera_coords = torch.stack([x, y, z], dim=-1)
 99 | 
100 |     # Apply the extrinsic matrix to get coordinates in the world frame
101 |     coords_homogeneous = torch.nn.functional.pad(
102 |         camera_coords, (0, 1), "constant", 1.0
103 |     )  # Add a homogeneous coordinate
104 |     world_coords = torch.matmul(
105 |         coords_homogeneous.view(B, -1, 4), extrinsics.transpose(1, 2)
106 |     ).view(B, H, W, 4)
107 | 
108 |     # Apply the mask to the position map
109 |     position_map = world_coords[..., :3] * mask
110 | 
111 |     return position_map
112 | 
113 | 
def get_opencv_from_blender(matrix_world, fov=None, image_size=None):
    """Derive rotation, translation and (optionally) intrinsics from a pose matrix.

    ``matrix_world`` is inverted and rows 1 and 2 of the inverse are negated;
    ``R`` is its upper-left 3x3 block and ``T`` its first three entries of the
    last column.

    Args:
        matrix_world: 4x4 tensor; the input itself is not modified.
        fov: Field of view (fed to ``np.tan(fov / 2)``). When ``None`` the
            camera is treated as orthographic and no intrinsics are built.
        image_size: Image side length; used only when ``fov`` is given.

    Returns:
        ``(R, T)`` with shapes (3, 3) and (3,) when ``fov`` is ``None``;
        otherwise ``(R, T, K)`` where each carries a leading batch axis of 1
        and ``K`` is the 3x3 intrinsics matrix.
    """
    world_to_cam = matrix_world.inverse()
    # Negate the second and third rows of the inverse.
    world_to_cam[1:3, :] *= -1
    R = world_to_cam[:3, :3]
    T = world_to_cam[:3, 3]

    if fov is None:  # orthographic camera
        return R, T

    R, T = R.unsqueeze(0), T.unsqueeze(0)

    # Start from a normalised focal length, then shift/scale into pixel units.
    focal = 1 / np.tan(fov / 2)
    half_size = image_size / 2
    K_np = np.diag(np.array([focal, focal, 1])).astype(np.float32)
    K = torch.from_numpy(K_np).unsqueeze(0).float().to(matrix_world.device)
    K[:, :2, -1] += torch.tensor([half_size, half_size]).to(matrix_world.device)
    K[:, [0, 1], [0, 1]] *= half_size

    return R, T, K
137 | 
138 | 
def get_ray_directions(
    H: int,
    W: int,
    focal: float,
    principal: Optional[Tuple[float, float]] = None,
    use_pixel_centers: bool = True,
) -> torch.Tensor:
    """
    Get unit ray directions for all pixels in camera coordinates.
    Args:
        H, W: image height and width in pixels
        focal: focal length (same value used for both axes)
        principal: optional principal point ``(cx, cy)``; defaults to the
            image centre ``(W / 2, H / 2)``
        use_pixel_centers: if True, sample rays at pixel centres (offset 0.5)
    Outputs:
        directions: (H, W, 3), normalized ray directions; x increases with the
            column index, y decreases with the row index, z is -1 before
            normalization
    """
    pixel_center = 0.5 if use_pixel_centers else 0
    # Parenthesise the default tuple: without it the conditional binds only to
    # the second element, so a caller-supplied principal point would land
    # wholesale in ``cy`` while ``cx`` stayed at W / 2.
    cx, cy = (W / 2, H / 2) if principal is None else principal
    i, j = torch.meshgrid(
        torch.arange(W, dtype=torch.float32) + pixel_center,
        torch.arange(H, dtype=torch.float32) + pixel_center,
        indexing="xy",
    )
    directions = torch.stack(
        [(i - cx) / focal, -(j - cy) / focal, -torch.ones_like(i)], -1
    )
    return F.normalize(directions, dim=-1)
164 | 
165 | 
def get_rays(
    directions: torch.Tensor, c2w: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Transform camera-space ray directions into world space and attach origins.
    Args:
        directions: (H, W, 3) ray directions in camera coordinates
        c2w: (4, 4) camera-to-world transformation matrix
    Outputs:
        rays_o, rays_d: (H, W, 3) ray origins and directions in world coordinates;
            rays_o is the translation of c2w expanded (not copied) to rays_d's shape
    """
    rotation = c2w[:3, :3]
    origin = c2w[:3, 3]
    # Row-vector directions times R^T is the same as applying R to each direction.
    rays_d = directions @ rotation.T
    rays_o = origin.expand(rays_d.shape)
    return rays_o, rays_d
181 | 
182 | 
def compute_plucker_embed(
    c2w: torch.Tensor, image_width: int, image_height: int, focal: float
) -> torch.Tensor:
    """
    Build the per-pixel Plucker embedding of a single camera.
    Args:
        c2w: (4, 4) camera-to-world transformation matrix
        image_width: Image width
        image_height: Image height
        focal: Focal length of the camera
    Returns:
        plucker: (6, H, W) tensor; channels 0-2 are the world-space ray
            direction and channels 3-5 are ``origin x direction``
    """
    cam_dirs = get_ray_directions(image_height, image_width, focal)
    origins, world_dirs = get_rays(cam_dirs, c2w)
    # The moment of each ray: cross product of its origin and direction.
    moment = torch.cross(origins, world_dirs, dim=-1)
    # (H, W, 6) -> (6, H, W)
    return torch.cat((world_dirs, moment), dim=-1).permute(2, 0, 1)
202 | 
203 | 
def get_plucker_embeds_from_cameras(
    c2w: List[torch.Tensor], fov: List[float], image_size: int
) -> torch.Tensor:
    """
    Stack the Plucker embeddings of several perspective cameras.
    Args:
        c2w: list of camera-to-world transformation matrices
        fov: list of field of view values, paired element-wise with ``c2w``
        image_size: side length of the (square) image
    Returns:
        plucker_embeds: (B, 6, H, W) batched plucker embeddings
    """
    # Focal length in pixels follows from the field of view and image size.
    return torch.stack(
        [
            compute_plucker_embed(
                cam_matrix,
                image_size,
                image_size,
                0.5 * image_size / np.tan(0.5 * cam_fov),
            )
            for cam_matrix, cam_fov in zip(c2w, fov)
        ]
    )
222 | 
223 | 
def get_plucker_embeds_from_cameras_ortho(
    c2w: List[torch.Tensor], ortho_scale: List[float], image_size: int
):
    """
    Build spatially constant 6-channel embeddings for orthographic cameras.

    For each camera the embedding concatenates the viewing direction with the
    normalized camera position and repeats it over every pixel.

    Parameters:
        c2w: list of camera-to-world transformation matrices
        ortho_scale: list of orthographic scales; only used to pair with
            ``c2w`` (iteration stops at the shorter list), the values
            themselves are not read
        image_size: side length of the (square) image

    Returns:
        plucker_embeds: plucker embeddings (B, 6, H, W)
    """
    per_camera = []
    for cam_matrix, _scale in zip(c2w, ortho_scale):
        # No fov is passed, so only rotation and translation are returned.
        R, T = get_opencv_from_blender(cam_matrix)

        # Camera centre from the world-to-camera pose, normalized to unit length.
        cam_pos = F.normalize(-R.T @ T, dim=0)

        # Camera +z axis expressed in world coordinates.
        forward = torch.tensor([0, 0, 1]).float().to(cam_matrix.device)
        view_dir = R.T @ forward

        embed = torch.cat([view_dir, cam_pos])
        # (6,) -> (6, image_size, image_size): identical value at every pixel.
        embed = embed.unsqueeze(-1).unsqueeze(-1).repeat(1, image_size, image_size)
        per_camera.append(embed)

    return torch.stack(per_camera)
254 | 


--------------------------------------------------------------------------------
/mvadapter/utils/saving.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | from typing import List, Optional, Union
 3 | 
 4 | import numpy as np
 5 | import torch
 6 | from PIL import Image
 7 | 
 8 | 
 9 | def tensor_to_image(
10 |     data: Union[Image.Image, torch.Tensor, np.ndarray],
11 |     batched: bool = False,
12 |     format: str = "HWC",
13 | ) -> Union[Image.Image, List[Image.Image]]:
14 |     if isinstance(data, Image.Image):
15 |         return data
16 |     if isinstance(data, torch.Tensor):
17 |         data = data.detach().cpu().numpy()
18 |     if data.dtype == np.float32 or data.dtype == np.float16:
19 |         data = (data * 255).astype(np.uint8)
20 |     elif data.dtype == np.bool_:
21 |         data = data.astype(np.uint8) * 255
22 |     assert data.dtype == np.uint8
23 |     if format == "CHW":
24 |         if batched and data.ndim == 4:
25 |             data = data.transpose((0, 2, 3, 1))
26 |         elif not batched and data.ndim == 3:
27 |             data = data.transpose((1, 2, 0))
28 | 
29 |     if batched:
30 |         return [Image.fromarray(d) for d in data]
31 |     return Image.fromarray(data)
32 | 
33 | 
def largest_factor_near_sqrt(n: int) -> int:
    """
    Return the largest factor of n that does not exceed the square root of n.

    Args:
        n (int): The integer to factor.

    Returns:
        int: The integer square root when n is a perfect square (including 0),
            otherwise the largest divisor of n below its square root; falls
            back to 1.
    """
    root = int(math.sqrt(n))

    # Perfect squares (and n == 0) short-circuit to the root itself.
    if root * root == n:
        return root

    # Walk downwards from the root; 1 divides everything, so this terminates.
    return next(
        (candidate for candidate in range(root, 0, -1) if n % candidate == 0), 1
    )
57 | 
58 | 
def make_image_grid(
    images: List[Image.Image],
    rows: Optional[int] = None,
    cols: Optional[int] = None,
    resize: Optional[int] = None,
) -> Image.Image:
    """
    Paste a list of images into one RGB grid, filled row by row.

    Whichever of ``rows``/``cols`` is missing is derived from the image
    count; when both are missing, ``rows`` comes from
    ``largest_factor_near_sqrt``. If ``resize`` is given every image is
    resized to ``(resize, resize)`` first. The cell size is taken from the
    first image.
    """
    count = len(images)

    if rows is None:
        if cols is None:
            rows = largest_factor_near_sqrt(count)
            cols = count // rows
        else:
            assert count % cols == 0
            rows = count // cols
    elif cols is None:
        assert count % rows == 0
        cols = count // rows

    assert count == rows * cols

    if resize is not None:
        images = [img.resize((resize, resize)) for img in images]

    cell_w, cell_h = images[0].size
    grid = Image.new("RGB", size=(cols * cell_w, rows * cell_h))

    for index, img in enumerate(images):
        row, col = divmod(index, cols)
        grid.paste(img, box=(col * cell_w, row * cell_h))
    return grid
89 | 


--------------------------------------------------------------------------------
/nodes.py:
--------------------------------------------------------------------------------
  1 | # Adapted from https://github.com/Limitex/ComfyUI-Diffusers/blob/main/nodes.py
  2 | import copy
  3 | import os
  4 | import torch
  5 | from safetensors.torch import load_file
  6 | from torchvision import transforms
  7 | from .utils import (
  8 |     SCHEDULERS,
  9 |     PIPELINES,
 10 |     MVADAPTERS,
 11 |     vae_pt_to_vae_diffuser,
 12 |     convert_images_to_tensors,
 13 |     convert_tensors_to_images,
 14 |     prepare_camera_embed,
 15 |     preprocess_image,
 16 | )
 17 | from comfy.model_management import get_torch_device
 18 | import folder_paths
 19 | 
 20 | from diffusers import StableDiffusionXLPipeline, AutoencoderKL, ControlNetModel
 21 | from transformers import AutoModelForImageSegmentation
 22 | 
 23 | from .mvadapter.pipelines.pipeline_mvadapter_t2mv_sdxl import MVAdapterT2MVSDXLPipeline
 24 | from .mvadapter.schedulers.scheduling_shift_snr import ShiftSNRScheduler
 25 | 
 26 | 
 27 | class DiffusersMVPipelineLoader:
 28 |     def __init__(self):
 29 |         self.hf_dir = folder_paths.get_folder_paths("diffusers")[0]
 30 |         self.dtype = torch.float16
 31 | 
 32 |     @classmethod
 33 |     def INPUT_TYPES(s):
 34 |         return {
 35 |             "required": {
 36 |                 "ckpt_name": (
 37 |                     "STRING",
 38 |                     {"default": "stabilityai/stable-diffusion-xl-base-1.0"},
 39 |                 ),
 40 |                 "pipeline_name": (
 41 |                     list(PIPELINES.keys()),
 42 |                     {"default": "MVAdapterT2MVSDXLPipeline"},
 43 |                 ),
 44 |             }
 45 |         }
 46 | 
 47 |     RETURN_TYPES = (
 48 |         "PIPELINE",
 49 |         "AUTOENCODER",
 50 |         "SCHEDULER",
 51 |     )
 52 | 
 53 |     FUNCTION = "create_pipeline"
 54 | 
 55 |     CATEGORY = "MV-Adapter"
 56 | 
 57 |     def create_pipeline(self, ckpt_name, pipeline_name):
 58 |         pipeline_class = PIPELINES[pipeline_name]
 59 |         pipe = pipeline_class.from_pretrained(
 60 |             pretrained_model_name_or_path=ckpt_name,
 61 |             torch_dtype=self.dtype,
 62 |             cache_dir=self.hf_dir,
 63 |         )
 64 |         return (pipe, pipe.vae, pipe.scheduler)
 65 | 
 66 | 
 67 | class LdmPipelineLoader:
 68 |     def __init__(self):
 69 |         self.hf_dir = folder_paths.get_folder_paths("diffusers")[0]
 70 |         self.dtype = torch.float16
 71 | 
 72 |     @classmethod
 73 |     def INPUT_TYPES(s):
 74 |         return {
 75 |             "required": {
 76 |                 "ckpt_name": (folder_paths.get_filename_list("checkpoints"),),
 77 |                 "pipeline_name": (
 78 |                     list(PIPELINES.keys()),
 79 |                     {"default": "MVAdapterT2MVSDXLPipeline"},
 80 |                 ),
 81 |             }
 82 |         }
 83 | 
 84 |     RETURN_TYPES = (
 85 |         "PIPELINE",
 86 |         "AUTOENCODER",
 87 |         "SCHEDULER",
 88 |     )
 89 | 
 90 |     FUNCTION = "create_pipeline"
 91 | 
 92 |     CATEGORY = "MV-Adapter"
 93 | 
 94 |     def create_pipeline(self, ckpt_name, pipeline_name):
 95 |         pipeline_class = PIPELINES[pipeline_name]
 96 | 
 97 |         pipe = pipeline_class.from_single_file(
 98 |             pretrained_model_link_or_path=folder_paths.get_full_path(
 99 |                 "checkpoints", ckpt_name
100 |             ),
101 |             torch_dtype=self.dtype,
102 |             cache_dir=self.hf_dir,
103 |         )
104 | 
105 |         return (pipe, pipe.vae, pipe.scheduler)
106 | 
107 | 
class DiffusersMVVaeLoader:
    """Node that loads an ``AutoencoderKL`` with ``from_pretrained``."""

    RETURN_TYPES = ("AUTOENCODER",)
    FUNCTION = "create_pipeline"
    CATEGORY = "MV-Adapter"

    def __init__(self):
        # First registered "diffusers" folder doubles as the download cache.
        self.hf_dir = folder_paths.get_folder_paths("diffusers")[0]
        self.dtype = torch.float16

    @classmethod
    def INPUT_TYPES(cls):
        vae_input = ("STRING", {"default": "madebyollin/sdxl-vae-fp16-fix"})
        return {"required": {"vae_name": vae_input}}

    def create_pipeline(self, vae_name):
        """Return the loaded VAE wrapped in a one-element tuple."""
        autoencoder = AutoencoderKL.from_pretrained(
            pretrained_model_name_or_path=vae_name,
            torch_dtype=self.dtype,
            cache_dir=self.hf_dir,
        )
        return (autoencoder,)
138 | 
139 | 
class LdmVaeLoader:
    """Node that converts a local VAE file via ``vae_pt_to_vae_diffuser``."""

    RETURN_TYPES = ("AUTOENCODER",)
    FUNCTION = "create_pipeline"
    CATEGORY = "MV-Adapter"

    def __init__(self):
        self.dtype = torch.float16

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "vae_name": (folder_paths.get_filename_list("vae"),),
            "upcast_fp32": ("BOOLEAN", {"default": True}),
        }
        return {"required": required}

    def create_pipeline(self, vae_name, upcast_fp32):
        """Convert the selected VAE file and cast it to this node's dtype."""
        vae_path = folder_paths.get_full_path("vae", vae_name)
        converted = vae_pt_to_vae_diffuser(vae_path, force_upcast=upcast_fp32)
        return (converted.to(self.dtype),)
165 | 
166 | 
class DiffusersMVSchedulerLoader:
    """Node that rebuilds a scheduler from a pipeline's scheduler config."""

    RETURN_TYPES = ("SCHEDULER",)
    FUNCTION = "load_scheduler"
    CATEGORY = "MV-Adapter"

    def __init__(self):
        self.hf_dir = folder_paths.get_folder_paths("diffusers")[0]
        self.dtype = torch.float16

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "pipeline": ("PIPELINE",),
            "scheduler_name": (list(SCHEDULERS.keys()),),
            "shift_snr": ("BOOLEAN", {"default": True}),
            "shift_mode": (
                list(ShiftSNRScheduler.SHIFT_MODES),
                {"default": "interpolated"},
            ),
            "shift_scale": (
                "FLOAT",
                {"default": 8.0, "min": 0.0, "max": 50.0, "step": 1.0},
            ),
        }
        return {"required": required}

    def load_scheduler(
        self, pipeline, scheduler_name, shift_snr, shift_mode, shift_scale
    ):
        """Create the named scheduler; optionally wrap it with ``ShiftSNRScheduler``."""
        scheduler_cls = SCHEDULERS[scheduler_name]
        base = scheduler_cls.from_config(
            pipeline.scheduler.config, torch_dtype=self.dtype
        )
        if not shift_snr:
            return (base,)

        shifted = ShiftSNRScheduler.from_scheduler(
            base,
            shift_mode=shift_mode,
            shift_scale=shift_scale,
            scheduler_class=base.__class__,
        )
        return (shifted,)
210 | 
211 | 
class LoraModelLoader:
    """Node that attaches a LoRA file to a pipeline and sets its strength.

    The node remembers the last LoRA it loaded as ``(path, adapter_name)`` in
    ``self.loaded_lora`` so the same file is not loaded twice.
    """

    def __init__(self):
        # (lora_path, adapter_name) of the LoRA loaded by this node, or None.
        self.loaded_lora = None

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "pipeline": ("PIPELINE",),
                "lora_name": (folder_paths.get_filename_list("loras"),),
                "strength_model": (
                    "FLOAT",
                    {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01},
                ),
            }
        }

    RETURN_TYPES = ("PIPELINE",)
    FUNCTION = "load_lora"
    CATEGORY = "MV-Adapter"

    def load_lora(self, pipeline, lora_name, strength_model):
        """Load (or reuse) the LoRA and apply ``strength_model`` to it.

        Args:
            pipeline: Pipeline object to modify in place.
            lora_name: File name resolved through the "loras" folder.
            strength_model: Adapter weight. A value of 0 returns the pipeline
                untouched.

        Returns:
            One-element tuple containing the same pipeline object.
        """
        if strength_model == 0:
            return (pipeline,)

        lora_path = folder_paths.get_full_path("loras", lora_name)
        lora_dir, weight_name = os.path.split(lora_path)

        cached = self.loaded_lora
        if cached is not None and cached[0] != lora_path:
            # A different LoRA was loaded by this node before: remove it first.
            pipeline.delete_adapters(cached[1])
            self.loaded_lora = None
            cached = None

        if cached is None:
            adapter_name = weight_name.rsplit(".", 1)[0]
            pipeline.load_lora_weights(
                lora_dir, weight_name=weight_name, adapter_name=adapter_name
            )
        else:
            adapter_name = cached[1]

        # Always (re)apply the strength: previously a cache hit skipped this,
        # so changing only the strength of an already-loaded LoRA did nothing.
        pipeline.set_adapters(adapter_name, strength_model)
        self.loaded_lora = (lora_path, adapter_name)

        return (pipeline,)
259 | 
260 | 
class ControlNetModelLoader:
    """Node that attaches a ControlNet model to a pipeline.

    ``self.loaded_controlnet`` holds the name of the ControlNet this node
    last attached (or ``None``), so a repeated request for the same name is
    a no-op.
    """

    RETURN_TYPES = ("PIPELINE",)
    FUNCTION = "load_controlnet"
    CATEGORY = "MV-Adapter"

    def __init__(self):
        self.loaded_controlnet = None
        self.dtype = torch.float16
        self.torch_device = get_torch_device()
        self.hf_dir = folder_paths.get_folder_paths("diffusers")[0]

    @classmethod
    def INPUT_TYPES(cls):
        name_input = (
            "STRING",
            {"default": "xinsir/controlnet-scribble-sdxl-1.0"},
        )
        return {
            "required": {
                "pipeline": ("PIPELINE",),
                "controlnet_name": name_input,
            }
        }

    def load_controlnet(self, pipeline, controlnet_name):
        """Attach ``controlnet_name`` to ``pipeline`` unless it is already recorded."""
        previous = self.loaded_controlnet
        if previous is not None:
            if previous == controlnet_name:
                # Same model as last time: leave the pipeline as it is.
                return (pipeline,)
            # A different model was attached before: drop it.
            del pipeline.controlnet
            self.loaded_controlnet = None

        model = ControlNetModel.from_pretrained(
            controlnet_name, cache_dir=self.hf_dir, torch_dtype=self.dtype
        )
        pipeline.controlnet = model
        pipeline.controlnet.to(device=self.torch_device, dtype=self.dtype)
        self.loaded_controlnet = controlnet_name

        return (pipeline,)
304 | 
305 | 
class DiffusersMVModelMakeup:
    """Node that wires a VAE, scheduler and (optionally) the MV adapter into a pipeline."""

    RETURN_TYPES = ("PIPELINE",)
    FUNCTION = "makeup_pipeline"
    CATEGORY = "MV-Adapter"

    def __init__(self):
        self.hf_dir = folder_paths.get_folder_paths("diffusers")[0]
        self.torch_device = get_torch_device()
        self.dtype = torch.float16

    @classmethod
    def INPUT_TYPES(cls):
        required = {
            "pipeline": ("PIPELINE",),
            "scheduler": ("SCHEDULER",),
            "autoencoder": ("AUTOENCODER",),
            "load_mvadapter": ("BOOLEAN", {"default": True}),
            "adapter_path": ("STRING", {"default": "huanngzh/mv-adapter"}),
            "adapter_name": (
                MVADAPTERS,
                {"default": "mvadapter_t2mv_sdxl.safetensors"},
            ),
            "num_views": ("INT", {"default": 6, "min": 1, "max": 12}),
        }
        optional = {
            "enable_vae_slicing": ("BOOLEAN", {"default": True}),
            "enable_vae_tiling": ("BOOLEAN", {"default": False}),
        }
        return {"required": required, "optional": optional}

    def makeup_pipeline(
        self,
        pipeline,
        scheduler,
        autoencoder,
        load_mvadapter,
        adapter_path,
        adapter_name,
        num_views,
        enable_vae_slicing=True,
        enable_vae_tiling=False,
    ):
        """Configure ``pipeline`` in place and move it to this node's device/dtype.

        Steps, in order: replace the VAE and scheduler; if ``load_mvadapter``
        is set, initialise and load the custom adapter and move
        ``cond_encoder`` to the target device/dtype; move the pipeline; then
        enable VAE slicing and/or tiling as requested.
        """
        device, dtype = self.torch_device, self.dtype

        pipeline.vae = autoencoder
        pipeline.scheduler = scheduler

        if load_mvadapter:
            pipeline.init_custom_adapter(num_views=num_views)
            pipeline.load_custom_adapter(
                adapter_path, weight_name=adapter_name, cache_dir=self.hf_dir
            )
            pipeline.cond_encoder.to(device=device, dtype=dtype)

        pipeline = pipeline.to(device, dtype)

        if enable_vae_slicing:
            pipeline.enable_vae_slicing()
        if enable_vae_tiling:
            pipeline.enable_vae_tiling()

        return (pipeline,)
369 | 
370 | 
class DiffusersSampler:
    """Node that runs a pipeline on a text prompt and returns image tensors."""

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "sample"
    CATEGORY = "MV-Adapter"

    def __init__(self):
        self.torch_device = get_torch_device()

    @classmethod
    def INPUT_TYPES(cls):
        size_spec = {"default": 768, "min": 1, "max": 2048, "step": 1}
        required = {
            "pipeline": ("PIPELINE",),
            "prompt": (
                "STRING",
                {"multiline": True, "default": "a photo of a cat"},
            ),
            "negative_prompt": (
                "STRING",
                {
                    "multiline": True,
                    "default": "watermark, ugly, deformed, noisy, blurry, low contrast",
                },
            ),
            "width": ("INT", dict(size_spec)),
            "height": ("INT", dict(size_spec)),
            "steps": ("INT", {"default": 50, "min": 1, "max": 2000}),
            "cfg": (
                "FLOAT",
                {
                    "default": 7.0,
                    "min": 0.0,
                    "max": 100.0,
                    "step": 0.1,
                    "round": 0.01,
                },
            ),
            "seed": ("INT", {"default": 0, "min": 0, "max": 0xFFFFFFFFFFFFFFFF}),
        }
        return {"required": required}

    def sample(
        self,
        pipeline,
        prompt,
        negative_prompt,
        height,
        width,
        steps,
        cfg,
        seed,
    ):
        """Call the pipeline with a seeded generator and convert its images to tensors."""
        output = pipeline(
            prompt=prompt,
            height=height,
            width=width,
            num_inference_steps=steps,
            guidance_scale=cfg,
            negative_prompt=negative_prompt,
            generator=torch.Generator(self.torch_device).manual_seed(seed),
        )
        return (convert_images_to_tensors(output.images),)
435 | 
436 | 
class DiffusersMVSampler:
    """Node that runs a multi-view pipeline, one output image per azimuth.

    The number of generated views is always ``len(azimuth_degrees)``; the
    ``num_views`` input is accepted for interface compatibility but is
    overridden inside ``sample``.
    """

    # Azimuths (in degrees) used when no explicit view list is supplied.
    DEFAULT_AZIMUTH_DEGREES = (0, 45, 90, 180, 270, 315)

    def __init__(self):
        self.torch_device = get_torch_device()

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "pipeline": ("PIPELINE",),
                "num_views": ("INT", {"default": 6, "min": 1, "max": 12}),
                "prompt": (
                    "STRING",
                    {"multiline": True, "default": "an astronaut riding a horse"},
                ),
                "negative_prompt": (
                    "STRING",
                    {
                        "multiline": True,
                        "default": "watermark, ugly, deformed, noisy, blurry, low contrast",
                    },
                ),
                "width": ("INT", {"default": 768, "min": 1, "max": 2048, "step": 1}),
                "height": ("INT", {"default": 768, "min": 1, "max": 2048, "step": 1}),
                "steps": ("INT", {"default": 50, "min": 1, "max": 2000}),
                "cfg": (
                    "FLOAT",
                    {
                        "default": 7.0,
                        "min": 0.0,
                        "max": 100.0,
                        "step": 0.1,
                        "round": 0.01,
                    },
                ),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xFFFFFFFFFFFFFFFF}),
            },
            "optional": {
                "reference_image": ("IMAGE",),
                "controlnet_image": ("IMAGE",),
                "controlnet_conditioning_scale": ("FLOAT", {"default": 1.0}),
                "azimuth_degrees": (
                    "LIST",
                    {"default": list(s.DEFAULT_AZIMUTH_DEGREES)},
                ),
            },
        }

    RETURN_TYPES = ("IMAGE",)

    FUNCTION = "sample"

    CATEGORY = "MV-Adapter"

    def sample(
        self,
        pipeline,
        num_views,
        prompt,
        negative_prompt,
        height,
        width,
        steps,
        cfg,
        seed,
        reference_image=None,
        controlnet_image=None,
        controlnet_conditioning_scale=1.0,
        azimuth_degrees=None,
    ):
        """Generate one image per requested azimuth and return them as tensors.

        Args:
            pipeline: Callable pipeline object; its output must expose ``.images``.
            num_views: Ignored; replaced by ``len(azimuth_degrees)``.
            prompt, negative_prompt, height, width, steps, cfg: Forwarded to
                the pipeline call.
            seed: Seed for the ``torch.Generator`` created on ``self.torch_device``.
            reference_image: Optional image batch; only its first image is
                forwarded, with ``reference_conditioning_scale`` fixed at 1.0.
            controlnet_image: Optional image batch forwarded as a list of images.
            controlnet_conditioning_scale: Forwarded only when
                ``controlnet_image`` is given.
            azimuth_degrees: View azimuths in degrees. ``None`` selects
                ``DEFAULT_AZIMUTH_DEGREES``.

        Returns:
            A one-element tuple holding the converted image tensor.

        Raises:
            ValueError: If ``azimuth_degrees`` is empty.
        """
        # A None default avoids sharing one mutable list across calls.
        if azimuth_degrees is None:
            azimuth_degrees = list(self.DEFAULT_AZIMUTH_DEGREES)
        if not azimuth_degrees:
            # Fail early with a clear message instead of asking the pipeline
            # for zero images.
            raise ValueError(
                "azimuth_degrees is empty: at least one view must be selected"
            )

        num_views = len(azimuth_degrees)
        control_images = prepare_camera_embed(
            num_views, width, self.torch_device, azimuth_degrees
        )

        pipe_kwargs = {}
        if reference_image is not None:
            pipe_kwargs.update(
                {
                    "reference_image": convert_tensors_to_images(reference_image)[0],
                    "reference_conditioning_scale": 1.0,
                }
            )
        if controlnet_image is not None:
            controlnet_image = convert_tensors_to_images(controlnet_image)
            pipe_kwargs.update(
                {
                    "controlnet_image": controlnet_image,
                    "controlnet_conditioning_scale": controlnet_conditioning_scale,
                }
            )

        images = pipeline(
            prompt=prompt,
            height=height,
            width=width,
            num_inference_steps=steps,
            guidance_scale=cfg,
            num_images_per_prompt=num_views,
            control_image=control_images,
            control_conditioning_scale=1.0,
            negative_prompt=negative_prompt,
            generator=torch.Generator(self.torch_device).manual_seed(seed),
            cross_attention_kwargs={"num_views": num_views},
            **pipe_kwargs,
        ).images
        return (convert_images_to_tensors(images),)
540 | 
541 | 
class BiRefNet:
    """Node that loads a segmentation checkpoint and returns a background-removal callable."""

    RETURN_TYPES = ("FUNCTION",)

    FUNCTION = "load_model_fn"

    CATEGORY = "MV-Adapter"

    def __init__(self):
        self.hf_dir = folder_paths.get_folder_paths("diffusers")[0]
        self.torch_device = get_torch_device()
        self.dtype = torch.float32

    @classmethod
    def INPUT_TYPES(s):
        ckpt_widget = ("STRING", {"default": "briaai/RMBG-2.0"})
        return {"required": {"ckpt_name": ckpt_widget}}

    def remove_bg(self, image, net, transform, device):
        """Attach the mask predicted by ``net`` to ``image`` as its alpha channel.

        ``image`` is modified in place (via ``putalpha``) and also returned.
        """
        original_size = image.size
        batch = transform(image).unsqueeze(0).to(device)
        with torch.no_grad():
            # The last element of the network output is used as the mask logits.
            probabilities = net(batch)[-1].sigmoid().cpu()
        matte = transforms.ToPILImage()(probabilities[0].squeeze())
        image.putalpha(matte.resize(original_size))
        return image

    def load_model_fn(self, ckpt_name):
        """Load ``ckpt_name`` and return a one-argument background remover in a tuple."""
        segmenter = AutoModelForImageSegmentation.from_pretrained(
            ckpt_name, trust_remote_code=True, cache_dir=self.hf_dir
        ).to(self.torch_device, self.dtype)

        preprocess = transforms.Compose(
            [
                transforms.Resize((1024, 1024)),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]
        )

        def remove_bg_fn(x):
            # The device is read at call time, as in the original closure.
            return self.remove_bg(x, segmenter, preprocess, self.torch_device)

        return (remove_bg_fn,)
588 | 
589 | 
class ImagePreprocessor:
    """Node that removes the background from each image and fits it to a canvas.

    Every image in the batch is converted to RGB, passed through
    ``remove_bg_fn``, and then through ``preprocess_image`` with the
    requested ``height`` and ``width``.
    """

    def __init__(self):
        self.torch_device = get_torch_device()

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "remove_bg_fn": ("FUNCTION",),
                "image": ("IMAGE",),
                "height": ("INT", {"default": 768, "min": 1, "max": 2048, "step": 1}),
                "width": ("INT", {"default": 768, "min": 1, "max": 2048, "step": 1}),
            }
        }

    RETURN_TYPES = ("IMAGE",)

    FUNCTION = "process"

    # Declared so this node is grouped with the other nodes of this package,
    # which all use the same category string.
    CATEGORY = "MV-Adapter"

    def process(self, remove_bg_fn, image, height, width):
        """Return the preprocessed batch as a one-element tuple of tensors."""
        images = convert_tensors_to_images(image)
        images = [
            preprocess_image(remove_bg_fn(img.convert("RGB")), height, width)
            for img in images
        ]

        return (convert_images_to_tensors(images),)
617 | 
618 | 
class ControlImagePreprocessor:
    """Node that stacks six per-view images into one resized RGB batch.

    The views are concatenated along dimension 0 in the order front,
    front-right, right, back, left, front-left.
    """

    def __init__(self):
        self.torch_device = get_torch_device()

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "front_view": ("IMAGE",),
                "front_right_view": ("IMAGE",),
                "right_view": ("IMAGE",),
                "back_view": ("IMAGE",),
                "left_view": ("IMAGE",),
                "front_left_view": ("IMAGE",),
                "width": ("INT", {"default": 768, "min": 1, "max": 2048, "step": 1}),
                "height": ("INT", {"default": 768, "min": 1, "max": 2048, "step": 1}),
            }
        }

    RETURN_TYPES = ("IMAGE",)

    FUNCTION = "process"

    # Declared so this node is grouped with the other nodes of this package,
    # which all use the same category string.
    CATEGORY = "MV-Adapter"

    def process(
        self,
        front_view,
        front_right_view,
        right_view,
        back_view,
        left_view,
        front_left_view,
        width,
        height,
    ):
        """Concatenate the views, resize each to ``(width, height)``, convert to RGB.

        Returns:
            A one-element tuple holding the converted image tensor.
        """
        images = torch.cat(
            [
                front_view,
                front_right_view,
                right_view,
                back_view,
                left_view,
                front_left_view,
            ],
            dim=0,
        )
        images = convert_tensors_to_images(images)
        images = [img.resize((width, height)).convert("RGB") for img in images]
        return (convert_images_to_tensors(images),)
667 | 
668 | 
class ViewSelector:
    """Node that turns six boolean view toggles into a list of azimuth angles."""

    RETURN_TYPES = ("LIST",)
    FUNCTION = "process"
    CATEGORY = "MV-Adapter"

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(s):
        toggle_names = (
            "front_view",
            "front_right_view",
            "right_view",
            "back_view",
            "left_view",
            "front_left_view",
        )
        return {
            "required": {
                name: ("BOOLEAN", {"default": True}) for name in toggle_names
            }
        }

    def process(
        self,
        front_view,
        front_right_view,
        right_view,
        back_view,
        left_view,
        front_left_view,
    ):
        """Return ``(angles,)`` with the azimuth of every enabled view, in fixed order."""
        toggles = (
            (front_view, 0),
            (front_right_view, 45),
            (right_view, 90),
            (back_view, 180),
            (left_view, 270),
            (front_left_view, 315),
        )
        selected = [angle for enabled, angle in toggles if enabled]
        return (selected,)
714 | 
715 | 
# Maps each node identifier to the class that implements it.
# Every key here has a matching entry in NODE_DISPLAY_NAME_MAPPINGS below.
NODE_CLASS_MAPPINGS = {
    "LdmPipelineLoader": LdmPipelineLoader,
    "LdmVaeLoader": LdmVaeLoader,
    "DiffusersMVPipelineLoader": DiffusersMVPipelineLoader,
    "DiffusersMVVaeLoader": DiffusersMVVaeLoader,
    "DiffusersMVSchedulerLoader": DiffusersMVSchedulerLoader,
    "DiffusersMVModelMakeup": DiffusersMVModelMakeup,
    "LoraModelLoader": LoraModelLoader,
    "DiffusersMVSampler": DiffusersMVSampler,
    "BiRefNet": BiRefNet,
    "ImagePreprocessor": ImagePreprocessor,
    "ControlNetModelLoader": ControlNetModelLoader,
    "ControlImagePreprocessor": ControlImagePreprocessor,
    "ViewSelector": ViewSelector,
}

# Maps each node identifier to a human-readable title.
NODE_DISPLAY_NAME_MAPPINGS = {
    "LdmPipelineLoader": "LDM Pipeline Loader",
    "LdmVaeLoader": "LDM Vae Loader",
    "DiffusersMVPipelineLoader": "Diffusers MV Pipeline Loader",
    "DiffusersMVVaeLoader": "Diffusers MV Vae Loader",
    "DiffusersMVSchedulerLoader": "Diffusers MV Scheduler Loader",
    "DiffusersMVModelMakeup": "Diffusers MV Model Makeup",
    "LoraModelLoader": "Lora Model Loader",
    "DiffusersMVSampler": "Diffusers MV Sampler",
    "BiRefNet": "BiRefNet",
    "ImagePreprocessor": "Image Preprocessor",
    "ControlNetModelLoader": "ControlNet Model Loader",
    "ControlImagePreprocessor": "Control Image Preprocessor",
    "ViewSelector": "View Selector",
}
747 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "comfyui-mvadapter"
 3 | description = "This extension integrates [a/MV-Adapter](https://github.com/huanngzh/MV-Adapter) into ComfyUI, allowing users to generate multi-view consistent images from text prompts or single images directly within the ComfyUI interface."
 4 | version = "1.0.1"
 5 | license = {file = "LICENSE"}
 6 | dependencies = ["torch>=2.1.1", "torchvision>=0.16.1", "diffusers>=0.31.0", "transformers>=4.46.3", "peft", "numpy>=1.26.2", "huggingface_hub>=0.24.6", "accelerate>=1.1.1", "opencv-python", "safetensors", "pillow", "omegaconf", "trimesh", "einops", "timm", "kornia", "scikit-image"]
 7 | 
 8 | [project.urls]
 9 | Repository = "https://github.com/huanngzh/ComfyUI-MVAdapter"
10 | #  Used by Comfy Registry https://comfyregistry.org
11 | 
12 | [tool.comfy]
13 | PublisherId = "huanngzh"
14 | DisplayName = "ComfyUI-MVAdapter"
15 | Icon = ""
16 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | torch>=2.1.1
 2 | torchvision>=0.16.1
 3 | diffusers==0.31.0
 4 | transformers==4.46.3
 5 | peft
 6 | numpy>=1.26.2
 7 | huggingface_hub==0.24.6
 8 | accelerate==1.1.1
 9 | opencv-python
10 | safetensors
11 | pillow
12 | omegaconf
13 | trimesh
14 | einops
15 | timm
16 | kornia
17 | scikit-image


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | # Adapted from https://github.com/Limitex/ComfyUI-Diffusers/blob/main/utils.py
  2 | 
  3 | import io
  4 | import os
  5 | import torch
  6 | import requests
  7 | import numpy as np
  8 | from PIL import Image
  9 | from omegaconf import OmegaConf
 10 | from torchvision.transforms import ToTensor
 11 | from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
 12 |     assign_to_checkpoint,
 13 |     conv_attn_to_linear,
 14 |     create_vae_diffusers_config,
 15 |     renew_vae_attention_paths,
 16 |     renew_vae_resnet_paths,
 17 | )
 18 | from diffusers import (
 19 |     AutoencoderKL,
 20 |     DDIMScheduler,
 21 |     DDPMScheduler,
 22 |     DEISMultistepScheduler,
 23 |     DPMSolverMultistepScheduler,
 24 |     DPMSolverSinglestepScheduler,
 25 |     EulerAncestralDiscreteScheduler,
 26 |     EulerDiscreteScheduler,
 27 |     HeunDiscreteScheduler,
 28 |     KDPM2AncestralDiscreteScheduler,
 29 |     KDPM2DiscreteScheduler,
 30 |     UniPCMultistepScheduler,
 31 |     LCMScheduler,
 32 |     StableDiffusionXLPipeline,
 33 | )
 34 | 
 35 | from .mvadapter.pipelines.pipeline_mvadapter_t2mv_sdxl import MVAdapterT2MVSDXLPipeline
 36 | from .mvadapter.pipelines.pipeline_mvadapter_i2mv_sdxl import MVAdapterI2MVSDXLPipeline
 37 | from .mvadapter.pipelines.pipeline_mvadapter_i2mv_sd import MVAdapterI2MVSDPipeline
 38 | from .mvadapter.pipelines.pipeline_mvadapter_t2mv_sd import MVAdapterT2MVSDPipeline
 39 | from .mvadapter.utils import (
 40 |     get_orthogonal_camera,
 41 |     get_plucker_embeds_from_cameras_ortho,
 42 |     make_image_grid,
 43 | )
 44 | 
 45 | 
# Absolute path of the "cache" directory that sits next to this module.
NODE_CACHE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cache")

# Pipeline classes addressable by their class name.
PIPELINES = {
    "StableDiffusionXLPipeline": StableDiffusionXLPipeline,
    "MVAdapterT2MVSDXLPipeline": MVAdapterT2MVSDXLPipeline,
    "MVAdapterI2MVSDXLPipeline": MVAdapterI2MVSDXLPipeline,
    "MVAdapterI2MVSDPipeline": MVAdapterI2MVSDPipeline,
    "MVAdapterT2MVSDPipeline": MVAdapterT2MVSDPipeline,
}

# Scheduler classes addressable by a short name (class name minus "Scheduler").
SCHEDULERS = {
    "DDIM": DDIMScheduler,
    "DDPM": DDPMScheduler,
    "DEISMultistep": DEISMultistepScheduler,
    "DPMSolverMultistep": DPMSolverMultistepScheduler,
    "DPMSolverSinglestep": DPMSolverSinglestepScheduler,
    "EulerAncestralDiscrete": EulerAncestralDiscreteScheduler,
    "EulerDiscrete": EulerDiscreteScheduler,
    "HeunDiscrete": HeunDiscreteScheduler,
    "KDPM2AncestralDiscrete": KDPM2AncestralDiscreteScheduler,
    "KDPM2Discrete": KDPM2DiscreteScheduler,
    "UniPCMultistep": UniPCMultistepScheduler,
    "LCM": LCMScheduler,
}

# Known adapter weight file names.
# NOTE(review): presumably offered as choices by a loader node — confirm against nodes.py.
MVADAPTERS = [
    "mvadapter_t2mv_sdxl.safetensors",
    "mvadapter_i2mv_sdxl.safetensors",
    "mvadapter_i2mv_sdxl_beta.safetensors",
    "mvadapter_t2mv_sd21.safetensors",
    "mvadapter_i2mv_sd21.safetensors",
]
 78 | 
 79 | 
 80 | # Reference from : https://github.com/huggingface/diffusers/blob/main/scripts/convert_vae_pt_to_diffusers.py
def custom_convert_ldm_vae_checkpoint(checkpoint, config):
    """Re-key an LDM-layout VAE state dict into the diffusers key layout.

    Args:
        checkpoint: Mapping from LDM-style parameter names (for example
            ``encoder.down.0.block...``) to tensors. NOTE: this mapping is
            mutated; the encoder downsample conv entries are popped from it.
        config: Passed through unchanged to ``assign_to_checkpoint``.

    Returns:
        A new dict keyed with diffusers-style names (for example
        ``encoder.down_blocks.0.resnets...``).

    Raises:
        KeyError: If one of the fixed top-level entries (conv_in, conv_out,
            norm_out, quant_conv, post_quant_conv) is missing.
    """
    # Alias, not a copy: later pops are visible to the caller.
    vae_state_dict = checkpoint

    new_checkpoint = {}

    # Encoder stem/head: copied one-to-one; only norm_out is renamed to conv_norm_out.
    new_checkpoint["encoder.conv_in.weight"] = vae_state_dict["encoder.conv_in.weight"]
    new_checkpoint["encoder.conv_in.bias"] = vae_state_dict["encoder.conv_in.bias"]
    new_checkpoint["encoder.conv_out.weight"] = vae_state_dict[
        "encoder.conv_out.weight"
    ]
    new_checkpoint["encoder.conv_out.bias"] = vae_state_dict["encoder.conv_out.bias"]
    new_checkpoint["encoder.conv_norm_out.weight"] = vae_state_dict[
        "encoder.norm_out.weight"
    ]
    new_checkpoint["encoder.conv_norm_out.bias"] = vae_state_dict[
        "encoder.norm_out.bias"
    ]

    # Decoder stem/head: same treatment as the encoder.
    new_checkpoint["decoder.conv_in.weight"] = vae_state_dict["decoder.conv_in.weight"]
    new_checkpoint["decoder.conv_in.bias"] = vae_state_dict["decoder.conv_in.bias"]
    new_checkpoint["decoder.conv_out.weight"] = vae_state_dict[
        "decoder.conv_out.weight"
    ]
    new_checkpoint["decoder.conv_out.bias"] = vae_state_dict["decoder.conv_out.bias"]
    new_checkpoint["decoder.conv_norm_out.weight"] = vae_state_dict[
        "decoder.norm_out.weight"
    ]
    new_checkpoint["decoder.conv_norm_out.bias"] = vae_state_dict[
        "decoder.norm_out.bias"
    ]

    # Quantization convs keep their names.
    new_checkpoint["quant_conv.weight"] = vae_state_dict["quant_conv.weight"]
    new_checkpoint["quant_conv.bias"] = vae_state_dict["quant_conv.bias"]
    new_checkpoint["post_quant_conv.weight"] = vae_state_dict["post_quant_conv.weight"]
    new_checkpoint["post_quant_conv.bias"] = vae_state_dict["post_quant_conv.bias"]

    # Retrieves the keys for the encoder down blocks only
    # (the block count is the number of distinct "encoder.down.<n>" prefixes).
    num_down_blocks = len(
        {
            ".".join(layer.split(".")[:3])
            for layer in vae_state_dict
            if "encoder.down" in layer
        }
    )
    # Keys are grouped per block index by substring match on "down.<id>".
    down_blocks = {
        layer_id: [key for key in vae_state_dict if f"down.{layer_id}" in key]
        for layer_id in range(num_down_blocks)
    }

    # Retrieves the keys for the decoder up blocks only
    num_up_blocks = len(
        {
            ".".join(layer.split(".")[:3])
            for layer in vae_state_dict
            if "decoder.up" in layer
        }
    )
    up_blocks = {
        layer_id: [key for key in vae_state_dict if f"up.{layer_id}" in key]
        for layer_id in range(num_up_blocks)
    }

    # Encoder down blocks: resnets go through the path-renaming helpers,
    # the downsample conv is moved by hand.
    for i in range(num_down_blocks):
        resnets = [
            key
            for key in down_blocks[i]
            if f"down.{i}" in key and f"down.{i}.downsample" not in key
        ]

        if f"encoder.down.{i}.downsample.conv.weight" in vae_state_dict:
            # pop() removes these entries from the caller's checkpoint as well.
            new_checkpoint[f"encoder.down_blocks.{i}.downsamplers.0.conv.weight"] = (
                vae_state_dict.pop(f"encoder.down.{i}.downsample.conv.weight")
            )
            new_checkpoint[f"encoder.down_blocks.{i}.downsamplers.0.conv.bias"] = (
                vae_state_dict.pop(f"encoder.down.{i}.downsample.conv.bias")
            )

        paths = renew_vae_resnet_paths(resnets)
        meta_path = {"old": f"down.{i}.block", "new": f"down_blocks.{i}.resnets"}
        assign_to_checkpoint(
            paths,
            new_checkpoint,
            vae_state_dict,
            additional_replacements=[meta_path],
            config=config,
        )

    # Encoder mid block: block_1/block_2 become mid_block.resnets.0/1.
    mid_resnets = [key for key in vae_state_dict if "encoder.mid.block" in key]
    num_mid_res_blocks = 2
    for i in range(1, num_mid_res_blocks + 1):
        resnets = [key for key in mid_resnets if f"encoder.mid.block_{i}" in key]

        paths = renew_vae_resnet_paths(resnets)
        meta_path = {"old": f"mid.block_{i}", "new": f"mid_block.resnets.{i - 1}"}
        assign_to_checkpoint(
            paths,
            new_checkpoint,
            vae_state_dict,
            additional_replacements=[meta_path],
            config=config,
        )

    # Encoder mid attention: attn_1 becomes mid_block.attentions.0.
    mid_attentions = [key for key in vae_state_dict if "encoder.mid.attn" in key]
    paths = renew_vae_attention_paths(mid_attentions)
    meta_path = {"old": "mid.attn_1", "new": "mid_block.attentions.0"}
    assign_to_checkpoint(
        paths,
        new_checkpoint,
        vae_state_dict,
        additional_replacements=[meta_path],
        config=config,
    )
    # NOTE(review): presumably reshapes conv-style attention weights to linear
    # shape in place — confirm against the diffusers helper.
    conv_attn_to_linear(new_checkpoint)

    # Decoder up blocks: indices are reversed, so source block
    # (num_up_blocks - 1 - i) is written to destination up_blocks.<i>.
    for i in range(num_up_blocks):
        block_id = num_up_blocks - 1 - i
        resnets = [
            key
            for key in up_blocks[block_id]
            if f"up.{block_id}" in key and f"up.{block_id}.upsample" not in key
        ]

        if f"decoder.up.{block_id}.upsample.conv.weight" in vae_state_dict:
            # Unlike the encoder side, these entries are read without being popped.
            new_checkpoint[f"decoder.up_blocks.{i}.upsamplers.0.conv.weight"] = (
                vae_state_dict[f"decoder.up.{block_id}.upsample.conv.weight"]
            )
            new_checkpoint[f"decoder.up_blocks.{i}.upsamplers.0.conv.bias"] = (
                vae_state_dict[f"decoder.up.{block_id}.upsample.conv.bias"]
            )

        paths = renew_vae_resnet_paths(resnets)
        meta_path = {"old": f"up.{block_id}.block", "new": f"up_blocks.{i}.resnets"}
        assign_to_checkpoint(
            paths,
            new_checkpoint,
            vae_state_dict,
            additional_replacements=[meta_path],
            config=config,
        )

    # Decoder mid block: same renaming as the encoder mid block.
    mid_resnets = [key for key in vae_state_dict if "decoder.mid.block" in key]
    num_mid_res_blocks = 2
    for i in range(1, num_mid_res_blocks + 1):
        resnets = [key for key in mid_resnets if f"decoder.mid.block_{i}" in key]

        paths = renew_vae_resnet_paths(resnets)
        meta_path = {"old": f"mid.block_{i}", "new": f"mid_block.resnets.{i - 1}"}
        assign_to_checkpoint(
            paths,
            new_checkpoint,
            vae_state_dict,
            additional_replacements=[meta_path],
            config=config,
        )

    # Decoder mid attention.
    mid_attentions = [key for key in vae_state_dict if "decoder.mid.attn" in key]
    paths = renew_vae_attention_paths(mid_attentions)
    meta_path = {"old": "mid.attn_1", "new": "mid_block.attentions.0"}
    assign_to_checkpoint(
        paths,
        new_checkpoint,
        vae_state_dict,
        additional_replacements=[meta_path],
        config=config,
    )
    conv_attn_to_linear(new_checkpoint)
    return new_checkpoint
248 | 
249 | 
250 | # Reference from : https://github.com/huggingface/diffusers/blob/main/scripts/convert_vae_pt_to_diffusers.py
def vae_pt_to_vae_diffuser(checkpoint_path: str, force_upcast: bool = True):
    """Load an LDM-format VAE checkpoint and return it as an ``AutoencoderKL``.

    The model configuration comes from the bundled
    ``stable-diffusion-v1-inference.yaml`` in ``NODE_CACHE_PATH``. If that
    file is missing, the same config is downloaded from the CompVis
    repository instead.

    Args:
        checkpoint_path: Path to the checkpoint. Paths ending in
            ``safetensors`` are read with ``safetensors``; anything else is
            read with ``torch.load``.
        force_upcast: Value stored under ``force_upcast`` in the VAE config.

    Returns:
        An ``AutoencoderKL`` with the converted weights loaded.

    Raises:
        requests.RequestException: If the fallback config download fails,
            times out, or returns an HTTP error status.
    """
    config_path = os.path.join(NODE_CACHE_PATH, "stable-diffusion-v1-inference.yaml")
    try:
        original_config = OmegaConf.load(config_path)
    except FileNotFoundError as e:
        print(f"Warning: {e}")

        # A timeout keeps node execution from hanging forever on a dead
        # connection, and raise_for_status() stops an HTML error page from
        # being parsed as YAML.
        r = requests.get(
            "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml",
            timeout=30,
        )
        r.raise_for_status()
        original_config = OmegaConf.load(io.BytesIO(r.content))

    image_size = 512
    if checkpoint_path.endswith("safetensors"):
        from safetensors import safe_open

        with safe_open(checkpoint_path, framework="pt", device="cpu") as f:
            checkpoint = {key: f.get_tensor(key) for key in f.keys()}
    else:
        # NOTE(security): torch.load unpickles the file, which can execute
        # arbitrary code. Only load checkpoints from trusted sources, or
        # prefer the safetensors format.
        # Weights are loaded on CPU, matching the safetensors branch; the
        # AutoencoderKL below is built on CPU and load_state_dict copies the
        # values in, so no accelerator memory is needed here.
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        # Accept both wrapped ({"state_dict": ...}) and bare state dicts.
        if isinstance(checkpoint, dict) and "state_dict" in checkpoint:
            checkpoint = checkpoint["state_dict"]

    # Convert the VAE model.
    vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
    vae_config.update({"force_upcast": force_upcast})
    converted_vae_checkpoint = custom_convert_ldm_vae_checkpoint(checkpoint, vae_config)

    vae = AutoencoderKL(**vae_config)
    vae.load_state_dict(converted_vae_checkpoint)

    return vae
287 | 
288 | 
def convert_images_to_tensors(images: list[Image.Image]):
    """Stack PIL images into one channel-last tensor.

    Each image is converted with torchvision's ``ToTensor`` and its three
    dimensions are reordered with ``permute(1, 2, 0)`` before stacking.

    Args:
        images: Images to convert; all must produce tensors of the same shape.

    Returns:
        A tensor with one leading entry per input image.
    """
    to_tensor = ToTensor()
    # permute() is the native torch operation; the previous np.transpose call
    # only worked through NumPy's fallback dispatch on non-ndarray inputs.
    return torch.stack([to_tensor(image).permute(1, 2, 0) for image in images])
291 | 
292 | 
def convert_tensors_to_images(images: torch.Tensor):
    """Convert a batch tensor into a list of PIL images.

    Each element along the first dimension is scaled by 255, clipped to
    ``[0, 255]``, cast to ``uint8`` and handed to ``Image.fromarray``.

    Args:
        images: Iterable of per-image tensors.
            # assumes values are floats in [0, 1] — TODO confirm against callers

    Returns:
        A list with one PIL image per element of ``images``.
    """
    return [
        Image.fromarray(
            # detach() lets tensors that still require grad be converted.
            np.clip(255.0 * image.detach().cpu().numpy(), 0, 255).astype(np.uint8)
        )
        for image in images
    ]
298 | 
299 | 
def resize_images(images: list[Image.Image], size: tuple[int, int]):
    """Return a new list holding each image resized to ``size``."""
    resized = []
    for picture in images:
        resized.append(picture.resize(size))
    return resized
302 | 
303 | 
def prepare_camera_embed(num_views, size, device, azimuth_degrees=None):
    """Build the camera control images for ``num_views`` orthographic views.

    Args:
        num_views: Number of views; sets the length of the per-view
            elevation, distance and frustum-size lists.
        size: Forwarded to ``get_plucker_embeds_from_cameras_ortho``.
        device: Device passed to ``get_orthogonal_camera``.
        azimuth_degrees: Per-view azimuths in degrees. ``None`` selects the
            six views ``[0, 45, 90, 180, 270, 315]``.
            # NOTE(review): the list length is expected to equal num_views — confirm

    Returns:
        The embeddings mapped by ``(x + 1) / 2`` and clamped to ``[0, 1]``.
    """
    if azimuth_degrees is None:
        # The declared default used to be iterated directly, raising TypeError.
        azimuth_degrees = [0, 45, 90, 180, 270, 315]

    cameras = get_orthogonal_camera(
        elevation_deg=[0] * num_views,
        distance=[1.8] * num_views,
        left=-0.55,
        right=0.55,
        bottom=-0.55,
        top=0.55,
        # Every azimuth is shifted by -90 degrees before building the cameras.
        azimuth_deg=[x - 90 for x in azimuth_degrees],
        device=device,
    )

    plucker_embeds = get_plucker_embeds_from_cameras_ortho(
        cameras.c2w, [1.1] * num_views, size
    )
    control_images = ((plucker_embeds + 1.0) / 2.0).clamp(0, 1)

    return control_images
322 | 
323 | 
def preprocess_image(image: Image.Image, height, width):
    """Crop to the visible object, rescale it, and centre it on a gray canvas.

    Steps:
      1. Take the bounding box of all pixels whose alpha is greater than 0.
      2. Resize the crop so that its limiting side fills 90% of the target
         canvas while keeping the aspect ratio.
      3. Paste it centred on a ``height`` x ``width`` canvas and composite
         it over mid-gray (0.5) using the alpha channel.

    Args:
        image: Input image. Images that are not in RGBA mode are converted
            first (fully opaque alpha for modes without transparency).
        height: Target canvas height in pixels.
        width: Target canvas width in pixels.

    Returns:
        An RGB PIL image of size ``(width, height)``.

    Raises:
        ValueError: If the image has no pixel with alpha greater than 0.
    """
    if image.mode != "RGBA":
        # The alpha lookup below needs a fourth channel.
        image = image.convert("RGBA")
    image = np.array(image)
    alpha = image[..., 3] > 0
    if not alpha.any():
        raise ValueError(
            "preprocess_image: image is fully transparent, nothing to crop around"
        )
    H, W = alpha.shape
    # get the bounding box of alpha
    y, x = np.where(alpha)
    y0, y1 = max(y.min() - 1, 0), min(y.max() + 1, H)
    x0, x1 = max(x.min() - 1, 0), min(x.max() + 1, W)
    image_center = image[y0:y1, x0:x1]
    # Resize so the limiting side takes 90% of the canvas. Comparing aspect
    # ratios (H / height vs W / width) instead of H vs W keeps the result
    # inside the canvas for non-square targets too; for square targets the
    # two tests are equivalent.
    H, W, _ = image_center.shape
    if H * width > W * height:
        W = max(1, int(W * (height * 0.9) / H))
        H = max(1, int(height * 0.9))
    else:
        H = max(1, int(H * (width * 0.9) / W))
        W = max(1, int(width * 0.9))
    image_center = np.array(Image.fromarray(image_center).resize((W, H)))
    # pad to H, W
    start_h = (height - H) // 2
    start_w = (width - W) // 2
    image = np.zeros((height, width, 4), dtype=np.uint8)
    image[start_h : start_h + H, start_w : start_w + W] = image_center
    image = image.astype(np.float32) / 255.0
    # Alpha-composite over a constant 0.5 (mid-gray) background.
    image = image[:, :, :3] * image[:, :, 3:4] + (1 - image[:, :, 3:4]) * 0.5
    image = (image * 255).clip(0, 255).astype(np.uint8)
    image = Image.fromarray(image)

    return image
353 | 


--------------------------------------------------------------------------------
/workflows/i2mv_sdxl_diffusers.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "last_node_id": 11,
  3 |     "last_link_id": 10,
  4 |     "nodes": [
  5 |         {
  6 |             "id": 6,
  7 |             "type": "DiffusersMVModelMakeup",
  8 |             "pos": [
  9 |                 944.978759765625,
 10 |                 234.29940795898438
 11 |             ],
 12 |             "size": [
 13 |                 315,
 14 |                 170
 15 |             ],
 16 |             "flags": {},
 17 |             "order": 7,
 18 |             "mode": 0,
 19 |             "inputs": [
 20 |                 {
 21 |                     "name": "pipeline",
 22 |                     "type": "PIPELINE",
 23 |                     "link": 2
 24 |                 },
 25 |                 {
 26 |                     "name": "scheduler",
 27 |                     "type": "SCHEDULER",
 28 |                     "link": 3
 29 |                 },
 30 |                 {
 31 |                     "name": "autoencoder",
 32 |                     "type": "AUTOENCODER",
 33 |                     "link": 4
 34 |                 }
 35 |             ],
 36 |             "outputs": [
 37 |                 {
 38 |                     "name": "PIPELINE",
 39 |                     "type": "PIPELINE",
 40 |                     "links": [
 41 |                         5
 42 |                     ],
 43 |                     "slot_index": 0
 44 |                 }
 45 |             ],
 46 |             "properties": {
 47 |                 "Node name for S&R": "DiffusersMVModelMakeup"
 48 |             },
 49 |             "widgets_values": [
 50 |                 true,
 51 |                 "huanngzh/mv-adapter",
 52 |                 "mvadapter_i2mv_sdxl.safetensors",
 53 |                 6
 54 |             ]
 55 |         },
 56 |         {
 57 |             "id": 5,
 58 |             "type": "DiffusersMVVaeLoader",
 59 |             "pos": [
 60 |                 519.3989868164062,
 61 |                 334.48828125
 62 |             ],
 63 |             "size": [
 64 |                 315,
 65 |                 58
 66 |             ],
 67 |             "flags": {},
 68 |             "order": 0,
 69 |             "mode": 0,
 70 |             "inputs": [],
 71 |             "outputs": [
 72 |                 {
 73 |                     "name": "AUTOENCODER",
 74 |                     "type": "AUTOENCODER",
 75 |                     "links": [
 76 |                         4
 77 |                     ],
 78 |                     "slot_index": 0
 79 |                 }
 80 |             ],
 81 |             "properties": {
 82 |                 "Node name for S&R": "DiffusersMVVaeLoader"
 83 |             },
 84 |             "widgets_values": [
 85 |                 "madebyollin/sdxl-vae-fp16-fix"
 86 |             ]
 87 |         },
 88 |         {
 89 |             "id": 3,
 90 |             "type": "DiffusersMVSchedulerLoader",
 91 |             "pos": [
 92 |                 515.5944213867188,
 93 |                 125.65931701660156
 94 |             ],
 95 |             "size": [
 96 |                 327.5999755859375,
 97 |                 130
 98 |             ],
 99 |             "flags": {},
100 |             "order": 5,
101 |             "mode": 0,
102 |             "inputs": [
103 |                 {
104 |                     "name": "pipeline",
105 |                     "type": "PIPELINE",
106 |                     "link": 1
107 |                 }
108 |             ],
109 |             "outputs": [
110 |                 {
111 |                     "name": "SCHEDULER",
112 |                     "type": "SCHEDULER",
113 |                     "links": [
114 |                         3
115 |                     ],
116 |                     "slot_index": 0
117 |                 }
118 |             ],
119 |             "properties": {
120 |                 "Node name for S&R": "DiffusersMVSchedulerLoader"
121 |             },
122 |             "widgets_values": [
123 |                 "DDPM",
124 |                 true,
125 |                 "interpolated",
126 |                 8
127 |             ]
128 |         },
129 |         {
130 |             "id": 2,
131 |             "type": "BiRefNet",
132 |             "pos": [
133 |                 521.8474731445312,
134 |                 -224.9335479736328
135 |             ],
136 |             "size": [
137 |                 315,
138 |                 58
139 |             ],
140 |             "flags": {},
141 |             "order": 1,
142 |             "mode": 0,
143 |             "inputs": [],
144 |             "outputs": [
145 |                 {
146 |                     "name": "FUNCTION",
147 |                     "type": "FUNCTION",
148 |                     "links": [
149 |                         6
150 |                     ],
151 |                     "slot_index": 0
152 |                 }
153 |             ],
154 |             "properties": {
155 |                 "Node name for S&R": "BiRefNet"
156 |             },
157 |             "widgets_values": [
158 |                 "ZhengPeng7/BiRefNet"
159 |             ]
160 |         },
161 |         {
162 |             "id": 8,
163 |             "type": "LoadImage",
164 |             "pos": [
165 |                 940.2247314453125,
166 |                 -300.4877014160156
167 |             ],
168 |             "size": [
169 |                 315,
170 |                 314
171 |             ],
172 |             "flags": {},
173 |             "order": 2,
174 |             "mode": 0,
175 |             "inputs": [],
176 |             "outputs": [
177 |                 {
178 |                     "name": "IMAGE",
179 |                     "type": "IMAGE",
180 |                     "links": [
181 |                         7
182 |                     ],
183 |                     "slot_index": 0
184 |                 },
185 |                 {
186 |                     "name": "MASK",
187 |                     "type": "MASK",
188 |                     "links": null
189 |                 }
190 |             ],
191 |             "properties": {
192 |                 "Node name for S&R": "LoadImage"
193 |             },
194 |             "widgets_values": [
195 |                 "已移除背景的image (1).jpeg",
196 |                 "image"
197 |             ]
198 |         },
199 |         {
200 |             "id": 10,
201 |             "type": "PreviewImage",
202 |             "pos": [
203 |                 1337.1131591796875,
204 |                 -263.8614501953125
205 |             ],
206 |             "size": [
207 |                 313.3982849121094,
208 |                 246
209 |             ],
210 |             "flags": {},
211 |             "order": 6,
212 |             "mode": 0,
213 |             "inputs": [
214 |                 {
215 |                     "name": "images",
216 |                     "type": "IMAGE",
217 |                     "link": 8
218 |                 }
219 |             ],
220 |             "outputs": [],
221 |             "properties": {
222 |                 "Node name for S&R": "PreviewImage"
223 |             },
224 |             "widgets_values": []
225 |         },
226 |         {
227 |             "id": 9,
228 |             "type": "ImagePreprocessor",
229 |             "pos": [
230 |                 944.402099609375,
231 |                 75.06153869628906
232 |             ],
233 |             "size": [
234 |                 315,
235 |                 102
236 |             ],
237 |             "flags": {},
238 |             "order": 4,
239 |             "mode": 0,
240 |             "inputs": [
241 |                 {
242 |                     "name": "remove_bg_fn",
243 |                     "type": "FUNCTION",
244 |                     "link": 6
245 |                 },
246 |                 {
247 |                     "name": "image",
248 |                     "type": "IMAGE",
249 |                     "link": 7
250 |                 }
251 |             ],
252 |             "outputs": [
253 |                 {
254 |                     "name": "IMAGE",
255 |                     "type": "IMAGE",
256 |                     "links": [
257 |                         8,
258 |                         9
259 |                     ],
260 |                     "slot_index": 0
261 |                 }
262 |             ],
263 |             "properties": {
264 |                 "Node name for S&R": "ImagePreprocessor"
265 |             },
266 |             "widgets_values": [
267 |                 768,
268 |                 768
269 |             ]
270 |         },
271 |         {
272 |             "id": 7,
273 |             "type": "DiffusersMVSampler",
274 |             "pos": [
275 |                 1324.947265625,
276 |                 70.82652282714844
277 |             ],
278 |             "size": [
279 |                 400,
280 |                 314
281 |             ],
282 |             "flags": {},
283 |             "order": 8,
284 |             "mode": 0,
285 |             "inputs": [
286 |                 {
287 |                     "name": "pipeline",
288 |                     "type": "PIPELINE",
289 |                     "link": 5
290 |                 },
291 |                 {
292 |                     "name": "reference_image",
293 |                     "type": "IMAGE",
294 |                     "link": 9,
295 |                     "shape": 7
296 |                 }
297 |             ],
298 |             "outputs": [
299 |                 {
300 |                     "name": "IMAGE",
301 |                     "type": "IMAGE",
302 |                     "links": [
303 |                         10
304 |                     ],
305 |                     "slot_index": 0
306 |                 }
307 |             ],
308 |             "properties": {
309 |                 "Node name for S&R": "DiffusersMVSampler"
310 |             },
311 |             "widgets_values": [
312 |                 6,
313 |                 "A decorative figurine of a young anime-style girl",
314 |                 "watermark, ugly, deformed, noisy, blurry, low contrast",
315 |                 768,
316 |                 768,
317 |                 50,
318 |                 3,
319 |                 490054611146870,
320 |                 "randomize"
321 |             ]
322 |         },
323 |         {
324 |             "id": 11,
325 |             "type": "PreviewImage",
326 |             "pos": [
327 |                 1778.79638671875,
328 |                 -213.63694763183594
329 |             ],
330 |             "size": [
331 |                 365.73077392578125,
332 |                 534.254150390625
333 |             ],
334 |             "flags": {},
335 |             "order": 9,
336 |             "mode": 0,
337 |             "inputs": [
338 |                 {
339 |                     "name": "images",
340 |                     "type": "IMAGE",
341 |                     "link": 10
342 |                 }
343 |             ],
344 |             "outputs": [],
345 |             "properties": {
346 |                 "Node name for S&R": "PreviewImage"
347 |             },
348 |             "widgets_values": []
349 |         },
350 |         {
351 |             "id": 1,
352 |             "type": "DiffusersMVPipelineLoader",
353 |             "pos": [
354 |                 519.635498046875,
355 |                 -73.85352325439453
356 |             ],
357 |             "size": [
358 |                 315,
359 |                 122
360 |             ],
361 |             "flags": {},
362 |             "order": 3,
363 |             "mode": 0,
364 |             "inputs": [],
365 |             "outputs": [
366 |                 {
367 |                     "name": "PIPELINE",
368 |                     "type": "PIPELINE",
369 |                     "links": [
370 |                         1,
371 |                         2
372 |                     ],
373 |                     "slot_index": 0
374 |                 },
375 |                 {
376 |                     "name": "AUTOENCODER",
377 |                     "type": "AUTOENCODER",
378 |                     "links": null
379 |                 },
380 |                 {
381 |                     "name": "SCHEDULER",
382 |                     "type": "SCHEDULER",
383 |                     "links": null
384 |                 }
385 |             ],
386 |             "properties": {
387 |                 "Node name for S&R": "DiffusersMVPipelineLoader"
388 |             },
389 |             "widgets_values": [
390 |                 "stabilityai/stable-diffusion-xl-base-1.0",
391 |                 "MVAdapterI2MVSDXLPipeline"
392 |             ]
393 |         }
394 |     ],
395 |     "links": [
396 |         [
397 |             1,
398 |             1,
399 |             0,
400 |             3,
401 |             0,
402 |             "PIPELINE"
403 |         ],
404 |         [
405 |             2,
406 |             1,
407 |             0,
408 |             6,
409 |             0,
410 |             "PIPELINE"
411 |         ],
412 |         [
413 |             3,
414 |             3,
415 |             0,
416 |             6,
417 |             1,
418 |             "SCHEDULER"
419 |         ],
420 |         [
421 |             4,
422 |             5,
423 |             0,
424 |             6,
425 |             2,
426 |             "AUTOENCODER"
427 |         ],
428 |         [
429 |             5,
430 |             6,
431 |             0,
432 |             7,
433 |             0,
434 |             "PIPELINE"
435 |         ],
436 |         [
437 |             6,
438 |             2,
439 |             0,
440 |             9,
441 |             0,
442 |             "FUNCTION"
443 |         ],
444 |         [
445 |             7,
446 |             8,
447 |             0,
448 |             9,
449 |             1,
450 |             "IMAGE"
451 |         ],
452 |         [
453 |             8,
454 |             9,
455 |             0,
456 |             10,
457 |             0,
458 |             "IMAGE"
459 |         ],
460 |         [
461 |             9,
462 |             9,
463 |             0,
464 |             7,
465 |             1,
466 |             "IMAGE"
467 |         ],
468 |         [
469 |             10,
470 |             7,
471 |             0,
472 |             11,
473 |             0,
474 |             "IMAGE"
475 |         ]
476 |     ],
477 |     "groups": [],
478 |     "config": {},
479 |     "extra": {
480 |         "ds": {
481 |             "scale": 0.8264462809917354,
482 |             "offset": [
483 |                 -46.02437931617331,
484 |                 392.2111603041893
485 |             ]
486 |         }
487 |     },
488 |     "version": 0.4
489 | }


--------------------------------------------------------------------------------
/workflows/i2mv_sdxl_ldm.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "last_node_id": 11,
  3 |     "last_link_id": 10,
  4 |     "nodes": [
  5 |         {
  6 |             "id": 3,
  7 |             "type": "LdmVaeLoader",
  8 |             "pos": [
  9 |                 460.4966125488281,
 10 |                 490.3278503417969
 11 |             ],
 12 |             "size": [
 13 |                 315,
 14 |                 58
 15 |             ],
 16 |             "flags": {},
 17 |             "order": 0,
 18 |             "mode": 0,
 19 |             "inputs": [],
 20 |             "outputs": [
 21 |                 {
 22 |                     "name": "AUTOENCODER",
 23 |                     "type": "AUTOENCODER",
 24 |                     "links": [
 25 |                         4
 26 |                     ],
 27 |                     "slot_index": 0
 28 |                 }
 29 |             ],
 30 |             "properties": {
 31 |                 "Node name for S&R": "LdmVaeLoader"
 32 |             },
 33 |             "widgets_values": [
 34 |                 "sdxl_vae.safetensors",
 35 |                 true
 36 |             ]
 37 |         },
 38 |         {
 39 |             "id": 4,
 40 |             "type": "DiffusersMVModelMakeup",
 41 |             "pos": [
 42 |                 873.04052734375,
 43 |                 378.09552001953125
 44 |             ],
 45 |             "size": [
 46 |                 315,
 47 |                 170
 48 |             ],
 49 |             "flags": {},
 50 |             "order": 6,
 51 |             "mode": 0,
 52 |             "inputs": [
 53 |                 {
 54 |                     "name": "pipeline",
 55 |                     "type": "PIPELINE",
 56 |                     "link": 2
 57 |                 },
 58 |                 {
 59 |                     "name": "scheduler",
 60 |                     "type": "SCHEDULER",
 61 |                     "link": 3
 62 |                 },
 63 |                 {
 64 |                     "name": "autoencoder",
 65 |                     "type": "AUTOENCODER",
 66 |                     "link": 4
 67 |                 }
 68 |             ],
 69 |             "outputs": [
 70 |                 {
 71 |                     "name": "PIPELINE",
 72 |                     "type": "PIPELINE",
 73 |                     "links": [
 74 |                         5
 75 |                     ],
 76 |                     "slot_index": 0
 77 |                 }
 78 |             ],
 79 |             "properties": {
 80 |                 "Node name for S&R": "DiffusersMVModelMakeup"
 81 |             },
 82 |             "widgets_values": [
 83 |                 true,
 84 |                 "huanngzh/mv-adapter",
 85 |                 "mvadapter_i2mv_sdxl.safetensors",
 86 |                 6
 87 |             ]
 88 |         },
 89 |         {
 90 |             "id": 1,
 91 |             "type": "LdmPipelineLoader",
 92 |             "pos": [
 93 |                 459.6553649902344,
 94 |                 47.24098205566406
 95 |             ],
 96 |             "size": [
 97 |                 315,
 98 |                 122
 99 |             ],
100 |             "flags": {},
101 |             "order": 1,
102 |             "mode": 0,
103 |             "inputs": [],
104 |             "outputs": [
105 |                 {
106 |                     "name": "PIPELINE",
107 |                     "type": "PIPELINE",
108 |                     "links": [
109 |                         1,
110 |                         2
111 |                     ],
112 |                     "slot_index": 0
113 |                 },
114 |                 {
115 |                     "name": "AUTOENCODER",
116 |                     "type": "AUTOENCODER",
117 |                     "links": null
118 |                 },
119 |                 {
120 |                     "name": "SCHEDULER",
121 |                     "type": "SCHEDULER",
122 |                     "links": null
123 |                 }
124 |             ],
125 |             "properties": {
126 |                 "Node name for S&R": "LdmPipelineLoader"
127 |             },
128 |             "widgets_values": [
129 |                 "sd_xl_base_1.0.safetensors",
130 |                 "MVAdapterI2MVSDXLPipeline"
131 |             ]
132 |         },
133 |         {
134 |             "id": 2,
135 |             "type": "DiffusersMVSchedulerLoader",
136 |             "pos": [
137 |                 452.2912292480469,
138 |                 260.1961975097656
139 |             ],
140 |             "size": [
141 |                 327.5999755859375,
142 |                 130
143 |             ],
144 |             "flags": {},
145 |             "order": 4,
146 |             "mode": 0,
147 |             "inputs": [
148 |                 {
149 |                     "name": "pipeline",
150 |                     "type": "PIPELINE",
151 |                     "link": 1
152 |                 }
153 |             ],
154 |             "outputs": [
155 |                 {
156 |                     "name": "SCHEDULER",
157 |                     "type": "SCHEDULER",
158 |                     "links": [
159 |                         3
160 |                     ],
161 |                     "slot_index": 0
162 |                 }
163 |             ],
164 |             "properties": {
165 |                 "Node name for S&R": "DiffusersMVSchedulerLoader"
166 |             },
167 |             "widgets_values": [
168 |                 "DDPM",
169 |                 true,
170 |                 "interpolated",
171 |                 8
172 |             ]
173 |         },
174 |         {
175 |             "id": 9,
176 |             "type": "BiRefNet",
177 |             "pos": [
178 |                 461.276123046875,
179 |                 -98.90441131591797
180 |             ],
181 |             "size": [
182 |                 315,
183 |                 58
184 |             ],
185 |             "flags": {},
186 |             "order": 2,
187 |             "mode": 0,
188 |             "inputs": [],
189 |             "outputs": [
190 |                 {
191 |                     "name": "FUNCTION",
192 |                     "type": "FUNCTION",
193 |                     "links": [
194 |                         6
195 |                     ],
196 |                     "slot_index": 0
197 |                 }
198 |             ],
199 |             "properties": {
200 |                 "Node name for S&R": "BiRefNet"
201 |             },
202 |             "widgets_values": [
203 |                 "ZhengPeng7/BiRefNet"
204 |             ]
205 |         },
206 |         {
207 |             "id": 8,
208 |             "type": "ImagePreprocessor",
209 |             "pos": [
210 |                 879.4166870117188,
211 |                 160.73989868164062
212 |             ],
213 |             "size": [
214 |                 315,
215 |                 102
216 |             ],
217 |             "flags": {},
218 |             "order": 5,
219 |             "mode": 0,
220 |             "inputs": [
221 |                 {
222 |                     "name": "remove_bg_fn",
223 |                     "type": "FUNCTION",
224 |                     "link": 6
225 |                 },
226 |                 {
227 |                     "name": "image",
228 |                     "type": "IMAGE",
229 |                     "link": 7
230 |                 }
231 |             ],
232 |             "outputs": [
233 |                 {
234 |                     "name": "IMAGE",
235 |                     "type": "IMAGE",
236 |                     "links": [
237 |                         8,
238 |                         9
239 |                     ],
240 |                     "slot_index": 0
241 |                 }
242 |             ],
243 |             "properties": {
244 |                 "Node name for S&R": "ImagePreprocessor"
245 |             },
246 |             "widgets_values": [
247 |                 768,
248 |                 768
249 |             ]
250 |         },
251 |         {
252 |             "id": 7,
253 |             "type": "LoadImage",
254 |             "pos": [
255 |                 873.844482421875,
256 |                 -214.40762329101562
257 |             ],
258 |             "size": [
259 |                 316.98516845703125,
260 |                 314
261 |             ],
262 |             "flags": {},
263 |             "order": 3,
264 |             "mode": 0,
265 |             "inputs": [],
266 |             "outputs": [
267 |                 {
268 |                     "name": "IMAGE",
269 |                     "type": "IMAGE",
270 |                     "links": [
271 |                         7
272 |                     ],
273 |                     "slot_index": 0
274 |                 },
275 |                 {
276 |                     "name": "MASK",
277 |                     "type": "MASK",
278 |                     "links": null
279 |                 }
280 |             ],
281 |             "properties": {
282 |                 "Node name for S&R": "LoadImage"
283 |             },
284 |             "widgets_values": [
285 |                 "已移除背景的image (1).jpeg",
286 |                 "image"
287 |             ]
288 |         },
289 |         {
290 |             "id": 10,
291 |             "type": "PreviewImage",
292 |             "pos": [
293 |                 1288.9661865234375,
294 |                 -175.12034606933594
295 |             ],
296 |             "size": [
297 |                 267.6073303222656,
298 |                 276.0325927734375
299 |             ],
300 |             "flags": {},
301 |             "order": 7,
302 |             "mode": 0,
303 |             "inputs": [
304 |                 {
305 |                     "name": "images",
306 |                     "type": "IMAGE",
307 |                     "link": 8
308 |                 }
309 |             ],
310 |             "outputs": [],
311 |             "properties": {
312 |                 "Node name for S&R": "PreviewImage"
313 |             },
314 |             "widgets_values": []
315 |         },
316 |         {
317 |             "id": 11,
318 |             "type": "PreviewImage",
319 |             "pos": [
320 |                 1719.0726318359375,
321 |                 -112.28995513916016
322 |             ],
323 |             "size": [
324 |                 390.2191162109375,
325 |                 614.5867919921875
326 |             ],
327 |             "flags": {},
328 |             "order": 9,
329 |             "mode": 0,
330 |             "inputs": [
331 |                 {
332 |                     "name": "images",
333 |                     "type": "IMAGE",
334 |                     "link": 10
335 |                 }
336 |             ],
337 |             "outputs": [],
338 |             "properties": {
339 |                 "Node name for S&R": "PreviewImage"
340 |             },
341 |             "widgets_values": []
342 |         },
343 |         {
344 |             "id": 6,
345 |             "type": "DiffusersMVSampler",
346 |             "pos": [
347 |                 1270.0447998046875,
348 |                 187.63858032226562
349 |             ],
350 |             "size": [
351 |                 398.4827880859375,
352 |                 355.830078125
353 |             ],
354 |             "flags": {},
355 |             "order": 8,
356 |             "mode": 0,
357 |             "inputs": [
358 |                 {
359 |                     "name": "pipeline",
360 |                     "type": "PIPELINE",
361 |                     "link": 5
362 |                 },
363 |                 {
364 |                     "name": "reference_image",
365 |                     "type": "IMAGE",
366 |                     "link": 9,
367 |                     "shape": 7
368 |                 }
369 |             ],
370 |             "outputs": [
371 |                 {
372 |                     "name": "IMAGE",
373 |                     "type": "IMAGE",
374 |                     "links": [
375 |                         10
376 |                     ],
377 |                     "slot_index": 0
378 |                 }
379 |             ],
380 |             "properties": {
381 |                 "Node name for S&R": "DiffusersMVSampler"
382 |             },
383 |             "widgets_values": [
384 |                 6,
385 |                 "A decorative figurine of a young anime-style girl",
386 |                 "watermark, ugly, deformed, noisy, blurry, low contrast",
387 |                 768,
388 |                 768,
389 |                 50,
390 |                 3,
391 |                 21,
392 |                 "fixed"
393 |             ]
394 |         }
395 |     ],
396 |     "links": [
397 |         [
398 |             1,
399 |             1,
400 |             0,
401 |             2,
402 |             0,
403 |             "PIPELINE"
404 |         ],
405 |         [
406 |             2,
407 |             1,
408 |             0,
409 |             4,
410 |             0,
411 |             "PIPELINE"
412 |         ],
413 |         [
414 |             3,
415 |             2,
416 |             0,
417 |             4,
418 |             1,
419 |             "SCHEDULER"
420 |         ],
421 |         [
422 |             4,
423 |             3,
424 |             0,
425 |             4,
426 |             2,
427 |             "AUTOENCODER"
428 |         ],
429 |         [
430 |             5,
431 |             4,
432 |             0,
433 |             6,
434 |             0,
435 |             "PIPELINE"
436 |         ],
437 |         [
438 |             6,
439 |             9,
440 |             0,
441 |             8,
442 |             0,
443 |             "FUNCTION"
444 |         ],
445 |         [
446 |             7,
447 |             7,
448 |             0,
449 |             8,
450 |             1,
451 |             "IMAGE"
452 |         ],
453 |         [
454 |             8,
455 |             8,
456 |             0,
457 |             10,
458 |             0,
459 |             "IMAGE"
460 |         ],
461 |         [
462 |             9,
463 |             8,
464 |             0,
465 |             6,
466 |             1,
467 |             "IMAGE"
468 |         ],
469 |         [
470 |             10,
471 |             6,
472 |             0,
473 |             11,
474 |             0,
475 |             "IMAGE"
476 |         ]
477 |     ],
478 |     "groups": [],
479 |     "config": {},
480 |     "extra": {
481 |         "ds": {
482 |             "scale": 0.8264462809917354,
483 |             "offset": [
484 |                 5.887456621326669,
485 |                 285.11670717918946
486 |             ]
487 |         }
488 |     },
489 |     "version": 0.4
490 | }


--------------------------------------------------------------------------------
/workflows/i2mv_sdxl_ldm_lora.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "last_node_id": 12,
  3 |     "last_link_id": 12,
  4 |     "nodes": [
  5 |         {
  6 |             "id": 3,
  7 |             "type": "LdmVaeLoader",
  8 |             "pos": [
  9 |                 460.4966125488281,
 10 |                 490.3278503417969
 11 |             ],
 12 |             "size": [
 13 |                 315,
 14 |                 82
 15 |             ],
 16 |             "flags": {},
 17 |             "order": 0,
 18 |             "mode": 0,
 19 |             "inputs": [],
 20 |             "outputs": [
 21 |                 {
 22 |                     "name": "AUTOENCODER",
 23 |                     "type": "AUTOENCODER",
 24 |                     "links": [
 25 |                         4
 26 |                     ],
 27 |                     "slot_index": 0
 28 |                 }
 29 |             ],
 30 |             "properties": {
 31 |                 "Node name for S&R": "LdmVaeLoader"
 32 |             },
 33 |             "widgets_values": [
 34 |                 "sdxl_vae.safetensors",
 35 |                 true
 36 |             ]
 37 |         },
 38 |         {
 39 |             "id": 1,
 40 |             "type": "LdmPipelineLoader",
 41 |             "pos": [
 42 |                 459.6553649902344,
 43 |                 47.24098205566406
 44 |             ],
 45 |             "size": [
 46 |                 315,
 47 |                 122
 48 |             ],
 49 |             "flags": {},
 50 |             "order": 1,
 51 |             "mode": 0,
 52 |             "inputs": [],
 53 |             "outputs": [
 54 |                 {
 55 |                     "name": "PIPELINE",
 56 |                     "type": "PIPELINE",
 57 |                     "links": [
 58 |                         1,
 59 |                         2
 60 |                     ],
 61 |                     "slot_index": 0
 62 |                 },
 63 |                 {
 64 |                     "name": "AUTOENCODER",
 65 |                     "type": "AUTOENCODER",
 66 |                     "links": null
 67 |                 },
 68 |                 {
 69 |                     "name": "SCHEDULER",
 70 |                     "type": "SCHEDULER",
 71 |                     "links": null
 72 |                 }
 73 |             ],
 74 |             "properties": {
 75 |                 "Node name for S&R": "LdmPipelineLoader"
 76 |             },
 77 |             "widgets_values": [
 78 |                 "sd_xl_base_1.0.safetensors",
 79 |                 "MVAdapterI2MVSDXLPipeline"
 80 |             ]
 81 |         },
 82 |         {
 83 |             "id": 2,
 84 |             "type": "DiffusersMVSchedulerLoader",
 85 |             "pos": [
 86 |                 452.2912292480469,
 87 |                 260.1961975097656
 88 |             ],
 89 |             "size": [
 90 |                 327.5999755859375,
 91 |                 130
 92 |             ],
 93 |             "flags": {},
 94 |             "order": 4,
 95 |             "mode": 0,
 96 |             "inputs": [
 97 |                 {
 98 |                     "name": "pipeline",
 99 |                     "type": "PIPELINE",
100 |                     "link": 1
101 |                 }
102 |             ],
103 |             "outputs": [
104 |                 {
105 |                     "name": "SCHEDULER",
106 |                     "type": "SCHEDULER",
107 |                     "links": [
108 |                         3
109 |                     ],
110 |                     "slot_index": 0
111 |                 }
112 |             ],
113 |             "properties": {
114 |                 "Node name for S&R": "DiffusersMVSchedulerLoader"
115 |             },
116 |             "widgets_values": [
117 |                 "DDPM",
118 |                 true,
119 |                 "interpolated",
120 |                 8
121 |             ]
122 |         },
123 |         {
124 |             "id": 9,
125 |             "type": "BiRefNet",
126 |             "pos": [
127 |                 461.276123046875,
128 |                 -98.90441131591797
129 |             ],
130 |             "size": [
131 |                 315,
132 |                 58
133 |             ],
134 |             "flags": {},
135 |             "order": 2,
136 |             "mode": 0,
137 |             "inputs": [],
138 |             "outputs": [
139 |                 {
140 |                     "name": "FUNCTION",
141 |                     "type": "FUNCTION",
142 |                     "links": [
143 |                         6
144 |                     ],
145 |                     "slot_index": 0
146 |                 }
147 |             ],
148 |             "properties": {
149 |                 "Node name for S&R": "BiRefNet"
150 |             },
151 |             "widgets_values": [
152 |                 "ZhengPeng7/BiRefNet"
153 |             ]
154 |         },
155 |         {
156 |             "id": 8,
157 |             "type": "ImagePreprocessor",
158 |             "pos": [
159 |                 879.4166870117188,
160 |                 160.73989868164062
161 |             ],
162 |             "size": [
163 |                 315,
164 |                 102
165 |             ],
166 |             "flags": {},
167 |             "order": 5,
168 |             "mode": 0,
169 |             "inputs": [
170 |                 {
171 |                     "name": "remove_bg_fn",
172 |                     "type": "FUNCTION",
173 |                     "link": 6
174 |                 },
175 |                 {
176 |                     "name": "image",
177 |                     "type": "IMAGE",
178 |                     "link": 7
179 |                 }
180 |             ],
181 |             "outputs": [
182 |                 {
183 |                     "name": "IMAGE",
184 |                     "type": "IMAGE",
185 |                     "links": [
186 |                         8,
187 |                         9
188 |                     ],
189 |                     "slot_index": 0
190 |                 }
191 |             ],
192 |             "properties": {
193 |                 "Node name for S&R": "ImagePreprocessor"
194 |             },
195 |             "widgets_values": [
196 |                 768,
197 |                 768
198 |             ]
199 |         },
200 |         {
201 |             "id": 7,
202 |             "type": "LoadImage",
203 |             "pos": [
204 |                 873.844482421875,
205 |                 -214.40762329101562
206 |             ],
207 |             "size": [
208 |                 316.98516845703125,
209 |                 314
210 |             ],
211 |             "flags": {},
212 |             "order": 3,
213 |             "mode": 0,
214 |             "inputs": [],
215 |             "outputs": [
216 |                 {
217 |                     "name": "IMAGE",
218 |                     "type": "IMAGE",
219 |                     "links": [
220 |                         7
221 |                     ],
222 |                     "slot_index": 0
223 |                 },
224 |                 {
225 |                     "name": "MASK",
226 |                     "type": "MASK",
227 |                     "links": null
228 |                 }
229 |             ],
230 |             "properties": {
231 |                 "Node name for S&R": "LoadImage"
232 |             },
233 |             "widgets_values": [
234 |                 "已移除背景的image (1).jpeg",
235 |                 "image"
236 |             ]
237 |         },
238 |         {
239 |             "id": 11,
240 |             "type": "PreviewImage",
241 |             "pos": [
242 |                 1987.6944580078125,
243 |                 -87.29558563232422
244 |             ],
245 |             "size": [
246 |                 390.2191162109375,
247 |                 614.5867919921875
248 |             ],
249 |             "flags": {},
250 |             "order": 10,
251 |             "mode": 0,
252 |             "inputs": [
253 |                 {
254 |                     "name": "images",
255 |                     "type": "IMAGE",
256 |                     "link": 10
257 |                 }
258 |             ],
259 |             "outputs": [],
260 |             "properties": {
261 |                 "Node name for S&R": "PreviewImage"
262 |             },
263 |             "widgets_values": []
264 |         },
265 |         {
266 |             "id": 10,
267 |             "type": "PreviewImage",
268 |             "pos": [
269 |                 1395.756591796875,
270 |                 -179.6309814453125
271 |             ],
272 |             "size": [
273 |                 267.6073303222656,
274 |                 276.0325927734375
275 |             ],
276 |             "flags": {},
277 |             "order": 7,
278 |             "mode": 0,
279 |             "inputs": [
280 |                 {
281 |                     "name": "images",
282 |                     "type": "IMAGE",
283 |                     "link": 8
284 |                 }
285 |             ],
286 |             "outputs": [],
287 |             "properties": {
288 |                 "Node name for S&R": "PreviewImage"
289 |             },
290 |             "widgets_values": []
291 |         },
292 |         {
293 |             "id": 4,
294 |             "type": "DiffusersMVModelMakeup",
295 |             "pos": [
296 |                 873.04052734375,
297 |                 378.09552001953125
298 |             ],
299 |             "size": [
300 |                 315,
301 |                 194
302 |             ],
303 |             "flags": {},
304 |             "order": 6,
305 |             "mode": 0,
306 |             "inputs": [
307 |                 {
308 |                     "name": "pipeline",
309 |                     "type": "PIPELINE",
310 |                     "link": 2
311 |                 },
312 |                 {
313 |                     "name": "scheduler",
314 |                     "type": "SCHEDULER",
315 |                     "link": 3
316 |                 },
317 |                 {
318 |                     "name": "autoencoder",
319 |                     "type": "AUTOENCODER",
320 |                     "link": 4
321 |                 }
322 |             ],
323 |             "outputs": [
324 |                 {
325 |                     "name": "PIPELINE",
326 |                     "type": "PIPELINE",
327 |                     "links": [
328 |                         11
329 |                     ],
330 |                     "slot_index": 0
331 |                 }
332 |             ],
333 |             "properties": {
334 |                 "Node name for S&R": "DiffusersMVModelMakeup"
335 |             },
336 |             "widgets_values": [
337 |                 true,
338 |                 "huanngzh/mv-adapter",
339 |                 "mvadapter_i2mv_sdxl.safetensors",
340 |                 6,
341 |                 true
342 |             ]
343 |         },
344 |         {
345 |             "id": 12,
346 |             "type": "LoraModelLoader",
347 |             "pos": [
348 |                 1206.2666015625,
349 |                 348.32861328125
350 |             ],
351 |             "size": [
352 |                 315,
353 |                 82
354 |             ],
355 |             "flags": {},
356 |             "order": 8,
357 |             "mode": 0,
358 |             "inputs": [
359 |                 {
360 |                     "name": "pipeline",
361 |                     "type": "PIPELINE",
362 |                     "link": 11
363 |                 }
364 |             ],
365 |             "outputs": [
366 |                 {
367 |                     "name": "PIPELINE",
368 |                     "type": "PIPELINE",
369 |                     "links": [
370 |                         12
371 |                     ],
372 |                     "slot_index": 0
373 |                 }
374 |             ],
375 |             "properties": {
376 |                 "Node name for S&R": "LoraModelLoader"
377 |             },
378 |             "widgets_values": [
379 |                 "3d_render_style_xl.safetensors",
380 |                 1
381 |             ]
382 |         },
383 |         {
384 |             "id": 6,
385 |             "type": "DiffusersMVSampler",
386 |             "pos": [
387 |                 1545.4605712890625,
388 |                 165.60733032226562
389 |             ],
390 |             "size": [
391 |                 398.4827880859375,
392 |                 355.830078125
393 |             ],
394 |             "flags": {},
395 |             "order": 9,
396 |             "mode": 0,
397 |             "inputs": [
398 |                 {
399 |                     "name": "pipeline",
400 |                     "type": "PIPELINE",
401 |                     "link": 12
402 |                 },
403 |                 {
404 |                     "name": "reference_image",
405 |                     "type": "IMAGE",
406 |                     "link": 9,
407 |                     "shape": 7
408 |                 }
409 |             ],
410 |             "outputs": [
411 |                 {
412 |                     "name": "IMAGE",
413 |                     "type": "IMAGE",
414 |                     "links": [
415 |                         10
416 |                     ],
417 |                     "slot_index": 0
418 |                 }
419 |             ],
420 |             "properties": {
421 |                 "Node name for S&R": "DiffusersMVSampler"
422 |             },
423 |             "widgets_values": [
424 |                 6,
425 |                 "3d style, A decorative figurine of a young anime-style girl",
426 |                 "watermark, ugly, deformed, noisy, blurry, low contrast",
427 |                 768,
428 |                 768,
429 |                 50,
430 |                 3,
431 |                 21,
432 |                 "fixed"
433 |             ]
434 |         }
435 |     ],
436 |     "links": [
437 |         [
438 |             1,
439 |             1,
440 |             0,
441 |             2,
442 |             0,
443 |             "PIPELINE"
444 |         ],
445 |         [
446 |             2,
447 |             1,
448 |             0,
449 |             4,
450 |             0,
451 |             "PIPELINE"
452 |         ],
453 |         [
454 |             3,
455 |             2,
456 |             0,
457 |             4,
458 |             1,
459 |             "SCHEDULER"
460 |         ],
461 |         [
462 |             4,
463 |             3,
464 |             0,
465 |             4,
466 |             2,
467 |             "AUTOENCODER"
468 |         ],
469 |         [
470 |             6,
471 |             9,
472 |             0,
473 |             8,
474 |             0,
475 |             "FUNCTION"
476 |         ],
477 |         [
478 |             7,
479 |             7,
480 |             0,
481 |             8,
482 |             1,
483 |             "IMAGE"
484 |         ],
485 |         [
486 |             8,
487 |             8,
488 |             0,
489 |             10,
490 |             0,
491 |             "IMAGE"
492 |         ],
493 |         [
494 |             9,
495 |             8,
496 |             0,
497 |             6,
498 |             1,
499 |             "IMAGE"
500 |         ],
501 |         [
502 |             10,
503 |             6,
504 |             0,
505 |             11,
506 |             0,
507 |             "IMAGE"
508 |         ],
509 |         [
510 |             11,
511 |             4,
512 |             0,
513 |             12,
514 |             0,
515 |             "PIPELINE"
516 |         ],
517 |         [
518 |             12,
519 |             12,
520 |             0,
521 |             6,
522 |             0,
523 |             "PIPELINE"
524 |         ]
525 |     ],
526 |     "groups": [],
527 |     "config": {},
528 |     "extra": {
529 |         "ds": {
530 |             "scale": 0.620921323059155,
531 |             "offset": [
532 |                 -328.02932510914184,
533 |                 300.9334967924711
534 |             ]
535 |         }
536 |     },
537 |     "version": 0.4
538 | }


--------------------------------------------------------------------------------
/workflows/i2mv_sdxl_ldm_view_selector.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "last_node_id": 12,
  3 |     "last_link_id": 11,
  4 |     "nodes": [
  5 |         {
  6 |             "id": 3,
  7 |             "type": "LdmVaeLoader",
  8 |             "pos": [
  9 |                 460.4966125488281,
 10 |                 490.3278503417969
 11 |             ],
 12 |             "size": [
 13 |                 315,
 14 |                 82
 15 |             ],
 16 |             "flags": {},
 17 |             "order": 0,
 18 |             "mode": 0,
 19 |             "inputs": [],
 20 |             "outputs": [
 21 |                 {
 22 |                     "name": "AUTOENCODER",
 23 |                     "type": "AUTOENCODER",
 24 |                     "links": [
 25 |                         4
 26 |                     ],
 27 |                     "slot_index": 0
 28 |                 }
 29 |             ],
 30 |             "properties": {
 31 |                 "Node name for S&R": "LdmVaeLoader"
 32 |             },
 33 |             "widgets_values": [
 34 |                 "sdxl_vae.safetensors",
 35 |                 true
 36 |             ]
 37 |         },
 38 |         {
 39 |             "id": 2,
 40 |             "type": "DiffusersMVSchedulerLoader",
 41 |             "pos": [
 42 |                 452.2912292480469,
 43 |                 260.1961975097656
 44 |             ],
 45 |             "size": [
 46 |                 327.5999755859375,
 47 |                 130
 48 |             ],
 49 |             "flags": {},
 50 |             "order": 6,
 51 |             "mode": 0,
 52 |             "inputs": [
 53 |                 {
 54 |                     "name": "pipeline",
 55 |                     "type": "PIPELINE",
 56 |                     "link": 1
 57 |                 }
 58 |             ],
 59 |             "outputs": [
 60 |                 {
 61 |                     "name": "SCHEDULER",
 62 |                     "type": "SCHEDULER",
 63 |                     "links": [
 64 |                         3
 65 |                     ],
 66 |                     "slot_index": 0
 67 |                 }
 68 |             ],
 69 |             "properties": {
 70 |                 "Node name for S&R": "DiffusersMVSchedulerLoader"
 71 |             },
 72 |             "widgets_values": [
 73 |                 "DDPM",
 74 |                 true,
 75 |                 "interpolated",
 76 |                 8
 77 |             ]
 78 |         },
 79 |         {
 80 |             "id": 9,
 81 |             "type": "BiRefNet",
 82 |             "pos": [
 83 |                 461.276123046875,
 84 |                 -98.90441131591797
 85 |             ],
 86 |             "size": [
 87 |                 315,
 88 |                 58
 89 |             ],
 90 |             "flags": {},
 91 |             "order": 1,
 92 |             "mode": 0,
 93 |             "inputs": [],
 94 |             "outputs": [
 95 |                 {
 96 |                     "name": "FUNCTION",
 97 |                     "type": "FUNCTION",
 98 |                     "links": [
 99 |                         6
100 |                     ],
101 |                     "slot_index": 0
102 |                 }
103 |             ],
104 |             "properties": {
105 |                 "Node name for S&R": "BiRefNet"
106 |             },
107 |             "widgets_values": [
108 |                 "ZhengPeng7/BiRefNet"
109 |             ]
110 |         },
111 |         {
112 |             "id": 8,
113 |             "type": "ImagePreprocessor",
114 |             "pos": [
115 |                 879.4166870117188,
116 |                 160.73989868164062
117 |             ],
118 |             "size": [
119 |                 315,
120 |                 102
121 |             ],
122 |             "flags": {},
123 |             "order": 5,
124 |             "mode": 0,
125 |             "inputs": [
126 |                 {
127 |                     "name": "remove_bg_fn",
128 |                     "type": "FUNCTION",
129 |                     "link": 6
130 |                 },
131 |                 {
132 |                     "name": "image",
133 |                     "type": "IMAGE",
134 |                     "link": 7
135 |                 }
136 |             ],
137 |             "outputs": [
138 |                 {
139 |                     "name": "IMAGE",
140 |                     "type": "IMAGE",
141 |                     "links": [
142 |                         8,
143 |                         9
144 |                     ],
145 |                     "slot_index": 0
146 |                 }
147 |             ],
148 |             "properties": {
149 |                 "Node name for S&R": "ImagePreprocessor"
150 |             },
151 |             "widgets_values": [
152 |                 768,
153 |                 768
154 |             ]
155 |         },
156 |         {
157 |             "id": 7,
158 |             "type": "LoadImage",
159 |             "pos": [
160 |                 873.844482421875,
161 |                 -214.40762329101562
162 |             ],
163 |             "size": [
164 |                 316.98516845703125,
165 |                 314
166 |             ],
167 |             "flags": {},
168 |             "order": 2,
169 |             "mode": 0,
170 |             "inputs": [],
171 |             "outputs": [
172 |                 {
173 |                     "name": "IMAGE",
174 |                     "type": "IMAGE",
175 |                     "links": [
176 |                         7
177 |                     ],
178 |                     "slot_index": 0
179 |                 },
180 |                 {
181 |                     "name": "MASK",
182 |                     "type": "MASK",
183 |                     "links": null
184 |                 }
185 |             ],
186 |             "properties": {
187 |                 "Node name for S&R": "LoadImage"
188 |             },
189 |             "widgets_values": [
190 |                 "已移除背景的image (1).jpeg",
191 |                 "image"
192 |             ]
193 |         },
194 |         {
195 |             "id": 10,
196 |             "type": "PreviewImage",
197 |             "pos": [
198 |                 1288.9661865234375,
199 |                 -175.12034606933594
200 |             ],
201 |             "size": [
202 |                 267.6073303222656,
203 |                 276.0325927734375
204 |             ],
205 |             "flags": {},
206 |             "order": 7,
207 |             "mode": 0,
208 |             "inputs": [
209 |                 {
210 |                     "name": "images",
211 |                     "type": "IMAGE",
212 |                     "link": 8
213 |                 }
214 |             ],
215 |             "outputs": [],
216 |             "properties": {
217 |                 "Node name for S&R": "PreviewImage"
218 |             },
219 |             "widgets_values": []
220 |         },
221 |         {
222 |             "id": 1,
223 |             "type": "LdmPipelineLoader",
224 |             "pos": [
225 |                 459.6553649902344,
226 |                 47.24098205566406
227 |             ],
228 |             "size": [
229 |                 315,
230 |                 122
231 |             ],
232 |             "flags": {},
233 |             "order": 3,
234 |             "mode": 0,
235 |             "inputs": [],
236 |             "outputs": [
237 |                 {
238 |                     "name": "PIPELINE",
239 |                     "type": "PIPELINE",
240 |                     "links": [
241 |                         1,
242 |                         2
243 |                     ],
244 |                     "slot_index": 0
245 |                 },
246 |                 {
247 |                     "name": "AUTOENCODER",
248 |                     "type": "AUTOENCODER",
249 |                     "links": null
250 |                 },
251 |                 {
252 |                     "name": "SCHEDULER",
253 |                     "type": "SCHEDULER",
254 |                     "links": null
255 |                 }
256 |             ],
257 |             "properties": {
258 |                 "Node name for S&R": "LdmPipelineLoader"
259 |             },
260 |             "widgets_values": [
261 |                 "sd_xl_base_1.0.safetensors",
262 |                 "MVAdapterI2MVSDXLPipeline"
263 |             ]
264 |         },
265 |         {
266 |             "id": 12,
267 |             "type": "ViewSelector",
268 |             "pos": [
269 |                 1219.32373046875,
270 |                 335.45733642578125
271 |             ],
272 |             "size": [
273 |                 315,
274 |                 178
275 |             ],
276 |             "flags": {},
277 |             "order": 4,
278 |             "mode": 0,
279 |             "inputs": [],
280 |             "outputs": [
281 |                 {
282 |                     "name": "LIST",
283 |                     "type": "LIST",
284 |                     "links": [
285 |                         11
286 |                     ],
287 |                     "slot_index": 0
288 |                 }
289 |             ],
290 |             "properties": {
291 |                 "Node name for S&R": "ViewSelector"
292 |             },
293 |             "widgets_values": [
294 |                 true,
295 |                 false,
296 |                 true,
297 |                 true,
298 |                 false,
299 |                 false
300 |             ]
301 |         },
302 |         {
303 |             "id": 11,
304 |             "type": "PreviewImage",
305 |             "pos": [
306 |                 2021.5838623046875,
307 |                 -108.2677001953125
308 |             ],
309 |             "size": [
310 |                 390.2191162109375,
311 |                 614.5867919921875
312 |             ],
313 |             "flags": {},
314 |             "order": 10,
315 |             "mode": 0,
316 |             "inputs": [
317 |                 {
318 |                     "name": "images",
319 |                     "type": "IMAGE",
320 |                     "link": 10
321 |                 }
322 |             ],
323 |             "outputs": [],
324 |             "properties": {
325 |                 "Node name for S&R": "PreviewImage"
326 |             },
327 |             "widgets_values": []
328 |         },
329 |         {
330 |             "id": 4,
331 |             "type": "DiffusersMVModelMakeup",
332 |             "pos": [
333 |                 844.7613525390625,
334 |                 364.16156005859375
335 |             ],
336 |             "size": [
337 |                 350.9596862792969,
338 |                 218
339 |             ],
340 |             "flags": {},
341 |             "order": 8,
342 |             "mode": 0,
343 |             "inputs": [
344 |                 {
345 |                     "name": "pipeline",
346 |                     "type": "PIPELINE",
347 |                     "link": 2
348 |                 },
349 |                 {
350 |                     "name": "scheduler",
351 |                     "type": "SCHEDULER",
352 |                     "link": 3
353 |                 },
354 |                 {
355 |                     "name": "autoencoder",
356 |                     "type": "AUTOENCODER",
357 |                     "link": 4
358 |                 }
359 |             ],
360 |             "outputs": [
361 |                 {
362 |                     "name": "PIPELINE",
363 |                     "type": "PIPELINE",
364 |                     "links": [
365 |                         5
366 |                     ],
367 |                     "slot_index": 0
368 |                 }
369 |             ],
370 |             "properties": {
371 |                 "Node name for S&R": "DiffusersMVModelMakeup"
372 |             },
373 |             "widgets_values": [
374 |                 true,
375 |                 "huanngzh/mv-adapter",
376 |                 "mvadapter_i2mv_sdxl_beta.safetensors",
377 |                 6,
378 |                 true,
379 |                 false
380 |             ]
381 |         },
382 |         {
383 |             "id": 6,
384 |             "type": "DiffusersMVSampler",
385 |             "pos": [
386 |                 1561.8929443359375,
387 |                 158.94821166992188
388 |             ],
389 |             "size": [
390 |                 398.4827880859375,
391 |                 378
392 |             ],
393 |             "flags": {},
394 |             "order": 9,
395 |             "mode": 0,
396 |             "inputs": [
397 |                 {
398 |                     "name": "pipeline",
399 |                     "type": "PIPELINE",
400 |                     "link": 5
401 |                 },
402 |                 {
403 |                     "name": "reference_image",
404 |                     "type": "IMAGE",
405 |                     "link": 9,
406 |                     "shape": 7
407 |                 },
408 |                 {
409 |                     "name": "controlnet_image",
410 |                     "type": "IMAGE",
411 |                     "link": null,
412 |                     "shape": 7
413 |                 },
414 |                 {
415 |                     "name": "azimuth_degrees",
416 |                     "type": "LIST",
417 |                     "link": 11,
418 |                     "shape": 7
419 |                 }
420 |             ],
421 |             "outputs": [
422 |                 {
423 |                     "name": "IMAGE",
424 |                     "type": "IMAGE",
425 |                     "links": [
426 |                         10
427 |                     ],
428 |                     "slot_index": 0
429 |                 }
430 |             ],
431 |             "properties": {
432 |                 "Node name for S&R": "DiffusersMVSampler"
433 |             },
434 |             "widgets_values": [
435 |                 6,
436 |                 "A decorative figurine of a young anime-style girl",
437 |                 "watermark, ugly, deformed, noisy, blurry, low contrast",
438 |                 768,
439 |                 768,
440 |                 50,
441 |                 3,
442 |                 0,
443 |                 "fixed",
444 |                 1
445 |             ]
446 |         }
447 |     ],
448 |     "links": [
449 |         [
450 |             1,
451 |             1,
452 |             0,
453 |             2,
454 |             0,
455 |             "PIPELINE"
456 |         ],
457 |         [
458 |             2,
459 |             1,
460 |             0,
461 |             4,
462 |             0,
463 |             "PIPELINE"
464 |         ],
465 |         [
466 |             3,
467 |             2,
468 |             0,
469 |             4,
470 |             1,
471 |             "SCHEDULER"
472 |         ],
473 |         [
474 |             4,
475 |             3,
476 |             0,
477 |             4,
478 |             2,
479 |             "AUTOENCODER"
480 |         ],
481 |         [
482 |             5,
483 |             4,
484 |             0,
485 |             6,
486 |             0,
487 |             "PIPELINE"
488 |         ],
489 |         [
490 |             6,
491 |             9,
492 |             0,
493 |             8,
494 |             0,
495 |             "FUNCTION"
496 |         ],
497 |         [
498 |             7,
499 |             7,
500 |             0,
501 |             8,
502 |             1,
503 |             "IMAGE"
504 |         ],
505 |         [
506 |             8,
507 |             8,
508 |             0,
509 |             10,
510 |             0,
511 |             "IMAGE"
512 |         ],
513 |         [
514 |             9,
515 |             8,
516 |             0,
517 |             6,
518 |             1,
519 |             "IMAGE"
520 |         ],
521 |         [
522 |             10,
523 |             6,
524 |             0,
525 |             11,
526 |             0,
527 |             "IMAGE"
528 |         ],
529 |         [
530 |             11,
531 |             12,
532 |             0,
533 |             6,
534 |             3,
535 |             "LIST"
536 |         ]
537 |     ],
538 |     "groups": [],
539 |     "config": {},
540 |     "extra": {
541 |         "ds": {
542 |             "scale": 0.8264462809917354,
543 |             "offset": [
544 |                 -237.18891056617306,
545 |                 276.270472804189
546 |             ]
547 |         }
548 |     },
549 |     "version": 0.4
550 | }


--------------------------------------------------------------------------------
/workflows/t2mv_sdxl_diffusers.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "last_node_id": 7,
  3 |     "last_link_id": 6,
  4 |     "nodes": [
  5 |         {
  6 |             "id": 1,
  7 |             "type": "DiffusersMVPipelineLoader",
  8 |             "pos": [
  9 |                 324.3054504394531,
 10 |                 130.34339904785156
 11 |             ],
 12 |             "size": [
 13 |                 315,
 14 |                 122
 15 |             ],
 16 |             "flags": {},
 17 |             "order": 0,
 18 |             "mode": 0,
 19 |             "inputs": [],
 20 |             "outputs": [
 21 |                 {
 22 |                     "name": "PIPELINE",
 23 |                     "type": "PIPELINE",
 24 |                     "links": [
 25 |                         1,
 26 |                         2
 27 |                     ],
 28 |                     "slot_index": 0
 29 |                 },
 30 |                 {
 31 |                     "name": "AUTOENCODER",
 32 |                     "type": "AUTOENCODER",
 33 |                     "links": null
 34 |                 },
 35 |                 {
 36 |                     "name": "SCHEDULER",
 37 |                     "type": "SCHEDULER",
 38 |                     "links": null
 39 |                 }
 40 |             ],
 41 |             "properties": {
 42 |                 "Node name for S&R": "DiffusersMVPipelineLoader"
 43 |             },
 44 |             "widgets_values": [
 45 |                 "stabilityai/stable-diffusion-xl-base-1.0",
 46 |                 "MVAdapterT2MVSDXLPipeline"
 47 |             ]
 48 |         },
 49 |         {
 50 |             "id": 3,
 51 |             "type": "DiffusersMVSchedulerLoader",
 52 |             "pos": [
 53 |                 320.6045227050781,
 54 |                 323.5510559082031
 55 |             ],
 56 |             "size": [
 57 |                 327.5999755859375,
 58 |                 130
 59 |             ],
 60 |             "flags": {},
 61 |             "order": 2,
 62 |             "mode": 0,
 63 |             "inputs": [
 64 |                 {
 65 |                     "name": "pipeline",
 66 |                     "type": "PIPELINE",
 67 |                     "link": 1
 68 |                 }
 69 |             ],
 70 |             "outputs": [
 71 |                 {
 72 |                     "name": "SCHEDULER",
 73 |                     "type": "SCHEDULER",
 74 |                     "links": [
 75 |                         3
 76 |                     ],
 77 |                     "slot_index": 0
 78 |                 }
 79 |             ],
 80 |             "properties": {
 81 |                 "Node name for S&R": "DiffusersMVSchedulerLoader"
 82 |             },
 83 |             "widgets_values": [
 84 |                 "DDPM",
 85 |                 true,
 86 |                 "interpolated",
 87 |                 8
 88 |             ]
 89 |         },
 90 |         {
 91 |             "id": 2,
 92 |             "type": "DiffusersMVVaeLoader",
 93 |             "pos": [
 94 |                 328.6159362792969,
 95 |                 541.3416748046875
 96 |             ],
 97 |             "size": [
 98 |                 315,
 99 |                 58
100 |             ],
101 |             "flags": {},
102 |             "order": 1,
103 |             "mode": 0,
104 |             "inputs": [],
105 |             "outputs": [
106 |                 {
107 |                     "name": "AUTOENCODER",
108 |                     "type": "AUTOENCODER",
109 |                     "links": [
110 |                         4
111 |                     ],
112 |                     "slot_index": 0
113 |                 }
114 |             ],
115 |             "properties": {
116 |                 "Node name for S&R": "DiffusersMVVaeLoader"
117 |             },
118 |             "widgets_values": [
119 |                 "madebyollin/sdxl-vae-fp16-fix"
120 |             ]
121 |         },
122 |         {
123 |             "id": 4,
124 |             "type": "DiffusersMVModelMakeup",
125 |             "pos": [
126 |                 728.7667846679688,
127 |                 261.0943298339844
128 |             ],
129 |             "size": [
130 |                 315,
131 |                 170
132 |             ],
133 |             "flags": {},
134 |             "order": 3,
135 |             "mode": 0,
136 |             "inputs": [
137 |                 {
138 |                     "name": "pipeline",
139 |                     "type": "PIPELINE",
140 |                     "link": 2
141 |                 },
142 |                 {
143 |                     "name": "scheduler",
144 |                     "type": "SCHEDULER",
145 |                     "link": 3
146 |                 },
147 |                 {
148 |                     "name": "autoencoder",
149 |                     "type": "AUTOENCODER",
150 |                     "link": 4
151 |                 }
152 |             ],
153 |             "outputs": [
154 |                 {
155 |                     "name": "PIPELINE",
156 |                     "type": "PIPELINE",
157 |                     "links": [
158 |                         5
159 |                     ],
160 |                     "slot_index": 0
161 |                 }
162 |             ],
163 |             "properties": {
164 |                 "Node name for S&R": "DiffusersMVModelMakeup"
165 |             },
166 |             "widgets_values": [
167 |                 true,
168 |                 "huanngzh/mv-adapter",
169 |                 "mvadapter_t2mv_sdxl.safetensors",
170 |                 6
171 |             ]
172 |         },
173 |         {
174 |             "id": 6,
175 |             "type": "DiffusersMVSampler",
176 |             "pos": [
177 |                 1124.235595703125,
178 |                 195.65020751953125
179 |             ],
180 |             "size": [
181 |                 400,
182 |                 314
183 |             ],
184 |             "flags": {},
185 |             "order": 4,
186 |             "mode": 0,
187 |             "inputs": [
188 |                 {
189 |                     "name": "pipeline",
190 |                     "type": "PIPELINE",
191 |                     "link": 5
192 |                 },
193 |                 {
194 |                     "name": "reference_image",
195 |                     "type": "IMAGE",
196 |                     "link": null,
197 |                     "shape": 7
198 |                 }
199 |             ],
200 |             "outputs": [
201 |                 {
202 |                     "name": "IMAGE",
203 |                     "type": "IMAGE",
204 |                     "links": [
205 |                         6
206 |                     ],
207 |                     "slot_index": 0
208 |                 }
209 |             ],
210 |             "properties": {
211 |                 "Node name for S&R": "DiffusersMVSampler"
212 |             },
213 |             "widgets_values": [
214 |                 6,
215 |                 "an astronaut riding a horse",
216 |                 "watermark, ugly, deformed, noisy, blurry, low contrast",
217 |                 768,
218 |                 768,
219 |                 50,
220 |                 7,
221 |                 26340599063291,
222 |                 "randomize"
223 |             ]
224 |         },
225 |         {
226 |             "id": 7,
227 |             "type": "PreviewImage",
228 |             "pos": [
229 |                 1592.892822265625,
230 |                 111.47964477539062
231 |             ],
232 |             "size": [
233 |                 391.566162109375,
234 |                 532.7274780273438
235 |             ],
236 |             "flags": {},
237 |             "order": 5,
238 |             "mode": 0,
239 |             "inputs": [
240 |                 {
241 |                     "name": "images",
242 |                     "type": "IMAGE",
243 |                     "link": 6
244 |                 }
245 |             ],
246 |             "outputs": [],
247 |             "properties": {
248 |                 "Node name for S&R": "PreviewImage"
249 |             }
250 |         }
251 |     ],
252 |     "links": [
253 |         [
254 |             1,
255 |             1,
256 |             0,
257 |             3,
258 |             0,
259 |             "PIPELINE"
260 |         ],
261 |         [
262 |             2,
263 |             1,
264 |             0,
265 |             4,
266 |             0,
267 |             "PIPELINE"
268 |         ],
269 |         [
270 |             3,
271 |             3,
272 |             0,
273 |             4,
274 |             1,
275 |             "SCHEDULER"
276 |         ],
277 |         [
278 |             4,
279 |             2,
280 |             0,
281 |             4,
282 |             2,
283 |             "AUTOENCODER"
284 |         ],
285 |         [
286 |             5,
287 |             4,
288 |             0,
289 |             6,
290 |             0,
291 |             "PIPELINE"
292 |         ],
293 |         [
294 |             6,
295 |             6,
296 |             0,
297 |             7,
298 |             0,
299 |             "IMAGE"
300 |         ]
301 |     ],
302 |     "groups": [],
303 |     "config": {},
304 |     "extra": {
305 |         "ds": {
306 |             "scale": 0.8264462809917354,
307 |             "offset": [
308 |                 137.93343318382662,
309 |                 114.90373842918925
310 |             ]
311 |         }
312 |     },
313 |     "version": 0.4
314 | }


--------------------------------------------------------------------------------
/workflows/t2mv_sdxl_ldm.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "last_node_id": 10,
  3 |     "last_link_id": 15,
  4 |     "nodes": [
  5 |         {
  6 |             "id": 4,
  7 |             "type": "LdmVaeLoader",
  8 |             "pos": [
  9 |                 247.52098083496094,
 10 |                 558.488525390625
 11 |             ],
 12 |             "size": [
 13 |                 315,
 14 |                 58
 15 |             ],
 16 |             "flags": {},
 17 |             "order": 0,
 18 |             "mode": 0,
 19 |             "inputs": [],
 20 |             "outputs": [
 21 |                 {
 22 |                     "name": "AUTOENCODER",
 23 |                     "type": "AUTOENCODER",
 24 |                     "links": [
 25 |                         14
 26 |                     ],
 27 |                     "slot_index": 0
 28 |                 }
 29 |             ],
 30 |             "properties": {
 31 |                 "Node name for S&R": "LdmVaeLoader"
 32 |             },
 33 |             "widgets_values": [
 34 |                 "sdxl_vae.safetensors"
 35 |             ]
 36 |         },
 37 |         {
 38 |             "id": 10,
 39 |             "type": "DiffusersMVModelMakeup",
 40 |             "pos": [
 41 |                 651.51123046875,
 42 |                 328.2811584472656
 43 |             ],
 44 |             "size": [
 45 |                 315,
 46 |                 170
 47 |             ],
 48 |             "flags": {},
 49 |             "order": 3,
 50 |             "mode": 0,
 51 |             "inputs": [
 52 |                 {
 53 |                     "name": "pipeline",
 54 |                     "type": "PIPELINE",
 55 |                     "link": 12
 56 |                 },
 57 |                 {
 58 |                     "name": "scheduler",
 59 |                     "type": "SCHEDULER",
 60 |                     "link": 13
 61 |                 },
 62 |                 {
 63 |                     "name": "autoencoder",
 64 |                     "type": "AUTOENCODER",
 65 |                     "link": 14
 66 |                 }
 67 |             ],
 68 |             "outputs": [
 69 |                 {
 70 |                     "name": "PIPELINE",
 71 |                     "type": "PIPELINE",
 72 |                     "links": [
 73 |                         15
 74 |                     ],
 75 |                     "slot_index": 0
 76 |                 }
 77 |             ],
 78 |             "properties": {
 79 |                 "Node name for S&R": "DiffusersMVModelMakeup"
 80 |             },
 81 |             "widgets_values": [
 82 |                 true,
 83 |                 "huanngzh/mv-adapter",
 84 |                 "mvadapter_t2mv_sdxl.safetensors",
 85 |                 6
 86 |             ]
 87 |         },
 88 |         {
 89 |             "id": 8,
 90 |             "type": "PreviewImage",
 91 |             "pos": [
 92 |                 1521.929443359375,
 93 |                 102.87110137939453
 94 |             ],
 95 |             "size": [
 96 |                 337.5841064453125,
 97 |                 545.9476318359375
 98 |             ],
 99 |             "flags": {},
100 |             "order": 5,
101 |             "mode": 0,
102 |             "inputs": [
103 |                 {
104 |                     "name": "images",
105 |                     "type": "IMAGE",
106 |                     "link": 7
107 |                 }
108 |             ],
109 |             "outputs": [],
110 |             "properties": {
111 |                 "Node name for S&R": "PreviewImage"
112 |             },
113 |             "widgets_values": []
114 |         },
115 |         {
116 |             "id": 9,
117 |             "type": "DiffusersMVSchedulerLoader",
118 |             "pos": [
119 |                 236.58033752441406,
120 |                 346.41693115234375
121 |             ],
122 |             "size": [
123 |                 327.5999755859375,
124 |                 130
125 |             ],
126 |             "flags": {},
127 |             "order": 2,
128 |             "mode": 0,
129 |             "inputs": [
130 |                 {
131 |                     "name": "pipeline",
132 |                     "type": "PIPELINE",
133 |                     "link": 9
134 |                 }
135 |             ],
136 |             "outputs": [
137 |                 {
138 |                     "name": "SCHEDULER",
139 |                     "type": "SCHEDULER",
140 |                     "links": [
141 |                         13
142 |                     ],
143 |                     "slot_index": 0
144 |                 }
145 |             ],
146 |             "properties": {
147 |                 "Node name for S&R": "DiffusersMVSchedulerLoader"
148 |             },
149 |             "widgets_values": [
150 |                 "DDIM",
151 |                 true,
152 |                 "interpolated",
153 |                 8
154 |             ]
155 |         },
156 |         {
157 |             "id": 1,
158 |             "type": "LdmPipelineLoader",
159 |             "pos": [
160 |                 245.8949432373047,
161 |                 130.0254364013672
162 |             ],
163 |             "size": [
164 |                 315,
165 |                 122
166 |             ],
167 |             "flags": {},
168 |             "order": 1,
169 |             "mode": 0,
170 |             "inputs": [],
171 |             "outputs": [
172 |                 {
173 |                     "name": "PIPELINE",
174 |                     "type": "PIPELINE",
175 |                     "links": [
176 |                         9,
177 |                         12
178 |                     ],
179 |                     "slot_index": 0
180 |                 },
181 |                 {
182 |                     "name": "AUTOENCODER",
183 |                     "type": "AUTOENCODER",
184 |                     "links": null
185 |                 },
186 |                 {
187 |                     "name": "SCHEDULER",
188 |                     "type": "SCHEDULER",
189 |                     "links": null
190 |                 }
191 |             ],
192 |             "properties": {
193 |                 "Node name for S&R": "LdmPipelineLoader"
194 |             },
195 |             "widgets_values": [
196 |                 "sd_xl_base_1.0.safetensors",
197 |                 "MVAdapterT2MVSDXLPipeline"
198 |             ]
199 |         },
200 |         {
201 |             "id": 7,
202 |             "type": "DiffusersMVSampler",
203 |             "pos": [
204 |                 1050.723388671875,
205 |                 218.12826538085938
206 |             ],
207 |             "size": [
208 |                 400,
209 |                 314
210 |             ],
211 |             "flags": {},
212 |             "order": 4,
213 |             "mode": 0,
214 |             "inputs": [
215 |                 {
216 |                     "name": "pipeline",
217 |                     "type": "PIPELINE",
218 |                     "link": 15
219 |                 },
220 |                 {
221 |                     "name": "reference_image",
222 |                     "type": "IMAGE",
223 |                     "link": null,
224 |                     "shape": 7
225 |                 }
226 |             ],
227 |             "outputs": [
228 |                 {
229 |                     "name": "IMAGE",
230 |                     "type": "IMAGE",
231 |                     "links": [
232 |                         7
233 |                     ],
234 |                     "slot_index": 0
235 |                 }
236 |             ],
237 |             "properties": {
238 |                 "Node name for S&R": "DiffusersMVSampler"
239 |             },
240 |             "widgets_values": [
241 |                 6,
242 |                 "an astronaut riding a horse",
243 |                 "watermark, ugly, deformed, noisy, blurry, low contrast",
244 |                 768,
245 |                 768,
246 |                 50,
247 |                 7,
248 |                 1081631136394980,
249 |                 "randomize"
250 |             ]
251 |         }
252 |     ],
253 |     "links": [
254 |         [
255 |             7,
256 |             7,
257 |             0,
258 |             8,
259 |             0,
260 |             "IMAGE"
261 |         ],
262 |         [
263 |             9,
264 |             1,
265 |             0,
266 |             9,
267 |             0,
268 |             "PIPELINE"
269 |         ],
270 |         [
271 |             12,
272 |             1,
273 |             0,
274 |             10,
275 |             0,
276 |             "PIPELINE"
277 |         ],
278 |         [
279 |             13,
280 |             9,
281 |             0,
282 |             10,
283 |             1,
284 |             "SCHEDULER"
285 |         ],
286 |         [
287 |             14,
288 |             4,
289 |             0,
290 |             10,
291 |             2,
292 |             "AUTOENCODER"
293 |         ],
294 |         [
295 |             15,
296 |             10,
297 |             0,
298 |             7,
299 |             0,
300 |             "PIPELINE"
301 |         ]
302 |     ],
303 |     "groups": [],
304 |     "config": {},
305 |     "extra": {
306 |         "ds": {
307 |             "scale": 0.8264462809917354,
308 |             "offset": [
309 |                 227.13784724632666,
310 |                 76.3208087416891
311 |             ]
312 |         }
313 |     },
314 |     "version": 0.4
315 | }


--------------------------------------------------------------------------------
/workflows/t2mv_sdxl_ldm_controlnet.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "last_node_id": 20,
  3 |     "last_link_id": 24,
  4 |     "nodes": [
  5 |         {
  6 |             "id": 4,
  7 |             "type": "LdmVaeLoader",
  8 |             "pos": [
  9 |                 247.52098083496094,
 10 |                 558.488525390625
 11 |             ],
 12 |             "size": [
 13 |                 315,
 14 |                 82
 15 |             ],
 16 |             "flags": {},
 17 |             "order": 0,
 18 |             "mode": 0,
 19 |             "inputs": [],
 20 |             "outputs": [
 21 |                 {
 22 |                     "name": "AUTOENCODER",
 23 |                     "type": "AUTOENCODER",
 24 |                     "links": [
 25 |                         14
 26 |                     ],
 27 |                     "slot_index": 0
 28 |                 }
 29 |             ],
 30 |             "properties": {
 31 |                 "Node name for S&R": "LdmVaeLoader"
 32 |             },
 33 |             "widgets_values": [
 34 |                 "sdxl_vae.safetensors",
 35 |                 true
 36 |             ]
 37 |         },
 38 |         {
 39 |             "id": 9,
 40 |             "type": "DiffusersMVSchedulerLoader",
 41 |             "pos": [
 42 |                 236.58033752441406,
 43 |                 346.41693115234375
 44 |             ],
 45 |             "size": [
 46 |                 327.5999755859375,
 47 |                 130
 48 |             ],
 49 |             "flags": {},
 50 |             "order": 8,
 51 |             "mode": 0,
 52 |             "inputs": [
 53 |                 {
 54 |                     "name": "pipeline",
 55 |                     "type": "PIPELINE",
 56 |                     "link": 9
 57 |                 }
 58 |             ],
 59 |             "outputs": [
 60 |                 {
 61 |                     "name": "SCHEDULER",
 62 |                     "type": "SCHEDULER",
 63 |                     "links": [
 64 |                         13
 65 |                     ],
 66 |                     "slot_index": 0
 67 |                 }
 68 |             ],
 69 |             "properties": {
 70 |                 "Node name for S&R": "DiffusersMVSchedulerLoader"
 71 |             },
 72 |             "widgets_values": [
 73 |                 "DDIM",
 74 |                 true,
 75 |                 "interpolated",
 76 |                 8
 77 |             ]
 78 |         },
 79 |         {
 80 |             "id": 1,
 81 |             "type": "LdmPipelineLoader",
 82 |             "pos": [
 83 |                 246.9536590576172,
 84 |                 153.577880859375
 85 |             ],
 86 |             "size": [
 87 |                 315,
 88 |                 122
 89 |             ],
 90 |             "flags": {},
 91 |             "order": 1,
 92 |             "mode": 0,
 93 |             "inputs": [],
 94 |             "outputs": [
 95 |                 {
 96 |                     "name": "PIPELINE",
 97 |                     "type": "PIPELINE",
 98 |                     "links": [
 99 |                         9,
100 |                         12
101 |                     ],
102 |                     "slot_index": 0
103 |                 },
104 |                 {
105 |                     "name": "AUTOENCODER",
106 |                     "type": "AUTOENCODER",
107 |                     "links": null
108 |                 },
109 |                 {
110 |                     "name": "SCHEDULER",
111 |                     "type": "SCHEDULER",
112 |                     "links": null
113 |                 }
114 |             ],
115 |             "properties": {
116 |                 "Node name for S&R": "LdmPipelineLoader"
117 |             },
118 |             "widgets_values": [
119 |                 "sd_xl_base_1.0.safetensors",
120 |                 "MVAdapterT2MVSDXLPipeline"
121 |             ]
122 |         },
123 |         {
124 |             "id": 10,
125 |             "type": "DiffusersMVModelMakeup",
126 |             "pos": [
127 |                 654.0358276367188,
128 |                 338.9443054199219
129 |             ],
130 |             "size": [
131 |                 315,
132 |                 218
133 |             ],
134 |             "flags": {},
135 |             "order": 10,
136 |             "mode": 0,
137 |             "inputs": [
138 |                 {
139 |                     "name": "pipeline",
140 |                     "type": "PIPELINE",
141 |                     "link": 12
142 |                 },
143 |                 {
144 |                     "name": "scheduler",
145 |                     "type": "SCHEDULER",
146 |                     "link": 13
147 |                 },
148 |                 {
149 |                     "name": "autoencoder",
150 |                     "type": "AUTOENCODER",
151 |                     "link": 14
152 |                 }
153 |             ],
154 |             "outputs": [
155 |                 {
156 |                     "name": "PIPELINE",
157 |                     "type": "PIPELINE",
158 |                     "links": [
159 |                         16
160 |                     ],
161 |                     "slot_index": 0
162 |                 }
163 |             ],
164 |             "properties": {
165 |                 "Node name for S&R": "DiffusersMVModelMakeup"
166 |             },
167 |             "widgets_values": [
168 |                 true,
169 |                 "huanngzh/mv-adapter",
170 |                 "mvadapter_t2mv_sdxl.safetensors",
171 |                 6,
172 |                 true,
173 |                 false
174 |             ]
175 |         },
176 |         {
177 |             "id": 11,
178 |             "type": "LoadImage",
179 |             "pos": [
180 |                 220.8171844482422,
181 |                 -212.83360290527344
182 |             ],
183 |             "size": [
184 |                 210,
185 |                 314
186 |             ],
187 |             "flags": {},
188 |             "order": 2,
189 |             "mode": 0,
190 |             "inputs": [],
191 |             "outputs": [
192 |                 {
193 |                     "name": "IMAGE",
194 |                     "type": "IMAGE",
195 |                     "links": [
196 |                         18
197 |                     ],
198 |                     "slot_index": 0
199 |                 },
200 |                 {
201 |                     "name": "MASK",
202 |                     "type": "MASK",
203 |                     "links": null
204 |                 }
205 |             ],
206 |             "properties": {
207 |                 "Node name for S&R": "LoadImage"
208 |             },
209 |             "widgets_values": [
210 |                 "scribble_0.png",
211 |                 "image"
212 |             ]
213 |         },
214 |         {
215 |             "id": 14,
216 |             "type": "LoadImage",
217 |             "pos": [
218 |                 451.238037109375,
219 |                 -214.33116149902344
220 |             ],
221 |             "size": [
222 |                 214.94398498535156,
223 |                 314
224 |             ],
225 |             "flags": {},
226 |             "order": 3,
227 |             "mode": 0,
228 |             "inputs": [],
229 |             "outputs": [
230 |                 {
231 |                     "name": "IMAGE",
232 |                     "type": "IMAGE",
233 |                     "links": [
234 |                         19
235 |                     ],
236 |                     "slot_index": 0
237 |                 },
238 |                 {
239 |                     "name": "MASK",
240 |                     "type": "MASK",
241 |                     "links": null
242 |                 }
243 |             ],
244 |             "properties": {
245 |                 "Node name for S&R": "LoadImage"
246 |             },
247 |             "widgets_values": [
248 |                 "scribble_1.png",
249 |                 "image"
250 |             ]
251 |         },
252 |         {
253 |             "id": 15,
254 |             "type": "LoadImage",
255 |             "pos": [
256 |                 684.787841796875,
257 |                 -216.00900268554688
258 |             ],
259 |             "size": [
260 |                 210,
261 |                 314
262 |             ],
263 |             "flags": {},
264 |             "order": 4,
265 |             "mode": 0,
266 |             "inputs": [],
267 |             "outputs": [
268 |                 {
269 |                     "name": "IMAGE",
270 |                     "type": "IMAGE",
271 |                     "links": [
272 |                         20
273 |                     ],
274 |                     "slot_index": 0
275 |                 },
276 |                 {
277 |                     "name": "MASK",
278 |                     "type": "MASK",
279 |                     "links": null
280 |                 }
281 |             ],
282 |             "properties": {
283 |                 "Node name for S&R": "LoadImage"
284 |             },
285 |             "widgets_values": [
286 |                 "scribble_2.png",
287 |                 "image"
288 |             ]
289 |         },
290 |         {
291 |             "id": 16,
292 |             "type": "LoadImage",
293 |             "pos": [
294 |                 911.8894653320312,
295 |                 -214.99267578125
296 |             ],
297 |             "size": [
298 |                 210,
299 |                 314
300 |             ],
301 |             "flags": {},
302 |             "order": 5,
303 |             "mode": 0,
304 |             "inputs": [],
305 |             "outputs": [
306 |                 {
307 |                     "name": "IMAGE",
308 |                     "type": "IMAGE",
309 |                     "links": [
310 |                         21
311 |                     ],
312 |                     "slot_index": 0
313 |                 },
314 |                 {
315 |                     "name": "MASK",
316 |                     "type": "MASK",
317 |                     "links": null
318 |                 }
319 |             ],
320 |             "properties": {
321 |                 "Node name for S&R": "LoadImage"
322 |             },
323 |             "widgets_values": [
324 |                 "scribble_3.png",
325 |                 "image"
326 |             ]
327 |         },
328 |         {
329 |             "id": 17,
330 |             "type": "LoadImage",
331 |             "pos": [
332 |                 1140.20751953125,
333 |                 -213.4659423828125
334 |             ],
335 |             "size": [
336 |                 210,
337 |                 314
338 |             ],
339 |             "flags": {},
340 |             "order": 6,
341 |             "mode": 0,
342 |             "inputs": [],
343 |             "outputs": [
344 |                 {
345 |                     "name": "IMAGE",
346 |                     "type": "IMAGE",
347 |                     "links": [
348 |                         22
349 |                     ],
350 |                     "slot_index": 0
351 |                 },
352 |                 {
353 |                     "name": "MASK",
354 |                     "type": "MASK",
355 |                     "links": null
356 |                 }
357 |             ],
358 |             "properties": {
359 |                 "Node name for S&R": "LoadImage"
360 |             },
361 |             "widgets_values": [
362 |                 "scribble_4.png",
363 |                 "image"
364 |             ]
365 |         },
366 |         {
367 |             "id": 18,
368 |             "type": "LoadImage",
369 |             "pos": [
370 |                 1370.2098388671875,
371 |                 -214.5530548095703
372 |             ],
373 |             "size": [
374 |                 210,
375 |                 314
376 |             ],
377 |             "flags": {},
378 |             "order": 7,
379 |             "mode": 0,
380 |             "inputs": [],
381 |             "outputs": [
382 |                 {
383 |                     "name": "IMAGE",
384 |                     "type": "IMAGE",
385 |                     "links": [
386 |                         23
387 |                     ],
388 |                     "slot_index": 0
389 |                 },
390 |                 {
391 |                     "name": "MASK",
392 |                     "type": "MASK",
393 |                     "links": null
394 |                 }
395 |             ],
396 |             "properties": {
397 |                 "Node name for S&R": "LoadImage"
398 |             },
399 |             "widgets_values": [
400 |                 "scribble_5.png",
401 |                 "image"
402 |             ]
403 |         },
404 |         {
405 |             "id": 20,
406 |             "type": "ControlImagePreprocessor",
407 |             "pos": [
408 |                 1646.3026123046875,
409 |                 -156.30767822265625
410 |             ],
411 |             "size": [
412 |                 327.5999755859375,
413 |                 182
414 |             ],
415 |             "flags": {},
416 |             "order": 9,
417 |             "mode": 0,
418 |             "inputs": [
419 |                 {
420 |                     "name": "front_view",
421 |                     "type": "IMAGE",
422 |                     "link": 18
423 |                 },
424 |                 {
425 |                     "name": "front_right_view",
426 |                     "type": "IMAGE",
427 |                     "link": 19
428 |                 },
429 |                 {
430 |                     "name": "right_view",
431 |                     "type": "IMAGE",
432 |                     "link": 20
433 |                 },
434 |                 {
435 |                     "name": "back_view",
436 |                     "type": "IMAGE",
437 |                     "link": 21
438 |                 },
439 |                 {
440 |                     "name": "left_view",
441 |                     "type": "IMAGE",
442 |                     "link": 22
443 |                 },
444 |                 {
445 |                     "name": "front_left_view",
446 |                     "type": "IMAGE",
447 |                     "link": 23
448 |                 }
449 |             ],
450 |             "outputs": [
451 |                 {
452 |                     "name": "IMAGE",
453 |                     "type": "IMAGE",
454 |                     "links": [
455 |                         24
456 |                     ],
457 |                     "slot_index": 0
458 |                 }
459 |             ],
460 |             "properties": {
461 |                 "Node name for S&R": "ControlImagePreprocessor"
462 |             },
463 |             "widgets_values": [
464 |                 768,
465 |                 768
466 |             ]
467 |         },
468 |         {
469 |             "id": 19,
470 |             "type": "ControlNetModelLoader",
471 |             "pos": [
472 |                 777.6534423828125,
473 |                 168.4779052734375
474 |             ],
475 |             "size": [
476 |                 315,
477 |                 58
478 |             ],
479 |             "flags": {},
480 |             "order": 11,
481 |             "mode": 0,
482 |             "inputs": [
483 |                 {
484 |                     "name": "pipeline",
485 |                     "type": "PIPELINE",
486 |                     "link": 16
487 |                 }
488 |             ],
489 |             "outputs": [
490 |                 {
491 |                     "name": "PIPELINE",
492 |                     "type": "PIPELINE",
493 |                     "links": [
494 |                         17
495 |                     ],
496 |                     "slot_index": 0
497 |                 }
498 |             ],
499 |             "properties": {
500 |                 "Node name for S&R": "ControlNetModelLoader"
501 |             },
502 |             "widgets_values": [
503 |                 "xinsir/controlnet-scribble-sdxl-1.0"
504 |             ]
505 |         },
506 |         {
507 |             "id": 8,
508 |             "type": "PreviewImage",
509 |             "pos": [
510 |                 1707.440185546875,
511 |                 93.78192138671875
512 |             ],
513 |             "size": [
514 |                 337.5841064453125,
515 |                 545.9476318359375
516 |             ],
517 |             "flags": {},
518 |             "order": 13,
519 |             "mode": 0,
520 |             "inputs": [
521 |                 {
522 |                     "name": "images",
523 |                     "type": "IMAGE",
524 |                     "link": 7
525 |                 }
526 |             ],
527 |             "outputs": [],
528 |             "properties": {
529 |                 "Node name for S&R": "PreviewImage"
530 |             },
531 |             "widgets_values": []
532 |         },
533 |         {
534 |             "id": 7,
535 |             "type": "DiffusersMVSampler",
536 |             "pos": [
537 |                 1211.4144287109375,
538 |                 199.29754638671875
539 |             ],
540 |             "size": [
541 |                 400,
542 |                 358
543 |             ],
544 |             "flags": {},
545 |             "order": 12,
546 |             "mode": 0,
547 |             "inputs": [
548 |                 {
549 |                     "name": "pipeline",
550 |                     "type": "PIPELINE",
551 |                     "link": 17
552 |                 },
553 |                 {
554 |                     "name": "reference_image",
555 |                     "type": "IMAGE",
556 |                     "link": null,
557 |                     "shape": 7
558 |                 },
559 |                 {
560 |                     "name": "controlnet_image",
561 |                     "type": "IMAGE",
562 |                     "link": 24,
563 |                     "shape": 7
564 |                 }
565 |             ],
566 |             "outputs": [
567 |                 {
568 |                     "name": "IMAGE",
569 |                     "type": "IMAGE",
570 |                     "links": [
571 |                         7
572 |                     ],
573 |                     "slot_index": 0
574 |                 }
575 |             ],
576 |             "properties": {
577 |                 "Node name for S&R": "DiffusersMVSampler"
578 |             },
579 |             "widgets_values": [
580 |                 6,
581 |                 "A 3D model of Finn the Human from the animated television series Adventure Time. He is wearing his iconic blue shirt and green backpack and has a neutral expression on his face. He is standing in a relaxed pose with his left foot slightly forward and his right foot back. His arms are at his sides and his head is turned slightly to the right. The model is made up of simple shapes and has a stylized, cartoon-like appearance.",
582 |                 "watermark, ugly, deformed, noisy, blurry, low contrast",
583 |                 768,
584 |                 768,
585 |                 50,
586 |                 7,
587 |                 153327331713128,
588 |                 "randomize",
589 |                 0.7000000000000001
590 |             ]
591 |         }
592 |     ],
593 |     "links": [
594 |         [
595 |             7,
596 |             7,
597 |             0,
598 |             8,
599 |             0,
600 |             "IMAGE"
601 |         ],
602 |         [
603 |             9,
604 |             1,
605 |             0,
606 |             9,
607 |             0,
608 |             "PIPELINE"
609 |         ],
610 |         [
611 |             12,
612 |             1,
613 |             0,
614 |             10,
615 |             0,
616 |             "PIPELINE"
617 |         ],
618 |         [
619 |             13,
620 |             9,
621 |             0,
622 |             10,
623 |             1,
624 |             "SCHEDULER"
625 |         ],
626 |         [
627 |             14,
628 |             4,
629 |             0,
630 |             10,
631 |             2,
632 |             "AUTOENCODER"
633 |         ],
634 |         [
635 |             16,
636 |             10,
637 |             0,
638 |             19,
639 |             0,
640 |             "PIPELINE"
641 |         ],
642 |         [
643 |             17,
644 |             19,
645 |             0,
646 |             7,
647 |             0,
648 |             "PIPELINE"
649 |         ],
650 |         [
651 |             18,
652 |             11,
653 |             0,
654 |             20,
655 |             0,
656 |             "IMAGE"
657 |         ],
658 |         [
659 |             19,
660 |             14,
661 |             0,
662 |             20,
663 |             1,
664 |             "IMAGE"
665 |         ],
666 |         [
667 |             20,
668 |             15,
669 |             0,
670 |             20,
671 |             2,
672 |             "IMAGE"
673 |         ],
674 |         [
675 |             21,
676 |             16,
677 |             0,
678 |             20,
679 |             3,
680 |             "IMAGE"
681 |         ],
682 |         [
683 |             22,
684 |             17,
685 |             0,
686 |             20,
687 |             4,
688 |             "IMAGE"
689 |         ],
690 |         [
691 |             23,
692 |             18,
693 |             0,
694 |             20,
695 |             5,
696 |             "IMAGE"
697 |         ],
698 |         [
699 |             24,
700 |             20,
701 |             0,
702 |             7,
703 |             2,
704 |             "IMAGE"
705 |         ]
706 |     ],
707 |     "groups": [],
708 |     "config": {},
709 |     "extra": {
710 |         "ds": {
711 |             "scale": 0.8264462809917354,
712 |             "offset": [
713 |                 -77.40918400367313,
714 |                 268.5256134291891
715 |             ]
716 |         }
717 |     },
718 |     "version": 0.4
719 | }


--------------------------------------------------------------------------------
/workflows/t2mv_sdxl_ldm_lora.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "last_node_id": 11,
  3 |     "last_link_id": 17,
  4 |     "nodes": [
  5 |         {
  6 |             "id": 4,
  7 |             "type": "LdmVaeLoader",
  8 |             "pos": [
  9 |                 247.52098083496094,
 10 |                 558.488525390625
 11 |             ],
 12 |             "size": [
 13 |                 315,
 14 |                 82
 15 |             ],
 16 |             "flags": {},
 17 |             "order": 0,
 18 |             "mode": 0,
 19 |             "inputs": [],
 20 |             "outputs": [
 21 |                 {
 22 |                     "name": "AUTOENCODER",
 23 |                     "type": "AUTOENCODER",
 24 |                     "links": [
 25 |                         14
 26 |                     ],
 27 |                     "slot_index": 0
 28 |                 }
 29 |             ],
 30 |             "properties": {
 31 |                 "Node name for S&R": "LdmVaeLoader"
 32 |             },
 33 |             "widgets_values": [
 34 |                 "sdxl_vae.safetensors",
 35 |                 true
 36 |             ]
 37 |         },
 38 |         {
 39 |             "id": 9,
 40 |             "type": "DiffusersMVSchedulerLoader",
 41 |             "pos": [
 42 |                 236.58033752441406,
 43 |                 346.41693115234375
 44 |             ],
 45 |             "size": [
 46 |                 327.5999755859375,
 47 |                 130
 48 |             ],
 49 |             "flags": {},
 50 |             "order": 2,
 51 |             "mode": 0,
 52 |             "inputs": [
 53 |                 {
 54 |                     "name": "pipeline",
 55 |                     "type": "PIPELINE",
 56 |                     "link": 9
 57 |                 }
 58 |             ],
 59 |             "outputs": [
 60 |                 {
 61 |                     "name": "SCHEDULER",
 62 |                     "type": "SCHEDULER",
 63 |                     "links": [
 64 |                         13
 65 |                     ],
 66 |                     "slot_index": 0
 67 |                 }
 68 |             ],
 69 |             "properties": {
 70 |                 "Node name for S&R": "DiffusersMVSchedulerLoader"
 71 |             },
 72 |             "widgets_values": [
 73 |                 "DDIM",
 74 |                 true,
 75 |                 "interpolated",
 76 |                 8
 77 |             ]
 78 |         },
 79 |         {
 80 |             "id": 1,
 81 |             "type": "LdmPipelineLoader",
 82 |             "pos": [
 83 |                 245.8949432373047,
 84 |                 130.0254364013672
 85 |             ],
 86 |             "size": [
 87 |                 315,
 88 |                 122
 89 |             ],
 90 |             "flags": {},
 91 |             "order": 1,
 92 |             "mode": 0,
 93 |             "inputs": [],
 94 |             "outputs": [
 95 |                 {
 96 |                     "name": "PIPELINE",
 97 |                     "type": "PIPELINE",
 98 |                     "links": [
 99 |                         9,
100 |                         12
101 |                     ],
102 |                     "slot_index": 0
103 |                 },
104 |                 {
105 |                     "name": "AUTOENCODER",
106 |                     "type": "AUTOENCODER",
107 |                     "links": null
108 |                 },
109 |                 {
110 |                     "name": "SCHEDULER",
111 |                     "type": "SCHEDULER",
112 |                     "links": null
113 |                 }
114 |             ],
115 |             "properties": {
116 |                 "Node name for S&R": "LdmPipelineLoader"
117 |             },
118 |             "widgets_values": [
119 |                 "sd_xl_base_1.0.safetensors",
120 |                 "MVAdapterT2MVSDXLPipeline"
121 |             ]
122 |         },
123 |         {
124 |             "id": 10,
125 |             "type": "DiffusersMVModelMakeup",
126 |             "pos": [
127 |                 659.5178833007812,
128 |                 174.95619201660156
129 |             ],
130 |             "size": [
131 |                 315,
132 |                 214
133 |             ],
134 |             "flags": {},
135 |             "order": 3,
136 |             "mode": 0,
137 |             "inputs": [
138 |                 {
139 |                     "name": "pipeline",
140 |                     "type": "PIPELINE",
141 |                     "link": 12
142 |                 },
143 |                 {
144 |                     "name": "scheduler",
145 |                     "type": "SCHEDULER",
146 |                     "link": 13
147 |                 },
148 |                 {
149 |                     "name": "autoencoder",
150 |                     "type": "AUTOENCODER",
151 |                     "link": 14
152 |                 },
153 |                 {
154 |                     "name": "lora",
155 |                     "type": "L",
156 |                     "link": null,
157 |                     "shape": 7
158 |                 }
159 |             ],
160 |             "outputs": [
161 |                 {
162 |                     "name": "PIPELINE",
163 |                     "type": "PIPELINE",
164 |                     "links": [
165 |                         16
166 |                     ],
167 |                     "slot_index": 0
168 |                 }
169 |             ],
170 |             "properties": {
171 |                 "Node name for S&R": "DiffusersMVModelMakeup"
172 |             },
173 |             "widgets_values": [
174 |                 true,
175 |                 "huanngzh/mv-adapter",
176 |                 "mvadapter_t2mv_sdxl.safetensors",
177 |                 6,
178 |                 true
179 |             ]
180 |         },
181 |         {
182 |             "id": 7,
183 |             "type": "DiffusersMVSampler",
184 |             "pos": [
185 |                 1050.723388671875,
186 |                 218.12826538085938
187 |             ],
188 |             "size": [
189 |                 400,
190 |                 314
191 |             ],
192 |             "flags": {},
193 |             "order": 5,
194 |             "mode": 0,
195 |             "inputs": [
196 |                 {
197 |                     "name": "pipeline",
198 |                     "type": "PIPELINE",
199 |                     "link": 17
200 |                 },
201 |                 {
202 |                     "name": "reference_image",
203 |                     "type": "IMAGE",
204 |                     "link": null,
205 |                     "shape": 7
206 |                 }
207 |             ],
208 |             "outputs": [
209 |                 {
210 |                     "name": "IMAGE",
211 |                     "type": "IMAGE",
212 |                     "links": [
213 |                         7
214 |                     ],
215 |                     "slot_index": 0
216 |                 }
217 |             ],
218 |             "properties": {
219 |                 "Node name for S&R": "DiffusersMVSampler"
220 |             },
221 |             "widgets_values": [
222 |                 6,
223 |                 "3d style, a fox with flowers around it",
224 |                 "watermark, ugly, deformed, noisy, blurry, low contrast",
225 |                 768,
226 |                 768,
227 |                 50,
228 |                 7,
229 |                 66009262624567,
230 |                 "randomize"
231 |             ]
232 |         },
233 |         {
234 |             "id": 8,
235 |             "type": "PreviewImage",
236 |             "pos": [
237 |                 1521.929443359375,
238 |                 102.87110137939453
239 |             ],
240 |             "size": [
241 |                 337.5841064453125,
242 |                 545.9476318359375
243 |             ],
244 |             "flags": {},
245 |             "order": 6,
246 |             "mode": 0,
247 |             "inputs": [
248 |                 {
249 |                     "name": "images",
250 |                     "type": "IMAGE",
251 |                     "link": 7
252 |                 }
253 |             ],
254 |             "outputs": [],
255 |             "properties": {
256 |                 "Node name for S&R": "PreviewImage"
257 |             },
258 |             "widgets_values": []
259 |         },
260 |         {
261 |             "id": 11,
262 |             "type": "LoraModelLoader",
263 |             "pos": [
264 |                 656.7574462890625,
265 |                 490.3138427734375
266 |             ],
267 |             "size": [
268 |                 315,
269 |                 82
270 |             ],
271 |             "flags": {},
272 |             "order": 4,
273 |             "mode": 0,
274 |             "inputs": [
275 |                 {
276 |                     "name": "pipeline",
277 |                     "type": "PIPELINE",
278 |                     "link": 16
279 |                 }
280 |             ],
281 |             "outputs": [
282 |                 {
283 |                     "name": "PIPELINE",
284 |                     "type": "PIPELINE",
285 |                     "links": [
286 |                         17
287 |                     ],
288 |                     "slot_index": 0
289 |                 }
290 |             ],
291 |             "properties": {
292 |                 "Node name for S&R": "LoraModelLoader"
293 |             },
294 |             "widgets_values": [
295 |                 "anime_sdxl_v1.safetensors",
296 |                 1
297 |             ]
298 |         }
299 |     ],
300 |     "links": [
301 |         [
302 |             7,
303 |             7,
304 |             0,
305 |             8,
306 |             0,
307 |             "IMAGE"
308 |         ],
309 |         [
310 |             9,
311 |             1,
312 |             0,
313 |             9,
314 |             0,
315 |             "PIPELINE"
316 |         ],
317 |         [
318 |             12,
319 |             1,
320 |             0,
321 |             10,
322 |             0,
323 |             "PIPELINE"
324 |         ],
325 |         [
326 |             13,
327 |             9,
328 |             0,
329 |             10,
330 |             1,
331 |             "SCHEDULER"
332 |         ],
333 |         [
334 |             14,
335 |             4,
336 |             0,
337 |             10,
338 |             2,
339 |             "AUTOENCODER"
340 |         ],
341 |         [
342 |             16,
343 |             10,
344 |             0,
345 |             11,
346 |             0,
347 |             "PIPELINE"
348 |         ],
349 |         [
350 |             17,
351 |             11,
352 |             0,
353 |             7,
354 |             0,
355 |             "PIPELINE"
356 |         ]
357 |     ],
358 |     "groups": [],
359 |     "config": {},
360 |     "extra": {
361 |         "ds": {
362 |             "scale": 0.803552158862256,
363 |             "offset": [
364 |                 -210.691560897588,
365 |                 -27.3938617433714
366 |             ]
367 |         }
368 |     },
369 |     "version": 0.4
370 | }


--------------------------------------------------------------------------------