├── mattepro_nodes.py
├── src
│   ├── __init__.py
│   ├── SDMatte
│   │   ├── README.md
│   │   ├── scheduler
│   │   │   └── scheduler_config.json
│   │   ├── tokenizer
│   │   │   ├── special_tokens_map.json
│   │   │   └── tokenizer_config.json
│   │   ├── vae
│   │   │   └── config.json
│   │   ├── text_encoder
│   │   │   └── config.json
│   │   ├── unet
│   │   │   └── config.json
│   │   └── .gitattributes
│   ├── modeling
│   │   ├── SDMatte
│   │   │   ├── __init__.py
│   │   │   └── meta_arch.py
│   │   └── __init__.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── utils.py
│   │   └── replace.py
│   └── .gitignore
├── node.zip
├── example_workflow
│   ├── test_1.png
│   ├── test_2.png
│   └── 超强抠图遮罩细化工作流.json
├── __init__.py
├── requirements.txt
├── .gitattributes
├── .github
│   └── workflows
│       ├── publish_action.yml
│       └── publish.yml
├── pyproject.toml
├── .gitignore
├── README_CN.md
├── README.md
└── sdmatte_nodes.py
/mattepro_nodes.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | # Make the SDMatte directory an importable package
2 |
3 |
--------------------------------------------------------------------------------
/src/SDMatte/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | license: mit
3 | ---
4 |
--------------------------------------------------------------------------------
/src/modeling/SDMatte/__init__.py:
--------------------------------------------------------------------------------
1 | from .meta_arch import *
2 |
--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import *
2 | from .replace import *
3 |
--------------------------------------------------------------------------------
/node.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flybirdxx/ComfyUI-SDMatte/HEAD/node.zip
--------------------------------------------------------------------------------
/src/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | from .SDMatte import *
2 | # from .LiteSDMatte import *
3 |
--------------------------------------------------------------------------------
/example_workflow/test_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flybirdxx/ComfyUI-SDMatte/HEAD/example_workflow/test_1.png
--------------------------------------------------------------------------------
/example_workflow/test_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flybirdxx/ComfyUI-SDMatte/HEAD/example_workflow/test_2.png
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from .sdmatte_nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
2 |
3 | __all__ = [
4 | "NODE_CLASS_MAPPINGS",
5 | "NODE_DISPLAY_NAME_MAPPINGS",
6 | ]
7 |
8 |
9 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | diffusers>=0.25.0
2 | transformers>=4.38.0
3 | timm>=0.9.7
4 | einops>=0.7.0
5 | easydict>=1.10
6 | omegaconf>=2.3.0
7 | fvcore>=0.1.5.post20221221
8 | torchvision>=0.16.0
9 | opencv-python>=4.8.0
10 | safetensors>=0.4.0
11 |
12 |
--------------------------------------------------------------------------------
/src/SDMatte/scheduler/scheduler_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "_class_name": "DDIMScheduler",
3 | "_diffusers_version": "0.8.0",
4 | "beta_end": 0.012,
5 | "beta_schedule": "scaled_linear",
6 | "beta_start": 0.00085,
7 | "clip_sample": false,
8 | "num_train_timesteps": 1000,
9 | "prediction_type": "v_prediction",
10 | "set_alpha_to_one": false,
11 | "skip_prk_steps": true,
12 | "steps_offset": 1,
13 | "trained_betas": null
14 | }
15 |
--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .vscode/
3 | .history/
4 | .hypothesis/
5 |
6 | infer_output/
7 | output/
8 | checkpoints/
9 | engine/
10 | dinov2/
11 | detectron2/
12 | modeling/LiteSDMatte/
13 |
14 | configs/LiteSDMatte.py
15 | script/eva.sh
16 | script/train.sh
17 | script/infer.sh
18 | model_arch.txt
19 | teacher_model_arch.txt
20 | attn_map_vis.py
21 | data_check.ipynb
22 | flops.py
23 | gen_one_image.py
24 | gen.sh
25 | main.py
26 | utils.ipynb
27 | utils/visualization.py
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Set default behavior to automatically normalize line endings.
2 | * text=auto
3 |
4 | # Force UTF-8 encoding for JSON files
5 | *.json text eol=lf encoding=utf-8
6 |
7 | # Force UTF-8 encoding for workflow files
8 | example_workflow/*.json text eol=lf encoding=utf-8
9 |
10 | # Python files
11 | *.py text eol=lf encoding=utf-8
12 |
13 | # Markdown files
14 | *.md text eol=lf encoding=utf-8
15 |
16 | # Text files
17 | *.txt text eol=lf encoding=utf-8
18 |
19 |
--------------------------------------------------------------------------------
/src/SDMatte/tokenizer/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {
2 | "bos_token": {
3 | "content": "<|startoftext|>",
4 | "lstrip": false,
5 | "normalized": true,
6 | "rstrip": false,
7 | "single_word": false
8 | },
9 | "eos_token": {
10 | "content": "<|endoftext|>",
11 | "lstrip": false,
12 | "normalized": true,
13 | "rstrip": false,
14 | "single_word": false
15 | },
16 | "pad_token": "!",
17 | "unk_token": {
18 | "content": "<|endoftext|>",
19 | "lstrip": false,
20 | "normalized": true,
21 | "rstrip": false,
22 | "single_word": false
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/.github/workflows/publish_action.yml:
--------------------------------------------------------------------------------
1 | name: Publish to Comfy registry
2 | on:
3 | workflow_dispatch:
4 | push:
5 | branches:
6 | - main
7 | paths:
8 | - "pyproject.toml"
9 |
10 | jobs:
11 | publish-node:
12 | name: Publish Custom Node to registry
13 | runs-on: ubuntu-latest
14 | steps:
15 | - name: Check out code
16 | uses: actions/checkout@v4
17 | - name: Publish Custom Node
18 | uses: Comfy-Org/publish-node-action@main
19 | with:
20 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} ## Add your own personal access token to your GitHub repository secrets and reference it here.
21 |
--------------------------------------------------------------------------------
/src/SDMatte/vae/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "_class_name": "AutoencoderKL",
3 | "_diffusers_version": "0.8.0",
4 | "_name_or_path": "hf-models/stable-diffusion-v2-768x768/vae",
5 | "act_fn": "silu",
6 | "block_out_channels": [
7 | 128,
8 | 256,
9 | 512,
10 | 512
11 | ],
12 | "down_block_types": [
13 | "DownEncoderBlock2D",
14 | "DownEncoderBlock2D",
15 | "DownEncoderBlock2D",
16 | "DownEncoderBlock2D"
17 | ],
18 | "in_channels": 3,
19 | "latent_channels": 4,
20 | "layers_per_block": 2,
21 | "norm_num_groups": 32,
22 | "out_channels": 3,
23 | "sample_size": 768,
24 | "up_block_types": [
25 | "UpDecoderBlock2D",
26 | "UpDecoderBlock2D",
27 | "UpDecoderBlock2D",
28 | "UpDecoderBlock2D"
29 | ]
30 | }
31 |
--------------------------------------------------------------------------------
/src/SDMatte/text_encoder/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder",
3 | "architectures": [
4 | "CLIPTextModel"
5 | ],
6 | "attention_dropout": 0.0,
7 | "bos_token_id": 0,
8 | "dropout": 0.0,
9 | "eos_token_id": 2,
10 | "hidden_act": "gelu",
11 | "hidden_size": 1024,
12 | "initializer_factor": 1.0,
13 | "initializer_range": 0.02,
14 | "intermediate_size": 4096,
15 | "layer_norm_eps": 1e-05,
16 | "max_position_embeddings": 77,
17 | "model_type": "clip_text_model",
18 | "num_attention_heads": 16,
19 | "num_hidden_layers": 23,
20 | "pad_token_id": 1,
21 | "projection_dim": 512,
22 | "torch_dtype": "float32",
23 | "transformers_version": "4.25.0.dev0",
24 | "vocab_size": 49408
25 | }
26 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish to Comfy registry
2 | on:
3 | workflow_dispatch:
4 | push:
5 | branches:
6 | - main
7 | - master
8 | paths:
9 | - "pyproject.toml"
10 |
11 | permissions:
12 | issues: write
13 |
14 | jobs:
15 | publish-node:
16 | name: Publish Custom Node to registry
17 | runs-on: ubuntu-latest
18 | if: ${{ github.repository_owner == 'flybirdxx' }}
19 | steps:
20 | - name: Check out code
21 | uses: actions/checkout@v4
22 | with:
23 | submodules: true
24 | - name: Publish Custom Node
25 | uses: Comfy-Org/publish-node-action@v1
26 | with:
27 | ## Add your own personal access token to your Github Repository secrets and reference it here.
28 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }}
29 |
--------------------------------------------------------------------------------
/src/SDMatte/tokenizer/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "add_prefix_space": false,
3 | "bos_token": {
4 | "__type": "AddedToken",
5 | "content": "<|startoftext|>",
6 | "lstrip": false,
7 | "normalized": true,
8 | "rstrip": false,
9 | "single_word": false
10 | },
11 | "do_lower_case": true,
12 | "eos_token": {
13 | "__type": "AddedToken",
14 | "content": "<|endoftext|>",
15 | "lstrip": false,
16 | "normalized": true,
17 | "rstrip": false,
18 | "single_word": false
19 | },
20 | "errors": "replace",
21 | "model_max_length": 77,
22 | "name_or_path": "hf-models/stable-diffusion-v2-768x768/tokenizer",
23 | "pad_token": "<|endoftext|>",
24 | "special_tokens_map_file": "./special_tokens_map.json",
25 | "tokenizer_class": "CLIPTokenizer",
26 | "unk_token": {
27 | "__type": "AddedToken",
28 | "content": "<|endoftext|>",
29 | "lstrip": false,
30 | "normalized": true,
31 | "rstrip": false,
32 | "single_word": false
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/src/SDMatte/unet/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "_class_name": "UNet2DConditionModel",
3 | "_diffusers_version": "0.8.0",
4 | "_name_or_path": "hf-models/stable-diffusion-v2-768x768/unet",
5 | "act_fn": "silu",
6 | "bbox_time_embed_dim": 320,
7 | "point_embeddings_input_dim": 1680,
8 | "bbox_embeddings_input_dim": 1280,
9 | "attention_head_dim": [
10 | 5,
11 | 10,
12 | 20,
13 | 20
14 | ],
15 | "block_out_channels": [
16 | 320,
17 | 640,
18 | 1280,
19 | 1280
20 | ],
21 | "center_input_sample": false,
22 | "cross_attention_dim": 1024,
23 | "down_block_types": [
24 | "CrossAttnDownBlock2D",
25 | "CrossAttnDownBlock2D",
26 | "CrossAttnDownBlock2D",
27 | "DownBlock2D"
28 | ],
29 | "downsample_padding": 1,
30 | "dual_cross_attention": false,
31 | "flip_sin_to_cos": true,
32 | "freq_shift": 0,
33 | "in_channels": 4,
34 | "layers_per_block": 2,
35 | "mid_block_scale_factor": 1,
36 | "norm_eps": 1e-05,
37 | "norm_num_groups": 32,
38 | "out_channels": 4,
39 | "sample_size": 96,
40 | "up_block_types": [
41 | "UpBlock2D",
42 | "CrossAttnUpBlock2D",
43 | "CrossAttnUpBlock2D",
44 | "CrossAttnUpBlock2D"
45 | ],
46 | "use_linear_projection": true
47 | }
48 |
--------------------------------------------------------------------------------
/src/SDMatte/.gitattributes:
--------------------------------------------------------------------------------
1 | *.7z filter=lfs diff=lfs merge=lfs -text
2 | *.arrow filter=lfs diff=lfs merge=lfs -text
3 | *.bin filter=lfs diff=lfs merge=lfs -text
4 | *.bz2 filter=lfs diff=lfs merge=lfs -text
5 | *.ckpt filter=lfs diff=lfs merge=lfs -text
6 | *.ftz filter=lfs diff=lfs merge=lfs -text
7 | *.gz filter=lfs diff=lfs merge=lfs -text
8 | *.h5 filter=lfs diff=lfs merge=lfs -text
9 | *.joblib filter=lfs diff=lfs merge=lfs -text
10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text
11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text
12 | *.model filter=lfs diff=lfs merge=lfs -text
13 | *.msgpack filter=lfs diff=lfs merge=lfs -text
14 | *.npy filter=lfs diff=lfs merge=lfs -text
15 | *.npz filter=lfs diff=lfs merge=lfs -text
16 | *.onnx filter=lfs diff=lfs merge=lfs -text
17 | *.ot filter=lfs diff=lfs merge=lfs -text
18 | *.parquet filter=lfs diff=lfs merge=lfs -text
19 | *.pb filter=lfs diff=lfs merge=lfs -text
20 | *.pickle filter=lfs diff=lfs merge=lfs -text
21 | *.pkl filter=lfs diff=lfs merge=lfs -text
22 | *.pt filter=lfs diff=lfs merge=lfs -text
23 | *.pth filter=lfs diff=lfs merge=lfs -text
24 | *.rar filter=lfs diff=lfs merge=lfs -text
25 | *.safetensors filter=lfs diff=lfs merge=lfs -text
26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27 | *.tar.* filter=lfs diff=lfs merge=lfs -text
28 | *.tar filter=lfs diff=lfs merge=lfs -text
29 | *.tflite filter=lfs diff=lfs merge=lfs -text
30 | *.tgz filter=lfs diff=lfs merge=lfs -text
31 | *.wasm filter=lfs diff=lfs merge=lfs -text
32 | *.xz filter=lfs diff=lfs merge=lfs -text
33 | *.zip filter=lfs diff=lfs merge=lfs -text
34 | *.zst filter=lfs diff=lfs merge=lfs -text
35 | *tfevents* filter=lfs diff=lfs merge=lfs -text
36 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "sdmatte"
3 | description = "SDMatte is an interactive image matting method based on stable diffusion, which supports three types of visual prompts (points, boxes, and masks) for accurately extracting target objects from natural images."
4 | version = "1.0.0"
5 | license = {file = "LICENSE"}
6 | # classifiers = [
7 | # # For OS-independent nodes (works on all operating systems)
8 | # "Operating System :: OS Independent",
9 | #
10 | # # OR for OS-specific nodes, specify the supported systems:
11 | # "Operating System :: Microsoft :: Windows", # Windows specific
12 | # "Operating System :: POSIX :: Linux", # Linux specific
13 | # "Operating System :: MacOS", # macOS specific
14 | #
15 | # # GPU Accelerator support. Pick the ones that are supported by your extension.
16 | # "Environment :: GPU :: NVIDIA CUDA", # NVIDIA CUDA support
17 | # "Environment :: GPU :: AMD ROCm", # AMD ROCm support
18 | # "Environment :: GPU :: Intel Arc", # Intel Arc support
19 | # "Environment :: NPU :: Huawei Ascend", # Huawei Ascend support
20 | # "Environment :: GPU :: Apple Metal", # Apple Metal support
21 | # ]
22 |
23 | dependencies = ["diffusers>=0.25.0", "transformers>=4.38.0", "timm>=0.9.7", "einops>=0.7.0", "easydict>=1.10", "omegaconf>=2.3.0", "fvcore>=0.1.5.post20221221", "torchvision>=0.16.0", "opencv-python>=4.8.0", "safetensors>=0.4.0"]
24 |
25 | [project.urls]
26 | Repository = "https://github.com/flybirdxx/ComfyUI-SDMatte"
27 | # Used by Comfy Registry https://registry.comfy.org
28 | Documentation = "https://github.com/flybirdxx/ComfyUI-SDMatte/wiki"
29 | "Bug Tracker" = "https://github.com/flybirdxx/ComfyUI-SDMatte/issues"
30 |
31 | [tool.comfy]
32 | PublisherId = ""
33 | DisplayName = "ComfyUI-SDMatte"
34 | Icon = ""
35 | includes = []
36 | # "requires-comfyui" = ">=1.0.0" # ComfyUI version compatibility
37 |
38 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.so
6 | .Python
7 | build/
8 | develop-eggs/
9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 | MANIFEST
23 |
24 | # PyInstaller
25 | *.manifest
26 | *.spec
27 |
28 | # Installer logs
29 | pip-log.txt
30 | pip-delete-this-directory.txt
31 |
32 | # Unit test / coverage reports
33 | htmlcov/
34 | .tox/
35 | .coverage
36 | .coverage.*
37 | .cache
38 | nosetests.xml
39 | coverage.xml
40 | *.cover
41 | .hypothesis/
42 | .pytest_cache/
43 |
44 | # Translations
45 | *.mo
46 | *.pot
47 |
48 | # Django stuff:
49 | *.log
50 | local_settings.py
51 | db.sqlite3
52 |
53 | # Flask stuff:
54 | instance/
55 | .webassets-cache
56 |
57 | # Scrapy stuff:
58 | .scrapy
59 |
60 | # Sphinx documentation
61 | docs/_build/
62 |
63 | # PyBuilder
64 | target/
65 |
66 | # Jupyter Notebook
67 | .ipynb_checkpoints
68 |
69 | # pyenv
70 | .python-version
71 |
72 | # celery beat schedule file
73 | celerybeat-schedule
74 |
75 | # SageMath parsed files
76 | *.sage.py
77 |
78 | # Environments
79 | .env
80 | .venv
81 | env/
82 | venv/
83 | ENV/
84 | env.bak/
85 | venv.bak/
86 |
87 | # Spyder project settings
88 | .spyderproject
89 | .spyproject
90 |
91 | # Rope project settings
92 | .ropeproject
93 |
94 | # mkdocs documentation
95 | /site
96 |
97 | # mypy
98 | .mypy_cache/
99 | .dmypy.json
100 | dmypy.json
101 |
102 | # IDE
103 | .vscode/
104 | .idea/
105 | *.swp
106 | *.swo
107 | *~
108 |
109 | # OS
110 | .DS_Store
111 | .DS_Store?
112 | ._*
113 | .Spotlight-V100
114 | .Trashes
115 | ehthumbs.db
116 | Thumbs.db
117 |
118 | # Project specific
119 | infer_output/
120 | output/
121 | checkpoints/
122 | engine/
123 | dinov2/
124 | detectron2/
125 | modeling/LiteSDMatte/
126 | configs/LiteSDMatte.py
127 | script/eva.sh
128 | script/train.sh
129 | script/infer.sh
130 | model_arch.txt
131 | teacher_model_arch.txt
132 | attn_map_vis.py
133 | data_check.ipynb
134 | flops.py
135 | gen_one_image.py
136 | gen.sh
137 | main.py
138 | utils.ipynb
139 | utils/visualization.py
140 |
--------------------------------------------------------------------------------
/src/utils/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import torch.nn as nn
4 | from torch.nn import Conv2d
5 | from torch.nn.parameter import Parameter
6 | from diffusers.models.attention_processor import Attention, AttnProcessor
7 | from .replace import custom_prepare_attention_mask, custom_get_attention_scores
8 | import cv2
9 | import torch
10 | import numpy as np
11 |
12 |
13 | def replace_unet_conv_in(unet, num):
14 | # replace the first conv layer to accept 4 * num in_channels
15 | _weight = unet.conv_in.weight.clone() # [320, 4, 3, 3]
16 | _bias = unet.conv_in.bias.clone() # [320]
17 | _weight = _weight.repeat((1, num, 1, 1))  # tile the pretrained weights across the new input channels
18 | # rescale so the summed activations keep their original magnitude
19 | _weight = _weight / num
20 | # new conv_in channel
21 | _n_convin_out_channel = unet.conv_in.out_channels
22 | _new_conv_in = Conv2d(4 * num, _n_convin_out_channel, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
23 | _new_conv_in.weight = Parameter(_weight)
24 | _new_conv_in.bias = Parameter(_bias)
25 | unet.conv_in = _new_conv_in
26 | print("Unet conv_in layer is replaced")
27 | # replace config
28 | unet.config["in_channels"] = 4 * num
29 | print("Unet config is updated")
30 | return unet
31 |
32 |
33 | def add_aux_conv_in(unet):
34 | aux_conv_in = nn.Conv2d(in_channels=4, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
35 | aux_conv_in.weight.data[:320, :, :, :] = unet.conv_in.weight.data.clone()
36 | aux_conv_in.weight.data[320:, :, :, :] = 0.0
37 | aux_conv_in.bias.data[:320] = unet.conv_in.bias.data.clone()
38 | aux_conv_in.bias.data[320:] = 0.0
39 | unet.aux_conv_in = aux_conv_in
40 | print("add aux_conv_in layer for unet")
41 | return unet
42 |
43 |
44 | def replace_attention_mask_method(module, residual_connection):
45 | if isinstance(module, Attention):
46 | module.processor = AttnProcessor()
47 | if hasattr(module, "prepare_attention_mask"):
48 | module.prepare_attention_mask = custom_prepare_attention_mask.__get__(module)
49 | if hasattr(module, "cross_attention_dim") and module.cross_attention_dim == 320:
50 | module.residual_connection = residual_connection
51 | if hasattr(module, "get_attention_scores"):
52 | module.get_attention_scores = custom_get_attention_scores.__get__(module)
53 |
54 | # Recursively traverse all child modules
55 | for child_name, child_module in module.named_children():
56 | replace_attention_mask_method(child_module, residual_connection)
57 |
58 |
59 | erosion_kernels = [None] + [cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size)) for size in range(1, 30)]
60 |
61 |
62 | def get_unknown_tensor_from_pred(pred, rand_width=30, train_mode=True):
63 | ### pred: N, 1, H, W
64 | N, C, H, W = pred.shape
65 |
66 | pred = pred.data.cpu().numpy()
67 | uncertain_area = np.ones_like(pred, dtype=np.uint8)
68 | uncertain_area[pred < 1.0 / 255.0] = 0
69 | uncertain_area[pred > 1 - 1.0 / 255.0] = 0
70 |
71 | for n in range(N):
72 | uncertain_area_ = uncertain_area[n, 0, :, :] # H, W
73 | if train_mode:
74 | width = np.random.randint(1, rand_width)
75 | else:
76 | width = rand_width // 2
77 | uncertain_area_ = cv2.dilate(uncertain_area_, erosion_kernels[width])
78 | uncertain_area[n, 0, :, :] = uncertain_area_
79 |
80 | weight = np.zeros_like(uncertain_area)
81 | weight[uncertain_area == 1] = 1
82 | weight = torch.from_numpy(weight).float().cuda()
83 | return weight
84 |
--------------------------------------------------------------------------------
/README_CN.md:
--------------------------------------------------------------------------------
1 | # ComfyUI-SDMatte
2 |
3 | [English](README.md) | 简体中文
4 |
5 | 基于 [SDMatte](https://github.com/vivoCameraResearch/SDMatte) 的 ComfyUI 自定义节点插件,用于交互式图像抠图。
6 |
7 | ## 🚀 快速开始
8 |
9 | > 📺 **视频教程**:[ComfyUI-SDMatte 使用教程](https://www.bilibili.com/video/BV1L6bzz8Ene/?spm_id_from=333.1387.homepage.video_card.click&vd_source=b340fd050dbe0d3e2ce863af909f1ee8)
10 | > 🔧 **示例工作流**:[高级抠图与遮罩优化工作流](https://www.runninghub.ai/post/1955928733028941826?inviteCode=rh-v1041)
11 | > 💡 **推荐**:先观看视频教程了解使用方法,再下载工作流进行实践
12 |
13 | ## 📖 简介
14 |
15 | SDMatte 是一个基于稳定扩散(Stable Diffusion)的交互式图像抠图方法,由 vivo 摄像研究团队开发,已被 ICCV 2025 接收。该方法利用预训练扩散模型的强大先验知识,支持多种视觉提示(点、框、掩码)来精确提取自然图像中的目标对象。
16 |
17 | 本插件将 SDMatte 集成到 ComfyUI 中,提供简洁易用的节点接口,专注于 trimap 引导的抠图功能,并内置了多种 VRAM 优化策略。
18 |
19 | ## 🖼️ 示例
20 |
21 | ### 抠图效果
22 |
23 |
24 | | 原始图像 | 三值图 | 抠图结果 |
25 | | --- | --- | --- |
26 | |  |  | Alpha 遮罩输出 |
27 |
36 | *示例工作流展示了 SDMatte 基于三值图引导的高精度抠图能力。*
37 |
38 | ## ✨ 特性
39 |
40 | - 🎯 **高精度抠图**:基于扩散模型的强大先验,能够处理复杂边缘细节
41 | - 🖼️ **Trimap 引导**:支持三值图(trimap)引导的精确抠图
42 | - 🚀 **VRAM 优化**:内置混合精度、注意力切片等多种显存优化策略
43 | - 🔧 **ComfyUI 集成**:完全兼容 ComfyUI 工作流系统
44 | - 📥 **模型自动下载**:首次使用时自动下载模型权重
45 | - 📱 **灵活尺寸**:支持多种推理分辨率(512-1024px)
46 |
47 | ## 🛠️ 安装
48 |
49 | ### 1. 下载插件
50 |
51 | 将本插件放置到 ComfyUI 的自定义节点目录:
52 |
53 | ```bash
54 | cd ComfyUI/custom_nodes/
55 | git clone https://github.com/flybirdxx/ComfyUI-SDMatte.git
56 | ```
57 |
58 | ### 2. 安装依赖
59 |
60 | ComfyUI 会在启动时自动安装 `requirements.txt` 中的依赖包:
61 |
62 | - diffusers
63 | - transformers
64 | - timm
65 | - einops
66 | - easydict
67 | - omegaconf
68 | - fvcore
69 | - torchvision
70 | - opencv-python
71 | - safetensors
67 |
68 | ### 3. 自动模型下载
69 |
70 | **无需手动下载模型。**
71 |
72 | 首次使用 `Apply SDMatte` 节点时,它会自动检查并从 Hugging Face 下载所需的模型权重。模型将被存放在:
73 | `ComfyUI/models/SDMatte/`
74 |
75 | 您可以直接在节点内选择使用标准版 (`SDMatte.safetensors`) 或增强版 (`SDMatte_plus.safetensors`)。
76 |
77 | ### 4. 重启 ComfyUI
78 |
79 | 重启 ComfyUI 以加载新的自定义节点。
80 |
81 | ## 🎮 使用方法
82 |
83 | ### 节点说明
84 |
85 | #### Apply SDMatte(SDMatte 应用)
86 |
87 | - **功能**:在一个节点内完成模型加载和抠图应用。
88 | - **输入**:
89 | - `ckpt_name`:选择要使用的模型(`SDMatte.safetensors` 或 `SDMatte_plus.safetensors`)。如果本地不存在,将自动下载。
90 | - `image`:输入图像(ComfyUI IMAGE 格式)
91 | - `trimap`:三值图掩码(ComfyUI MASK 格式)
92 | - `inference_size`:推理分辨率(512/640/768/896/1024)
93 | - `is_transparent`:图像是否包含透明区域
94 | - `output_mode`:输出模式(`alpha_only`, `matted_rgba`, `matted_rgb`)
95 | - `mask_refine`:启用遮罩优化以减少背景干扰
96 | - `trimap_constraint`:用于优化的三值图约束强度
97 | - `force_cpu`:强制使用 CPU 推理(可选)
98 | - **输出**:
99 | - `alpha_mask`:抠图结果的 alpha 遮罩
100 | - `matted_image`:抠图后的图像结果
101 |
102 | ### 基础工作流
103 |
104 | 1. **Load Image**:加载需要抠图的图像
105 | 2. **创建 Trimap**:使用绘图工具或其他节点创建三值图(代码示例见本列表之后)
106 | - 黑色(0):确定背景
107 | - 白色(1):确定前景
108 | - 灰色(0.5):未知区域
109 | 3. **Apply SDMatte**:应用抠图
110 | 4. **Preview Image**:预览抠图结果
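111 |
112 | 三值图也可以用代码生成。下面是一个简单示例(使用本插件依赖中的 OpenCV 与 NumPy;25 像素的过渡带宽度与函数名 `trimap_from_mask` 仅为示意,并非 SDMatte 的规定):
113 |
114 | ```python
115 | import cv2
116 | import numpy as np
117 |
118 | def trimap_from_mask(mask01, band=25):
119 |     """mask01:H x W、取值 [0, 1] 的浮点遮罩;返回取值 {0, 0.5, 1} 的三值图。"""
120 |     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (band, band))
121 |     binary = (mask01 > 0.5).astype(np.uint8)
122 |     fg = cv2.erode(binary, kernel)      # 确定前景 -> 白色(1)
123 |     bg = cv2.erode(1 - binary, kernel)  # 确定背景 -> 黑色(0)
124 |     trimap = np.full(mask01.shape, 0.5, dtype=np.float32)  # 未知区域 -> 灰色(0.5)
125 |     trimap[fg == 1] = 1.0
126 |     trimap[bg == 1] = 0.0
127 |     return trimap
128 | ```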
111 |
112 | ### 推荐设置
113 |
114 | - **推理分辨率**:1024(最高质量)或 768(平衡性能)
115 | - **透明标志**:根据输入图像是否有透明通道设置
116 | - **强制 CPU**:仅在 GPU 显存不足时使用
117 |
118 | ## 🔧 技术细节
119 |
120 | ### 数据处理
121 |
122 | - **输入图像**:自动调整到推理分辨率,归一化到 [-1, 1]
123 | - **Trimap**:调整到推理分辨率,映射到 [-1, 1] 范围
124 | - **输出**:调整回原始分辨率,clamp 到 [0, 1] 范围
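125 |
126 | 上述三个映射对应的张量操作如下(浓缩自本插件的 `sdmatte_nodes.py`):
127 |
128 | ```python
129 | from torchvision import transforms
130 |
131 | def preprocess(image_bchw, trimap_b1hw, size=1024):
132 |     # 图像:缩放到推理分辨率,再从 [0, 1] 归一化到 [-1, 1]
133 |     img = transforms.Resize((size, size), antialias=True)(image_bchw)
134 |     img = transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3)(img)
135 |     # Trimap:缩放后从 [0, 1] 映射到 [-1, 1]
136 |     tri = transforms.Resize((size, size))(trimap_b1hw) * 2 - 1
137 |     return img, tri
138 |
139 | def postprocess(pred_b1hw, orig_h, orig_w):
140 |     # 输出:缩放回原始分辨率并 clamp 到 [0, 1]
141 |     return transforms.Resize((orig_h, orig_w))(pred_b1hw).clamp(0, 1)
142 | ```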
125 |
126 | ### VRAM 优化
127 |
128 | 插件内置多种显存优化策略(自动启用):
129 |
130 | - **混合精度**:使用 FP16 autocast 减少显存占用
131 | - **注意力切片**:SlicedAttnProcessor(slice_size=1) 最大化显存节省
132 | - **显存清理**:推理前后自动清理 CUDA 缓存
133 | - **设备管理**:智能的设备分配和模型移动
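134 |
135 | 这些策略在代码中只有寥寥数行(浓缩自 `sdmatte_nodes.py`;`model` 与 `data` 代表已加载的 SDMatte 模块及其输入字典):
136 |
137 | ```python
138 | import torch
139 | from diffusers.models.attention_processor import SlicedAttnProcessor
140 |
141 | # 注意力切片:以少量速度换取大量显存节省(slice_size=1 最省显存)
142 | model.unet.set_attn_processor(SlicedAttnProcessor(slice_size=1))
143 |
144 | torch.cuda.empty_cache()  # 推理前清理 CUDA 缓存
145 | with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.float16):
146 |     alpha = model(data)   # FP16 autocast 可大幅降低激活显存占用
147 | torch.cuda.empty_cache()  # 推理后再次清理
148 | ```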
134 |
135 | ### 模型加载
136 |
137 | - **权重格式**:支持 .pth 和 .safetensors 格式
138 | - **安全加载**:处理 omegaconf 对象,支持 weights_only 模式
139 | - **嵌套结构**:自动处理复杂的 checkpoint 结构
140 | - **错误恢复**:多重fallback机制确保加载成功
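141 |
142 | `sdmatte_nodes.py` 中加载路径的浓缩示意(`ckpt_path` 与 `model` 为占位名):
143 |
144 | ```python
145 | from safetensors import safe_open
146 |
147 | # 从 .safetensors checkpoint 读取张量(不涉及 pickle 反序列化,更安全)
148 | state = {}
149 | with safe_open(ckpt_path, framework="pt", device="cpu") as f:
150 |     for key in f.keys():
151 |         state[key] = f.get_tensor(key)
152 |
153 | # 解包常见的嵌套写法(主要针对 .pth 风格的 checkpoint)
154 | for wrapper in ("state_dict", "model_state_dict", "ema", "model"):
155 |     if isinstance(state.get(wrapper), dict):
156 |         state = state[wrapper]
157 |         break
158 |
159 | model.load_state_dict(state, strict=False)
160 | ```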
141 |
142 | ## ❓ 常见问题
143 |
144 | ### Q: 节点无法被搜索到?
145 | A: 确保插件目录结构正确,重启 ComfyUI,检查控制台是否有错误信息。
146 |
147 | ### Q: 模型加载失败?
148 | A: 检查 SDMatte.safetensors 文件路径,确保基础模型目录结构完整,查看控制台详细错误信息。
149 |
150 | ### Q: 推理时显存不足?
151 | A: 尝试降低推理分辨率,启用 `force_cpu` 选项,或关闭其他占用显存的程序。
152 |
153 | ### Q: 抠图效果不理想?
154 | A: 优化 trimap 质量,确保前景/背景/未知区域标注准确,尝试不同的推理分辨率。
155 |
156 | ### Q: 首次推理很慢?
157 | A: 首次运行需要编译 CUDA 内核,后续推理会显著加速。
158 |
159 | ### Q: 应该选择哪个模型版本?
160 | A:
161 | - **SDMatte.safetensors(标准版)**:文件较小(~11GB),推理速度快,适合大多数场景
162 | - **SDMatte_plus.safetensors(增强版)**:文件较大,精度更高,适合对质量要求极高的专业用途
163 | - 建议先使用标准版测试,如需更高质量再升级到增强版
164 |
165 | ## 📋 系统要求
166 |
167 | - **ComfyUI**:最新版本
168 | - **Python**:3.8+
169 | - **PyTorch**:1.12+ (支持 CUDA 推荐)
170 | - **显存**:8GB+ 推荐(支持 CPU 推理)
171 | - **依赖**:diffusers、transformers、timm、einops、omegaconf、safetensors 等(完整列表见 requirements.txt)
172 |
173 | ## 📝 版本更新日志
174 |
175 | ### v1.5.0 (2025-01-XX)
176 | - 🔄 **模型格式更新**:
177 | - 从 `.pth` 格式迁移到 `.safetensors` 格式,提供更好的安全性和性能
178 | - 更新模型下载链接,使用 Hugging Face 仓库 (1038lab/SDMatte)
179 | - 使用 SafeTensors 库改进模型加载,提供更安全的权重处理
180 | - 🔧 **技术改进**:
181 | - 增强模型加载稳定性,提供更好的错误处理
182 | - 优化模型加载过程中的内存使用
183 | - 改进与最新 ComfyUI 版本的兼容性
184 | - 📚 **文档更新**:
185 | - 更新安装说明以反映新的模型格式
186 | - 添加 SafeTensors 格式优势的相关信息
187 |
188 | ### v1.3.0 (2025-08-17)
189 | - ✨ **新增功能**:
190 | - 实现模型自动下载与检查功能,模型现在存放于 `ComfyUI/models/SDMatte/` 目录。
191 | - 🔧 **优化改进**:
192 | - 将 `SDMatte Model Loader` 和 `SDMatte Apply` 节点合并为单一的 `Apply SDMatte` 节点,简化了工作流。
193 | - 重构了部分代码,提升稳定性。
194 |
195 | ### v1.2.0 (2025-08-15)
196 | - ✨ **新增功能**:
197 | - 添加图像输出,同时输出alpha遮罩和抠图结果
198 | - 支持透明背景抠图模式
199 | - 添加多种输出模式:`alpha_only`、`matted_rgba`、`matted_rgb`
200 | - 新增遮罩优化功能,使用trimap约束过滤不需要的区域
201 | - 添加 `trimap_constraint` 参数控制约束强度
202 | - 为所有参数添加详细的tooltip说明
203 | - 🔧 **优化改进**:
204 | - 改进alpha遮罩处理逻辑,减少背景干扰
205 | - 优化前景区域提取算法
206 | - 增强低置信度区域过滤机制
207 | - 改进VRAM优化策略
208 | - 增强模型加载稳定性
209 | - 优化推理性能
210 | - 📚 **文档更新**:
211 | - 添加示例工作流链接
212 | - 添加视频教程链接
214 |
215 | ### v1.0.0 (2025-08-14)
216 | - 🎉 **初始版本**:
217 | - 基础SDMatte模型集成
218 | - 支持trimap引导抠图
219 | - 内置VRAM优化功能
220 | - 支持多种推理分辨率
221 |
222 | ## 📚 参考
223 |
224 | - **示例工作流**:[高级抠图与遮罩优化工作流](https://www.runninghub.ai/post/1955928733028941826?inviteCode=rh-v1041)
225 | - **视频教程**:[ComfyUI-SDMatte 使用教程](https://www.bilibili.com/video/BV1L6bzz8Ene/?spm_id_from=333.1387.homepage.video_card.click&vd_source=b340fd050dbe0d3e2ce863af909f1ee8)
226 | - **原始论文**:[SDMatte: Grafting Diffusion Models for Interactive Matting](https://arxiv.org/abs/2408.00321) (ICCV 2025)
227 | - **原始代码**:[vivoCameraResearch/SDMatte](https://github.com/vivoCameraResearch/SDMatte)
228 | - **模型权重**:[LongfeiHuang/SDMatte](https://huggingface.co/LongfeiHuang/SDMatte)
229 |
230 | ## 📄 许可证
231 |
232 | 本项目遵循 MIT 许可证。原始 SDMatte 项目同样使用 MIT 许可证。
233 |
234 | ## 🙏 致谢
235 |
236 | 感谢 vivo 摄像研究团队开发的优秀 SDMatte 模型,以及 Stable Diffusion、ComfyUI 社区的贡献。
237 |
238 | ## 📧 支持
239 |
240 | 如有问题或建议,请在 GitHub 上提交 Issue。
241 |
242 | ---
243 |
244 | **注意**:本插件为第三方实现,与原始 SDMatte 团队无直接关联。使用前请确保遵循相关许可证条款。
--------------------------------------------------------------------------------
/src/modeling/SDMatte/meta_arch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from transformers import CLIPTextModel, CLIPTokenizer, CLIPTextConfig
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from diffusers import DDIMScheduler, AutoencoderKL, UNet2DConditionModel
6 | from diffusers.models.embeddings import get_timestep_embedding
7 | from ...utils import replace_unet_conv_in, replace_attention_mask_method, add_aux_conv_in
8 | from ...utils.replace import CustomUNet
9 | import random
10 | import os
11 |
12 | # Resolve nested local directory layouts, e.g. "subdir/subdir/config.json"
13 | def _resolve_nested_dir(base_dir: str, subdir: str, config_filename: str) -> str:
14 | direct = os.path.join(base_dir, subdir)
15 | nested = os.path.join(base_dir, subdir, subdir)
16 | if os.path.exists(os.path.join(direct, config_filename)):
17 | return direct
18 | if os.path.exists(os.path.join(nested, config_filename)):
19 | return nested
20 | return direct
21 |
22 | AUX_INPUT_DICT = {
23 | "auto_mask": "auto_coords",
24 | "point_mask": "point_coords",
25 | "bbox_mask": "bbox_coords",
26 | "mask": "mask_coords",
27 | "trimap": "trimap_coords",
28 | }
29 |
30 | class SDMatte(nn.Module):
31 | def __init__(
32 | self,
33 | pretrained_model_name_or_path,
34 | conv_scale=3,
35 | num_inference_steps=1,
36 | aux_input="bbox_mask",
37 | use_aux_input=False,
38 | use_coor_input=True,
39 | use_dis_loss=True,
40 | use_attention_mask=True,
41 | use_encoder_attention_mask=False,
42 | add_noise=False,
43 | attn_mask_aux_input=["point_mask", "bbox_mask", "mask"],
44 | aux_input_list=["point_mask", "bbox_mask", "mask"],
45 | use_encoder_hidden_states=True,
46 | residual_connection=False,
47 | use_attention_mask_list=[True, True, True],
48 | use_encoder_hidden_states_list=[True, True, True],
49 | load_weight = True,
50 | ):
51 | super().__init__()
52 | self.init_submodule(pretrained_model_name_or_path, load_weight)
53 | self.num_inference_steps = num_inference_steps
54 | self.aux_input = aux_input
55 | self.use_aux_input = use_aux_input
56 | self.use_coor_input = use_coor_input
57 | self.use_dis_loss = use_dis_loss
58 | self.use_attention_mask = use_attention_mask
59 | self.use_encoder_attention_mask = use_encoder_attention_mask
60 | self.add_noise = add_noise
61 | self.attn_mask_aux_input = attn_mask_aux_input
62 | self.aux_input_list = aux_input_list
63 | self.use_encoder_hidden_states = use_encoder_hidden_states
64 | if use_encoder_hidden_states:
65 | self.unet = add_aux_conv_in(self.unet)
66 | if not add_noise:
67 | conv_scale -= 1
68 | if not use_aux_input:
69 | conv_scale -= 1
70 | if conv_scale > 1:
71 | self.unet = replace_unet_conv_in(self.unet, conv_scale)
72 | replace_attention_mask_method(self.unet, residual_connection)
73 | self.text_encoder.requires_grad_(False)
74 | self.vae.requires_grad_(False)
75 | self.unet.train()
76 | self.unet.use_attention_mask_list = use_attention_mask_list
77 | self.unet.use_encoder_hidden_states_list = use_encoder_hidden_states_list
78 |
79 | def init_submodule(self, pretrained_model_name_or_path, load_weight):
80 | if load_weight:
81 | text_dir = _resolve_nested_dir(pretrained_model_name_or_path, "text_encoder", "config.json")
82 | vae_dir = _resolve_nested_dir(pretrained_model_name_or_path, "vae", "config.json")
83 | unet_dir = _resolve_nested_dir(pretrained_model_name_or_path, "unet", "config.json")
84 | sched_dir = _resolve_nested_dir(pretrained_model_name_or_path, "scheduler", "scheduler_config.json")
85 | tok_dir = _resolve_nested_dir(pretrained_model_name_or_path, "tokenizer", "tokenizer_config.json")
86 |
87 | self.text_encoder = CLIPTextModel.from_pretrained(text_dir)
88 | self.vae = AutoencoderKL.from_pretrained(vae_dir)
89 | self.unet = CustomUNet.from_pretrained(
90 | unet_dir, low_cpu_mem_usage=True, ignore_mismatched_sizes=False
91 | )
92 | self.noise_scheduler = DDIMScheduler.from_pretrained(sched_dir)
93 | self.tokenizer = CLIPTokenizer.from_pretrained(tok_dir)
94 | else:
95 | text_dir = _resolve_nested_dir(pretrained_model_name_or_path, "text_encoder", "config.json")
96 | text_config = CLIPTextConfig.from_pretrained(text_dir)
97 | self.text_encoder = CLIPTextModel(text_config)
98 |
99 | vae_path = _resolve_nested_dir(pretrained_model_name_or_path, "vae", "config.json")
100 | self.vae = AutoencoderKL.from_config(AutoencoderKL.load_config(vae_path))
101 |
102 | unet_path = _resolve_nested_dir(pretrained_model_name_or_path, "unet", "config.json")
103 | self.unet = CustomUNet.from_config(
104 | CustomUNet.load_config(unet_path),
105 | low_cpu_mem_usage=True,
106 | ignore_mismatched_sizes=False
107 | )
108 |
109 | scheduler_path = os.path.join(_resolve_nested_dir(pretrained_model_name_or_path, "scheduler", "scheduler_config.json"), "scheduler_config.json")
110 | self.noise_scheduler = DDIMScheduler.from_config(DDIMScheduler.load_config(scheduler_path))
111 |
112 | tok_dir = _resolve_nested_dir(pretrained_model_name_or_path, "tokenizer", "tokenizer_config.json")
113 | self.tokenizer = CLIPTokenizer.from_pretrained(tok_dir)
114 |
115 |
116 | def forward(self, data):
117 | rgb = data["image"].cuda()
118 | B = rgb.shape[0]
119 |
120 | if self.aux_input is None and self.training:
121 | aux_input_type = random.choice(self.aux_input_list)
122 | elif self.aux_input is None:
123 | aux_input_type = "point_mask"
124 | else:
125 | aux_input_type = self.aux_input
126 |
127 | # get aux input latent
128 | if self.use_aux_input:
129 | aux_input = data[aux_input_type].cuda()
130 | aux_input = aux_input.repeat(1, 3, 1, 1)
131 | aux_input_h = self.vae.encoder(aux_input.to(rgb.dtype))
132 | aux_input_moments = self.vae.quant_conv(aux_input_h)
133 | aux_input_mean, _ = torch.chunk(aux_input_moments, 2, dim=1)
134 | aux_input_latent = aux_input_mean * self.vae.config.scaling_factor
135 | else:
136 | aux_input_latent = None
137 |
138 | # get aux coordinate
139 | coor_name = AUX_INPUT_DICT[aux_input_type]
140 | coor = data[coor_name].cuda()
141 | if coor_name == "point_coords":
142 | N = coor.shape[1]
143 | for i in range(N, 1680):  # pad point coords up to the smallest divisor of 1680 that is >= N
144 | if 1680 % i == 0:
145 | num_channels = 1680 // i
146 | pad_size = i - N
147 | padding = torch.zeros((B, pad_size), dtype=coor.dtype, device=coor.device)
148 | coor = torch.cat([coor, padding], dim=1)
149 | zero_coor = torch.zeros((B, pad_size + N), dtype=coor.dtype, device=coor.device)
150 | break
151 | if self.use_coor_input:
152 | coor = get_timestep_embedding(
153 | coor.flatten(),
154 | num_channels,
155 | flip_sin_to_cos=True,
156 | downscale_freq_shift=0,
157 | )
158 | else:
159 | coor = get_timestep_embedding(
160 | zero_coor.flatten(),
161 | num_channels,
162 | flip_sin_to_cos=True,
163 | downscale_freq_shift=0,
164 | )
165 | added_cond_kwargs = {"point_coords": coor}
166 | else:
167 | if self.use_coor_input:
168 | added_cond_kwargs = {"bbox_mask_coords": coor}
169 | else:
170 | coor = torch.tensor([[0, 0, 1, 1]] * B).cuda()
171 | added_cond_kwargs = {"bbox_mask_coords": coor}
172 |
173 | # get attention mask
174 | if self.use_attention_mask and aux_input_type in self.attn_mask_aux_input:
175 | attention_mask = data[aux_input_type].cuda()
176 | attention_mask = (attention_mask + 1) / 2
177 | attention_mask = F.interpolate(attention_mask, scale_factor=1 / 8, mode="nearest")
178 | attention_mask = attention_mask.flatten(start_dim=1)
179 | else:
180 | attention_mask = None
181 |
182 | # encode rgb to latents
183 | rgb_h = self.vae.encoder(rgb)
184 | rgb_moments = self.vae.quant_conv(rgb_h)
185 | rgb_mean, _ = torch.chunk(rgb_moments, 2, dim=1)
186 | rgb_latent = rgb_mean * self.vae.config.scaling_factor
187 |
188 | # get encoder_hidden_states
189 | if self.use_encoder_hidden_states and aux_input_latent is not None:
190 | encoder_hidden_states = self.unet.aux_conv_in(aux_input_latent)
191 | encoder_hidden_states = encoder_hidden_states.view(B, 1024, -1)
192 | encoder_hidden_states = encoder_hidden_states.permute(0, 2, 1)
193 | else:
194 | encoder_hidden_states = None  # avoid a NameError when aux hidden states are disabled
193 |
194 | if "caption" in data:
195 | prompt = data["caption"]
196 | else:
197 | prompt = [""] * B
198 | prompt = [prompt] if isinstance(prompt, str) else prompt
199 | text_inputs = self.tokenizer(
200 | prompt,
201 | padding="max_length",
202 | max_length=self.tokenizer.model_max_length,
203 | truncation=True,
204 | return_tensors="pt",
205 | )
206 | text_input_ids = text_inputs.input_ids.to("cuda")
207 | text_embed = self.text_encoder(text_input_ids)[0]
208 | encoder_hidden_states_2 = text_embed
209 |
210 | # get class_label
211 | is_trans = data["is_trans"].cuda()
212 | trans = 1 - is_trans
213 |
214 | # get timesteps
215 | timestep = torch.tensor([1], device="cuda").long()  # unused: the UNet below is called with timestep=None
216 |
217 | # unet
218 | unet_input = torch.cat([rgb_latent, aux_input_latent], dim=1)
219 | label_latent = self.unet(
220 | sample=unet_input,
221 | trans=trans,
222 | timestep=None,
223 | encoder_hidden_states=encoder_hidden_states,
224 | encoder_hidden_states_2=encoder_hidden_states_2,
225 | added_cond_kwargs=added_cond_kwargs,
226 | attention_mask=attention_mask,
227 | ).sample
228 | label_latent = label_latent / self.vae.config.scaling_factor
229 | z = self.vae.post_quant_conv(label_latent)
230 | stacked = self.vae.decoder(z)
231 | # mean of output channels
232 | label_mean = stacked.mean(dim=1, keepdim=True)
233 | output = torch.clip(label_mean, -1.0, 1.0)
234 | output = (output + 1.0) / 2.0
235 | return output
236 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ComfyUI-SDMatte
2 |
3 | English | [简体中文](README_CN.md)
4 |
5 | A ComfyUI custom node plugin based on [SDMatte](https://github.com/vivoCameraResearch/SDMatte) for interactive image matting.
6 |
7 | ## 🚀 Quick Start
8 |
9 | > 📺 **Video Tutorial**: [ComfyUI-SDMatte Tutorial](https://www.youtube.com/watch?v=PDGDTJvdo8Q)
> 🔧 **Example Workflow**: [Superior Image Matting and Mask Refinement Workflow](https://www.runninghub.ai/post/1955928733028941826)
11 | > 💡 **Recommended**: Watch the video tutorial first to understand the usage, then download the workflow for practice
12 |
13 | ## 📖 Introduction
14 |
15 | SDMatte is an interactive image matting method based on Stable Diffusion, developed by the vivo Camera Research team and accepted by ICCV 2025. This method leverages the powerful priors of pre-trained diffusion models and supports multiple visual prompts (points, boxes, masks) for accurately extracting target objects from natural images.
16 |
17 | This plugin integrates SDMatte into ComfyUI, providing a simple and easy-to-use node interface focused on trimap-guided matting functionality with built-in VRAM optimization strategies.
18 |
19 | ## 🖼️ Examples
20 |
21 | ### Matting Results
22 |
23 |
24 | | Original Image | Trimap | Matting Result |
25 | | --- | --- | --- |
26 | |  |  | Alpha mask output |
27 |
36 | *Example workflow demonstrating SDMatte's high-precision matting capabilities with trimap guidance.*
37 |
38 | ## ✨ Features
39 |
40 | - 🎯 **High-Precision Matting**: Based on powerful diffusion model priors, capable of handling complex edge details
41 | - 🖼️ **Trimap Guidance**: Supports trimap-guided precise matting
42 | - 🚀 **VRAM Optimization**: Built-in mixed precision, attention slicing, and other memory optimization strategies
43 | - 🔧 **ComfyUI Integration**: Fully compatible with ComfyUI workflow system
44 | - 📥 **Automatic Model Download**: Automatically downloads model weights on first use
45 | - 📱 **Flexible Sizes**: Supports multiple inference resolutions (512-1024px)
46 |
47 | ## 🛠️ Installation
48 |
49 | ### 1. Download Plugin
50 |
51 | Place this plugin in the ComfyUI custom nodes directory:
52 |
53 | ```bash
54 | cd ComfyUI/custom_nodes/
55 | git clone https://github.com/flybirdxx/ComfyUI-SDMatte.git
56 | ```
57 |
58 | ### 2. Install Dependencies
59 |
60 | ComfyUI will automatically install the dependencies in `requirements.txt` on startup:
61 |
62 | - diffusers
63 | - transformers
64 | - timm
65 | - einops
66 | - easydict
67 | - omegaconf
68 | - fvcore
69 | - torchvision
70 | - opencv-python
71 | - safetensors
67 |
68 | ### 3. Automatic Model Download
69 |
70 | **No manual download is required.**
71 |
72 | The first time you use the `Apply SDMatte` node, it will automatically check for and download the necessary model weights from Hugging Face. The models will be stored in:
73 | `ComfyUI/models/SDMatte/`
74 |
75 | You can select between the standard (`SDMatte.safetensors`) and enhanced (`SDMatte_plus.safetensors`) versions directly within the node.
76 |
77 | ### 4. Restart ComfyUI
78 |
79 | Restart ComfyUI to load the new custom nodes.
80 |
81 | ## 🎮 Usage
82 |
83 | ### Node Description
84 |
85 | #### Apply SDMatte
86 |
87 | - **Function**: Loads the model and applies it for matting in a single node.
88 | - **Input**:
89 | - `ckpt_name`: Select the model to use (`SDMatte.safetensors` or `SDMatte_plus.safetensors`). It will be downloaded automatically if not found.
90 | - `image`: Input image (ComfyUI IMAGE format)
91 | - `trimap`: Trimap mask (ComfyUI MASK format)
92 | - `inference_size`: Inference resolution (512/640/768/896/1024)
93 | - `is_transparent`: Whether the image contains transparent areas
94 | - `output_mode`: Output mode (`alpha_only`, `matted_rgba`, `matted_rgb`)
95 | - `mask_refine`: Enable mask refinement to reduce background interference
96 | - `trimap_constraint`: Strength of the trimap constraint for refinement
97 | - `force_cpu`: Force CPU inference (optional)
98 | - **Output**:
99 | - `alpha_mask`: Alpha mask of the matting result
100 | - `matted_image`: The matted image result
101 |
102 | ### Basic Workflow
103 |
104 | 1. **Load Image**: Load the image that needs matting
105 | 2. **Create Trimap**: Use drawing tools or other nodes to create a trimap (see the sketch after this list)
106 | - Black (0): Definite background
107 | - White (1): Definite foreground
108 | - Gray (0.5): Unknown region
109 | 3. **Apply SDMatte**: Apply matting
110 | 4. **Preview Image**: Preview matting result
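111 |
112 | A trimap can also be generated programmatically. Here is a minimal sketch using OpenCV and NumPy (both in this plugin's requirements); the 25 px band width and the helper name `trimap_from_mask` are illustrative choices, not something SDMatte mandates:
113 |
114 | ```python
115 | import cv2
116 | import numpy as np
117 |
118 | def trimap_from_mask(mask01, band=25):
119 |     """mask01: H x W float mask in [0, 1]; returns a trimap with values {0, 0.5, 1}."""
120 |     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (band, band))
121 |     binary = (mask01 > 0.5).astype(np.uint8)
122 |     fg = cv2.erode(binary, kernel)      # definite foreground -> white (1)
123 |     bg = cv2.erode(1 - binary, kernel)  # definite background -> black (0)
124 |     trimap = np.full(mask01.shape, 0.5, dtype=np.float32)  # unknown -> gray (0.5)
125 |     trimap[fg == 1] = 1.0
126 |     trimap[bg == 1] = 0.0
127 |     return trimap
128 | ```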
111 |
112 | ### Recommended Settings
113 |
114 | - **Inference Resolution**: 1024 (highest quality) or 768 (balanced performance)
115 | - **Transparent Flag**: Set according to whether input image has transparent channel
116 | - **Force CPU**: Use only when GPU VRAM is insufficient
117 |
118 | ## 🔧 Technical Details
119 |
120 | ### Data Processing
121 |
122 | - **Input Image**: Automatically resized to inference resolution, normalized to [-1, 1]
123 | - **Trimap**: Resized to inference resolution, mapped to [-1, 1] range
124 | - **Output**: Resized back to original resolution, clamped to [0, 1] range
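125 |
126 | Condensed from this plugin's `sdmatte_nodes.py`, the three mappings above come down to a few tensor operations:
127 |
128 | ```python
129 | from torchvision import transforms
130 |
131 | def preprocess(image_bchw, trimap_b1hw, size=1024):
132 |     # Image: resize to the inference resolution, then normalize [0, 1] -> [-1, 1]
133 |     img = transforms.Resize((size, size), antialias=True)(image_bchw)
134 |     img = transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3)(img)
135 |     # Trimap: resize, then map [0, 1] -> [-1, 1]
136 |     tri = transforms.Resize((size, size))(trimap_b1hw) * 2 - 1
137 |     return img, tri
138 |
139 | def postprocess(pred_b1hw, orig_h, orig_w):
140 |     # Output: resize back to the original resolution and clamp to [0, 1]
141 |     return transforms.Resize((orig_h, orig_w))(pred_b1hw).clamp(0, 1)
142 | ```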
125 |
126 | ### VRAM Optimization
127 |
128 | The plugin has built-in memory optimization strategies (automatically enabled):
129 |
130 | - **Mixed Precision**: Uses FP16 autocast to reduce VRAM usage
131 | - **Attention Slicing**: SlicedAttnProcessor(slice_size=1) maximizes VRAM savings
132 | - **Memory Cleanup**: Automatically clears CUDA cache before and after inference
133 | - **Device Management**: Smart device allocation and model movement
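134 |
135 | In code these strategies amount to only a few lines (condensed from `sdmatte_nodes.py`; `model` and `data` stand in for the loaded SDMatte module and its input dict):
136 |
137 | ```python
138 | import torch
139 | from diffusers.models.attention_processor import SlicedAttnProcessor
140 |
141 | # Attention slicing: trade a little speed for a large VRAM saving (slice_size=1 is the most frugal)
142 | model.unet.set_attn_processor(SlicedAttnProcessor(slice_size=1))
143 |
144 | torch.cuda.empty_cache()  # clear the CUDA cache before inference
145 | with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.float16):
146 |     alpha = model(data)   # FP16 autocast cuts most activation memory
147 | torch.cuda.empty_cache()  # and again after inference
148 | ```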
134 |
135 | ### Model Loading
136 |
137 | - **Weight Formats**: Supports .pth and .safetensors formats
138 | - **Safe Loading**: Handles omegaconf objects, supports weights_only mode
139 | - **Nested Structure**: Automatically handles complex checkpoint structures
140 | - **Error Recovery**: Multiple fallback mechanisms ensure successful loading
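141 |
142 | A condensed sketch of the loading path in `sdmatte_nodes.py` (`ckpt_path` and `model` are placeholders):
143 |
144 | ```python
145 | from safetensors import safe_open
146 |
147 | # Read tensors from a .safetensors checkpoint (no pickle execution involved)
148 | state = {}
149 | with safe_open(ckpt_path, framework="pt", device="cpu") as f:
150 |     for key in f.keys():
151 |         state[key] = f.get_tensor(key)
152 |
153 | # Unwrap common nesting conventions (mainly relevant for .pth-style checkpoints)
154 | for wrapper in ("state_dict", "model_state_dict", "ema", "model"):
155 |     if isinstance(state.get(wrapper), dict):
156 |         state = state[wrapper]
157 |         break
158 |
159 | model.load_state_dict(state, strict=False)
160 | ```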
141 |
142 | ## ❓ FAQ
143 |
144 | ### Q: Nodes don't show up in search?
145 | A: Ensure the plugin directory structure is correct, restart ComfyUI, check console for error messages.
146 |
147 | ### Q: Model loading failed?
148 | A: Check SDMatte.safetensors file path, ensure base model directory structure is complete, view console for detailed error messages.
149 |
150 | ### Q: Insufficient VRAM during inference?
151 | A: Try reducing inference resolution, enable `force_cpu` option, or close other VRAM-consuming programs.
152 |
153 | ### Q: Poor matting results?
154 | A: Optimize trimap quality, ensure accurate foreground/background/unknown region annotations, try different inference resolutions.
155 |
156 | ### Q: First inference is slow?
157 | A: First run needs to compile CUDA kernels, subsequent inference will be significantly faster.
158 |
159 | ### Q: Which model version should I choose?
160 | A:
161 | - **SDMatte.safetensors (Standard)**: Smaller file (~11GB), faster inference, suitable for most scenarios
162 | - **SDMatte_plus.safetensors (Enhanced)**: Larger file, higher accuracy, suitable for professional use with extremely high quality requirements
163 | - Recommend testing with standard version first, upgrade to enhanced version if higher quality is needed
164 |
165 | ## 📋 System Requirements
166 |
167 | - **ComfyUI**: Latest version
168 | - **Python**: 3.8+
169 | - **PyTorch**: 1.12+ (CUDA support recommended)
170 | - **VRAM**: 8GB+ recommended (CPU inference supported)
171 | - **Dependencies**: diffusers, transformers, timm, einops, omegaconf, safetensors, etc. (see requirements.txt for the full list)
172 |
173 | ## 📝 Changelog
174 |
175 | ### v1.5.0 (2025-01-XX)
176 | - 🔄 **Model Format Update**:
177 | - Migrated from `.pth` to `.safetensors` format for better security and performance
178 | - Updated model download URLs to use Hugging Face repository (1038lab/SDMatte)
179 | - Improved model loading with SafeTensors library for safer weight handling
180 | - 🔧 **Technical Improvements**:
181 | - Enhanced model loading stability with better error handling
182 | - Optimized memory usage during model loading process
183 | - Improved compatibility with latest ComfyUI versions
184 | - 📚 **Documentation Updates**:
185 | - Updated installation instructions to reflect new model format
186 | - Added information about SafeTensors format benefits
187 |
188 | ### v1.3.0 (2025-08-17)
189 | - ✨ **New Features**:
190 | - Implemented automatic model downloading and checking. Models are now stored in `ComfyUI/models/SDMatte/`.
191 | - 🔧 **Improvements**:
192 | - Merged `SDMatte Model Loader` and `SDMatte Apply` nodes into a single `Apply SDMatte` node for a more streamlined workflow.
193 | - Refactored code for better stability.
194 |
195 | ### v1.2.0 (2025-08-15)
196 | - ✨ **New Features**:
197 | - Added image output alongside alpha mask output
198 | - Support for transparent background matting mode
199 | - Added multiple output modes: `alpha_only`, `matted_rgba`, `matted_rgb`
200 | - Added mask refinement feature using trimap constraints to filter unwanted regions
201 | - Added `trimap_constraint` parameter to control constraint strength
202 | - Added detailed tooltips for all parameters
203 | - 🔧 **Improvements**:
204 | - Improved alpha mask processing logic to reduce background interference
205 | - Optimized foreground region extraction algorithm
206 | - Enhanced low-confidence region filtering mechanism
207 | - Improved VRAM optimization strategies
208 | - Enhanced model loading stability
209 | - Optimized inference performance
210 | - 📚 **Documentation**:
211 | - Added example workflow links
212 | - Added video tutorial links
213 | - Improved usage instructions and parameter explanations
215 |
216 | ### v1.0.0 (2025-08-14)
217 | - 🎉 **Initial Release**:
218 | - Basic SDMatte model integration
219 | - Support for trimap-guided matting
220 | - Built-in VRAM optimization features
221 | - Support for multiple inference resolutions
222 |
223 | ## 📚 References
224 |
225 | - **Example Workflow**: [Superior Image Matting and Mask Refinement Workflow](https://www.runninghub.ai/post/1955928733028941826)
226 | - **Video Tutorial**: [ComfyUI-SDMatte Tutorial](https://www.youtube.com/watch?v=PDGDTJvdo8Q)
227 | - **Original Paper**: [SDMatte: Grafting Diffusion Models for Interactive Matting](https://arxiv.org/abs/2408.00321) (ICCV 2025)
228 | - **Original Code**: [vivoCameraResearch/SDMatte](https://github.com/vivoCameraResearch/SDMatte)
229 | - **Model Weights**: [LongfeiHuang/SDMatte](https://huggingface.co/LongfeiHuang/SDMatte)
230 |
231 | ## 📄 License
232 |
233 | This project follows the MIT license. The original SDMatte project also uses the MIT license.
234 |
235 | ## 🙏 Acknowledgements
236 |
237 | Thanks to the vivo Camera Research team for developing the excellent SDMatte model, and to the Stable Diffusion and ComfyUI communities for their contributions.
238 |
239 | ## 📧 Support
240 |
241 | If you have any questions or suggestions, please submit an Issue on GitHub.
242 |
243 | ---
244 |
245 | **Note**: This plugin is a third-party implementation and is not directly affiliated with the original SDMatte team. Please ensure compliance with relevant license terms before use.
246 |
--------------------------------------------------------------------------------
/sdmatte_nodes.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torchvision import transforms
4 |
5 | import folder_paths
6 | import comfy
7 |
8 | # Get the models directory from ComfyUI
9 | MODEL_DIR = os.path.join(folder_paths.models_dir, "SDMatte")
10 |
11 | # Register the SDMatte folder path with ComfyUI
12 | folder_paths.add_model_folder_path("SDMatte", MODEL_DIR)
13 |
14 | MODEL_URLS = {
15 | "SDMatte.safetensors": "https://huggingface.co/1038lab/SDMatte/resolve/main/SDMatte.safetensors",
16 | "SDMatte_plus.safetensors": "https://huggingface.co/1038lab/SDMatte/resolve/main/SDMatte_plus.safetensors"
17 | }
18 |
19 | def download_model(model_name, models_dir=MODEL_DIR, model_urls=MODEL_URLS):
20 | # 1) Search in all registered SDMatte paths first
21 | all_search_paths = folder_paths.get_folder_paths("SDMatte") or []
22 | for search_path in all_search_paths:
23 | check_path = os.path.join(search_path, model_name)
24 | if os.path.isfile(check_path):
25 | try:
26 | if os.path.getsize(check_path) > 0:
27 | print(f"[SDMatte] Found model at: {check_path}")
28 | return check_path
29 | except OSError:
30 | pass # couldn't stat; continue
31 |
32 | # 2) Not found -> prepare to download to models_dir
33 | url = model_urls.get(model_name)
34 | if not url:
35 | raise ValueError(f"[SDMatte] Unknown model name: {model_name}")
36 |
37 | target_path = os.path.join(models_dir, model_name)
38 | os.makedirs(os.path.dirname(target_path), exist_ok=True)
39 |
40 | # if target exists and non-empty, use it
41 | if os.path.isfile(target_path):
42 | try:
43 | if os.path.getsize(target_path) > 0:
44 | return target_path
45 | except OSError:
46 | pass
47 |
48 | print(f"[SDMatte] Model '{model_name}' not found. Downloading to {target_path}...")
49 |
50 | tmp_path = target_path + ".tmp"
51 |
52 | try:
53 | try:
54 | import requests
55 | try:
56 | from tqdm import tqdm # optional
57 | except Exception:
58 | tqdm = None
59 |
60 | with requests.get(url, stream=True, timeout=60) as response:
61 | response.raise_for_status()
62 | total_size = int(response.headers.get('content-length', 0) or 0)
63 |
64 | with open(tmp_path, 'wb') as f:
65 | bar = None
66 | if tqdm and total_size > 0:
67 | bar = tqdm(desc=model_name, total=total_size, unit='iB', unit_scale=True, unit_divisor=1024)
68 |
69 | for chunk in response.iter_content(chunk_size=1024*1024):
70 | if chunk:
71 | f.write(chunk)
72 | if bar:
73 | bar.update(len(chunk))
74 |
75 | if bar:
76 | bar.close()
77 |
78 | # optional size check
79 | if total_size > 0 and os.path.getsize(tmp_path) != total_size:
80 | raise IOError(f"[SDMatte] Incomplete download: {os.path.getsize(tmp_path)} != {total_size}")
85 |
86 | except ImportError:  # fall back to the standard library if requests is unavailable
87 | import urllib.request
88 | urllib.request.urlretrieve(url, tmp_path)
89 |
90 | # concurrent safety: if another process already finished
91 | if os.path.isfile(target_path) and os.path.getsize(target_path) > 0:
92 | try:
93 | os.remove(tmp_path)
94 | except OSError:
95 | pass
96 | return target_path
97 |
98 | os.replace(tmp_path, target_path) # atomic
99 | print(f"[SDMatte] Download complete: {target_path}")
100 | return target_path
101 |
102 | except KeyboardInterrupt:
103 | if os.path.exists(tmp_path):
104 | try:
105 | os.remove(tmp_path)
106 | except OSError:
107 | pass
108 | raise
109 | except Exception:
110 | if os.path.exists(tmp_path):
111 | try:
112 | os.remove(tmp_path)
113 | except OSError:
114 | pass
115 | raise
116 |
117 | SDMatteCore = None
118 |
119 |
120 | def _resize_norm_image_bchw(image_bchw: torch.Tensor, size_hw=(1024, 1024)) -> torch.Tensor:
121 | resize = transforms.Resize(size_hw, antialias=True)
122 | norm = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
123 | x = resize(image_bchw)
124 | x = norm(x)
125 | return x
126 |
127 |
128 | def _resize_mask_b1hw(mask_b1hw: torch.Tensor, size_hw=(1024, 1024)) -> torch.Tensor:
129 | resize = transforms.Resize(size_hw)
130 | return resize(mask_b1hw)
131 |
132 |
133 | class SDMatteApply:
134 |
135 | @classmethod
136 | def INPUT_TYPES(s):
137 | return {
138 | "required": {
139 | "ckpt_name": (list(MODEL_URLS.keys()), ),
140 | "image": ("IMAGE", {"tooltip": "需要进行抠图的输入图像"}),
141 | "trimap": ("MASK", {"tooltip": "三值图掩码:白色=前景,黑色=背景,灰色=未知区域"}),
142 | "inference_size": ([512, 640, 768, 896, 1024], {
143 | "default": 1024,
144 | "tooltip": "推理分辨率,越高质量越好但速度越慢。推荐1024(最高质量)或768(平衡性能)"
145 | }),
146 | "is_transparent": ("BOOLEAN", {
147 | "default": False,
148 | "tooltip": "输入图像是否包含透明通道。如果原图有透明背景请启用"
149 | }),
150 | "output_mode": (["alpha_only", "matted_rgba", "matted_rgb"], {
151 | "default": "alpha_only",
152 | "tooltip": "输出模式:alpha_only=只输出遮罩;matted_rgba=透明背景抠图;matted_rgb=黑色背景抠图(推荐,避免干扰)"
153 | }),
154 | "mask_refine": ("BOOLEAN", {
155 | "default": True,
156 | "tooltip": "启用遮罩优化,使用trimap约束过滤不需要的区域,减少背景干扰"
157 | }),
158 | "trimap_constraint": ("FLOAT", {
159 | "default": 0.8, "min": 0.1, "max": 1.0, "step": 0.1,
160 | "tooltip": "trimap约束强度(0.1-1.0)。越高约束越严格,0.8=平衡,0.9=严格过滤,0.6=宽松保留"
161 | }),
162 | },
163 | "optional": {
164 | "force_cpu": ("BOOLEAN", {"default": False}),
165 | },
166 | }
167 |
168 | RETURN_TYPES = ("MASK", "IMAGE")
169 | RETURN_NAMES = ("alpha_mask", "matted_image")
170 | FUNCTION = "apply_matte"
171 | CATEGORY = "Matting/SDMatte"
172 |
173 | def apply_matte(self, ckpt_name, image, trimap, inference_size, is_transparent, output_mode, mask_refine, trimap_constraint, force_cpu=False):
174 | device = comfy.model_management.get_torch_device()
175 | if force_cpu:
176 | device = torch.device('cpu')
177 |
178 | global SDMatteCore
179 | if SDMatteCore is None:
180 | from .src.modeling.SDMatte.meta_arch import SDMatte as SDMatteCore
181 |
182 | base_dir = os.path.dirname(__file__)
183 | pretrained_repo = os.path.join(base_dir, "src", "SDMatte")
184 | required_subdirs = ["text_encoder", "vae", "unet", "scheduler", "tokenizer"]
185 | missing = [d for d in required_subdirs if not os.path.isdir(os.path.join(pretrained_repo, d))]
186 | if missing:
187 | raise FileNotFoundError(f"Missing directories: {missing}. Expected path: {pretrained_repo}")
188 |
189 | sdmatte_model = SDMatteCore(
190 | pretrained_model_name_or_path=pretrained_repo,
191 | load_weight=False,
192 | use_aux_input=True,
193 | aux_input="trimap",
194 | aux_input_list=["point_mask", "bbox_mask", "mask", "trimap"],
195 | attn_mask_aux_input=["point_mask", "bbox_mask", "mask", "trimap"],
196 | use_encoder_hidden_states=True,
197 | use_attention_mask=True,
198 | add_noise=False,
199 | )
200 |
201 | ckpt_path = download_model(ckpt_name)
202 |
203 | from safetensors import safe_open
204 | state_dict = {}
205 | with safe_open(ckpt_path, framework="pt", device="cpu") as f:
206 | for key in f.keys():
207 | state_dict[key] = f.get_tensor(key)
208 | state_root = state_dict
209 |
210 | candidate_keys = [
211 | 'state_dict','model_state_dict','params','weights',
212 | 'ema','model_ema','ema_state_dict','net','module','model','unet'
213 | ]
214 | state_dict = None
215 | if isinstance(state_root, dict):
216 | for k in candidate_keys:
217 | inner = state_root.get(k)
218 | if isinstance(inner, dict):
219 | state_dict = inner
220 | break
221 | if state_dict is None:
222 | state_dict = state_root
223 |
224 | sdmatte_model.load_state_dict(state_dict, strict=False)
225 | sdmatte_model.eval()
226 | sdmatte_model.to(device)
227 |
228 | if device.type == 'cuda':
229 | try:
230 | torch.cuda.empty_cache()
231 | except Exception:
232 | pass
233 |
234 | try:
235 | unet = getattr(sdmatte_model, 'unet', None)
236 | if unet is not None and hasattr(unet, 'set_attn_processor'):
237 | from diffusers.models.attention_processor import SlicedAttnProcessor
238 | unet.set_attn_processor(SlicedAttnProcessor(slice_size=1))  # slice attention to lower peak VRAM
239 | except Exception:
240 | pass
241 |
242 | B, H, W, C = image.shape
243 | orig_h, orig_w = H, W
244 |
245 | img_bchw = image.permute(0, 3, 1, 2).contiguous().to(device)
246 | img_in = _resize_norm_image_bchw(img_bchw, (int(inference_size), int(inference_size)))
247 |
248 | is_trans = torch.tensor([1 if is_transparent else 0] * B, device=device)
249 | data = {"image": img_in, "is_trans": is_trans, "caption": [""] * B}
250 |
251 | def to_b1hw(x):
252 | return _resize_mask_b1hw(x.unsqueeze(1).contiguous().to(device), (int(inference_size), int(inference_size)))
253 |
254 | tri = to_b1hw(trimap) * 2 - 1
255 | data["trimap"] = tri
256 | data["trimap_coords"] = torch.tensor([[0,0,1,1]]*B, dtype=tri.dtype, device=device)
257 |
258 | with torch.no_grad():
259 | if device.type == 'cuda':
260 | with torch.autocast(device_type='cuda', dtype=torch.float16):
261 | pred_alpha = sdmatte_model(data)
262 | else:
263 | pred_alpha = sdmatte_model(data)
264 |
265 | out = transforms.Resize((orig_h, orig_w))(pred_alpha)
266 | out = out.squeeze(1).clamp(0, 1).detach().cpu()
267 |
268 | if mask_refine:
269 | trimap_cpu = trimap.cpu()
270 |
271 | foreground_regions = trimap_cpu > trimap_constraint
272 | background_regions = trimap_cpu < (1.0 - trimap_constraint)
273 | unknown_regions = ~(foreground_regions | background_regions)
274 |
275 | refined_alpha = out.clone()
276 | refined_alpha[background_regions] = 0.0
277 | refined_alpha[foreground_regions] = torch.clamp(refined_alpha[foreground_regions] * 1.2, 0, 1)
278 |
279 | alpha_threshold = 0.3
280 | low_confidence = (refined_alpha < alpha_threshold) & unknown_regions
281 | refined_alpha[low_confidence] = 0.0
282 |
283 | out = refined_alpha
284 |
285 | alpha_expanded = out.unsqueeze(-1)
286 |
287 | if output_mode == "alpha_only":
288 | matted_image = torch.zeros_like(image.cpu())
289 | elif output_mode == "matted_rgba":
290 | matted_image = torch.cat([  # append alpha as a fourth channel
291 | image.cpu(),
292 | alpha_expanded
293 | ], dim=-1)
294 | elif output_mode == "matted_rgb":
295 | trimap_cpu = trimap.cpu()
296 | trimap_expanded = trimap_cpu.unsqueeze(-1)
297 | foreground_mask = (trimap_expanded > 0.2) & (alpha_expanded > 0.1)
298 | matted_image = image.cpu() * foreground_mask.float()
299 | else:
300 | matted_image = image.cpu() * alpha_expanded
301 |
302 | if device.type == 'cuda':
303 | try:
304 | torch.cuda.empty_cache()
305 | except Exception:
306 | pass
307 |
308 | return (out, matted_image)
309 |
310 |
311 | NODE_CLASS_MAPPINGS = {
312 | "SDMatteApply": SDMatteApply,
313 | }
314 |
315 | NODE_DISPLAY_NAME_MAPPINGS = {
316 | "SDMatteApply": "Apply SDMatte",
317 | }
318 |
319 |
320 |
--------------------------------------------------------------------------------
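
The mask_refine branch of apply_matte above is the algorithmic heart of the node: the trimap is thresholded into confident-foreground, confident-background, and unknown bands, the predicted alpha is zeroed where the trimap says background, boosted where it says foreground, and suppressed in low-confidence unknown pixels. Below is a minimal standalone sketch of that step, assuming [B, H, W] tensors in [0, 1] as in the node; the helper name refine_alpha and the smoke test are illustrative, not part of the source:

    import torch

    def refine_alpha(alpha: torch.Tensor, trimap: torch.Tensor, constraint: float = 0.8) -> torch.Tensor:
        """Constrain a predicted alpha matte [B, H, W] with a trimap [B, H, W], both in [0, 1]."""
        fg = trimap > constraint                        # confident foreground band
        bg = trimap < (1.0 - constraint)                # confident background band
        unknown = ~(fg | bg)                            # gray transition band
        refined = alpha.clone()
        refined[bg] = 0.0                               # hard-reject background
        refined[fg] = (refined[fg] * 1.2).clamp(0, 1)   # boost confident foreground, as in the node
        refined[unknown & (refined < 0.3)] = 0.0        # drop low-confidence unknown pixels (the node's alpha_threshold)
        return refined

    # Smoke test: a flat 0.5 matte constrained by a left-to-right bg/unknown/fg trimap.
    alpha = torch.full((1, 4, 4), 0.5)
    trimap = torch.tensor([[[0.0, 0.0, 0.5, 1.0]] * 4])
    print(refine_alpha(alpha, trimap))

Raising trimap_constraint shrinks both confident bands, so more pixels fall into the unknown band where the model's own prediction (gated by the 0.3 threshold) decides; lowering it hands more of the image over to the hard trimap decision.
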
/src/utils/replace.py:
--------------------------------------------------------------------------------
1 | import math
2 | from typing import Any, Dict, Optional, Tuple, Union
3 |
4 | import torch
5 | import torch.nn.functional as F
6 | from torch import nn
7 |
8 | from diffusers import UNet2DConditionModel
9 | from diffusers.models.embeddings import Timesteps, TimestepEmbedding
10 | from diffusers.models.unets.unet_2d_blocks import (
11 | get_down_block,
12 | get_up_block,
13 | get_mid_block,
14 | )
15 | from diffusers.models.activations import get_activation
16 | from diffusers.models.unets.unet_2d_condition import UNet2DConditionOutput
17 | from diffusers.utils import USE_PEFT_BACKEND, scale_lora_layers, unscale_lora_layers
18 |
19 |
20 | def custom_prepare_attention_mask(
21 | self, attention_mask: torch.Tensor, target_length: int, batch_size: int, out_dim: int = 3
22 | ) -> torch.Tensor:
23 | r"""
24 | Prepare the attention mask for the attention computation.
25 |
26 | Args:
27 | attention_mask (`torch.Tensor`):
28 | The attention mask to prepare.
29 | target_length (`int`):
30 | The target length of the attention mask. This is the token length after resizing to the target resolution.
31 | batch_size (`int`):
32 | The batch size, which is used to repeat the attention mask.
33 | out_dim (`int`, *optional*, defaults to `3`):
34 | The output dimension of the attention mask. Can be either `3` or `4`.
35 |
36 | Returns:
37 | `torch.Tensor`: The prepared attention mask.
38 | """
39 | head_size = self.heads
40 | if attention_mask is None:
41 | return attention_mask
42 |
43 | current_length: int = attention_mask.shape[-1]
44 | if current_length != target_length:
45 | if attention_mask.device.type == "mps":
46 | # HACK: MPS: Does not support padding by greater than dimension of input tensor.
47 | # Instead, we can manually construct the padding tensor.
48 | padding_shape = (attention_mask.shape[0], attention_mask.shape[1], target_length)
49 | padding = torch.zeros(padding_shape, dtype=attention_mask.dtype, device=attention_mask.device)
50 | attention_mask = torch.cat([attention_mask, padding], dim=2)
51 | else:
52 | # TODO: for pipelines such as stable-diffusion, padding cross-attn mask:
53 | # we want to instead pad by (0, remaining_length), where remaining_length is:
54 | # remaining_length: int = target_length - current_length
55 | # TODO: re-enable tests/models/test_models_unet_2d_condition.py#test_model_xattn_padding
56 | B = attention_mask.shape[0]
57 | current_size = int(math.sqrt(current_length))
58 | target_size = int(math.sqrt(target_length))
59 | assert current_size**2 == current_length, f"current_length ({current_length}) is not a perfect square"
60 | assert target_size**2 == target_length, f"target_length ({target_length}) is not a perfect square"
61 | attention_mask = attention_mask.view(B, -1, current_size, current_size)
62 | attention_mask = F.interpolate(attention_mask, size=(target_size, target_size), mode="nearest")
63 | attention_mask = attention_mask.view(B, 1, target_length)
64 |
65 | if out_dim == 3:
66 | if attention_mask.shape[0] < batch_size * head_size:
67 | attention_mask = attention_mask.repeat_interleave(head_size, dim=0)
68 | elif out_dim == 4:
69 | attention_mask = attention_mask.unsqueeze(1)
70 | attention_mask = attention_mask.repeat_interleave(head_size, dim=1)
71 |
72 | return attention_mask
73 |
74 |
75 | def custom_get_attention_scores(self, query: torch.Tensor, key: torch.Tensor, attention_mask: torch.Tensor = None) -> torch.Tensor:
76 | r"""
77 | Compute the attention scores.
78 |
79 | Args:
80 | query (`torch.Tensor`): The query tensor.
81 | key (`torch.Tensor`): The key tensor.
82 | attention_mask (`torch.Tensor`, *optional*): The attention mask to use. If `None`, no mask is applied.
83 |
84 | Returns:
85 | `torch.Tensor`: The attention probabilities/scores.
86 | """
87 | dtype = query.dtype
88 | if self.upcast_attention:
89 | query = query.float()
90 | key = key.float()
91 |
92 | # if attention_mask is not None and len(torch.unique(attention_mask)) <= 2:
93 | if attention_mask is not None:
94 | baddbmm_input = attention_mask
95 | beta = 1
96 | else:
97 | baddbmm_input = torch.empty(query.shape[0], query.shape[1], key.shape[1], dtype=query.dtype, device=query.device)
98 | beta = 0
99 |
100 | attention_scores = torch.baddbmm(
101 | baddbmm_input,
102 | query,
103 | key.transpose(-1, -2),
104 | beta=beta,
105 | alpha=self.scale,
106 | )
107 |
108 | # if attention_mask is not None and len(torch.unique(attention_mask)) > 2:
109 | # m = 1 - (attention_mask / -10000.0)
110 | # attention_scores = m * attention_scores
111 |
112 | del baddbmm_input
113 |
114 | if self.upcast_softmax:
115 | attention_scores = attention_scores.float()
116 |
117 | attention_probs = attention_scores.softmax(dim=-1)
118 | del attention_scores
119 |
120 | attention_probs = attention_probs.to(dtype)
121 |
122 | return attention_probs
123 |
124 |
125 | class CustomUNet(UNet2DConditionModel):
126 | def __init__(
127 | self,
128 | sample_size: Optional[int] = None,
129 | in_channels: int = 4,
130 | out_channels: int = 4,
131 | flip_sin_to_cos: bool = True,
132 | freq_shift: int = 0,
133 | down_block_types: Tuple[str] = (
134 | "CrossAttnDownBlock2D",
135 | "CrossAttnDownBlock2D",
136 | "CrossAttnDownBlock2D",
137 | "DownBlock2D",
138 | ),
139 | mid_block_type: Optional[str] = "UNetMidBlock2DCrossAttn",
140 | up_block_types: Tuple[str] = ("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D"),
141 | only_cross_attention: Union[bool, Tuple[bool]] = False,
142 | block_out_channels: Tuple[int] = (320, 640, 1280, 1280),
143 | layers_per_block: Union[int, Tuple[int]] = 2,
144 | downsample_padding: int = 1,
145 | mid_block_scale_factor: float = 1,
146 | dropout: float = 0.0,
147 | act_fn: str = "silu",
148 | norm_num_groups: Optional[int] = 32,
149 | norm_eps: float = 1e-5,
150 | cross_attention_dim: Union[int, Tuple[int]] = 1280,
151 | transformer_layers_per_block: Union[int, Tuple[int], Tuple[Tuple]] = 1,
152 | reverse_transformer_layers_per_block: Optional[Tuple[Tuple[int]]] = None,
153 | attention_head_dim: Union[int, Tuple[int]] = 8,
154 | num_attention_heads: Optional[Union[int, Tuple[int]]] = None,
155 | dual_cross_attention: bool = False,
156 | use_linear_projection: bool = False,
157 | upcast_attention: bool = False,
158 | resnet_time_scale_shift: str = "default",
159 | resnet_skip_time_act: bool = False,
160 | resnet_out_scale_factor: int = 1.0,
161 | time_embedding_dim: Optional[int] = None,
162 | timestep_post_act: Optional[str] = None,
163 | time_cond_proj_dim: Optional[int] = None,
164 | conv_in_kernel: int = 3,
165 | conv_out_kernel: int = 3,
166 | bbox_time_embed_dim: Optional[int] = None,
167 | point_embeddings_input_dim: Optional[int] = None,
168 | bbox_embeddings_input_dim: Optional[int] = None,
169 | attention_type: str = "default",
170 | class_embeddings_concat: bool = False,
171 | mid_block_only_cross_attention: Optional[bool] = None,
172 | cross_attention_norm: Optional[str] = None,
173 | use_attention_mask_list=[True, True, True],
174 | use_encoder_hidden_states_list=[True, True, True],
175 | ):
176 | super().__init__()
177 | self.use_attention_mask_list = use_attention_mask_list
178 | self.use_encoder_hidden_states_list = use_encoder_hidden_states_list
179 | self.sample_size = sample_size
180 | num_attention_heads = num_attention_heads or attention_head_dim
181 |
182 | # input
183 | conv_in_padding = (conv_in_kernel - 1) // 2
184 | self.conv_in = nn.Conv2d(in_channels, block_out_channels[0], kernel_size=conv_in_kernel, padding=conv_in_padding)
185 |
186 | # time
187 | time_embed_dim = time_embedding_dim or block_out_channels[0] * 4
188 | self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift)
189 | timestep_input_dim = block_out_channels[0]
190 | self.time_embedding = TimestepEmbedding(
191 | timestep_input_dim,
192 | time_embed_dim,
193 | act_fn=act_fn,
194 | post_act_fn=timestep_post_act,
195 | cond_proj_dim=time_cond_proj_dim,
196 | )
197 |
198 | self.point_embedding = TimestepEmbedding(point_embeddings_input_dim, time_embed_dim)
199 | self.bbox_time_proj = Timesteps(bbox_time_embed_dim, flip_sin_to_cos, freq_shift)
200 | self.bbox_embedding = TimestepEmbedding(bbox_embeddings_input_dim, time_embed_dim)
201 |
202 | self.down_blocks = nn.ModuleList([])
203 | self.up_blocks = nn.ModuleList([])
204 | if isinstance(only_cross_attention, bool):
205 | if mid_block_only_cross_attention is None:
206 | mid_block_only_cross_attention = only_cross_attention
207 | only_cross_attention = [only_cross_attention] * len(down_block_types)
208 |
209 | if mid_block_only_cross_attention is None:
210 | mid_block_only_cross_attention = False
211 |
212 | if isinstance(num_attention_heads, int):
213 | num_attention_heads = (num_attention_heads,) * len(down_block_types)
214 |
215 | if isinstance(attention_head_dim, int):
216 | attention_head_dim = (attention_head_dim,) * len(down_block_types)
217 |
218 | if isinstance(cross_attention_dim, int):
219 | cross_attention_dim = (cross_attention_dim,) * len(down_block_types)
220 |
221 | if isinstance(layers_per_block, int):
222 | layers_per_block = [layers_per_block] * len(down_block_types)
223 |
224 | if isinstance(transformer_layers_per_block, int):
225 | transformer_layers_per_block = [transformer_layers_per_block] * len(down_block_types)
226 |
227 | if class_embeddings_concat:
228 | blocks_time_embed_dim = time_embed_dim * 2
229 | else:
230 | blocks_time_embed_dim = time_embed_dim
231 |
232 | # down
233 | output_channel = block_out_channels[0]
234 | for i, down_block_type in enumerate(down_block_types):
235 | input_channel = output_channel
236 | output_channel = block_out_channels[i]
237 | is_final_block = i == len(block_out_channels) - 1
238 |
239 | down_block = get_down_block(
240 | down_block_type,
241 | num_layers=layers_per_block[i],
242 | transformer_layers_per_block=transformer_layers_per_block[i],
243 | in_channels=input_channel,
244 | out_channels=output_channel,
245 | temb_channels=blocks_time_embed_dim,
246 | add_downsample=not is_final_block,
247 | resnet_eps=norm_eps,
248 | resnet_act_fn=act_fn,
249 | resnet_groups=norm_num_groups,
250 | cross_attention_dim=cross_attention_dim[i],
251 | num_attention_heads=num_attention_heads[i],
252 | downsample_padding=downsample_padding,
253 | dual_cross_attention=dual_cross_attention,
254 | use_linear_projection=use_linear_projection,
255 | only_cross_attention=only_cross_attention[i],
256 | upcast_attention=upcast_attention,
257 | resnet_time_scale_shift=resnet_time_scale_shift,
258 | attention_type=attention_type,
259 | resnet_skip_time_act=resnet_skip_time_act,
260 | resnet_out_scale_factor=resnet_out_scale_factor,
261 | cross_attention_norm=cross_attention_norm,
262 | attention_head_dim=attention_head_dim[i] if attention_head_dim[i] is not None else output_channel,
263 | dropout=dropout,
264 | )
265 | self.down_blocks.append(down_block)
266 |
267 | # mid
268 | self.mid_block = get_mid_block(
269 | mid_block_type,
270 | temb_channels=blocks_time_embed_dim,
271 | in_channels=block_out_channels[-1],
272 | resnet_eps=norm_eps,
273 | resnet_act_fn=act_fn,
274 | resnet_groups=norm_num_groups,
275 | output_scale_factor=mid_block_scale_factor,
276 | transformer_layers_per_block=transformer_layers_per_block[-1],
277 | num_attention_heads=num_attention_heads[-1],
278 | cross_attention_dim=cross_attention_dim[-1],
279 | dual_cross_attention=dual_cross_attention,
280 | use_linear_projection=use_linear_projection,
281 | mid_block_only_cross_attention=mid_block_only_cross_attention,
282 | upcast_attention=upcast_attention,
283 | resnet_time_scale_shift=resnet_time_scale_shift,
284 | attention_type=attention_type,
285 | resnet_skip_time_act=resnet_skip_time_act,
286 | cross_attention_norm=cross_attention_norm,
287 | attention_head_dim=attention_head_dim[-1],
288 | dropout=dropout,
289 | )
290 |
291 | # count how many layers upsample the images
292 | self.num_upsamplers = 0
293 |
294 | # up
295 | reversed_block_out_channels = list(reversed(block_out_channels))
296 | reversed_num_attention_heads = list(reversed(num_attention_heads))
297 | reversed_layers_per_block = list(reversed(layers_per_block))
298 | reversed_cross_attention_dim = list(reversed(cross_attention_dim))
299 | reversed_transformer_layers_per_block = (
300 | list(reversed(transformer_layers_per_block))
301 | if reverse_transformer_layers_per_block is None
302 | else reverse_transformer_layers_per_block
303 | )
304 | only_cross_attention = list(reversed(only_cross_attention))
305 |
306 | output_channel = reversed_block_out_channels[0]
307 | for i, up_block_type in enumerate(up_block_types):
308 | is_final_block = i == len(block_out_channels) - 1
309 |
310 | prev_output_channel = output_channel
311 | output_channel = reversed_block_out_channels[i]
312 | input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)]
313 |
314 | # add upsample block for all BUT final layer
315 | if not is_final_block:
316 | add_upsample = True
317 | self.num_upsamplers += 1
318 | else:
319 | add_upsample = False
320 |
321 | up_block = get_up_block(
322 | up_block_type,
323 | num_layers=reversed_layers_per_block[i] + 1,
324 | transformer_layers_per_block=reversed_transformer_layers_per_block[i],
325 | in_channels=input_channel,
326 | out_channels=output_channel,
327 | prev_output_channel=prev_output_channel,
328 | temb_channels=blocks_time_embed_dim,
329 | add_upsample=add_upsample,
330 | resnet_eps=norm_eps,
331 | resnet_act_fn=act_fn,
332 | resolution_idx=i,
333 | resnet_groups=norm_num_groups,
334 | cross_attention_dim=reversed_cross_attention_dim[i],
335 | num_attention_heads=reversed_num_attention_heads[i],
336 | dual_cross_attention=dual_cross_attention,
337 | use_linear_projection=use_linear_projection,
338 | only_cross_attention=only_cross_attention[i],
339 | upcast_attention=upcast_attention,
340 | resnet_time_scale_shift=resnet_time_scale_shift,
341 | attention_type=attention_type,
342 | resnet_skip_time_act=resnet_skip_time_act,
343 | resnet_out_scale_factor=resnet_out_scale_factor,
344 | cross_attention_norm=cross_attention_norm,
345 | attention_head_dim=attention_head_dim[i] if attention_head_dim[i] is not None else output_channel,
346 | dropout=dropout,
347 | )
348 | self.up_blocks.append(up_block)
349 | prev_output_channel = output_channel
350 |
351 | # out
352 | if norm_num_groups is not None:
353 | self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], num_groups=norm_num_groups, eps=norm_eps)
354 |
355 | self.conv_act = get_activation(act_fn)
356 |
357 | else:
358 | self.conv_norm_out = None
359 | self.conv_act = None
360 |
361 | conv_out_padding = (conv_out_kernel - 1) // 2
362 | self.conv_out = nn.Conv2d(block_out_channels[0], out_channels, kernel_size=conv_out_kernel, padding=conv_out_padding)
363 |
364 | # distillation
365 | self.feature_map = []
366 |
367 | def _get_value(self, use_list, true_value, false_value):
368 | down_value = mid_value = up_value = false_value
369 |
370 | if use_list[0]:
371 | down_value = true_value
372 | if use_list[1]:
373 | mid_value = true_value
374 | if use_list[2]:
375 | up_value = true_value
376 |
377 | return down_value, mid_value, up_value
378 |
379 | def forward(
380 | self,
381 | sample: torch.FloatTensor,
382 | timestep: Union[torch.Tensor, float, int],
383 | trans: Union[torch.Tensor, float, int],
384 | encoder_hidden_states: torch.Tensor,
385 | encoder_hidden_states_2: Optional[torch.Tensor] = None,
386 | timestep_cond: Optional[torch.Tensor] = None,
387 | attention_mask: Optional[torch.Tensor] = None,
388 | cross_attention_kwargs: Optional[Dict[str, Any]] = None,
389 | added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
390 | encoder_attention_mask: Optional[torch.Tensor] = None,
391 | ) -> Union[UNet2DConditionOutput, Tuple]:
392 | default_overall_up_factor = 2**self.num_upsamplers
393 | forward_upsample_size = False
394 | upsample_size = None
395 |
396 | for dim in sample.shape[-2:]:
397 | if dim % default_overall_up_factor != 0:
398 | forward_upsample_size = True
399 | break
400 |
401 | if attention_mask is not None:
402 | attention_mask = (1 - attention_mask.to(sample.dtype)) * -10000.0
403 | attention_mask = attention_mask.unsqueeze(1)
404 |
405 | if encoder_attention_mask is not None:
406 | encoder_attention_mask = (1 - encoder_attention_mask.to(sample.dtype)) * -10000.0
407 | encoder_attention_mask = encoder_attention_mask.unsqueeze(1)
408 |
409 | # 0. center input if necessary
410 | if self.config.center_input_sample:
411 | sample = 2 * sample - 1.0
412 |
413 | down_attn_mask, mid_attn_mask, up_attn_mask = self._get_value(self.use_attention_mask_list, attention_mask, None)
414 | down_encoder_hidden_states, mid_encoder_hidden_states, up_encoder_hidden_states = self._get_value(
415 | self.use_encoder_hidden_states_list, encoder_hidden_states, encoder_hidden_states_2
416 | )
417 |
418 | # 1. time
419 | t_emb, op_emb, aug_emb = None, None, None
420 |
421 | if timestep is not None:
422 | timesteps = timestep if torch.is_tensor(timestep) else torch.tensor([timestep], device=sample.device)  # accept scalar timesteps per the signature
423 | timesteps = timesteps.expand(sample.shape[0])
424 | t_emb = self.time_proj(timesteps)
425 | t_emb = t_emb.to(dtype=sample.dtype)
426 |
427 | t_emb = self.time_embedding(t_emb, timestep_cond)
428 |
429 | # opacity
430 | if trans is not None:
431 | trans = (trans if torch.is_tensor(trans) else torch.tensor([trans], device=sample.device)).expand(sample.shape[0])  # accept scalar opacity per the signature
432 | op_emb = self.time_proj(trans)
433 | op_emb = op_emb.to(dtype=sample.dtype)
434 |
435 | op_emb = self.time_embedding(op_emb, timestep_cond)
436 |
437 | if t_emb is not None and op_emb is not None:
438 | emb = t_emb + op_emb
439 | elif op_emb is not None:
440 | emb = op_emb
441 | elif t_emb is not None:
442 | emb = t_emb
443 | else:
444 | raise ValueError("Missing required field: 'timestep' and 'trans'. Please ensure it is included in your input.")
445 |
446 | if "point_coords" in added_cond_kwargs:
447 | coords_embeds = added_cond_kwargs.get("point_coords")
448 | coords_embeds = coords_embeds.reshape((sample.shape[0], -1))
449 | coords_embeds = coords_embeds.to(emb.dtype)
450 | aug_emb = self.point_embedding(coords_embeds)
451 | elif "bbox_mask_coords" in added_cond_kwargs:
452 | coords = added_cond_kwargs.get("bbox_mask_coords")
453 | coords_embeds = self.bbox_time_proj(coords.flatten())
454 | coords_embeds = coords_embeds.reshape((sample.shape[0], -1))
455 | coords_embeds = coords_embeds.to(emb.dtype)
456 | aug_emb = self.bbox_embedding(coords_embeds)
457 | else:
458 | raise ValueError(f"{self.__class__} cannot find point_coords or bbox_coords in added_cond_kwargs.")
459 |
460 | emb = emb + aug_emb if aug_emb is not None else emb
461 |
462 | # 2. pre-process
463 | sample = self.conv_in(sample)
464 |
465 | # distillation
466 | self.feature_map = []
467 |
468 | # 3. down
469 | lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0
470 | if USE_PEFT_BACKEND:
471 | scale_lora_layers(self, lora_scale)
472 |
473 | down_block_res_samples = (sample,)
474 | for downsample_block in self.down_blocks:
475 | if hasattr(downsample_block, "has_cross_attention") and downsample_block.has_cross_attention:
476 | additional_residuals = {}
477 | sample, res_samples = downsample_block(
478 | hidden_states=sample,
479 | temb=emb,
480 | encoder_hidden_states=down_encoder_hidden_states,
481 | attention_mask=down_attn_mask,
482 | cross_attention_kwargs=cross_attention_kwargs,
483 | encoder_attention_mask=encoder_attention_mask,
484 | **additional_residuals,
485 | )
486 | else:
487 | sample, res_samples = downsample_block(hidden_states=sample, temb=emb, scale=lora_scale)
488 |
489 | down_block_res_samples += res_samples
490 |
491 | self.feature_map.append(sample)
492 |
493 | # 4. mid
494 | if self.mid_block is not None:
495 | if hasattr(self.mid_block, "has_cross_attention") and self.mid_block.has_cross_attention:
496 | sample = self.mid_block(
497 | sample,
498 | emb,
499 | encoder_hidden_states=mid_encoder_hidden_states,
500 | attention_mask=mid_attn_mask,
501 | cross_attention_kwargs=cross_attention_kwargs,
502 | encoder_attention_mask=encoder_attention_mask,
503 | )
504 | else:
505 | sample = self.mid_block(sample, emb)
506 |
507 | self.feature_map.append(sample)
508 |
509 | # 5. up
510 | for i, upsample_block in enumerate(self.up_blocks):
511 | is_final_block = i == len(self.up_blocks) - 1
512 |
513 | res_samples = down_block_res_samples[-len(upsample_block.resnets) :]
514 | down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)]
515 |
516 | if not is_final_block and forward_upsample_size:
517 | upsample_size = down_block_res_samples[-1].shape[2:]
518 |
519 | if hasattr(upsample_block, "has_cross_attention") and upsample_block.has_cross_attention:
520 | sample = upsample_block(
521 | hidden_states=sample,
522 | temb=emb,
523 | res_hidden_states_tuple=res_samples,
524 | encoder_hidden_states=up_encoder_hidden_states,
525 | cross_attention_kwargs=cross_attention_kwargs,
526 | upsample_size=upsample_size,
527 | attention_mask=up_attn_mask,
528 | encoder_attention_mask=encoder_attention_mask,
529 | )
530 | else:
531 | sample = upsample_block(
532 | hidden_states=sample,
533 | temb=emb,
534 | res_hidden_states_tuple=res_samples,
535 | upsample_size=upsample_size,
536 | scale=lora_scale,
537 | )
538 |
539 | self.feature_map.append(sample)
540 |
541 | # 6. post-process
542 | if self.conv_norm_out:
543 | sample = self.conv_norm_out(sample)
544 | sample = self.conv_act(sample)
545 | sample = self.conv_out(sample)
546 |
547 | if USE_PEFT_BACKEND:
548 | unscale_lora_layers(self, lora_scale)
549 |
550 | return UNet2DConditionOutput(sample=sample)
551 |
--------------------------------------------------------------------------------
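
The two replacement functions above change how attention in the UNet consumes SDMatte's spatial auxiliary masks: forward turns a binary keep-mask into an additive bias via (1 - mask) * -10000.0, custom_prepare_attention_mask rescales that bias across resolutions by viewing the token axis as a square feature grid and nearest-neighbor interpolating (instead of padding, as stock diffusers does), and custom_get_attention_scores adds the bias directly through torch.baddbmm with beta=1. A small self-contained sketch of that round trip, assuming square token grids as the asserts above require; the helper names mask_to_bias and resize_bias are illustrative:

    import math

    import torch
    import torch.nn.functional as F

    def mask_to_bias(mask: torch.Tensor) -> torch.Tensor:
        """Binary keep-mask [B, L] -> additive attention bias [B, 1, L] (0 = keep, -10000 = drop)."""
        return ((1 - mask.float()) * -10000.0).unsqueeze(1)

    def resize_bias(bias: torch.Tensor, target_length: int) -> torch.Tensor:
        """Rescale a [B, 1, L] bias to target_length tokens via the square-grid interpolation above."""
        B, _, current_length = bias.shape
        cur, tgt = int(math.sqrt(current_length)), int(math.sqrt(target_length))
        assert cur * cur == current_length and tgt * tgt == target_length
        grid = bias.view(B, 1, cur, cur)                           # tokens -> 2D feature grid
        grid = F.interpolate(grid, size=(tgt, tgt), mode="nearest")
        return grid.view(B, 1, target_length)

    # A 64-token (8x8) mask keeping the top half, downscaled to a 16-token (4x4) stage.
    mask = torch.zeros(1, 64)
    mask[:, :32] = 1.0
    bias = mask_to_bias(mask)            # [1, 1, 64]
    print(resize_bias(bias, 16).view(4, 4))

Nearest-neighbor interpolation keeps the bias exactly two-valued, so masked tokens stay at -10000 at every UNet resolution rather than being smeared into intermediate penalties.
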
/example_workflow/超强抠图遮罩细化工作流.json:
--------------------------------------------------------------------------------
1 | {
2 | "last_link_id":62,
3 | "nodes":[
4 | {
5 | "mode":0,
6 | "outputs":[
7 | {
8 | "name":"IMAGE",
9 | "links":[
10 | 20,
11 | 57
12 | ],
13 | "label":"图像",
14 | "type":"IMAGE",
15 | "localized_name":"图像"
16 | },
17 | {
18 | "name":"MASK",
19 | "label":"遮罩",
20 | "type":"MASK",
21 | "localized_name":"遮罩"
22 | }
23 | ],
24 | "size":[
25 | 270,
26 | 314
27 | ],
28 | "pos":[
29 | 6107.099609375,
30 | 2105.3642578125
31 | ],
32 | "widgets_values":[
33 | "a967fc290d991c68f71a05bf6e55732cf5382689af321eb265ff7728f625300b.png",
34 | "image"
35 | ],
36 | "inputs":[
37 | {
38 | "widget":{
39 | "name":"image"
40 | },
41 | "name":"image",
42 | "type":"COMBO",
43 | "localized_name":"图像"
44 | },
45 | {
46 | "widget":{
47 | "name":"upload"
48 | },
49 | "name":"upload",
50 | "type":"IMAGEUPLOAD",
51 | "localized_name":"选择文件上传"
52 | }
53 | ],
54 | "flags":{
55 |
56 | },
57 | "id":18,
58 | "type":"LoadImage",
59 | "properties":{
60 | "hasSecondTab":false,
61 | "cnr_id":"comfy-core",
62 | "ver":"0.3.49",
63 | "ue_properties":{
64 | "widget_ue_connectable":{
65 | "image":true,
66 | "upload":true
67 | }
68 | },
69 | "widget_ue_connectable":{
70 |
71 | },
72 | "secondTabText":"Send Back",
73 | "enableTabs":false,
74 | "secondTabOffset":80,
75 | "Node name for S&R":"LoadImage",
76 | "tabWidth":65,
77 | "secondTabWidth":65,
78 | "tabXOffset":10
79 | },
80 | "order":0
81 | },
82 | {
83 | "mode":0,
84 | "outputs":[
85 | {
86 | "name":"IMAGE",
87 | "links":[
88 | 19,
89 | 60
90 | ],
91 | "label":"图像",
92 | "type":"IMAGE",
93 | "localized_name":"图像"
94 | },
95 | {
96 | "name":"MASK",
97 | "label":"遮罩",
98 | "type":"MASK",
99 | "localized_name":"遮罩"
100 | }
101 | ],
102 | "size":[
103 | 270,
104 | 314
105 | ],
106 | "pos":[
107 | 5850,
108 | 3450
109 | ],
110 | "widgets_values":[
111 | "pasted/4550ed051bb190f119094d7e4406c04b84347b55f94d07fd379165760a57b9c8.png",
112 | "image"
113 | ],
114 | "inputs":[
115 | {
116 | "widget":{
117 | "name":"image"
118 | },
119 | "name":"image",
120 | "type":"COMBO",
121 | "localized_name":"图像"
122 | },
123 | {
124 | "widget":{
125 | "name":"upload"
126 | },
127 | "name":"upload",
128 | "type":"IMAGEUPLOAD",
129 | "localized_name":"选择文件上传"
130 | }
131 | ],
132 | "flags":{
133 |
134 | },
135 | "id":19,
136 | "type":"LoadImage",
137 | "properties":{
138 | "hasSecondTab":false,
139 | "cnr_id":"comfy-core",
140 | "ver":"0.3.49",
141 | "ue_properties":{
142 | "widget_ue_connectable":{
143 | "image":true,
144 | "upload":true
145 | }
146 | },
147 | "widget_ue_connectable":{
148 |
149 | },
150 | "secondTabText":"Send Back",
151 | "enableTabs":false,
152 | "secondTabOffset":80,
153 | "Node name for S&R":"LoadImage",
154 | "tabWidth":65,
155 | "secondTabWidth":65,
156 | "tabXOffset":10
157 | },
158 | "order":1
159 | },
160 | {
161 | "mode":0,
162 | "outputs":[
163 | {
164 | "name":"IMAGE",
165 | "links":[
166 | 21,
167 | 54
168 | ],
169 | "label":"图像",
170 | "type":"IMAGE",
171 | "localized_name":"图像"
172 | },
173 | {
174 | "name":"MASK",
175 | "label":"遮罩",
176 | "type":"MASK",
177 | "localized_name":"遮罩"
178 | }
179 | ],
180 | "size":[
181 | 270,
182 | 314.0000305175781
183 | ],
184 | "pos":[
185 | 6018.3759765625,
186 | 1220.397705078125
187 | ],
188 | "widgets_values":[
189 | "49d7f1b44e2a78a5239a6b9ba4e0cdc3425ca973cacb512567680053b8e38949.jpg",
190 | "image"
191 | ],
192 | "inputs":[
193 | {
194 | "widget":{
195 | "name":"image"
196 | },
197 | "name":"image",
198 | "type":"COMBO",
199 | "localized_name":"图像"
200 | },
201 | {
202 | "widget":{
203 | "name":"upload"
204 | },
205 | "name":"upload",
206 | "type":"IMAGEUPLOAD",
207 | "localized_name":"选择文件上传"
208 | }
209 | ],
210 | "flags":{
211 |
212 | },
213 | "id":17,
214 | "type":"LoadImage",
215 | "properties":{
216 | "hasSecondTab":false,
217 | "cnr_id":"comfy-core",
218 | "ver":"0.3.49",
219 | "ue_properties":{
220 | "widget_ue_connectable":{
221 | "image":true,
222 | "upload":true
223 | }
224 | },
225 | "widget_ue_connectable":{
226 |
227 | },
228 | "secondTabText":"Send Back",
229 | "enableTabs":false,
230 | "secondTabOffset":80,
231 | "Node name for S&R":"LoadImage",
232 | "tabWidth":65,
233 | "secondTabWidth":65,
234 | "tabXOffset":10
235 | },
236 | "order":2
237 | },
238 | {
239 | "mode":0,
240 | "outputs":[
241 |
242 | ],
243 | "size":[
244 | 319.2306213378906,
245 | 353.56097412109375
246 | ],
247 | "pos":[
248 | 7707.099609375,
249 | 2125.3642578125
250 | ],
251 | "widgets_values":[
252 |
253 | ],
254 | "inputs":[
255 | {
256 | "name":"mask",
257 | "link":32,
258 | "label":"遮罩",
259 | "type":"MASK",
260 | "localized_name":"mask"
261 | }
262 | ],
263 | "flags":{
264 |
265 | },
266 | "id":30,
267 | "type":"MaskPreview+",
268 | "properties":{
269 | "hasSecondTab":false,
270 | "cnr_id":"comfyui_essentials",
271 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9",
272 | "ue_properties":{
273 | "widget_ue_connectable":{
274 |
275 | }
276 | },
277 | "widget_ue_connectable":{
278 |
279 | },
280 | "secondTabText":"Send Back",
281 | "enableTabs":false,
282 | "secondTabOffset":80,
283 | "Node name for S&R":"MaskPreview+",
284 | "tabWidth":65,
285 | "secondTabWidth":65,
286 | "tabXOffset":10
287 | },
288 | "order":12
289 | },
290 | {
291 | "mode":0,
292 | "outputs":[
293 |
294 | ],
295 | "size":[
296 | 405.8030700683594,
297 | 572.3388061523438
298 | ],
299 | "pos":[
300 | 7250,
301 | -320
302 | ],
303 | "widgets_values":[
304 |
305 | ],
306 | "inputs":[
307 | {
308 | "name":"mask",
309 | "link":53,
310 | "label":"遮罩",
311 | "type":"MASK",
312 | "localized_name":"mask"
313 | }
314 | ],
315 | "flags":{
316 |
317 | },
318 | "id":16,
319 | "type":"MaskPreview+",
320 | "properties":{
321 | "hasSecondTab":false,
322 | "cnr_id":"comfyui_essentials",
323 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9",
324 | "ue_properties":{
325 | "widget_ue_connectable":{
326 |
327 | }
328 | },
329 | "widget_ue_connectable":{
330 |
331 | },
332 | "secondTabText":"Send Back",
333 | "enableTabs":false,
334 | "secondTabOffset":80,
335 | "Node name for S&R":"MaskPreview+",
336 | "tabWidth":65,
337 | "secondTabWidth":65,
338 | "tabXOffset":10
339 | },
340 | "order":24
341 | },
342 | {
343 | "mode":0,
344 | "outputs":[
345 |
346 | ],
347 | "size":[
348 | 393.25921630859375,
349 | 565.4967041015625
350 | ],
351 | "pos":[
352 | 7685.49365234375,
353 | -311.3333435058594
354 | ],
355 | "widgets_values":[
356 |
357 | ],
358 | "inputs":[
359 | {
360 | "name":"mask",
361 | "link":8,
362 | "label":"遮罩",
363 | "type":"MASK",
364 | "localized_name":"mask"
365 | }
366 | ],
367 | "flags":{
368 |
369 | },
370 | "id":8,
371 | "type":"MaskPreview+",
372 | "properties":{
373 | "hasSecondTab":false,
374 | "cnr_id":"comfyui_essentials",
375 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9",
376 | "ue_properties":{
377 | "widget_ue_connectable":{
378 |
379 | }
380 | },
381 | "widget_ue_connectable":{
382 |
383 | },
384 | "secondTabText":"Send Back",
385 | "enableTabs":false,
386 | "secondTabOffset":80,
387 | "Node name for S&R":"MaskPreview+",
388 | "tabWidth":65,
389 | "secondTabWidth":65,
390 | "tabXOffset":10
391 | },
392 | "order":19
393 | },
394 | {
395 | "mode":0,
396 | "outputs":[
397 |
398 | ],
399 | "size":[
400 | 294.7272033691406,
401 | 313.9785461425781
402 | ],
403 | "pos":[
404 | 7347.099609375,
405 | 2175.364501953125
406 | ],
407 | "widgets_values":[
408 |
409 | ],
410 | "inputs":[
411 | {
412 | "name":"mask",
413 | "link":59,
414 | "label":"遮罩",
415 | "type":"MASK",
416 | "localized_name":"mask"
417 | }
418 | ],
419 | "flags":{
420 |
421 | },
422 | "id":29,
423 | "type":"MaskPreview+",
424 | "properties":{
425 | "hasSecondTab":false,
426 | "cnr_id":"comfyui_essentials",
427 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9",
428 | "ue_properties":{
429 | "widget_ue_connectable":{
430 |
431 | }
432 | },
433 | "widget_ue_connectable":{
434 |
435 | },
436 | "secondTabText":"Send Back",
437 | "enableTabs":false,
438 | "secondTabOffset":80,
439 | "Node name for S&R":"MaskPreview+",
440 | "tabWidth":65,
441 | "secondTabWidth":65,
442 | "tabXOffset":10
443 | },
444 | "order":21
445 | },
446 | {
447 | "mode":0,
448 | "outputs":[
449 |
450 | ],
451 | "size":[
452 | 438.7269592285156,
453 | 610.3634033203125
454 | ],
455 | "pos":[
456 | 7262.8671875,
457 | 985.3197631835938
458 | ],
459 | "widgets_values":[
460 |
461 | ],
462 | "inputs":[
463 | {
464 | "name":"mask",
465 | "link":56,
466 | "label":"遮罩",
467 | "type":"MASK",
468 | "localized_name":"mask"
469 | }
470 | ],
471 | "flags":{
472 |
473 | },
474 | "id":27,
475 | "type":"MaskPreview+",
476 | "properties":{
477 | "hasSecondTab":false,
478 | "cnr_id":"comfyui_essentials",
479 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9",
480 | "ue_properties":{
481 | "widget_ue_connectable":{
482 |
483 | }
484 | },
485 | "widget_ue_connectable":{
486 |
487 | },
488 | "secondTabText":"Send Back",
489 | "enableTabs":false,
490 | "secondTabOffset":80,
491 | "Node name for S&R":"MaskPreview+",
492 | "tabWidth":65,
493 | "secondTabWidth":65,
494 | "tabXOffset":10
495 | },
496 | "order":23
497 | },
498 | {
499 | "mode":0,
500 | "outputs":[
501 |
502 | ],
503 | "size":[
504 | 446.0731506347656,
505 | 603.0172729492188
506 | ],
507 | "pos":[
508 | 7734.06884765625,
509 | 990.8294067382812
510 | ],
511 | "widgets_values":[
512 |
513 | ],
514 | "inputs":[
515 | {
516 | "name":"mask",
517 | "link":37,
518 | "label":"遮罩",
519 | "type":"MASK",
520 | "localized_name":"mask"
521 | }
522 | ],
523 | "flags":{
524 |
525 | },
526 | "id":28,
527 | "type":"MaskPreview+",
528 | "properties":{
529 | "hasSecondTab":false,
530 | "cnr_id":"comfyui_essentials",
531 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9",
532 | "ue_properties":{
533 | "widget_ue_connectable":{
534 |
535 | }
536 | },
537 | "widget_ue_connectable":{
538 |
539 | },
540 | "secondTabText":"Send Back",
541 | "enableTabs":false,
542 | "secondTabOffset":80,
543 | "Node name for S&R":"MaskPreview+",
544 | "tabWidth":65,
545 | "secondTabWidth":65,
546 | "tabXOffset":10
547 | },
548 | "order":17
549 | },
550 | {
551 | "mode":0,
552 | "outputs":[
553 | {
554 | "name":"image",
555 | "label":"图像",
556 | "type":"IMAGE",
557 | "localized_name":"image"
558 | },
559 | {
560 | "name":"mask",
561 | "links":[
562 | 32,
563 | 58
564 | ],
565 | "label":"遮罩",
566 | "type":"MASK",
567 | "localized_name":"mask"
568 | }
569 | ],
570 | "size":[
571 | 390.8785095214844,
572 | 366
573 | ],
574 | "color":"rgba(27, 80, 119, 0.7)",
575 | "pos":[
576 | 6547.099609375,
577 | 2095.3642578125
578 | ],
579 | "widgets_values":[
580 | "sam_vit_h (2.56GB)",
581 | "GroundingDINO_SwinT_OGC (694MB)",
582 | 0.3,
583 | "VITMatte",
584 | 6,
585 | 6,
586 | 0.15,
587 | 0.99,
588 | true,
589 | "subject",
590 | "cuda",
591 | 2,
592 | false
593 | ],
594 | "inputs":[
595 | {
596 | "name":"image",
597 | "link":20,
598 | "label":"图像",
599 | "type":"IMAGE",
600 | "localized_name":"image"
601 | },
602 | {
603 | "widget":{
604 | "name":"sam_model"
605 | },
606 | "name":"sam_model",
607 | "type":"COMBO",
608 | "localized_name":"SAM模型"
609 | },
610 | {
611 | "widget":{
612 | "name":"grounding_dino_model"
613 | },
614 | "name":"grounding_dino_model",
615 | "type":"COMBO",
616 | "localized_name":"GroundingDINO模型"
617 | },
618 | {
619 | "widget":{
620 | "name":"threshold"
621 | },
622 | "name":"threshold",
623 | "type":"FLOAT",
624 | "localized_name":"阈值"
625 | },
626 | {
627 | "widget":{
628 | "name":"detail_method"
629 | },
630 | "name":"detail_method",
631 | "type":"COMBO",
632 | "localized_name":"细节处理方法"
633 | },
634 | {
635 | "widget":{
636 | "name":"detail_erode"
637 | },
638 | "name":"detail_erode",
639 | "type":"INT",
640 | "localized_name":"细节消融"
641 | },
642 | {
643 | "widget":{
644 | "name":"detail_dilate"
645 | },
646 | "name":"detail_dilate",
647 | "type":"INT",
648 | "localized_name":"细节膨胀"
649 | },
650 | {
651 | "widget":{
652 | "name":"black_point"
653 | },
654 | "name":"black_point",
655 | "type":"FLOAT",
656 | "localized_name":"黑色阈值"
657 | },
658 | {
659 | "widget":{
660 | "name":"white_point"
661 | },
662 | "name":"white_point",
663 | "type":"FLOAT",
664 | "localized_name":"白色阈值"
665 | },
666 | {
667 | "widget":{
668 | "name":"process_detail"
669 | },
670 | "name":"process_detail",
671 | "type":"BOOLEAN",
672 | "localized_name":"处理细节"
673 | },
674 | {
675 | "widget":{
676 | "name":"prompt"
677 | },
678 | "name":"prompt",
679 | "type":"STRING",
680 | "localized_name":"提示词"
681 | },
682 | {
683 | "widget":{
684 | "name":"device"
685 | },
686 | "name":"device",
687 | "type":"COMBO",
688 | "localized_name":"设备"
689 | },
690 | {
691 | "widget":{
692 | "name":"max_megapixels"
693 | },
694 | "name":"max_megapixels",
695 | "type":"FLOAT",
696 | "localized_name":"Vitmatte最大尺寸"
697 | },
698 | {
699 | "widget":{
700 | "name":"cache_model"
701 | },
702 | "name":"cache_model",
703 | "type":"BOOLEAN",
704 | "localized_name":"cache_model"
705 | }
706 | ],
707 | "flags":{
708 |
709 | },
710 | "id":24,
711 | "type":"LayerMask: SegmentAnythingUltra V2",
712 | "properties":{
713 | "hasSecondTab":false,
714 | "cnr_id":"ComfyUI_LayerStyle_Advance",
715 | "ver":"5f8c6f29c484100245bc6f8ea66277f53e23cdbe",
716 | "ue_properties":{
717 | "widget_ue_connectable":{
718 | "detail_dilate":true,
719 | "detail_erode":true,
720 | "threshold":true,
721 | "max_megapixels":true,
722 | "sam_model":true,
723 | "detail_method":true,
724 | "black_point":true,
725 | "process_detail":true,
726 | "grounding_dino_model":true,
727 | "white_point":true,
728 | "cache_model":true,
729 | "prompt":true,
730 | "device":true
731 | }
732 | },
733 | "widget_ue_connectable":{
734 |
735 | },
736 | "secondTabText":"Send Back",
737 | "enableTabs":false,
738 | "secondTabOffset":80,
739 | "Node name for S&R":"LayerMask: SegmentAnythingUltra V2",
740 | "tabWidth":65,
741 | "secondTabWidth":65,
742 | "tabXOffset":10
743 | },
744 | "order":8
745 | },
746 | {
747 | "mode":0,
748 | "outputs":[
749 |
750 | ],
751 | "size":[
752 | 210,
753 | 62
754 | ],
755 | "pos":[
756 | 6276.8818359375,
757 | -569.6011962890625
758 | ],
759 | "widgets_values":[
760 | "1",
761 | 1
762 | ],
763 | "inputs":[
764 |
765 | ],
766 | "flags":{
767 |
768 | },
769 | "id":33,
770 | "type":"Bookmark (rgthree)",
771 | "properties":{
772 | "widget_ue_connectable":{
773 |
774 | }
775 | },
776 | "order":3
777 | },
778 | {
779 | "mode":0,
780 | "outputs":[
781 |
782 | ],
783 | "size":[
784 | 210,
785 | 62
786 | ],
787 | "pos":[
788 | 5576.3740234375,
789 | 3681.332275390625
790 | ],
791 | "widgets_values":[
792 | "2",
793 | 1
794 | ],
795 | "inputs":[
796 |
797 | ],
798 | "flags":{
799 |
800 | },
801 | "id":36,
802 | "type":"Bookmark (rgthree)",
803 | "properties":{
804 | "widget_ue_connectable":{
805 |
806 | }
807 | },
808 | "order":4
809 | },
810 | {
811 | "mode":0,
812 | "outputs":[
813 | {
814 | "name":"IMAGE",
815 | "links":[
816 | 4,
817 | 51
818 | ],
819 | "label":"图像",
820 | "type":"IMAGE",
821 | "localized_name":"图像"
822 | },
823 | {
824 | "name":"MASK",
825 | "label":"遮罩",
826 | "type":"MASK",
827 | "localized_name":"遮罩"
828 | }
829 | ],
830 | "size":[
831 | 270,
832 | 314
833 | ],
834 | "pos":[
835 | 6019.25244140625,
836 | -299.09930419921875
837 | ],
838 | "widgets_values":[
839 | "18733ba03f229d3da22203ad98e1ecf6f302d36a3947b72b126b1019f1e370e7.png",
840 | "image"
841 | ],
842 | "inputs":[
843 | {
844 | "widget":{
845 | "name":"image"
846 | },
847 | "name":"image",
848 | "type":"COMBO",
849 | "localized_name":"图像"
850 | },
851 | {
852 | "widget":{
853 | "name":"upload"
854 | },
855 | "name":"upload",
856 | "type":"IMAGEUPLOAD",
857 | "localized_name":"选择文件上传"
858 | }
859 | ],
860 | "flags":{
861 |
862 | },
863 | "id":6,
864 | "type":"LoadImage",
865 | "properties":{
866 | "hasSecondTab":false,
867 | "cnr_id":"comfy-core",
868 | "ver":"0.3.49",
869 | "ue_properties":{
870 | "widget_ue_connectable":{
871 | "image":true,
872 | "upload":true
873 | }
874 | },
875 | "widget_ue_connectable":{
876 |
877 | },
878 | "secondTabText":"Send Back",
879 | "enableTabs":false,
880 | "secondTabOffset":80,
881 | "Node name for S&R":"LoadImage",
882 | "tabWidth":65,
883 | "secondTabWidth":65,
884 | "tabXOffset":10
885 | },
886 | "order":5
887 | },
888 | {
889 | "mode":0,
890 | "outputs":[
891 | {
892 | "name":"image",
893 | "label":"图像",
894 | "type":"IMAGE",
895 | "localized_name":"image"
896 | },
897 | {
898 | "name":"mask",
899 | "links":[
900 | 8,
901 | 52
902 | ],
903 | "label":"遮罩",
904 | "type":"MASK",
905 | "localized_name":"mask"
906 | }
907 | ],
908 | "size":[
909 | 390.8785095214844,
910 | 366
911 | ],
912 | "color":"rgba(27, 80, 119, 0.7)",
913 | "pos":[
914 | 6421.720703125,
915 | -337.93768310546875
916 | ],
917 | "widgets_values":[
918 | "sam_vit_h (2.56GB)",
919 | "GroundingDINO_SwinT_OGC (694MB)",
920 | 0.3,
921 | "VITMatte",
922 | 6,
923 | 6,
924 | 0.15,
925 | 0.99,
926 | true,
927 | "subject",
928 | "cuda",
929 | 2,
930 | false
931 | ],
932 | "inputs":[
933 | {
934 | "name":"image",
935 | "link":4,
936 | "label":"图像",
937 | "type":"IMAGE",
938 | "localized_name":"image"
939 | },
940 | {
941 | "widget":{
942 | "name":"sam_model"
943 | },
944 | "name":"sam_model",
945 | "type":"COMBO",
946 | "localized_name":"SAM模型"
947 | },
948 | {
949 | "widget":{
950 | "name":"grounding_dino_model"
951 | },
952 | "name":"grounding_dino_model",
953 | "type":"COMBO",
954 | "localized_name":"GroundingDINO模型"
955 | },
956 | {
957 | "widget":{
958 | "name":"threshold"
959 | },
960 | "name":"threshold",
961 | "type":"FLOAT",
962 | "localized_name":"阈值"
963 | },
964 | {
965 | "widget":{
966 | "name":"detail_method"
967 | },
968 | "name":"detail_method",
969 | "type":"COMBO",
970 | "localized_name":"细节处理方法"
971 | },
972 | {
973 | "widget":{
974 | "name":"detail_erode"
975 | },
976 | "name":"detail_erode",
977 | "type":"INT",
978 | "localized_name":"细节消融"
979 | },
980 | {
981 | "widget":{
982 | "name":"detail_dilate"
983 | },
984 | "name":"detail_dilate",
985 | "type":"INT",
986 | "localized_name":"细节膨胀"
987 | },
988 | {
989 | "widget":{
990 | "name":"black_point"
991 | },
992 | "name":"black_point",
993 | "type":"FLOAT",
994 | "localized_name":"黑色阈值"
995 | },
996 | {
997 | "widget":{
998 | "name":"white_point"
999 | },
1000 | "name":"white_point",
1001 | "type":"FLOAT",
1002 | "localized_name":"白色阈值"
1003 | },
1004 | {
1005 | "widget":{
1006 | "name":"process_detail"
1007 | },
1008 | "name":"process_detail",
1009 | "type":"BOOLEAN",
1010 | "localized_name":"处理细节"
1011 | },
1012 | {
1013 | "widget":{
1014 | "name":"prompt"
1015 | },
1016 | "name":"prompt",
1017 | "type":"STRING",
1018 | "localized_name":"提示词"
1019 | },
1020 | {
1021 | "widget":{
1022 | "name":"device"
1023 | },
1024 | "name":"device",
1025 | "type":"COMBO",
1026 | "localized_name":"设备"
1027 | },
1028 | {
1029 | "widget":{
1030 | "name":"max_megapixels"
1031 | },
1032 | "name":"max_megapixels",
1033 | "type":"FLOAT",
1034 | "localized_name":"Vitmatte最大尺寸"
1035 | },
1036 | {
1037 | "widget":{
1038 | "name":"cache_model"
1039 | },
1040 | "name":"cache_model",
1041 | "type":"BOOLEAN",
1042 | "localized_name":"cache_model"
1043 | }
1044 | ],
1045 | "flags":{
1046 |
1047 | },
1048 | "id":5,
1049 | "type":"LayerMask: SegmentAnythingUltra V2",
1050 | "properties":{
1051 | "hasSecondTab":false,
1052 | "cnr_id":"ComfyUI_LayerStyle_Advance",
1053 | "ver":"5f8c6f29c484100245bc6f8ea66277f53e23cdbe",
1054 | "ue_properties":{
1055 | "widget_ue_connectable":{
1056 | "detail_dilate":true,
1057 | "detail_erode":true,
1058 | "threshold":true,
1059 | "max_megapixels":true,
1060 | "sam_model":true,
1061 | "detail_method":true,
1062 | "black_point":true,
1063 | "process_detail":true,
1064 | "grounding_dino_model":true,
1065 | "white_point":true,
1066 | "cache_model":true,
1067 | "prompt":true,
1068 | "device":true
1069 | }
1070 | },
1071 | "widget_ue_connectable":{
1072 |
1073 | },
1074 | "secondTabText":"Send Back",
1075 | "enableTabs":false,
1076 | "secondTabOffset":80,
1077 | "Node name for S&R":"LayerMask: SegmentAnythingUltra V2",
1078 | "tabWidth":65,
1079 | "secondTabWidth":65,
1080 | "tabXOffset":10
1081 | },
1082 | "order":11
1083 | },
1084 | {
1085 | "mode":0,
1086 | "outputs":[
1087 | {
1088 | "name":"image",
1089 | "label":"图像",
1090 | "type":"IMAGE",
1091 | "localized_name":"image"
1092 | },
1093 | {
1094 | "name":"mask",
1095 | "links":[
1096 | 38,
1097 | 61
1098 | ],
1099 | "label":"遮罩",
1100 | "type":"MASK",
1101 | "localized_name":"mask"
1102 | }
1103 | ],
1104 | "size":[
1105 | 390.8785095214844,
1106 | 366
1107 | ],
1108 | "color":"rgba(27, 80, 119, 0.7)",
1109 | "pos":[
1110 | 6158.27734375,
1111 | 3462.401123046875
1112 | ],
1113 | "widgets_values":[
1114 | "sam_vit_h (2.56GB)",
1115 | "GroundingDINO_SwinT_OGC (694MB)",
1116 | 0.3,
1117 | "VITMatte",
1118 | 6,
1119 | 6,
1120 | 0.15,
1121 | 0.99,
1122 | true,
1123 | "subject",
1124 | "cuda",
1125 | 2,
1126 | false
1127 | ],
1128 | "inputs":[
1129 | {
1130 | "name":"image",
1131 | "link":19,
1132 | "label":"图像",
1133 | "type":"IMAGE",
1134 | "localized_name":"image"
1135 | },
1136 | {
1137 | "widget":{
1138 | "name":"sam_model"
1139 | },
1140 | "name":"sam_model",
1141 | "type":"COMBO",
1142 | "localized_name":"SAM模型"
1143 | },
1144 | {
1145 | "widget":{
1146 | "name":"grounding_dino_model"
1147 | },
1148 | "name":"grounding_dino_model",
1149 | "type":"COMBO",
1150 | "localized_name":"GroundingDINO模型"
1151 | },
1152 | {
1153 | "widget":{
1154 | "name":"threshold"
1155 | },
1156 | "name":"threshold",
1157 | "type":"FLOAT",
1158 | "localized_name":"阈值"
1159 | },
1160 | {
1161 | "widget":{
1162 | "name":"detail_method"
1163 | },
1164 | "name":"detail_method",
1165 | "type":"COMBO",
1166 | "localized_name":"细节处理方法"
1167 | },
1168 | {
1169 | "widget":{
1170 | "name":"detail_erode"
1171 | },
1172 | "name":"detail_erode",
1173 | "type":"INT",
1174 | "localized_name":"细节消融"
1175 | },
1176 | {
1177 | "widget":{
1178 | "name":"detail_dilate"
1179 | },
1180 | "name":"detail_dilate",
1181 | "type":"INT",
1182 | "localized_name":"细节膨胀"
1183 | },
1184 | {
1185 | "widget":{
1186 | "name":"black_point"
1187 | },
1188 | "name":"black_point",
1189 | "type":"FLOAT",
1190 | "localized_name":"黑色阈值"
1191 | },
1192 | {
1193 | "widget":{
1194 | "name":"white_point"
1195 | },
1196 | "name":"white_point",
1197 | "type":"FLOAT",
1198 | "localized_name":"白色阈值"
1199 | },
1200 | {
1201 | "widget":{
1202 | "name":"process_detail"
1203 | },
1204 | "name":"process_detail",
1205 | "type":"BOOLEAN",
1206 | "localized_name":"处理细节"
1207 | },
1208 | {
1209 | "widget":{
1210 | "name":"prompt"
1211 | },
1212 | "name":"prompt",
1213 | "type":"STRING",
1214 | "localized_name":"提示词"
1215 | },
1216 | {
1217 | "widget":{
1218 | "name":"device"
1219 | },
1220 | "name":"device",
1221 | "type":"COMBO",
1222 | "localized_name":"设备"
1223 | },
1224 | {
1225 | "widget":{
1226 | "name":"max_megapixels"
1227 | },
1228 | "name":"max_megapixels",
1229 | "type":"FLOAT",
1230 | "localized_name":"Vitmatte最大尺寸"
1231 | },
1232 | {
1233 | "widget":{
1234 | "name":"cache_model"
1235 | },
1236 | "name":"cache_model",
1237 | "type":"BOOLEAN",
1238 | "localized_name":"cache_model"
1239 | }
1240 | ],
1241 | "flags":{
1242 |
1243 | },
1244 | "id":25,
1245 | "type":"LayerMask: SegmentAnythingUltra V2",
1246 | "properties":{
1247 | "hasSecondTab":false,
1248 | "cnr_id":"ComfyUI_LayerStyle_Advance",
1249 | "ver":"5f8c6f29c484100245bc6f8ea66277f53e23cdbe",
1250 | "ue_properties":{
1251 | "widget_ue_connectable":{
1252 | "detail_dilate":true,
1253 | "detail_erode":true,
1254 | "threshold":true,
1255 | "max_megapixels":true,
1256 | "sam_model":true,
1257 | "detail_method":true,
1258 | "black_point":true,
1259 | "process_detail":true,
1260 | "grounding_dino_model":true,
1261 | "white_point":true,
1262 | "cache_model":true,
1263 | "prompt":true,
1264 | "device":true
1265 | }
1266 | },
1267 | "widget_ue_connectable":{
1268 |
1269 | },
1270 | "secondTabText":"Send Back",
1271 | "enableTabs":false,
1272 | "secondTabOffset":80,
1273 | "Node name for S&R":"LayerMask: SegmentAnythingUltra V2",
1274 | "tabWidth":65,
1275 | "secondTabWidth":65,
1276 | "tabXOffset":10
1277 | },
1278 | "order":9
1279 | },
1280 | {
1281 | "mode":0,
1282 | "outputs":[
1283 |
1284 | ],
1285 | "size":[
1286 | 526.8812255859375,
1287 | 432.2183532714844
1288 | ],
1289 | "pos":[
1290 | 6902.7294921875,
1291 | 3433.54345703125
1292 | ],
1293 | "widgets_values":[
1294 |
1295 | ],
1296 | "inputs":[
1297 | {
1298 | "name":"mask",
1299 | "link":62,
1300 | "label":"遮罩",
1301 | "type":"MASK",
1302 | "localized_name":"mask"
1303 | }
1304 | ],
1305 | "flags":{
1306 |
1307 | },
1308 | "id":31,
1309 | "type":"MaskPreview+",
1310 | "properties":{
1311 | "hasSecondTab":false,
1312 | "cnr_id":"comfyui_essentials",
1313 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9",
1314 | "ue_properties":{
1315 | "widget_ue_connectable":{
1316 |
1317 | }
1318 | },
1319 | "widget_ue_connectable":{
1320 |
1321 | },
1322 | "secondTabText":"Send Back",
1323 | "enableTabs":false,
1324 | "secondTabOffset":80,
1325 | "Node name for S&R":"MaskPreview+",
1326 | "tabWidth":65,
1327 | "secondTabWidth":65,
1328 | "tabXOffset":10
1329 | },
1330 | "order":22
1331 | },
1332 | {
1333 | "mode":0,
1334 | "outputs":[
1335 |
1336 | ],
1337 | "size":[
1338 | 554.429443359375,
1339 | 435.8914489746094
1340 | ],
1341 | "pos":[
1342 | 7463.20166015625,
1343 | 3439.820068359375
1344 | ],
1345 | "widgets_values":[
1346 |
1347 | ],
1348 | "inputs":[
1349 | {
1350 | "name":"mask",
1351 | "link":38,
1352 | "label":"遮罩",
1353 | "type":"MASK",
1354 | "localized_name":"mask"
1355 | }
1356 | ],
1357 | "flags":{
1358 |
1359 | },
1360 | "id":32,
1361 | "type":"MaskPreview+",
1362 | "properties":{
1363 | "hasSecondTab":false,
1364 | "cnr_id":"comfyui_essentials",
1365 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9",
1366 | "ue_properties":{
1367 | "widget_ue_connectable":{
1368 |
1369 | }
1370 | },
1371 | "widget_ue_connectable":{
1372 |
1373 | },
1374 | "secondTabText":"Send Back",
1375 | "enableTabs":false,
1376 | "secondTabOffset":80,
1377 | "Node name for S&R":"MaskPreview+",
1378 | "tabWidth":65,
1379 | "secondTabWidth":65,
1380 | "tabXOffset":10
1381 | },
1382 | "order":14
1383 | },
1384 | {
1385 | "mode":0,
1386 | "outputs":[
1387 |
1388 | ],
1389 | "size":[
1390 | 210,
1391 | 62
1392 | ],
1393 | "pos":[
1394 | 7009.439453125,
1395 | 2423.40869140625
1396 | ],
1397 | "widgets_values":[
1398 | "3",
1399 | 1
1400 | ],
1401 | "inputs":[
1402 |
1403 | ],
1404 | "flags":{
1405 |
1406 | },
1407 | "id":35,
1408 | "type":"Bookmark (rgthree)",
1409 | "properties":{
1410 | "widget_ue_connectable":{
1411 |
1412 | }
1413 | },
1414 | "order":6
1415 | },
1416 | {
1417 | "mode":0,
1418 | "outputs":[
1419 |
1420 | ],
1421 | "size":[
1422 | 210,
1423 | 62
1424 | ],
1425 | "pos":[
1426 | 6160.31201171875,
1427 | 1049.2061767578125
1428 | ],
1429 | "widgets_values":[
1430 | "4",
1431 | 1
1432 | ],
1433 | "inputs":[
1434 |
1435 | ],
1436 | "flags":{
1437 |
1438 | },
1439 | "id":34,
1440 | "type":"Bookmark (rgthree)",
1441 | "properties":{
1442 | "widget_ue_connectable":{
1443 |
1444 | }
1445 | },
1446 | "order":7
1447 | },
1448 | {
1449 | "mode":0,
1450 | "outputs":[
1451 | {
1452 | "name":"image",
1453 | "links":[
1454 | 50
1455 | ],
1456 | "label":"图像",
1457 | "type":"IMAGE",
1458 | "localized_name":"image"
1459 | },
1460 | {
1461 | "name":"mask",
1462 | "links":[
1463 | 37,
1464 | 55
1465 | ],
1466 | "label":"遮罩",
1467 | "type":"MASK",
1468 | "localized_name":"mask"
1469 | }
1470 | ],
1471 | "size":[
1472 | 390.8785095214844,
1473 | 366
1474 | ],
1475 | "color":"rgba(27, 80, 119, 0.7)",
1476 | "pos":[
1477 | 6448.3759765625,
1478 | 1170.3975830078125
1479 | ],
1480 | "widgets_values":[
1481 | "sam_vit_h (2.56GB)",
1482 | "GroundingDINO_SwinT_OGC (694MB)",
1483 | 0.3,
1484 | "VITMatte",
1485 | 6,
1486 | 6,
1487 | 0.15,
1488 | 0.99,
1489 | true,
1490 | "subject",
1491 | "cuda",
1492 | 2,
1493 | false
1494 | ],
1495 | "inputs":[
1496 | {
1497 | "name":"image",
1498 | "link":21,
1499 | "label":"图像",
1500 | "type":"IMAGE",
1501 | "localized_name":"image"
1502 | },
1503 | {
1504 | "widget":{
1505 | "name":"sam_model"
1506 | },
1507 | "name":"sam_model",
1508 | "type":"COMBO",
1509 | "localized_name":"SAM模型"
1510 | },
1511 | {
1512 | "widget":{
1513 | "name":"grounding_dino_model"
1514 | },
1515 | "name":"grounding_dino_model",
1516 | "type":"COMBO",
1517 | "localized_name":"GroundingDINO模型"
1518 | },
1519 | {
1520 | "widget":{
1521 | "name":"threshold"
1522 | },
1523 | "name":"threshold",
1524 | "type":"FLOAT",
1525 | "localized_name":"阈值"
1526 | },
1527 | {
1528 | "widget":{
1529 | "name":"detail_method"
1530 | },
1531 | "name":"detail_method",
1532 | "type":"COMBO",
1533 | "localized_name":"细节处理方法"
1534 | },
1535 | {
1536 | "widget":{
1537 | "name":"detail_erode"
1538 | },
1539 | "name":"detail_erode",
1540 | "type":"INT",
1541 | "localized_name":"细节消融"
1542 | },
1543 | {
1544 | "widget":{
1545 | "name":"detail_dilate"
1546 | },
1547 | "name":"detail_dilate",
1548 | "type":"INT",
1549 | "localized_name":"细节膨胀"
1550 | },
1551 | {
1552 | "widget":{
1553 | "name":"black_point"
1554 | },
1555 | "name":"black_point",
1556 | "type":"FLOAT",
1557 | "localized_name":"黑色阈值"
1558 | },
1559 | {
1560 | "widget":{
1561 | "name":"white_point"
1562 | },
1563 | "name":"white_point",
1564 | "type":"FLOAT",
1565 | "localized_name":"白色阈值"
1566 | },
1567 | {
1568 | "widget":{
1569 | "name":"process_detail"
1570 | },
1571 | "name":"process_detail",
1572 | "type":"BOOLEAN",
1573 | "localized_name":"处理细节"
1574 | },
1575 | {
1576 | "widget":{
1577 | "name":"prompt"
1578 | },
1579 | "name":"prompt",
1580 | "type":"STRING",
1581 | "localized_name":"提示词"
1582 | },
1583 | {
1584 | "widget":{
1585 | "name":"device"
1586 | },
1587 | "name":"device",
1588 | "type":"COMBO",
1589 | "localized_name":"设备"
1590 | },
1591 | {
1592 | "widget":{
1593 | "name":"max_megapixels"
1594 | },
1595 | "name":"max_megapixels",
1596 | "type":"FLOAT",
1597 | "localized_name":"Vitmatte最大尺寸"
1598 | },
1599 | {
1600 | "widget":{
1601 | "name":"cache_model"
1602 | },
1603 | "name":"cache_model",
1604 | "type":"BOOLEAN",
1605 | "localized_name":"cache_model"
1606 | }
1607 | ],
1608 | "flags":{
1609 |
1610 | },
1611 | "id":23,
1612 | "type":"LayerMask: SegmentAnythingUltra V2",
1613 | "properties":{
1614 | "hasSecondTab":false,
1615 | "cnr_id":"ComfyUI_LayerStyle_Advance",
1616 | "ver":"5f8c6f29c484100245bc6f8ea66277f53e23cdbe",
1617 | "ue_properties":{
1618 | "widget_ue_connectable":{
1619 | "detail_dilate":true,
1620 | "detail_erode":true,
1621 | "threshold":true,
1622 | "max_megapixels":true,
1623 | "sam_model":true,
1624 | "detail_method":true,
1625 | "black_point":true,
1626 | "process_detail":true,
1627 | "grounding_dino_model":true,
1628 | "white_point":true,
1629 | "cache_model":true,
1630 | "prompt":true,
1631 | "device":true
1632 | }
1633 | },
1634 | "widget_ue_connectable":{
1635 |
1636 | },
1637 | "secondTabText":"Send Back",
1638 | "enableTabs":false,
1639 | "secondTabOffset":80,
1640 | "Node name for S&R":"LayerMask: SegmentAnythingUltra V2",
1641 | "tabWidth":65,
1642 | "secondTabWidth":65,
1643 | "tabXOffset":10
1644 | },
1645 | "order":10
1646 | },
1647 | {
1648 | "mode":0,
1649 | "outputs":[
1650 | {
1651 | "name":"alpha_mask",
1652 | "links":[
1653 | 53
1654 | ],
1655 | "label":"alpha_mask",
1656 | "type":"MASK",
1657 | "localized_name":"alpha_mask"
1658 | },
1659 | {
1660 | "name":"matted_image",
1661 | "label":"matted_image",
1662 | "type":"IMAGE",
1663 | "localized_name":"matted_image"
1664 | }
1665 | ],
1666 | "size":[
1667 | 270,
1668 | 242
1669 | ],
1670 | "pos":[
1671 | 6893.458984375,
1672 | -143.44204711914062
1673 | ],
1674 | "widgets_values":[
1675 | "SDMatte_plus.safetensors",
1676 | 1024,
1677 | true,
1678 | "alpha_only",
1679 | true,
1680 | 0.8,
1681 | false
1682 | ],
1683 | "inputs":[
1684 | {
1685 | "name":"image",
1686 | "link":51,
1687 | "label":"image",
1688 | "type":"IMAGE",
1689 | "localized_name":"image"
1690 | },
1691 | {
1692 | "name":"trimap",
1693 | "link":52,
1694 | "label":"trimap",
1695 | "type":"MASK",
1696 | "localized_name":"trimap"
1697 | },
1698 | {
1699 | "widget":{
1700 | "name":"model_name"
1701 | },
1702 | "name":"model_name",
1703 | "label":"model_name",
1704 | "type":"COMBO",
1705 | "localized_name":"model_name"
1706 | },
1707 | {
1708 | "widget":{
1709 | "name":"inference_size"
1710 | },
1711 | "name":"inference_size",
1712 | "label":"inference_size",
1713 | "type":"COMBO",
1714 | "localized_name":"inference_size"
1715 | },
1716 | {
1717 | "widget":{
1718 | "name":"is_transparent"
1719 | },
1720 | "name":"is_transparent",
1721 | "label":"is_transparent",
1722 | "type":"BOOLEAN",
1723 | "localized_name":"is_transparent"
1724 | },
1725 | {
1726 | "widget":{
1727 | "name":"output_mode"
1728 | },
1729 | "name":"output_mode",
1730 | "label":"output_mode",
1731 | "type":"COMBO",
1732 | "localized_name":"output_mode"
1733 | },
1734 | {
1735 | "widget":{
1736 | "name":"mask_refine"
1737 | },
1738 | "name":"mask_refine",
1739 | "label":"mask_refine",
1740 | "type":"BOOLEAN",
1741 | "localized_name":"mask_refine"
1742 | },
1743 | {
1744 | "widget":{
1745 | "name":"trimap_constraint"
1746 | },
1747 | "name":"trimap_constraint",
1748 | "label":"trimap_constraint",
1749 | "type":"FLOAT",
1750 | "localized_name":"trimap_constraint"
1751 | },
1752 | {
1753 | "widget":{
1754 | "name":"force_cpu"
1755 | },
1756 | "shape":7,
1757 | "name":"force_cpu",
1758 | "label":"force_cpu",
1759 | "type":"BOOLEAN",
1760 | "localized_name":"force_cpu"
1761 | }
1762 | ],
1763 | "flags":{
1764 |
1765 | },
1766 | "id":47,
1767 | "type":"SDMatteApply",
1768 | "properties":{
1769 | "widget_ue_connectable":{
1770 |
1771 | },
1772 | "Node name for S&R":"SDMatteApply"
1773 | },
1774 | "order":20
1775 | },
1776 | {
1777 | "mode":0,
1778 | "outputs":[
1779 |
1780 | ],
1781 | "size":[
1782 | 270,
1783 | 58
1784 | ],
1785 | "pos":[
1786 | 6919.6875,
1787 | 1521.2562255859375
1788 | ],
1789 | "widgets_values":[
1790 | "ComfyUI"
1791 | ],
1792 | "inputs":[
1793 | {
1794 | "name":"images",
1795 | "link":50,
1796 | "label":"图像",
1797 | "type":"IMAGE",
1798 | "localized_name":"图片"
1799 | },
1800 | {
1801 | "widget":{
1802 | "name":"filename_prefix"
1803 | },
1804 | "name":"filename_prefix",
1805 | "type":"STRING",
1806 | "localized_name":"文件名前缀"
1807 | }
1808 | ],
1809 | "flags":{
1810 |
1811 | },
1812 | "id":46,
1813 | "type":"SaveImage",
1814 | "properties":{
1815 | "widget_ue_connectable":{
1816 |
1817 | },
1818 | "Node name for S&R":"SaveImage"
1819 | },
1820 | "order":16
1821 | },
1822 | {
1823 | "mode":0,
1824 | "outputs":[
1825 | {
1826 | "name":"alpha_mask",
1827 | "links":[
1828 | 56
1829 | ],
1830 | "label":"alpha_mask",
1831 | "type":"MASK",
1832 | "localized_name":"alpha_mask"
1833 | },
1834 | {
1835 | "name":"matted_image",
1836 | "label":"matted_image",
1837 | "type":"IMAGE",
1838 | "localized_name":"matted_image"
1839 | }
1840 | ],
1841 | "size":[
1842 | 270,
1843 | 242
1844 | ],
1845 | "pos":[
1846 | 6918.3759765625,
1847 | 1210.397705078125
1848 | ],
1849 | "widgets_values":[
1850 | "SDMatte_plus.safetensors",
1851 | 1024,
1852 | false,
1853 | "alpha_only",
1854 | true,
1855 | 0.8,
1856 | false
1857 | ],
1858 | "inputs":[
1859 | {
1860 | "name":"image",
1861 | "link":54,
1862 | "label":"image",
1863 | "type":"IMAGE",
1864 | "localized_name":"image"
1865 | },
1866 | {
1867 | "name":"trimap",
1868 | "link":55,
1869 | "label":"trimap",
1870 | "type":"MASK",
1871 | "localized_name":"trimap"
1872 | },
1873 | {
1874 | "widget":{
1875 | "name":"model_name"
1876 | },
1877 | "name":"model_name",
1878 | "label":"model_name",
1879 | "type":"COMBO",
1880 | "localized_name":"model_name"
1881 | },
1882 | {
1883 | "widget":{
1884 | "name":"inference_size"
1885 | },
1886 | "name":"inference_size",
1887 | "label":"inference_size",
1888 | "type":"COMBO",
1889 | "localized_name":"inference_size"
1890 | },
1891 | {
1892 | "widget":{
1893 | "name":"is_transparent"
1894 | },
1895 | "name":"is_transparent",
1896 | "label":"is_transparent",
1897 | "type":"BOOLEAN",
1898 | "localized_name":"is_transparent"
1899 | },
1900 | {
1901 | "widget":{
1902 | "name":"output_mode"
1903 | },
1904 | "name":"output_mode",
1905 | "label":"output_mode",
1906 | "type":"COMBO",
1907 | "localized_name":"output_mode"
1908 | },
1909 | {
1910 | "widget":{
1911 | "name":"mask_refine"
1912 | },
1913 | "name":"mask_refine",
1914 | "label":"mask_refine",
1915 | "type":"BOOLEAN",
1916 | "localized_name":"mask_refine"
1917 | },
1918 | {
1919 | "widget":{
1920 | "name":"trimap_constraint"
1921 | },
1922 | "name":"trimap_constraint",
1923 | "label":"trimap_constraint",
1924 | "type":"FLOAT",
1925 | "localized_name":"trimap_constraint"
1926 | },
1927 | {
1928 | "widget":{
1929 | "name":"force_cpu"
1930 | },
1931 | "shape":7,
1932 | "name":"force_cpu",
1933 | "label":"force_cpu",
1934 | "type":"BOOLEAN",
1935 | "localized_name":"force_cpu"
1936 | }
1937 | ],
1938 | "flags":{
1939 |
1940 | },
1941 | "id":48,
1942 | "type":"SDMatteApply",
1943 | "properties":{
1944 | "widget_ue_connectable":{
1945 |
1946 | },
1947 | "Node name for S&R":"SDMatteApply"
1948 | },
1949 | "order":18
1950 | },
1951 | {
1952 | "mode":0,
1953 | "outputs":[
1954 | {
1955 | "name":"alpha_mask",
1956 | "links":[
1957 | 59
1958 | ],
1959 | "label":"alpha_mask",
1960 | "type":"MASK",
1961 | "localized_name":"alpha_mask"
1962 | },
1963 | {
1964 | "name":"matted_image",
1965 | "label":"matted_image",
1966 | "type":"IMAGE",
1967 | "localized_name":"matted_image"
1968 | }
1969 | ],
1970 | "size":[
1971 | 270,
1972 | 242
1973 | ],
1974 | "pos":[
1975 | 7037.099609375,
1976 | 2225.364501953125
1977 | ],
1978 | "widgets_values":[
1979 | "SDMatte_plus.safetensors",
1980 | 1024,
1981 | false,
1982 | "alpha_only",
1983 | true,
1984 | 0.8,
1985 | false
1986 | ],
1987 | "inputs":[
1988 | {
1989 | "name":"image",
1990 | "link":57,
1991 | "label":"image",
1992 | "type":"IMAGE",
1993 | "localized_name":"image"
1994 | },
1995 | {
1996 | "name":"trimap",
1997 | "link":58,
1998 | "label":"trimap",
1999 | "type":"MASK",
2000 | "localized_name":"trimap"
2001 | },
2002 | {
2003 | "widget":{
2004 | "name":"model_name"
2005 | },
2006 | "name":"model_name",
2007 | "label":"model_name",
2008 | "type":"COMBO",
2009 | "localized_name":"model_name"
2010 | },
2011 | {
2012 | "widget":{
2013 | "name":"inference_size"
2014 | },
2015 | "name":"inference_size",
2016 | "label":"inference_size",
2017 | "type":"COMBO",
2018 | "localized_name":"inference_size"
2019 | },
2020 | {
2021 | "widget":{
2022 | "name":"is_transparent"
2023 | },
2024 | "name":"is_transparent",
2025 | "label":"is_transparent",
2026 | "type":"BOOLEAN",
2027 | "localized_name":"is_transparent"
2028 | },
2029 | {
2030 | "widget":{
2031 | "name":"output_mode"
2032 | },
2033 | "name":"output_mode",
2034 | "label":"output_mode",
2035 | "type":"COMBO",
2036 | "localized_name":"output_mode"
2037 | },
2038 | {
2039 | "widget":{
2040 | "name":"mask_refine"
2041 | },
2042 | "name":"mask_refine",
2043 | "label":"mask_refine",
2044 | "type":"BOOLEAN",
2045 | "localized_name":"mask_refine"
2046 | },
2047 | {
2048 | "widget":{
2049 | "name":"trimap_constraint"
2050 | },
2051 | "name":"trimap_constraint",
2052 | "label":"trimap_constraint",
2053 | "type":"FLOAT",
2054 | "localized_name":"trimap_constraint"
2055 | },
2056 | {
2057 | "widget":{
2058 | "name":"force_cpu"
2059 | },
2060 | "shape":7,
2061 | "name":"force_cpu",
2062 | "label":"force_cpu",
2063 | "type":"BOOLEAN",
2064 | "localized_name":"force_cpu"
2065 | }
2066 | ],
2067 | "flags":{
2068 |
2069 | },
2070 | "id":49,
2071 | "type":"SDMatteApply",
2072 | "properties":{
2073 | "widget_ue_connectable":{
2074 |
2075 | },
2076 | "Node name for S&R":"SDMatteApply"
2077 | },
2078 | "order":13
2079 | },
2080 | {
2081 | "mode":0,
2082 | "outputs":[
2083 | {
2084 | "name":"alpha_mask",
2085 | "links":[
2086 | 62
2087 | ],
2088 | "label":"alpha_mask",
2089 | "type":"MASK",
2090 | "localized_name":"alpha_mask"
2091 | },
2092 | {
2093 | "name":"matted_image",
2094 | "label":"matted_image",
2095 | "type":"IMAGE",
2096 | "localized_name":"matted_image"
2097 | }
2098 | ],
2099 | "size":[
2100 | 270,
2101 | 242
2102 | ],
2103 | "pos":[
2104 | 6589.41748046875,
2105 | 3463.54296875
2106 | ],
2107 | "widgets_values":[
2108 | "SDMatte_plus.safetensors",
2109 | 1024,
2110 | false,
2111 | "alpha_only",
2112 | true,
2113 | 0.8,
2114 | false
2115 | ],
2116 | "inputs":[
2117 | {
2118 | "name":"image",
2119 | "link":60,
2120 | "label":"image",
2121 | "type":"IMAGE",
2122 | "localized_name":"image"
2123 | },
2124 | {
2125 | "name":"trimap",
2126 | "link":61,
2127 | "label":"trimap",
2128 | "type":"MASK",
2129 | "localized_name":"trimap"
2130 | },
2131 | {
2132 | "widget":{
2133 | "name":"model_name"
2134 | },
2135 | "name":"model_name",
2136 | "label":"model_name",
2137 | "type":"COMBO",
2138 | "localized_name":"model_name"
2139 | },
2140 | {
2141 | "widget":{
2142 | "name":"inference_size"
2143 | },
2144 | "name":"inference_size",
2145 | "label":"inference_size",
2146 | "type":"COMBO",
2147 | "localized_name":"inference_size"
2148 | },
2149 | {
2150 | "widget":{
2151 | "name":"is_transparent"
2152 | },
2153 | "name":"is_transparent",
2154 | "label":"is_transparent",
2155 | "type":"BOOLEAN",
2156 | "localized_name":"is_transparent"
2157 | },
2158 | {
2159 | "widget":{
2160 | "name":"output_mode"
2161 | },
2162 | "name":"output_mode",
2163 | "label":"output_mode",
2164 | "type":"COMBO",
2165 | "localized_name":"output_mode"
2166 | },
2167 | {
2168 | "widget":{
2169 | "name":"mask_refine"
2170 | },
2171 | "name":"mask_refine",
2172 | "label":"mask_refine",
2173 | "type":"BOOLEAN",
2174 | "localized_name":"mask_refine"
2175 | },
2176 | {
2177 | "widget":{
2178 | "name":"trimap_constraint"
2179 | },
2180 | "name":"trimap_constraint",
2181 | "label":"trimap_constraint",
2182 | "type":"FLOAT",
2183 | "localized_name":"trimap_constraint"
2184 | },
2185 | {
2186 | "widget":{
2187 | "name":"force_cpu"
2188 | },
2189 | "shape":7,
2190 | "name":"force_cpu",
2191 | "label":"force_cpu",
2192 | "type":"BOOLEAN",
2193 | "localized_name":"force_cpu"
2194 | }
2195 | ],
2196 | "flags":{
2197 |
2198 | },
2199 | "id":50,
2200 | "type":"SDMatteApply",
2201 | "properties":{
2202 | "widget_ue_connectable":{
2203 |
2204 | },
2205 | "Node name for S&R":"SDMatteApply"
2206 | },
2207 | "order":15
2208 | }
2209 | ],
2210 | "extra":{
2211 | "links_added_by_ue":[
2212 |
2213 | ],
2214 | "VHS_KeepIntermediate":true,
2215 | "ue_links":[
2216 |
2217 | ],
2218 | "VHS_MetadataImage":true,
2219 | "0246.VERSION":[
2220 | 0,
2221 | 0,
2222 | 4
2223 | ],
2224 | "VHS_latentpreviewrate":0,
2225 | "frontendVersion":"1.23.4",
2226 | "VHS_latentpreview":false,
2227 | "ds":{
2228 | "offset":[
2229 | -5798.510608893245,
2230 | -3257.772095718232
2231 | ],
2232 | "scale":0.9646149645000017
2233 | }
2234 | },
2235 | "groups":[
2236 | {
2237 | "color":"#3f789e",
2238 | "font_size":24,
2239 | "flags":{
2240 |
2241 | },
2242 | "id":1,
2243 | "title":"Group",
2244 | "bounding":[
2245 | 5840,
2246 | 3356.39990234375,
2247 | 2439.1904296875,
2248 | 547.0269775390625
2249 | ]
2250 | },
2251 | {
2252 | "color":"#3f789e",
2253 | "font_size":24,
2254 | "flags":{
2255 |
2256 | },
2257 | "id":2,
2258 | "title":"Group",
2259 | "bounding":[
2260 | 6097.099609375,
2261 | 2021.7645263671875,
2262 | 1939.23046875,
2263 | 477.5785217285156
2264 | ]
2265 | },
2266 | {
2267 | "color":"#3f789e",
2268 | "font_size":24,
2269 | "flags":{
2270 |
2271 | },
2272 | "id":3,
2273 | "title":"Group",
2274 | "bounding":[
2275 | 6008.3759765625,
2276 | 911.7197265625,
2277 | 2181.765625,
2278 | 693.96337890625
2279 | ]
2280 | },
2281 | {
2282 | "color":"#3f789e",
2283 | "font_size":24,
2284 | "flags":{
2285 |
2286 | },
2287 | "id":4,
2288 | "title":"Group",
2289 | "bounding":[
2290 | 5868.4970703125,
2291 | -411.5376892089844,
2292 | 2220.255859375,
2293 | 675.7010498046875
2294 | ]
2295 | }
2296 | ],
2297 | "links":[
2298 | [
2299 | 4,
2300 | 6,
2301 | 0,
2302 | 5,
2303 | 0,
2304 | "IMAGE"
2305 | ],
2306 | [
2307 | 8,
2308 | 5,
2309 | 1,
2310 | 8,
2311 | 0,
2312 | "MASK"
2313 | ],
2314 | [
2315 | 19,
2316 | 19,
2317 | 0,
2318 | 25,
2319 | 0,
2320 | "IMAGE"
2321 | ],
2322 | [
2323 | 20,
2324 | 18,
2325 | 0,
2326 | 24,
2327 | 0,
2328 | "IMAGE"
2329 | ],
2330 | [
2331 | 21,
2332 | 17,
2333 | 0,
2334 | 23,
2335 | 0,
2336 | "IMAGE"
2337 | ],
2338 | [
2339 | 32,
2340 | 24,
2341 | 1,
2342 | 30,
2343 | 0,
2344 | "MASK"
2345 | ],
2346 | [
2347 | 37,
2348 | 23,
2349 | 1,
2350 | 28,
2351 | 0,
2352 | "MASK"
2353 | ],
2354 | [
2355 | 38,
2356 | 25,
2357 | 1,
2358 | 32,
2359 | 0,
2360 | "MASK"
2361 | ],
2362 | [
2363 | 50,
2364 | 23,
2365 | 0,
2366 | 46,
2367 | 0,
2368 | "IMAGE"
2369 | ],
2370 | [
2371 | 51,
2372 | 6,
2373 | 0,
2374 | 47,
2375 | 0,
2376 | "IMAGE"
2377 | ],
2378 | [
2379 | 52,
2380 | 5,
2381 | 1,
2382 | 47,
2383 | 1,
2384 | "MASK"
2385 | ],
2386 | [
2387 | 53,
2388 | 47,
2389 | 0,
2390 | 16,
2391 | 0,
2392 | "MASK"
2393 | ],
2394 | [
2395 | 54,
2396 | 17,
2397 | 0,
2398 | 48,
2399 | 0,
2400 | "IMAGE"
2401 | ],
2402 | [
2403 | 55,
2404 | 23,
2405 | 1,
2406 | 48,
2407 | 1,
2408 | "MASK"
2409 | ],
2410 | [
2411 | 56,
2412 | 48,
2413 | 0,
2414 | 27,
2415 | 0,
2416 | "MASK"
2417 | ],
2418 | [
2419 | 57,
2420 | 18,
2421 | 0,
2422 | 49,
2423 | 0,
2424 | "IMAGE"
2425 | ],
2426 | [
2427 | 58,
2428 | 24,
2429 | 1,
2430 | 49,
2431 | 1,
2432 | "MASK"
2433 | ],
2434 | [
2435 | 59,
2436 | 49,
2437 | 0,
2438 | 29,
2439 | 0,
2440 | "MASK"
2441 | ],
2442 | [
2443 | 60,
2444 | 19,
2445 | 0,
2446 | 50,
2447 | 0,
2448 | "IMAGE"
2449 | ],
2450 | [
2451 | 61,
2452 | 25,
2453 | 1,
2454 | 50,
2455 | 1,
2456 | "MASK"
2457 | ],
2458 | [
2459 | 62,
2460 | 50,
2461 | 0,
2462 | 31,
2463 | 0,
2464 | "MASK"
2465 | ]
2466 | ],
2467 | "id":"500ae763-00a0-4a77-839d-fab74ac03ec3",
2468 | "config":{
2469 |
2470 | },
2471 | "version":0.4,
2472 | "last_node_id":50,
2473 | "revision":0
2474 | }
--------------------------------------------------------------------------------
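Note: the file above is a standard ComfyUI workflow export. For orientation, here is a minimal sketch of how one might inspect such a graph programmatically (plain Python 3, standard library only; the file path is illustrative, not a path shipped in this repo):

    import json

    # Load an exported ComfyUI workflow graph.
    with open("workflow.json", encoding="utf-8") as f:
        wf = json.load(f)

    # Nodes live in a top-level "nodes" array. Widget values are stored
    # positionally, in the order the node declares its widgets -- for
    # SDMatteApply: model_name, inference_size, is_transparent,
    # output_mode, mask_refine, trimap_constraint, force_cpu.
    for node in wf.get("nodes", []):
        if node.get("type") == "SDMatteApply":
            print(node["id"], node.get("widgets_values"))

    # Each entry in "links" is a 6-tuple:
    # [link_id, src_node, src_slot, dst_node, dst_slot, data_type]
    for link_id, src, src_slot, dst, dst_slot, dtype in wf.get("links", []):
        print(f"link {link_id}: {src}[{src_slot}] -> {dst}[{dst_slot}] ({dtype})")

Running this against the workflow above would list the four SDMatteApply nodes (ids 47-50) with their shared widget settings, and print the connection table that routes each upstream image/trimap pair into them.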