├── mattepro_nodes.py ├── src ├── __init__.py ├── SDMatte │ ├── README.md │ ├── scheduler │ │ └── scheduler_config.json │ ├── tokenizer │ │ ├── special_tokens_map.json │ │ └── tokenizer_config.json │ ├── vae │ │ └── config.json │ ├── text_encoder │ │ └── config.json │ ├── unet │ │ └── config.json │ └── .gitattributes ├── modeling │ ├── SDMatte │ │ ├── __init__.py │ │ └── meta_arch.py │ └── __init__.py ├── utils │ ├── __init__.py │ ├── utils.py │ └── replace.py └── .gitignore ├── node.zip ├── example_workflow ├── test_1.png ├── test_2.png └── 超强抠图遮罩细化工作流.json ├── __init__.py ├── requirements.txt ├── .gitattributes ├── .github └── workflows │ ├── publish_action.yml │ └── publish.yml ├── pyproject.toml ├── .gitignore ├── README_CN.md ├── README.md └── sdmatte_nodes.py /mattepro_nodes.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | # 使 SDMatte 目录成为可导入包 2 | 3 | -------------------------------------------------------------------------------- /src/SDMatte/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: mit 3 | --- 4 | -------------------------------------------------------------------------------- /src/modeling/SDMatte/__init__.py: -------------------------------------------------------------------------------- 1 | from .meta_arch import * 2 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | from .replace import * 3 | -------------------------------------------------------------------------------- /node.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flybirdxx/ComfyUI-SDMatte/HEAD/node.zip -------------------------------------------------------------------------------- /src/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .SDMatte import * 2 | # from .LiteSDMatte import * 3 | -------------------------------------------------------------------------------- /example_workflow/test_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flybirdxx/ComfyUI-SDMatte/HEAD/example_workflow/test_1.png -------------------------------------------------------------------------------- /example_workflow/test_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flybirdxx/ComfyUI-SDMatte/HEAD/example_workflow/test_2.png -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .sdmatte_nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS 2 | 3 | __all__ = [ 4 | "NODE_CLASS_MAPPINGS", 5 | "NODE_DISPLAY_NAME_MAPPINGS", 6 | ] 7 | 8 | 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers>=0.25.0 2 | transformers>=4.38.0 3 | timm>=0.9.7 4 | einops>=0.7.0 5 | easydict>=1.10 6 | omegaconf>=2.3.0 7 | fvcore>=0.1.5.post20221221 8 | 
torchvision>=0.16.0 9 | opencv-python>=4.8.0 10 | safetensors>=0.4.0 11 | 12 | -------------------------------------------------------------------------------- /src/SDMatte/scheduler/scheduler_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "DDIMScheduler", 3 | "_diffusers_version": "0.8.0", 4 | "beta_end": 0.012, 5 | "beta_schedule": "scaled_linear", 6 | "beta_start": 0.00085, 7 | "clip_sample": false, 8 | "num_train_timesteps": 1000, 9 | "prediction_type": "v_prediction", 10 | "set_alpha_to_one": false, 11 | "skip_prk_steps": true, 12 | "steps_offset": 1, 13 | "trained_betas": null 14 | } 15 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .vscode/ 3 | .history/ 4 | .hypothesis/ 5 | 6 | infer_output/ 7 | output/ 8 | checkpoints/ 9 | engine/ 10 | dinov2/ 11 | detectron2/ 12 | modeling/LiteSDMatte/ 13 | 14 | configs/LiteSDMatte.py 15 | script/eva.sh 16 | script/train.sh 17 | script/infer.sh 18 | model_arch.txt 19 | teacher_model_arch.txt 20 | attn_map_vis.py 21 | data_check.ipynb 22 | flops.py 23 | gen_one_image.py 24 | gen.sh 25 | main.py 26 | utils.ipynb 27 | utils/visualization.py -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set default behavior to automatically normalize line endings. 2 | * text=auto 3 | 4 | # Force UTF-8 encoding for JSON files 5 | *.json text eol=lf encoding=utf-8 6 | 7 | # Force UTF-8 encoding for workflow files 8 | example_workflow/*.json text eol=lf encoding=utf-8 9 | 10 | # Python files 11 | *.py text eol=lf encoding=utf-8 12 | 13 | # Markdown files 14 | *.md text eol=lf encoding=utf-8 15 | 16 | # Text files 17 | *.txt text eol=lf encoding=utf-8 18 | 19 | -------------------------------------------------------------------------------- /src/SDMatte/tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "<|startoftext|>", 4 | "lstrip": false, 5 | "normalized": true, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": true, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "!", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": true, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /.github/workflows/publish_action.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "pyproject.toml" 9 | 10 | jobs: 11 | publish-node: 12 | name: Publish Custom Node to registry 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Check out code 16 | uses: actions/checkout@v4 17 | - name: Publish Custom Node 18 | uses: Comfy-Org/publish-node-action@main 19 | with: 20 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} ## Add your own personal access token to your Github Repository secrets and reference it here. 
21 | -------------------------------------------------------------------------------- /src/SDMatte/vae/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "AutoencoderKL", 3 | "_diffusers_version": "0.8.0", 4 | "_name_or_path": "hf-models/stable-diffusion-v2-768x768/vae", 5 | "act_fn": "silu", 6 | "block_out_channels": [ 7 | 128, 8 | 256, 9 | 512, 10 | 512 11 | ], 12 | "down_block_types": [ 13 | "DownEncoderBlock2D", 14 | "DownEncoderBlock2D", 15 | "DownEncoderBlock2D", 16 | "DownEncoderBlock2D" 17 | ], 18 | "in_channels": 3, 19 | "latent_channels": 4, 20 | "layers_per_block": 2, 21 | "norm_num_groups": 32, 22 | "out_channels": 3, 23 | "sample_size": 768, 24 | "up_block_types": [ 25 | "UpDecoderBlock2D", 26 | "UpDecoderBlock2D", 27 | "UpDecoderBlock2D", 28 | "UpDecoderBlock2D" 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /src/SDMatte/text_encoder/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "gelu", 11 | "hidden_size": 1024, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 4096, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 16, 19 | "num_hidden_layers": 23, 20 | "pad_token_id": 1, 21 | "projection_dim": 512, 22 | "torch_dtype": "float32", 23 | "transformers_version": "4.25.0.dev0", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | paths: 9 | - "pyproject.toml" 10 | 11 | permissions: 12 | issues: write 13 | 14 | jobs: 15 | publish-node: 16 | name: Publish Custom Node to registry 17 | runs-on: ubuntu-latest 18 | if: ${{ github.repository_owner == 'flybirdxx' }} 19 | steps: 20 | - name: Check out code 21 | uses: actions/checkout@v4 22 | with: 23 | submodules: true 24 | - name: Publish Custom Node 25 | uses: Comfy-Org/publish-node-action@v1 26 | with: 27 | ## Add your own personal access token to your Github Repository secrets and reference it here. 
28 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} 29 | -------------------------------------------------------------------------------- /src/SDMatte/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": { 4 | "__type": "AddedToken", 5 | "content": "<|startoftext|>", 6 | "lstrip": false, 7 | "normalized": true, 8 | "rstrip": false, 9 | "single_word": false 10 | }, 11 | "do_lower_case": true, 12 | "eos_token": { 13 | "__type": "AddedToken", 14 | "content": "<|endoftext|>", 15 | "lstrip": false, 16 | "normalized": true, 17 | "rstrip": false, 18 | "single_word": false 19 | }, 20 | "errors": "replace", 21 | "model_max_length": 77, 22 | "name_or_path": "hf-models/stable-diffusion-v2-768x768/tokenizer", 23 | "pad_token": "<|endoftext|>", 24 | "special_tokens_map_file": "./special_tokens_map.json", 25 | "tokenizer_class": "CLIPTokenizer", 26 | "unk_token": { 27 | "__type": "AddedToken", 28 | "content": "<|endoftext|>", 29 | "lstrip": false, 30 | "normalized": true, 31 | "rstrip": false, 32 | "single_word": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/SDMatte/unet/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "UNet2DConditionModel", 3 | "_diffusers_version": "0.8.0", 4 | "_name_or_path": "hf-models/stable-diffusion-v2-768x768/unet", 5 | "act_fn": "silu", 6 | "bbox_time_embed_dim": 320, 7 | "point_embeddings_input_dim": 1680, 8 | "bbox_embeddings_input_dim": 1280, 9 | "attention_head_dim": [ 10 | 5, 11 | 10, 12 | 20, 13 | 20 14 | ], 15 | "block_out_channels": [ 16 | 320, 17 | 640, 18 | 1280, 19 | 1280 20 | ], 21 | "center_input_sample": false, 22 | "cross_attention_dim": 1024, 23 | "down_block_types": [ 24 | "CrossAttnDownBlock2D", 25 | "CrossAttnDownBlock2D", 26 | "CrossAttnDownBlock2D", 27 | "DownBlock2D" 28 | ], 29 | "downsample_padding": 1, 30 | "dual_cross_attention": false, 31 | "flip_sin_to_cos": true, 32 | "freq_shift": 0, 33 | "in_channels": 4, 34 | "layers_per_block": 2, 35 | "mid_block_scale_factor": 1, 36 | "norm_eps": 1e-05, 37 | "norm_num_groups": 32, 38 | "out_channels": 4, 39 | "sample_size": 96, 40 | "up_block_types": [ 41 | "UpBlock2D", 42 | "CrossAttnUpBlock2D", 43 | "CrossAttnUpBlock2D", 44 | "CrossAttnUpBlock2D" 45 | ], 46 | "use_linear_projection": true 47 | } 48 | -------------------------------------------------------------------------------- /src/SDMatte/.gitattributes: -------------------------------------------------------------------------------- 1 | *.7z filter=lfs diff=lfs merge=lfs -text 2 | *.arrow filter=lfs diff=lfs merge=lfs -text 3 | *.bin filter=lfs diff=lfs merge=lfs -text 4 | *.bz2 filter=lfs diff=lfs merge=lfs -text 5 | *.ckpt filter=lfs diff=lfs merge=lfs -text 6 | *.ftz filter=lfs diff=lfs merge=lfs -text 7 | *.gz filter=lfs diff=lfs merge=lfs -text 8 | *.h5 filter=lfs diff=lfs merge=lfs -text 9 | *.joblib filter=lfs diff=lfs merge=lfs -text 10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text 11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text 12 | *.model filter=lfs diff=lfs merge=lfs -text 13 | *.msgpack filter=lfs diff=lfs merge=lfs -text 14 | *.npy filter=lfs diff=lfs merge=lfs -text 15 | *.npz filter=lfs diff=lfs merge=lfs -text 16 | *.onnx filter=lfs diff=lfs merge=lfs -text 17 | *.ot filter=lfs diff=lfs merge=lfs -text 18 | *.parquet filter=lfs diff=lfs merge=lfs -text 19 | *.pb filter=lfs 
diff=lfs merge=lfs -text 20 | *.pickle filter=lfs diff=lfs merge=lfs -text 21 | *.pkl filter=lfs diff=lfs merge=lfs -text 22 | *.pt filter=lfs diff=lfs merge=lfs -text 23 | *.pth filter=lfs diff=lfs merge=lfs -text 24 | *.rar filter=lfs diff=lfs merge=lfs -text 25 | *.safetensors filter=lfs diff=lfs merge=lfs -text 26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text 27 | *.tar.* filter=lfs diff=lfs merge=lfs -text 28 | *.tar filter=lfs diff=lfs merge=lfs -text 29 | *.tflite filter=lfs diff=lfs merge=lfs -text 30 | *.tgz filter=lfs diff=lfs merge=lfs -text 31 | *.wasm filter=lfs diff=lfs merge=lfs -text 32 | *.xz filter=lfs diff=lfs merge=lfs -text 33 | *.zip filter=lfs diff=lfs merge=lfs -text 34 | *.zst filter=lfs diff=lfs merge=lfs -text 35 | *tfevents* filter=lfs diff=lfs merge=lfs -text 36 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "sdmatte" 3 | description = "SDMatte is an interactive image matting method based on stable diffusion, which supports three types of visual prompts (points, boxes, and masks) for accurately extracting target objects from natural images." 4 | version = "1.0.0" 5 | license = {file = "LICENSE"} 6 | # classifiers = [ 7 | # # For OS-independent nodes (works on all operating systems) 8 | # "Operating System :: OS Independent", 9 | # 10 | # # OR for OS-specific nodes, specify the supported systems: 11 | # "Operating System :: Microsoft :: Windows", # Windows specific 12 | # "Operating System :: POSIX :: Linux", # Linux specific 13 | # "Operating System :: MacOS", # macOS specific 14 | # 15 | # # GPU Accelerator support. Pick the ones that are supported by your extension. 
16 | # "Environment :: GPU :: NVIDIA CUDA", # NVIDIA CUDA support 17 | # "Environment :: GPU :: AMD ROCm", # AMD ROCm support 18 | # "Environment :: GPU :: Intel Arc", # Intel Arc support 19 | # "Environment :: NPU :: Huawei Ascend", # Huawei Ascend support 20 | # "Environment :: GPU :: Apple Metal", # Apple Metal support 21 | # ] 22 | 23 | dependencies = ["diffusers>=0.25.0", "transformers>=4.38.0", "timm>=0.9.7", "einops>=0.7.0", "easydict>=1.10", "omegaconf>=2.3.0", "fvcore>=0.1.5.post20221221", "torchvision>=0.16.0", "opencv-python>=4.8.0", "safetensors>=0.4.0"] 24 | 25 | [project.urls] 26 | Repository = "https://github.com/flybirdxx/ComfyUI-SDMatte" 27 | # Used by Comfy Registry https://registry.comfy.org 28 | Documentation = "https://github.com/flybirdxx/ComfyUI-SDMatte/wiki" 29 | "Bug Tracker" = "https://github.com/flybirdxx/ComfyUI-SDMatte/issues" 30 | 31 | [tool.comfy] 32 | PublisherId = "" 33 | DisplayName = "ComfyUI-SDMatte" 34 | Icon = "" 35 | includes = [] 36 | # "requires-comfyui" = ">=1.0.0" # ComfyUI version compatibility 37 | 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | MANIFEST 23 | 24 | # PyInstaller 25 | *.manifest 26 | *.spec 27 | 28 | # Installer logs 29 | pip-log.txt 30 | pip-delete-this-directory.txt 31 | 32 | # Unit test / coverage reports 33 | htmlcov/ 34 | .tox/ 35 | .coverage 36 | .coverage.* 37 | .cache 38 | nosetests.xml 39 | coverage.xml 40 | *.cover 41 | .hypothesis/ 42 | .pytest_cache/ 43 | 44 | # Translations 45 | *.mo 46 | *.pot 47 | 48 | # Django stuff: 49 | *.log 50 | local_settings.py 51 | db.sqlite3 52 | 53 | # Flask stuff: 54 | instance/ 55 | .webassets-cache 56 | 57 | # Scrapy stuff: 58 | .scrapy 59 | 60 | # Sphinx documentation 61 | docs/_build/ 62 | 63 | # PyBuilder 64 | target/ 65 | 66 | # Jupyter Notebook 67 | .ipynb_checkpoints 68 | 69 | # pyenv 70 | .python-version 71 | 72 | # celery beat schedule file 73 | celerybeat-schedule 74 | 75 | # SageMath parsed files 76 | *.sage.py 77 | 78 | # Environments 79 | .env 80 | .venv 81 | env/ 82 | venv/ 83 | ENV/ 84 | env.bak/ 85 | venv.bak/ 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | .spyproject 90 | 91 | # Rope project settings 92 | .ropeproject 93 | 94 | # mkdocs documentation 95 | /site 96 | 97 | # mypy 98 | .mypy_cache/ 99 | .dmypy.json 100 | dmypy.json 101 | 102 | # IDE 103 | .vscode/ 104 | .idea/ 105 | *.swp 106 | *.swo 107 | *~ 108 | 109 | # OS 110 | .DS_Store 111 | .DS_Store? 
112 | ._* 113 | .Spotlight-V100 114 | .Trashes 115 | ehthumbs.db 116 | Thumbs.db 117 | 118 | # Project specific 119 | infer_output/ 120 | output/ 121 | checkpoints/ 122 | engine/ 123 | dinov2/ 124 | detectron2/ 125 | modeling/LiteSDMatte/ 126 | configs/LiteSDMatte.py 127 | script/eva.sh 128 | script/train.sh 129 | script/infer.sh 130 | model_arch.txt 131 | teacher_model_arch.txt 132 | attn_map_vis.py 133 | data_check.ipynb 134 | flops.py 135 | gen_one_image.py 136 | gen.sh 137 | main.py 138 | utils.ipynb 139 | utils/visualization.py 140 | -------------------------------------------------------------------------------- /src/utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch.nn as nn 4 | from torch.nn import Conv2d 5 | from torch.nn.parameter import Parameter 6 | from diffusers.models.attention_processor import Attention, AttnProcessor 7 | from .replace import custom_prepare_attention_mask, custom_get_attention_scores 8 | import cv2 9 | import torch 10 | import numpy as np 11 | 12 | 13 | def replace_unet_conv_in(unet, num): 14 | # replace the first layer to accept 8 in_channels 15 | _weight = unet.conv_in.weight.clone() # [320, 4, 3, 3] 16 | _bias = unet.conv_in.bias.clone() # [320] 17 | _weight = _weight.repeat((1, num, 1, 1)) # Keep selected channel(s) 18 | # half the activation magnitude 19 | _weight = _weight / num 20 | # new conv_in channel 21 | _n_convin_out_channel = unet.conv_in.out_channels 22 | _new_conv_in = Conv2d(4 * num, _n_convin_out_channel, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 23 | _new_conv_in.weight = Parameter(_weight) 24 | _new_conv_in.bias = Parameter(_bias) 25 | unet.conv_in = _new_conv_in 26 | print("Unet conv_in layer is replaced") 27 | # replace config 28 | unet.config["in_channels"] = 4 * num 29 | print("Unet config is updated") 30 | return unet 31 | 32 | 33 | def add_aux_conv_in(unet): 34 | aux_conv_in = nn.Conv2d(in_channels=4, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 35 | aux_conv_in.weight.data[:320, :, :, :] = unet.conv_in.weight.data.clone() 36 | aux_conv_in.weight.data[320:, :, :, :] = 0.0 37 | aux_conv_in.bias.data[:320] = unet.conv_in.bias.data.clone() 38 | aux_conv_in.bias.data[320:] = 0.0 39 | unet.aux_conv_in = aux_conv_in 40 | print("add aux_conv_in layer for unet") 41 | return unet 42 | 43 | 44 | def replace_attention_mask_method(module, residual_connection): 45 | if isinstance(module, Attention): 46 | module.processor = AttnProcessor() 47 | if hasattr(module, "prepare_attention_mask"): 48 | module.prepare_attention_mask = custom_prepare_attention_mask.__get__(module) 49 | if hasattr(module, "cross_attention_dim") and module.cross_attention_dim == 320: 50 | module.residual_connection = residual_connection 51 | if hasattr(module, "get_attention_scores"): 52 | module.get_attention_scores = custom_get_attention_scores.__get__(module) 53 | 54 | # 递归遍历所有子模块 55 | for child_name, child_module in module.named_children(): 56 | replace_attention_mask_method(child_module, residual_connection) 57 | 58 | 59 | erosion_kernels = [None] + [cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size)) for size in range(1, 30)] 60 | 61 | 62 | def get_unknown_tensor_from_pred(pred, rand_width=30, train_mode=True): 63 | ### pred: N, 1 ,H, W 64 | N, C, H, W = pred.shape 65 | 66 | pred = pred.data.cpu().numpy() 67 | uncertain_area = np.ones_like(pred, dtype=np.uint8) 68 | uncertain_area[pred < 1.0 / 255.0] = 0 69 | uncertain_area[pred > 1 - 
1.0 / 255.0] = 0 70 | 71 | for n in range(N): 72 | uncertain_area_ = uncertain_area[n, 0, :, :] # H, W 73 | if train_mode: 74 | width = np.random.randint(1, rand_width) 75 | else: 76 | width = rand_width // 2 77 | uncertain_area_ = cv2.dilate(uncertain_area_, erosion_kernels[width]) 78 | uncertain_area[n, 0, :, :] = uncertain_area_ 79 | 80 | weight = np.zeros_like(uncertain_area) 81 | weight[uncertain_area == 1] = 1 82 | weight = torch.from_numpy(weight).float().cuda() 83 | return weight 84 | -------------------------------------------------------------------------------- /README_CN.md: -------------------------------------------------------------------------------- 1 | # ComfyUI-SDMatte 2 | 3 | [English](README.md) | 简体中文 4 | 5 | 基于 [SDMatte](https://github.com/vivoCameraResearch/SDMatte) 的 ComfyUI 自定义节点插件，用于交互式图像抠图。 6 | 7 | ## 🚀 快速开始 8 | 9 | > 📺 **视频教程**：[ComfyUI-SDMatte 使用教程](https://www.bilibili.com/video/BV1L6bzz8Ene/?spm_id_from=333.1387.homepage.video_card.click&vd_source=b340fd050dbe0d3e2ce863af909f1ee8) 10 | > 🔧 **示例工作流**：[高级抠图与遮罩优化工作流](https://www.runninghub.ai/post/1955928733028941826?inviteCode=rh-v1041) 11 | > 💡 **推荐**：先观看视频教程了解使用方法，再下载工作流进行实践 12 | 13 | ## 📖 简介 14 | 15 | SDMatte 是一个基于稳定扩散（Stable Diffusion）的交互式图像抠图方法，由 vivo 摄像研究团队开发，已被 ICCV 2025 接收。该方法利用预训练扩散模型的强大先验知识，支持多种视觉提示（点、框、掩码）来精确提取自然图像中的目标对象。 16 | 17 | 本插件将 SDMatte 集成到 ComfyUI 中，提供简洁易用的节点接口，专注于 trimap 引导的抠图功能，并内置了多种 VRAM 优化策略。 18 | 19 | ## 🖼️ 示例 20 | 21 | ### 抠图效果 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
[示例图：原始图像 | 三值图 | 抠图结果]
[示例图：Alpha 遮罩输出]
35 | 36 | *示例工作流展示了 SDMatte 基于三值图引导的高精度抠图能力。* 37 | 38 | ## ✨ 特性 39 | 40 | - 🎯 **高精度抠图**:基于扩散模型的强大先验,能够处理复杂边缘细节 41 | - 🖼️ **Trimap 引导**:支持三值图(trimap)引导的精确抠图 42 | - 🚀 **VRAM 优化**:内置混合精度、注意力切片等多种显存优化策略 43 | - 🔧 **ComfyUI 集成**:完全兼容 ComfyUI 工作流系统 44 | - 📥 **模型自动下载**:首次使用时自动下载模型权重 45 | - 📱 **灵活尺寸**:支持多种推理分辨率(512-1024px) 46 | 47 | ## 🛠️ 安装 48 | 49 | ### 1. 下载插件 50 | 51 | 将本插件放置到 ComfyUI 的自定义节点目录: 52 | 53 | ```bash 54 | cd ComfyUI/custom_nodes/ 55 | git clone https://github.com/flybirdxx/ComfyUI-SDMatte.git 56 | ``` 57 | 58 | ### 2. 安装依赖 59 | 60 | ComfyUI 会在启动时自动安装 `requirements.txt` 中的依赖包: 61 | 62 | - diffusers 63 | - timm 64 | - einops 65 | - lazyconfig 66 | - safetensors 67 | 68 | ### 3. 自动模型下载 69 | 70 | **无需手动下载模型。** 71 | 72 | 首次使用 `Apply SDMatte` 节点时,它会自动检查并从 Hugging Face 下载所需的模型权重。模型将被存放在: 73 | `ComfyUI/models/SDMatte/` 74 | 75 | 您可以直接在节点内选择使用标准版 (`SDMatte.safetensors`) 或增强版 (`SDMatte_plus.safetensors`)。 76 | 77 | ### 4. 重启 ComfyUI 78 | 79 | 重启 ComfyUI 以加载新的自定义节点。 80 | 81 | ## 🎮 使用方法 82 | 83 | ### 节点说明 84 | 85 | #### Apply SDMatte(SDMatte 应用) 86 | 87 | - **功能**:在一个节点内完成模型加载和抠图应用。 88 | - **输入**: 89 | - `ckpt_name`:选择要使用的模型(`SDMatte.safetensors` 或 `SDMatte_plus.safetensors`)。如果本地不存在,将自动下载。 90 | - `image`:输入图像(ComfyUI IMAGE 格式) 91 | - `trimap`:三值图掩码(ComfyUI MASK 格式) 92 | - `inference_size`:推理分辨率(512/640/768/896/1024) 93 | - `is_transparent`:图像是否包含透明区域 94 | - `output_mode`:输出模式(`alpha_only`, `matted_rgba`, `matted_rgb`) 95 | - `mask_refine`:启用遮罩优化以减少背景干扰 96 | - `trimap_constraint`:用于优化的三值图约束强度 97 | - `force_cpu`:强制使用 CPU 推理(可选) 98 | - **输出**: 99 | - `alpha_mask`:抠图结果的 alpha 遮罩 100 | - `matted_image`:抠图后的图像结果 101 | 102 | ### 基础工作流 103 | 104 | 1. **Load Image**:加载需要抠图的图像 105 | 2. **创建 Trimap**:使用绘图工具或其他节点创建三值图 106 | - 黑色(0):确定背景 107 | - 白色(1):确定前景 108 | - 灰色(0.5):未知区域 109 | 3. **Apply SDMatte**:应用抠图 110 | 4. **Preview Image**:预览抠图结果 111 | 112 | ### 推荐设置 113 | 114 | - **推理分辨率**:1024(最高质量)或 768(平衡性能) 115 | - **透明标志**:根据输入图像是否有透明通道设置 116 | - **强制 CPU**:仅在 GPU 显存不足时使用 117 | 118 | ## 🔧 技术细节 119 | 120 | ### 数据处理 121 | 122 | - **输入图像**:自动调整到推理分辨率,归一化到 [-1, 1] 123 | - **Trimap**:调整到推理分辨率,映射到 [-1, 1] 范围 124 | - **输出**:调整回原始分辨率,clamp 到 [0, 1] 范围 125 | 126 | ### VRAM 优化 127 | 128 | 插件内置多种显存优化策略(自动启用): 129 | 130 | - **混合精度**:使用 FP16 autocast 减少显存占用 131 | - **注意力切片**:SlicedAttnProcessor(slice_size=1) 最大化显存节省 132 | - **显存清理**:推理前后自动清理 CUDA 缓存 133 | - **设备管理**:智能的设备分配和模型移动 134 | 135 | ### 模型加载 136 | 137 | - **权重格式**:支持 .pth 和 .safetensors 格式 138 | - **安全加载**:处理 omegaconf 对象,支持 weights_only 模式 139 | - **嵌套结构**:自动处理复杂的 checkpoint 结构 140 | - **错误恢复**:多重fallback机制确保加载成功 141 | 142 | ## ❓ 常见问题 143 | 144 | ### Q: 节点无法被搜索到? 145 | A: 确保插件目录结构正确,重启 ComfyUI,检查控制台是否有错误信息。 146 | 147 | ### Q: 模型加载失败? 148 | A: 检查 SDMatte.safetensors 文件路径,确保基础模型目录结构完整,查看控制台详细错误信息。 149 | 150 | ### Q: 推理时显存不足? 151 | A: 尝试降低推理分辨率,启用 `force_cpu` 选项,或关闭其他占用显存的程序。 152 | 153 | ### Q: 抠图效果不理想? 154 | A: 优化 trimap 质量,确保前景/背景/未知区域标注准确,尝试不同的推理分辨率。 155 | 156 | ### Q: 首次推理很慢? 157 | A: 首次运行需要编译 CUDA 内核,后续推理会显著加速。 158 | 159 | ### Q: 应该选择哪个模型版本? 
160 | A: 161 | - **SDMatte.safetensors(标准版)**:文件较小(~11GB),推理速度快,适合大多数场景 162 | - **SDMatte_plus.safetensors(增强版)**:文件较大,精度更高,适合对质量要求极高的专业用途 163 | - 建议先使用标准版测试,如需更高质量再升级到增强版 164 | 165 | ## 📋 系统要求 166 | 167 | - **ComfyUI**:最新版本 168 | - **Python**:3.8+ 169 | - **PyTorch**:1.12+ (支持 CUDA 推荐) 170 | - **显存**:8GB+ 推荐(支持 CPU 推理) 171 | - **依赖**:diffusers, timm, einops, lazyconfig, safetensors 172 | 173 | ## 📝 版本更新日志 174 | 175 | ### v1.5.0 (2025-01-XX) 176 | - 🔄 **模型格式更新**: 177 | - 从 `.pth` 格式迁移到 `.safetensors` 格式,提供更好的安全性和性能 178 | - 更新模型下载链接,使用 Hugging Face 仓库 (1038lab/SDMatte) 179 | - 使用 SafeTensors 库改进模型加载,提供更安全的权重处理 180 | - 🔧 **技术改进**: 181 | - 增强模型加载稳定性,提供更好的错误处理 182 | - 优化模型加载过程中的内存使用 183 | - 改进与最新 ComfyUI 版本的兼容性 184 | - 📚 **文档更新**: 185 | - 更新安装说明以反映新的模型格式 186 | - 添加 SafeTensors 格式优势的相关信息 187 | 188 | ### v1.3.0 (2025-08-17) 189 | - ✨ **新增功能**: 190 | - 实现模型自动下载与检查功能,模型现在存放于 `ComfyUI/models/SDMatte/` 目录。 191 | - 🔧 **优化改进**: 192 | - 将 `SDMatte Model Loader` 和 `SDMatte Apply` 节点合并为单一的 `Apply SDMatte` 节点,简化了工作流。 193 | - 重构了部分代码,提升稳定性。 194 | 195 | ### v1.2.0 (2025-08-15) 196 | - ✨ **新增功能**: 197 | - 添加图像输出,同时输出alpha遮罩和抠图结果 198 | - 支持透明背景抠图模式 199 | - 添加多种输出模式:`alpha_only`、`matted_rgba`、`matted_rgb` 200 | - 新增遮罩优化功能,使用trimap约束过滤不需要的区域 201 | - 添加 `trimap_constraint` 参数控制约束强度 202 | - 为所有参数添加详细的tooltip说明 203 | - 🔧 **优化改进**: 204 | - 改进alpha遮罩处理逻辑,减少背景干扰 205 | - 优化前景区域提取算法 206 | - 增强低置信度区域过滤机制 207 | - 📚 **文档更新**: 208 | - 添加示例工作流链接 209 | - 添加视频教程链接 210 | - 🔧 **优化改进**: 211 | - 改进VRAM优化策略 212 | - 增强模型加载稳定性 213 | - 优化推理性能 214 | 215 | ### v1.0.0 (2025-08-14) 216 | - 🎉 **初始版本**: 217 | - 基础SDMatte模型集成 218 | - 支持trimap引导抠图 219 | - 内置VRAM优化功能 220 | - 支持多种推理分辨率 221 | 222 | ## 📚 参考 223 | 224 | - **示例工作流**:[高级抠图与遮罩优化工作流](https://www.runninghub.ai/post/1955928733028941826?inviteCode=rh-v1041) 225 | - **视频教程**:[ComfyUI-SDMatte 使用教程](https://www.bilibili.com/video/BV1L6bzz8Ene/?spm_id_from=333.1387.homepage.video_card.click&vd_source=b340fd050dbe0d3e2ce863af909f1ee8) 226 | - **原始论文**:[SDMatte: Grafting Diffusion Models for Interactive Matting](https://arxiv.org/abs/2408.00321) (ICCV 2025) 227 | - **原始代码**:[vivoCameraResearch/SDMatte](https://github.com/vivoCameraResearch/SDMatte) 228 | - **模型权重**:[LongfeiHuang/SDMatte](https://huggingface.co/LongfeiHuang/SDMatte) 229 | 230 | ## 📄 许可证 231 | 232 | 本项目遵循 MIT 许可证。原始 SDMatte 项目同样使用 MIT 许可证。 233 | 234 | ## 🙏 致谢 235 | 236 | 感谢 vivo 摄像研究团队开发的优秀 SDMatte 模型,以及 Stable Diffusion、ComfyUI 社区的贡献。 237 | 238 | ## 📧 支持 239 | 240 | 如有问题或建议,请在 GitHub 上提交 Issue。 241 | 242 | --- 243 | 244 | **注意**:本插件为第三方实现,与原始 SDMatte 团队无直接关联。使用前请确保遵循相关许可证条款。 -------------------------------------------------------------------------------- /src/modeling/SDMatte/meta_arch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from transformers import CLIPTextModel, CLIPTokenizer, CLIPTextConfig 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from diffusers import DDIMScheduler, AutoencoderKL, UNet2DConditionModel 6 | from diffusers.models.embeddings import get_timestep_embedding 7 | from ...utils import replace_unet_conv_in, replace_attention_mask_method, add_aux_conv_in 8 | from ...utils.replace import CustomUNet 9 | import random 10 | import os 11 | 12 | # 解决离线本地目录层级差异,如存在 "subdir/subdir/config.json" 的情况 13 | def _resolve_nested_dir(base_dir: str, subdir: str, config_filename: str) -> str: 14 | direct = os.path.join(base_dir, subdir) 15 | nested = os.path.join(base_dir, subdir, subdir) 16 | if os.path.exists(os.path.join(direct, 
config_filename)): 17 | return direct 18 | if os.path.exists(os.path.join(nested, config_filename)): 19 | return nested 20 | return direct 21 | 22 | AUX_INPUT_DIT = { 23 | "auto_mask": "auto_coords", 24 | "point_mask": "point_coords", 25 | "bbox_mask": "bbox_coords", 26 | "mask": "mask_coords", 27 | "trimap": "trimap_coords", 28 | } 29 | 30 | class SDMatte(nn.Module): 31 | def __init__( 32 | self, 33 | pretrained_model_name_or_path, 34 | conv_scale=3, 35 | num_inference_steps=1, 36 | aux_input="bbox_mask", 37 | use_aux_input=False, 38 | use_coor_input=True, 39 | use_dis_loss=True, 40 | use_attention_mask=True, 41 | use_encoder_attention_mask=False, 42 | add_noise=False, 43 | attn_mask_aux_input=["point_mask", "bbox_mask", "mask"], 44 | aux_input_list=["point_mask", "bbox_mask", "mask"], 45 | use_encoder_hidden_states=True, 46 | residual_connection=False, 47 | use_attention_mask_list=[True, True, True], 48 | use_encoder_hidden_states_list=[True, True, True], 49 | load_weight = True, 50 | ): 51 | super().__init__() 52 | self.init_submodule(pretrained_model_name_or_path, load_weight) 53 | self.num_inference_steps = num_inference_steps 54 | self.aux_input = aux_input 55 | self.use_aux_input = use_aux_input 56 | self.use_coor_input = use_coor_input 57 | self.use_dis_loss = use_dis_loss 58 | self.use_attention_mask = use_attention_mask 59 | self.use_encoder_attention_mask = use_encoder_attention_mask 60 | self.add_noise = add_noise 61 | self.attn_mask_aux_input = attn_mask_aux_input 62 | self.aux_input_list = aux_input_list 63 | self.use_encoder_hidden_states = use_encoder_hidden_states 64 | if use_encoder_hidden_states: 65 | self.unet = add_aux_conv_in(self.unet) 66 | if not add_noise: 67 | conv_scale -= 1 68 | if not use_aux_input: 69 | conv_scale -= 1 70 | if conv_scale > 1: 71 | self.unet = replace_unet_conv_in(self.unet, conv_scale) 72 | replace_attention_mask_method(self.unet, residual_connection) 73 | self.text_encoder.requires_grad_(False) 74 | self.vae.requires_grad_(False) 75 | self.unet.train() 76 | self.unet.use_attention_mask_list = use_attention_mask_list 77 | self.unet.use_encoder_hidden_states_list = use_encoder_hidden_states_list 78 | 79 | def init_submodule(self, pretrained_model_name_or_path, load_weight): 80 | if load_weight: 81 | text_dir = _resolve_nested_dir(pretrained_model_name_or_path, "text_encoder", "config.json") 82 | vae_dir = _resolve_nested_dir(pretrained_model_name_or_path, "vae", "config.json") 83 | unet_dir = _resolve_nested_dir(pretrained_model_name_or_path, "unet", "config.json") 84 | sched_dir = _resolve_nested_dir(pretrained_model_name_or_path, "scheduler", "scheduler_config.json") 85 | tok_dir = _resolve_nested_dir(pretrained_model_name_or_path, "tokenizer", "tokenizer_config.json") 86 | 87 | self.text_encoder = CLIPTextModel.from_pretrained(text_dir) 88 | self.vae = AutoencoderKL.from_pretrained(vae_dir) 89 | self.unet = CustomUNet.from_pretrained( 90 | unet_dir, low_cpu_mem_usage=True, ignore_mismatched_sizes=False 91 | ) 92 | self.noise_scheduler = DDIMScheduler.from_pretrained(sched_dir) 93 | self.tokenizer = CLIPTokenizer.from_pretrained(tok_dir) 94 | else: 95 | text_dir = _resolve_nested_dir(pretrained_model_name_or_path, "text_encoder", "config.json") 96 | text_config = CLIPTextConfig.from_pretrained(text_dir) 97 | self.text_encoder = CLIPTextModel(text_config) 98 | 99 | vae_path = _resolve_nested_dir(pretrained_model_name_or_path, "vae", "config.json") 100 | self.vae = AutoencoderKL.from_config(AutoencoderKL.load_config(vae_path)) 101 | 102 | 
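# load_weight=False path: the UNet below, like the VAE above, is instantiated from its bundled config.json only; the actual SDMatte weights are applied afterwards in sdmatte_nodes.py, which downloads the .safetensors checkpoint and calls load_state_dict(..., strict=False).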
unet_path = _resolve_nested_dir(pretrained_model_name_or_path, "unet", "config.json") 103 | self.unet = CustomUNet.from_config( 104 | CustomUNet.load_config(unet_path), 105 | low_cpu_mem_usage=True, 106 | ignore_mismatched_sizes=False 107 | ) 108 | 109 | scheduler_path = os.path.join(_resolve_nested_dir(pretrained_model_name_or_path, "scheduler", "scheduler_config.json"), "scheduler_config.json") 110 | self.noise_scheduler = DDIMScheduler.from_config(DDIMScheduler.load_config(scheduler_path)) 111 | 112 | tok_dir = _resolve_nested_dir(pretrained_model_name_or_path, "tokenizer", "tokenizer_config.json") 113 | self.tokenizer = CLIPTokenizer.from_pretrained(tok_dir) 114 | 115 | 116 | def forward(self, data): 117 | rgb = data["image"].cuda() 118 | B = rgb.shape[0] 119 | 120 | if self.aux_input is None and self.training: 121 | aux_input_type = random.choice(self.aux_input_list) 122 | elif self.aux_input is None: 123 | aux_input_type = "point_mask" 124 | else: 125 | aux_input_type = self.aux_input 126 | 127 | # get aux input latent 128 | if self.use_aux_input: 129 | aux_input = data[aux_input_type].cuda() 130 | aux_input = aux_input.repeat(1, 3, 1, 1) 131 | aux_input_h = self.vae.encoder(aux_input.to(rgb.dtype)) 132 | aux_input_moments = self.vae.quant_conv(aux_input_h) 133 | aux_input_mean, _ = torch.chunk(aux_input_moments, 2, dim=1) 134 | aux_input_latent = aux_input_mean * self.vae.config.scaling_factor 135 | else: 136 | aux_input_latent = None 137 | 138 | # get aux coordinate 139 | coor_name = AUX_INPUT_DIT[aux_input_type] 140 | coor = data[coor_name].cuda() 141 | if coor_name == "point_coords": 142 | N = coor.shape[1] 143 | for i in range(N, 1680): 144 | if 1680 % i == 0: 145 | num_channels = 1680 // i 146 | pad_size = i - N 147 | padding = torch.zeros((B, pad_size), dtype=coor.dtype, device=coor.device) 148 | coor = torch.cat([coor, padding], dim=1) 149 | zero_coor = torch.zeros((B, pad_size + N), dtype=coor.dtype, device=coor.device) 150 | break 151 | if self.use_coor_input: 152 | coor = get_timestep_embedding( 153 | coor.flatten(), 154 | num_channels, 155 | flip_sin_to_cos=True, 156 | downscale_freq_shift=0, 157 | ) 158 | else: 159 | coor = get_timestep_embedding( 160 | zero_coor.flatten(), 161 | num_channels, 162 | flip_sin_to_cos=True, 163 | downscale_freq_shift=0, 164 | ) 165 | added_cond_kwargs = {"point_coords": coor} 166 | else: 167 | if self.use_coor_input: 168 | added_cond_kwargs = {"bbox_mask_coords": coor} 169 | else: 170 | coor = torch.tensor([[0, 0, 1, 1]] * B).cuda() 171 | added_cond_kwargs = {"bbox_mask_coords": coor} 172 | 173 | # get attention mask 174 | if self.use_attention_mask and aux_input_type in self.attn_mask_aux_input: 175 | attention_mask = data[aux_input_type].cuda() 176 | attention_mask = (attention_mask + 1) / 2 177 | attention_mask = F.interpolate(attention_mask, scale_factor=1 / 8, mode="nearest") 178 | attention_mask = attention_mask.flatten(start_dim=1) 179 | else: 180 | attention_mask = None 181 | 182 | # encode rgb to latents 183 | rgb_h = self.vae.encoder(rgb) 184 | rgb_moments = self.vae.quant_conv(rgb_h) 185 | rgb_mean, _ = torch.chunk(rgb_moments, 2, dim=1) 186 | rgb_latent = rgb_mean * self.vae.config.scaling_factor 187 | 188 | # get encoder_hidden_states 189 | if self.use_encoder_hidden_states and aux_input_latent is not None: 190 | encoder_hidden_states = self.unet.aux_conv_in(aux_input_latent) 191 | encoder_hidden_states = encoder_hidden_states.view(B, 1024, -1) 192 | encoder_hidden_states = encoder_hidden_states.permute(0, 2, 1) 193 | 194 | if 
"caption" in data: 195 | prompt = data["caption"] 196 | else: 197 | prompt = [""] * B 198 | prompt = [prompt] if isinstance(prompt, str) else prompt 199 | text_inputs = self.tokenizer( 200 | prompt, 201 | padding="max_length", 202 | max_length=self.tokenizer.model_max_length, 203 | truncation=True, 204 | return_tensors="pt", 205 | ) 206 | text_input_ids = text_inputs.input_ids.to("cuda") 207 | text_embed = self.text_encoder(text_input_ids)[0] 208 | encoder_hidden_states_2 = text_embed 209 | 210 | # get class_label 211 | is_trans = data["is_trans"].cuda() 212 | trans = 1 - is_trans 213 | 214 | # get timesteps 215 | timestep = torch.tensor([1], device="cuda").long() 216 | 217 | # unet 218 | unet_input = torch.cat([rgb_latent, aux_input_latent], dim=1) 219 | label_latent = self.unet( 220 | sample=unet_input, 221 | trans=trans, 222 | timestep=None, 223 | encoder_hidden_states=encoder_hidden_states, 224 | encoder_hidden_states_2=encoder_hidden_states_2, 225 | added_cond_kwargs=added_cond_kwargs, 226 | attention_mask=attention_mask, 227 | ).sample 228 | label_latent = label_latent / self.vae.config.scaling_factor 229 | z = self.vae.post_quant_conv(label_latent) 230 | stacked = self.vae.decoder(z) 231 | # mean of output channels 232 | label_mean = stacked.mean(dim=1, keepdim=True) 233 | output = torch.clip(label_mean, -1.0, 1.0) 234 | output = (output + 1.0) / 2.0 235 | return output 236 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI-SDMatte 2 | 3 | English | [简体中文](README_CN.md) 4 | 5 | ComfyUI custom nodes plugin based on [SDMatte](https://github.com/vivoCameraResearch/SDMatte) for interactive image matting. 6 | 7 | ## 🚀 Quick Start 8 | 9 | > 📺 **Video Tutorial**: [ComfyUI-SDMatte Tutorial](https://www.youtube.com/watch?v=PDGDTJvdo8Q) 10 | > 🔧 **Example Workflow**: [Superior Image Cropping and Mask Refinement Workflow](https://www.runninghub.ai/post/1955928733028941826) 11 | > 💡 **Recommended**: Watch the video tutorial first to understand the usage, then download the workflow for practice 12 | 13 | ## 📖 Introduction 14 | 15 | SDMatte is an interactive image matting method based on Stable Diffusion, developed by the vivo Camera Research team and accepted by ICCV 2025. This method leverages the powerful priors of pre-trained diffusion models and supports multiple visual prompts (points, boxes, masks) for accurately extracting target objects from natural images. 16 | 17 | This plugin integrates SDMatte into ComfyUI, providing a simple and easy-to-use node interface focused on trimap-guided matting functionality with built-in VRAM optimization strategies. 18 | 19 | ## 🖼️ Examples 20 | 21 | ### Matting Results 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
[Example images: Original Image | Trimap | Matting Result]
[Example image: Alpha mask output]
35 | 36 | *Example workflow demonstrating SDMatte's high-precision matting capabilities with trimap guidance.* 37 | 38 | ## ✨ Features 39 | 40 | - 🎯 **High-Precision Matting**: Based on powerful diffusion model priors, capable of handling complex edge details 41 | - 🖼️ **Trimap Guidance**: Supports trimap-guided precise matting 42 | - 🚀 **VRAM Optimization**: Built-in mixed precision, attention slicing, and other memory optimization strategies 43 | - 🔧 **ComfyUI Integration**: Fully compatible with ComfyUI workflow system 44 | - 📥 **Automatic Model Download**: Automatically downloads model weights on first use 45 | - 📱 **Flexible Sizes**: Supports multiple inference resolutions (512-1024px) 46 | 47 | ## 🛠️ Installation 48 | 49 | ### 1. Download Plugin 50 | 51 | Place this plugin in the ComfyUI custom nodes directory: 52 | 53 | ```bash 54 | cd ComfyUI/custom_nodes/ 55 | git clone https://github.com/flybirdxx/ComfyUI-SDMatte.git 56 | ``` 57 | 58 | ### 2. Install Dependencies 59 | 60 | ComfyUI will automatically install the dependencies in `requirements.txt` on startup: 61 | 62 | - diffusers 63 | - timm 64 | - einops 65 | - lazyconfig 66 | - safetensors 67 | 68 | ### 3. Automatic Model Download 69 | 70 | **No manual download is required.** 71 | 72 | The first time you use the `Apply SDMatte` node, it will automatically check for and download the necessary model weights from Hugging Face. The models will be stored in: 73 | `ComfyUI/models/SDMatte/` 74 | 75 | You can select between the standard (`SDMatte.safetensors`) and enhanced (`SDMatte_plus.safetensors`) versions directly within the node. 76 | 77 | ### 4. Restart ComfyUI 78 | 79 | Restart ComfyUI to load the new custom nodes. 80 | 81 | ## 🎮 Usage 82 | 83 | ### Node Description 84 | 85 | #### Apply SDMatte 86 | 87 | - **Function**: Loads the model and applies it for matting in a single node. 88 | - **Input**: 89 | - `ckpt_name`: Select the model to use (`SDMatte.safetensors` or `SDMatte_plus.safetensors`). It will be downloaded automatically if not found. 90 | - `image`: Input image (ComfyUI IMAGE format) 91 | - `trimap`: Trimap mask (ComfyUI MASK format) 92 | - `inference_size`: Inference resolution (512/640/768/896/1024) 93 | - `is_transparent`: Whether the image contains transparent areas 94 | - `output_mode`: Output mode (`alpha_only`, `matted_rgba`, `matted_rgb`) 95 | - `mask_refine`: Enable mask refinement to reduce background interference 96 | - `trimap_constraint`: Strength of the trimap constraint for refinement 97 | - `force_cpu`: Force CPU inference (optional) 98 | - **Output**: 99 | - `alpha_mask`: Alpha mask of the matting result 100 | - `matted_image`: The matted image result 101 | 102 | ### Basic Workflow 103 | 104 | 1. **Load Image**: Load the image that needs matting 105 | 2. **Create Trimap**: Use drawing tools or other nodes to create trimap 106 | - Black (0): Definite background 107 | - White (1): Definite foreground 108 | - Gray (0.5): Unknown region 109 | 3. **Apply SDMatte**: Apply matting 110 | 4. 
**Preview Image**: Preview matting result 111 | 112 | ### Recommended Settings 113 | 114 | - **Inference Resolution**: 1024 (highest quality) or 768 (balanced performance) 115 | - **Transparent Flag**: Set according to whether input image has transparent channel 116 | - **Force CPU**: Use only when GPU VRAM is insufficient 117 | 118 | ## 🔧 Technical Details 119 | 120 | ### Data Processing 121 | 122 | - **Input Image**: Automatically resized to inference resolution, normalized to [-1, 1] 123 | - **Trimap**: Resized to inference resolution, mapped to [-1, 1] range 124 | - **Output**: Resized back to original resolution, clamped to [0, 1] range 125 | 126 | ### VRAM Optimization 127 | 128 | The plugin has built-in memory optimization strategies (automatically enabled): 129 | 130 | - **Mixed Precision**: Uses FP16 autocast to reduce VRAM usage 131 | - **Attention Slicing**: SlicedAttnProcessor(slice_size=1) maximizes VRAM savings 132 | - **Memory Cleanup**: Automatically clears CUDA cache before and after inference 133 | - **Device Management**: Smart device allocation and model movement 134 | 135 | ### Model Loading 136 | 137 | - **Weight Formats**: Supports .pth and .safetensors formats 138 | - **Safe Loading**: Handles omegaconf objects, supports weights_only mode 139 | - **Nested Structure**: Automatically handles complex checkpoint structures 140 | - **Error Recovery**: Multiple fallback mechanisms ensure successful loading 141 | 142 | ## ❓ FAQ 143 | 144 | ### Q: Nodes cannot be searched? 145 | A: Ensure the plugin directory structure is correct, restart ComfyUI, check console for error messages. 146 | 147 | ### Q: Model loading failed? 148 | A: Check SDMatte.safetensors file path, ensure base model directory structure is complete, view console for detailed error messages. 149 | 150 | ### Q: Insufficient VRAM during inference? 151 | A: Try reducing inference resolution, enable `force_cpu` option, or close other VRAM-consuming programs. 152 | 153 | ### Q: Poor matting results? 154 | A: Optimize trimap quality, ensure accurate foreground/background/unknown region annotations, try different inference resolutions. 155 | 156 | ### Q: First inference is slow? 157 | A: First run needs to compile CUDA kernels, subsequent inference will be significantly faster. 158 | 159 | ### Q: Which model version should I choose? 
160 | A: 161 | - **SDMatte.safetensors (Standard)**: Smaller file (~11GB), faster inference, suitable for most scenarios 162 | - **SDMatte_plus.safetensors (Enhanced)**: Larger file, higher accuracy, suitable for professional use with extremely high quality requirements 163 | - Recommend testing with standard version first, upgrade to enhanced version if higher quality is needed 164 | 165 | ## 📋 System Requirements 166 | 167 | - **ComfyUI**: Latest version 168 | - **Python**: 3.8+ 169 | - **PyTorch**: 1.12+ (CUDA support recommended) 170 | - **VRAM**: 8GB+ recommended (CPU inference supported) 171 | - **Dependencies**: diffusers, timm, einops, lazyconfig, safetensors 172 | 173 | ## 📝 Changelog 174 | 175 | ### v1.5.0 (2025-01-XX) 176 | - 🔄 **Model Format Update**: 177 | - Migrated from `.pth` to `.safetensors` format for better security and performance 178 | - Updated model download URLs to use Hugging Face repository (1038lab/SDMatte) 179 | - Improved model loading with SafeTensors library for safer weight handling 180 | - 🔧 **Technical Improvements**: 181 | - Enhanced model loading stability with better error handling 182 | - Optimized memory usage during model loading process 183 | - Improved compatibility with latest ComfyUI versions 184 | - 📚 **Documentation Updates**: 185 | - Updated installation instructions to reflect new model format 186 | - Added information about SafeTensors format benefits 187 | 188 | ### v1.3.0 (2025-08-17) 189 | - ✨ **New Features**: 190 | - Implemented automatic model downloading and checking. Models are now stored in `ComfyUI/models/SDMatte/`. 191 | - 🔧 **Improvements**: 192 | - Merged `SDMatte Model Loader` and `SDMatte Apply` nodes into a single `Apply SDMatte` node for a more streamlined workflow. 193 | - Refactored code for better stability. 
194 | 195 | ### v1.2.0 (2025-08-15) 196 | - ✨ **New Features**: 197 | - Added image output alongside alpha mask output 198 | - Support for transparent background matting mode 199 | - Added multiple output modes: `alpha_only`, `matted_rgba`, `matted_rgb` 200 | - Added mask refinement feature using trimap constraints to filter unwanted regions 201 | - Added `trimap_constraint` parameter to control constraint strength 202 | - Added detailed tooltips for all parameters 203 | - 🔧 **Improvements**: 204 | - Improved alpha mask processing logic to reduce background interference 205 | - Optimized foreground region extraction algorithm 206 | - Enhanced low-confidence region filtering mechanism 207 | - 📚 **Documentation**: 208 | - Added example workflow links 209 | - Added video tutorial links 210 | - Improved usage instructions and parameter explanations 211 | - 🔧 **Improvements**: 212 | - Improved VRAM optimization strategies 213 | - Enhanced model loading stability 214 | - Optimized inference performance 215 | 216 | ### v1.0.0 (2025-08-14) 217 | - 🎉 **Initial Release**: 218 | - Basic SDMatte model integration 219 | - Support for trimap-guided matting 220 | - Built-in VRAM optimization features 221 | - Support for multiple inference resolutions 222 | 223 | ## 📚 References 224 | 225 | - **Example Workflow**: [Superior Image Cropping and Mask Refinement Workflow](https://www.runninghub.ai/post/1955928733028941826) 226 | - **Video Tutorial**: [ComfyUI-SDMatte Tutorial](https://www.youtube.com/watch?v=PDGDTJvdo8Q) 227 | - **Original Paper**: [SDMatte: Grafting Diffusion Models for Interactive Matting](https://arxiv.org/abs/2408.00321) (ICCV 2025) 228 | - **Original Code**: [vivoCameraResearch/SDMatte](https://github.com/vivoCameraResearch/SDMatte) 229 | - **Model Weights**: [LongfeiHuang/SDMatte](https://huggingface.co/LongfeiHuang/SDMatte) 230 | 231 | ## 📄 License 232 | 233 | This project follows the MIT license. The original SDMatte project also uses the MIT license. 234 | 235 | ## 🙏 Acknowledgements 236 | 237 | Thanks to the vivo Camera Research team for developing the excellent SDMatte model, and to the Stable Diffusion and ComfyUI communities for their contributions. 238 | 239 | ## 📧 Support 240 | 241 | If you have any questions or suggestions, please submit an Issue on GitHub. 242 | 243 | --- 244 | 245 | **Note**: This plugin is a third-party implementation and is not directly affiliated with the original SDMatte team. Please ensure compliance with relevant license terms before use. 
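For readers who would rather script the trimap described in the Basic Workflow section than paint it by hand, a minimal sketch using OpenCV morphology is shown below (both OpenCV and NumPy are already pulled in by the plugin's dependencies). The helper name `binary_mask_to_trimap` and the default band width are illustrative assumptions, not utilities shipped with this plugin:

```python
import cv2
import numpy as np

def binary_mask_to_trimap(mask: np.ndarray, unknown_width: int = 15) -> np.ndarray:
    """Turn a rough binary mask (0/255) into a trimap (0 / 128 / 255).

    The band between the eroded and the dilated mask becomes the gray
    "unknown" region that SDMatte resolves into a soft alpha matte.
    """
    mask = (mask > 127).astype(np.uint8) * 255
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (unknown_width, unknown_width))
    sure_fg = cv2.erode(mask, kernel)    # shrink: definitely foreground
    maybe_fg = cv2.dilate(mask, kernel)  # grow: outside this is definitely background
    trimap = np.full_like(mask, 128)     # start with everything unknown
    trimap[maybe_fg == 0] = 0            # definite background
    trimap[sure_fg == 255] = 255         # definite foreground
    return trimap
```

Divided by 255 so it lands in [0, 1], the result matches the node's `trimap` MASK convention above: 0 = background, 1 = foreground, 0.5 = unknown.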
246 | -------------------------------------------------------------------------------- /sdmatte_nodes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torchvision import transforms 4 | 5 | import folder_paths 6 | import comfy 7 | 8 | # Get the models directory from ComfyUI 9 | MODEL_DIR = os.path.join(folder_paths.models_dir, "SDMatte") 10 | 11 | # Register the SDMatte folder path with ComfyUI 12 | folder_paths.add_model_folder_path("SDMatte", MODEL_DIR) 13 | 14 | MODEL_URLS = { 15 | "SDMatte.safetensors": "https://huggingface.co/1038lab/SDMatte/resolve/main/SDMatte.safetensors", 16 | "SDMatte_plus.safetensors": "https://huggingface.co/1038lab/SDMatte/resolve/main/SDMatte_plus.safetensors" 17 | } 18 | 19 | def download_model(model_name, models_dir=MODEL_DIR, model_urls=MODEL_URLS): 20 | # 1) Search in all registered SDMatte paths first 21 | all_search_paths = folder_paths.get_folder_paths("SDMatte") or [] 22 | for search_path in all_search_paths: 23 | check_path = os.path.join(search_path, model_name) 24 | if os.path.isfile(check_path): 25 | try: 26 | if os.path.getsize(check_path) > 0: 27 | print(f"[SDMatte] Found model at: {check_path}") 28 | return check_path 29 | except OSError: 30 | pass # couldn't stat; continue 31 | 32 | # 2) Not found -> prepare to download to models_dir 33 | url = model_urls.get(model_name) 34 | if not url: 35 | raise ValueError(f"[SDMatte] Unknown model name: {model_name}") 36 | 37 | target_path = os.path.join(models_dir, model_name) 38 | os.makedirs(os.path.dirname(target_path), exist_ok=True) 39 | 40 | # if target exists and non-empty, use it 41 | if os.path.isfile(target_path): 42 | try: 43 | if os.path.getsize(target_path) > 0: 44 | return target_path 45 | except OSError: 46 | pass 47 | 48 | print(f"[SDMatte] Model '{model_name}' not found. 
Downloading to {target_path}...") 49 | 50 | tmp_path = target_path + ".tmp" 51 | 52 | try: 53 | try: 54 | import requests 55 | try: 56 | from tqdm import tqdm # optional 57 | except Exception: 58 | tqdm = None 59 | 60 | with requests.get(url, stream=True, timeout=60) as response: 61 | response.raise_for_status() 62 | total_size = int(response.headers.get('content-length', 0) or 0) 63 | 64 | with open(tmp_path, 'wb') as f: 65 | bar = None 66 | if tqdm and total_size > 0: 67 | bar = tqdm(desc=model_name, total=total_size, unit='iB', unit_scale=True, unit_divisor=1024) 68 | 69 | for chunk in response.iter_content(chunk_size=1024*1024): 70 | if chunk: 71 | f.write(chunk) 72 | if bar: 73 | bar.update(len(chunk)) 74 | 75 | if bar: 76 | bar.close() 77 | 78 | # optional size check 79 | if total_size > 0: 80 | try: 81 | if os.path.getsize(tmp_path) != total_size: 82 | raise IOError(f"[SDMatte] Incomplete download: {os.path.getsize(tmp_path)} != {total_size}") 83 | except OSError: 84 | raise 85 | 86 | except (ImportError, ModuleNotFoundError): 87 | import urllib.request 88 | urllib.request.urlretrieve(url, tmp_path) 89 | 90 | # concurrent safety: if another process already finished 91 | if os.path.isfile(target_path) and os.path.getsize(target_path) > 0: 92 | try: 93 | os.remove(tmp_path) 94 | except OSError: 95 | pass 96 | return target_path 97 | 98 | os.replace(tmp_path, target_path) # atomic 99 | print(f"[SDMatte] Download complete: {target_path}") 100 | return target_path 101 | 102 | except KeyboardInterrupt: 103 | if os.path.exists(tmp_path): 104 | try: 105 | os.remove(tmp_path) 106 | except OSError: 107 | pass 108 | raise 109 | except Exception: 110 | if os.path.exists(tmp_path): 111 | try: 112 | os.remove(tmp_path) 113 | except OSError: 114 | pass 115 | raise 116 | 117 | SDMatteCore = None 118 | 119 | 120 | def _resize_norm_image_bchw(image_bchw: torch.Tensor, size_hw=(1024, 1024)) -> torch.Tensor: 121 | resize = transforms.Resize(size_hw, antialias=True) 122 | norm = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) 123 | x = resize(image_bchw) 124 | x = norm(x) 125 | return x 126 | 127 | 128 | def _resize_mask_b1hw(mask_b1hw: torch.Tensor, size_hw=(1024, 1024)) -> torch.Tensor: 129 | resize = transforms.Resize(size_hw) 130 | return resize(mask_b1hw) 131 | 132 | 133 | class SDMatteApply: 134 | 135 | @classmethod 136 | def INPUT_TYPES(s): 137 | return { 138 | "required": { 139 | "ckpt_name": (list(MODEL_URLS.keys()), ), 140 | "image": ("IMAGE", {"tooltip": "需要进行抠图的输入图像"}), 141 | "trimap": ("MASK", {"tooltip": "三值图掩码:白色=前景,黑色=背景,灰色=未知区域"}), 142 | "inference_size": ([512, 640, 768, 896, 1024], { 143 | "default": 1024, 144 | "tooltip": "推理分辨率,越高质量越好但速度越慢。推荐1024(最高质量)或768(平衡性能)" 145 | }), 146 | "is_transparent": ("BOOLEAN", { 147 | "default": False, 148 | "tooltip": "输入图像是否包含透明通道。如果原图有透明背景请启用" 149 | }), 150 | "output_mode": (["alpha_only", "matted_rgba", "matted_rgb"], { 151 | "default": "alpha_only", 152 | "tooltip": "输出模式:alpha_only=只输出遮罩;matted_rgba=透明背景抠图;matted_rgb=黑色背景抠图(推荐,避免干扰)" 153 | }), 154 | "mask_refine": ("BOOLEAN", { 155 | "default": True, 156 | "tooltip": "启用遮罩优化,使用trimap约束过滤不需要的区域,减少背景干扰" 157 | }), 158 | "trimap_constraint": ("FLOAT", { 159 | "default": 0.8, "min": 0.1, "max": 1.0, "step": 0.1, 160 | "tooltip": "trimap约束强度(0.1-1.0)。越高约束越严格,0.8=平衡,0.9=严格过滤,0.6=宽松保留" 161 | }), 162 | }, 163 | "optional": { 164 | "force_cpu": ("BOOLEAN", {"default": False}), 165 | }, 166 | } 167 | 168 | RETURN_TYPES = ("MASK", "IMAGE") 169 | RETURN_NAMES = ("alpha_mask", "matted_image") 
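# ComfyUI node wiring: FUNCTION (below) names the method invoked when the node executes, and CATEGORY controls where the node appears in the add-node menu.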
170 | FUNCTION = "apply_matte" 171 | CATEGORY = "Matting/SDMatte" 172 | 173 | def apply_matte(self, ckpt_name, image, trimap, inference_size, is_transparent, output_mode, mask_refine, trimap_constraint, force_cpu=False): 174 | device = comfy.model_management.get_torch_device() 175 | if force_cpu: 176 | device = torch.device('cpu') 177 | 178 | global SDMatteCore 179 | if SDMatteCore is None: 180 | from .src.modeling.SDMatte.meta_arch import SDMatte as SDMatteCore 181 | 182 | base_dir = os.path.dirname(__file__) 183 | pretrained_repo = os.path.join(base_dir, "src", "SDMatte") 184 | required_subdirs = ["text_encoder", "vae", "unet", "scheduler", "tokenizer"] 185 | missing = [d for d in required_subdirs if not os.path.isdir(os.path.join(pretrained_repo, d))] 186 | if missing: 187 | raise FileNotFoundError(f"Missing directories: {missing}. Expected path: {pretrained_repo}") 188 | 189 | sdmatte_model = SDMatteCore( 190 | pretrained_model_name_or_path=pretrained_repo, 191 | load_weight=False, 192 | use_aux_input=True, 193 | aux_input="trimap", 194 | aux_input_list=["point_mask", "bbox_mask", "mask", "trimap"], 195 | attn_mask_aux_input=["point_mask", "bbox_mask", "mask", "trimap"], 196 | use_encoder_hidden_states=True, 197 | use_attention_mask=True, 198 | add_noise=False, 199 | ) 200 | 201 | ckpt_path = download_model(ckpt_name) 202 | 203 | from safetensors import safe_open 204 | state_dict = {} 205 | with safe_open(ckpt_path, framework="pt", device="cpu") as f: 206 | for key in f.keys(): 207 | state_dict[key] = f.get_tensor(key) 208 | state_root = state_dict 209 | 210 | candidate_keys = [ 211 | 'state_dict','model_state_dict','params','weights', 212 | 'ema','model_ema','ema_state_dict','net','module','model','unet' 213 | ] 214 | state_dict = None 215 | if isinstance(state_root, dict): 216 | for k in candidate_keys: 217 | inner = state_root.get(k) 218 | if isinstance(inner, dict): 219 | state_dict = inner 220 | break 221 | if state_dict is None: 222 | state_dict = state_root 223 | 224 | sdmatte_model.load_state_dict(state_dict, strict=False) 225 | sdmatte_model.eval() 226 | sdmatte_model.to(device) 227 | 228 | if device.type == 'cuda': 229 | try: 230 | torch.cuda.empty_cache() 231 | except Exception: 232 | pass 233 | 234 | try: 235 | unet = getattr(sdmatte_model, 'unet', None) 236 | if unet is not None and hasattr(unet, 'set_attn_processor'): 237 | from diffusers.models.attention_processor import SlicedAttnProcessor 238 | unet.set_attn_processor(SlicedAttnProcessor(slice_size=1)) 239 | except Exception: 240 | pass 241 | 242 | B, H, W, C = image.shape 243 | orig_h, orig_w = H, W 244 | 245 | img_bchw = image.permute(0, 3, 1, 2).contiguous().to(device) 246 | img_in = _resize_norm_image_bchw(img_bchw, (int(inference_size), int(inference_size))) 247 | 248 | is_trans = torch.tensor([1 if is_transparent else 0] * B, device=device) 249 | data = {"image": img_in, "is_trans": is_trans, "caption": [""] * B} 250 | 251 | def to_b1hw(x): 252 | return _resize_mask_b1hw(x.unsqueeze(1).contiguous().to(device), (int(inference_size), int(inference_size))) 253 | 254 | tri = to_b1hw(trimap) * 2 - 1 255 | data["trimap"] = tri 256 | data["trimap_coords"] = torch.tensor([[0,0,1,1]]*B, dtype=tri.dtype, device=device) 257 | 258 | with torch.no_grad(): 259 | if device.type == 'cuda': 260 | with torch.autocast(device_type='cuda', dtype=torch.float16): 261 | pred_alpha = sdmatte_model(data) 262 | else: 263 | pred_alpha = sdmatte_model(data) 264 | 265 | out = transforms.Resize((orig_h, orig_w))(pred_alpha) 266 | out = 
267 | 
268 |         if mask_refine:
269 |             trimap_cpu = trimap.cpu()
270 | 
271 |             foreground_regions = trimap_cpu > trimap_constraint
272 |             background_regions = trimap_cpu < (1.0 - trimap_constraint)
273 |             unknown_regions = ~(foreground_regions | background_regions)
274 | 
275 |             refined_alpha = out.clone()
276 |             refined_alpha[background_regions] = 0.0
277 |             refined_alpha[foreground_regions] = torch.clamp(refined_alpha[foreground_regions] * 1.2, 0, 1)
278 | 
279 |             alpha_threshold = 0.3
280 |             low_confidence = (refined_alpha < alpha_threshold) & unknown_regions
281 |             refined_alpha[low_confidence] = 0.0
282 | 
283 |             out = refined_alpha
284 | 
285 |         alpha_expanded = out.unsqueeze(-1)
286 | 
287 |         if output_mode == "alpha_only":
288 |             matted_image = torch.zeros_like(image.cpu())
289 |         elif output_mode == "matted_rgba":
290 |             matted_image = torch.cat([
291 |                 image.cpu(),
292 |                 alpha_expanded
293 |             ], dim=-1)
294 |         elif output_mode == "matted_rgb":
295 |             trimap_cpu = trimap.cpu()
296 |             trimap_expanded = trimap_cpu.unsqueeze(-1)
297 |             foreground_mask = (trimap_expanded > 0.2) & (alpha_expanded > 0.1)
298 |             matted_image = image.cpu() * foreground_mask.float()
299 |         else:
300 |             matted_image = image.cpu() * alpha_expanded
301 | 
302 |         if device.type == 'cuda':
303 |             try:
304 |                 torch.cuda.empty_cache()
305 |             except Exception:
306 |                 pass
307 | 
308 |         return (out, matted_image)
309 | 
310 | 
311 | NODE_CLASS_MAPPINGS = {
312 |     "SDMatteApply": SDMatteApply,
313 | }
314 | 
315 | NODE_DISPLAY_NAME_MAPPINGS = {
316 |     "SDMatteApply": "Apply SDMatte",
317 | }
318 | 
319 | 
320 | 
--------------------------------------------------------------------------------
/src/utils/replace.py:
--------------------------------------------------------------------------------
1 | import math
2 | from typing import Any, Dict, Optional, Tuple, Union
3 | 
4 | import torch
5 | import torch.nn.functional as F
6 | from torch import nn
7 | 
8 | from diffusers import UNet2DConditionModel
9 | from diffusers.models.embeddings import Timesteps, TimestepEmbedding
10 | from diffusers.models.unets.unet_2d_blocks import (
11 |     get_down_block,
12 |     get_up_block,
13 |     get_mid_block,
14 | )
15 | from diffusers.models.activations import get_activation
16 | from diffusers.models.unets.unet_2d_condition import UNet2DConditionOutput
17 | from diffusers.utils import USE_PEFT_BACKEND, scale_lora_layers, unscale_lora_layers
18 | 
19 | 
20 | def custom_prepare_attention_mask(
21 |     self, attention_mask: torch.Tensor, target_length: int, batch_size: int, out_dim: int = 3
22 | ) -> torch.Tensor:
23 |     r"""
24 |     Prepare the attention mask for the attention computation.
25 | 
26 |     Args:
27 |         attention_mask (`torch.Tensor`):
28 |             The attention mask to prepare.
29 |         target_length (`int`):
30 |             The target length of the attention mask. This is the length of the attention mask after padding.
31 |         batch_size (`int`):
32 |             The batch size, which is used to repeat the attention mask.
33 |         out_dim (`int`, *optional*, defaults to `3`):
34 |             The output dimension of the attention mask. Can be either `3` or `4`.
35 | 
36 |     Returns:
37 |         `torch.Tensor`: The prepared attention mask.
38 |     """
39 |     head_size = self.heads
40 |     if attention_mask is None:
41 |         return attention_mask
42 | 
43 |     current_length: int = attention_mask.shape[-1]
44 |     if current_length != target_length:
45 |         if attention_mask.device.type == "mps":
46 |             # HACK: MPS: Does not support padding by greater than dimension of input tensor.
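            # (Stock diffusers uses F.pad(attention_mask, (0, target_length)) here; on MPS
            # that call fails when the pad amount exceeds the padded dimension's current size.)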
47 | # Instead, we can manually construct the padding tensor. 48 | padding_shape = (attention_mask.shape[0], attention_mask.shape[1], target_length) 49 | padding = torch.zeros(padding_shape, dtype=attention_mask.dtype, device=attention_mask.device) 50 | attention_mask = torch.cat([attention_mask, padding], dim=2) 51 | else: 52 | # TODO: for pipelines such as stable-diffusion, padding cross-attn mask: 53 | # we want to instead pad by (0, remaining_length), where remaining_length is: 54 | # remaining_length: int = target_length - current_length 55 | # TODO: re-enable tests/models/test_models_unet_2d_condition.py#test_model_xattn_padding 56 | B = attention_mask.shape[0] 57 | current_size = int(math.sqrt(current_length)) 58 | target_size = int(math.sqrt(target_length)) 59 | assert current_size**2 == current_length, f"current_length ({current_length}) cannot be squared to an integer size" 60 | assert target_size**2 == target_length, f"target_length ({target_length}) cannot be squared to an integer size" 61 | attention_mask = attention_mask.view(B, -1, current_size, current_size) 62 | attention_mask = F.interpolate(attention_mask, size=(target_size, target_size), mode="nearest") 63 | attention_mask = attention_mask.view(B, 1, target_length) 64 | 65 | if out_dim == 3: 66 | if attention_mask.shape[0] < batch_size * head_size: 67 | attention_mask = attention_mask.repeat_interleave(head_size, dim=0) 68 | elif out_dim == 4: 69 | attention_mask = attention_mask.unsqueeze(1) 70 | attention_mask = attention_mask.repeat_interleave(head_size, dim=1) 71 | 72 | return attention_mask 73 | 74 | 75 | def custom_get_attention_scores(self, query: torch.Tensor, key: torch.Tensor, attention_mask: torch.Tensor = None) -> torch.Tensor: 76 | r""" 77 | Compute the attention scores. 78 | 79 | Args: 80 | query (`torch.Tensor`): The query tensor. 81 | key (`torch.Tensor`): The key tensor. 82 | attention_mask (`torch.Tensor`, *optional*): The attention mask to use. If `None`, no mask is applied. 83 | 84 | Returns: 85 | `torch.Tensor`: The attention probabilities/scores. 
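
    Note:
        `attention_mask` here is an additive bias (0 = keep, -10000.0 = masked, as
        prepared in `CustomUNet.forward`); it is folded into the scores through the
        `torch.baddbmm` accumulator rather than being applied after the matmul.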
86 | """ 87 | dtype = query.dtype 88 | if self.upcast_attention: 89 | query = query.float() 90 | key = key.float() 91 | 92 | # if attention_mask is not None and len(torch.unique(attention_mask)) <= 2: 93 | if attention_mask is not None: 94 | baddbmm_input = attention_mask 95 | beta = 1 96 | else: 97 | baddbmm_input = torch.empty(query.shape[0], query.shape[1], key.shape[1], dtype=query.dtype, device=query.device) 98 | beta = 0 99 | 100 | attention_scores = torch.baddbmm( 101 | baddbmm_input, 102 | query, 103 | key.transpose(-1, -2), 104 | beta=beta, 105 | alpha=self.scale, 106 | ) 107 | 108 | # if attention_mask is not None and len(torch.unique(attention_mask)) > 2: 109 | # m = 1 - (attention_mask / -10000.0) 110 | # attention_scores = m * attention_scores 111 | 112 | del baddbmm_input 113 | 114 | if self.upcast_softmax: 115 | attention_scores = attention_scores.float() 116 | 117 | attention_probs = attention_scores.softmax(dim=-1) 118 | del attention_scores 119 | 120 | attention_probs = attention_probs.to(dtype) 121 | 122 | return attention_probs 123 | 124 | 125 | class CustomUNet(UNet2DConditionModel): 126 | def __init__( 127 | self, 128 | sample_size: Optional[int] = None, 129 | in_channels: int = 4, 130 | out_channels: int = 4, 131 | flip_sin_to_cos: bool = True, 132 | freq_shift: int = 0, 133 | down_block_types: Tuple[str] = ( 134 | "CrossAttnDownBlock2D", 135 | "CrossAttnDownBlock2D", 136 | "CrossAttnDownBlock2D", 137 | "DownBlock2D", 138 | ), 139 | mid_block_type: Optional[str] = "UNetMidBlock2DCrossAttn", 140 | up_block_types: Tuple[str] = ("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D"), 141 | only_cross_attention: Union[bool, Tuple[bool]] = False, 142 | block_out_channels: Tuple[int] = (320, 640, 1280, 1280), 143 | layers_per_block: Union[int, Tuple[int]] = 2, 144 | downsample_padding: int = 1, 145 | mid_block_scale_factor: float = 1, 146 | dropout: float = 0.0, 147 | act_fn: str = "silu", 148 | norm_num_groups: Optional[int] = 32, 149 | norm_eps: float = 1e-5, 150 | cross_attention_dim: Union[int, Tuple[int]] = 1280, 151 | transformer_layers_per_block: Union[int, Tuple[int], Tuple[Tuple]] = 1, 152 | reverse_transformer_layers_per_block: Optional[Tuple[Tuple[int]]] = None, 153 | attention_head_dim: Union[int, Tuple[int]] = 8, 154 | num_attention_heads: Optional[Union[int, Tuple[int]]] = None, 155 | dual_cross_attention: bool = False, 156 | use_linear_projection: bool = False, 157 | upcast_attention: bool = False, 158 | resnet_time_scale_shift: str = "default", 159 | resnet_skip_time_act: bool = False, 160 | resnet_out_scale_factor: int = 1.0, 161 | time_embedding_dim: Optional[int] = None, 162 | timestep_post_act: Optional[str] = None, 163 | time_cond_proj_dim: Optional[int] = None, 164 | conv_in_kernel: int = 3, 165 | conv_out_kernel: int = 3, 166 | bbox_time_embed_dim: Optional[int] = None, 167 | point_embeddings_input_dim: Optional[int] = None, 168 | bbox_embeddings_input_dim: Optional[int] = None, 169 | attention_type: str = "default", 170 | class_embeddings_concat: bool = False, 171 | mid_block_only_cross_attention: Optional[bool] = None, 172 | cross_attention_norm: Optional[str] = None, 173 | use_attention_mask_list=[True, True, True], 174 | use_encoder_hidden_states_list=[True, True, True], 175 | ): 176 | super().__init__() 177 | self.use_attention_mask_list = use_attention_mask_list 178 | self.use_encoder_hidden_states_list = use_encoder_hidden_states_list 179 | self.sample_size = sample_size 180 | num_attention_heads = num_attention_heads 
or attention_head_dim 181 | 182 | # input 183 | conv_in_padding = (conv_in_kernel - 1) // 2 184 | self.conv_in = nn.Conv2d(in_channels, block_out_channels[0], kernel_size=conv_in_kernel, padding=conv_in_padding) 185 | 186 | # time 187 | time_embed_dim = time_embedding_dim or block_out_channels[0] * 4 188 | self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) 189 | timestep_input_dim = block_out_channels[0] 190 | self.time_embedding = TimestepEmbedding( 191 | timestep_input_dim, 192 | time_embed_dim, 193 | act_fn=act_fn, 194 | post_act_fn=timestep_post_act, 195 | cond_proj_dim=time_cond_proj_dim, 196 | ) 197 | 198 | self.point_embedding = TimestepEmbedding(point_embeddings_input_dim, time_embed_dim) 199 | self.bbox_time_proj = Timesteps(bbox_time_embed_dim, flip_sin_to_cos, freq_shift) 200 | self.bbox_embedding = TimestepEmbedding(bbox_embeddings_input_dim, time_embed_dim) 201 | 202 | self.down_blocks = nn.ModuleList([]) 203 | self.up_blocks = nn.ModuleList([]) 204 | if isinstance(only_cross_attention, bool): 205 | if mid_block_only_cross_attention is None: 206 | mid_block_only_cross_attention = only_cross_attention 207 | only_cross_attention = [only_cross_attention] * len(down_block_types) 208 | 209 | if mid_block_only_cross_attention is None: 210 | mid_block_only_cross_attention = False 211 | 212 | if isinstance(num_attention_heads, int): 213 | num_attention_heads = (num_attention_heads,) * len(down_block_types) 214 | 215 | if isinstance(attention_head_dim, int): 216 | attention_head_dim = (attention_head_dim,) * len(down_block_types) 217 | 218 | if isinstance(cross_attention_dim, int): 219 | cross_attention_dim = (cross_attention_dim,) * len(down_block_types) 220 | 221 | if isinstance(layers_per_block, int): 222 | layers_per_block = [layers_per_block] * len(down_block_types) 223 | 224 | if isinstance(transformer_layers_per_block, int): 225 | transformer_layers_per_block = [transformer_layers_per_block] * len(down_block_types) 226 | 227 | if class_embeddings_concat: 228 | blocks_time_embed_dim = time_embed_dim * 2 229 | else: 230 | blocks_time_embed_dim = time_embed_dim 231 | 232 | # down 233 | output_channel = block_out_channels[0] 234 | for i, down_block_type in enumerate(down_block_types): 235 | input_channel = output_channel 236 | output_channel = block_out_channels[i] 237 | is_final_block = i == len(block_out_channels) - 1 238 | 239 | down_block = get_down_block( 240 | down_block_type, 241 | num_layers=layers_per_block[i], 242 | transformer_layers_per_block=transformer_layers_per_block[i], 243 | in_channels=input_channel, 244 | out_channels=output_channel, 245 | temb_channels=blocks_time_embed_dim, 246 | add_downsample=not is_final_block, 247 | resnet_eps=norm_eps, 248 | resnet_act_fn=act_fn, 249 | resnet_groups=norm_num_groups, 250 | cross_attention_dim=cross_attention_dim[i], 251 | num_attention_heads=num_attention_heads[i], 252 | downsample_padding=downsample_padding, 253 | dual_cross_attention=dual_cross_attention, 254 | use_linear_projection=use_linear_projection, 255 | only_cross_attention=only_cross_attention[i], 256 | upcast_attention=upcast_attention, 257 | resnet_time_scale_shift=resnet_time_scale_shift, 258 | attention_type=attention_type, 259 | resnet_skip_time_act=resnet_skip_time_act, 260 | resnet_out_scale_factor=resnet_out_scale_factor, 261 | cross_attention_norm=cross_attention_norm, 262 | attention_head_dim=attention_head_dim[i] if attention_head_dim[i] is not None else output_channel, 263 | dropout=dropout, 264 | ) 265 | 
self.down_blocks.append(down_block) 266 | 267 | # mid 268 | self.mid_block = get_mid_block( 269 | mid_block_type, 270 | temb_channels=blocks_time_embed_dim, 271 | in_channels=block_out_channels[-1], 272 | resnet_eps=norm_eps, 273 | resnet_act_fn=act_fn, 274 | resnet_groups=norm_num_groups, 275 | output_scale_factor=mid_block_scale_factor, 276 | transformer_layers_per_block=transformer_layers_per_block[-1], 277 | num_attention_heads=num_attention_heads[-1], 278 | cross_attention_dim=cross_attention_dim[-1], 279 | dual_cross_attention=dual_cross_attention, 280 | use_linear_projection=use_linear_projection, 281 | mid_block_only_cross_attention=mid_block_only_cross_attention, 282 | upcast_attention=upcast_attention, 283 | resnet_time_scale_shift=resnet_time_scale_shift, 284 | attention_type=attention_type, 285 | resnet_skip_time_act=resnet_skip_time_act, 286 | cross_attention_norm=cross_attention_norm, 287 | attention_head_dim=attention_head_dim[-1], 288 | dropout=dropout, 289 | ) 290 | 291 | # count how many layers upsample the images 292 | self.num_upsamplers = 0 293 | 294 | # up 295 | reversed_block_out_channels = list(reversed(block_out_channels)) 296 | reversed_num_attention_heads = list(reversed(num_attention_heads)) 297 | reversed_layers_per_block = list(reversed(layers_per_block)) 298 | reversed_cross_attention_dim = list(reversed(cross_attention_dim)) 299 | reversed_transformer_layers_per_block = ( 300 | list(reversed(transformer_layers_per_block)) 301 | if reverse_transformer_layers_per_block is None 302 | else reverse_transformer_layers_per_block 303 | ) 304 | only_cross_attention = list(reversed(only_cross_attention)) 305 | 306 | output_channel = reversed_block_out_channels[0] 307 | for i, up_block_type in enumerate(up_block_types): 308 | is_final_block = i == len(block_out_channels) - 1 309 | 310 | prev_output_channel = output_channel 311 | output_channel = reversed_block_out_channels[i] 312 | input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] 313 | 314 | # add upsample block for all BUT final layer 315 | if not is_final_block: 316 | add_upsample = True 317 | self.num_upsamplers += 1 318 | else: 319 | add_upsample = False 320 | 321 | up_block = get_up_block( 322 | up_block_type, 323 | num_layers=reversed_layers_per_block[i] + 1, 324 | transformer_layers_per_block=reversed_transformer_layers_per_block[i], 325 | in_channels=input_channel, 326 | out_channels=output_channel, 327 | prev_output_channel=prev_output_channel, 328 | temb_channels=blocks_time_embed_dim, 329 | add_upsample=add_upsample, 330 | resnet_eps=norm_eps, 331 | resnet_act_fn=act_fn, 332 | resolution_idx=i, 333 | resnet_groups=norm_num_groups, 334 | cross_attention_dim=reversed_cross_attention_dim[i], 335 | num_attention_heads=reversed_num_attention_heads[i], 336 | dual_cross_attention=dual_cross_attention, 337 | use_linear_projection=use_linear_projection, 338 | only_cross_attention=only_cross_attention[i], 339 | upcast_attention=upcast_attention, 340 | resnet_time_scale_shift=resnet_time_scale_shift, 341 | attention_type=attention_type, 342 | resnet_skip_time_act=resnet_skip_time_act, 343 | resnet_out_scale_factor=resnet_out_scale_factor, 344 | cross_attention_norm=cross_attention_norm, 345 | attention_head_dim=attention_head_dim[i] if attention_head_dim[i] is not None else output_channel, 346 | dropout=dropout, 347 | ) 348 | self.up_blocks.append(up_block) 349 | prev_output_channel = output_channel 350 | 351 | # out 352 | if norm_num_groups is not None: 353 | self.conv_norm_out = 
nn.GroupNorm(num_channels=block_out_channels[0], num_groups=norm_num_groups, eps=norm_eps) 354 | 355 | self.conv_act = get_activation(act_fn) 356 | 357 | else: 358 | self.conv_norm_out = None 359 | self.conv_act = None 360 | 361 | conv_out_padding = (conv_out_kernel - 1) // 2 362 | self.conv_out = nn.Conv2d(block_out_channels[0], out_channels, kernel_size=conv_out_kernel, padding=conv_out_padding) 363 | 364 | # distillation 365 | self.feature_map = [] 366 | 367 | def _get_value(self, use_list, true_value, false_value): 368 | down_value = mid_value = up_value = false_value 369 | 370 | if use_list[0]: 371 | down_value = true_value 372 | if use_list[1]: 373 | mid_value = true_value 374 | if use_list[2]: 375 | up_value = true_value 376 | 377 | return down_value, mid_value, up_value 378 | 379 | def forward( 380 | self, 381 | sample: torch.FloatTensor, 382 | timestep: Union[torch.Tensor, float, int], 383 | trans: Union[torch.Tensor, float, int], 384 | encoder_hidden_states: torch.Tensor, 385 | encoder_hidden_states_2: Optional[torch.Tensor] = None, 386 | timestep_cond: Optional[torch.Tensor] = None, 387 | attention_mask: Optional[torch.Tensor] = None, 388 | cross_attention_kwargs: Optional[Dict[str, Any]] = None, 389 | added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None, 390 | encoder_attention_mask: Optional[torch.Tensor] = None, 391 | ) -> Union[UNet2DConditionOutput, Tuple]: 392 | default_overall_up_factor = 2**self.num_upsamplers 393 | forward_upsample_size = False 394 | upsample_size = None 395 | 396 | for dim in sample.shape[-2:]: 397 | if dim % default_overall_up_factor != 0: 398 | forward_upsample_size = True 399 | break 400 | 401 | if attention_mask is not None: 402 | attention_mask = (1 - attention_mask.to(sample.dtype)) * -10000.0 403 | attention_mask = attention_mask.unsqueeze(1) 404 | 405 | if encoder_attention_mask is not None: 406 | encoder_attention_mask = (1 - encoder_attention_mask.to(sample.dtype)) * -10000.0 407 | encoder_attention_mask = encoder_attention_mask.unsqueeze(1) 408 | 409 | # 0. center input if necessary 410 | if self.config.center_input_sample: 411 | sample = 2 * sample - 1.0 412 | 413 | down_attn_mask, mid_attn_mask, up_attn_mask = self._get_value(self.use_attention_mask_list, attention_mask, None) 414 | down_encoder_hidden_states, mid_encoder_hidden_states, up_encoder_hidden_states = self._get_value( 415 | self.use_encoder_hidden_states_list, encoder_hidden_states, encoder_hidden_states_2 416 | ) 417 | 418 | # 1. time 419 | t_emb, op_emb, aug_emb = None, None, None 420 | 421 | if timestep is not None: 422 | timesteps = timestep 423 | timesteps = timesteps.expand(sample.shape[0]) 424 | t_emb = self.time_proj(timesteps) 425 | t_emb = t_emb.to(dtype=sample.dtype) 426 | 427 | t_emb = self.time_embedding(t_emb, timestep_cond) 428 | 429 | # opacity 430 | if trans is not None: 431 | trans = trans.expand(sample.shape[0]) 432 | op_emb = self.time_proj(trans) 433 | op_emb = op_emb.to(dtype=sample.dtype) 434 | 435 | op_emb = self.time_embedding(op_emb, timestep_cond) 436 | 437 | if t_emb is not None and op_emb is not None: 438 | emb = t_emb + op_emb 439 | elif op_emb is not None: 440 | emb = op_emb 441 | elif t_emb is not None: 442 | emb = t_emb 443 | else: 444 | raise ValueError("Missing required field: 'timestep' and 'trans'. 
At least one of them must be provided.")
445 | 
446 |         if "point_coords" in added_cond_kwargs:
447 |             coords_embeds = added_cond_kwargs.get("point_coords")
448 |             coords_embeds = coords_embeds.reshape((sample.shape[0], -1))
449 |             coords_embeds = coords_embeds.to(emb.dtype)
450 |             aug_emb = self.point_embedding(coords_embeds)
451 |         elif "bbox_mask_coords" in added_cond_kwargs:
452 |             coords = added_cond_kwargs.get("bbox_mask_coords")
453 |             coords_embeds = self.bbox_time_proj(coords.flatten())
454 |             coords_embeds = coords_embeds.reshape((sample.shape[0], -1))
455 |             coords_embeds = coords_embeds.to(emb.dtype)
456 |             aug_emb = self.bbox_embedding(coords_embeds)
457 |         else:
458 |             raise ValueError(f"{self.__class__} cannot find point_coords or bbox_mask_coords in added_cond_kwargs.")
459 | 
460 |         emb = emb + aug_emb if aug_emb is not None else emb
461 | 
462 |         # 2. pre-process
463 |         sample = self.conv_in(sample)
464 | 
465 |         # distillation
466 |         self.feature_map = []
467 | 
468 |         # 3. down
469 |         lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0
470 |         if USE_PEFT_BACKEND:
471 |             scale_lora_layers(self, lora_scale)
472 | 
473 |         down_block_res_samples = (sample,)
474 |         for downsample_block in self.down_blocks:
475 |             if hasattr(downsample_block, "has_cross_attention") and downsample_block.has_cross_attention:
476 |                 additional_residuals = {}
477 |                 sample, res_samples = downsample_block(
478 |                     hidden_states=sample,
479 |                     temb=emb,
480 |                     encoder_hidden_states=down_encoder_hidden_states,
481 |                     attention_mask=down_attn_mask,
482 |                     cross_attention_kwargs=cross_attention_kwargs,
483 |                     encoder_attention_mask=encoder_attention_mask,
484 |                     **additional_residuals,
485 |                 )
486 |             else:
487 |                 sample, res_samples = downsample_block(hidden_states=sample, temb=emb, scale=lora_scale)
488 | 
489 |             down_block_res_samples += res_samples
490 | 
491 |             self.feature_map.append(sample)
492 | 
493 |         # 4. mid
494 |         if self.mid_block is not None:
495 |             if hasattr(self.mid_block, "has_cross_attention") and self.mid_block.has_cross_attention:
496 |                 sample = self.mid_block(
497 |                     sample,
498 |                     emb,
499 |                     encoder_hidden_states=mid_encoder_hidden_states,
500 |                     attention_mask=mid_attn_mask,
501 |                     cross_attention_kwargs=cross_attention_kwargs,
502 |                     encoder_attention_mask=encoder_attention_mask,
503 |                 )
504 |             else:
505 |                 sample = self.mid_block(sample, emb)
506 | 
507 |             self.feature_map.append(sample)
508 | 
509 |         # 5. 
up 510 | for i, upsample_block in enumerate(self.up_blocks): 511 | is_final_block = i == len(self.up_blocks) - 1 512 | 513 | res_samples = down_block_res_samples[-len(upsample_block.resnets) :] 514 | down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)] 515 | 516 | if not is_final_block and forward_upsample_size: 517 | upsample_size = down_block_res_samples[-1].shape[2:] 518 | 519 | if hasattr(upsample_block, "has_cross_attention") and upsample_block.has_cross_attention: 520 | sample = upsample_block( 521 | hidden_states=sample, 522 | temb=emb, 523 | res_hidden_states_tuple=res_samples, 524 | encoder_hidden_states=up_encoder_hidden_states, 525 | cross_attention_kwargs=cross_attention_kwargs, 526 | upsample_size=upsample_size, 527 | attention_mask=up_attn_mask, 528 | encoder_attention_mask=encoder_attention_mask, 529 | ) 530 | else: 531 | sample = upsample_block( 532 | hidden_states=sample, 533 | temb=emb, 534 | res_hidden_states_tuple=res_samples, 535 | upsample_size=upsample_size, 536 | scale=lora_scale, 537 | ) 538 | 539 | self.feature_map.append(sample) 540 | 541 | # 6. post-process 542 | if self.conv_norm_out: 543 | sample = self.conv_norm_out(sample) 544 | sample = self.conv_act(sample) 545 | sample = self.conv_out(sample) 546 | 547 | if USE_PEFT_BACKEND: 548 | unscale_lora_layers(self, lora_scale) 549 | 550 | return UNet2DConditionOutput(sample=sample) 551 | -------------------------------------------------------------------------------- /example_workflow/超强抠图遮罩细化工作流.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_link_id":62, 3 | "nodes":[ 4 | { 5 | "mode":0, 6 | "outputs":[ 7 | { 8 | "name":"IMAGE", 9 | "links":[ 10 | 20, 11 | 57 12 | ], 13 | "label":"图像", 14 | "type":"IMAGE", 15 | "localized_name":"图像" 16 | }, 17 | { 18 | "name":"MASK", 19 | "label":"遮罩", 20 | "type":"MASK", 21 | "localized_name":"遮罩" 22 | } 23 | ], 24 | "size":[ 25 | 270, 26 | 314 27 | ], 28 | "pos":[ 29 | 6107.099609375, 30 | 2105.3642578125 31 | ], 32 | "widgets_values":[ 33 | "a967fc290d991c68f71a05bf6e55732cf5382689af321eb265ff7728f625300b.png", 34 | "image" 35 | ], 36 | "inputs":[ 37 | { 38 | "widget":{ 39 | "name":"image" 40 | }, 41 | "name":"image", 42 | "type":"COMBO", 43 | "localized_name":"图像" 44 | }, 45 | { 46 | "widget":{ 47 | "name":"upload" 48 | }, 49 | "name":"upload", 50 | "type":"IMAGEUPLOAD", 51 | "localized_name":"选择文件上传" 52 | } 53 | ], 54 | "flags":{ 55 | 56 | }, 57 | "id":18, 58 | "type":"LoadImage", 59 | "properties":{ 60 | "hasSecondTab":false, 61 | "cnr_id":"comfy-core", 62 | "ver":"0.3.49", 63 | "ue_properties":{ 64 | "widget_ue_connectable":{ 65 | "image":true, 66 | "upload":true 67 | } 68 | }, 69 | "widget_ue_connectable":{ 70 | 71 | }, 72 | "secondTabText":"Send Back", 73 | "enableTabs":false, 74 | "secondTabOffset":80, 75 | "Node name for S&R":"LoadImage", 76 | "tabWidth":65, 77 | "secondTabWidth":65, 78 | "tabXOffset":10 79 | }, 80 | "order":0 81 | }, 82 | { 83 | "mode":0, 84 | "outputs":[ 85 | { 86 | "name":"IMAGE", 87 | "links":[ 88 | 19, 89 | 60 90 | ], 91 | "label":"图像", 92 | "type":"IMAGE", 93 | "localized_name":"图像" 94 | }, 95 | { 96 | "name":"MASK", 97 | "label":"遮罩", 98 | "type":"MASK", 99 | "localized_name":"遮罩" 100 | } 101 | ], 102 | "size":[ 103 | 270, 104 | 314 105 | ], 106 | "pos":[ 107 | 5850, 108 | 3450 109 | ], 110 | "widgets_values":[ 111 | "pasted/4550ed051bb190f119094d7e4406c04b84347b55f94d07fd379165760a57b9c8.png", 112 | "image" 113 | ], 114 | "inputs":[ 115 | { 116 | "widget":{ 117 | 
"name":"image" 118 | }, 119 | "name":"image", 120 | "type":"COMBO", 121 | "localized_name":"图像" 122 | }, 123 | { 124 | "widget":{ 125 | "name":"upload" 126 | }, 127 | "name":"upload", 128 | "type":"IMAGEUPLOAD", 129 | "localized_name":"选择文件上传" 130 | } 131 | ], 132 | "flags":{ 133 | 134 | }, 135 | "id":19, 136 | "type":"LoadImage", 137 | "properties":{ 138 | "hasSecondTab":false, 139 | "cnr_id":"comfy-core", 140 | "ver":"0.3.49", 141 | "ue_properties":{ 142 | "widget_ue_connectable":{ 143 | "image":true, 144 | "upload":true 145 | } 146 | }, 147 | "widget_ue_connectable":{ 148 | 149 | }, 150 | "secondTabText":"Send Back", 151 | "enableTabs":false, 152 | "secondTabOffset":80, 153 | "Node name for S&R":"LoadImage", 154 | "tabWidth":65, 155 | "secondTabWidth":65, 156 | "tabXOffset":10 157 | }, 158 | "order":1 159 | }, 160 | { 161 | "mode":0, 162 | "outputs":[ 163 | { 164 | "name":"IMAGE", 165 | "links":[ 166 | 21, 167 | 54 168 | ], 169 | "label":"图像", 170 | "type":"IMAGE", 171 | "localized_name":"图像" 172 | }, 173 | { 174 | "name":"MASK", 175 | "label":"遮罩", 176 | "type":"MASK", 177 | "localized_name":"遮罩" 178 | } 179 | ], 180 | "size":[ 181 | 270, 182 | 314.0000305175781 183 | ], 184 | "pos":[ 185 | 6018.3759765625, 186 | 1220.397705078125 187 | ], 188 | "widgets_values":[ 189 | "49d7f1b44e2a78a5239a6b9ba4e0cdc3425ca973cacb512567680053b8e38949.jpg", 190 | "image" 191 | ], 192 | "inputs":[ 193 | { 194 | "widget":{ 195 | "name":"image" 196 | }, 197 | "name":"image", 198 | "type":"COMBO", 199 | "localized_name":"图像" 200 | }, 201 | { 202 | "widget":{ 203 | "name":"upload" 204 | }, 205 | "name":"upload", 206 | "type":"IMAGEUPLOAD", 207 | "localized_name":"选择文件上传" 208 | } 209 | ], 210 | "flags":{ 211 | 212 | }, 213 | "id":17, 214 | "type":"LoadImage", 215 | "properties":{ 216 | "hasSecondTab":false, 217 | "cnr_id":"comfy-core", 218 | "ver":"0.3.49", 219 | "ue_properties":{ 220 | "widget_ue_connectable":{ 221 | "image":true, 222 | "upload":true 223 | } 224 | }, 225 | "widget_ue_connectable":{ 226 | 227 | }, 228 | "secondTabText":"Send Back", 229 | "enableTabs":false, 230 | "secondTabOffset":80, 231 | "Node name for S&R":"LoadImage", 232 | "tabWidth":65, 233 | "secondTabWidth":65, 234 | "tabXOffset":10 235 | }, 236 | "order":2 237 | }, 238 | { 239 | "mode":0, 240 | "outputs":[ 241 | 242 | ], 243 | "size":[ 244 | 319.2306213378906, 245 | 353.56097412109375 246 | ], 247 | "pos":[ 248 | 7707.099609375, 249 | 2125.3642578125 250 | ], 251 | "widgets_values":[ 252 | 253 | ], 254 | "inputs":[ 255 | { 256 | "name":"mask", 257 | "link":32, 258 | "label":"遮罩", 259 | "type":"MASK", 260 | "localized_name":"mask" 261 | } 262 | ], 263 | "flags":{ 264 | 265 | }, 266 | "id":30, 267 | "type":"MaskPreview+", 268 | "properties":{ 269 | "hasSecondTab":false, 270 | "cnr_id":"comfyui_essentials", 271 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9", 272 | "ue_properties":{ 273 | "widget_ue_connectable":{ 274 | 275 | } 276 | }, 277 | "widget_ue_connectable":{ 278 | 279 | }, 280 | "secondTabText":"Send Back", 281 | "enableTabs":false, 282 | "secondTabOffset":80, 283 | "Node name for S&R":"MaskPreview+", 284 | "tabWidth":65, 285 | "secondTabWidth":65, 286 | "tabXOffset":10 287 | }, 288 | "order":12 289 | }, 290 | { 291 | "mode":0, 292 | "outputs":[ 293 | 294 | ], 295 | "size":[ 296 | 405.8030700683594, 297 | 572.3388061523438 298 | ], 299 | "pos":[ 300 | 7250, 301 | -320 302 | ], 303 | "widgets_values":[ 304 | 305 | ], 306 | "inputs":[ 307 | { 308 | "name":"mask", 309 | "link":53, 310 | "label":"遮罩", 311 | "type":"MASK", 312 
| "localized_name":"mask" 313 | } 314 | ], 315 | "flags":{ 316 | 317 | }, 318 | "id":16, 319 | "type":"MaskPreview+", 320 | "properties":{ 321 | "hasSecondTab":false, 322 | "cnr_id":"comfyui_essentials", 323 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9", 324 | "ue_properties":{ 325 | "widget_ue_connectable":{ 326 | 327 | } 328 | }, 329 | "widget_ue_connectable":{ 330 | 331 | }, 332 | "secondTabText":"Send Back", 333 | "enableTabs":false, 334 | "secondTabOffset":80, 335 | "Node name for S&R":"MaskPreview+", 336 | "tabWidth":65, 337 | "secondTabWidth":65, 338 | "tabXOffset":10 339 | }, 340 | "order":24 341 | }, 342 | { 343 | "mode":0, 344 | "outputs":[ 345 | 346 | ], 347 | "size":[ 348 | 393.25921630859375, 349 | 565.4967041015625 350 | ], 351 | "pos":[ 352 | 7685.49365234375, 353 | -311.3333435058594 354 | ], 355 | "widgets_values":[ 356 | 357 | ], 358 | "inputs":[ 359 | { 360 | "name":"mask", 361 | "link":8, 362 | "label":"遮罩", 363 | "type":"MASK", 364 | "localized_name":"mask" 365 | } 366 | ], 367 | "flags":{ 368 | 369 | }, 370 | "id":8, 371 | "type":"MaskPreview+", 372 | "properties":{ 373 | "hasSecondTab":false, 374 | "cnr_id":"comfyui_essentials", 375 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9", 376 | "ue_properties":{ 377 | "widget_ue_connectable":{ 378 | 379 | } 380 | }, 381 | "widget_ue_connectable":{ 382 | 383 | }, 384 | "secondTabText":"Send Back", 385 | "enableTabs":false, 386 | "secondTabOffset":80, 387 | "Node name for S&R":"MaskPreview+", 388 | "tabWidth":65, 389 | "secondTabWidth":65, 390 | "tabXOffset":10 391 | }, 392 | "order":19 393 | }, 394 | { 395 | "mode":0, 396 | "outputs":[ 397 | 398 | ], 399 | "size":[ 400 | 294.7272033691406, 401 | 313.9785461425781 402 | ], 403 | "pos":[ 404 | 7347.099609375, 405 | 2175.364501953125 406 | ], 407 | "widgets_values":[ 408 | 409 | ], 410 | "inputs":[ 411 | { 412 | "name":"mask", 413 | "link":59, 414 | "label":"遮罩", 415 | "type":"MASK", 416 | "localized_name":"mask" 417 | } 418 | ], 419 | "flags":{ 420 | 421 | }, 422 | "id":29, 423 | "type":"MaskPreview+", 424 | "properties":{ 425 | "hasSecondTab":false, 426 | "cnr_id":"comfyui_essentials", 427 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9", 428 | "ue_properties":{ 429 | "widget_ue_connectable":{ 430 | 431 | } 432 | }, 433 | "widget_ue_connectable":{ 434 | 435 | }, 436 | "secondTabText":"Send Back", 437 | "enableTabs":false, 438 | "secondTabOffset":80, 439 | "Node name for S&R":"MaskPreview+", 440 | "tabWidth":65, 441 | "secondTabWidth":65, 442 | "tabXOffset":10 443 | }, 444 | "order":21 445 | }, 446 | { 447 | "mode":0, 448 | "outputs":[ 449 | 450 | ], 451 | "size":[ 452 | 438.7269592285156, 453 | 610.3634033203125 454 | ], 455 | "pos":[ 456 | 7262.8671875, 457 | 985.3197631835938 458 | ], 459 | "widgets_values":[ 460 | 461 | ], 462 | "inputs":[ 463 | { 464 | "name":"mask", 465 | "link":56, 466 | "label":"遮罩", 467 | "type":"MASK", 468 | "localized_name":"mask" 469 | } 470 | ], 471 | "flags":{ 472 | 473 | }, 474 | "id":27, 475 | "type":"MaskPreview+", 476 | "properties":{ 477 | "hasSecondTab":false, 478 | "cnr_id":"comfyui_essentials", 479 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9", 480 | "ue_properties":{ 481 | "widget_ue_connectable":{ 482 | 483 | } 484 | }, 485 | "widget_ue_connectable":{ 486 | 487 | }, 488 | "secondTabText":"Send Back", 489 | "enableTabs":false, 490 | "secondTabOffset":80, 491 | "Node name for S&R":"MaskPreview+", 492 | "tabWidth":65, 493 | "secondTabWidth":65, 494 | "tabXOffset":10 495 | }, 496 | "order":23 497 | }, 498 | { 499 | "mode":0, 
500 | "outputs":[ 501 | 502 | ], 503 | "size":[ 504 | 446.0731506347656, 505 | 603.0172729492188 506 | ], 507 | "pos":[ 508 | 7734.06884765625, 509 | 990.8294067382812 510 | ], 511 | "widgets_values":[ 512 | 513 | ], 514 | "inputs":[ 515 | { 516 | "name":"mask", 517 | "link":37, 518 | "label":"遮罩", 519 | "type":"MASK", 520 | "localized_name":"mask" 521 | } 522 | ], 523 | "flags":{ 524 | 525 | }, 526 | "id":28, 527 | "type":"MaskPreview+", 528 | "properties":{ 529 | "hasSecondTab":false, 530 | "cnr_id":"comfyui_essentials", 531 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9", 532 | "ue_properties":{ 533 | "widget_ue_connectable":{ 534 | 535 | } 536 | }, 537 | "widget_ue_connectable":{ 538 | 539 | }, 540 | "secondTabText":"Send Back", 541 | "enableTabs":false, 542 | "secondTabOffset":80, 543 | "Node name for S&R":"MaskPreview+", 544 | "tabWidth":65, 545 | "secondTabWidth":65, 546 | "tabXOffset":10 547 | }, 548 | "order":17 549 | }, 550 | { 551 | "mode":0, 552 | "outputs":[ 553 | { 554 | "name":"image", 555 | "label":"图像", 556 | "type":"IMAGE", 557 | "localized_name":"image" 558 | }, 559 | { 560 | "name":"mask", 561 | "links":[ 562 | 32, 563 | 58 564 | ], 565 | "label":"遮罩", 566 | "type":"MASK", 567 | "localized_name":"mask" 568 | } 569 | ], 570 | "size":[ 571 | 390.8785095214844, 572 | 366 573 | ], 574 | "color":"rgba(27, 80, 119, 0.7)", 575 | "pos":[ 576 | 6547.099609375, 577 | 2095.3642578125 578 | ], 579 | "widgets_values":[ 580 | "sam_vit_h (2.56GB)", 581 | "GroundingDINO_SwinT_OGC (694MB)", 582 | 0.3, 583 | "VITMatte", 584 | 6, 585 | 6, 586 | 0.15, 587 | 0.99, 588 | true, 589 | "subject", 590 | "cuda", 591 | 2, 592 | false 593 | ], 594 | "inputs":[ 595 | { 596 | "name":"image", 597 | "link":20, 598 | "label":"图像", 599 | "type":"IMAGE", 600 | "localized_name":"image" 601 | }, 602 | { 603 | "widget":{ 604 | "name":"sam_model" 605 | }, 606 | "name":"sam_model", 607 | "type":"COMBO", 608 | "localized_name":"SAM模型" 609 | }, 610 | { 611 | "widget":{ 612 | "name":"grounding_dino_model" 613 | }, 614 | "name":"grounding_dino_model", 615 | "type":"COMBO", 616 | "localized_name":"GroundingDINO模型" 617 | }, 618 | { 619 | "widget":{ 620 | "name":"threshold" 621 | }, 622 | "name":"threshold", 623 | "type":"FLOAT", 624 | "localized_name":"阈值" 625 | }, 626 | { 627 | "widget":{ 628 | "name":"detail_method" 629 | }, 630 | "name":"detail_method", 631 | "type":"COMBO", 632 | "localized_name":"细节处理方法" 633 | }, 634 | { 635 | "widget":{ 636 | "name":"detail_erode" 637 | }, 638 | "name":"detail_erode", 639 | "type":"INT", 640 | "localized_name":"细节消融" 641 | }, 642 | { 643 | "widget":{ 644 | "name":"detail_dilate" 645 | }, 646 | "name":"detail_dilate", 647 | "type":"INT", 648 | "localized_name":"细节膨胀" 649 | }, 650 | { 651 | "widget":{ 652 | "name":"black_point" 653 | }, 654 | "name":"black_point", 655 | "type":"FLOAT", 656 | "localized_name":"黑色阈值" 657 | }, 658 | { 659 | "widget":{ 660 | "name":"white_point" 661 | }, 662 | "name":"white_point", 663 | "type":"FLOAT", 664 | "localized_name":"白色阈值" 665 | }, 666 | { 667 | "widget":{ 668 | "name":"process_detail" 669 | }, 670 | "name":"process_detail", 671 | "type":"BOOLEAN", 672 | "localized_name":"处理细节" 673 | }, 674 | { 675 | "widget":{ 676 | "name":"prompt" 677 | }, 678 | "name":"prompt", 679 | "type":"STRING", 680 | "localized_name":"提示词" 681 | }, 682 | { 683 | "widget":{ 684 | "name":"device" 685 | }, 686 | "name":"device", 687 | "type":"COMBO", 688 | "localized_name":"设备" 689 | }, 690 | { 691 | "widget":{ 692 | "name":"max_megapixels" 693 | }, 694 | 
"name":"max_megapixels", 695 | "type":"FLOAT", 696 | "localized_name":"Vitmatte最大尺寸" 697 | }, 698 | { 699 | "widget":{ 700 | "name":"cache_model" 701 | }, 702 | "name":"cache_model", 703 | "type":"BOOLEAN", 704 | "localized_name":"cache_model" 705 | } 706 | ], 707 | "flags":{ 708 | 709 | }, 710 | "id":24, 711 | "type":"LayerMask: SegmentAnythingUltra V2", 712 | "properties":{ 713 | "hasSecondTab":false, 714 | "cnr_id":"ComfyUI_LayerStyle_Advance", 715 | "ver":"5f8c6f29c484100245bc6f8ea66277f53e23cdbe", 716 | "ue_properties":{ 717 | "widget_ue_connectable":{ 718 | "detail_dilate":true, 719 | "detail_erode":true, 720 | "threshold":true, 721 | "max_megapixels":true, 722 | "sam_model":true, 723 | "detail_method":true, 724 | "black_point":true, 725 | "process_detail":true, 726 | "grounding_dino_model":true, 727 | "white_point":true, 728 | "cache_model":true, 729 | "prompt":true, 730 | "device":true 731 | } 732 | }, 733 | "widget_ue_connectable":{ 734 | 735 | }, 736 | "secondTabText":"Send Back", 737 | "enableTabs":false, 738 | "secondTabOffset":80, 739 | "Node name for S&R":"LayerMask: SegmentAnythingUltra V2", 740 | "tabWidth":65, 741 | "secondTabWidth":65, 742 | "tabXOffset":10 743 | }, 744 | "order":8 745 | }, 746 | { 747 | "mode":0, 748 | "outputs":[ 749 | 750 | ], 751 | "size":[ 752 | 210, 753 | 62 754 | ], 755 | "pos":[ 756 | 6276.8818359375, 757 | -569.6011962890625 758 | ], 759 | "widgets_values":[ 760 | "1", 761 | 1 762 | ], 763 | "inputs":[ 764 | 765 | ], 766 | "flags":{ 767 | 768 | }, 769 | "id":33, 770 | "type":"Bookmark (rgthree)", 771 | "properties":{ 772 | "widget_ue_connectable":{ 773 | 774 | } 775 | }, 776 | "order":3 777 | }, 778 | { 779 | "mode":0, 780 | "outputs":[ 781 | 782 | ], 783 | "size":[ 784 | 210, 785 | 62 786 | ], 787 | "pos":[ 788 | 5576.3740234375, 789 | 3681.332275390625 790 | ], 791 | "widgets_values":[ 792 | "2", 793 | 1 794 | ], 795 | "inputs":[ 796 | 797 | ], 798 | "flags":{ 799 | 800 | }, 801 | "id":36, 802 | "type":"Bookmark (rgthree)", 803 | "properties":{ 804 | "widget_ue_connectable":{ 805 | 806 | } 807 | }, 808 | "order":4 809 | }, 810 | { 811 | "mode":0, 812 | "outputs":[ 813 | { 814 | "name":"IMAGE", 815 | "links":[ 816 | 4, 817 | 51 818 | ], 819 | "label":"图像", 820 | "type":"IMAGE", 821 | "localized_name":"图像" 822 | }, 823 | { 824 | "name":"MASK", 825 | "label":"遮罩", 826 | "type":"MASK", 827 | "localized_name":"遮罩" 828 | } 829 | ], 830 | "size":[ 831 | 270, 832 | 314 833 | ], 834 | "pos":[ 835 | 6019.25244140625, 836 | -299.09930419921875 837 | ], 838 | "widgets_values":[ 839 | "18733ba03f229d3da22203ad98e1ecf6f302d36a3947b72b126b1019f1e370e7.png", 840 | "image" 841 | ], 842 | "inputs":[ 843 | { 844 | "widget":{ 845 | "name":"image" 846 | }, 847 | "name":"image", 848 | "type":"COMBO", 849 | "localized_name":"图像" 850 | }, 851 | { 852 | "widget":{ 853 | "name":"upload" 854 | }, 855 | "name":"upload", 856 | "type":"IMAGEUPLOAD", 857 | "localized_name":"选择文件上传" 858 | } 859 | ], 860 | "flags":{ 861 | 862 | }, 863 | "id":6, 864 | "type":"LoadImage", 865 | "properties":{ 866 | "hasSecondTab":false, 867 | "cnr_id":"comfy-core", 868 | "ver":"0.3.49", 869 | "ue_properties":{ 870 | "widget_ue_connectable":{ 871 | "image":true, 872 | "upload":true 873 | } 874 | }, 875 | "widget_ue_connectable":{ 876 | 877 | }, 878 | "secondTabText":"Send Back", 879 | "enableTabs":false, 880 | "secondTabOffset":80, 881 | "Node name for S&R":"LoadImage", 882 | "tabWidth":65, 883 | "secondTabWidth":65, 884 | "tabXOffset":10 885 | }, 886 | "order":5 887 | }, 888 | { 889 | "mode":0, 
890 | "outputs":[ 891 | { 892 | "name":"image", 893 | "label":"图像", 894 | "type":"IMAGE", 895 | "localized_name":"image" 896 | }, 897 | { 898 | "name":"mask", 899 | "links":[ 900 | 8, 901 | 52 902 | ], 903 | "label":"遮罩", 904 | "type":"MASK", 905 | "localized_name":"mask" 906 | } 907 | ], 908 | "size":[ 909 | 390.8785095214844, 910 | 366 911 | ], 912 | "color":"rgba(27, 80, 119, 0.7)", 913 | "pos":[ 914 | 6421.720703125, 915 | -337.93768310546875 916 | ], 917 | "widgets_values":[ 918 | "sam_vit_h (2.56GB)", 919 | "GroundingDINO_SwinT_OGC (694MB)", 920 | 0.3, 921 | "VITMatte", 922 | 6, 923 | 6, 924 | 0.15, 925 | 0.99, 926 | true, 927 | "subject", 928 | "cuda", 929 | 2, 930 | false 931 | ], 932 | "inputs":[ 933 | { 934 | "name":"image", 935 | "link":4, 936 | "label":"图像", 937 | "type":"IMAGE", 938 | "localized_name":"image" 939 | }, 940 | { 941 | "widget":{ 942 | "name":"sam_model" 943 | }, 944 | "name":"sam_model", 945 | "type":"COMBO", 946 | "localized_name":"SAM模型" 947 | }, 948 | { 949 | "widget":{ 950 | "name":"grounding_dino_model" 951 | }, 952 | "name":"grounding_dino_model", 953 | "type":"COMBO", 954 | "localized_name":"GroundingDINO模型" 955 | }, 956 | { 957 | "widget":{ 958 | "name":"threshold" 959 | }, 960 | "name":"threshold", 961 | "type":"FLOAT", 962 | "localized_name":"阈值" 963 | }, 964 | { 965 | "widget":{ 966 | "name":"detail_method" 967 | }, 968 | "name":"detail_method", 969 | "type":"COMBO", 970 | "localized_name":"细节处理方法" 971 | }, 972 | { 973 | "widget":{ 974 | "name":"detail_erode" 975 | }, 976 | "name":"detail_erode", 977 | "type":"INT", 978 | "localized_name":"细节消融" 979 | }, 980 | { 981 | "widget":{ 982 | "name":"detail_dilate" 983 | }, 984 | "name":"detail_dilate", 985 | "type":"INT", 986 | "localized_name":"细节膨胀" 987 | }, 988 | { 989 | "widget":{ 990 | "name":"black_point" 991 | }, 992 | "name":"black_point", 993 | "type":"FLOAT", 994 | "localized_name":"黑色阈值" 995 | }, 996 | { 997 | "widget":{ 998 | "name":"white_point" 999 | }, 1000 | "name":"white_point", 1001 | "type":"FLOAT", 1002 | "localized_name":"白色阈值" 1003 | }, 1004 | { 1005 | "widget":{ 1006 | "name":"process_detail" 1007 | }, 1008 | "name":"process_detail", 1009 | "type":"BOOLEAN", 1010 | "localized_name":"处理细节" 1011 | }, 1012 | { 1013 | "widget":{ 1014 | "name":"prompt" 1015 | }, 1016 | "name":"prompt", 1017 | "type":"STRING", 1018 | "localized_name":"提示词" 1019 | }, 1020 | { 1021 | "widget":{ 1022 | "name":"device" 1023 | }, 1024 | "name":"device", 1025 | "type":"COMBO", 1026 | "localized_name":"设备" 1027 | }, 1028 | { 1029 | "widget":{ 1030 | "name":"max_megapixels" 1031 | }, 1032 | "name":"max_megapixels", 1033 | "type":"FLOAT", 1034 | "localized_name":"Vitmatte最大尺寸" 1035 | }, 1036 | { 1037 | "widget":{ 1038 | "name":"cache_model" 1039 | }, 1040 | "name":"cache_model", 1041 | "type":"BOOLEAN", 1042 | "localized_name":"cache_model" 1043 | } 1044 | ], 1045 | "flags":{ 1046 | 1047 | }, 1048 | "id":5, 1049 | "type":"LayerMask: SegmentAnythingUltra V2", 1050 | "properties":{ 1051 | "hasSecondTab":false, 1052 | "cnr_id":"ComfyUI_LayerStyle_Advance", 1053 | "ver":"5f8c6f29c484100245bc6f8ea66277f53e23cdbe", 1054 | "ue_properties":{ 1055 | "widget_ue_connectable":{ 1056 | "detail_dilate":true, 1057 | "detail_erode":true, 1058 | "threshold":true, 1059 | "max_megapixels":true, 1060 | "sam_model":true, 1061 | "detail_method":true, 1062 | "black_point":true, 1063 | "process_detail":true, 1064 | "grounding_dino_model":true, 1065 | "white_point":true, 1066 | "cache_model":true, 1067 | "prompt":true, 1068 | "device":true 
1069 | } 1070 | }, 1071 | "widget_ue_connectable":{ 1072 | 1073 | }, 1074 | "secondTabText":"Send Back", 1075 | "enableTabs":false, 1076 | "secondTabOffset":80, 1077 | "Node name for S&R":"LayerMask: SegmentAnythingUltra V2", 1078 | "tabWidth":65, 1079 | "secondTabWidth":65, 1080 | "tabXOffset":10 1081 | }, 1082 | "order":11 1083 | }, 1084 | { 1085 | "mode":0, 1086 | "outputs":[ 1087 | { 1088 | "name":"image", 1089 | "label":"图像", 1090 | "type":"IMAGE", 1091 | "localized_name":"image" 1092 | }, 1093 | { 1094 | "name":"mask", 1095 | "links":[ 1096 | 38, 1097 | 61 1098 | ], 1099 | "label":"遮罩", 1100 | "type":"MASK", 1101 | "localized_name":"mask" 1102 | } 1103 | ], 1104 | "size":[ 1105 | 390.8785095214844, 1106 | 366 1107 | ], 1108 | "color":"rgba(27, 80, 119, 0.7)", 1109 | "pos":[ 1110 | 6158.27734375, 1111 | 3462.401123046875 1112 | ], 1113 | "widgets_values":[ 1114 | "sam_vit_h (2.56GB)", 1115 | "GroundingDINO_SwinT_OGC (694MB)", 1116 | 0.3, 1117 | "VITMatte", 1118 | 6, 1119 | 6, 1120 | 0.15, 1121 | 0.99, 1122 | true, 1123 | "subject", 1124 | "cuda", 1125 | 2, 1126 | false 1127 | ], 1128 | "inputs":[ 1129 | { 1130 | "name":"image", 1131 | "link":19, 1132 | "label":"图像", 1133 | "type":"IMAGE", 1134 | "localized_name":"image" 1135 | }, 1136 | { 1137 | "widget":{ 1138 | "name":"sam_model" 1139 | }, 1140 | "name":"sam_model", 1141 | "type":"COMBO", 1142 | "localized_name":"SAM模型" 1143 | }, 1144 | { 1145 | "widget":{ 1146 | "name":"grounding_dino_model" 1147 | }, 1148 | "name":"grounding_dino_model", 1149 | "type":"COMBO", 1150 | "localized_name":"GroundingDINO模型" 1151 | }, 1152 | { 1153 | "widget":{ 1154 | "name":"threshold" 1155 | }, 1156 | "name":"threshold", 1157 | "type":"FLOAT", 1158 | "localized_name":"阈值" 1159 | }, 1160 | { 1161 | "widget":{ 1162 | "name":"detail_method" 1163 | }, 1164 | "name":"detail_method", 1165 | "type":"COMBO", 1166 | "localized_name":"细节处理方法" 1167 | }, 1168 | { 1169 | "widget":{ 1170 | "name":"detail_erode" 1171 | }, 1172 | "name":"detail_erode", 1173 | "type":"INT", 1174 | "localized_name":"细节消融" 1175 | }, 1176 | { 1177 | "widget":{ 1178 | "name":"detail_dilate" 1179 | }, 1180 | "name":"detail_dilate", 1181 | "type":"INT", 1182 | "localized_name":"细节膨胀" 1183 | }, 1184 | { 1185 | "widget":{ 1186 | "name":"black_point" 1187 | }, 1188 | "name":"black_point", 1189 | "type":"FLOAT", 1190 | "localized_name":"黑色阈值" 1191 | }, 1192 | { 1193 | "widget":{ 1194 | "name":"white_point" 1195 | }, 1196 | "name":"white_point", 1197 | "type":"FLOAT", 1198 | "localized_name":"白色阈值" 1199 | }, 1200 | { 1201 | "widget":{ 1202 | "name":"process_detail" 1203 | }, 1204 | "name":"process_detail", 1205 | "type":"BOOLEAN", 1206 | "localized_name":"处理细节" 1207 | }, 1208 | { 1209 | "widget":{ 1210 | "name":"prompt" 1211 | }, 1212 | "name":"prompt", 1213 | "type":"STRING", 1214 | "localized_name":"提示词" 1215 | }, 1216 | { 1217 | "widget":{ 1218 | "name":"device" 1219 | }, 1220 | "name":"device", 1221 | "type":"COMBO", 1222 | "localized_name":"设备" 1223 | }, 1224 | { 1225 | "widget":{ 1226 | "name":"max_megapixels" 1227 | }, 1228 | "name":"max_megapixels", 1229 | "type":"FLOAT", 1230 | "localized_name":"Vitmatte最大尺寸" 1231 | }, 1232 | { 1233 | "widget":{ 1234 | "name":"cache_model" 1235 | }, 1236 | "name":"cache_model", 1237 | "type":"BOOLEAN", 1238 | "localized_name":"cache_model" 1239 | } 1240 | ], 1241 | "flags":{ 1242 | 1243 | }, 1244 | "id":25, 1245 | "type":"LayerMask: SegmentAnythingUltra V2", 1246 | "properties":{ 1247 | "hasSecondTab":false, 1248 | "cnr_id":"ComfyUI_LayerStyle_Advance", 
1249 | "ver":"5f8c6f29c484100245bc6f8ea66277f53e23cdbe", 1250 | "ue_properties":{ 1251 | "widget_ue_connectable":{ 1252 | "detail_dilate":true, 1253 | "detail_erode":true, 1254 | "threshold":true, 1255 | "max_megapixels":true, 1256 | "sam_model":true, 1257 | "detail_method":true, 1258 | "black_point":true, 1259 | "process_detail":true, 1260 | "grounding_dino_model":true, 1261 | "white_point":true, 1262 | "cache_model":true, 1263 | "prompt":true, 1264 | "device":true 1265 | } 1266 | }, 1267 | "widget_ue_connectable":{ 1268 | 1269 | }, 1270 | "secondTabText":"Send Back", 1271 | "enableTabs":false, 1272 | "secondTabOffset":80, 1273 | "Node name for S&R":"LayerMask: SegmentAnythingUltra V2", 1274 | "tabWidth":65, 1275 | "secondTabWidth":65, 1276 | "tabXOffset":10 1277 | }, 1278 | "order":9 1279 | }, 1280 | { 1281 | "mode":0, 1282 | "outputs":[ 1283 | 1284 | ], 1285 | "size":[ 1286 | 526.8812255859375, 1287 | 432.2183532714844 1288 | ], 1289 | "pos":[ 1290 | 6902.7294921875, 1291 | 3433.54345703125 1292 | ], 1293 | "widgets_values":[ 1294 | 1295 | ], 1296 | "inputs":[ 1297 | { 1298 | "name":"mask", 1299 | "link":62, 1300 | "label":"遮罩", 1301 | "type":"MASK", 1302 | "localized_name":"mask" 1303 | } 1304 | ], 1305 | "flags":{ 1306 | 1307 | }, 1308 | "id":31, 1309 | "type":"MaskPreview+", 1310 | "properties":{ 1311 | "hasSecondTab":false, 1312 | "cnr_id":"comfyui_essentials", 1313 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9", 1314 | "ue_properties":{ 1315 | "widget_ue_connectable":{ 1316 | 1317 | } 1318 | }, 1319 | "widget_ue_connectable":{ 1320 | 1321 | }, 1322 | "secondTabText":"Send Back", 1323 | "enableTabs":false, 1324 | "secondTabOffset":80, 1325 | "Node name for S&R":"MaskPreview+", 1326 | "tabWidth":65, 1327 | "secondTabWidth":65, 1328 | "tabXOffset":10 1329 | }, 1330 | "order":22 1331 | }, 1332 | { 1333 | "mode":0, 1334 | "outputs":[ 1335 | 1336 | ], 1337 | "size":[ 1338 | 554.429443359375, 1339 | 435.8914489746094 1340 | ], 1341 | "pos":[ 1342 | 7463.20166015625, 1343 | 3439.820068359375 1344 | ], 1345 | "widgets_values":[ 1346 | 1347 | ], 1348 | "inputs":[ 1349 | { 1350 | "name":"mask", 1351 | "link":38, 1352 | "label":"遮罩", 1353 | "type":"MASK", 1354 | "localized_name":"mask" 1355 | } 1356 | ], 1357 | "flags":{ 1358 | 1359 | }, 1360 | "id":32, 1361 | "type":"MaskPreview+", 1362 | "properties":{ 1363 | "hasSecondTab":false, 1364 | "cnr_id":"comfyui_essentials", 1365 | "ver":"9d9f4bedfc9f0321c19faf71855e228c93bd0dc9", 1366 | "ue_properties":{ 1367 | "widget_ue_connectable":{ 1368 | 1369 | } 1370 | }, 1371 | "widget_ue_connectable":{ 1372 | 1373 | }, 1374 | "secondTabText":"Send Back", 1375 | "enableTabs":false, 1376 | "secondTabOffset":80, 1377 | "Node name for S&R":"MaskPreview+", 1378 | "tabWidth":65, 1379 | "secondTabWidth":65, 1380 | "tabXOffset":10 1381 | }, 1382 | "order":14 1383 | }, 1384 | { 1385 | "mode":0, 1386 | "outputs":[ 1387 | 1388 | ], 1389 | "size":[ 1390 | 210, 1391 | 62 1392 | ], 1393 | "pos":[ 1394 | 7009.439453125, 1395 | 2423.40869140625 1396 | ], 1397 | "widgets_values":[ 1398 | "3", 1399 | 1 1400 | ], 1401 | "inputs":[ 1402 | 1403 | ], 1404 | "flags":{ 1405 | 1406 | }, 1407 | "id":35, 1408 | "type":"Bookmark (rgthree)", 1409 | "properties":{ 1410 | "widget_ue_connectable":{ 1411 | 1412 | } 1413 | }, 1414 | "order":6 1415 | }, 1416 | { 1417 | "mode":0, 1418 | "outputs":[ 1419 | 1420 | ], 1421 | "size":[ 1422 | 210, 1423 | 62 1424 | ], 1425 | "pos":[ 1426 | 6160.31201171875, 1427 | 1049.2061767578125 1428 | ], 1429 | "widgets_values":[ 1430 | "4", 1431 | 1 
1432 | ], 1433 | "inputs":[ 1434 | 1435 | ], 1436 | "flags":{ 1437 | 1438 | }, 1439 | "id":34, 1440 | "type":"Bookmark (rgthree)", 1441 | "properties":{ 1442 | "widget_ue_connectable":{ 1443 | 1444 | } 1445 | }, 1446 | "order":7 1447 | }, 1448 | { 1449 | "mode":0, 1450 | "outputs":[ 1451 | { 1452 | "name":"image", 1453 | "links":[ 1454 | 50 1455 | ], 1456 | "label":"图像", 1457 | "type":"IMAGE", 1458 | "localized_name":"image" 1459 | }, 1460 | { 1461 | "name":"mask", 1462 | "links":[ 1463 | 37, 1464 | 55 1465 | ], 1466 | "label":"遮罩", 1467 | "type":"MASK", 1468 | "localized_name":"mask" 1469 | } 1470 | ], 1471 | "size":[ 1472 | 390.8785095214844, 1473 | 366 1474 | ], 1475 | "color":"rgba(27, 80, 119, 0.7)", 1476 | "pos":[ 1477 | 6448.3759765625, 1478 | 1170.3975830078125 1479 | ], 1480 | "widgets_values":[ 1481 | "sam_vit_h (2.56GB)", 1482 | "GroundingDINO_SwinT_OGC (694MB)", 1483 | 0.3, 1484 | "VITMatte", 1485 | 6, 1486 | 6, 1487 | 0.15, 1488 | 0.99, 1489 | true, 1490 | "subject", 1491 | "cuda", 1492 | 2, 1493 | false 1494 | ], 1495 | "inputs":[ 1496 | { 1497 | "name":"image", 1498 | "link":21, 1499 | "label":"图像", 1500 | "type":"IMAGE", 1501 | "localized_name":"image" 1502 | }, 1503 | { 1504 | "widget":{ 1505 | "name":"sam_model" 1506 | }, 1507 | "name":"sam_model", 1508 | "type":"COMBO", 1509 | "localized_name":"SAM模型" 1510 | }, 1511 | { 1512 | "widget":{ 1513 | "name":"grounding_dino_model" 1514 | }, 1515 | "name":"grounding_dino_model", 1516 | "type":"COMBO", 1517 | "localized_name":"GroundingDINO模型" 1518 | }, 1519 | { 1520 | "widget":{ 1521 | "name":"threshold" 1522 | }, 1523 | "name":"threshold", 1524 | "type":"FLOAT", 1525 | "localized_name":"阈值" 1526 | }, 1527 | { 1528 | "widget":{ 1529 | "name":"detail_method" 1530 | }, 1531 | "name":"detail_method", 1532 | "type":"COMBO", 1533 | "localized_name":"细节处理方法" 1534 | }, 1535 | { 1536 | "widget":{ 1537 | "name":"detail_erode" 1538 | }, 1539 | "name":"detail_erode", 1540 | "type":"INT", 1541 | "localized_name":"细节消融" 1542 | }, 1543 | { 1544 | "widget":{ 1545 | "name":"detail_dilate" 1546 | }, 1547 | "name":"detail_dilate", 1548 | "type":"INT", 1549 | "localized_name":"细节膨胀" 1550 | }, 1551 | { 1552 | "widget":{ 1553 | "name":"black_point" 1554 | }, 1555 | "name":"black_point", 1556 | "type":"FLOAT", 1557 | "localized_name":"黑色阈值" 1558 | }, 1559 | { 1560 | "widget":{ 1561 | "name":"white_point" 1562 | }, 1563 | "name":"white_point", 1564 | "type":"FLOAT", 1565 | "localized_name":"白色阈值" 1566 | }, 1567 | { 1568 | "widget":{ 1569 | "name":"process_detail" 1570 | }, 1571 | "name":"process_detail", 1572 | "type":"BOOLEAN", 1573 | "localized_name":"处理细节" 1574 | }, 1575 | { 1576 | "widget":{ 1577 | "name":"prompt" 1578 | }, 1579 | "name":"prompt", 1580 | "type":"STRING", 1581 | "localized_name":"提示词" 1582 | }, 1583 | { 1584 | "widget":{ 1585 | "name":"device" 1586 | }, 1587 | "name":"device", 1588 | "type":"COMBO", 1589 | "localized_name":"设备" 1590 | }, 1591 | { 1592 | "widget":{ 1593 | "name":"max_megapixels" 1594 | }, 1595 | "name":"max_megapixels", 1596 | "type":"FLOAT", 1597 | "localized_name":"Vitmatte最大尺寸" 1598 | }, 1599 | { 1600 | "widget":{ 1601 | "name":"cache_model" 1602 | }, 1603 | "name":"cache_model", 1604 | "type":"BOOLEAN", 1605 | "localized_name":"cache_model" 1606 | } 1607 | ], 1608 | "flags":{ 1609 | 1610 | }, 1611 | "id":23, 1612 | "type":"LayerMask: SegmentAnythingUltra V2", 1613 | "properties":{ 1614 | "hasSecondTab":false, 1615 | "cnr_id":"ComfyUI_LayerStyle_Advance", 1616 | "ver":"5f8c6f29c484100245bc6f8ea66277f53e23cdbe", 
1617 | "ue_properties":{ 1618 | "widget_ue_connectable":{ 1619 | "detail_dilate":true, 1620 | "detail_erode":true, 1621 | "threshold":true, 1622 | "max_megapixels":true, 1623 | "sam_model":true, 1624 | "detail_method":true, 1625 | "black_point":true, 1626 | "process_detail":true, 1627 | "grounding_dino_model":true, 1628 | "white_point":true, 1629 | "cache_model":true, 1630 | "prompt":true, 1631 | "device":true 1632 | } 1633 | }, 1634 | "widget_ue_connectable":{ 1635 | 1636 | }, 1637 | "secondTabText":"Send Back", 1638 | "enableTabs":false, 1639 | "secondTabOffset":80, 1640 | "Node name for S&R":"LayerMask: SegmentAnythingUltra V2", 1641 | "tabWidth":65, 1642 | "secondTabWidth":65, 1643 | "tabXOffset":10 1644 | }, 1645 | "order":10 1646 | }, 1647 | { 1648 | "mode":0, 1649 | "outputs":[ 1650 | { 1651 | "name":"alpha_mask", 1652 | "links":[ 1653 | 53 1654 | ], 1655 | "label":"alpha_mask", 1656 | "type":"MASK", 1657 | "localized_name":"alpha_mask" 1658 | }, 1659 | { 1660 | "name":"matted_image", 1661 | "label":"matted_image", 1662 | "type":"IMAGE", 1663 | "localized_name":"matted_image" 1664 | } 1665 | ], 1666 | "size":[ 1667 | 270, 1668 | 242 1669 | ], 1670 | "pos":[ 1671 | 6893.458984375, 1672 | -143.44204711914062 1673 | ], 1674 | "widgets_values":[ 1675 | "SDMatte_plus.safetensors", 1676 | 1024, 1677 | true, 1678 | "alpha_only", 1679 | true, 1680 | 0.8, 1681 | false 1682 | ], 1683 | "inputs":[ 1684 | { 1685 | "name":"image", 1686 | "link":51, 1687 | "label":"image", 1688 | "type":"IMAGE", 1689 | "localized_name":"image" 1690 | }, 1691 | { 1692 | "name":"trimap", 1693 | "link":52, 1694 | "label":"trimap", 1695 | "type":"MASK", 1696 | "localized_name":"trimap" 1697 | }, 1698 | { 1699 | "widget":{ 1700 | "name":"model_name" 1701 | }, 1702 | "name":"model_name", 1703 | "label":"model_name", 1704 | "type":"COMBO", 1705 | "localized_name":"model_name" 1706 | }, 1707 | { 1708 | "widget":{ 1709 | "name":"inference_size" 1710 | }, 1711 | "name":"inference_size", 1712 | "label":"inference_size", 1713 | "type":"COMBO", 1714 | "localized_name":"inference_size" 1715 | }, 1716 | { 1717 | "widget":{ 1718 | "name":"is_transparent" 1719 | }, 1720 | "name":"is_transparent", 1721 | "label":"is_transparent", 1722 | "type":"BOOLEAN", 1723 | "localized_name":"is_transparent" 1724 | }, 1725 | { 1726 | "widget":{ 1727 | "name":"output_mode" 1728 | }, 1729 | "name":"output_mode", 1730 | "label":"output_mode", 1731 | "type":"COMBO", 1732 | "localized_name":"output_mode" 1733 | }, 1734 | { 1735 | "widget":{ 1736 | "name":"mask_refine" 1737 | }, 1738 | "name":"mask_refine", 1739 | "label":"mask_refine", 1740 | "type":"BOOLEAN", 1741 | "localized_name":"mask_refine" 1742 | }, 1743 | { 1744 | "widget":{ 1745 | "name":"trimap_constraint" 1746 | }, 1747 | "name":"trimap_constraint", 1748 | "label":"trimap_constraint", 1749 | "type":"FLOAT", 1750 | "localized_name":"trimap_constraint" 1751 | }, 1752 | { 1753 | "widget":{ 1754 | "name":"force_cpu" 1755 | }, 1756 | "shape":7, 1757 | "name":"force_cpu", 1758 | "label":"force_cpu", 1759 | "type":"BOOLEAN", 1760 | "localized_name":"force_cpu" 1761 | } 1762 | ], 1763 | "flags":{ 1764 | 1765 | }, 1766 | "id":47, 1767 | "type":"SDMatteApply", 1768 | "properties":{ 1769 | "widget_ue_connectable":{ 1770 | 1771 | }, 1772 | "Node name for S&R":"SDMatteApply" 1773 | }, 1774 | "order":20 1775 | }, 1776 | { 1777 | "mode":0, 1778 | "outputs":[ 1779 | 1780 | ], 1781 | "size":[ 1782 | 270, 1783 | 58 1784 | ], 1785 | "pos":[ 1786 | 6919.6875, 1787 | 1521.2562255859375 1788 | ], 1789 | 
"widgets_values":[ 1790 | "ComfyUI" 1791 | ], 1792 | "inputs":[ 1793 | { 1794 | "name":"images", 1795 | "link":50, 1796 | "label":"图像", 1797 | "type":"IMAGE", 1798 | "localized_name":"图片" 1799 | }, 1800 | { 1801 | "widget":{ 1802 | "name":"filename_prefix" 1803 | }, 1804 | "name":"filename_prefix", 1805 | "type":"STRING", 1806 | "localized_name":"文件名前缀" 1807 | } 1808 | ], 1809 | "flags":{ 1810 | 1811 | }, 1812 | "id":46, 1813 | "type":"SaveImage", 1814 | "properties":{ 1815 | "widget_ue_connectable":{ 1816 | 1817 | }, 1818 | "Node name for S&R":"SaveImage" 1819 | }, 1820 | "order":16 1821 | }, 1822 | { 1823 | "mode":0, 1824 | "outputs":[ 1825 | { 1826 | "name":"alpha_mask", 1827 | "links":[ 1828 | 56 1829 | ], 1830 | "label":"alpha_mask", 1831 | "type":"MASK", 1832 | "localized_name":"alpha_mask" 1833 | }, 1834 | { 1835 | "name":"matted_image", 1836 | "label":"matted_image", 1837 | "type":"IMAGE", 1838 | "localized_name":"matted_image" 1839 | } 1840 | ], 1841 | "size":[ 1842 | 270, 1843 | 242 1844 | ], 1845 | "pos":[ 1846 | 6918.3759765625, 1847 | 1210.397705078125 1848 | ], 1849 | "widgets_values":[ 1850 | "SDMatte_plus.safetensors", 1851 | 1024, 1852 | false, 1853 | "alpha_only", 1854 | true, 1855 | 0.8, 1856 | false 1857 | ], 1858 | "inputs":[ 1859 | { 1860 | "name":"image", 1861 | "link":54, 1862 | "label":"image", 1863 | "type":"IMAGE", 1864 | "localized_name":"image" 1865 | }, 1866 | { 1867 | "name":"trimap", 1868 | "link":55, 1869 | "label":"trimap", 1870 | "type":"MASK", 1871 | "localized_name":"trimap" 1872 | }, 1873 | { 1874 | "widget":{ 1875 | "name":"model_name" 1876 | }, 1877 | "name":"model_name", 1878 | "label":"model_name", 1879 | "type":"COMBO", 1880 | "localized_name":"model_name" 1881 | }, 1882 | { 1883 | "widget":{ 1884 | "name":"inference_size" 1885 | }, 1886 | "name":"inference_size", 1887 | "label":"inference_size", 1888 | "type":"COMBO", 1889 | "localized_name":"inference_size" 1890 | }, 1891 | { 1892 | "widget":{ 1893 | "name":"is_transparent" 1894 | }, 1895 | "name":"is_transparent", 1896 | "label":"is_transparent", 1897 | "type":"BOOLEAN", 1898 | "localized_name":"is_transparent" 1899 | }, 1900 | { 1901 | "widget":{ 1902 | "name":"output_mode" 1903 | }, 1904 | "name":"output_mode", 1905 | "label":"output_mode", 1906 | "type":"COMBO", 1907 | "localized_name":"output_mode" 1908 | }, 1909 | { 1910 | "widget":{ 1911 | "name":"mask_refine" 1912 | }, 1913 | "name":"mask_refine", 1914 | "label":"mask_refine", 1915 | "type":"BOOLEAN", 1916 | "localized_name":"mask_refine" 1917 | }, 1918 | { 1919 | "widget":{ 1920 | "name":"trimap_constraint" 1921 | }, 1922 | "name":"trimap_constraint", 1923 | "label":"trimap_constraint", 1924 | "type":"FLOAT", 1925 | "localized_name":"trimap_constraint" 1926 | }, 1927 | { 1928 | "widget":{ 1929 | "name":"force_cpu" 1930 | }, 1931 | "shape":7, 1932 | "name":"force_cpu", 1933 | "label":"force_cpu", 1934 | "type":"BOOLEAN", 1935 | "localized_name":"force_cpu" 1936 | } 1937 | ], 1938 | "flags":{ 1939 | 1940 | }, 1941 | "id":48, 1942 | "type":"SDMatteApply", 1943 | "properties":{ 1944 | "widget_ue_connectable":{ 1945 | 1946 | }, 1947 | "Node name for S&R":"SDMatteApply" 1948 | }, 1949 | "order":18 1950 | }, 1951 | { 1952 | "mode":0, 1953 | "outputs":[ 1954 | { 1955 | "name":"alpha_mask", 1956 | "links":[ 1957 | 59 1958 | ], 1959 | "label":"alpha_mask", 1960 | "type":"MASK", 1961 | "localized_name":"alpha_mask" 1962 | }, 1963 | { 1964 | "name":"matted_image", 1965 | "label":"matted_image", 1966 | "type":"IMAGE", 1967 | 
"localized_name":"matted_image" 1968 | } 1969 | ], 1970 | "size":[ 1971 | 270, 1972 | 242 1973 | ], 1974 | "pos":[ 1975 | 7037.099609375, 1976 | 2225.364501953125 1977 | ], 1978 | "widgets_values":[ 1979 | "SDMatte_plus.safetensors", 1980 | 1024, 1981 | false, 1982 | "alpha_only", 1983 | true, 1984 | 0.8, 1985 | false 1986 | ], 1987 | "inputs":[ 1988 | { 1989 | "name":"image", 1990 | "link":57, 1991 | "label":"image", 1992 | "type":"IMAGE", 1993 | "localized_name":"image" 1994 | }, 1995 | { 1996 | "name":"trimap", 1997 | "link":58, 1998 | "label":"trimap", 1999 | "type":"MASK", 2000 | "localized_name":"trimap" 2001 | }, 2002 | { 2003 | "widget":{ 2004 | "name":"model_name" 2005 | }, 2006 | "name":"model_name", 2007 | "label":"model_name", 2008 | "type":"COMBO", 2009 | "localized_name":"model_name" 2010 | }, 2011 | { 2012 | "widget":{ 2013 | "name":"inference_size" 2014 | }, 2015 | "name":"inference_size", 2016 | "label":"inference_size", 2017 | "type":"COMBO", 2018 | "localized_name":"inference_size" 2019 | }, 2020 | { 2021 | "widget":{ 2022 | "name":"is_transparent" 2023 | }, 2024 | "name":"is_transparent", 2025 | "label":"is_transparent", 2026 | "type":"BOOLEAN", 2027 | "localized_name":"is_transparent" 2028 | }, 2029 | { 2030 | "widget":{ 2031 | "name":"output_mode" 2032 | }, 2033 | "name":"output_mode", 2034 | "label":"output_mode", 2035 | "type":"COMBO", 2036 | "localized_name":"output_mode" 2037 | }, 2038 | { 2039 | "widget":{ 2040 | "name":"mask_refine" 2041 | }, 2042 | "name":"mask_refine", 2043 | "label":"mask_refine", 2044 | "type":"BOOLEAN", 2045 | "localized_name":"mask_refine" 2046 | }, 2047 | { 2048 | "widget":{ 2049 | "name":"trimap_constraint" 2050 | }, 2051 | "name":"trimap_constraint", 2052 | "label":"trimap_constraint", 2053 | "type":"FLOAT", 2054 | "localized_name":"trimap_constraint" 2055 | }, 2056 | { 2057 | "widget":{ 2058 | "name":"force_cpu" 2059 | }, 2060 | "shape":7, 2061 | "name":"force_cpu", 2062 | "label":"force_cpu", 2063 | "type":"BOOLEAN", 2064 | "localized_name":"force_cpu" 2065 | } 2066 | ], 2067 | "flags":{ 2068 | 2069 | }, 2070 | "id":49, 2071 | "type":"SDMatteApply", 2072 | "properties":{ 2073 | "widget_ue_connectable":{ 2074 | 2075 | }, 2076 | "Node name for S&R":"SDMatteApply" 2077 | }, 2078 | "order":13 2079 | }, 2080 | { 2081 | "mode":0, 2082 | "outputs":[ 2083 | { 2084 | "name":"alpha_mask", 2085 | "links":[ 2086 | 62 2087 | ], 2088 | "label":"alpha_mask", 2089 | "type":"MASK", 2090 | "localized_name":"alpha_mask" 2091 | }, 2092 | { 2093 | "name":"matted_image", 2094 | "label":"matted_image", 2095 | "type":"IMAGE", 2096 | "localized_name":"matted_image" 2097 | } 2098 | ], 2099 | "size":[ 2100 | 270, 2101 | 242 2102 | ], 2103 | "pos":[ 2104 | 6589.41748046875, 2105 | 3463.54296875 2106 | ], 2107 | "widgets_values":[ 2108 | "SDMatte_plus.safetensors", 2109 | 1024, 2110 | false, 2111 | "alpha_only", 2112 | true, 2113 | 0.8, 2114 | false 2115 | ], 2116 | "inputs":[ 2117 | { 2118 | "name":"image", 2119 | "link":60, 2120 | "label":"image", 2121 | "type":"IMAGE", 2122 | "localized_name":"image" 2123 | }, 2124 | { 2125 | "name":"trimap", 2126 | "link":61, 2127 | "label":"trimap", 2128 | "type":"MASK", 2129 | "localized_name":"trimap" 2130 | }, 2131 | { 2132 | "widget":{ 2133 | "name":"model_name" 2134 | }, 2135 | "name":"model_name", 2136 | "label":"model_name", 2137 | "type":"COMBO", 2138 | "localized_name":"model_name" 2139 | }, 2140 | { 2141 | "widget":{ 2142 | "name":"inference_size" 2143 | }, 2144 | "name":"inference_size", 2145 | 
"label":"inference_size", 2146 | "type":"COMBO", 2147 | "localized_name":"inference_size" 2148 | }, 2149 | { 2150 | "widget":{ 2151 | "name":"is_transparent" 2152 | }, 2153 | "name":"is_transparent", 2154 | "label":"is_transparent", 2155 | "type":"BOOLEAN", 2156 | "localized_name":"is_transparent" 2157 | }, 2158 | { 2159 | "widget":{ 2160 | "name":"output_mode" 2161 | }, 2162 | "name":"output_mode", 2163 | "label":"output_mode", 2164 | "type":"COMBO", 2165 | "localized_name":"output_mode" 2166 | }, 2167 | { 2168 | "widget":{ 2169 | "name":"mask_refine" 2170 | }, 2171 | "name":"mask_refine", 2172 | "label":"mask_refine", 2173 | "type":"BOOLEAN", 2174 | "localized_name":"mask_refine" 2175 | }, 2176 | { 2177 | "widget":{ 2178 | "name":"trimap_constraint" 2179 | }, 2180 | "name":"trimap_constraint", 2181 | "label":"trimap_constraint", 2182 | "type":"FLOAT", 2183 | "localized_name":"trimap_constraint" 2184 | }, 2185 | { 2186 | "widget":{ 2187 | "name":"force_cpu" 2188 | }, 2189 | "shape":7, 2190 | "name":"force_cpu", 2191 | "label":"force_cpu", 2192 | "type":"BOOLEAN", 2193 | "localized_name":"force_cpu" 2194 | } 2195 | ], 2196 | "flags":{ 2197 | 2198 | }, 2199 | "id":50, 2200 | "type":"SDMatteApply", 2201 | "properties":{ 2202 | "widget_ue_connectable":{ 2203 | 2204 | }, 2205 | "Node name for S&R":"SDMatteApply" 2206 | }, 2207 | "order":15 2208 | } 2209 | ], 2210 | "extra":{ 2211 | "links_added_by_ue":[ 2212 | 2213 | ], 2214 | "VHS_KeepIntermediate":true, 2215 | "ue_links":[ 2216 | 2217 | ], 2218 | "VHS_MetadataImage":true, 2219 | "0246.VERSION":[ 2220 | 0, 2221 | 0, 2222 | 4 2223 | ], 2224 | "VHS_latentpreviewrate":0, 2225 | "frontendVersion":"1.23.4", 2226 | "VHS_latentpreview":false, 2227 | "ds":{ 2228 | "offset":[ 2229 | -5798.510608893245, 2230 | -3257.772095718232 2231 | ], 2232 | "scale":0.9646149645000017 2233 | } 2234 | }, 2235 | "groups":[ 2236 | { 2237 | "color":"#3f789e", 2238 | "font_size":24, 2239 | "flags":{ 2240 | 2241 | }, 2242 | "id":1, 2243 | "title":"Group", 2244 | "bounding":[ 2245 | 5840, 2246 | 3356.39990234375, 2247 | 2439.1904296875, 2248 | 547.0269775390625 2249 | ] 2250 | }, 2251 | { 2252 | "color":"#3f789e", 2253 | "font_size":24, 2254 | "flags":{ 2255 | 2256 | }, 2257 | "id":2, 2258 | "title":"Group", 2259 | "bounding":[ 2260 | 6097.099609375, 2261 | 2021.7645263671875, 2262 | 1939.23046875, 2263 | 477.5785217285156 2264 | ] 2265 | }, 2266 | { 2267 | "color":"#3f789e", 2268 | "font_size":24, 2269 | "flags":{ 2270 | 2271 | }, 2272 | "id":3, 2273 | "title":"Group", 2274 | "bounding":[ 2275 | 6008.3759765625, 2276 | 911.7197265625, 2277 | 2181.765625, 2278 | 693.96337890625 2279 | ] 2280 | }, 2281 | { 2282 | "color":"#3f789e", 2283 | "font_size":24, 2284 | "flags":{ 2285 | 2286 | }, 2287 | "id":4, 2288 | "title":"Group", 2289 | "bounding":[ 2290 | 5868.4970703125, 2291 | -411.5376892089844, 2292 | 2220.255859375, 2293 | 675.7010498046875 2294 | ] 2295 | } 2296 | ], 2297 | "links":[ 2298 | [ 2299 | 4, 2300 | 6, 2301 | 0, 2302 | 5, 2303 | 0, 2304 | "IMAGE" 2305 | ], 2306 | [ 2307 | 8, 2308 | 5, 2309 | 1, 2310 | 8, 2311 | 0, 2312 | "MASK" 2313 | ], 2314 | [ 2315 | 19, 2316 | 19, 2317 | 0, 2318 | 25, 2319 | 0, 2320 | "IMAGE" 2321 | ], 2322 | [ 2323 | 20, 2324 | 18, 2325 | 0, 2326 | 24, 2327 | 0, 2328 | "IMAGE" 2329 | ], 2330 | [ 2331 | 21, 2332 | 17, 2333 | 0, 2334 | 23, 2335 | 0, 2336 | "IMAGE" 2337 | ], 2338 | [ 2339 | 32, 2340 | 24, 2341 | 1, 2342 | 30, 2343 | 0, 2344 | "MASK" 2345 | ], 2346 | [ 2347 | 37, 2348 | 23, 2349 | 1, 2350 | 28, 2351 | 0, 2352 | "MASK" 2353 | 
], 2354 | [ 2355 | 38, 2356 | 25, 2357 | 1, 2358 | 32, 2359 | 0, 2360 | "MASK" 2361 | ], 2362 | [ 2363 | 50, 2364 | 23, 2365 | 0, 2366 | 46, 2367 | 0, 2368 | "IMAGE" 2369 | ], 2370 | [ 2371 | 51, 2372 | 6, 2373 | 0, 2374 | 47, 2375 | 0, 2376 | "IMAGE" 2377 | ], 2378 | [ 2379 | 52, 2380 | 5, 2381 | 1, 2382 | 47, 2383 | 1, 2384 | "MASK" 2385 | ], 2386 | [ 2387 | 53, 2388 | 47, 2389 | 0, 2390 | 16, 2391 | 0, 2392 | "MASK" 2393 | ], 2394 | [ 2395 | 54, 2396 | 17, 2397 | 0, 2398 | 48, 2399 | 0, 2400 | "IMAGE" 2401 | ], 2402 | [ 2403 | 55, 2404 | 23, 2405 | 1, 2406 | 48, 2407 | 1, 2408 | "MASK" 2409 | ], 2410 | [ 2411 | 56, 2412 | 48, 2413 | 0, 2414 | 27, 2415 | 0, 2416 | "MASK" 2417 | ], 2418 | [ 2419 | 57, 2420 | 18, 2421 | 0, 2422 | 49, 2423 | 0, 2424 | "IMAGE" 2425 | ], 2426 | [ 2427 | 58, 2428 | 24, 2429 | 1, 2430 | 49, 2431 | 1, 2432 | "MASK" 2433 | ], 2434 | [ 2435 | 59, 2436 | 49, 2437 | 0, 2438 | 29, 2439 | 0, 2440 | "MASK" 2441 | ], 2442 | [ 2443 | 60, 2444 | 19, 2445 | 0, 2446 | 50, 2447 | 0, 2448 | "IMAGE" 2449 | ], 2450 | [ 2451 | 61, 2452 | 25, 2453 | 1, 2454 | 50, 2455 | 1, 2456 | "MASK" 2457 | ], 2458 | [ 2459 | 62, 2460 | 50, 2461 | 0, 2462 | 31, 2463 | 0, 2464 | "MASK" 2465 | ] 2466 | ], 2467 | "id":"500ae763-00a0-4a77-839d-fab74ac03ec3", 2468 | "config":{ 2469 | 2470 | }, 2471 | "version":0.4, 2472 | "last_node_id":50, 2473 | "revision":0 2474 | } --------------------------------------------------------------------------------
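Editor's note on the workflow export above: the file is a standard ComfyUI (LiteGraph) graph export. The Chinese "label"/"localized_name" strings (e.g. "图像" = image, "遮罩" = mask, "阈值" = threshold) are UI localization data and are left verbatim so the file stays loadable. The part of the format easiest to misread is the "links" array: each entry is a six-element tuple, [link_id, src_node_id, src_output_slot, dst_node_id, dst_input_slot, type]. For example, [50, 23, 0, 46, 0, "IMAGE"] routes output slot 0 ("image") of node 23 (LayerMask: SegmentAnythingUltra V2) into input slot 0 ("images") of node 46 (SaveImage). The Python sketch below assumes only this export format; the script and function names are illustrative and not part of this repository:

import json

def load_workflow(path):
    """Load a ComfyUI workflow export (plain JSON, UTF-8)."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)

def print_edges(wf):
    """Print each link as '[id] src_type:slot -> dst_type:slot (TYPE)'."""
    nodes = {n["id"]: n for n in wf["nodes"]}
    for link_id, src, src_slot, dst, dst_slot, ltype in wf["links"]:
        src_name = nodes.get(src, {}).get("type", f"node {src}")
        dst_name = nodes.get(dst, {}).get("type", f"node {dst}")
        print(f"[{link_id}] {src_name}:{src_slot} -> {dst_name}:{dst_slot} ({ltype})")

if __name__ == "__main__":
    print_edges(load_workflow("example_workflow/超强抠图遮罩细化工作流.json"))

Run against this file, the listing makes the graph's parallel structure visible: each of the four SDMatteApply nodes (ids 47-50) takes its image from an upstream IMAGE output and its trimap from an upstream node's MASK output (links 51/52, 54/55, 57/58, 60/61).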
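The second convention worth noting is how widget values are serialized. Inputs that carry a "widget" key are widget-backed, and in this export their order matches the flat "widgets_values" array, so node 47 (SDMatteApply) decodes to model_name="SDMatte_plus.safetensors", inference_size=1024, is_transparent=true, output_mode="alpha_only", mask_refine=true, trimap_constraint=0.8, force_cpu=false. Below is a minimal sketch of that pairing, under the assumption that the ordering holds; it does for every node shown here, but some ComfyUI nodes append extra control values to widgets_values (e.g. a seed-control entry), so treat the zip as a heuristic rather than a guarantee:

def widget_values(node):
    """Pair widget-backed inputs with widgets_values, in export order.

    Assumes one widgets_values entry per input that has a "widget" key,
    which holds for this workflow but is not guaranteed in general.
    """
    names = [i["widget"]["name"] for i in node.get("inputs", []) if "widget" in i]
    return dict(zip(names, node.get("widgets_values", [])))

# Example, reusing load_workflow() from the previous sketch:
#   wf = load_workflow("example_workflow/超强抠图遮罩细化工作流.json")
#   for node in wf["nodes"]:
#       if node["type"] == "SDMatteApply":
#           print(node["id"], widget_values(node))

Comparing the four SDMatteApply nodes this way shows they differ only in is_transparent (true for node 47, false for nodes 48-50); model, inference size, output mode, refinement, and trimap constraint are identical across all four branches.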