├── .gitattributes ├── .github └── workflows │ └── publish.yml ├── .gitignore ├── README.md ├── __init__.py ├── configs └── lotus_unet_config.json ├── empty_text_embed.pt ├── examples ├── lotus_depth_g_example_01.json └── lotus_normal_g_example_01.json ├── nodes.py ├── pyproject.toml └── requirements.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | paths: 9 | - "pyproject.toml" 10 | 11 | jobs: 12 | publish-node: 13 | name: Publish Custom Node to registry 14 | runs-on: ubuntu-latest 15 | # if this is a forked repository. Skipping the workflow. 16 | if: github.event.repository.fork == false 17 | steps: 18 | - name: Check out code 19 | uses: actions/checkout@v4 20 | - name: Publish Custom Node 21 | uses: Comfy-Org/publish-node-action@main 22 | with: 23 | ## Add your own personal access token to your Github Repository secrets and reference it here. 
24 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | 3 | training/ 4 | lightning_logs/ 5 | image_log/ 6 | 7 | *.pth 8 | *.ckpt 9 | *.safetensors 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | pip-wheel-metadata/ 34 | share/python-wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | MANIFEST 39 | 40 | # PyInstaller 41 | # Usually these files are written by a python script from a template 42 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 43 | *.manifest 44 | *.spec 45 | 46 | # Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | 142 | *.safetensors 143 | *.ckpt 144 | 145 | checkpoints -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI nodes to use Lotus depth/normal prediction 2 | 3 | ![image](https://github.com/user-attachments/assets/ff98210c-c828-4bdc-9514-1ba8332a983b) 4 | 5 | Models from: 6 | 7 | https://huggingface.co/Kijai/lotus-comfyui/tree/main 8 | 9 | to: 10 | 11 | `ComfyUI/models/diffusion_models` 12 | 13 | Original repo: https://github.com/EnVision-Research/Lotus 14 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS 2 | 3 | __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] -------------------------------------------------------------------------------- /configs/lotus_unet_config.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "_class_name": "UNet2DConditionModel", 3 | "_diffusers_version": "0.28.0.dev0", 4 | "_name_or_path": "../Lotus-weights/lotus-depth-d-v1-1/unet", 5 | "act_fn": "silu", 6 | "addition_embed_type": null, 7 | "addition_embed_type_num_heads": 64, 8 | "addition_time_embed_dim": null, 9 | "attention_head_dim": [ 10 | 5, 11 | 10, 12 | 20, 13 | 20 14 | ], 15 | "attention_type": "default", 16 | "block_out_channels": [ 17 | 320, 18 | 640, 19 | 1280, 20 | 1280 21 | ], 22 | "center_input_sample": false, 23 | "class_embed_type": "projection", 24 | "class_embeddings_concat": false, 25 | "conv_in_kernel": 3, 26 | "conv_out_kernel": 3, 27 | "cross_attention_dim": 1024, 28 | "cross_attention_norm": null, 29 | "down_block_types": [ 30 | "CrossAttnDownBlock2D", 31 | "CrossAttnDownBlock2D", 32 | "CrossAttnDownBlock2D", 33 | "DownBlock2D" 34 | ], 35 | "downsample_padding": 1, 36 | "dropout": 0.0, 37 | "dual_cross_attention": false, 38 | "encoder_hid_dim": null, 39 | "encoder_hid_dim_type": null, 40 | "flip_sin_to_cos": true, 41 | "freq_shift": 0, 42 | "in_channels": 4, 43 | "layers_per_block": 2, 44 | "mid_block_only_cross_attention": null, 45 | "mid_block_scale_factor": 1, 46 | "mid_block_type": "UNetMidBlock2DCrossAttn", 47 | "norm_eps": 1e-05, 48 | "norm_num_groups": 32, 49 | "num_attention_heads": null, 50 | "num_class_embeds": null, 51 | "only_cross_attention": false, 52 | "out_channels": 4, 53 | "projection_class_embeddings_input_dim": 4, 54 | "resnet_out_scale_factor": 1.0, 55 | "resnet_skip_time_act": false, 56 | "resnet_time_scale_shift": "default", 57 | "reverse_transformer_layers_per_block": null, 58 | "sample_size": 64, 59 | "time_cond_proj_dim": null, 60 | "time_embedding_act_fn": null, 61 | "time_embedding_dim": null, 62 | "time_embedding_type": "positional", 63 | "timestep_post_act": null, 64 | "transformer_layers_per_block": 1, 65 | "up_block_types": [ 66 | "UpBlock2D", 67 | 
"CrossAttnUpBlock2D", 68 | "CrossAttnUpBlock2D", 69 | "CrossAttnUpBlock2D" 70 | ], 71 | "upcast_attention": false, 72 | "use_linear_projection": true 73 | } -------------------------------------------------------------------------------- /empty_text_embed.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kijai/ComfyUI-Lotus/dcd5bea7a418717a6e0c27a5f607058cb9c24a5e/empty_text_embed.pt -------------------------------------------------------------------------------- /examples/lotus_depth_g_example_01.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 15, 3 | "last_link_id": 12, 4 | "nodes": [ 5 | { 6 | "id": 10, 7 | "type": "LoadImage", 8 | "pos": { 9 | "0": 484, 10 | "1": 657 11 | }, 12 | "size": { 13 | "0": 290.0658874511719, 14 | "1": 378.9219665527344 15 | }, 16 | "flags": {}, 17 | "order": 0, 18 | "mode": 0, 19 | "inputs": [], 20 | "outputs": [ 21 | { 22 | "name": "IMAGE", 23 | "type": "IMAGE", 24 | "links": [ 25 | 6 26 | ] 27 | }, 28 | { 29 | "name": "MASK", 30 | "type": "MASK", 31 | "links": null 32 | } 33 | ], 34 | "properties": { 35 | "Node name for S&R": "LoadImage" 36 | }, 37 | "widgets_values": [ 38 | "002_source.png", 39 | "image" 40 | ] 41 | }, 42 | { 43 | "id": 9, 44 | "type": "VAEEncode", 45 | "pos": { 46 | "0": 846, 47 | "1": 731 48 | }, 49 | "size": { 50 | "0": 210, 51 | "1": 46 52 | }, 53 | "flags": {}, 54 | "order": 4, 55 | "mode": 0, 56 | "inputs": [ 57 | { 58 | "name": "pixels", 59 | "type": "IMAGE", 60 | "link": 6 61 | }, 62 | { 63 | "name": "vae", 64 | "type": "VAE", 65 | "link": 7 66 | } 67 | ], 68 | "outputs": [ 69 | { 70 | "name": "LATENT", 71 | "type": "LATENT", 72 | "links": [ 73 | 5 74 | ] 75 | } 76 | ], 77 | "properties": { 78 | "Node name for S&R": "VAEEncode" 79 | }, 80 | "widgets_values": [] 81 | }, 82 | { 83 | "id": 11, 84 | "type": "VAELoader", 85 | "pos": { 86 | "0": 481, 87 | "1": 1095 88 | }, 89 | 
"size": { 90 | "0": 315, 91 | "1": 58 92 | }, 93 | "flags": {}, 94 | "order": 1, 95 | "mode": 0, 96 | "inputs": [], 97 | "outputs": [ 98 | { 99 | "name": "VAE", 100 | "type": "VAE", 101 | "links": [ 102 | 7, 103 | 9 104 | ] 105 | } 106 | ], 107 | "properties": { 108 | "Node name for S&R": "VAELoader" 109 | }, 110 | "widgets_values": [ 111 | "vae-ft-mse-840000-ema-pruned.safetensors" 112 | ] 113 | }, 114 | { 115 | "id": 12, 116 | "type": "Note", 117 | "pos": { 118 | "0": 490, 119 | "1": 1200 120 | }, 121 | "size": { 122 | "0": 289.78192138671875, 123 | "1": 58 124 | }, 125 | "flags": {}, 126 | "order": 2, 127 | "mode": 0, 128 | "inputs": [], 129 | "outputs": [], 130 | "properties": {}, 131 | "widgets_values": [ 132 | "Any SD1.5 / 2.0 VAE" 133 | ], 134 | "color": "#432", 135 | "bgcolor": "#653" 136 | }, 137 | { 138 | "id": 8, 139 | "type": "LotusSampler", 140 | "pos": { 141 | "0": 1120, 142 | "1": 514 143 | }, 144 | "size": { 145 | "0": 315, 146 | "1": 150 147 | }, 148 | "flags": {}, 149 | "order": 5, 150 | "mode": 0, 151 | "inputs": [ 152 | { 153 | "name": "lotus_unet", 154 | "type": "LOTUSUNET", 155 | "link": 4 156 | }, 157 | { 158 | "name": "samples", 159 | "type": "LATENT", 160 | "link": 5 161 | } 162 | ], 163 | "outputs": [ 164 | { 165 | "name": "samples", 166 | "type": "LATENT", 167 | "links": [ 168 | 8 169 | ], 170 | "slot_index": 0 171 | } 172 | ], 173 | "properties": { 174 | "Node name for S&R": "LotusSampler" 175 | }, 176 | "widgets_values": [ 177 | 3599967422, 178 | "fixed", 179 | 4, 180 | false 181 | ] 182 | }, 183 | { 184 | "id": 14, 185 | "type": "PreviewImage", 186 | "pos": { 187 | "0": 1295, 188 | "1": 731 189 | }, 190 | "size": { 191 | "0": 499.8815002441406, 192 | "1": 507.5718994140625 193 | }, 194 | "flags": {}, 195 | "order": 8, 196 | "mode": 0, 197 | "inputs": [ 198 | { 199 | "name": "images", 200 | "type": "IMAGE", 201 | "link": 12 202 | } 203 | ], 204 | "outputs": [], 205 | "properties": { 206 | "Node name for S&R": "PreviewImage" 207 | }, 208 
| "widgets_values": [] 209 | }, 210 | { 211 | "id": 7, 212 | "type": "LoadLotusModel", 213 | "pos": { 214 | "0": 479, 215 | "1": 515 216 | }, 217 | "size": { 218 | "0": 315, 219 | "1": 82 220 | }, 221 | "flags": {}, 222 | "order": 3, 223 | "mode": 0, 224 | "inputs": [], 225 | "outputs": [ 226 | { 227 | "name": "lotus_unet", 228 | "type": "LOTUSUNET", 229 | "links": [ 230 | 4 231 | ], 232 | "slot_index": 0 233 | } 234 | ], 235 | "properties": { 236 | "Node name for S&R": "LoadLotusModel" 237 | }, 238 | "widgets_values": [ 239 | "lotus-depth-g-v1-0-fp16.safetensors", 240 | "fp16" 241 | ] 242 | }, 243 | { 244 | "id": 13, 245 | "type": "VAEDecode", 246 | "pos": { 247 | "0": 1492, 248 | "1": 523 249 | }, 250 | "size": { 251 | "0": 210, 252 | "1": 46 253 | }, 254 | "flags": {}, 255 | "order": 6, 256 | "mode": 0, 257 | "inputs": [ 258 | { 259 | "name": "samples", 260 | "type": "LATENT", 261 | "link": 8 262 | }, 263 | { 264 | "name": "vae", 265 | "type": "VAE", 266 | "link": 9 267 | } 268 | ], 269 | "outputs": [ 270 | { 271 | "name": "IMAGE", 272 | "type": "IMAGE", 273 | "links": [ 274 | 11 275 | ], 276 | "slot_index": 0 277 | } 278 | ], 279 | "properties": { 280 | "Node name for S&R": "VAEDecode" 281 | }, 282 | "widgets_values": [] 283 | }, 284 | { 285 | "id": 15, 286 | "type": "ImageInvert", 287 | "pos": { 288 | "0": 1485, 289 | "1": 631 290 | }, 291 | "size": { 292 | "0": 210, 293 | "1": 26 294 | }, 295 | "flags": {}, 296 | "order": 7, 297 | "mode": 0, 298 | "inputs": [ 299 | { 300 | "name": "image", 301 | "type": "IMAGE", 302 | "link": 11 303 | } 304 | ], 305 | "outputs": [ 306 | { 307 | "name": "IMAGE", 308 | "type": "IMAGE", 309 | "links": [ 310 | 12 311 | ], 312 | "slot_index": 0 313 | } 314 | ], 315 | "properties": { 316 | "Node name for S&R": "ImageInvert" 317 | } 318 | } 319 | ], 320 | "links": [ 321 | [ 322 | 4, 323 | 7, 324 | 0, 325 | 8, 326 | 0, 327 | "LOTUSUNET" 328 | ], 329 | [ 330 | 5, 331 | 9, 332 | 0, 333 | 8, 334 | 1, 335 | "LATENT" 336 | ], 337 | [ 338 
| 6, 339 | 10, 340 | 0, 341 | 9, 342 | 0, 343 | "IMAGE" 344 | ], 345 | [ 346 | 7, 347 | 11, 348 | 0, 349 | 9, 350 | 1, 351 | "VAE" 352 | ], 353 | [ 354 | 8, 355 | 8, 356 | 0, 357 | 13, 358 | 0, 359 | "LATENT" 360 | ], 361 | [ 362 | 9, 363 | 11, 364 | 0, 365 | 13, 366 | 1, 367 | "VAE" 368 | ], 369 | [ 370 | 11, 371 | 13, 372 | 0, 373 | 15, 374 | 0, 375 | "IMAGE" 376 | ], 377 | [ 378 | 12, 379 | 15, 380 | 0, 381 | 14, 382 | 0, 383 | "IMAGE" 384 | ] 385 | ], 386 | "groups": [], 387 | "config": {}, 388 | "extra": { 389 | "ds": { 390 | "scale": 0.7627768444387127, 391 | "offset": [ 392 | 165.4996922821213, 393 | -136.75873788961735 394 | ] 395 | } 396 | }, 397 | "version": 0.4 398 | } -------------------------------------------------------------------------------- /examples/lotus_normal_g_example_01.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 14, 3 | "last_link_id": 10, 4 | "nodes": [ 5 | { 6 | "id": 7, 7 | "type": "LoadLotusModel", 8 | "pos": { 9 | "0": 479, 10 | "1": 515 11 | }, 12 | "size": { 13 | "0": 315, 14 | "1": 82 15 | }, 16 | "flags": {}, 17 | "order": 0, 18 | "mode": 0, 19 | "inputs": [], 20 | "outputs": [ 21 | { 22 | "name": "lotus_unet", 23 | "type": "LOTUSUNET", 24 | "links": [ 25 | 4 26 | ], 27 | "slot_index": 0 28 | } 29 | ], 30 | "properties": { 31 | "Node name for S&R": "LoadLotusModel" 32 | }, 33 | "widgets_values": [ 34 | "lotus-normal-g-v1-0-fp16.safetensors", 35 | "fp16" 36 | ] 37 | }, 38 | { 39 | "id": 10, 40 | "type": "LoadImage", 41 | "pos": { 42 | "0": 484, 43 | "1": 657 44 | }, 45 | "size": { 46 | "0": 290.0658874511719, 47 | "1": 378.9219665527344 48 | }, 49 | "flags": {}, 50 | "order": 1, 51 | "mode": 0, 52 | "inputs": [], 53 | "outputs": [ 54 | { 55 | "name": "IMAGE", 56 | "type": "IMAGE", 57 | "links": [ 58 | 6 59 | ] 60 | }, 61 | { 62 | "name": "MASK", 63 | "type": "MASK", 64 | "links": null 65 | } 66 | ], 67 | "properties": { 68 | "Node name for S&R": "LoadImage" 69 | 
}, 70 | "widgets_values": [ 71 | "002_source.png", 72 | "image" 73 | ] 74 | }, 75 | { 76 | "id": 9, 77 | "type": "VAEEncode", 78 | "pos": { 79 | "0": 846, 80 | "1": 731 81 | }, 82 | "size": { 83 | "0": 210, 84 | "1": 46 85 | }, 86 | "flags": {}, 87 | "order": 4, 88 | "mode": 0, 89 | "inputs": [ 90 | { 91 | "name": "pixels", 92 | "type": "IMAGE", 93 | "link": 6 94 | }, 95 | { 96 | "name": "vae", 97 | "type": "VAE", 98 | "link": 7 99 | } 100 | ], 101 | "outputs": [ 102 | { 103 | "name": "LATENT", 104 | "type": "LATENT", 105 | "links": [ 106 | 5 107 | ] 108 | } 109 | ], 110 | "properties": { 111 | "Node name for S&R": "VAEEncode" 112 | }, 113 | "widgets_values": [] 114 | }, 115 | { 116 | "id": 11, 117 | "type": "VAELoader", 118 | "pos": { 119 | "0": 481, 120 | "1": 1095 121 | }, 122 | "size": { 123 | "0": 315, 124 | "1": 58 125 | }, 126 | "flags": {}, 127 | "order": 2, 128 | "mode": 0, 129 | "inputs": [], 130 | "outputs": [ 131 | { 132 | "name": "VAE", 133 | "type": "VAE", 134 | "links": [ 135 | 7, 136 | 9 137 | ] 138 | } 139 | ], 140 | "properties": { 141 | "Node name for S&R": "VAELoader" 142 | }, 143 | "widgets_values": [ 144 | "vae-ft-mse-840000-ema-pruned.safetensors" 145 | ] 146 | }, 147 | { 148 | "id": 12, 149 | "type": "Note", 150 | "pos": { 151 | "0": 490, 152 | "1": 1200 153 | }, 154 | "size": { 155 | "0": 289.78192138671875, 156 | "1": 58 157 | }, 158 | "flags": {}, 159 | "order": 3, 160 | "mode": 0, 161 | "inputs": [], 162 | "outputs": [], 163 | "properties": {}, 164 | "widgets_values": [ 165 | "Any SD1.5 / 2.0 VAE" 166 | ], 167 | "color": "#432", 168 | "bgcolor": "#653" 169 | }, 170 | { 171 | "id": 8, 172 | "type": "LotusSampler", 173 | "pos": { 174 | "0": 1120, 175 | "1": 514 176 | }, 177 | "size": { 178 | "0": 315, 179 | "1": 150 180 | }, 181 | "flags": {}, 182 | "order": 5, 183 | "mode": 0, 184 | "inputs": [ 185 | { 186 | "name": "lotus_unet", 187 | "type": "LOTUSUNET", 188 | "link": 4 189 | }, 190 | { 191 | "name": "samples", 192 | "type": "LATENT", 
193 | "link": 5 194 | } 195 | ], 196 | "outputs": [ 197 | { 198 | "name": "samples", 199 | "type": "LATENT", 200 | "links": [ 201 | 8 202 | ], 203 | "slot_index": 0 204 | } 205 | ], 206 | "properties": { 207 | "Node name for S&R": "LotusSampler" 208 | }, 209 | "widgets_values": [ 210 | 3599967422, 211 | "fixed", 212 | 4, 213 | false 214 | ] 215 | }, 216 | { 217 | "id": 13, 218 | "type": "VAEDecode", 219 | "pos": { 220 | "0": 1492, 221 | "1": 523 222 | }, 223 | "size": { 224 | "0": 210, 225 | "1": 46 226 | }, 227 | "flags": {}, 228 | "order": 6, 229 | "mode": 0, 230 | "inputs": [ 231 | { 232 | "name": "samples", 233 | "type": "LATENT", 234 | "link": 8 235 | }, 236 | { 237 | "name": "vae", 238 | "type": "VAE", 239 | "link": 9 240 | } 241 | ], 242 | "outputs": [ 243 | { 244 | "name": "IMAGE", 245 | "type": "IMAGE", 246 | "links": [ 247 | 10 248 | ], 249 | "slot_index": 0 250 | } 251 | ], 252 | "properties": { 253 | "Node name for S&R": "VAEDecode" 254 | }, 255 | "widgets_values": [] 256 | }, 257 | { 258 | "id": 14, 259 | "type": "PreviewImage", 260 | "pos": { 261 | "0": 1295, 262 | "1": 731 263 | }, 264 | "size": { 265 | "0": 499.8815002441406, 266 | "1": 507.5718994140625 267 | }, 268 | "flags": {}, 269 | "order": 7, 270 | "mode": 0, 271 | "inputs": [ 272 | { 273 | "name": "images", 274 | "type": "IMAGE", 275 | "link": 10 276 | } 277 | ], 278 | "outputs": [], 279 | "properties": { 280 | "Node name for S&R": "PreviewImage" 281 | }, 282 | "widgets_values": [] 283 | } 284 | ], 285 | "links": [ 286 | [ 287 | 4, 288 | 7, 289 | 0, 290 | 8, 291 | 0, 292 | "LOTUSUNET" 293 | ], 294 | [ 295 | 5, 296 | 9, 297 | 0, 298 | 8, 299 | 1, 300 | "LATENT" 301 | ], 302 | [ 303 | 6, 304 | 10, 305 | 0, 306 | 9, 307 | 0, 308 | "IMAGE" 309 | ], 310 | [ 311 | 7, 312 | 11, 313 | 0, 314 | 9, 315 | 1, 316 | "VAE" 317 | ], 318 | [ 319 | 8, 320 | 8, 321 | 0, 322 | 13, 323 | 0, 324 | "LATENT" 325 | ], 326 | [ 327 | 9, 328 | 11, 329 | 0, 330 | 13, 331 | 1, 332 | "VAE" 333 | ], 334 | [ 335 | 10, 336 
"""ComfyUI custom nodes for running Lotus depth / normal prediction UNets."""

import os
import torch
import folder_paths
import comfy.model_management as mm
from comfy.utils import load_torch_file, ProgressBar

import logging
import json
from diffusers.models import UNet2DConditionModel

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
log = logging.getLogger(__name__)

script_directory = os.path.dirname(os.path.abspath(__file__))

# Factor applied to the incoming latents before the UNet and divided out of
# its prediction afterwards.
# NOTE(review): 0.18215 matches the Stable Diffusion 1.x/2.x VAE
# ``scaling_factor`` -- confirm if other VAEs are ever supported.
LATENT_SCALE = 0.18215

# Widget value -> torch dtype. "bf16" is accepted here although the widget
# currently only offers "fp16" and "fp32".
PRECISION_TO_DTYPE = {
    "bf16": torch.bfloat16,
    "fp16": torch.float16,
    "fp32": torch.float32,
}


class LoadLotusModel:
    """Node that builds a Lotus UNet from a checkpoint in ``diffusion_models``."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "model": (folder_paths.get_filename_list("diffusion_models"), {"tooltip": "models are loaded from 'ComfyUI/models/diffusion_models'"}),
            },
            "optional": {
                "precision": (
                    ["fp16", "fp32"],
                    {"default": "fp16"},
                ),
            },
        }

    RETURN_TYPES = ("LOTUSUNET",)
    RETURN_NAMES = ("lotus_unet",)
    FUNCTION = "loadmodel"
    CATEGORY = "ComfyUI-Lotus"

    def loadmodel(self, model, precision="fp16"):
        """Load the checkpoint and return it wrapped in a ``LOTUSUNET`` dict.

        Args:
            model: File name of the checkpoint inside ``diffusion_models``.
            precision: Key of ``PRECISION_TO_DTYPE``. It is declared as an
                optional input, so it defaults to ``"fp16"`` when omitted.

        Returns:
            A 1-tuple holding a dict with the keys ``"model"`` (the UNet),
            ``"dtype"`` and ``"in_channels"``.

        Raises:
            KeyError: If ``precision`` is not a known precision name.
        """
        dtype = PRECISION_TO_DTYPE[precision]
        mm.soft_empty_cache()

        lotus_model_path = folder_paths.get_full_path_or_raise("diffusion_models", model)
        lotus_sd = load_torch_file(lotus_model_path)

        # The bundled config is shared by all checkpoints; only the number of
        # input channels is taken from the weights themselves.
        in_channels = lotus_sd['conv_in.weight'].shape[1]
        lotus_config = os.path.join(script_directory, "configs", "lotus_unet_config.json")
        with open(lotus_config, 'r', encoding="utf-8") as config_file:
            config_data = json.load(config_file)
        config_data["in_channels"] = in_channels

        lotus_unet = UNet2DConditionModel.from_config(config_data)
        lotus_unet.load_state_dict(lotus_sd)
        lotus_unet.to(dtype)
        # A freshly constructed nn.Module starts in training mode; this model
        # is only ever used for inference.
        lotus_unet.eval()

        lotus_model = {
            "model": lotus_unet,
            "dtype": dtype,
            "in_channels": in_channels,
        }

        return (lotus_model,)


class LotusSampler:
    """Node that runs the Lotus UNet once over a batch of latents."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "lotus_unet": ("LOTUSUNET",),
                "samples": ("LATENT",),
                "seed": ("INT", {"default": 123, "min": 0, "max": 2**32, "step": 1}),
                "per_batch": ("INT", {"default": 4, "min": 1, "max": 4096, "step": 1}),
                "keep_model_loaded": ("BOOLEAN", {"default": False}),
            },
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("samples",)
    FUNCTION = "sample"
    CATEGORY = "ComfyUI-Lotus"

    def sample(self, lotus_unet, seed, samples, per_batch, keep_model_loaded):
        """Predict output latents with a single UNet call per chunk.

        Args:
            lotus_unet: Dict produced by ``LoadLotusModel.loadmodel``.
            seed: Seed for the noise concatenated to 8-channel model inputs.
            samples: LATENT dict; only its ``"samples"`` tensor is read.
            per_batch: Maximum number of latents per forward pass.
            keep_model_loaded: When False the UNet is moved to the offload
                device afterwards, including when the forward pass raises.

        Returns:
            A 1-tuple holding ``{"samples": tensor}`` with the prediction on
            the CPU, divided by ``LATENT_SCALE``.
        """
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        mm.soft_empty_cache()

        model = lotus_unet["model"]
        dtype = lotus_unet["dtype"]
        in_channels = lotus_unet["in_channels"]

        latents = samples["samples"].to(dtype) * LATENT_SCALE

        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

        if in_channels == 8:  # input for g model is 8 channels
            # One noise sample is drawn on the CPU and shared by every batch
            # item, then appended along the channel dimension.
            single_noise = torch.randn(latents.shape[1:], device=torch.device("cpu"), dtype=dtype, layout=torch.strided)
            repeated_noise = single_noise.unsqueeze(0).repeat(latents.shape[0], 1, 1, 1)
            latents = torch.cat([latents, repeated_noise], dim=1)

        batch_size = latents.shape[0]

        # Every chunk is evaluated at the same fixed timestep.
        timesteps = torch.tensor(999, device=device).long()

        # Class-label input of shape (1, 4): sin and cos of the vector [1, 0].
        task_emb = torch.tensor([1, 0], device=device, dtype=dtype).unsqueeze(0)
        task_emb = torch.cat([torch.sin(task_emb), torch.cos(task_emb)], dim=-1)

        # map_location keeps loading independent of the device the embedding
        # was saved from; it is moved to the compute device right after.
        prompt_embeds = torch.load(
            os.path.join(script_directory, "empty_text_embed.pt"),
            map_location="cpu",
            weights_only=True,
        ).to(device).to(dtype)
        extended_prompt_embeds = prompt_embeds.repeat(batch_size, 1, 1)

        model.to(device)
        pbar = ProgressBar(batch_size)

        results = []
        try:
            for start_idx in range(0, batch_size, per_batch):
                end_idx = start_idx + per_batch
                sub_images = model(
                    latents[start_idx:end_idx].to(device),
                    timesteps,
                    encoder_hidden_states=extended_prompt_embeds[start_idx:end_idx],
                    cross_attention_kwargs=None,
                    return_dict=False,
                    class_labels=task_emb,
                )[0]

                results.append(sub_images.cpu())
                pbar.update(sub_images.shape[0])
        finally:
            # Release the compute device even if the forward pass failed.
            if not keep_model_loaded:
                model.to(offload_device)
                mm.soft_empty_cache()

        results = torch.cat(results, dim=0) / LATENT_SCALE

        return ({"samples": results},)

    # Backward-compatible alias: the entry point used to be named "loadmodel".
    loadmodel = sample


NODE_CLASS_MAPPINGS = {
    "LoadLotusModel": LoadLotusModel,
    "LotusSampler": LotusSampler,
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "LoadLotusModel": "Load Lotus Model",
    "LotusSampler": "Lotus Sampler",
}
4 | version = "1.0.0" 5 | license = {file = "LICENSE"} 6 | dependencies = ["diffusers"] 7 | 8 | [project.urls] 9 | Repository = "https://github.com/kijai/ComfyUI-Lotus" 10 | # Used by Comfy Registry https://comfyregistry.org 11 | 12 | [tool.comfy] 13 | PublisherId = "kijai" 14 | DisplayName = "ComfyUI-Lotus" 15 | Icon = "" 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers --------------------------------------------------------------------------------