├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── example_vids ├── exp_input_1.mp4 ├── exp_input_2.mp4 ├── exp_input_3.mp4 ├── exp_input_4.mp4 └── exp_input_5.mp4 ├── nodes.py └── workflow ├── RAVE_basic_workflow.json └── RAVE_basic_workflow_output.mp4 /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According 
to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 spacepxl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## ComfyUI-RAVE 2 | 3 | [RAVE_00005.webm](https://github.com/spacepxl/ComfyUI-RAVE/assets/143970342/4d1b065d-f114-448b-ad40-ac7b337fbb67) 4 | 5 | Unofficial ComfyUI implementation of [RAVE](https://rave-video.github.io/) 6 | 7 | Use [BlenderNeko's Unsampler](https://github.com/BlenderNeko/ComfyUI_Noise) for noise inversion. A basic workflow is included, using the cupcake train example from the RAVE paper. 8 | 9 | Most of the testing was done with SD1.5, but SDXL does work, although not as well (possibly because the multi-resolution training reduces the tiling effect?) 
10 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS 2 | 3 | __all__ = ['NODE_CLASS_MAPPINGS', 'NODE_DISPLAY_NAME_MAPPINGS'] -------------------------------------------------------------------------------- /example_vids/exp_input_1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spacepxl/ComfyUI-RAVE/92d19be86cdcec198088757c07873919d3c8e1b1/example_vids/exp_input_1.mp4 -------------------------------------------------------------------------------- /example_vids/exp_input_2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spacepxl/ComfyUI-RAVE/92d19be86cdcec198088757c07873919d3c8e1b1/example_vids/exp_input_2.mp4 -------------------------------------------------------------------------------- /example_vids/exp_input_3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spacepxl/ComfyUI-RAVE/92d19be86cdcec198088757c07873919d3c8e1b1/example_vids/exp_input_3.mp4 -------------------------------------------------------------------------------- /example_vids/exp_input_4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spacepxl/ComfyUI-RAVE/92d19be86cdcec198088757c07873919d3c8e1b1/example_vids/exp_input_4.mp4 -------------------------------------------------------------------------------- /example_vids/exp_input_5.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spacepxl/ComfyUI-RAVE/92d19be86cdcec198088757c07873919d3c8e1b1/example_vids/exp_input_5.mp4 -------------------------------------------------------------------------------- 
# nodes.py
"""ComfyUI custom nodes for RAVE-style grid sampling.

Frames of a batch are tiled into ``x_dim`` x ``x_dim`` grids (optionally in a
seeded random arrangement), sampled one step at a time as grids, and split
back into individual frames after every step.
"""
import copy
import math
import os
import sys

import numpy as np
import torch
from torchvision.utils import make_grid
from tqdm.auto import trange, tqdm

import comfy.model_management
import comfy.sample
import comfy.samplers
import comfy.utils
import latent_preview


def grid_compose(images, x_dim, random, rs, pad=0):
    """Tile a batch of channels-last images into square grids.

    Args:
        images: 4D tensor indexed as (batch, rows, cols, channels).
        x_dim: Number of tiles per grid side; each grid holds ``x_dim ** 2`` tiles.
        random: When true, tiles are placed in a permutation seeded by ``rs``.
        rs: Seed passed to ``torch.manual_seed`` (this reseeds the global RNG).
        pad: Pixels of padding inserted between neighbouring tiles.

    Returns:
        Tensor of shape (num_grids, grid_rows, grid_cols, channels) where
        ``num_grids == ceil(batch / x_dim ** 2)``.

    Raises:
        ValueError: If ``images`` contains no frames.
    """
    grid_size = x_dim * x_dim
    num_images = images.size(0)
    if num_images == 0:
        raise ValueError("grid_compose requires at least one image")

    num_grids = math.ceil(num_images / grid_size)
    num_slots = num_grids * grid_size

    if random:
        torch.manual_seed(rs)
        order = torch.randperm(num_slots)
        # Slots beyond the real batch are filled with copies of the last frame.
        order = torch.clamp(order, max=num_images - 1)
        tiles = images[order]
    else:
        # Keep the input's dtype/device; unused trailing slots stay zero.
        tiles = images.new_zeros((num_slots,) + tuple(images.shape[1:]))
        tiles[:num_images] = images

    grids = []
    for start in range(0, num_slots, grid_size):
        chunk = tiles[start:start + grid_size]
        # make_grid works on channels-first input; convert there and back.
        grid = make_grid(chunk.movedim(-1, 1), nrow=x_dim, padding=pad).movedim(0, 2)[None,]
        if pad > 0:
            # Drop the outer border so padding only remains between tiles.
            grid = grid[:, pad:-pad, pad:-pad, :]
        grids.append(grid)

    return torch.cat(grids, 0)


def grid_decompose(images, x_dim, random, rs, pad=0):
    """Split grids produced by :func:`grid_compose` back into single tiles.

    Args:
        images: 4D tensor of grids indexed as (grid, rows, cols, channels).
        x_dim: Number of tiles per grid side.
        random: When true, undo the permutation seeded by ``rs``.
        rs: Seed passed to ``torch.manual_seed`` (this reseeds the global RNG).
        pad: Pixels of padding that separate neighbouring tiles.

    Returns:
        Tensor with ``images.size(0) * x_dim ** 2`` tiles along dim 0.

    Raises:
        ValueError: If ``images`` contains no grids.
    """
    num_grids = images.size(0)
    if num_grids == 0:
        raise ValueError("grid_decompose requires at least one grid")

    grid_size = x_dim * x_dim
    num_tiles = num_grids * grid_size

    total_padding = pad * (x_dim - 1)
    tile_rows = (images.size(1) - total_padding) // x_dim
    tile_cols = (images.size(2) - total_padding) // x_dim

    tiles = []
    for grid in images:
        for j in range(grid_size):
            row, col = divmod(j, x_dim)
            r0 = row * (tile_rows + pad)
            c0 = col * (tile_cols + pad)
            tiles.append(grid[r0:r0 + tile_rows, c0:c0 + tile_cols][None,])

    t = torch.cat(tiles, 0)

    if random:
        torch.manual_seed(rs)
        order = torch.randperm(num_tiles)
        # Tile k of the grids was taken from position order[k]; put it back.
        t[order] = t.clone()

    return t


def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent,
                    denoise=1.0, disable_noise=False, start_step=None, last_step=None,
                    force_full_denoise=False):
    """Run ``comfy.sample.sample`` on a latent dict and return the result.

    Noise is all zeros when ``disable_noise`` is set, otherwise it comes from
    ``comfy.sample.prepare_noise``. The sampler's own progress bar is disabled.

    Returns:
        One-element tuple holding a shallow copy of ``latent`` whose
        ``"samples"`` entry is replaced by the sampled tensor.
    """
    latent_image = latent["samples"]

    if disable_noise:
        noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype,
                            layout=latent_image.layout, device="cpu")
    else:
        batch_inds = latent.get("batch_index")
        noise = comfy.sample.prepare_noise(latent_image, seed, batch_inds)

    noise_mask = latent.get("noise_mask")

    callback = latent_preview.prepare_callback(model, steps)
    samples = comfy.sample.sample(
        model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image,
        denoise=denoise, disable_noise=disable_noise, start_step=start_step, last_step=last_step,
        force_full_denoise=force_full_denoise, noise_mask=noise_mask, callback=callback,
        disable_pbar=True, seed=seed)

    out = latent.copy()
    out["samples"] = samples
    return (out, )


def calc_sigma(model, sampler_name, scheduler, steps, start_at_step, end_at_step):
    """Return the sigma difference between two steps of the sampler schedule.

    The value is ``sigmas[start] - sigmas[end]`` divided by the model's latent
    ``scale_factor``, returned as a NumPy value. ``end`` is capped at ``steps``
    and ``start`` is capped at ``end``.
    """
    device = comfy.model_management.get_torch_device()
    end = min(steps, end_at_step)
    start = min(start_at_step, end)

    comfy.model_management.load_model_gpu(model)
    sampler = comfy.samplers.KSampler(model.model, steps=steps, device=device, sampler=sampler_name,
                                      scheduler=scheduler, denoise=1.0,
                                      model_options=model.model_options)
    sigmas = sampler.sigmas
    sigma = sigmas[start] - sigmas[end]
    sigma /= model.model.latent_format.scale_factor
    return sigma.cpu().numpy()


def rave_prepare_mask(noise_mask, shape):
    """Resize a mask to 8x the latent's spatial size and expand it to ``shape``.

    The mask is reshaped to (-1, 1, h, w), resampled with ``nearest-exact`` to
    ``(shape[2] * 8, shape[3] * 8)``, repeated ``shape[1]`` times along the
    channel dim and repeated to a batch size of ``shape[0]``.
    """
    noise_mask = torch.nn.functional.interpolate(
        noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])),
        size=(shape[2] * 8, shape[3] * 8), mode='nearest-exact')
    noise_mask = torch.cat([noise_mask] * shape[1], dim=1)
    noise_mask = comfy.utils.repeat_to_batch_size(noise_mask, shape[0])
    return noise_mask


def _iter_controlnet_chain(control):
    """Yield ``control`` and every controlnet reachable via ``previous_controlnet``."""
    while control is not None:
        yield control
        control = control.previous_controlnet


def _grid_node_inputs(input_name, input_type):
    """Build the INPUT_TYPES dict shared by the grid compose/decompose nodes."""
    return {"required": {
                input_name: (input_type, ),
                "x_dim": ("INT", {"default": 3, "min": 2, "max": 8}),
                "pad_grid": ("BOOLEAN", {"default": False}),
                "random": ("BOOLEAN", {"default": False}),
                "rs": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                }
            }


class KSamplerRAVE:
    """Sampler node that denoises a frame batch as randomly shuffled grids."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required":
                    {"model": ("MODEL",),
                    "grid_size": ("INT", {"default": 3, "min": 2, "max": 8}),
                    "pad_grid": ("BOOLEAN", {"default": False}),
                    "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                    "add_noise": ("BOOLEAN", {"default": False}),
                    "steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
                    "cfg": ("FLOAT", {"default": 7.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.1}),
                    "sampler_name": (comfy.samplers.KSampler.SAMPLERS, ),
                    "scheduler": (comfy.samplers.KSampler.SCHEDULERS, ),
                    "positive": ("CONDITIONING", ),
                    "negative": ("CONDITIONING", ),
                    "latent_image": ("LATENT", ),
                    "start_at_step": ("INT", {"default": 0, "min": 0, "max": 10000}),
                    "end_at_step": ("INT", {"default": 10000, "min": 0, "max": 10000}),
                    }
                }

    RETURN_TYPES = ("LATENT", )
    FUNCTION = "sample"

    CATEGORY = "RAVE"

    def sample(self, model, grid_size, pad_grid, noise_seed, add_noise, steps, cfg, sampler_name,
               scheduler, positive, negative, latent_image, start_at_step, end_at_step):
        """Sample ``latent_image`` one step at a time on reshuffled grids.

        For every step the latents (plus any noise mask, controlnet hints,
        controlnet masks and conditioning masks) are tiled into grids with a
        seed that is incremented each step, sampled for a single step, and
        split back into frames. Controlnet hints and conditioning masks are
        temporarily replaced on the passed-in objects and restored afterwards,
        even when sampling raises.

        Returns:
            One-element tuple with a deep copy of ``latent_image`` whose
            ``"samples"`` (and ``"noise_mask"`` if present) are replaced.
        """
        latent = latent_image["samples"].clone()
        batch_length = latent.size(0)

        mask_enabled = "noise_mask" in latent_image
        noise_mask = None
        if mask_enabled:
            noise_mask = latent_image["noise_mask"].clone()
            noise_mask = comfy.sample.prepare_mask(noise_mask, latent.shape, "cpu")[:, 0, :, :].unsqueeze(1)
            # Binarize: any positive mask value counts as fully masked.
            noise_mask = (noise_mask > 0).type(noise_mask.dtype)
            noise_mask = rave_prepare_mask(noise_mask, latent.shape)[:, 0, :, :].unsqueeze(1)

        pad = 1 if pad_grid else 0

        print("RAVE sampling with %d frames (%d grids)" % (batch_length, math.ceil(batch_length / (grid_size ** 2))))

        # check pos and neg for controlnets and masks
        conditionings = (positive, negative)
        controlnet_exist = any('control' in t[1] for cond in conditionings for t in cond)
        cond_mask_exists = any('mask' in t[1] for cond in conditionings for t in cond)

        # remember the original condition masks (None where an entry has none)
        cond_masks_pos = []
        cond_masks_neg = []
        if cond_mask_exists:
            cond_masks_pos = [t[1].get('mask') for t in positive]
            cond_masks_neg = [t[1].get('mask') for t in negative]

        # Collect every distinct controlnet object with its original hint and
        # mask. The three lists stay index-aligned: a controlnet without a
        # mask gets a None placeholder.
        control_objs = []
        control_images = []
        control_masks = []
        if controlnet_exist:
            seen = set()
            for cond in conditionings:
                for t in cond:
                    for control in _iter_controlnet_chain(t[1].get('control')):
                        if id(control) in seen:
                            continue
                        seen.add(id(control))
                        control_objs.append(control)
                        control_images.append(control.cond_hint_original)
                        control_masks.append(getattr(control, 'mask_cond_hint_original', None))

        # add random noise if enabled
        if add_noise:
            noise = comfy.sample.prepare_noise(latent, noise_seed)
            if mask_enabled:
                resized_mask = torch.nn.functional.interpolate(
                    noise_mask, size=(noise.size(2), noise.size(3)), mode="bilinear")
                # Single-channel mask broadcasts over all latent channels.
                noise = noise * resized_mask
            sigma = calc_sigma(model, sampler_name, scheduler, steps, start_at_step, end_at_step)
            latent = latent + noise * sigma

        # iterate steps
        seed = noise_seed
        total_steps = max(0, min(steps, end_at_step) - start_at_step)
        pbar = comfy.utils.ProgressBar(total_steps)
        try:
            for step in trange(total_steps, delay=1):
                # grid latents in random arrangement
                grid = {"samples": grid_compose(latent.movedim(1,3), grid_size, True, seed, pad).movedim(-1,1)}

                # grid latent mask if it exists
                if mask_enabled:
                    grid["noise_mask"] = grid_compose(noise_mask.movedim(1,3), grid_size, True, seed, pad).movedim(-1,1)[:,0:1,:,:]

                # grid controlnet images/masks and apply (pixel space: pad * 8)
                for control, image, mask in zip(control_objs, control_images, control_masks):
                    ctrl_img = grid_compose(image.movedim(1,3), grid_size, True, seed, pad*8).movedim(-1,1)
                    control.set_cond_hint(ctrl_img, control.strength, control.timestep_percent_range)
                    if mask is not None:
                        ctrl_mask = grid_compose(mask.unsqueeze(1).movedim(1,3), grid_size, True, seed, pad*8).movedim(-1,1)
                        control.set_cond_hint_mask(ctrl_mask)

                # grid condition masks and apply
                for cond, originals in ((positive, cond_masks_pos), (negative, cond_masks_neg)):
                    for entry, mask in zip(cond, originals):
                        if mask is not None:
                            cmask = grid_compose(mask.unsqueeze(1).movedim(1,3), grid_size, True, seed, pad*8).movedim(-1,1)
                            entry[1]['mask'] = cmask[:, 0, :, :]

                # sample 1 step
                start = start_at_step + step
                end = start + 1
                result = common_ksampler(model, noise_seed, steps, cfg, sampler_name, scheduler, positive, negative, grid,
                                         denoise=1.0, disable_noise=True, start_step=start, last_step=end,
                                         force_full_denoise=False)

                # ungrid latents and increment seed to shuffle grids with a different arrangement on the next step
                latent = grid_decompose(result[0]["samples"].movedim(1,3), grid_size, True, seed, pad).movedim(-1,1)

                seed += 1
                pbar.update(1)
        finally:
            # Restore the shared controlnet/conditioning objects even if the
            # job failed or was interrupted mid-way.
            for control, image, mask in zip(control_objs, control_images, control_masks):
                control.set_cond_hint(image, control.strength, control.timestep_percent_range)
                if mask is not None:
                    control.set_cond_hint_mask(mask)

            for cond, originals in ((positive, cond_masks_pos), (negative, cond_masks_neg)):
                for entry, mask in zip(cond, originals):
                    if mask is not None:
                        entry[1]['mask'] = mask

        out = copy.deepcopy(latent_image)
        # Composing pads the batch up to a whole number of grids; trim it back.
        out["samples"] = latent[:batch_length]
        if mask_enabled:
            out["noise_mask"] = noise_mask[:batch_length]
        return (out, )


class ImageGridCompose:
    """Node that tiles an IMAGE batch into grids."""

    @classmethod
    def INPUT_TYPES(cls):
        return _grid_node_inputs("images", "IMAGE")

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "compose"

    CATEGORY = "RAVE/Image"

    def compose(self, images, x_dim, pad_grid, random, rs):
        """Return the composed grids; padding is 8 pixels when ``pad_grid`` is set."""
        pad = 1 if pad_grid else 0
        return (grid_compose(images, x_dim, random, rs, pad*8),)


class ImageGridDecompose:
    """Node that splits IMAGE grids back into individual images."""

    @classmethod
    def INPUT_TYPES(cls):
        return _grid_node_inputs("images", "IMAGE")

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "decompose"

    CATEGORY = "RAVE/Image"

    def decompose(self, images, x_dim, pad_grid, random, rs):
        """Return the individual tiles; padding is 8 pixels when ``pad_grid`` is set."""
        pad = 1 if pad_grid else 0
        return (grid_decompose(images, x_dim, random, rs, pad*8),)


class LatentGridCompose:
    """Node that tiles a LATENT batch into grids."""

    @classmethod
    def INPUT_TYPES(cls):
        return _grid_node_inputs("latents", "LATENT")

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "compose"

    CATEGORY = "RAVE/Latent"

    def compose(self, latents, x_dim, pad_grid, random, rs):
        """Return a new latent dict containing only the gridded ``"samples"``."""
        pad = 1 if pad_grid else 0
        t = grid_compose(latents["samples"].movedim(1,3), x_dim, random, rs, pad).movedim(-1,1)
        return ({"samples":t}, )


class LatentGridDecompose:
    """Node that splits LATENT grids back into individual latents."""

    @classmethod
    def INPUT_TYPES(cls):
        return _grid_node_inputs("latents", "LATENT")

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "decompose"

    CATEGORY = "RAVE/Latent"

    def decompose(self, latents, x_dim, pad_grid, random, rs):
        """Return a new latent dict containing only the ungridded ``"samples"``."""
        pad = 1 if pad_grid else 0
        t = grid_decompose(latents["samples"].movedim(1,3), x_dim, random, rs, pad).movedim(-1,1)
        return ({"samples":t}, )


class ConditioningDebug:
    """Pass-through node that prints the controlnets attached to a conditioning."""

    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"conditioning": ("CONDITIONING", )}}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "debug"

    CATEGORY = "RAVE/debug"

    def debug(self, conditioning):
        """Print each controlnet object and its hint image shape, then return the input."""
        control_objs = []
        control_images = []
        for t in conditioning:
            # Entries without a controlnet contribute nothing.
            for control in _iter_controlnet_chain(t[1].get('control')):
                control_objs.append(control)
                control_images.append(control.cond_hint_original)

        print("control_objs")
        for element in control_objs:
            print(element)
        print("control_images")
        for element in control_images:
            print(element.shape)

        return (conditioning, )


# ConditioningDebug is intentionally left unregistered in both mappings.
NODE_CLASS_MAPPINGS = {
    "KSamplerRAVE": KSamplerRAVE,
    "ImageGridCompose": ImageGridCompose,
    "ImageGridDecompose": ImageGridDecompose,
    "LatentGridCompose": LatentGridCompose,
    "LatentGridDecompose": LatentGridDecompose,
    # "ConditioningDebug": ConditioningDebug,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "KSamplerRAVE": "KSampler (RAVE)",
    "ImageGridCompose": "ImageGridCompose",
    "ImageGridDecompose": "ImageGridDecompose",
    "LatentGridCompose": "LatentGridCompose",
    "LatentGridDecompose": "LatentGridDecompose",
    # "ConditioningDebug": "ConditioningDebug",
}
-------------------------------------------------------------------------------- /workflow/RAVE_basic_workflow.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 15, 3 | "last_link_id": 27, 4 | "nodes": [ 5 | { 6 | "id": 4, 7 | "type": "CLIPTextEncode", 8 | "pos": [ 9 | 660, 10 | 390 11 | ], 12 | "size": { 13 | "0": 400, 14 | "1": 200 15 | }, 16 | "flags": {}, 17 | "order": 4, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "clip", 22 | "type": "CLIP", 23 | "link": 3, 24 | "slot_index": 0 25 | } 26 | ], 27 | "outputs": [ 28 | { 29 | "name": "CONDITIONING", 30 | "type": "CONDITIONING", 31 | "links": [ 32 | 5 33 | ], 34 | "shape": 3 35 | } 36 | ], 37 | "properties": { 38 | "Node name for S&R": "CLIPTextEncode" 39 | }, 40 | "widgets_values": [ 41 | "" 42 | ] 43 | }, 44 | { 45 | "id": 3, 46 | "type": "CLIPTextEncode", 47 | "pos": [ 48 | 660, 49 | 140 50 | ], 51 | "size": { 52 | "0": 400, 53 | "1": 200 54 | }, 55 | "flags": {}, 56 | "order": 3, 57 | "mode": 0, 58 | "inputs": [ 59 | { 60 | "name": "clip", 61 | "type": "CLIP", 62 | "link": 2 63 | } 64 | ], 65 | "outputs": [ 66 | { 67 | "name": "CONDITIONING", 68 | "type": "CONDITIONING", 69 | "links": [ 70 | 4 71 | ], 72 | "shape": 3, 73 | "slot_index": 0 74 | } 75 | ], 76 | "properties": { 77 | "Node name for S&R": "CLIPTextEncode" 78 | }, 79 | "widgets_values": [ 80 | "white cupcakes moving on the table" 81 | ] 82 | }, 83 | { 84 | "id": 6, 85 | "type": "ControlNetLoader", 86 | "pos": [ 87 | 1210, 88 | 160 89 | ], 90 | "size": { 91 | "0": 315, 92 | "1": 58 93 | }, 94 | "flags": {}, 95 | "order": 0, 96 | "mode": 0, 97 | "outputs": [ 98 | { 99 | "name": "CONTROL_NET", 100 | "type": "CONTROL_NET", 101 | "links": [ 102 | 6, 103 | 11 104 | ], 105 | "shape": 3 106 | } 107 | ], 108 | "properties": { 109 | "Node name for S&R": "ControlNetLoader" 110 | }, 111 | "widgets_values": [ 112 | "control_sd15_depth.safetensors" 113 | ] 114 | }, 115 | { 116 | "id": 2, 117 | 
"type": "CheckpointLoaderSimple", 118 | "pos": [ 119 | 100, 120 | 330 121 | ], 122 | "size": { 123 | "0": 315, 124 | "1": 98 125 | }, 126 | "flags": {}, 127 | "order": 1, 128 | "mode": 0, 129 | "outputs": [ 130 | { 131 | "name": "MODEL", 132 | "type": "MODEL", 133 | "links": [ 134 | 1, 135 | 20 136 | ], 137 | "shape": 3, 138 | "slot_index": 0 139 | }, 140 | { 141 | "name": "CLIP", 142 | "type": "CLIP", 143 | "links": [ 144 | 2, 145 | 3, 146 | 13 147 | ], 148 | "shape": 3, 149 | "slot_index": 1 150 | }, 151 | { 152 | "name": "VAE", 153 | "type": "VAE", 154 | "links": [ 155 | 17, 156 | 26 157 | ], 158 | "shape": 3 159 | } 160 | ], 161 | "properties": { 162 | "Node name for S&R": "CheckpointLoaderSimple" 163 | }, 164 | "widgets_values": [ 165 | "v1-5-pruned-emaonly.safetensors" 166 | ] 167 | }, 168 | { 169 | "id": 9, 170 | "type": "Zoe-DepthMapPreprocessor", 171 | "pos": [ 172 | 700, 173 | 690 174 | ], 175 | "size": { 176 | "0": 315, 177 | "1": 58 178 | }, 179 | "flags": {}, 180 | "order": 6, 181 | "mode": 0, 182 | "inputs": [ 183 | { 184 | "name": "image", 185 | "type": "IMAGE", 186 | "link": 7, 187 | "slot_index": 0 188 | } 189 | ], 190 | "outputs": [ 191 | { 192 | "name": "IMAGE", 193 | "type": "IMAGE", 194 | "links": [ 195 | 8, 196 | 12 197 | ], 198 | "shape": 3, 199 | "slot_index": 0 200 | } 201 | ], 202 | "properties": { 203 | "Node name for S&R": "Zoe-DepthMapPreprocessor" 204 | }, 205 | "widgets_values": [ 206 | 320 207 | ] 208 | }, 209 | { 210 | "id": 11, 211 | "type": "CLIPTextEncode", 212 | "pos": [ 213 | 760, 214 | 850 215 | ], 216 | "size": [ 217 | 210, 218 | 76.00003051757812 219 | ], 220 | "flags": {}, 221 | "order": 5, 222 | "mode": 0, 223 | "inputs": [ 224 | { 225 | "name": "clip", 226 | "type": "CLIP", 227 | "link": 13 228 | } 229 | ], 230 | "outputs": [ 231 | { 232 | "name": "CONDITIONING", 233 | "type": "CONDITIONING", 234 | "links": [ 235 | 14, 236 | 15 237 | ], 238 | "shape": 3, 239 | "slot_index": 0 240 | } 241 | ], 242 | "properties": { 243 | 
"Node name for S&R": "CLIPTextEncode" 244 | }, 245 | "widgets_values": [ 246 | "" 247 | ] 248 | }, 249 | { 250 | "id": 12, 251 | "type": "VAEEncode", 252 | "pos": [ 253 | 780, 254 | 1060 255 | ], 256 | "size": { 257 | "0": 210, 258 | "1": 46 259 | }, 260 | "flags": {}, 261 | "order": 7, 262 | "mode": 0, 263 | "inputs": [ 264 | { 265 | "name": "pixels", 266 | "type": "IMAGE", 267 | "link": 16 268 | }, 269 | { 270 | "name": "vae", 271 | "type": "VAE", 272 | "link": 17, 273 | "slot_index": 1 274 | } 275 | ], 276 | "outputs": [ 277 | { 278 | "name": "LATENT", 279 | "type": "LATENT", 280 | "links": [ 281 | 21 282 | ], 283 | "shape": 3 284 | } 285 | ], 286 | "properties": { 287 | "Node name for S&R": "VAEEncode" 288 | } 289 | }, 290 | { 291 | "id": 10, 292 | "type": "ControlNetApplyAdvanced", 293 | "pos": [ 294 | 1170, 295 | 790 296 | ], 297 | "size": { 298 | "0": 315, 299 | "1": 166 300 | }, 301 | "flags": {}, 302 | "order": 9, 303 | "mode": 0, 304 | "inputs": [ 305 | { 306 | "name": "positive", 307 | "type": "CONDITIONING", 308 | "link": 14 309 | }, 310 | { 311 | "name": "negative", 312 | "type": "CONDITIONING", 313 | "link": 15, 314 | "slot_index": 1 315 | }, 316 | { 317 | "name": "control_net", 318 | "type": "CONTROL_NET", 319 | "link": 11, 320 | "slot_index": 2 321 | }, 322 | { 323 | "name": "image", 324 | "type": "IMAGE", 325 | "link": 12 326 | } 327 | ], 328 | "outputs": [ 329 | { 330 | "name": "positive", 331 | "type": "CONDITIONING", 332 | "links": [ 333 | 18 334 | ], 335 | "shape": 3, 336 | "slot_index": 0 337 | }, 338 | { 339 | "name": "negative", 340 | "type": "CONDITIONING", 341 | "links": [ 342 | 19 343 | ], 344 | "shape": 3, 345 | "slot_index": 1 346 | } 347 | ], 348 | "properties": { 349 | "Node name for S&R": "ControlNetApplyAdvanced" 350 | }, 351 | "widgets_values": [ 352 | 0.7, 353 | 0, 354 | 1 355 | ] 356 | }, 357 | { 358 | "id": 5, 359 | "type": "ControlNetApplyAdvanced", 360 | "pos": [ 361 | 1210, 362 | 300 363 | ], 364 | "size": { 365 | "0": 315, 
366 | "1": 166 367 | }, 368 | "flags": {}, 369 | "order": 8, 370 | "mode": 0, 371 | "inputs": [ 372 | { 373 | "name": "positive", 374 | "type": "CONDITIONING", 375 | "link": 4 376 | }, 377 | { 378 | "name": "negative", 379 | "type": "CONDITIONING", 380 | "link": 5, 381 | "slot_index": 1 382 | }, 383 | { 384 | "name": "control_net", 385 | "type": "CONTROL_NET", 386 | "link": 6, 387 | "slot_index": 2 388 | }, 389 | { 390 | "name": "image", 391 | "type": "IMAGE", 392 | "link": 8 393 | } 394 | ], 395 | "outputs": [ 396 | { 397 | "name": "positive", 398 | "type": "CONDITIONING", 399 | "links": [ 400 | 24 401 | ], 402 | "shape": 3, 403 | "slot_index": 0 404 | }, 405 | { 406 | "name": "negative", 407 | "type": "CONDITIONING", 408 | "links": [ 409 | 23 410 | ], 411 | "shape": 3 412 | } 413 | ], 414 | "properties": { 415 | "Node name for S&R": "ControlNetApplyAdvanced" 416 | }, 417 | "widgets_values": [ 418 | 0.7, 419 | 0, 420 | 1 421 | ] 422 | }, 423 | { 424 | "id": 8, 425 | "type": "VHS_LoadVideo", 426 | "pos": [ 427 | 80, 428 | 650 429 | ], 430 | "size": [ 431 | 360, 432 | 440 433 | ], 434 | "flags": {}, 435 | "order": 2, 436 | "mode": 0, 437 | "outputs": [ 438 | { 439 | "name": "IMAGE", 440 | "type": "IMAGE", 441 | "links": [ 442 | 7, 443 | 16 444 | ], 445 | "shape": 3, 446 | "slot_index": 0 447 | }, 448 | { 449 | "name": "frame_count", 450 | "type": "INT", 451 | "links": null, 452 | "shape": 3 453 | } 454 | ], 455 | "properties": { 456 | "Node name for S&R": "VHS_LoadVideo" 457 | }, 458 | "widgets_values": { 459 | "video": "exp_input_5.mp4", 460 | "force_rate": 0, 461 | "force_size": "Disabled", 462 | "custom_width": 512, 463 | "custom_height": 512, 464 | "frame_load_cap": 0, 465 | "skip_first_frames": 0, 466 | "select_every_nth": 1, 467 | "choose video to upload": "image", 468 | "videopreview": { 469 | "hidden": false, 470 | "paused": false, 471 | "params": { 472 | "filename": "exp_input_5.mp4", 473 | "type": "input", 474 | "format": "video" 475 | } 476 | } 477 | } 
478 | }, 479 | { 480 | "id": 14, 481 | "type": "VAEDecode", 482 | "pos": [ 483 | 2440, 484 | 520 485 | ], 486 | "size": { 487 | "0": 210, 488 | "1": 46 489 | }, 490 | "flags": {}, 491 | "order": 12, 492 | "mode": 0, 493 | "inputs": [ 494 | { 495 | "name": "samples", 496 | "type": "LATENT", 497 | "link": 25 498 | }, 499 | { 500 | "name": "vae", 501 | "type": "VAE", 502 | "link": 26, 503 | "slot_index": 1 504 | } 505 | ], 506 | "outputs": [ 507 | { 508 | "name": "IMAGE", 509 | "type": "IMAGE", 510 | "links": [ 511 | 27 512 | ], 513 | "shape": 3, 514 | "slot_index": 0 515 | } 516 | ], 517 | "properties": { 518 | "Node name for S&R": "VAEDecode" 519 | } 520 | }, 521 | { 522 | "id": 15, 523 | "type": "VHS_VideoCombine", 524 | "pos": [ 525 | 2440, 526 | 620 527 | ], 528 | "size": [ 529 | 550, 530 | 580 531 | ], 532 | "flags": {}, 533 | "order": 13, 534 | "mode": 0, 535 | "inputs": [ 536 | { 537 | "name": "images", 538 | "type": "IMAGE", 539 | "link": 27 540 | } 541 | ], 542 | "outputs": [], 543 | "properties": { 544 | "Node name for S&R": "VHS_VideoCombine" 545 | }, 546 | "widgets_values": { 547 | "frame_rate": 12, 548 | "loop_count": 0, 549 | "filename_prefix": "RAVE", 550 | "format": "video/h264-mp4", 551 | "pingpong": false, 552 | "save_image": true, 553 | "crf": 16, 554 | "save_metadata": true, 555 | "audio_file": "", 556 | "videopreview": { 557 | "hidden": false, 558 | "paused": false, 559 | "params": { 560 | "filename": "RAVE_00003.mp4", 561 | "subfolder": "", 562 | "type": "output", 563 | "format": "video/h264-mp4" 564 | } 565 | } 566 | } 567 | }, 568 | { 569 | "id": 13, 570 | "type": "BNK_Unsampler", 571 | "pos": [ 572 | 1580, 573 | 780 574 | ], 575 | "size": { 576 | "0": 315, 577 | "1": 238 578 | }, 579 | "flags": {}, 580 | "order": 10, 581 | "mode": 0, 582 | "inputs": [ 583 | { 584 | "name": "model", 585 | "type": "MODEL", 586 | "link": 20, 587 | "slot_index": 0 588 | }, 589 | { 590 | "name": "positive", 591 | "type": "CONDITIONING", 592 | "link": 18 593 | }, 
594 | { 595 | "name": "negative", 596 | "type": "CONDITIONING", 597 | "link": 19 598 | }, 599 | { 600 | "name": "latent_image", 601 | "type": "LATENT", 602 | "link": 21, 603 | "slot_index": 3 604 | } 605 | ], 606 | "outputs": [ 607 | { 608 | "name": "LATENT", 609 | "type": "LATENT", 610 | "links": [ 611 | 22 612 | ], 613 | "shape": 3, 614 | "slot_index": 0 615 | } 616 | ], 617 | "properties": { 618 | "Node name for S&R": "BNK_Unsampler" 619 | }, 620 | "widgets_values": [ 621 | 25, 622 | 0, 623 | 1, 624 | "dpmpp_2m", 625 | "simple", 626 | "disable" 627 | ] 628 | }, 629 | { 630 | "id": 1, 631 | "type": "KSamplerRAVE", 632 | "pos": [ 633 | 2100, 634 | 520 635 | ], 636 | "size": { 637 | "0": 315, 638 | "1": 358 639 | }, 640 | "flags": {}, 641 | "order": 11, 642 | "mode": 0, 643 | "inputs": [ 644 | { 645 | "name": "model", 646 | "type": "MODEL", 647 | "link": 1 648 | }, 649 | { 650 | "name": "positive", 651 | "type": "CONDITIONING", 652 | "link": 24 653 | }, 654 | { 655 | "name": "negative", 656 | "type": "CONDITIONING", 657 | "link": 23, 658 | "slot_index": 2 659 | }, 660 | { 661 | "name": "latent_image", 662 | "type": "LATENT", 663 | "link": 22 664 | } 665 | ], 666 | "outputs": [ 667 | { 668 | "name": "LATENT", 669 | "type": "LATENT", 670 | "links": [ 671 | 25 672 | ], 673 | "shape": 3, 674 | "slot_index": 0 675 | } 676 | ], 677 | "properties": { 678 | "Node name for S&R": "KSamplerRAVE" 679 | }, 680 | "widgets_values": [ 681 | 3, 682 | false, 683 | 0, 684 | "fixed", 685 | false, 686 | 25, 687 | 7, 688 | "dpmpp_2m", 689 | "simple", 690 | 0, 691 | 10000 692 | ] 693 | } 694 | ], 695 | "links": [ 696 | [ 697 | 1, 698 | 2, 699 | 0, 700 | 1, 701 | 0, 702 | "MODEL" 703 | ], 704 | [ 705 | 2, 706 | 2, 707 | 1, 708 | 3, 709 | 0, 710 | "CLIP" 711 | ], 712 | [ 713 | 3, 714 | 2, 715 | 1, 716 | 4, 717 | 0, 718 | "CLIP" 719 | ], 720 | [ 721 | 4, 722 | 3, 723 | 0, 724 | 5, 725 | 0, 726 | "CONDITIONING" 727 | ], 728 | [ 729 | 5, 730 | 4, 731 | 0, 732 | 5, 733 | 1, 734 | 
"CONDITIONING" 735 | ], 736 | [ 737 | 6, 738 | 6, 739 | 0, 740 | 5, 741 | 2, 742 | "CONTROL_NET" 743 | ], 744 | [ 745 | 7, 746 | 8, 747 | 0, 748 | 9, 749 | 0, 750 | "IMAGE" 751 | ], 752 | [ 753 | 8, 754 | 9, 755 | 0, 756 | 5, 757 | 3, 758 | "IMAGE" 759 | ], 760 | [ 761 | 11, 762 | 6, 763 | 0, 764 | 10, 765 | 2, 766 | "CONTROL_NET" 767 | ], 768 | [ 769 | 12, 770 | 9, 771 | 0, 772 | 10, 773 | 3, 774 | "IMAGE" 775 | ], 776 | [ 777 | 13, 778 | 2, 779 | 1, 780 | 11, 781 | 0, 782 | "CLIP" 783 | ], 784 | [ 785 | 14, 786 | 11, 787 | 0, 788 | 10, 789 | 0, 790 | "CONDITIONING" 791 | ], 792 | [ 793 | 15, 794 | 11, 795 | 0, 796 | 10, 797 | 1, 798 | "CONDITIONING" 799 | ], 800 | [ 801 | 16, 802 | 8, 803 | 0, 804 | 12, 805 | 0, 806 | "IMAGE" 807 | ], 808 | [ 809 | 17, 810 | 2, 811 | 2, 812 | 12, 813 | 1, 814 | "VAE" 815 | ], 816 | [ 817 | 18, 818 | 10, 819 | 0, 820 | 13, 821 | 1, 822 | "CONDITIONING" 823 | ], 824 | [ 825 | 19, 826 | 10, 827 | 1, 828 | 13, 829 | 2, 830 | "CONDITIONING" 831 | ], 832 | [ 833 | 20, 834 | 2, 835 | 0, 836 | 13, 837 | 0, 838 | "MODEL" 839 | ], 840 | [ 841 | 21, 842 | 12, 843 | 0, 844 | 13, 845 | 3, 846 | "LATENT" 847 | ], 848 | [ 849 | 22, 850 | 13, 851 | 0, 852 | 1, 853 | 3, 854 | "LATENT" 855 | ], 856 | [ 857 | 23, 858 | 5, 859 | 1, 860 | 1, 861 | 2, 862 | "CONDITIONING" 863 | ], 864 | [ 865 | 24, 866 | 5, 867 | 0, 868 | 1, 869 | 1, 870 | "CONDITIONING" 871 | ], 872 | [ 873 | 25, 874 | 1, 875 | 0, 876 | 14, 877 | 0, 878 | "LATENT" 879 | ], 880 | [ 881 | 26, 882 | 2, 883 | 2, 884 | 14, 885 | 1, 886 | "VAE" 887 | ], 888 | [ 889 | 27, 890 | 14, 891 | 0, 892 | 15, 893 | 0, 894 | "IMAGE" 895 | ] 896 | ], 897 | "groups": [], 898 | "config": {}, 899 | "extra": {}, 900 | "version": 0.4 901 | } -------------------------------------------------------------------------------- /workflow/RAVE_basic_workflow_output.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/spacepxl/ComfyUI-RAVE/92d19be86cdcec198088757c07873919d3c8e1b1/workflow/RAVE_basic_workflow_output.mp4 --------------------------------------------------------------------------------