├── .gitignore
├── modules
│   ├── block_type.py
│   ├── unet_wrapper.py
│   └── temporal_attn_wrapper.py
├── reference
│   ├── ref_mode.py
│   ├── ref_config.py
│   └── ref_controller.py
├── utils
│   ├── ref_utils.py
│   ├── wrapper_utils.py
│   ├── module_utils.py
│   └── noise_utils.py
├── pyproject.toml
├── __init__.py
├── README.md
├── nodes
│   ├── ref_apply_node.py
│   └── ref_settings_node.py
└── example_workflows
    └── motion_thief_example.json

/.gitignore:
--------------------------------------------------------------------------------
1 | **/*.pyc
2 | .DS_Store
--------------------------------------------------------------------------------
/modules/block_type.py:
--------------------------------------------------------------------------------
1 | import enum
2 | 
3 | class BlockType(enum.Enum):
4 |     OUTPUT = 'OUTPUT'
5 | 
--------------------------------------------------------------------------------
/reference/ref_mode.py:
--------------------------------------------------------------------------------
1 | import enum
2 | 
3 | 
4 | class RefMode(enum.Enum):
5 |     OFF = 'OFF'
6 |     WRITE = 'WRITE'
7 |     READ = 'READ'
8 | 
--------------------------------------------------------------------------------
/utils/ref_utils.py:
--------------------------------------------------------------------------------
1 | import comfy.model_management
2 | 
3 | 
4 | def prepare_ref_latents(model, ref_latent):
5 |     base_model = model.model
6 |     ref_latent = ref_latent['samples'].clone()
7 |     ref_latent = base_model.process_latent_in(ref_latent)
8 |     device = comfy.model_management.get_torch_device()
9 |     ref_latent = ref_latent.to(device)
10 |     return ref_latent
11 | 
--------------------------------------------------------------------------------
/utils/wrapper_utils.py:
--------------------------------------------------------------------------------
1 | from ..modules.unet_wrapper import get_unet_wrapper
2 | from ..reference.ref_controller import RefController
3 | 
4 | def setup_ref_unet(model):
5 |     if not hasattr(model.model.diffusion_model, 'is_ref'):
6 |         ref_controller = RefController(model.model.diffusion_model)
7 |         model.model.diffusion_model.__class__ = get_unet_wrapper(model.model.diffusion_model.__class__, ref_controller)
8 |         return ref_controller
9 |     return None
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "comfyui-motionthiefexperiment"
3 | description = "experimental node pack to test using reference videos for their motion."
4 | version = "1.0.0"
5 | license = "LICENSE"
6 | 
7 | [project.urls]
8 | Repository = "https://github.com/logtd/ComfyUI-MotionThiefExperiment"
9 | # Used by Comfy Registry https://comfyregistry.org
10 | 
11 | [tool.comfy]
12 | PublisherId = "logtd"
13 | DisplayName = "ComfyUI-MotionThiefExperiment"
14 | Icon = ""
15 | 
--------------------------------------------------------------------------------
/utils/module_utils.py:
--------------------------------------------------------------------------------
1 | 
2 | def isinstance_str(x: object, cls_name: str) -> bool:
3 |     for _cls in x.__class__.__mro__:
4 |         if _cls.__name__ == cls_name:
5 |             return True
6 | 
7 |     return False
8 | 
9 | 
10 | def is_temporal_block(module):
11 |     return isinstance_str(module, 'TemporalTransformerBlock')
12 | 
13 | 
14 | def is_named_module_transformer_block(named_module):
15 |     return is_temporal_block(named_module[1])
16 | 
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from .nodes.ref_apply_node import ApplyRefMotionNode
2 | from .nodes.ref_settings_node import MotionRefSettingsDefaultNode, MotionRefSettingsCustomNode
3 | 
4 | 
5 | NODE_CLASS_MAPPINGS = {
6 |     "ApplyRefMotionNode": ApplyRefMotionNode,
7 |     "MotionRefSettingsDefaultNode": MotionRefSettingsDefaultNode,
8 |     "MotionRefSettingsCustomNode": MotionRefSettingsCustomNode,
9 | }
10 | 
11 | NODE_DISPLAY_NAME_MAPPINGS = {
12 |     "ApplyRefMotionNode": "Apply Ref Motion",
13 |     "MotionRefSettingsDefaultNode": "Motion Ref Settings",
14 |     "MotionRefSettingsCustomNode": "Motion Ref Settings (Custom)"
15 | }
16 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ComfyUI-MotionThiefExperiment
2 | 
3 | This is an experimental node pack to test using reference videos for their motion.
4 | 
5 | It is not compatible with many other node packs, as it is a hacky implementation intended for experiments only.
6 | 
7 | ## Examples
8 | See the example workflow in `example_workflows` to get started. It uses the basic settings node, but there is also an advanced (Custom) settings node; see the Settings notes at the end of this README.
9 | 
10 | In each example below, the reference video is the first one, on the left.
11 | 
12 | https://github.com/logtd/ComfyUI-MotionThiefExperiment/assets/160989552/396ddddc-b4c2-4e55-a8c8-516981ad688e
13 | 
14 | 
15 | 
16 | 
17 | https://github.com/logtd/ComfyUI-MotionThiefExperiment/assets/160989552/6ca57165-8517-4d06-bf03-6614e4d971e8
18 | 
19 | 
20 | 
21 | 
22 | https://github.com/logtd/ComfyUI-MotionThiefExperiment/assets/160989552/ed9f728a-989a-4b6e-bf27-1e82f50fdc8a
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | https://github.com/logtd/ComfyUI-MotionThiefExperiment/assets/160989552/654879f7-da81-43b1-b1b4-eaf916b308d1
32 | 
33 | 
34 | 
35 | https://github.com/logtd/ComfyUI-MotionThiefExperiment/assets/160989552/e70b9e8e-51f6-4618-ad5d-926c9f5c2239
36 | 
37 | 
38 | 
39 | https://github.com/logtd/ComfyUI-MotionThiefExperiment/assets/160989552/d30e1924-3a08-4f9d-98b1-d853e835dd3b
40 | 
41 | 
42 | 
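43 | ## Settings
44 | 
45 | The default settings node exposes `strength` (0-12) and `fidelity` (0.0-1.0). Roughly, `strength` enables the first N of the 12 output attention blocks for the reference pass, and `fidelity` blends the referenced attention result with the block's normal result (higher values preserve more of the normal, un-referenced result). A minimal sketch of the `strength` mapping, mirroring `nodes/ref_settings_node.py`:
46 | 
47 | ```python
48 | output_attentions = [False] * 12
49 | for i in range(strength):  # e.g. strength=5 enables the first 5 output blocks
50 |     output_attentions[i] = True
51 | ```
52 | 
53 | The (Custom) settings node instead exposes each of the 8 input and 12 output attention blocks individually, plus q/k/v/norm bank toggles, and multiple settings nodes can be chained through `prev_settings`.
54 | 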
--------------------------------------------------------------------------------
/utils/noise_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def get_alphacumprod(sigma):
5 |     return 1 / ((sigma * sigma) + 1)
6 | 
7 | 
8 | def add_noise(src_latent, noise, sigma):
9 |     alphas_cumprod = get_alphacumprod(sigma)
10 | 
11 |     sqrt_alpha_prod = alphas_cumprod ** 0.5
12 |     sqrt_alpha_prod = sqrt_alpha_prod.flatten()
13 |     while len(sqrt_alpha_prod.shape) < len(src_latent.shape):
14 |         sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
15 | 
16 |     sqrt_one_minus_alpha_prod = (1 - alphas_cumprod) ** 0.5
17 |     sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
18 |     while len(sqrt_one_minus_alpha_prod.shape) < len(src_latent.shape):
19 |         sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
20 | 
21 |     noisy_samples = sqrt_alpha_prod * src_latent + sqrt_one_minus_alpha_prod * noise
22 |     return noisy_samples
23 | 
24 | 
25 | def add_noise_test(latents, sigma, noise=None):
26 |     alpha_cumprod = 1 / ((sigma * sigma) + 1)
27 |     sqrt_alpha_prod = alpha_cumprod ** 0.5
28 |     sqrt_one_minus_alpha_prod = (1 - alpha_cumprod) ** 0.5
29 |     if noise is None:
30 |         generator = torch.Generator(device=latents.device)
31 |         # generator.manual_seed(0)
32 |         noise = torch.empty_like(latents).normal_(generator=generator)
33 | 
34 |     return sqrt_alpha_prod * latents + sqrt_one_minus_alpha_prod * noise
--------------------------------------------------------------------------------
/reference/ref_config.py:
--------------------------------------------------------------------------------
1 | 
2 | from typing import List
3 | 
4 | 
5 | class RefSetting:
6 |     input_attentions = []
7 |     output_attentions = []
8 |     q_mode = False
9 |     k_mode = False
10 |     v_mode = False
11 |     normal_mode = False
12 |     normal_fidelity = 0
13 | 
14 |     def __init__(self,
15 |                  input_attentions,
16 |                  output_attentions,
17 |                  q_mode,
18 |                  k_mode,
19 |                  v_mode,
20 |                  normal_mode,
21 |                  normal_fidelity) -> None:
22 |         self.input_attentions = input_attentions
23 |         self.output_attentions = output_attentions
24 |         self.q_mode = q_mode
25 |         self.k_mode = k_mode
26 |         self.v_mode = v_mode
27 |         self.normal_mode = normal_mode
28 |         self.normal_fidelity = normal_fidelity
29 | 
30 | 
31 | class RefConfig:
32 |     settings = []
33 | 
34 |     def __init__(self,
35 |                  ref_latents,
36 |                  positive,
37 |                  negative,
38 |                  sampling,
39 |                  start_percent,
40 |                  end_percent,
41 |                  settings: List[RefSetting]):
42 |         self.ref_latents = ref_latents
43 |         self.positive = positive
44 |         self.negative = negative
45 |         self.sampling = sampling
46 |         self.start_percent = start_percent
47 |         self.end_percent = end_percent
48 |         self.settings = settings
49 | 
--------------------------------------------------------------------------------
/nodes/ref_apply_node.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..modules.unet_wrapper import setup_ref_unet
4 | from ..modules.temporal_attn_wrapper import wrap_temporal_attentions
5 | from ..reference.ref_config import RefConfig
6 | from ..utils.ref_utils import prepare_ref_latents
7 | 
8 | 
9 | class ApplyRefMotionNode:
10 |     @classmethod
11 |     def INPUT_TYPES(s):
12 | 
13 |         return {"required": {
14 |             "model": ("MODEL",),
15 |             "ref_latents": ("LATENT",),
16 |             "enabled": ("BOOLEAN", {"default": True}),
17 |             "positive": ("CONDITIONING",),
18 |             "negative": ("CONDITIONING",),
19 |             "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01, "round": 0.01}),
20 |             "end_percent": ("FLOAT", {"default": 0.70, "min": 0.0, "max": 1.0, "step": 0.01, "round": 0.01}),
21 |             "ref_settings": ("MOTION_REF_SETTINGS",),
22 |         }}
23 |     RETURN_TYPES = ("MODEL",)
24 |     FUNCTION = "apply"
25 | 
26 |     CATEGORY = "reference"
27 | 
28 |     def apply(self,
29 |               model,
30 |               ref_latents,
31 |               enabled,
32 |               positive,
33 |               negative,
34 |               start_percent,
35 |               end_percent,
36 |               ref_settings):
37 |         if not enabled:
38 |             return (model, )
39 | 
40 |         model = model.clone()
41 |         transformer_options = model.model_options.get(
42 |             'transformer_options', {})
43 |         model.model_options['transformer_options'] = transformer_options
44 | 
45 |         ref_latents = prepare_ref_latents(model, ref_latents)
46 |         sampling = model.model.model_sampling
47 | 
48 |         # prompt = torch.cat([negative[0][0]]* 16 +[positive[0][0]]* 16)
49 | 
50 |         ref_config = RefConfig(
51 |             ref_latents,
52 |             positive[0][0],
53 |             negative[0][0],
54 |             sampling,
55 |             start_percent,
56 |             end_percent,
57 |             ref_settings
58 |         )
59 | 
60 |         transformer_options['ref_motion_config'] = ref_config
61 | 
62 |         ref_controller = setup_ref_unet(model)
63 |         if ref_controller is not None:
64 |             model.model_options['ref_controller'] = ref_controller
65 | 
66 |         base_patch_model_fn = model.patch_model
67 | 
68 |         def patch_model(*args, **kwargs):
69 |             rtrn = base_patch_model_fn(*args, **kwargs)
70 |             diffusion_model = model.model.diffusion_model
71 |             wrap_temporal_attentions(diffusion_model)
72 |             return rtrn
73 | 
74 |         # HACK: This is for experiment purposes only
75 |         model.patch_model = patch_model
76 | 
77 |         return (model, )
78 | 
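79 | # Illustrative wiring sketch (hypothetical variable names; assumes a loaded
80 | # ComfyUI model, VAE-encoded reference `ref_latents`, and CLIP-encoded
81 | # conditioning):
82 | #
83 | #   (patched_model,) = ApplyRefMotionNode().apply(
84 | #       model, ref_latents, True, positive, negative,
85 | #       start_percent=0.0, end_percent=0.7, ref_settings=ref_settings)
86 | #
87 | # The patched model is then sampled as usual (e.g. with KSampler); the reference
88 | # pass runs inside the wrapped UNet forward on steps within the sigma window.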
--------------------------------------------------------------------------------
/reference/ref_controller.py:
--------------------------------------------------------------------------------
1 | from ..utils.module_utils import is_named_module_transformer_block
2 | from .ref_mode import RefMode
3 | from .ref_config import RefConfig
4 | 
5 | 
6 | class RefController:
7 |     model = None
8 | 
9 |     def __init__(self, diffusion_model):
10 |         self.model = diffusion_model
11 | 
12 |     def set_ref_count(self, ref_count):
13 |         model = self.model
14 |         input_modules = list(
15 |             filter(is_named_module_transformer_block, model.input_blocks.named_modules()))
16 |         output_modules = list(
17 |             filter(is_named_module_transformer_block, model.output_blocks.named_modules()))
18 | 
19 |         for _, module in input_modules + output_modules:
20 |             for i in range(len(module.attention_blocks)):
21 |                 attn = module.attention_blocks[i]
22 |                 attn.ref_count = ref_count
23 | 
24 |     def set_motion_mode(self, mode: RefMode, config: RefConfig = None):
25 |         model = self.model
26 |         input_modules = list(
27 |             filter(is_named_module_transformer_block, model.input_blocks.named_modules()))
28 |         output_modules = list(
29 |             filter(is_named_module_transformer_block, model.output_blocks.named_modules()))
30 | 
31 |         for _, module in input_modules + output_modules:
32 |             for i in range(len(module.attention_blocks)):
33 |                 attn = module.attention_blocks[i]
34 |                 attn.ref_off()
35 | 
36 |         if config is None or len(config.settings) == 0:
37 |             return
38 | 
39 |         for setting in config.settings:
40 |             for i, (_, module) in enumerate(input_modules):
41 |                 for attn_idx in range(len(module.attention_blocks)):
42 |                     attn = module.attention_blocks[attn_idx]
43 |                     if setting.input_attentions[i]:
44 |                         attn.ref_mode = mode
45 |                         if setting.q_mode:
46 |                             attn.q_mode = True
47 |                         if setting.k_mode:
48 |                             attn.k_mode = True
49 |                         if setting.v_mode:
50 |                             attn.v_mode = True
51 |                         if setting.normal_mode:
52 |                             attn.normal_mode = True
53 |                             attn.ref_norm_fidelity = setting.normal_fidelity
54 |             for i, (_, module) in enumerate(output_modules):
55 |                 for attn_idx in range(len(module.attention_blocks)):
56 |                     attn = module.attention_blocks[attn_idx]
57 |                     if setting.output_attentions[i]:
58 |                         attn.ref_mode = mode
59 |                         if setting.q_mode:
60 |                             attn.q_mode = True
61 |                         if setting.k_mode:
62 |                             attn.k_mode = True
63 |                         if setting.v_mode:
64 |                             attn.v_mode = True
65 |                         if setting.normal_mode:
66 |                             attn.normal_mode = True
67 |                             attn.ref_norm_fidelity = setting.normal_fidelity
68 | 
69 |     def clear_modules(self):
70 |         model = self.model
71 |         input_modules = list(
72 |             filter(is_named_module_transformer_block, model.input_blocks.named_modules()))
73 |         output_modules = list(
74 |             filter(is_named_module_transformer_block, model.output_blocks.named_modules()))
75 | 
76 |         for _, module in input_modules + output_modules:
77 |             for i in range(len(module.attention_blocks)):
78 |                 attn = module.attention_blocks[i]
79 |                 attn.ref_off()
80 |                 attn.ref_clean()
81 | 
--------------------------------------------------------------------------------
/nodes/ref_settings_node.py:
--------------------------------------------------------------------------------
1 | 
2 | from ..reference.ref_config import RefSetting
3 | 
4 | 
5 | class MotionRefSettingsDefaultNode:
6 |     @classmethod
7 |     def INPUT_TYPES(s):
8 |         d = {"required": {
9 |             "enabled": ("BOOLEAN", {"default": True}),
10 |             "strength": ("INT", {"default": 5, "min": 0, "max": 12, "step": 1}),
11 |             "fidelity": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01})
12 |         }, "optional": {
13 |             "prev_settings": ("MOTION_REF_SETTINGS", ),
14 |         }}
15 | 
16 |         return d
17 |     RETURN_TYPES = ("MOTION_REF_SETTINGS",)
18 |     FUNCTION = "add"
19 | 
20 |     CATEGORY = "reference"
21 | 
22 |     def add(self,
23 |             enabled,
24 |             strength,
25 |             fidelity,
26 |             prev_settings=[]):
27 | 
28 |         if not enabled:
29 |             return (prev_settings, )
30 | 
31 |         input_attentions = [False, False, False,
32 |                             False, False, False, False, False]
33 |         output_attentions = [False, False, False, False, False, False,
34 |                              False, False, False, False, False, False]
35 | 
36 |         for i in range(strength):
37 |             output_attentions[i] = True
38 | 
39 |         ref_setting = RefSetting(
40 |             input_attentions,
41 |             output_attentions,
42 |             False,
43 |             False,
44 |             False,
45 |             True,
46 |             fidelity
47 |         )
48 | 
49 |         ref_settings = [*prev_settings, ref_setting]
50 | 
51 |         return (ref_settings, )
52 | 
53 | 
54 | class MotionRefSettingsCustomNode:
55 |     @classmethod
56 |     def INPUT_TYPES(s):
57 |         inputs = list(range(1, 9))
58 |         outputs = list(range(1, 13))
59 | 
60 |         d = {"required": {
61 |             "enabled": ("BOOLEAN", {"default": True}),
62 |             "q_bank": ("BOOLEAN", {"default": False}),
63 |             "k_bank": ("BOOLEAN", {"default": False}),
64 |             "v_bank": ("BOOLEAN", {"default": False}),
65 |             "norm_bank": ("BOOLEAN", {"default": False}),
66 |             "norm_fidelity": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.01})
67 |         }, "optional": {
68 |             "prev_settings": ("MOTION_REF_SETTINGS", ),
69 |         }}
70 | 
71 |         for i in inputs:
72 |             d['required'][f'input_{i}'] = ("BOOLEAN", {"default": False})
73 | 
74 |         for i in outputs:
75 |             d['required'][f'output_{i}'] = ("BOOLEAN", {"default": False})
76 | 
77 |         return d
78 |     RETURN_TYPES = ("MOTION_REF_SETTINGS",)
79 |     FUNCTION = "add"
80 | 
81 |     CATEGORY = "reference"
82 | 
83 |     def add(self,
84 |             enabled,
85 |             q_bank,
86 |             k_bank,
87 |             v_bank,
88 |             norm_bank,
89 |             norm_fidelity,
90 |             input_1, input_2, input_3, input_4, input_5, input_6, input_7, input_8,
91 |             output_1, output_2, output_3, output_4, output_5, output_6,
92 |             output_7, output_8, output_9, output_10, output_11, output_12,
93 |             prev_settings=[]):
94 |         if not enabled:
95 |             return (prev_settings, )
96 | 
97 |         input_attentions = [input_1, input_2, input_3,
98 |                             input_4, input_5, input_6, input_7, input_8]
99 |         output_attentions = [output_1, output_2, output_3, output_4, output_5, output_6,
100 |                              output_7, output_8, output_9, output_10, output_11, output_12]
101 | 
102 |         ref_setting = RefSetting(
103 |             input_attentions,
104 |             output_attentions,
105 |             q_bank,
106 |             k_bank,
107 |             v_bank,
108 |             norm_bank,
109 |             norm_fidelity
110 |         )
111 | 
112 |         ref_settings = [*prev_settings, ref_setting]
113 | 
114 |         return (ref_settings, )
115 | 
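116 | # Illustrative sketch (not part of the node graph): settings nodes chain
117 | # through `prev_settings`, so several RefSetting entries can be stacked:
118 | #
119 | #   (settings,) = MotionRefSettingsDefaultNode().add(True, strength=5, fidelity=0.5)
120 | #   (settings,) = MotionRefSettingsDefaultNode().add(
121 | #       True, strength=3, fidelity=0.8, prev_settings=settings)
122 | #
123 | #   # len(settings) == 2; ApplyRefMotionNode applies every entry on each step.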
--------------------------------------------------------------------------------
/modules/unet_wrapper.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..reference.ref_controller import RefController, RefMode
4 | from ..reference.ref_config import RefConfig
5 | from ..utils.noise_utils import add_noise
6 | 
7 | 
8 | def get_unet_wrapper(cls, ref_controller: RefController):
9 |     class RefUNet(cls):
10 |         is_ref = True
11 | 
12 |         def _ref_motion_forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs):
13 |             flow_options = transformer_options.get('flow_options', None)
14 |             ref_config: RefConfig = transformer_options.get(
15 |                 'ref_motion_config', None)
16 |             if ref_config is None or len(ref_config.settings) == 0 or (flow_options is not None and flow_options._state == 0):
17 |                 return RefMode.OFF, None
18 | 
19 |             ad_params = transformer_options['ad_params']
20 |             sub_idxs = ad_params.get('sub_idxs', None)
21 |             n_conds = len(transformer_options['cond_or_uncond'])
22 |             n_frames = len(x) // n_conds
23 |             ref_count = ad_params['full_length'] // 16
24 |             if sub_idxs is None:
25 |                 sub_idxs = list(range(n_frames))
26 |                 ref_count = 1
27 | 
28 |             ref_latent = ref_config.ref_latents
29 |             if ad_params['full_length'] > len(ref_latent):
30 |                 sub_idxs = list(range(n_frames))
31 |                 ref_count = 1
32 | 
33 |             ref_controller.set_ref_count(1)
34 | 
35 |             if len(ref_latent) < n_frames:
36 |                 raise ValueError(
37 |                     "Motion Reference latent must be at least as long as sample latent.")
38 | 
39 |             ref_latent = ref_latent[sub_idxs]
40 |             ref_latent = torch.cat([ref_latent]*n_conds)
41 |             sigma = ref_config.sampling.sigma(timesteps)
42 |             start_sigma = ref_config.sampling.percent_to_sigma(
43 |                 ref_config.start_percent)
44 |             end_sigma = ref_config.sampling.percent_to_sigma(
45 |                 ref_config.end_percent)
46 |             if not (start_sigma >= sigma[0] >= end_sigma):
47 |                 return RefMode.OFF, ref_config
48 | 
49 |             ref_latent_noised = add_noise(ref_latent, torch.randn_like(
50 |                 ref_latent), sigma[0]).to(x.device).to(x.dtype)
51 | 
52 |             transformer_options = transformer_options.copy()
53 |             if 'flow_options' in transformer_options:
54 |                 del transformer_options['flow_options']
55 | 
56 |             positive = ref_config.positive.to(ref_latent_noised.device).half()
57 |             negative = ref_config.negative.to(ref_latent_noised.device).half()
58 | 
59 |             prompt = torch.cat(([negative] * n_frames) +
60 |                                ([positive] * n_frames))
61 | 
62 |             ref_controller.set_motion_mode(RefMode.WRITE, ref_config)
63 |             super().forward(ref_latent_noised,
64 |                             timesteps=torch.cat(
65 |                                 [timesteps[0].unsqueeze(0)]*(n_conds*n_frames)),
66 |                             context=prompt,
67 |                             y=y,
68 |                             control=None,
69 |                             transformer_options=transformer_options,
70 |                             **kwargs)
71 |             ref_controller.set_motion_mode(RefMode.OFF, ref_config)
72 | 
73 |             return RefMode.READ, ref_config
74 | 
75 |         def forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs):
76 |             ref_controller.clear_modules()
77 |             try:
78 |                 motion_mode, motion_config = self._ref_motion_forward(
79 |                     x,
80 |                     timesteps=timesteps,
81 |                     context=context,
82 |                     y=y,
83 |                     control=control,
84 |                     transformer_options=transformer_options,
85 |                     **kwargs
86 |                 )
87 |                 ref_controller.set_motion_mode(motion_mode, motion_config)
88 |                 output = super().forward(x,
89 |                                          timesteps=timesteps,
90 |                                          context=context,
91 |                                          y=y,
92 |                                          control=control,
93 |                                          transformer_options=transformer_options,
94 |                                          **kwargs)
95 |                 ref_controller.set_motion_mode(RefMode.OFF)
96 |                 return output
97 |             finally:
98 |                 ref_controller.clear_modules()
99 | 
100 |     return RefUNet
101 | 
102 | 
103 | def setup_ref_unet(model):
104 |     if not hasattr(model.model.diffusion_model, 'is_ref'):
105 |         ref_controller = RefController(model.model.diffusion_model)
106 |         model.model.diffusion_model.__class__ = get_unet_wrapper(
107 |             model.model.diffusion_model.__class__, ref_controller)
108 |         return ref_controller
109 |     return None
110 | 
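111 | # Note on the sigma window in `_ref_motion_forward`: `percent_to_sigma` maps
112 | # sampling progress (0.0 = first step, highest noise) to a sigma value, and
113 | # sigma decreases as sampling proceeds, so `start_sigma >= sigma[0] >= end_sigma`
114 | # holds only while the current step lies inside [start_percent, end_percent].
115 | # With the node defaults (start_percent=0.0, end_percent=0.7), the reference
116 | # WRITE pass runs for roughly the first 70% of steps and is skipped afterwards.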
--------------------------------------------------------------------------------
/modules/temporal_attn_wrapper.py:
--------------------------------------------------------------------------------
1 | from einops import rearrange, repeat
2 | import torch
3 | 
4 | from comfy.ldm.modules.attention import attention_basic, attention_pytorch, attention_split, attention_sub_quad
5 | from comfy import model_management
6 | from comfy.cli_args import args
7 | 
8 | from ..reference.ref_mode import RefMode
9 | from ..utils.module_utils import is_named_module_transformer_block
10 | 
11 | 
12 | # From ADE
13 | # until xformers bug is fixed, do not use xformers for VersatileAttention! TODO: change this when fix is out
14 | # logic for choosing optimized_attention method taken from comfy/ldm/modules/attention.py
15 | optimized_attention_mm = attention_basic
16 | if model_management.pytorch_attention_enabled():
17 |     optimized_attention_mm = attention_pytorch
18 | else:
19 |     if args.use_split_cross_attention:
20 |         optimized_attention_mm = attention_split
21 |     else:
22 |         optimized_attention_mm = attention_sub_quad
23 | 
24 | 
25 | def get_attention_wrapper(cls):
26 |     class TemporalAttentionWrapper(cls):
27 |         old_class = cls
28 |         is_ref = True
29 | 
30 |         ref_mode = RefMode.OFF
31 |         q_mode = False
32 |         k_mode = False
33 |         v_mode = False
34 |         normal_mode = False
35 |         ref_norm_fidelity = 1
36 | 
37 |         q_bank = None
38 |         k_bank = None
39 |         v_bank = None
40 |         normal_bank = None
41 |         ref_count = 1
42 | 
43 |         def ref_off(self):
44 |             self.ref_mode = RefMode.OFF
45 |             self.q_mode = False
46 |             self.k_mode = False
47 |             self.v_mode = False
48 |             self.normal_mode = False
49 | 
50 |         def ref_clean(self):
51 |             self.q_bank = None
52 |             self.v_bank = None
53 |             self.k_bank = None
54 |             self.normal_bank = None
55 | 
56 |         def forward(
57 |             self,
58 |             hidden_states,
59 |             encoder_hidden_states=None,
60 |             attention_mask=None,
61 |             video_length=None,
62 |             scale_mask=None,
63 |         ):
64 |             if self.attention_mode != "Temporal":
65 |                 raise NotImplementedError
66 | 
67 |             d = hidden_states.shape[1]
68 |             b = hidden_states.shape[0] // video_length
69 |             hidden_states = rearrange(
70 |                 hidden_states, "(b f) d c -> (b d) f c", f=video_length
71 |             )
72 | 
73 |             if self.pos_encoder is not None:
74 |                 hidden_states = self.pos_encoder(
75 |                     hidden_states).to(hidden_states.dtype)
76 | 
77 |             encoder_hidden_states = (
78 |                 repeat(encoder_hidden_states, "b n c -> (b d) n c", d=d)
79 |                 if encoder_hidden_states is not None
80 |                 else encoder_hidden_states
81 |             )
82 | 
83 |             if self.ref_mode == RefMode.READ and self.normal_mode and self.ref_norm_fidelity > 0.0:
84 |                 norm_hidden_states = hidden_states.clone()
85 | 
86 |             hidden_states = self.sub_forward(
87 |                 hidden_states,
88 |                 encoder_hidden_states,
89 |                 value=None,
90 |                 mask=attention_mask,
91 |                 scale_mask=scale_mask,
92 |                 uncond=False
93 |             )  # [8192, 16, 320]
94 | 
95 |             if self.ref_mode == RefMode.READ and self.normal_mode and self.ref_norm_fidelity > 0.0:
96 |                 uc_hidden_states = hidden_states.clone()
97 |                 # [(b d) f h]
98 |                 uc_mask = torch.Tensor(
99 |                     [1] * d
100 |                     + [0] * d
101 |                 ).to(uc_hidden_states.device).bool()
102 | 
103 |                 if encoder_hidden_states is None:
104 |                     encoder_hidden_states = norm_hidden_states
105 | 
106 |                 uc_hidden_states[uc_mask] = self.sub_forward(
107 |                     norm_hidden_states[uc_mask],
108 |                     encoder_hidden_states[uc_mask],
109 |                     value=None,
110 |                     mask=attention_mask,
111 |                     scale_mask=scale_mask,
112 |                     uncond=True
113 |                 )
114 |                 hidden_states = self.ref_norm_fidelity * uc_hidden_states + \
115 |                     (1.0 - self.ref_norm_fidelity) * hidden_states
116 | 
117 |             hidden_states = rearrange(
118 |                 hidden_states, "(b d) f c -> (b f) d c", d=d)
119 | 
120 |             return hidden_states
121 | 
122 |         def sub_forward(self, x, context=None, value=None, mask=None, scale_mask=None, uncond=False):
123 |             context = context if context is not None else x
124 |             value = value if value is not None else context
125 | 
126 |             context_k = context
127 |             value_v = value
128 |             if self.ref_mode == RefMode.WRITE and self.normal_mode and not uncond:
129 |                 self.normal_bank = x
130 |             elif self.ref_mode == RefMode.READ and self.normal_mode and not uncond:
131 |                 context_k = torch.cat([context, self.normal_bank], dim=1)
132 |                 value_v = torch.cat([value, self.normal_bank], dim=1)
133 | 
134 |             q = self.to_q(x)
135 |             k = self.to_k(context_k)
136 |             v = self.to_v(value_v)
137 | 
138 |             if self.ref_mode == RefMode.READ and not uncond:
139 |                 if self.q_mode:
140 |                     q = self.q_bank
141 |                 if self.k_mode:
142 |                     k = self.k_bank
143 |                 if self.v_mode:
144 |                     v = self.v_bank
145 |             elif self.ref_mode == RefMode.WRITE and not uncond:
146 |                 if self.q_mode:
147 |                     self.q_bank = q
148 |                 if self.k_mode:
149 |                     self.k_bank = k
150 |                 if self.v_mode:
151 |                     self.v_bank = v
152 | 
153 |             if self.scale is not None:
154 |                 k *= self.scale
155 |             # apply scale mask, if present
156 |             if scale_mask is not None:
157 |                 k *= scale_mask
158 | 
159 |             attn_output = optimized_attention_mm(q, k, v, self.heads, mask)
160 | 
161 |             return self.to_out(attn_output)
162 | 
163 |     return TemporalAttentionWrapper
164 | 
165 | 
166 | def wrap_temporal_attentions(model):
167 |     # HACK: This is for experiment reasons only
168 |     input_modules = list(
169 |         filter(is_named_module_transformer_block, model.input_blocks.named_modules()))
170 |     middle_modules = list(
171 |         filter(is_named_module_transformer_block, model.middle_block.named_modules()))
172 |     output_modules = list(
173 |         filter(is_named_module_transformer_block, model.output_blocks.named_modules()))
174 | 
175 |     for _, module in input_modules + output_modules + middle_modules:
176 |         for i in range(len(module.attention_blocks)):
177 |             attn = module.attention_blocks[i]
178 |             attn.__class__ = get_attention_wrapper(attn.__class__)
179 | 
180 | 
181 | def unwrap_temporal_attentions(model):
182 |     # HACK: This is for experiment reasons only
183 |     input_modules = list(
184 |         filter(is_named_module_transformer_block, model.input_blocks.named_modules()))
185 |     middle_modules = list(
186 |         filter(is_named_module_transformer_block, model.middle_block.named_modules()))
187 |     output_modules = list(
188 |         filter(is_named_module_transformer_block, model.output_blocks.named_modules()))
189 | 
190 |     for _, module in input_modules + output_modules + middle_modules:
191 |         for i in range(len(module.attention_blocks)):
192 |             attn = module.attention_blocks[i]
193 |             attn.__class__ = attn.old_class
194 | 
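195 | # Illustrative summary of the bank mechanism above (per step, per wrapped block):
196 | #   1. WRITE pass: the UNet runs on the noised reference latents and `sub_forward`
197 | #      stores q/k/v tensors and/or pre-attention hidden states (`normal_bank`).
198 | #   2. READ pass: the UNet runs on the actual sample; banked q/k/v are substituted
199 | #      directly, while `normal_bank` is concatenated along the key/value sequence,
200 | #      steering temporal attention toward the reference motion.
201 | #   3. `ref_off()` / `ref_clean()` reset the modes and free the banks between steps.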
--------------------------------------------------------------------------------
/example_workflows/motion_thief_example.json:
--------------------------------------------------------------------------------
1 | {
2 |   "last_node_id": 33,
3 |   "last_link_id": 58,
4 |   "nodes": [
5 |     {
6 |       "id": 17,
7 |       "type": "Reroute",
8 |       "pos": [
9 |         -165,
10 |         768
11 |       ],
12 |       "size": [
13 |         75,
14 |         26
15 |       ],
16 |       "flags": {},
17 |       "order": 5,
18 |       "mode": 0,
19 |       "inputs": [
20 |         {
21 |           "name": "",
22 |           "type": "*",
23 |           "link": 20
24 |         }
25 |       ],
26 |       "outputs": [
27 |         {
28 |           "name": "",
29 |           "type": "CLIP",
30 |           "links": [
31 |             21,
32 |             22,
33 |             23
34 |           ],
35 |           "slot_index": 0
36 |         }
37 |       ],
38 |       "properties": {
39 |         "showOutputText": false,
40 |         "horizontal": false
41 |       }
42 |     },
43 |     {
44 |       "id": 10,
45 |       "type": "VAEEncode",
46 |       "pos": [
47 |         290,
48 |         1025
49 |       ],
50 |       "size": {
51 |         "0": 140,
52 |         "1": 46
53 |       },
54 |       "flags": {},
55 |       "order": 13,
56 |       "mode": 0,
57 |       "inputs": [
58 |         {
59 |           "name": "pixels",
60 |           "type": "IMAGE",
61 |           "link": 7
62 |         },
63 |         {
64 |           "name": "vae",
65 |           "type": "VAE",
66 |           "link": 25
67 |         }
68 |       ],
69 |       "outputs": [
70 |         {
71 |           "name": "LATENT",
72 |           "type": "LATENT",
73 |           "links": [
74 |             54
75 |           ],
76 |           "shape": 3,
77 |           "slot_index": 0
78 |         }
79 |       ],
80 |       "properties": {
81 |         "Node name for S&R": "VAEEncode"
82 |       }
83 | 
}, 84 | { 85 | "id": 18, 86 | "type": "Reroute", 87 | "pos": [ 88 | 11, 89 | 1062 90 | ], 91 | "size": [ 92 | 75, 93 | 26 94 | ], 95 | "flags": {}, 96 | "order": 6, 97 | "mode": 0, 98 | "inputs": [ 99 | { 100 | "name": "", 101 | "type": "*", 102 | "link": 24 103 | } 104 | ], 105 | "outputs": [ 106 | { 107 | "name": "", 108 | "type": "VAE", 109 | "links": [ 110 | 25, 111 | 26 112 | ], 113 | "slot_index": 0 114 | } 115 | ], 116 | "properties": { 117 | "showOutputText": false, 118 | "horizontal": false 119 | } 120 | }, 121 | { 122 | "id": 3, 123 | "type": "CLIPTextEncode", 124 | "pos": [ 125 | 11, 126 | 899 127 | ], 128 | "size": { 129 | "0": 210, 130 | "1": 76 131 | }, 132 | "flags": {}, 133 | "order": 11, 134 | "mode": 0, 135 | "inputs": [ 136 | { 137 | "name": "clip", 138 | "type": "CLIP", 139 | "link": 23 140 | } 141 | ], 142 | "outputs": [ 143 | { 144 | "name": "CONDITIONING", 145 | "type": "CONDITIONING", 146 | "links": [ 147 | 12, 148 | 56 149 | ], 150 | "shape": 3, 151 | "slot_index": 0 152 | } 153 | ], 154 | "properties": { 155 | "Node name for S&R": "CLIPTextEncode" 156 | }, 157 | "widgets_values": [ 158 | "" 159 | ] 160 | }, 161 | { 162 | "id": 7, 163 | "type": "ImageScale", 164 | "pos": [ 165 | -39, 166 | 1182 167 | ], 168 | "size": { 169 | "0": 210, 170 | "1": 130 171 | }, 172 | "flags": {}, 173 | "order": 8, 174 | "mode": 0, 175 | "inputs": [ 176 | { 177 | "name": "image", 178 | "type": "IMAGE", 179 | "link": 31 180 | } 181 | ], 182 | "outputs": [ 183 | { 184 | "name": "IMAGE", 185 | "type": "IMAGE", 186 | "links": [ 187 | 7, 188 | 42 189 | ], 190 | "shape": 3, 191 | "slot_index": 0 192 | } 193 | ], 194 | "properties": { 195 | "Node name for S&R": "ImageScale" 196 | }, 197 | "widgets_values": [ 198 | "nearest-exact", 199 | 512, 200 | 512, 201 | "center" 202 | ] 203 | }, 204 | { 205 | "id": 25, 206 | "type": "VHS_VideoCombine", 207 | "pos": [ 208 | 1135, 209 | 1018 210 | ], 211 | "size": [ 212 | 320, 213 | 604 214 | ], 215 | "flags": {}, 216 | "order": 18, 217 | "mode": 0, 218 | "inputs": [ 219 | { 220 | "name": "images", 221 | "type": "IMAGE", 222 | "link": 41 223 | }, 224 | { 225 | "name": "audio", 226 | "type": "VHS_AUDIO", 227 | "link": null 228 | }, 229 | { 230 | "name": "batch_manager", 231 | "type": "VHS_BatchManager", 232 | "link": null 233 | } 234 | ], 235 | "outputs": [ 236 | { 237 | "name": "Filenames", 238 | "type": "VHS_FILENAMES", 239 | "links": null, 240 | "shape": 3 241 | } 242 | ], 243 | "properties": { 244 | "Node name for S&R": "VHS_VideoCombine" 245 | }, 246 | "widgets_values": { 247 | "frame_rate": 8, 248 | "loop_count": 0, 249 | "filename_prefix": "AnimateDiff", 250 | "format": "video/h264-mp4", 251 | "pix_fmt": "yuv420p", 252 | "crf": 19, 253 | "save_metadata": true, 254 | "pingpong": false, 255 | "save_output": true, 256 | "videopreview": { 257 | "hidden": false, 258 | "paused": false, 259 | "params": { 260 | "filename": "AnimateDiff_00378.mp4", 261 | "subfolder": "", 262 | "type": "output", 263 | "format": "video/h264-mp4" 264 | } 265 | } 266 | } 267 | }, 268 | { 269 | "id": 26, 270 | "type": "VHS_VideoCombine", 271 | "pos": [ 272 | 762, 273 | 1017 274 | ], 275 | "size": [ 276 | 320, 277 | 604 278 | ], 279 | "flags": {}, 280 | "order": 14, 281 | "mode": 0, 282 | "inputs": [ 283 | { 284 | "name": "images", 285 | "type": "IMAGE", 286 | "link": 42 287 | }, 288 | { 289 | "name": "audio", 290 | "type": "VHS_AUDIO", 291 | "link": null 292 | }, 293 | { 294 | "name": "batch_manager", 295 | "type": "VHS_BatchManager", 296 | "link": null 297 | } 298 | ], 299 | 
"outputs": [ 300 | { 301 | "name": "Filenames", 302 | "type": "VHS_FILENAMES", 303 | "links": null, 304 | "shape": 3 305 | } 306 | ], 307 | "properties": { 308 | "Node name for S&R": "VHS_VideoCombine" 309 | }, 310 | "widgets_values": { 311 | "frame_rate": 8, 312 | "loop_count": 0, 313 | "filename_prefix": "AnimateDiff", 314 | "format": "video/h264-mp4", 315 | "pix_fmt": "yuv420p", 316 | "crf": 19, 317 | "save_metadata": true, 318 | "pingpong": false, 319 | "save_output": true, 320 | "videopreview": { 321 | "hidden": false, 322 | "paused": false, 323 | "params": { 324 | "filename": "AnimateDiff_00377.mp4", 325 | "subfolder": "", 326 | "type": "output", 327 | "format": "video/h264-mp4" 328 | } 329 | } 330 | } 331 | }, 332 | { 333 | "id": 19, 334 | "type": "ADE_UseEvolvedSampling", 335 | "pos": [ 336 | -105, 337 | 401 338 | ], 339 | "size": { 340 | "0": 235.1999969482422, 341 | "1": 118 342 | }, 343 | "flags": {}, 344 | "order": 12, 345 | "mode": 0, 346 | "inputs": [ 347 | { 348 | "name": "model", 349 | "type": "MODEL", 350 | "link": 27 351 | }, 352 | { 353 | "name": "m_models", 354 | "type": "M_MODELS", 355 | "link": 30 356 | }, 357 | { 358 | "name": "context_options", 359 | "type": "CONTEXT_OPTIONS", 360 | "link": null 361 | }, 362 | { 363 | "name": "sample_settings", 364 | "type": "SAMPLE_SETTINGS", 365 | "link": null 366 | } 367 | ], 368 | "outputs": [ 369 | { 370 | "name": "MODEL", 371 | "type": "MODEL", 372 | "links": [ 373 | 51 374 | ], 375 | "shape": 3, 376 | "slot_index": 0 377 | } 378 | ], 379 | "properties": { 380 | "Node name for S&R": "ADE_UseEvolvedSampling" 381 | }, 382 | "widgets_values": [ 383 | "autoselect" 384 | ] 385 | }, 386 | { 387 | "id": 1, 388 | "type": "CheckpointLoaderSimple", 389 | "pos": [ 390 | -606, 391 | 741 392 | ], 393 | "size": { 394 | "0": 315, 395 | "1": 98 396 | }, 397 | "flags": {}, 398 | "order": 0, 399 | "mode": 0, 400 | "outputs": [ 401 | { 402 | "name": "MODEL", 403 | "type": "MODEL", 404 | "links": [ 405 | 27 406 | ], 407 | "shape": 3, 408 | "slot_index": 0 409 | }, 410 | { 411 | "name": "CLIP", 412 | "type": "CLIP", 413 | "links": [ 414 | 20 415 | ], 416 | "shape": 3, 417 | "slot_index": 1 418 | }, 419 | { 420 | "name": "VAE", 421 | "type": "VAE", 422 | "links": [ 423 | 24 424 | ], 425 | "shape": 3, 426 | "slot_index": 2 427 | } 428 | ], 429 | "properties": { 430 | "Node name for S&R": "CheckpointLoaderSimple" 431 | }, 432 | "widgets_values": [ 433 | "photon_v1.safetensors" 434 | ] 435 | }, 436 | { 437 | "id": 31, 438 | "type": "ApplyRefMotionNode", 439 | "pos": [ 440 | 328, 441 | 388 442 | ], 443 | "size": { 444 | "0": 210, 445 | "1": 186 446 | }, 447 | "flags": {}, 448 | "order": 15, 449 | "mode": 0, 450 | "inputs": [ 451 | { 452 | "name": "model", 453 | "type": "MODEL", 454 | "link": 51 455 | }, 456 | { 457 | "name": "ref_latents", 458 | "type": "LATENT", 459 | "link": 54 460 | }, 461 | { 462 | "name": "positive", 463 | "type": "CONDITIONING", 464 | "link": 55 465 | }, 466 | { 467 | "name": "negative", 468 | "type": "CONDITIONING", 469 | "link": 56 470 | }, 471 | { 472 | "name": "ref_settings", 473 | "type": "MOTION_REF_SETTINGS", 474 | "link": 58 475 | } 476 | ], 477 | "outputs": [ 478 | { 479 | "name": "MODEL", 480 | "type": "MODEL", 481 | "links": [ 482 | 57 483 | ], 484 | "shape": 3, 485 | "slot_index": 0 486 | } 487 | ], 488 | "properties": { 489 | "Node name for S&R": "ApplyRefMotionNode" 490 | }, 491 | "widgets_values": [ 492 | true, 493 | 0, 494 | 1 495 | ] 496 | }, 497 | { 498 | "id": 6, 499 | "type": "EmptyLatentImage", 500 | "pos": [ 
501 | 270, 502 | 1230 503 | ], 504 | "size": { 505 | "0": 210, 506 | "1": 106 507 | }, 508 | "flags": {}, 509 | "order": 1, 510 | "mode": 0, 511 | "outputs": [ 512 | { 513 | "name": "LATENT", 514 | "type": "LATENT", 515 | "links": [ 516 | 13 517 | ], 518 | "shape": 3, 519 | "slot_index": 0 520 | } 521 | ], 522 | "properties": { 523 | "Node name for S&R": "EmptyLatentImage" 524 | }, 525 | "widgets_values": [ 526 | 512, 527 | 512, 528 | 16 529 | ] 530 | }, 531 | { 532 | "id": 11, 533 | "type": "KSampler", 534 | "pos": [ 535 | 600, 536 | 565 537 | ], 538 | "size": { 539 | "0": 210, 540 | "1": 263.1576232910156 541 | }, 542 | "flags": {}, 543 | "order": 16, 544 | "mode": 0, 545 | "inputs": [ 546 | { 547 | "name": "model", 548 | "type": "MODEL", 549 | "link": 57 550 | }, 551 | { 552 | "name": "positive", 553 | "type": "CONDITIONING", 554 | "link": 11 555 | }, 556 | { 557 | "name": "negative", 558 | "type": "CONDITIONING", 559 | "link": 12 560 | }, 561 | { 562 | "name": "latent_image", 563 | "type": "LATENT", 564 | "link": 13 565 | } 566 | ], 567 | "outputs": [ 568 | { 569 | "name": "LATENT", 570 | "type": "LATENT", 571 | "links": [ 572 | 15 573 | ], 574 | "shape": 3, 575 | "slot_index": 0 576 | } 577 | ], 578 | "properties": { 579 | "Node name for S&R": "KSampler" 580 | }, 581 | "widgets_values": [ 582 | 3, 583 | "fixed", 584 | 20, 585 | 7, 586 | "euler_ancestral", 587 | "normal", 588 | 1 589 | ] 590 | }, 591 | { 592 | "id": 12, 593 | "type": "VAEDecode", 594 | "pos": [ 595 | 866, 596 | 916 597 | ], 598 | "size": { 599 | "0": 140, 600 | "1": 46 601 | }, 602 | "flags": {}, 603 | "order": 17, 604 | "mode": 0, 605 | "inputs": [ 606 | { 607 | "name": "samples", 608 | "type": "LATENT", 609 | "link": 15 610 | }, 611 | { 612 | "name": "vae", 613 | "type": "VAE", 614 | "link": 26 615 | } 616 | ], 617 | "outputs": [ 618 | { 619 | "name": "IMAGE", 620 | "type": "IMAGE", 621 | "links": [ 622 | 41 623 | ], 624 | "shape": 3, 625 | "slot_index": 0 626 | } 627 | ], 628 | "properties": { 629 | "Node name for S&R": "VAEDecode" 630 | } 631 | }, 632 | { 633 | "id": 21, 634 | "type": "ADE_ApplyAnimateDiffModelSimple", 635 | "pos": [ 636 | -465, 637 | 429 638 | ], 639 | "size": { 640 | "0": 260.3999938964844, 641 | "1": 106 642 | }, 643 | "flags": {}, 644 | "order": 7, 645 | "mode": 0, 646 | "inputs": [ 647 | { 648 | "name": "motion_model", 649 | "type": "MOTION_MODEL_ADE", 650 | "link": 29 651 | }, 652 | { 653 | "name": "motion_lora", 654 | "type": "MOTION_LORA", 655 | "link": null 656 | }, 657 | { 658 | "name": "scale_multival", 659 | "type": "MULTIVAL", 660 | "link": null 661 | }, 662 | { 663 | "name": "effect_multival", 664 | "type": "MULTIVAL", 665 | "link": null 666 | }, 667 | { 668 | "name": "ad_keyframes", 669 | "type": "AD_KEYFRAMES", 670 | "link": null 671 | } 672 | ], 673 | "outputs": [ 674 | { 675 | "name": "M_MODELS", 676 | "type": "M_MODELS", 677 | "links": [ 678 | 30 679 | ], 680 | "shape": 3, 681 | "slot_index": 0 682 | } 683 | ], 684 | "properties": { 685 | "Node name for S&R": "ADE_ApplyAnimateDiffModelSimple" 686 | } 687 | }, 688 | { 689 | "id": 20, 690 | "type": "ADE_LoadAnimateDiffModel", 691 | "pos": [ 692 | -829, 693 | 433 694 | ], 695 | "size": { 696 | "0": 252, 697 | "1": 58 698 | }, 699 | "flags": {}, 700 | "order": 2, 701 | "mode": 0, 702 | "inputs": [ 703 | { 704 | "name": "ad_settings", 705 | "type": "AD_SETTINGS", 706 | "link": null 707 | } 708 | ], 709 | "outputs": [ 710 | { 711 | "name": "MOTION_MODEL", 712 | "type": "MOTION_MODEL_ADE", 713 | "links": [ 714 | 29 715 | ], 716 | 
"shape": 3, 717 | "slot_index": 0 718 | } 719 | ], 720 | "properties": { 721 | "Node name for S&R": "ADE_LoadAnimateDiffModel" 722 | }, 723 | "widgets_values": [ 724 | "v3_sd15_mm.ckpt" 725 | ] 726 | }, 727 | { 728 | "id": 22, 729 | "type": "VHS_LoadVideo", 730 | "pos": [ 731 | -365, 732 | 1179 733 | ], 734 | "size": [ 735 | 240, 736 | 378.57142857142856 737 | ], 738 | "flags": {}, 739 | "order": 3, 740 | "mode": 0, 741 | "inputs": [ 742 | { 743 | "name": "batch_manager", 744 | "type": "VHS_BatchManager", 745 | "link": null 746 | } 747 | ], 748 | "outputs": [ 749 | { 750 | "name": "IMAGE", 751 | "type": "IMAGE", 752 | "links": [ 753 | 31 754 | ], 755 | "shape": 3, 756 | "slot_index": 0 757 | }, 758 | { 759 | "name": "frame_count", 760 | "type": "INT", 761 | "links": null, 762 | "shape": 3 763 | }, 764 | { 765 | "name": "audio", 766 | "type": "VHS_AUDIO", 767 | "links": null, 768 | "shape": 3 769 | } 770 | ], 771 | "properties": { 772 | "Node name for S&R": "VHS_LoadVideo" 773 | }, 774 | "widgets_values": { 775 | "video": "lr_up1_FILM_00019.mp4", 776 | "force_rate": 0, 777 | "force_size": "Disabled", 778 | "custom_width": 512, 779 | "custom_height": 512, 780 | "frame_load_cap": 16, 781 | "skip_first_frames": 30, 782 | "select_every_nth": 4, 783 | "choose video to upload": "image", 784 | "videopreview": { 785 | "hidden": false, 786 | "paused": false, 787 | "params": { 788 | "frame_load_cap": 16, 789 | "skip_first_frames": 30, 790 | "force_rate": 0, 791 | "filename": "lr_up1_FILM_00019.mp4", 792 | "type": "input", 793 | "format": "video/mp4", 794 | "select_every_nth": 4 795 | } 796 | } 797 | } 798 | }, 799 | { 800 | "id": 15, 801 | "type": "CLIPTextEncode", 802 | "pos": [ 803 | 1, 804 | 770 805 | ], 806 | "size": { 807 | "0": 245.16639709472656, 808 | "1": 76 809 | }, 810 | "flags": {}, 811 | "order": 10, 812 | "mode": 0, 813 | "inputs": [ 814 | { 815 | "name": "clip", 816 | "type": "CLIP", 817 | "link": 22 818 | } 819 | ], 820 | "outputs": [ 821 | { 822 | "name": "CONDITIONING", 823 | "type": "CONDITIONING", 824 | "links": [ 825 | 55 826 | ], 827 | "shape": 3, 828 | "slot_index": 0 829 | } 830 | ], 831 | "properties": { 832 | "Node name for S&R": "CLIPTextEncode" 833 | }, 834 | "widgets_values": [ 835 | "explosions" 836 | ] 837 | }, 838 | { 839 | "id": 33, 840 | "type": "MotionRefSettingsDefaultNode", 841 | "pos": [ 842 | -53, 843 | 225 844 | ], 845 | "size": [ 846 | 278.79998779296875, 847 | 58 848 | ], 849 | "flags": {}, 850 | "order": 4, 851 | "mode": 0, 852 | "inputs": [ 853 | { 854 | "name": "prev_settings", 855 | "type": "MOTION_REF_SETTINGS", 856 | "link": null 857 | } 858 | ], 859 | "outputs": [ 860 | { 861 | "name": "MOTION_REF_SETTINGS", 862 | "type": "MOTION_REF_SETTINGS", 863 | "links": [ 864 | 58 865 | ], 866 | "shape": 3, 867 | "slot_index": 0 868 | } 869 | ], 870 | "properties": { 871 | "Node name for S&R": "MotionRefSettingsDefaultNode" 872 | }, 873 | "widgets_values": [ 874 | true 875 | ] 876 | }, 877 | { 878 | "id": 2, 879 | "type": "CLIPTextEncode", 880 | "pos": [ 881 | 3, 882 | 628 883 | ], 884 | "size": { 885 | "0": 251.83071899414062, 886 | "1": 76 887 | }, 888 | "flags": {}, 889 | "order": 9, 890 | "mode": 0, 891 | "inputs": [ 892 | { 893 | "name": "clip", 894 | "type": "CLIP", 895 | "link": 21 896 | } 897 | ], 898 | "outputs": [ 899 | { 900 | "name": "CONDITIONING", 901 | "type": "CONDITIONING", 902 | "links": [ 903 | 11 904 | ], 905 | "shape": 3, 906 | "slot_index": 0 907 | } 908 | ], 909 | "properties": { 910 | "Node name for S&R": "CLIPTextEncode" 911 | }, 912 | 
"widgets_values": [ 913 | "puddle" 914 | ] 915 | } 916 | ], 917 | "links": [ 918 | [ 919 | 7, 920 | 7, 921 | 0, 922 | 10, 923 | 0, 924 | "IMAGE" 925 | ], 926 | [ 927 | 11, 928 | 2, 929 | 0, 930 | 11, 931 | 1, 932 | "CONDITIONING" 933 | ], 934 | [ 935 | 12, 936 | 3, 937 | 0, 938 | 11, 939 | 2, 940 | "CONDITIONING" 941 | ], 942 | [ 943 | 13, 944 | 6, 945 | 0, 946 | 11, 947 | 3, 948 | "LATENT" 949 | ], 950 | [ 951 | 15, 952 | 11, 953 | 0, 954 | 12, 955 | 0, 956 | "LATENT" 957 | ], 958 | [ 959 | 20, 960 | 1, 961 | 1, 962 | 17, 963 | 0, 964 | "*" 965 | ], 966 | [ 967 | 21, 968 | 17, 969 | 0, 970 | 2, 971 | 0, 972 | "CLIP" 973 | ], 974 | [ 975 | 22, 976 | 17, 977 | 0, 978 | 15, 979 | 0, 980 | "CLIP" 981 | ], 982 | [ 983 | 23, 984 | 17, 985 | 0, 986 | 3, 987 | 0, 988 | "CLIP" 989 | ], 990 | [ 991 | 24, 992 | 1, 993 | 2, 994 | 18, 995 | 0, 996 | "*" 997 | ], 998 | [ 999 | 25, 1000 | 18, 1001 | 0, 1002 | 10, 1003 | 1, 1004 | "VAE" 1005 | ], 1006 | [ 1007 | 26, 1008 | 18, 1009 | 0, 1010 | 12, 1011 | 1, 1012 | "VAE" 1013 | ], 1014 | [ 1015 | 27, 1016 | 1, 1017 | 0, 1018 | 19, 1019 | 0, 1020 | "MODEL" 1021 | ], 1022 | [ 1023 | 29, 1024 | 20, 1025 | 0, 1026 | 21, 1027 | 0, 1028 | "MOTION_MODEL_ADE" 1029 | ], 1030 | [ 1031 | 30, 1032 | 21, 1033 | 0, 1034 | 19, 1035 | 1, 1036 | "M_MODELS" 1037 | ], 1038 | [ 1039 | 31, 1040 | 22, 1041 | 0, 1042 | 7, 1043 | 0, 1044 | "IMAGE" 1045 | ], 1046 | [ 1047 | 41, 1048 | 12, 1049 | 0, 1050 | 25, 1051 | 0, 1052 | "IMAGE" 1053 | ], 1054 | [ 1055 | 42, 1056 | 7, 1057 | 0, 1058 | 26, 1059 | 0, 1060 | "IMAGE" 1061 | ], 1062 | [ 1063 | 51, 1064 | 19, 1065 | 0, 1066 | 31, 1067 | 0, 1068 | "MODEL" 1069 | ], 1070 | [ 1071 | 54, 1072 | 10, 1073 | 0, 1074 | 31, 1075 | 1, 1076 | "LATENT" 1077 | ], 1078 | [ 1079 | 55, 1080 | 15, 1081 | 0, 1082 | 31, 1083 | 2, 1084 | "CONDITIONING" 1085 | ], 1086 | [ 1087 | 56, 1088 | 3, 1089 | 0, 1090 | 31, 1091 | 3, 1092 | "CONDITIONING" 1093 | ], 1094 | [ 1095 | 57, 1096 | 31, 1097 | 0, 1098 | 11, 1099 | 0, 1100 | "MODEL" 1101 | ], 1102 | [ 1103 | 58, 1104 | 33, 1105 | 0, 1106 | 31, 1107 | 4, 1108 | "MOTION_REF_SETTINGS" 1109 | ] 1110 | ], 1111 | "groups": [], 1112 | "config": {}, 1113 | "extra": {}, 1114 | "version": 0.4 1115 | } --------------------------------------------------------------------------------