├── auto_Screenshot 2022-10-22 113941.png
├── README.md
└── cycle.py

--------------------------------------------------------------------------------
/auto_Screenshot 2022-10-22 113941.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nagolinc/auto_cycleDiffusion/HEAD/auto_Screenshot 2022-10-22 113941.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# auto_cycleDiffusion
This is a port of https://github.com/ChenWu98/cycle-diffusion to run on https://github.com/AUTOMATIC1111/stable-diffusion-webui.

To use, just copy cycle.py to your `{automatic}/scripts` folder.

![screenshot](https://raw.githubusercontent.com/nagolinc/auto_cycleDiffusion/main/auto_Screenshot%202022-10-22%20113941.png)
--------------------------------------------------------------------------------
/cycle.py:
--------------------------------------------------------------------------------
# scripts imports
from contextlib import contextmanager, nullcontext
from functools import partial

import numpy as np
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
from omegaconf import OmegaConf
from torch import autocast
from tqdm import tqdm
import glob
import os

import gradio as gr
import modules.scripts as scripts
from modules import images
from modules.processing import process_images, Processed
from modules.shared import opts, cmd_opts, state, sd_model

from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, \
    extract_into_tensor


# Note: this DDIMSampler is different from the one in
# ldm.models.diffusion.ddim, so it is defined here instead of imported.

# Created by Chen Henry Wu
"""SAMPLING ONLY."""


class DDIMSampler(object):
    def __init__(self, model, schedule="linear", **kwargs):
        super().__init__()
        self.model = model
        self.ddpm_num_timesteps = model.num_timesteps
        self.schedule = schedule

    def register_buffer(self, name, attr):
        if type(attr) == torch.Tensor:
            if attr.device != torch.device("cuda"):
                attr = attr.to(torch.device("cuda"))
        setattr(self, name, attr)

    def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
        self.ddim_timesteps = make_ddim_timesteps(ddim_discr_method=ddim_discretize, num_ddim_timesteps=ddim_num_steps,
                                                  num_ddpm_timesteps=self.ddpm_num_timesteps, verbose=verbose)
        alphas_cumprod = self.model.alphas_cumprod
        assert alphas_cumprod.shape[0] == self.ddpm_num_timesteps, 'alphas have to be defined for each timestep'
        to_torch = lambda x: x.clone().detach().to(torch.float32).to(self.model.device)

        self.register_buffer('betas', to_torch(self.model.betas))
        self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
        self.register_buffer('alphas_cumprod_prev', to_torch(self.model.alphas_cumprod_prev))

        # calculations for diffusion q(x_t | x_{t-1}) and others
        self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod.cpu())))
        self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod.cpu())))
        self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod.cpu())))
        self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu())))
        self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu() - 1)))

        # ddim sampling parameters
        ddim_sigmas, ddim_alphas, ddim_alphas_prev = make_ddim_sampling_parameters(alphacums=alphas_cumprod.cpu(),
                                                                                   ddim_timesteps=self.ddim_timesteps,
                                                                                   eta=ddim_eta, verbose=verbose)
        self.register_buffer('ddim_sigmas', ddim_sigmas)
        self.register_buffer('ddim_alphas', ddim_alphas)
        self.register_buffer('ddim_alphas_prev', ddim_alphas_prev)
        self.register_buffer('ddim_sqrt_one_minus_alphas', np.sqrt(1. - ddim_alphas))
        self.register_buffer('ddim_sqrt_one_minus_alphas_prev', np.sqrt(1. - ddim_alphas_prev))
        sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt(
            (1 - self.alphas_cumprod_prev) / (1 - self.alphas_cumprod) * (
                1 - self.alphas_cumprod / self.alphas_cumprod_prev))
        self.register_buffer('ddim_sigmas_for_original_num_steps', sigmas_for_original_sampling_steps)
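
    # A sketch of the update these schedule buffers feed (added for
    # orientation; the equations restate what p_sample_ddim below computes).
    # With a_t = alphas_cumprod[t] and e_t the model's noise prediction:
    #
    #   pred_x0 = (x_t - sqrt(1 - a_t) * e_t) / sqrt(a_t)
    #   dir_xt  = sqrt(1 - a_prev - sigma_t ** 2) * e_t
    #   x_prev  = sqrt(a_prev) * pred_x0 + dir_xt + sigma_t * eps
    #
    # With eta = 0 every sigma_t is zero and sampling is deterministic;
    # CycleDiffusion needs eta > 0 so that the sigma_t * eps terms exist and
    # can be recovered later (see compute_eps).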

    def sample(self,
               S,
               batch_size,
               shape,
               conditioning=None,
               callback=None,
               normals_sequence=None,
               img_callback=None,
               quantize_x0=False,
               eta=0.,
               mask=None,
               x0=None,
               temperature=1.,
               noise_dropout=0.,
               score_corrector=None,
               corrector_kwargs=None,
               verbose=True,
               x_T=None,
               log_every_t=100,
               unconditional_guidance_scale=1.,
               unconditional_conditioning=None,
               # this has to come in the same format as the conditioning, e.g. as encoded tokens, ...
               **kwargs
               ):
        if conditioning is not None:
            if isinstance(conditioning, dict):
                cbs = conditioning[list(conditioning.keys())[0]].shape[0]
                if cbs != batch_size:
                    print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
            else:
                if conditioning.shape[0] != batch_size:
                    print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")

        self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
        # sampling
        C, H, W = shape
        size = (batch_size, C, H, W)
        print(f'Data shape for DDIM sampling is {size}, eta {eta}')

        samples, intermediates = self.ddim_sampling(conditioning, size,
                                                    callback=callback,
                                                    img_callback=img_callback,
                                                    quantize_denoised=quantize_x0,
                                                    mask=mask, x0=x0,
                                                    ddim_use_original_steps=False,
                                                    noise_dropout=noise_dropout,
                                                    temperature=temperature,
                                                    score_corrector=score_corrector,
                                                    corrector_kwargs=corrector_kwargs,
                                                    x_T=x_T,
                                                    log_every_t=log_every_t,
                                                    unconditional_guidance_scale=unconditional_guidance_scale,
                                                    unconditional_conditioning=unconditional_conditioning,
                                                    )
        return samples, intermediates
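
    # Illustrative call pattern for sample() (a sketch, never executed here;
    # assumes `model` is a loaded LatentDiffusion, so the conditioning arrives
    # in the "same format as the conditioning" mentioned above, i.e. encoded
    # tokens from get_learned_conditioning):
    #
    #   sampler = DDIMSampler(model)
    #   c = model.get_learned_conditioning(["a photo of a cat"])
    #   uc = model.get_learned_conditioning([""])
    #   samples, _ = sampler.sample(S=50, batch_size=1, shape=(4, 64, 64),
    #                               conditioning=c, eta=1.0,
    #                               unconditional_guidance_scale=7.5,
    #                               unconditional_conditioning=uc)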

    def refine(self,
               S,
               refine_steps,
               batch_size,
               shape,
               conditioning=None,
               callback=None,
               normals_sequence=None,
               img_callback=None,
               quantize_x0=False,
               eta=0.,
               mask=None,
               x0=None,
               temperature=1.,
               noise_dropout=0.,
               score_corrector=None,
               corrector_kwargs=None,
               verbose=True,
               x_T=None,
               log_every_t=100,
               unconditional_guidance_scale=1.,
               unconditional_conditioning=None,
               # this has to come in the same format as the conditioning, e.g. as encoded tokens, ...
               **kwargs
               ):
        if conditioning is not None:
            if isinstance(conditioning, dict):
                cbs = conditioning[list(conditioning.keys())[0]].shape[0]
                if cbs != batch_size:
                    print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
            else:
                if conditioning.shape[0] != batch_size:
                    print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")

        self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
        # sampling
        C, H, W = shape
        size = (batch_size, C, H, W)
        print(f'Data shape for DDIM sampling is {size}, eta {eta}')

        samples, intermediates = self._refine(refine_steps, conditioning, size,
                                              callback=callback,
                                              img_callback=img_callback,
                                              quantize_denoised=quantize_x0,
                                              mask=mask, x0=x0,
                                              ddim_use_original_steps=False,
                                              noise_dropout=noise_dropout,
                                              temperature=temperature,
                                              score_corrector=score_corrector,
                                              corrector_kwargs=corrector_kwargs,
                                              log_every_t=log_every_t,
                                              unconditional_guidance_scale=unconditional_guidance_scale,
                                              unconditional_conditioning=unconditional_conditioning,
                                              )
        return samples, intermediates

    def sample_with_eps(self,
                        S,
                        eps_list,
                        batch_size,
                        shape,
                        conditioning=None,
                        callback=None,
                        normals_sequence=None,
                        img_callback=None,
                        quantize_x0=False,
                        eta=0.,
                        mask=None,
                        x0=None,
                        temperature=1.,
                        noise_dropout=0.,
                        score_corrector=None,
                        corrector_kwargs=None,
                        verbose=True,
                        x_T=None,
                        skip_steps=0,
                        log_every_t=100,
                        unconditional_guidance_scale=1.,
                        unconditional_conditioning=None,
                        # this has to come in the same format as the conditioning, e.g. as encoded tokens, ...
                        **kwargs
                        ):
        if conditioning is not None:
            if isinstance(conditioning, dict):
                cbs = conditioning[list(conditioning.keys())[0]].shape[0]
                if cbs != batch_size:
                    print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
            else:
                if conditioning.shape[0] != batch_size:
                    print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")

        self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
        # sampling
        C, H, W = shape
        size = (batch_size, C, H, W)
        print(f'Data shape for DDIM sampling is {size}, eta {eta}')

        samples, intermediates = self.ddim_sampling_with_eps(conditioning, size,
                                                             eps_list,
                                                             callback=callback,
                                                             img_callback=img_callback,
                                                             quantize_denoised=quantize_x0,
                                                             mask=mask, x0=x0,
                                                             ddim_use_original_steps=False,
                                                             noise_dropout=noise_dropout,
                                                             temperature=temperature,
                                                             score_corrector=score_corrector,
                                                             corrector_kwargs=corrector_kwargs,
                                                             x_T=x_T,
                                                             skip_steps=skip_steps,
                                                             log_every_t=log_every_t,
                                                             unconditional_guidance_scale=unconditional_guidance_scale,
                                                             unconditional_conditioning=unconditional_conditioning,
                                                             )
        return samples, intermediates
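
    # sample_with_eps mirrors sample(), except that instead of drawing fresh
    # Gaussian noise at every step it replays the per-step noises in eps_list
    # (shape [batch, n_steps, C, H, W]); steps beyond eps_list.shape[1] fall
    # back to random noise (see ddim_sampling_with_eps). Replaying recovered
    # noises is what lets decoding retrace the source image's stochastic path.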

    def ddpm_ddim_encoding(self,
                           S,
                           batch_size,
                           shape,
                           conditioning=None,
                           callback=None,
                           normals_sequence=None,
                           img_callback=None,
                           quantize_x0=False,
                           eta=0.,
                           white_box_steps=None,
                           skip_steps=0,
                           x0=None,
                           temperature=1.,
                           noise_dropout=0.,
                           score_corrector=None,
                           corrector_kwargs=None,
                           verbose=True,
                           log_every_t=100,
                           unconditional_guidance_scale=1.,
                           unconditional_conditioning=None,
                           # this has to come in the same format as the conditioning, e.g. as encoded tokens, ...
                           **kwargs
                           ):
        if conditioning is not None:
            if isinstance(conditioning, dict):
                cbs = conditioning[list(conditioning.keys())[0]].shape[0]
                if cbs != batch_size:
                    print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
            else:
                if conditioning.shape[0] != batch_size:
                    print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")

        self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
        # sampling
        C, H, W = shape
        size = (batch_size, C, H, W)
        print(f'Data shape for DDIM sampling is {size}, eta {eta}')
        assert eta > 0

        z_list = self._ddpm_ddim_encoding(conditioning, size,
                                          callback=callback,
                                          img_callback=img_callback,
                                          quantize_denoised=quantize_x0,
                                          eta=eta, white_box_steps=white_box_steps, skip_steps=skip_steps,
                                          x0=x0,
                                          ddim_use_original_steps=False,
                                          noise_dropout=noise_dropout,
                                          temperature=temperature,
                                          score_corrector=score_corrector,
                                          corrector_kwargs=corrector_kwargs,
                                          log_every_t=log_every_t,
                                          unconditional_guidance_scale=unconditional_guidance_scale,
                                          unconditional_conditioning=unconditional_conditioning,
                                          )

        return z_list

    def ddim_sampling(self, cond, shape,
                      x_T=None, ddim_use_original_steps=False,
                      callback=None, timesteps=None, quantize_denoised=False,
                      mask=None, x0=None, img_callback=None, log_every_t=100,
                      temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                      unconditional_guidance_scale=1., unconditional_conditioning=None):
        device = self.model.betas.device
        b = shape[0]
        if x_T is None:
            img = torch.randn(shape, device=device)
        else:
            img = x_T

        if timesteps is None:
            timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps
        elif timesteps is not None and not ddim_use_original_steps:
            subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1
            timesteps = self.ddim_timesteps[:subset_end]

        intermediates = {'x_inter': [img], 'pred_x0': [img]}
        time_range = reversed(range(0, timesteps)) if ddim_use_original_steps else np.flip(timesteps)
        total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0]
        print(f"Running DDIM Sampling with {total_steps} timesteps")

        iterator = tqdm(time_range, desc='DDIM Sampler', total=total_steps, disable=True)

        for i, step in enumerate(iterator):
            index = total_steps - i - 1
            ts = torch.full((b,), step, device=device, dtype=torch.long)

            if mask is not None:
                assert x0 is not None
                img_orig = self.model.q_sample(x0, ts)  # TODO: deterministic forward pass?
                img = img_orig * mask + (1. - mask) * img

            outs = self.p_sample_ddim(img, cond, ts, index=index, use_original_steps=ddim_use_original_steps,
                                      quantize_denoised=quantize_denoised, temperature=temperature,
                                      noise_dropout=noise_dropout, score_corrector=score_corrector,
                                      corrector_kwargs=corrector_kwargs,
                                      unconditional_guidance_scale=unconditional_guidance_scale,
                                      unconditional_conditioning=unconditional_conditioning)
            img, pred_x0 = outs
            if callback: callback(i)
            if img_callback: img_callback(pred_x0, i)

            if index % log_every_t == 0 or index == total_steps - 1:
                intermediates['x_inter'].append(img)
                intermediates['pred_x0'].append(pred_x0)

        return img, intermediates

    def _refine(self, refine_steps, cond, shape,
                ddim_use_original_steps=False,
                callback=None, timesteps=None, quantize_denoised=False,
                mask=None, x0=None, img_callback=None, log_every_t=100,
                temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                unconditional_guidance_scale=1., unconditional_conditioning=None):
        device = self.model.betas.device
        b = shape[0]

        # Sample xt
        alphas = self.model.alphas_cumprod if ddim_use_original_steps else self.ddim_alphas
        at = alphas[refine_steps - 1]
        xt = at.sqrt() * x0 + (1 - at).sqrt() * torch.randn_like(x0)

        img = xt

        if timesteps is None:
            timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps
        elif timesteps is not None and not ddim_use_original_steps:
            subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1
            timesteps = self.ddim_timesteps[:subset_end]

        intermediates = {'x_inter': [img], 'pred_x0': [img]}
        time_range = reversed(range(0, timesteps)) if ddim_use_original_steps else np.flip(timesteps)
        total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0]
        print(f"Running DDIM Sampling with {total_steps} timesteps with {refine_steps} refinement steps")

        assert refine_steps < total_steps
        refine_time_range = time_range[-refine_steps:]
        iterator = tqdm(refine_time_range, desc='DDIM Sampler', total=refine_steps, disable=True)

        for i, step in enumerate(iterator):
            index = refine_steps - i - 1
            ts = torch.full((b,), step, device=device, dtype=torch.long)

            if mask is not None:
                assert x0 is not None
                img_orig = self.model.q_sample(x0, ts)  # TODO: deterministic forward pass?
                img = img_orig * mask + (1. - mask) * img

            outs = self.p_sample_ddim(img, cond, ts, index=index, use_original_steps=ddim_use_original_steps,
                                      quantize_denoised=quantize_denoised, temperature=temperature,
                                      noise_dropout=noise_dropout, score_corrector=score_corrector,
                                      corrector_kwargs=corrector_kwargs,
                                      unconditional_guidance_scale=unconditional_guidance_scale,
                                      unconditional_conditioning=unconditional_conditioning)
            img, pred_x0 = outs
            if callback: callback(i)
            if img_callback: img_callback(pred_x0, i)

            if index % log_every_t == 0 or index == refine_steps - 1:
                intermediates['x_inter'].append(img)
                intermediates['pred_x0'].append(pred_x0)

        return img, intermediates
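
    # The "# Sample xt" block in _refine above (and in _ddpm_ddim_encoding
    # below) uses the closed form of the forward process q(x_t | x_0):
    #
    #   x_t = sqrt(a_t) * x_0 + sqrt(1 - a_t) * noise,   noise ~ N(0, I)
    #
    # i.e. x_0 can be pushed to any noise level in a single step.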

    def ddim_sampling_with_eps(self, cond, shape, eps_list,
                               x_T=None, ddim_use_original_steps=False,
                               callback=None, timesteps=None, skip_steps=0, quantize_denoised=False,
                               mask=None, x0=None, img_callback=None, log_every_t=100,
                               temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                               unconditional_guidance_scale=1., unconditional_conditioning=None):
        device = self.model.betas.device
        b = shape[0]
        if x_T is None:  # x_T is x_t if using skip_steps.
            img = torch.randn(shape, device=device)
        else:
            img = x_T

        if timesteps is None:
            timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps
        elif timesteps is not None and not ddim_use_original_steps:
            subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1
            timesteps = self.ddim_timesteps[:subset_end]

        intermediates = {'x_inter': [img], 'pred_x0': [img]}
        time_range = reversed(range(0, timesteps)) if ddim_use_original_steps else np.flip(timesteps)
        total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0]
        refine_steps = total_steps - skip_steps
        print(f"Running DDIM Sampling with {total_steps} timesteps and {refine_steps} refinement steps")

        refine_time_range = time_range[-refine_steps:]
        iterator = tqdm(refine_time_range, desc='DDIM Sampler', total=refine_steps, disable=True)

        for i, step in enumerate(iterator):
            index = refine_steps - i - 1
            ts = torch.full((b,), step, device=device, dtype=torch.long)

            if mask is not None:
                assert x0 is not None
                img_orig = self.model.q_sample(x0, ts)  # TODO: deterministic forward pass?
                img = img_orig * mask + (1. - mask) * img

            outs = self.p_sample_ddim_with_eps(img, cond, ts, index=index, use_original_steps=ddim_use_original_steps,
                                               quantize_denoised=quantize_denoised, temperature=temperature,
                                               noise_dropout=noise_dropout, score_corrector=score_corrector,
                                               corrector_kwargs=corrector_kwargs,
                                               unconditional_guidance_scale=unconditional_guidance_scale,
                                               unconditional_conditioning=unconditional_conditioning,
                                               eps=eps_list[:, i] if i < eps_list.shape[1] else None,
                                               )
            img, pred_x0 = outs
            if callback: callback(i)
            if img_callback: img_callback(pred_x0, i)

            if index % log_every_t == 0 or index == total_steps - 1:
                intermediates['x_inter'].append(img)
                intermediates['pred_x0'].append(pred_x0)

        return img, intermediates

    def _ddpm_ddim_encoding(self, cond, shape,
                            ddim_use_original_steps=False,
                            callback=None, timesteps=None, quantize_denoised=False,
                            eta=None, white_box_steps=None, skip_steps=0,
                            x0=None, img_callback=None, log_every_t=100,
                            temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                            unconditional_guidance_scale=1., unconditional_conditioning=None):

        assert eta > 0
        device = self.model.betas.device
        b = shape[0]

        if timesteps is None:
            timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps
        elif timesteps is not None and not ddim_use_original_steps:
            subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1
            timesteps = self.ddim_timesteps[:subset_end]

        time_range = reversed(range(0, timesteps)) if ddim_use_original_steps else np.flip(timesteps)
        total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0]
        refine_steps = total_steps - skip_steps
        print(f"Running DDIM Sampling with {total_steps} timesteps and {refine_steps} refinement steps")

        refine_time_range = time_range[-refine_steps:]
        iterator = tqdm(refine_time_range, desc='DDIM Sampler', total=refine_steps, disable=True)

        # Sample xt
        alphas = self.model.alphas_cumprod if ddim_use_original_steps else self.ddim_alphas
        at = alphas[refine_steps - 1]
        xt = at.sqrt() * x0 + (1 - at).sqrt() * torch.randn_like(x0)
        z_list = [xt, ]

        for i, step in enumerate(iterator):
            index = refine_steps - i - 1
            ts = torch.full((b,), step, device=device, dtype=torch.long)

            if i < white_box_steps - skip_steps - 1:
                xt_next = self.sample_xt_next(x0=x0, xt=xt, index=index, use_original_steps=ddim_use_original_steps)

                eps = self.compute_eps(
                    xt=xt, xt_next=xt_next, c=cond, t=ts, index=index, use_original_steps=ddim_use_original_steps,
                    quantize_denoised=quantize_denoised, temperature=temperature,
                    noise_dropout=noise_dropout, score_corrector=score_corrector,
                    corrector_kwargs=corrector_kwargs,
                    unconditional_guidance_scale=unconditional_guidance_scale,
                    unconditional_conditioning=unconditional_conditioning)
                xt = xt_next
                z_list.append(eps)
            else:
                break

        return z_list
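
    # _ddpm_ddim_encoding is the DPM-Encoder step of CycleDiffusion: it noises
    # x0 up to xt, then walks back toward x0, at each step sampling the
    # xt_next the posterior would produce and solving for the eps that makes
    # the stochastic DDIM update land exactly there. The returned z_list is
    # [x_t, eps_1, eps_2, ...], which ddim_sampling_with_eps can replay.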

    def p_sample_ddim(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
                      temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                      unconditional_guidance_scale=1., unconditional_conditioning=None):
        b, *_, device = *x.shape, x.device

        if unconditional_conditioning is None or unconditional_guidance_scale == 1.:
            e_t = self.model.apply_model(x, t, c)
        elif unconditional_guidance_scale == 0:
            e_t = self.model.apply_model(x, t, unconditional_conditioning)
        else:
            x_in = torch.cat([x] * 2)
            t_in = torch.cat([t] * 2)
            c_in = torch.cat([unconditional_conditioning, c])
            e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2)
            e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond)

        if score_corrector is not None:
            assert self.model.parameterization == "eps"
            e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs)

        alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
        alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev
        sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas
        sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas
        # select parameters corresponding to the currently considered timestep
        a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
        a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
        sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)
        sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device)

        # current prediction for x_0
        pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
        if quantize_denoised:
            pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0)
        # direction pointing to x_t
        dir_xt = (1. - a_prev - sigma_t ** 2).sqrt() * e_t
        noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature
        if noise_dropout > 0.:
            noise = torch.nn.functional.dropout(noise, p=noise_dropout)
        x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise
        return x_prev, pred_x0

    def compute_eps(self, xt, xt_next, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
                    temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                    unconditional_guidance_scale=1., unconditional_conditioning=None):
        b, *_, device = *xt.shape, xt.device

        if unconditional_conditioning is None or unconditional_guidance_scale == 1.:
            e_t = self.model.apply_model(xt, t, c)
        elif unconditional_guidance_scale == 0:
            e_t = self.model.apply_model(xt, t, unconditional_conditioning)
        else:
            x_in = torch.cat([xt] * 2)
            t_in = torch.cat([t] * 2)
            c_in = torch.cat([unconditional_conditioning, c])
            e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2)
            e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond)

        if score_corrector is not None:
            assert self.model.parameterization == "eps"
            e_t = score_corrector.modify_score(self.model, e_t, xt, t, c, **corrector_kwargs)

        alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
        alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev
        sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas
        sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas
        # select parameters corresponding to the currently considered timestep
        a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
        a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
        sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)
        sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device)

        # current prediction for x_0
        pred_x0 = (xt - sqrt_one_minus_at * e_t) / a_t.sqrt()
        # direction pointing to x_t
        dir_xt = (1. - a_prev - sigma_t ** 2).sqrt() * e_t
        eps = (xt_next - a_prev.sqrt() * pred_x0 - dir_xt) / sigma_t / temperature
        return eps
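
    # compute_eps inverts the stochastic DDIM update of p_sample_ddim: since
    #
    #   xt_next = sqrt(a_prev) * pred_x0 + dir_xt + sigma_t * eps * temperature
    #
    # solving for the noise term gives
    #
    #   eps = (xt_next - sqrt(a_prev) * pred_x0 - dir_xt) / sigma_t / temperature
    #
    # which is exactly the value returned above.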

    def sample_xt_next(self, x0, xt, index, use_original_steps=False):
        if index == 0:
            return x0

        b, *_, device = *x0.shape, x0.device

        alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
        alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev
        sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas
        # select parameters corresponding to the currently considered timestep
        a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
        a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
        sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)

        # direction pointing to x_t
        e_t = (xt - a_t.sqrt() * x0) / (1 - a_t).sqrt()
        dir_xt = (1. - a_prev - sigma_t ** 2).sqrt() * e_t
        noise = sigma_t * torch.randn(x0.shape, device=device)
        xt_next = a_prev.sqrt() * x0 + dir_xt + noise
        return xt_next

    def p_sample_ddim_with_eps(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
                               temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                               unconditional_guidance_scale=1., unconditional_conditioning=None, eps=None):
        b, *_, device = *x.shape, x.device

        if unconditional_conditioning is None or unconditional_guidance_scale == 1.:
            e_t = self.model.apply_model(x, t, c)
        elif unconditional_guidance_scale == 0:
            e_t = self.model.apply_model(x, t, unconditional_conditioning)
        else:
            x_in = torch.cat([x] * 2)
            t_in = torch.cat([t] * 2)
            c_in = torch.cat([unconditional_conditioning, c])
            e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2)
            e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond)

        if score_corrector is not None:
            assert self.model.parameterization == "eps"
            e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs)

        alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
        alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev
        sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas
        sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas
        # select parameters corresponding to the currently considered timestep
        a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
        a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
        sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)
        sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device)

        # current prediction for x_0
        pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
        if quantize_denoised:
            pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0)
        # direction pointing to x_t
        dir_xt = (1. - a_prev - sigma_t ** 2).sqrt() * e_t
        # replay the recovered eps if provided; otherwise draw fresh noise
        if eps is None:
            noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature
        else:
            noise = sigma_t * eps * temperature
        if noise_dropout > 0.:
            noise = torch.nn.functional.dropout(noise, p=noise_dropout)
        x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise
        return x_prev, pred_x0

    def stochastic_encode(self, x0, t, use_original_steps=False, noise=None):
        # fast, but does not allow for exact reconstruction
        # t serves as an index to gather the correct alphas
        if use_original_steps:
            sqrt_alphas_cumprod = self.sqrt_alphas_cumprod
            sqrt_one_minus_alphas_cumprod = self.sqrt_one_minus_alphas_cumprod
        else:
            sqrt_alphas_cumprod = torch.sqrt(self.ddim_alphas)
            sqrt_one_minus_alphas_cumprod = self.ddim_sqrt_one_minus_alphas

        if noise is None:
            noise = torch.randn_like(x0)
        return (extract_into_tensor(sqrt_alphas_cumprod, t, x0.shape) * x0 +
                extract_into_tensor(sqrt_one_minus_alphas_cumprod, t, x0.shape) * noise)

    def decode(self, x_latent, cond, t_start, unconditional_guidance_scale=1.0, unconditional_conditioning=None,
               use_original_steps=False):

        timesteps = np.arange(self.ddpm_num_timesteps) if use_original_steps else self.ddim_timesteps
        timesteps = timesteps[:t_start]

        time_range = np.flip(timesteps)
        total_steps = timesteps.shape[0]
        print(f"Running DDIM Sampling with {total_steps} timesteps")

        iterator = tqdm(time_range, desc='Decoding image', total=total_steps)
        x_dec = x_latent
        for i, step in enumerate(iterator):
            index = total_steps - i - 1
            ts = torch.full((x_latent.shape[0],), step, device=x_latent.device, dtype=torch.long)
            x_dec, _ = self.p_sample_ddim(x_dec, cond, ts, index=index, use_original_steps=use_original_steps,
                                          unconditional_guidance_scale=unconditional_guidance_scale,
                                          unconditional_conditioning=unconditional_conditioning)
        return x_dec


# Created by Chen Henry Wu
import os
import argparse
import sys
sys.path.append(os.path.abspath('model/lib/stable_diffusion'))


def requires_grad(model, flag=True):
    for p in model.parameters():
        p.requires_grad = flag


def prepare_stable_diffusion_text(source_model_type):
    print('First of all, when the code changes, make sure that no part in the model is under no_grad!')

    config = OmegaConf.load(os.path.join(
        'model/lib/stable_diffusion/configs/stable-diffusion/v1-inference.yaml'))
    ckpt = os.path.join('ckpts', 'stable_diffusion', source_model_type)

    return config, ckpt


def get_condition(model, text, bs):
    assert isinstance(text, list)
    assert isinstance(text[0], str)

    uc = model.get_learned_conditioning(bs * [""])
    print("model.cond_stage_key: ", model.cond_stage_key)
    c = model.get_learned_conditioning(text)
    print("c.shape: ", c.shape)
    print('-' * 50)
    return c, uc
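
# The (c, uc) pair from get_condition drives classifier-free guidance inside
# the sampler: the model is evaluated under both conditionings and the noise
# predictions are blended as
#
#   e_t = e_t_uncond + scale * (e_t - e_t_uncond)
#
# so scale = 1 reduces to the conditional prediction alone, and larger scales
# push harder toward the text.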

def convsample_ddim_conditional(model, steps, shape, x_T, skip_steps, eta, eps_list, scale, text):
    ddim = DDIMSampler(model)
    bs = shape[0]
    shape = shape[1:]
    c, uc = get_condition(model, text, bs)
    samples, intermediates = ddim.sample_with_eps(steps,
                                                  eps_list,
                                                  conditioning=c,
                                                  batch_size=bs,
                                                  shape=shape,
                                                  eta=eta,
                                                  verbose=False,
                                                  x_T=x_T,
                                                  skip_steps=skip_steps,
                                                  unconditional_guidance_scale=scale,
                                                  unconditional_conditioning=uc
                                                  )
    return samples, intermediates


def make_convolutional_sample_with_eps_conditional(model, custom_steps, eta, x_T, skip_steps, eps_list,
                                                   scale, text):
    with model.ema_scope("Plotting"):
        sample, intermediates = convsample_ddim_conditional(model,
                                                            steps=custom_steps,
                                                            shape=x_T.shape,
                                                            x_T=x_T,
                                                            skip_steps=skip_steps,
                                                            eta=eta,
                                                            eps_list=eps_list,
                                                            scale=scale,
                                                            text=text)

    x_sample = model.decode_first_stage(sample)

    return x_sample


def ddpm_ddim_encoding_conditional(model, steps, shape, eta, white_box_steps, skip_steps, x0, scale, text):
    with model.ema_scope("Plotting"):
        ddim = DDIMSampler(model)
        bs = shape[0]
        shape = shape[1:]
        c, uc = get_condition(model, text, bs)

        z_list = ddim.ddpm_ddim_encoding(steps,
                                         conditioning=c,
                                         batch_size=bs,
                                         shape=shape,
                                         eta=eta,
                                         white_box_steps=white_box_steps,
                                         skip_steps=skip_steps,
                                         verbose=False,
                                         x0=x0,
                                         unconditional_guidance_scale=scale,
                                         unconditional_conditioning=uc,
                                         )

    return z_list


class SDStochasticTextWrapper(torch.nn.Module):

    def __init__(self, source_model, custom_steps, eta, white_box_steps, skip_steps,
                 encoder_unconditional_guidance_scales=(1,), decoder_unconditional_guidance_scales=(5,),
                 n_trials=1):
        super(SDStochasticTextWrapper, self).__init__()

        # These are iterated with len() below, so they must be sequences,
        # e.g. (1,) rather than a bare number.
        self.encoder_unconditional_guidance_scales = encoder_unconditional_guidance_scales
        self.decoder_unconditional_guidance_scales = decoder_unconditional_guidance_scales
        self.n_trials = n_trials

        # Set up generator. The webui already has a loaded model, so instead
        # of loading from a config/checkpoint we take it directly:
        #self.config, self.ckpt = prepare_stable_diffusion_text(source_model_type)
        #self.generator = load_model_from_config(self.config, self.ckpt, verbose=True)
        self.generator = source_model

        self.precision = "full"

        print(75 * "-")

        self.eta = eta
        self.custom_steps = custom_steps
        self.white_box_steps = white_box_steps
        self.skip_steps = skip_steps

        self.resolution = 512
        print(f"resolution: {self.resolution}")

        print(
            f'Using DDIM sampling with {self.custom_steps} sampling steps and eta={self.eta}')

        # Freeze.
        # requires_grad(self.generator, False)

        # Post process.
        self.post_process = transforms.Compose(  # To un-normalize from [-1.0, 1.0] (GAN output) to [0, 1]
            [transforms.Normalize(mean=[-1.0, -1.0, -1.0],
                                  std=[2.0, 2.0, 2.0])]
        )
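
    # Why this un-normalizes: Normalize computes (x - mean) / std per channel,
    # and with mean = -1.0, std = 2.0 that is (x + 1) / 2, mapping the
    # decoder's [-1, 1] output range onto [0, 1] (-1 -> 0, 0 -> 0.5, 1 -> 1).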

    def generate(self, z_ensemble, decode_text):
        precision_scope = autocast if self.precision == "autocast" else nullcontext
        with precision_scope("cuda"):
            img_ensemble = []
            for i, z in enumerate(z_ensemble):
                skip_steps = self.skip_steps[i % len(self.skip_steps)]
                bsz = z.shape[0]
                if self.white_box_steps != -1:
                    eps_list = z.view(bsz, (self.white_box_steps - skip_steps),
                                      self.generator.channels, self.generator.image_size, self.generator.image_size)
                else:
                    eps_list = z.view(bsz, 1, self.generator.channels,
                                      self.generator.image_size, self.generator.image_size)
                x_T = eps_list[:, 0]
                eps_list = eps_list[:, 1:]

                for decoder_unconditional_guidance_scale in self.decoder_unconditional_guidance_scales:
                    img = make_convolutional_sample_with_eps_conditional(self.generator,
                                                                         custom_steps=self.custom_steps,
                                                                         eta=self.eta,
                                                                         x_T=x_T,
                                                                         skip_steps=skip_steps,
                                                                         eps_list=eps_list,
                                                                         scale=decoder_unconditional_guidance_scale,
                                                                         text=decode_text)
                    img_ensemble.append(img)

        return img_ensemble

    def encode(self, image, encode_text):
        # Eval mode for the generator.
        self.generator.eval()

        precision_scope = autocast if self.precision == "autocast" else nullcontext

        # Normalize.
        image = (image - 0.5) * 2.0
        # Resize.
        assert image.shape[2] == image.shape[3] == self.resolution
        with precision_scope("cuda"):
            with torch.no_grad():
                # Encode.
                encoder_posterior = self.generator.encode_first_stage(image)
                z = self.generator.get_first_stage_encoding(encoder_posterior)
                x0 = z

        with precision_scope("cuda"):
            bsz = image.shape[0]
            z_ensemble = []
            for trial in range(self.n_trials):
                for encoder_unconditional_guidance_scale in self.encoder_unconditional_guidance_scales:
                    for skip_steps in self.skip_steps:
                        with torch.no_grad():
                            # DDIM forward.
                            z_list = ddpm_ddim_encoding_conditional(self.generator,
                                                                    steps=self.custom_steps,
                                                                    shape=x0.shape,
                                                                    eta=self.eta,
                                                                    white_box_steps=self.white_box_steps,
                                                                    skip_steps=skip_steps,
                                                                    x0=x0,
                                                                    scale=encoder_unconditional_guidance_scale,
                                                                    text=encode_text)

                        z = torch.stack(z_list, dim=1).view(bsz, -1)
                        z_ensemble.append(z)

        return z_ensemble

    def forward(self, z_ensemble, original_img, encode_text, decode_text):
        # Eval mode for the generator.
        self.generator.eval()

        print("DOING GENERATE")
        img_ensemble = self.generate(z_ensemble, decode_text)
        print("DONE GENERATING")

        assert len(img_ensemble) == len(self.decoder_unconditional_guidance_scales) * len(
            self.encoder_unconditional_guidance_scales) * len(self.skip_steps) * self.n_trials

        # Post process.
        img_ensemble = [self.post_process(img) for img in img_ensemble]

        return img_ensemble

    @property
    def device(self):
        return next(self.parameters()).device
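
# Size bookkeeping for the flat z vectors passed between encode() and
# generate() (a worked example under assumed defaults, not from the original
# code): for a 512x512 input the SD latent is assumed to be 4x64x64, so with
# white_box_steps=100 and skip_steps=20 each z packs x_T plus 79 noise maps,
# (100 - 20) * 4 * 64 * 64 = 1,310,720 floats per sample. generate() views z
# back to [bsz, 80, 4, 64, 64], splitting off index 0 as x_T and the rest as
# eps_list.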

class Script(scripts.Script):

    # The title of the script. This is what will be displayed in the dropdown menu.
    def title(self):
        return "cycleDiffusion"

    # Determines when the script should be shown in the dropdown menu via the
    # returned value. As an example:
    # is_img2img is True if the current tab is img2img, and False if it is txt2img.
    # Thus, return is_img2img to only show the script on the img2img tab.
    def show(self, is_img2img):
        return is_img2img

    # How the script is displayed in the UI. See https://gradio.app/docs/#components
    # for the different UI components you can use and how to create them.
    # Most UI components can return a value, such as a boolean for a checkbox.
    # The returned values are passed to the run method as parameters.
    def ui(self, is_img2img):
        encode_text = gr.Textbox(label="encode_text", lines=1)
        decode_text = gr.Textbox(label="decode_text", lines=1)
        overwrite = gr.Checkbox(False, label="Overwrite existing files")
        skip_steps = gr.Slider(label="skip steps", minimum=0,
                               maximum=150, step=1, value=20)

        encoder_unconditional_guidance_scales = gr.Slider(label="encoder_unconditional_guidance_scales", minimum=1,
                                                          maximum=7, step=0.1, value=1)
        decoder_unconditional_guidance_scales = gr.Slider(label="decoder_unconditional_guidance_scales", minimum=1,
                                                          maximum=7, step=0.1, value=3)

        return [encode_text, decode_text, overwrite, skip_steps,
                encoder_unconditional_guidance_scales, decoder_unconditional_guidance_scales]
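
    # Typical inputs (an illustration; nothing in the code enforces it):
    # encode_text should describe the image uploaded on the img2img tab and
    # decode_text the desired edit, e.g. encode_text = "a photo of a cat",
    # decode_text = "a photo of a dog". The two sliders mirror the encoder/
    # decoder unconditional guidance scales of the original cycle-diffusion
    # repo.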

    # This is where the additional processing is implemented. The parameters include
    # self, the model object "p" (a StableDiffusionProcessing class, see
    # processing.py), and the parameters returned by the ui method.
    # Custom functions can be defined here, and additional libraries can be imported
    # to be used in processing. The return value should be a Processed object, which is
    # what is returned by the process_images method.
    def run(self, p, encode_text, decode_text, overwrite, skip_steps,
            encoder_unconditional_guidance_scales, decoder_unconditional_guidance_scales):

        # If overwrite is false, save the results under the "cycle" basename
        # alongside the originals; if it is true, stop the pipeline from
        # saving its own outputs so only the cycled images remain.
        basename = "cycle"
        if not overwrite:
            basename = "cycle"
        else:
            p.do_not_save_samples = True

        # Don't call process_images(p) here: that would run the normal SD
        # pipeline first. We only want the uploaded init images.
        images = p.init_images

        # Changing these is untested (probably worth trying once other
        # samplers are wired up).
        white_box_steps = 100
        # custom_steps = p.steps  # don't do this
        custom_steps = 99

        # Maybe change this in the future to allow other sizes?
        width, height = 512, 512

        eta = p.eta
        # eta can't be None
        if eta is None:
            eta = 1.0

        source_model = p.sd_model
        sd = SDStochasticTextWrapper(source_model, custom_steps, eta, white_box_steps, [skip_steps],
                                     [encoder_unconditional_guidance_scales], [decoder_unconditional_guidance_scales],
                                     n_trials=1)

        for i in range(len(images)):

            original_image = images[i]
            original_image = original_image.resize((width, height))

            convert_tensor = transforms.ToTensor()

            img = convert_tensor(original_image)
            img = torch.unsqueeze(img, 0).half().cuda()

            with autocast('cuda'):
                print("ENCODING")
                z = sd.encode(img, [encode_text])
                print("GENERATING")
                img = sd(z, img, [encode_text], [decode_text])

            toPIL = transforms.ToPILImage()

            img = torch.squeeze(img[0])

            # images[i] = toPIL((img + 1) / 2)
            img = torch.clamp(img, min=0, max=1)
            images[i] = toPIL(img)

            # images.save_image(images[i], p.outpath_samples, basename,
            #                   p.seed + i, p.prompt, opts.samples_format, info=p.info, p=p)

        processed = Processed(p, images)
        return processed
--------------------------------------------------------------------------------