├── FooocusSDXLInpaintAllInOnePipeline.py
├── assets
    └── teaser.png
├── data
    ├── 1_1.jpg
    ├── 1_2.jpg
    ├── 2_1.jpg
    ├── 2_2.jpg
    ├── 3_1.jpg
    ├── 3_2.jpg
    ├── 4_1.jpg
    └── 4_2.jpg
├── infer.ipynb
├── ldm_patched
    ├── contrib
    │   ├── external.py
    │   ├── external_canny.py
    │   ├── external_clip_sdxl.py
    │   ├── external_compositing.py
    │   ├── external_custom_sampler.py
    │   ├── external_freelunch.py
    │   ├── external_hypernetwork.py
    │   ├── external_hypertile.py
    │   ├── external_images.py
    │   ├── external_latent.py
    │   ├── external_mask.py
    │   ├── external_model_advanced.py
    │   ├── external_model_downscale.py
    │   ├── external_model_merging.py
    │   ├── external_perpneg.py
    │   ├── external_photomaker.py
    │   ├── external_post_processing.py
    │   ├── external_rebatch.py
    │   ├── external_sag.py
    │   ├── external_sdupscale.py
    │   ├── external_stable3d.py
    │   ├── external_tomesd.py
    │   ├── external_upscale_model.py
    │   └── external_video_model.py
    ├── controlnet
    │   └── cldm.py
    ├── k_diffusion
    │   ├── sampling.py
    │   └── utils.py
    ├── ldm
    │   ├── models
    │   │   ├── __pycache__
    │   │   │   └── autoencoder.cpython-310.pyc
    │   │   └── autoencoder.py
    │   ├── modules
    │   │   ├── __pycache__
    │   │   │   ├── attention.cpython-310.pyc
    │   │   │   ├── ema.cpython-310.pyc
    │   │   │   └── sub_quadratic_attention.cpython-310.pyc
    │   │   ├── attention.py
    │   │   ├── diffusionmodules
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   ├── model.cpython-310.pyc
    │   │   │   │   ├── openaimodel.cpython-310.pyc
    │   │   │   │   ├── upscaling.cpython-310.pyc
    │   │   │   │   └── util.cpython-310.pyc
    │   │   │   ├── model.py
    │   │   │   ├── openaimodel.py
    │   │   │   ├── upscaling.py
    │   │   │   └── util.py
    │   │   ├── distributions
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   └── distributions.cpython-310.pyc
    │   │   │   └── distributions.py
    │   │   ├── ema.py
    │   │   ├── encoders
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   └── noise_aug_modules.cpython-310.pyc
    │   │   │   └── noise_aug_modules.py
    │   │   ├── sub_quadratic_attention.py
    │   │   └── temporal_ae.py
    │   └── util.py
    ├── licenses-3rd
    │   ├── chainer
    │   ├── comfyui
    │   ├── diffusers
    │   ├── kdiffusion
    │   ├── ldm
    │   ├── taesd
    │   └── transformers
    ├── modules
    │   ├── args_parser.py
    │   ├── checkpoint_pickle.py
    │   ├── clip_config_bigg.json
    │   ├── clip_model.py
    │   ├── clip_vision.py
    │   ├── clip_vision_config_g.json
    │   ├── clip_vision_config_h.json
    │   ├── clip_vision_config_vitl.json
    │   ├── conds.py
    │   ├── controlnet.py
    │   ├── diffusers_convert.py
    │   ├── diffusers_load.py
    │   ├── gligen.py
    │   ├── latent_formats.py
    │   ├── lora.py
    │   ├── model_base.py
    │   ├── model_detection.py
    │   ├── model_management.py
    │   ├── model_patcher.py
    │   ├── model_sampling.py
    │   ├── ops.py
    │   ├── options.py
    │   ├── sample.py
    │   ├── samplers.py
    │   ├── sd.py
    │   ├── sd1_clip.py
    │   ├── sd1_clip_config.json
    │   ├── sd1_tokenizer
    │   │   ├── merges.txt
    │   │   ├── special_tokens_map.json
    │   │   ├── tokenizer_config.json
    │   │   └── vocab.json
    │   ├── sd2_clip.py
    │   ├── sd2_clip_config.json
    │   ├── sdxl_clip.py
    │   ├── supported_models.py
    │   ├── supported_models_base.py
    │   └── utils.py
    ├── pfn
    │   ├── __init__.py
    │   ├── architecture
    │   │   ├── DAT.py
    │   │   ├── HAT.py
    │   │   ├── LICENSE-DAT
    │   │   ├── LICENSE-ESRGAN
    │   │   ├── LICENSE-HAT
    │   │   ├── LICENSE-RealESRGAN
    │   │   ├── LICENSE-SCUNet
    │   │   ├── LICENSE-SPSR
    │   │   ├── LICENSE-SwiftSRGAN
    │   │   ├── LICENSE-Swin2SR
    │   │   ├── LICENSE-SwinIR
    │   │   ├── LICENSE-lama
    │   │   ├── LaMa.py
    │   │   ├── OmniSR
    │   │   │   ├── ChannelAttention.py
    │   │   │   ├── LICENSE
    │   │   │   ├── OSA.py
    │   │   │   ├── OSAG.py
    │   │   │   ├── OmniSR.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── OSA.cpython-310.pyc
    │   │   │   │   ├── OSAG.cpython-310.pyc
    │   │   │   │   ├── OmniSR.cpython-310.pyc
    │   │   │   │   ├── esa.cpython-310.pyc
    │   │   │   │   ├── layernorm.cpython-310.pyc
    │   │   │   │   └── pixelshuffle.cpython-310.pyc
    │   │   │   ├── esa.py
    │   │   │   ├── layernorm.py
    │   │   │   └── pixelshuffle.py
    │   │   ├── RRDB.py
    │   │   ├── SCUNet.py
    │   │   ├── SPSR.py
    │   │   ├── SRVGG.py
    │   │   ├── SwiftSRGAN.py
    │   │   ├── Swin2SR.py
    │   │   ├── SwinIR.py
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── DAT.cpython-310.pyc
    │   │   │   ├── HAT.cpython-310.pyc
    │   │   │   ├── LaMa.cpython-310.pyc
    │   │   │   ├── RRDB.cpython-310.pyc
    │   │   │   ├── SCUNet.cpython-310.pyc
    │   │   │   ├── SPSR.cpython-310.pyc
    │   │   │   ├── SRVGG.cpython-310.pyc
    │   │   │   ├── SwiftSRGAN.cpython-310.pyc
    │   │   │   ├── Swin2SR.cpython-310.pyc
    │   │   │   ├── SwinIR.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── block.cpython-310.pyc
    │   │   ├── block.py
    │   │   ├── face
    │   │   │   ├── LICENSE-GFPGAN
    │   │   │   ├── LICENSE-RestoreFormer
    │   │   │   ├── LICENSE-codeformer
    │   │   │   ├── __pycache__
    │   │   │   │   ├── codeformer.cpython-310.pyc
    │   │   │   │   ├── gfpganv1_clean_arch.cpython-310.pyc
    │   │   │   │   ├── restoreformer_arch.cpython-310.pyc
    │   │   │   │   └── stylegan2_clean_arch.cpython-310.pyc
    │   │   │   ├── arcface_arch.py
    │   │   │   ├── codeformer.py
    │   │   │   ├── fused_act.py
    │   │   │   ├── gfpgan_bilinear_arch.py
    │   │   │   ├── gfpganv1_arch.py
    │   │   │   ├── gfpganv1_clean_arch.py
    │   │   │   ├── restoreformer_arch.py
    │   │   │   ├── stylegan2_arch.py
    │   │   │   ├── stylegan2_bilinear_arch.py
    │   │   │   ├── stylegan2_clean_arch.py
    │   │   │   └── upfirdn2d.py
    │   │   └── timm
    │   │   │   ├── LICENSE
    │   │   │   ├── __pycache__
    │   │   │   ├── drop.cpython-310.pyc
    │   │   │   ├── helpers.cpython-310.pyc
    │   │   │   └── weight_init.cpython-310.pyc
    │   │   │   ├── drop.py
    │   │   │   ├── helpers.py
    │   │   │   └── weight_init.py
    │   ├── model_loading.py
    │   └── types.py
    ├── t2ia
    │   └── adapter.py
    ├── taesd
    │   └── taesd.py
    ├── unipc
    │   └── uni_pc.py
    └── utils
    │   ├── latent_visualization.py
    │   └── path_utils.py
├── make_img.ipynb
├── models
    ├── inpaint
    │   └── put_inpaint_here
    ├── loras
    │   └── put_loras_here
    └── upscale_models
    │   └── put_esrgan_and_other_upscale_models_here
├── modules
    ├── anisotropic.py
    ├── async_worker.py
    ├── auth.py
    ├── config.py
    ├── constants.py
    ├── core.py
    ├── default_pipeline.py
    ├── flags.py
    ├── gradio_hijack.py
    ├── html.py
    ├── inpaint_worker.py
    ├── launch_util.py
    ├── localization.py
    ├── lora.py
    ├── meta_parser.py
    ├── model_loader.py
    ├── ops.py
    ├── patch.py
    ├── patch_clip.py
    ├── patch_precision.py
    ├── private_logger.py
    ├── sample_hijack.py
    ├── sdxl_styles.py
    ├── style_sorter.py
    ├── ui_gradio_extensions.py
    ├── upscaler.py
    └── util.py
├── positive.txt
├── readme.md
├── requirements.txt
├── sdxl_styles
    ├── sdxl_styles_diva.json
    ├── sdxl_styles_fooocus.json
    ├── sdxl_styles_marc_k3nt3l.json
    ├── sdxl_styles_mre.json
    ├── sdxl_styles_sai.json
    └── sdxl_styles_twri.json
├── train.py
└── utils
    ├── FooocusDpmpp2mSdeGpuKarras.py
    ├── __init__.py
    ├── add_fooocus_inpaint_head_patch.py
    ├── add_fooocus_inpaint_patch.py
    ├── mask_aug.py
    ├── orthogonal_decomposition.py
    └── prompt_style_enhance.py
/assets/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/assets/teaser.png
--------------------------------------------------------------------------------
/data/1_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/1_1.jpg
--------------------------------------------------------------------------------
/data/1_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/1_2.jpg -------------------------------------------------------------------------------- /data/2_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/2_1.jpg -------------------------------------------------------------------------------- /data/2_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/2_2.jpg -------------------------------------------------------------------------------- /data/3_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/3_1.jpg -------------------------------------------------------------------------------- /data/3_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/3_2.jpg -------------------------------------------------------------------------------- /data/4_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/4_1.jpg -------------------------------------------------------------------------------- /data/4_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/4_2.jpg -------------------------------------------------------------------------------- /infer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "\n", 11 | "DEVICE = \"cuda:1\"\n", 12 | "torch.cuda.set_device(DEVICE)\n", 13 | "\n", 14 | "from FooocusSDXLInpaintAllInOnePipeline import FooocusSDXLInpaintPipeline\n", 15 | "\n", 16 | "pipe = FooocusSDXLInpaintPipeline.from_pretrained(\n", 17 | " \"frankjoshua/juggernautXL_v8Rundiffusion\",\n", 18 | " torch_dtype=torch.float16,\n", 19 | " use_safetensors=True,\n", 20 | ").to(DEVICE)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "notebookRunGroups": { 28 | "groupValue": "1" 29 | } 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "lora_config = [\n", 34 | " {\n", 35 | " \"model_path\": f\"lora/xxx\",\n", 36 | " \"scale\": 1,\n", 37 | " \"for_raw_unet\": False,\n", 38 | " \"for_fooocus_unet\": True,\n", 39 | " },\n", 40 | "]\n", 41 | "\n", 42 | "pipe.preload_fooocus_unet(\n", 43 | " fooocus_model_path=\"./models/fooocus_inpaint/inpaint_v26.fooocus.patch\",\n", 44 | " lora_configs=lora_config,\n", 45 | " add_double_sa=False,\n", 46 | ")" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from diffusers.utils import load_image\n", 56 | "from PIL import Image\n", 57 | "\n", 58 | "\n", 59 | "img_url = f\"data/1_1.jpg\"\n", 60 | "mask_url = f\"data/1_2.jpg\"\n", 61 | "\n", 62 | "init_image = load_image(img_url).convert(\"RGB\")\n", 63 | "mask_image = 
load_image(mask_url).convert(\"RGB\")\n", 64 | "\n", 65 | "prompt = \"\"\n", 66 | "negative_prompt = \"\"" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# Infer!\n", 76 | "image = pipe(\n", 77 | " isf_global_time=20,\n", 78 | " isf_global_ia=1,\n", 79 | " decompose_prefix_prompt=\"a photo of a sks\",\n", 80 | " sks_decompose_words=[\"\"],\n", 81 | " fooocus_model_head_path=\"./models/fooocus_inpaint/fooocus_inpaint_head.pth\",\n", 82 | " fooocus_model_head_upscale_path=\"./models/upscale_models/fooocus_upscaler_s409985e5.bin\",\n", 83 | " pag_scale=1,\n", 84 | " guidance_scale=4,\n", 85 | " ref_image_type=\"no\", \n", 86 | " double_sa_alpha=1,\n", 87 | " save_self_attn=False,\n", 88 | " save_cross_attn=False,\n", 89 | " fooocus_time=0.8,\n", 90 | " inpaint_respective_field=0.5, \n", 91 | " sharpness=1, \n", 92 | " adm_scaler_positive=1.5, \n", 93 | " adm_scaler_negative=0.8, \n", 94 | " adm_scaler_end=0.3,\n", 95 | " seed=42,\n", 96 | " image=init_image,\n", 97 | " mask_image=mask_image,\n", 98 | " prompt=prompt,\n", 99 | " negative_prompt=negative_prompt,\n", 100 | " num_inference_steps=30,\n", 101 | " strength=1,\n", 102 | ")\n", 103 | "image.resize((512, 512))\n", 104 | "image" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "DreamMix", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.10.15" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_clip_sdxl.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | from ldm_patched.contrib.external import MAX_RESOLUTION 5 | 6 | class CLIPTextEncodeSDXLRefiner: 7 | @classmethod 8 | def INPUT_TYPES(s): 9 | return {"required": { 10 | "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}), 11 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 12 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 13 | "text": ("STRING", {"multiline": True}), "clip": ("CLIP", ), 14 | }} 15 | RETURN_TYPES = ("CONDITIONING",) 16 | FUNCTION = "encode" 17 | 18 | CATEGORY = "advanced/conditioning" 19 | 20 | def encode(self, clip, ascore, width, height, text): 21 | tokens = clip.tokenize(text) 22 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 23 | return ([[cond, {"pooled_output": pooled, "aesthetic_score": ascore, "width": width,"height": height}]], ) 24 | 25 | class CLIPTextEncodeSDXL: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { 29 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 30 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 31 | "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 32 | "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 33 | "target_width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 34 | "target_height": ("INT", {"default": 1024.0, "min": 0, 
"max": MAX_RESOLUTION}), 35 | "text_g": ("STRING", {"multiline": True, "default": "CLIP_G"}), "clip": ("CLIP", ), 36 | "text_l": ("STRING", {"multiline": True, "default": "CLIP_L"}), "clip": ("CLIP", ), 37 | }} 38 | RETURN_TYPES = ("CONDITIONING",) 39 | FUNCTION = "encode" 40 | 41 | CATEGORY = "advanced/conditioning" 42 | 43 | def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l): 44 | tokens = clip.tokenize(text_g) 45 | tokens["l"] = clip.tokenize(text_l)["l"] 46 | if len(tokens["l"]) != len(tokens["g"]): 47 | empty = clip.tokenize("") 48 | while len(tokens["l"]) < len(tokens["g"]): 49 | tokens["l"] += empty["l"] 50 | while len(tokens["l"]) > len(tokens["g"]): 51 | tokens["g"] += empty["g"] 52 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 53 | return ([[cond, {"pooled_output": pooled, "width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}]], ) 54 | 55 | NODE_CLASS_MAPPINGS = { 56 | "CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner, 57 | "CLIPTextEncodeSDXL": CLIPTextEncodeSDXL, 58 | } 59 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_freelunch.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | #code originally taken from: https://github.com/ChenyangSi/FreeU (under MIT License) 4 | 5 | import torch 6 | 7 | 8 | def Fourier_filter(x, threshold, scale): 9 | # FFT 10 | x_freq = torch.fft.fftn(x.float(), dim=(-2, -1)) 11 | x_freq = torch.fft.fftshift(x_freq, dim=(-2, -1)) 12 | 13 | B, C, H, W = x_freq.shape 14 | mask = torch.ones((B, C, H, W), device=x.device) 15 | 16 | crow, ccol = H // 2, W //2 17 | mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale 18 | x_freq = x_freq * mask 19 | 20 | # IFFT 21 | x_freq = torch.fft.ifftshift(x_freq, dim=(-2, -1)) 22 | x_filtered = torch.fft.ifftn(x_freq, dim=(-2, -1)).real 23 | 24 | return x_filtered.to(x.dtype) 25 | 26 | 27 | class FreeU: 28 | @classmethod 29 | def INPUT_TYPES(s): 30 | return {"required": { "model": ("MODEL",), 31 | "b1": ("FLOAT", {"default": 1.1, "min": 0.0, "max": 10.0, "step": 0.01}), 32 | "b2": ("FLOAT", {"default": 1.2, "min": 0.0, "max": 10.0, "step": 0.01}), 33 | "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), 34 | "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), 35 | }} 36 | RETURN_TYPES = ("MODEL",) 37 | FUNCTION = "patch" 38 | 39 | CATEGORY = "model_patches" 40 | 41 | def patch(self, model, b1, b2, s1, s2): 42 | model_channels = model.model.model_config.unet_config["model_channels"] 43 | scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} 44 | on_cpu_devices = {} 45 | 46 | def output_block_patch(h, hsp, transformer_options): 47 | scale = scale_dict.get(h.shape[1], None) 48 | if scale is not None: 49 | h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * scale[0] 50 | if hsp.device not in on_cpu_devices: 51 | try: 52 | hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) 53 | except: 54 | print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") 55 | on_cpu_devices[hsp.device] = True 56 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 57 | else: 58 | hsp = Fourier_filter(hsp.cpu(), threshold=1, 
scale=scale[1]).to(hsp.device) 59 | 60 | return h, hsp 61 | 62 | m = model.clone() 63 | m.set_model_output_block_patch(output_block_patch) 64 | return (m, ) 65 | 66 | class FreeU_V2: 67 | @classmethod 68 | def INPUT_TYPES(s): 69 | return {"required": { "model": ("MODEL",), 70 | "b1": ("FLOAT", {"default": 1.3, "min": 0.0, "max": 10.0, "step": 0.01}), 71 | "b2": ("FLOAT", {"default": 1.4, "min": 0.0, "max": 10.0, "step": 0.01}), 72 | "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), 73 | "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), 74 | }} 75 | RETURN_TYPES = ("MODEL",) 76 | FUNCTION = "patch" 77 | 78 | CATEGORY = "model_patches" 79 | 80 | def patch(self, model, b1, b2, s1, s2): 81 | model_channels = model.model.model_config.unet_config["model_channels"] 82 | scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} 83 | on_cpu_devices = {} 84 | 85 | def output_block_patch(h, hsp, transformer_options): 86 | scale = scale_dict.get(h.shape[1], None) 87 | if scale is not None: 88 | hidden_mean = h.mean(1).unsqueeze(1) 89 | B = hidden_mean.shape[0] 90 | hidden_max, _ = torch.max(hidden_mean.view(B, -1), dim=-1, keepdim=True) 91 | hidden_min, _ = torch.min(hidden_mean.view(B, -1), dim=-1, keepdim=True) 92 | hidden_mean = (hidden_mean - hidden_min.unsqueeze(2).unsqueeze(3)) / (hidden_max - hidden_min).unsqueeze(2).unsqueeze(3) 93 | 94 | h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * ((scale[0] - 1 ) * hidden_mean + 1) 95 | 96 | if hsp.device not in on_cpu_devices: 97 | try: 98 | hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) 99 | except: 100 | print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") 101 | on_cpu_devices[hsp.device] = True 102 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 103 | else: 104 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 105 | 106 | return h, hsp 107 | 108 | m = model.clone() 109 | m.set_model_output_block_patch(output_block_patch) 110 | return (m, ) 111 | 112 | NODE_CLASS_MAPPINGS = { 113 | "FreeU": FreeU, 114 | "FreeU_V2": FreeU_V2, 115 | } 116 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_hypernetwork.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.modules.utils 4 | import ldm_patched.utils.path_utils 5 | import torch 6 | 7 | def load_hypernetwork_patch(path, strength): 8 | sd = ldm_patched.modules.utils.load_torch_file(path, safe_load=True) 9 | activation_func = sd.get('activation_func', 'linear') 10 | is_layer_norm = sd.get('is_layer_norm', False) 11 | use_dropout = sd.get('use_dropout', False) 12 | activate_output = sd.get('activate_output', False) 13 | last_layer_dropout = sd.get('last_layer_dropout', False) 14 | 15 | valid_activation = { 16 | "linear": torch.nn.Identity, 17 | "relu": torch.nn.ReLU, 18 | "leakyrelu": torch.nn.LeakyReLU, 19 | "elu": torch.nn.ELU, 20 | "swish": torch.nn.Hardswish, 21 | "tanh": torch.nn.Tanh, 22 | "sigmoid": torch.nn.Sigmoid, 23 | "softsign": torch.nn.Softsign, 24 | "mish": torch.nn.Mish, 25 | } 26 | 27 | if activation_func not in valid_activation: 28 | print("Unsupported Hypernetwork format, if you report it I might implement it.", path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout) 29 | return None 
30 | 31 | out = {} 32 | 33 | for d in sd: 34 | try: 35 | dim = int(d) 36 | except: 37 | continue 38 | 39 | output = [] 40 | for index in [0, 1]: 41 | attn_weights = sd[dim][index] 42 | keys = attn_weights.keys() 43 | 44 | linears = filter(lambda a: a.endswith(".weight"), keys) 45 | linears = list(map(lambda a: a[:-len(".weight")], linears)) 46 | layers = [] 47 | 48 | i = 0 49 | while i < len(linears): 50 | lin_name = linears[i] 51 | last_layer = (i == (len(linears) - 1)) 52 | penultimate_layer = (i == (len(linears) - 2)) 53 | 54 | lin_weight = attn_weights['{}.weight'.format(lin_name)] 55 | lin_bias = attn_weights['{}.bias'.format(lin_name)] 56 | layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0]) 57 | layer.load_state_dict({"weight": lin_weight, "bias": lin_bias}) 58 | layers.append(layer) 59 | if activation_func != "linear": 60 | if (not last_layer) or (activate_output): 61 | layers.append(valid_activation[activation_func]()) 62 | if is_layer_norm: 63 | i += 1 64 | ln_name = linears[i] 65 | ln_weight = attn_weights['{}.weight'.format(ln_name)] 66 | ln_bias = attn_weights['{}.bias'.format(ln_name)] 67 | ln = torch.nn.LayerNorm(ln_weight.shape[0]) 68 | ln.load_state_dict({"weight": ln_weight, "bias": ln_bias}) 69 | layers.append(ln) 70 | if use_dropout: 71 | if (not last_layer) and (not penultimate_layer or last_layer_dropout): 72 | layers.append(torch.nn.Dropout(p=0.3)) 73 | i += 1 74 | 75 | output.append(torch.nn.Sequential(*layers)) 76 | out[dim] = torch.nn.ModuleList(output) 77 | 78 | class hypernetwork_patch: 79 | def __init__(self, hypernet, strength): 80 | self.hypernet = hypernet 81 | self.strength = strength 82 | def __call__(self, q, k, v, extra_options): 83 | dim = k.shape[-1] 84 | if dim in self.hypernet: 85 | hn = self.hypernet[dim] 86 | k = k + hn[0](k) * self.strength 87 | v = v + hn[1](v) * self.strength 88 | 89 | return q, k, v 90 | 91 | def to(self, device): 92 | for d in self.hypernet.keys(): 93 | self.hypernet[d] = self.hypernet[d].to(device) 94 | return self 95 | 96 | return hypernetwork_patch(out, strength) 97 | 98 | class HypernetworkLoader: 99 | @classmethod 100 | def INPUT_TYPES(s): 101 | return {"required": { "model": ("MODEL",), 102 | "hypernetwork_name": (ldm_patched.utils.path_utils.get_filename_list("hypernetworks"), ), 103 | "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), 104 | }} 105 | RETURN_TYPES = ("MODEL",) 106 | FUNCTION = "load_hypernetwork" 107 | 108 | CATEGORY = "loaders" 109 | 110 | def load_hypernetwork(self, model, hypernetwork_name, strength): 111 | hypernetwork_path = ldm_patched.utils.path_utils.get_full_path("hypernetworks", hypernetwork_name) 112 | model_hypernetwork = model.clone() 113 | patch = load_hypernetwork_patch(hypernetwork_path, strength) 114 | if patch is not None: 115 | model_hypernetwork.set_model_attn1_patch(patch) 116 | model_hypernetwork.set_model_attn2_patch(patch) 117 | return (model_hypernetwork,) 118 | 119 | NODE_CLASS_MAPPINGS = { 120 | "HypernetworkLoader": HypernetworkLoader 121 | } 122 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_hypertile.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | #Taken from: https://github.com/tfernd/HyperTile/ 4 | 5 | import math 6 | from einops import rearrange 7 | # Use torch rng for consistency across generations 8 | from torch import randint 9 | 10 | def 
random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: 11 | min_value = min(min_value, value) 12 | 13 | # All big divisors of value (inclusive) 14 | divisors = [i for i in range(min_value, value + 1) if value % i == 0] 15 | 16 | ns = [value // i for i in divisors[:max_options]] # has at least 1 element 17 | 18 | if len(ns) - 1 > 0: 19 | idx = randint(low=0, high=len(ns) - 1, size=(1,)).item() 20 | else: 21 | idx = 0 22 | 23 | return ns[idx] 24 | 25 | class HyperTile: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { "model": ("MODEL",), 29 | "tile_size": ("INT", {"default": 256, "min": 1, "max": 2048}), 30 | "swap_size": ("INT", {"default": 2, "min": 1, "max": 128}), 31 | "max_depth": ("INT", {"default": 0, "min": 0, "max": 10}), 32 | "scale_depth": ("BOOLEAN", {"default": False}), 33 | }} 34 | RETURN_TYPES = ("MODEL",) 35 | FUNCTION = "patch" 36 | 37 | CATEGORY = "model_patches" 38 | 39 | def patch(self, model, tile_size, swap_size, max_depth, scale_depth): 40 | model_channels = model.model.model_config.unet_config["model_channels"] 41 | 42 | latent_tile_size = max(32, tile_size) // 8 43 | self.temp = None 44 | 45 | def hypertile_in(q, k, v, extra_options): 46 | model_chans = q.shape[-2] 47 | orig_shape = extra_options['original_shape'] 48 | apply_to = [] 49 | for i in range(max_depth + 1): 50 | apply_to.append((orig_shape[-2] / (2 ** i)) * (orig_shape[-1] / (2 ** i))) 51 | 52 | if model_chans in apply_to: 53 | shape = extra_options["original_shape"] 54 | aspect_ratio = shape[-1] / shape[-2] 55 | 56 | hw = q.size(1) 57 | h, w = round(math.sqrt(hw * aspect_ratio)), round(math.sqrt(hw / aspect_ratio)) 58 | 59 | factor = (2 ** apply_to.index(model_chans)) if scale_depth else 1 60 | nh = random_divisor(h, latent_tile_size * factor, swap_size) 61 | nw = random_divisor(w, latent_tile_size * factor, swap_size) 62 | 63 | if nh * nw > 1: 64 | q = rearrange(q, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) 65 | self.temp = (nh, nw, h, w) 66 | return q, k, v 67 | 68 | return q, k, v 69 | def hypertile_out(out, extra_options): 70 | if self.temp is not None: 71 | nh, nw, h, w = self.temp 72 | self.temp = None 73 | out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) 74 | out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) 75 | return out 76 | 77 | 78 | m = model.clone() 79 | m.set_model_attn1_patch(hypertile_in) 80 | m.set_model_attn1_output_patch(hypertile_out) 81 | return (m, ) 82 | 83 | NODE_CLASS_MAPPINGS = { 84 | "HyperTile": HyperTile, 85 | } 86 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_latent.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.modules.utils 4 | import torch 5 | 6 | def reshape_latent_to(target_shape, latent): 7 | if latent.shape[1:] != target_shape[1:]: 8 | latent = ldm_patched.modules.utils.common_upscale(latent, target_shape[3], target_shape[2], "bilinear", "center") 9 | return ldm_patched.modules.utils.repeat_to_batch_size(latent, target_shape[0]) 10 | 11 | 12 | class LatentAdd: 13 | @classmethod 14 | def INPUT_TYPES(s): 15 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 16 | 17 | RETURN_TYPES = ("LATENT",) 18 | FUNCTION = "op" 19 | 20 | CATEGORY = "latent/advanced" 21 | 22 | def op(self, samples1, samples2): 23 | samples_out = 
samples1.copy() 24 | 25 | s1 = samples1["samples"] 26 | s2 = samples2["samples"] 27 | 28 | s2 = reshape_latent_to(s1.shape, s2) 29 | samples_out["samples"] = s1 + s2 30 | return (samples_out,) 31 | 32 | class LatentSubtract: 33 | @classmethod 34 | def INPUT_TYPES(s): 35 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 36 | 37 | RETURN_TYPES = ("LATENT",) 38 | FUNCTION = "op" 39 | 40 | CATEGORY = "latent/advanced" 41 | 42 | def op(self, samples1, samples2): 43 | samples_out = samples1.copy() 44 | 45 | s1 = samples1["samples"] 46 | s2 = samples2["samples"] 47 | 48 | s2 = reshape_latent_to(s1.shape, s2) 49 | samples_out["samples"] = s1 - s2 50 | return (samples_out,) 51 | 52 | class LatentMultiply: 53 | @classmethod 54 | def INPUT_TYPES(s): 55 | return {"required": { "samples": ("LATENT",), 56 | "multiplier": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), 57 | }} 58 | 59 | RETURN_TYPES = ("LATENT",) 60 | FUNCTION = "op" 61 | 62 | CATEGORY = "latent/advanced" 63 | 64 | def op(self, samples, multiplier): 65 | samples_out = samples.copy() 66 | 67 | s1 = samples["samples"] 68 | samples_out["samples"] = s1 * multiplier 69 | return (samples_out,) 70 | 71 | class LatentInterpolate: 72 | @classmethod 73 | def INPUT_TYPES(s): 74 | return {"required": { "samples1": ("LATENT",), 75 | "samples2": ("LATENT",), 76 | "ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), 77 | }} 78 | 79 | RETURN_TYPES = ("LATENT",) 80 | FUNCTION = "op" 81 | 82 | CATEGORY = "latent/advanced" 83 | 84 | def op(self, samples1, samples2, ratio): 85 | samples_out = samples1.copy() 86 | 87 | s1 = samples1["samples"] 88 | s2 = samples2["samples"] 89 | 90 | s2 = reshape_latent_to(s1.shape, s2) 91 | 92 | m1 = torch.linalg.vector_norm(s1, dim=(1)) 93 | m2 = torch.linalg.vector_norm(s2, dim=(1)) 94 | 95 | s1 = torch.nan_to_num(s1 / m1) 96 | s2 = torch.nan_to_num(s2 / m2) 97 | 98 | t = (s1 * ratio + s2 * (1.0 - ratio)) 99 | mt = torch.linalg.vector_norm(t, dim=(1)) 100 | st = torch.nan_to_num(t / mt) 101 | 102 | samples_out["samples"] = st * (m1 * ratio + m2 * (1.0 - ratio)) 103 | return (samples_out,) 104 | 105 | class LatentBatch: 106 | @classmethod 107 | def INPUT_TYPES(s): 108 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 109 | 110 | RETURN_TYPES = ("LATENT",) 111 | FUNCTION = "batch" 112 | 113 | CATEGORY = "latent/batch" 114 | 115 | def batch(self, samples1, samples2): 116 | samples_out = samples1.copy() 117 | s1 = samples1["samples"] 118 | s2 = samples2["samples"] 119 | 120 | if s1.shape[1:] != s2.shape[1:]: 121 | s2 = ldm_patched.modules.utils.common_upscale(s2, s1.shape[3], s1.shape[2], "bilinear", "center") 122 | s = torch.cat((s1, s2), dim=0) 123 | samples_out["samples"] = s 124 | samples_out["batch_index"] = samples1.get("batch_index", [x for x in range(0, s1.shape[0])]) + samples2.get("batch_index", [x for x in range(0, s2.shape[0])]) 125 | return (samples_out,) 126 | 127 | class LatentBatchSeedBehavior: 128 | @classmethod 129 | def INPUT_TYPES(s): 130 | return {"required": { "samples": ("LATENT",), 131 | "seed_behavior": (["random", "fixed"],),}} 132 | 133 | RETURN_TYPES = ("LATENT",) 134 | FUNCTION = "op" 135 | 136 | CATEGORY = "latent/advanced" 137 | 138 | def op(self, samples, seed_behavior): 139 | samples_out = samples.copy() 140 | latent = samples["samples"] 141 | if seed_behavior == "random": 142 | if 'batch_index' in samples_out: 143 | samples_out.pop('batch_index') 144 | elif seed_behavior == "fixed": 145 | 
batch_number = samples_out.get("batch_index", [0])[0] 146 | samples_out["batch_index"] = [batch_number] * latent.shape[0] 147 | 148 | return (samples_out,) 149 | 150 | NODE_CLASS_MAPPINGS = { 151 | "LatentAdd": LatentAdd, 152 | "LatentSubtract": LatentSubtract, 153 | "LatentMultiply": LatentMultiply, 154 | "LatentInterpolate": LatentInterpolate, 155 | "LatentBatch": LatentBatch, 156 | "LatentBatchSeedBehavior": LatentBatchSeedBehavior, 157 | } 158 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_model_downscale.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.modules.utils 5 | 6 | class PatchModelAddDownscale: 7 | upscale_methods = ["bicubic", "nearest-exact", "bilinear", "area", "bislerp"] 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | return {"required": { "model": ("MODEL",), 11 | "block_number": ("INT", {"default": 3, "min": 1, "max": 32, "step": 1}), 12 | "downscale_factor": ("FLOAT", {"default": 2.0, "min": 0.1, "max": 9.0, "step": 0.001}), 13 | "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), 14 | "end_percent": ("FLOAT", {"default": 0.35, "min": 0.0, "max": 1.0, "step": 0.001}), 15 | "downscale_after_skip": ("BOOLEAN", {"default": True}), 16 | "downscale_method": (s.upscale_methods,), 17 | "upscale_method": (s.upscale_methods,), 18 | }} 19 | RETURN_TYPES = ("MODEL",) 20 | FUNCTION = "patch" 21 | 22 | CATEGORY = "_for_testing" 23 | 24 | def patch(self, model, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method): 25 | sigma_start = model.model.model_sampling.percent_to_sigma(start_percent) 26 | sigma_end = model.model.model_sampling.percent_to_sigma(end_percent) 27 | 28 | def input_block_patch(h, transformer_options): 29 | if transformer_options["block"][1] == block_number: 30 | sigma = transformer_options["sigmas"][0].item() 31 | if sigma <= sigma_start and sigma >= sigma_end: 32 | h = ldm_patched.modules.utils.common_upscale(h, round(h.shape[-1] * (1.0 / downscale_factor)), round(h.shape[-2] * (1.0 / downscale_factor)), downscale_method, "disabled") 33 | return h 34 | 35 | def output_block_patch(h, hsp, transformer_options): 36 | if h.shape[2] != hsp.shape[2]: 37 | h = ldm_patched.modules.utils.common_upscale(h, hsp.shape[-1], hsp.shape[-2], upscale_method, "disabled") 38 | return h, hsp 39 | 40 | m = model.clone() 41 | if downscale_after_skip: 42 | m.set_model_input_block_patch_after_skip(input_block_patch) 43 | else: 44 | m.set_model_input_block_patch(input_block_patch) 45 | m.set_model_output_block_patch(output_block_patch) 46 | return (m, ) 47 | 48 | NODE_CLASS_MAPPINGS = { 49 | "PatchModelAddDownscale": PatchModelAddDownscale, 50 | } 51 | 52 | NODE_DISPLAY_NAME_MAPPINGS = { 53 | # Sampling 54 | "PatchModelAddDownscale": "PatchModelAddDownscale (Kohya Deep Shrink)", 55 | } 56 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_perpneg.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.modules.model_management 5 | import ldm_patched.modules.sample 6 | import ldm_patched.modules.samplers 7 | import ldm_patched.modules.utils 8 | 9 | 10 | class PerpNeg: 11 | @classmethod 
12 | def INPUT_TYPES(s): 13 | return {"required": {"model": ("MODEL", ), 14 | "empty_conditioning": ("CONDITIONING", ), 15 | "neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}), 16 | }} 17 | RETURN_TYPES = ("MODEL",) 18 | FUNCTION = "patch" 19 | 20 | CATEGORY = "_for_testing" 21 | 22 | def patch(self, model, empty_conditioning, neg_scale): 23 | m = model.clone() 24 | nocond = ldm_patched.modules.sample.convert_cond(empty_conditioning) 25 | 26 | def cfg_function(args): 27 | model = args["model"] 28 | noise_pred_pos = args["cond_denoised"] 29 | noise_pred_neg = args["uncond_denoised"] 30 | cond_scale = args["cond_scale"] 31 | x = args["input"] 32 | sigma = args["sigma"] 33 | model_options = args["model_options"] 34 | nocond_processed = ldm_patched.modules.samplers.encode_model_conds(model.extra_conds, nocond, x, x.device, "negative") 35 | 36 | (noise_pred_nocond, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, nocond_processed, None, x, sigma, model_options) 37 | 38 | pos = noise_pred_pos - noise_pred_nocond 39 | neg = noise_pred_neg - noise_pred_nocond 40 | perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg 41 | perp_neg = perp * neg_scale 42 | cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg) 43 | cfg_result = x - cfg_result 44 | return cfg_result 45 | 46 | m.set_model_sampler_cfg_function(cfg_function) 47 | 48 | return (m, ) 49 | 50 | 51 | NODE_CLASS_MAPPINGS = { 52 | "PerpNeg": PerpNeg, 53 | } 54 | 55 | NODE_DISPLAY_NAME_MAPPINGS = { 56 | "PerpNeg": "Perp-Neg", 57 | } 58 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_sdupscale.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.contrib.external 5 | import ldm_patched.modules.utils 6 | 7 | class SD_4XUpscale_Conditioning: 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | return {"required": { "images": ("IMAGE",), 11 | "positive": ("CONDITIONING",), 12 | "negative": ("CONDITIONING",), 13 | "scale_ratio": ("FLOAT", {"default": 4.0, "min": 0.0, "max": 10.0, "step": 0.01}), 14 | "noise_augmentation": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), 15 | }} 16 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 17 | RETURN_NAMES = ("positive", "negative", "latent") 18 | 19 | FUNCTION = "encode" 20 | 21 | CATEGORY = "conditioning/upscale_diffusion" 22 | 23 | def encode(self, images, positive, negative, scale_ratio, noise_augmentation): 24 | width = max(1, round(images.shape[-2] * scale_ratio)) 25 | height = max(1, round(images.shape[-3] * scale_ratio)) 26 | 27 | pixels = ldm_patched.modules.utils.common_upscale((images.movedim(-1,1) * 2.0) - 1.0, width // 4, height // 4, "bilinear", "center") 28 | 29 | out_cp = [] 30 | out_cn = [] 31 | 32 | for t in positive: 33 | n = [t[0], t[1].copy()] 34 | n[1]['concat_image'] = pixels 35 | n[1]['noise_augmentation'] = noise_augmentation 36 | out_cp.append(n) 37 | 38 | for t in negative: 39 | n = [t[0], t[1].copy()] 40 | n[1]['concat_image'] = pixels 41 | n[1]['noise_augmentation'] = noise_augmentation 42 | out_cn.append(n) 43 | 44 | latent = torch.zeros([images.shape[0], 4, height // 4, width // 4]) 45 | return (out_cp, out_cn, {"samples":latent}) 46 | 47 | NODE_CLASS_MAPPINGS = { 48 | "SD_4XUpscale_Conditioning": SD_4XUpscale_Conditioning, 49 | } 50 | 
-------------------------------------------------------------------------------- /ldm_patched/contrib/external_stable3d.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.contrib.external 5 | import ldm_patched.modules.utils 6 | 7 | def camera_embeddings(elevation, azimuth): 8 | elevation = torch.as_tensor([elevation]) 9 | azimuth = torch.as_tensor([azimuth]) 10 | embeddings = torch.stack( 11 | [ 12 | torch.deg2rad( 13 | (90 - elevation) - (90) 14 | ), # Zero123 polar is 90-elevation 15 | torch.sin(torch.deg2rad(azimuth)), 16 | torch.cos(torch.deg2rad(azimuth)), 17 | torch.deg2rad( 18 | 90 - torch.full_like(elevation, 0) 19 | ), 20 | ], dim=-1).unsqueeze(1) 21 | 22 | return embeddings 23 | 24 | 25 | class StableZero123_Conditioning: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { "clip_vision": ("CLIP_VISION",), 29 | "init_image": ("IMAGE",), 30 | "vae": ("VAE",), 31 | "width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 32 | "height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 33 | "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), 34 | "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 35 | "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 36 | }} 37 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 38 | RETURN_NAMES = ("positive", "negative", "latent") 39 | 40 | FUNCTION = "encode" 41 | 42 | CATEGORY = "conditioning/3d_models" 43 | 44 | def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth): 45 | output = clip_vision.encode_image(init_image) 46 | pooled = output.image_embeds.unsqueeze(0) 47 | pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) 48 | encode_pixels = pixels[:,:,:,:3] 49 | t = vae.encode(encode_pixels) 50 | cam_embeds = camera_embeddings(elevation, azimuth) 51 | cond = torch.cat([pooled, cam_embeds.to(pooled.device).repeat((pooled.shape[0], 1, 1))], dim=-1) 52 | 53 | positive = [[cond, {"concat_latent_image": t}]] 54 | negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]] 55 | latent = torch.zeros([batch_size, 4, height // 8, width // 8]) 56 | return (positive, negative, {"samples":latent}) 57 | 58 | class StableZero123_Conditioning_Batched: 59 | @classmethod 60 | def INPUT_TYPES(s): 61 | return {"required": { "clip_vision": ("CLIP_VISION",), 62 | "init_image": ("IMAGE",), 63 | "vae": ("VAE",), 64 | "width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 65 | "height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 66 | "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), 67 | "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 68 | "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 69 | "elevation_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 70 | "azimuth_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 71 | }} 72 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 73 | RETURN_NAMES = ("positive", "negative", "latent") 74 | 75 | FUNCTION = "encode" 76 | 77 | CATEGORY = 
"conditioning/3d_models" 78 | 79 | def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment): 80 | output = clip_vision.encode_image(init_image) 81 | pooled = output.image_embeds.unsqueeze(0) 82 | pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) 83 | encode_pixels = pixels[:,:,:,:3] 84 | t = vae.encode(encode_pixels) 85 | 86 | cam_embeds = [] 87 | for i in range(batch_size): 88 | cam_embeds.append(camera_embeddings(elevation, azimuth)) 89 | elevation += elevation_batch_increment 90 | azimuth += azimuth_batch_increment 91 | 92 | cam_embeds = torch.cat(cam_embeds, dim=0) 93 | cond = torch.cat([ldm_patched.modules.utils.repeat_to_batch_size(pooled, batch_size), cam_embeds], dim=-1) 94 | 95 | positive = [[cond, {"concat_latent_image": t}]] 96 | negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]] 97 | latent = torch.zeros([batch_size, 4, height // 8, width // 8]) 98 | return (positive, negative, {"samples":latent, "batch_index": [0] * batch_size}) 99 | 100 | 101 | NODE_CLASS_MAPPINGS = { 102 | "StableZero123_Conditioning": StableZero123_Conditioning, 103 | "StableZero123_Conditioning_Batched": StableZero123_Conditioning_Batched, 104 | } 105 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_upscale_model.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import os 4 | from ldm_patched.pfn import model_loading 5 | from ldm_patched.modules import model_management 6 | import torch 7 | import ldm_patched.modules.utils 8 | import ldm_patched.utils.path_utils 9 | 10 | class UpscaleModelLoader: 11 | @classmethod 12 | def INPUT_TYPES(s): 13 | return {"required": { "model_name": (ldm_patched.utils.path_utils.get_filename_list("upscale_models"), ), 14 | }} 15 | RETURN_TYPES = ("UPSCALE_MODEL",) 16 | FUNCTION = "load_model" 17 | 18 | CATEGORY = "loaders" 19 | 20 | def load_model(self, model_name): 21 | model_path = ldm_patched.utils.path_utils.get_full_path("upscale_models", model_name) 22 | sd = ldm_patched.modules.utils.load_torch_file(model_path, safe_load=True) 23 | if "module.layers.0.residual_group.blocks.0.norm1.weight" in sd: 24 | sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"module.":""}) 25 | out = model_loading.load_state_dict(sd).eval() 26 | return (out, ) 27 | 28 | 29 | class ImageUpscaleWithModel: 30 | @classmethod 31 | def INPUT_TYPES(s): 32 | return {"required": { "upscale_model": ("UPSCALE_MODEL",), 33 | "image": ("IMAGE",), 34 | }} 35 | RETURN_TYPES = ("IMAGE",) 36 | FUNCTION = "upscale" 37 | 38 | CATEGORY = "image/upscaling" 39 | 40 | def upscale(self, upscale_model, image): 41 | device = model_management.get_torch_device() 42 | upscale_model.to(device) 43 | in_img = image.movedim(-1,-3).to(device) 44 | free_memory = model_management.get_free_memory(device) 45 | 46 | tile = 512 47 | overlap = 32 48 | 49 | oom = True 50 | while oom: 51 | try: 52 | steps = in_img.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(in_img.shape[3], in_img.shape[2], tile_x=tile, tile_y=tile, overlap=overlap) 53 | pbar = ldm_patched.modules.utils.ProgressBar(steps) 54 | s = ldm_patched.modules.utils.tiled_scale(in_img, lambda a: upscale_model(a), tile_x=tile, tile_y=tile, overlap=overlap, 
upscale_amount=upscale_model.scale, pbar=pbar) 55 | oom = False 56 | except model_management.OOM_EXCEPTION as e: 57 | tile //= 2 58 | if tile < 128: 59 | raise e 60 | 61 | upscale_model.cpu() 62 | s = torch.clamp(s.movedim(-3,-1), min=0, max=1.0) 63 | return (s,) 64 | 65 | NODE_CLASS_MAPPINGS = { 66 | "UpscaleModelLoader": UpscaleModelLoader, 67 | "ImageUpscaleWithModel": ImageUpscaleWithModel 68 | } 69 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_video_model.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.contrib.external 4 | import torch 5 | import ldm_patched.modules.utils 6 | import ldm_patched.modules.sd 7 | import ldm_patched.utils.path_utils 8 | import ldm_patched.contrib.external_model_merging 9 | 10 | 11 | class ImageOnlyCheckpointLoader: 12 | @classmethod 13 | def INPUT_TYPES(s): 14 | return {"required": { "ckpt_name": (ldm_patched.utils.path_utils.get_filename_list("checkpoints"), ), 15 | }} 16 | RETURN_TYPES = ("MODEL", "CLIP_VISION", "VAE") 17 | FUNCTION = "load_checkpoint" 18 | 19 | CATEGORY = "loaders/video_models" 20 | 21 | def load_checkpoint(self, ckpt_name, output_vae=True, output_clip=True): 22 | ckpt_path = ldm_patched.utils.path_utils.get_full_path("checkpoints", ckpt_name) 23 | out = ldm_patched.modules.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=False, output_clipvision=True, embedding_directory=ldm_patched.utils.path_utils.get_folder_paths("embeddings")) 24 | return (out[0], out[3], out[2]) 25 | 26 | 27 | class SVD_img2vid_Conditioning: 28 | @classmethod 29 | def INPUT_TYPES(s): 30 | return {"required": { "clip_vision": ("CLIP_VISION",), 31 | "init_image": ("IMAGE",), 32 | "vae": ("VAE",), 33 | "width": ("INT", {"default": 1024, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 34 | "height": ("INT", {"default": 576, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 35 | "video_frames": ("INT", {"default": 14, "min": 1, "max": 4096}), 36 | "motion_bucket_id": ("INT", {"default": 127, "min": 1, "max": 1023}), 37 | "fps": ("INT", {"default": 6, "min": 1, "max": 1024}), 38 | "augmentation_level": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 10.0, "step": 0.01}) 39 | }} 40 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 41 | RETURN_NAMES = ("positive", "negative", "latent") 42 | 43 | FUNCTION = "encode" 44 | 45 | CATEGORY = "conditioning/video_models" 46 | 47 | def encode(self, clip_vision, init_image, vae, width, height, video_frames, motion_bucket_id, fps, augmentation_level): 48 | output = clip_vision.encode_image(init_image) 49 | pooled = output.image_embeds.unsqueeze(0) 50 | pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) 51 | encode_pixels = pixels[:,:,:,:3] 52 | if augmentation_level > 0: 53 | encode_pixels += torch.randn_like(pixels) * augmentation_level 54 | t = vae.encode(encode_pixels) 55 | positive = [[pooled, {"motion_bucket_id": motion_bucket_id, "fps": fps, "augmentation_level": augmentation_level, "concat_latent_image": t}]] 56 | negative = [[torch.zeros_like(pooled), {"motion_bucket_id": motion_bucket_id, "fps": fps, "augmentation_level": augmentation_level, "concat_latent_image": torch.zeros_like(t)}]] 57 | latent = torch.zeros([video_frames, 4, height // 8, width // 8]) 58 | return 
(positive, negative, {"samples":latent}) 59 | 60 | class VideoLinearCFGGuidance: 61 | @classmethod 62 | def INPUT_TYPES(s): 63 | return {"required": { "model": ("MODEL",), 64 | "min_cfg": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.5, "round": 0.01}), 65 | }} 66 | RETURN_TYPES = ("MODEL",) 67 | FUNCTION = "patch" 68 | 69 | CATEGORY = "sampling/video_models" 70 | 71 | def patch(self, model, min_cfg): 72 | def linear_cfg(args): 73 | cond = args["cond"] 74 | uncond = args["uncond"] 75 | cond_scale = args["cond_scale"] 76 | 77 | scale = torch.linspace(min_cfg, cond_scale, cond.shape[0], device=cond.device).reshape((cond.shape[0], 1, 1, 1)) 78 | return uncond + scale * (cond - uncond) 79 | 80 | m = model.clone() 81 | m.set_model_sampler_cfg_function(linear_cfg) 82 | return (m, ) 83 | 84 | class ImageOnlyCheckpointSave(ldm_patched.contrib.external_model_merging.CheckpointSave): 85 | CATEGORY = "_for_testing" 86 | 87 | @classmethod 88 | def INPUT_TYPES(s): 89 | return {"required": { "model": ("MODEL",), 90 | "clip_vision": ("CLIP_VISION",), 91 | "vae": ("VAE",), 92 | "filename_prefix": ("STRING", {"default": "checkpoints/ldm_patched"}),}, 93 | "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},} 94 | 95 | def save(self, model, clip_vision, vae, filename_prefix, prompt=None, extra_pnginfo=None): 96 | ldm_patched.contrib.external_model_merging.save_checkpoint(model, clip_vision=clip_vision, vae=vae, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo) 97 | return {} 98 | 99 | NODE_CLASS_MAPPINGS = { 100 | "ImageOnlyCheckpointLoader": ImageOnlyCheckpointLoader, 101 | "SVD_img2vid_Conditioning": SVD_img2vid_Conditioning, 102 | "VideoLinearCFGGuidance": VideoLinearCFGGuidance, 103 | "ImageOnlyCheckpointSave": ImageOnlyCheckpointSave, 104 | } 105 | 106 | NODE_DISPLAY_NAME_MAPPINGS = { 107 | "ImageOnlyCheckpointLoader": "Image Only Checkpoint Loader (img2vid model)", 108 | } 109 | -------------------------------------------------------------------------------- /ldm_patched/ldm/models/__pycache__/autoencoder.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/models/__pycache__/autoencoder.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/__pycache__/attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/__pycache__/attention.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/__pycache__/ema.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/__pycache__/ema.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/__pycache__/sub_quadratic_attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/__pycache__/sub_quadratic_attention.cpython-310.pyc 
-------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/upscaling.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/upscaling.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/util.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/upscaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from functools import partial 5 | 6 | from .util import extract_into_tensor, make_beta_schedule 7 | from ldm_patched.ldm.util import default 8 | 9 | 10 | class AbstractLowScaleModel(nn.Module): 11 | # for concatenating a downsampled image to the latent representation 12 | def __init__(self, noise_schedule_config=None): 13 | super(AbstractLowScaleModel, self).__init__() 14 | if noise_schedule_config is not None: 15 | self.register_schedule(**noise_schedule_config) 16 | 17 | def register_schedule(self, beta_schedule="linear", timesteps=1000, 18 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 19 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, 20 | cosine_s=cosine_s) 21 | alphas = 1. 
- betas 22 | alphas_cumprod = np.cumprod(alphas, axis=0) 23 | alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 24 | 25 | timesteps, = betas.shape 26 | self.num_timesteps = int(timesteps) 27 | self.linear_start = linear_start 28 | self.linear_end = linear_end 29 | assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' 30 | 31 | to_torch = partial(torch.tensor, dtype=torch.float32) 32 | 33 | self.register_buffer('betas', to_torch(betas)) 34 | self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) 35 | self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) 36 | 37 | # calculations for diffusion q(x_t | x_{t-1}) and others 38 | self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) 39 | self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) 40 | self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) 41 | self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) 42 | self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) 43 | 44 | def q_sample(self, x_start, t, noise=None, seed=None): 45 | if noise is None: 46 | if seed is None: 47 | noise = torch.randn_like(x_start) 48 | else: 49 | noise = torch.randn(x_start.size(), dtype=x_start.dtype, layout=x_start.layout, generator=torch.manual_seed(seed)).to(x_start.device) 50 | return (extract_into_tensor(self.sqrt_alphas_cumprod.to(x_start.device), t, x_start.shape) * x_start + 51 | extract_into_tensor(self.sqrt_one_minus_alphas_cumprod.to(x_start.device), t, x_start.shape) * noise) 52 | 53 | def forward(self, x): 54 | return x, None 55 | 56 | def decode(self, x): 57 | return x 58 | 59 | 60 | class SimpleImageConcat(AbstractLowScaleModel): 61 | # no noise level conditioning 62 | def __init__(self): 63 | super(SimpleImageConcat, self).__init__(noise_schedule_config=None) 64 | self.max_noise_level = 0 65 | 66 | def forward(self, x): 67 | # fix to constant noise level 68 | return x, torch.zeros(x.shape[0], device=x.device).long() 69 | 70 | 71 | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): 72 | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cuda=False): 73 | super().__init__(noise_schedule_config=noise_schedule_config) 74 | self.max_noise_level = max_noise_level 75 | 76 | def forward(self, x, noise_level=None, seed=None): 77 | if noise_level is None: 78 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 79 | else: 80 | assert isinstance(noise_level, torch.Tensor) 81 | z = self.q_sample(x, noise_level, seed=seed) 82 | return z, noise_level 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/distributions/__pycache__/__init__.cpython-310.pyc 
-------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/__pycache__/distributions.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/distributions/__pycache__/distributions.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 | def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 
81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 
78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) 81 | -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/encoders/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/__pycache__/noise_aug_modules.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/encoders/__pycache__/noise_aug_modules.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/noise_aug_modules.py: -------------------------------------------------------------------------------- 1 | from ..diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation 2 | from ..diffusionmodules.openaimodel import Timestep 3 | import torch 4 | 5 | class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation): 6 | def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | if clip_stats_path is None: 9 | clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) 10 | else: 11 | clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu") 12 | self.register_buffer("data_mean", clip_mean[None, :], persistent=False) 13 | self.register_buffer("data_std", clip_std[None, :], persistent=False) 14 | self.time_embed = Timestep(timestep_dim) 15 | 16 | def scale(self, x): 17 | # re-normalize to centered mean and unit variance 18 | x = (x - self.data_mean.to(x.device)) * 1. / self.data_std.to(x.device) 19 | return x 20 | 21 | def unscale(self, x): 22 | # back to original data stats 23 | x = (x * self.data_std.to(x.device)) + self.data_mean.to(x.device) 24 | return x 25 | 26 | def forward(self, x, noise_level=None, seed=None): 27 | if noise_level is None: 28 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 29 | else: 30 | assert isinstance(noise_level, torch.Tensor) 31 | x = self.scale(x) 32 | z = self.q_sample(x, noise_level, seed=seed) 33 | z = self.unscale(z) 34 | noise_level = self.time_embed(noise_level) 35 | return z, noise_level 36 | -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/chainer: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Preferred Infrastructure, Inc. 2 | Copyright (c) 2015 Preferred Networks, Inc. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/kdiffusion: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022 Katherine Crowson 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/ldm: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/taesd: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ollin Boer Bohan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
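Looking back at distributions.py above: DiagonalGaussianDistribution is the posterior a VAE encoder produces, parameterised by a mean and log-variance stacked along the channel dimension. A small, hypothetical sketch of how it is typically consumed (the shapes are chosen purely for illustration):

    import torch
    from ldm_patched.ldm.modules.distributions.distributions import DiagonalGaussianDistribution

    # An encoder emits mean and logvar concatenated on the channel axis,
    # so 8 channels here correspond to a 4-channel latent (4 mean + 4 logvar).
    params = torch.randn(1, 8, 32, 32)
    posterior = DiagonalGaussianDistribution(params)

    z = posterior.sample()    # reparameterised draw, shape (1, 4, 32, 32)
    kl = posterior.kl()       # KL against a standard normal, one value per batch item
    mode = posterior.mode()   # deterministic alternative: just the mean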
-------------------------------------------------------------------------------- /ldm_patched/modules/checkpoint_pickle.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | load = pickle.load 4 | 5 | class Empty: 6 | pass 7 | 8 | class Unpickler(pickle.Unpickler): 9 | def find_class(self, module, name): 10 | #TODO: safe unpickle 11 | if module.startswith("pytorch_lightning"): 12 | return Empty 13 | return super().find_class(module, name) 14 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_config_bigg.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1280, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 5120, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 20, 18 | "num_hidden_layers": 32, 19 | "pad_token_id": 1, 20 | "projection_dim": 1280, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision.py: -------------------------------------------------------------------------------- 1 | from .utils import load_torch_file, transformers_convert, state_dict_prefix_replace 2 | import os 3 | import torch 4 | import json 5 | 6 | import ldm_patched.modules.ops 7 | import ldm_patched.modules.model_patcher 8 | import ldm_patched.modules.model_management 9 | import ldm_patched.modules.utils 10 | import ldm_patched.modules.clip_model 11 | 12 | class Output: 13 | def __getitem__(self, key): 14 | return getattr(self, key) 15 | def __setitem__(self, key, item): 16 | setattr(self, key, item) 17 | 18 | def clip_preprocess(image, size=224): 19 | mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype) 20 | std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype) 21 | image = image.movedim(-1, 1) 22 | if not (image.shape[2] == size and image.shape[3] == size): 23 | scale = (size / min(image.shape[2], image.shape[3])) 24 | image = torch.nn.functional.interpolate(image, size=(round(scale * image.shape[2]), round(scale * image.shape[3])), mode="bicubic", antialias=True) 25 | h = (image.shape[2] - size)//2 26 | w = (image.shape[3] - size)//2 27 | image = image[:,:,h:h+size,w:w+size] 28 | image = torch.clip((255. 
* image), 0, 255).round() / 255.0 29 | return (image - mean.view([3,1,1])) / std.view([3,1,1]) 30 | 31 | class ClipVisionModel(): 32 | def __init__(self, json_config): 33 | with open(json_config) as f: 34 | config = json.load(f) 35 | 36 | self.load_device = ldm_patched.modules.model_management.text_encoder_device() 37 | offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() 38 | self.dtype = ldm_patched.modules.model_management.text_encoder_dtype(self.load_device) 39 | self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.manual_cast) 40 | self.model.eval() 41 | 42 | self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) 43 | 44 | def load_sd(self, sd): 45 | return self.model.load_state_dict(sd, strict=False) 46 | 47 | def get_sd(self): 48 | return self.model.state_dict() 49 | 50 | def encode_image(self, image): 51 | ldm_patched.modules.model_management.load_model_gpu(self.patcher) 52 | pixel_values = clip_preprocess(image.to(self.load_device)).float() 53 | out = self.model(pixel_values=pixel_values, intermediate_output=-2) 54 | 55 | outputs = Output() 56 | outputs["last_hidden_state"] = out[0].to(ldm_patched.modules.model_management.intermediate_device()) 57 | outputs["image_embeds"] = out[2].to(ldm_patched.modules.model_management.intermediate_device()) 58 | outputs["penultimate_hidden_states"] = out[1].to(ldm_patched.modules.model_management.intermediate_device()) 59 | return outputs 60 | 61 | def convert_to_transformers(sd, prefix): 62 | sd_k = sd.keys() 63 | if "{}transformer.resblocks.0.attn.in_proj_weight".format(prefix) in sd_k: 64 | keys_to_replace = { 65 | "{}class_embedding".format(prefix): "vision_model.embeddings.class_embedding", 66 | "{}conv1.weight".format(prefix): "vision_model.embeddings.patch_embedding.weight", 67 | "{}positional_embedding".format(prefix): "vision_model.embeddings.position_embedding.weight", 68 | "{}ln_post.bias".format(prefix): "vision_model.post_layernorm.bias", 69 | "{}ln_post.weight".format(prefix): "vision_model.post_layernorm.weight", 70 | "{}ln_pre.bias".format(prefix): "vision_model.pre_layrnorm.bias", 71 | "{}ln_pre.weight".format(prefix): "vision_model.pre_layrnorm.weight", 72 | } 73 | 74 | for x in keys_to_replace: 75 | if x in sd_k: 76 | sd[keys_to_replace[x]] = sd.pop(x) 77 | 78 | if "{}proj".format(prefix) in sd_k: 79 | sd['visual_projection.weight'] = sd.pop("{}proj".format(prefix)).transpose(0, 1) 80 | 81 | sd = transformers_convert(sd, prefix, "vision_model.", 48) 82 | else: 83 | replace_prefix = {prefix: ""} 84 | sd = state_dict_prefix_replace(sd, replace_prefix) 85 | return sd 86 | 87 | def load_clipvision_from_sd(sd, prefix="", convert_keys=False): 88 | if convert_keys: 89 | sd = convert_to_transformers(sd, prefix) 90 | if "vision_model.encoder.layers.47.layer_norm1.weight" in sd: 91 | json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_g.json") 92 | elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd: 93 | json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json") 94 | elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd: 95 | json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json") 96 | else: 97 | return None 98 | 99 | clip = ClipVisionModel(json_config) 100 | m, u = clip.load_sd(sd) 101 | if len(m) > 0: 
102 | print("extra clip vision:", m) 103 | u = set(u) 104 | keys = list(sd.keys()) 105 | for k in keys: 106 | if k not in u: 107 | t = sd.pop(k) 108 | del t 109 | return clip 110 | 111 | def load(ckpt_path): 112 | sd = load_torch_file(ckpt_path) 113 | if "visual.transformer.resblocks.0.attn.in_proj_weight" in sd: 114 | return load_clipvision_from_sd(sd, prefix="visual.", convert_keys=True) 115 | else: 116 | return load_clipvision_from_sd(sd) 117 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision_config_g.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1664, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 8192, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 48, 15 | "patch_size": 14, 16 | "projection_dim": 1280, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision_config_h.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1280, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 5120, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 32, 15 | "patch_size": 14, 16 | "projection_dim": 1024, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision_config_vitl.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "quick_gelu", 5 | "hidden_size": 1024, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 4096, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 24, 15 | "patch_size": 14, 16 | "projection_dim": 768, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /ldm_patched/modules/conds.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import ldm_patched.modules.utils 4 | 5 | 6 | 7 | class CONDRegular: 8 | def __init__(self, cond): 9 | self.cond = cond 10 | 11 | def _copy_with(self, cond): 12 | return self.__class__(cond) 13 | 14 | def process_cond(self, batch_size, device, **kwargs): 15 | return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(self.cond, batch_size).to(device)) 16 | 17 | def can_concat(self, other): 18 | if self.cond.shape != other.cond.shape: 19 | return False 20 | return True 21 | 22 | def concat(self, others): 23 | conds = [self.cond] 24 | for x in others: 25 | conds.append(x.cond) 26 | return torch.cat(conds) 27 | 28 | class CONDNoiseShape(CONDRegular): 29 | def process_cond(self, batch_size, device, area, **kwargs): 30 | data = self.cond[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] 31 | 
return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(data, batch_size).to(device)) 32 | 33 | 34 | class CONDCrossAttn(CONDRegular): 35 | def can_concat(self, other): 36 | s1 = self.cond.shape 37 | s2 = other.cond.shape 38 | if s1 != s2: 39 | if s1[0] != s2[0] or s1[2] != s2[2]: #these 2 cases should not happen 40 | return False 41 | 42 | mult_min = math.lcm(s1[1], s2[1]) 43 | diff = mult_min // min(s1[1], s2[1]) 44 | if diff > 4: #arbitrary limit on the padding because it's probably going to impact performance negatively if it's too much 45 | return False 46 | return True 47 | 48 | def concat(self, others): 49 | conds = [self.cond] 50 | crossattn_max_len = self.cond.shape[1] 51 | for x in others: 52 | c = x.cond 53 | crossattn_max_len = math.lcm(crossattn_max_len, c.shape[1]) 54 | conds.append(c) 55 | 56 | out = [] 57 | for c in conds: 58 | if c.shape[1] < crossattn_max_len: 59 | c = c.repeat(1, crossattn_max_len // c.shape[1], 1) #padding with repeat doesn't change result 60 | out.append(c) 61 | return torch.cat(out) 62 | 63 | class CONDConstant(CONDRegular): 64 | def __init__(self, cond): 65 | self.cond = cond 66 | 67 | def process_cond(self, batch_size, device, **kwargs): 68 | return self._copy_with(self.cond) 69 | 70 | def can_concat(self, other): 71 | if self.cond != other.cond: 72 | return False 73 | return True 74 | 75 | def concat(self, others): 76 | return self.cond 77 | -------------------------------------------------------------------------------- /ldm_patched/modules/diffusers_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import ldm_patched.modules.sd 4 | 5 | def first_file(path, filenames): 6 | for f in filenames: 7 | p = os.path.join(path, f) 8 | if os.path.exists(p): 9 | return p 10 | return None 11 | 12 | def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None): 13 | diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"] 14 | unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names) 15 | vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names) 16 | 17 | text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"] 18 | text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names) 19 | text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names) 20 | 21 | text_encoder_paths = [text_encoder1_path] 22 | if text_encoder2_path is not None: 23 | text_encoder_paths.append(text_encoder2_path) 24 | 25 | unet = ldm_patched.modules.sd.load_unet(unet_path) 26 | 27 | clip = None 28 | if output_clip: 29 | clip = ldm_patched.modules.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory) 30 | 31 | vae = None 32 | if output_vae: 33 | sd = ldm_patched.modules.utils.load_torch_file(vae_path) 34 | vae = ldm_patched.modules.sd.VAE(sd=sd) 35 | 36 | return (unet, clip, vae) 37 | -------------------------------------------------------------------------------- /ldm_patched/modules/latent_formats.py: -------------------------------------------------------------------------------- 1 | 2 | class LatentFormat: 3 | scale_factor = 1.0 4 | latent_rgb_factors = None 5 | taesd_decoder_name = None 6 | 7 | def process_in(self, latent): 8 | return latent * self.scale_factor 9 | 
10 | def process_out(self, latent): 11 | return latent / self.scale_factor 12 | 13 | class SD15(LatentFormat): 14 | def __init__(self, scale_factor=0.18215): 15 | self.scale_factor = scale_factor 16 | self.latent_rgb_factors = [ 17 | # R G B 18 | [ 0.3512, 0.2297, 0.3227], 19 | [ 0.3250, 0.4974, 0.2350], 20 | [-0.2829, 0.1762, 0.2721], 21 | [-0.2120, -0.2616, -0.7177] 22 | ] 23 | self.taesd_decoder_name = "taesd_decoder" 24 | 25 | class SDXL(LatentFormat): 26 | def __init__(self): 27 | self.scale_factor = 0.13025 28 | self.latent_rgb_factors = [ 29 | # R G B 30 | [ 0.3920, 0.4054, 0.4549], 31 | [-0.2634, -0.0196, 0.0653], 32 | [ 0.0568, 0.1687, -0.0755], 33 | [-0.3112, -0.2359, -0.2076] 34 | ] 35 | self.taesd_decoder_name = "taesdxl_decoder" 36 | 37 | class SD_X4(LatentFormat): 38 | def __init__(self): 39 | self.scale_factor = 0.08333 40 | -------------------------------------------------------------------------------- /ldm_patched/modules/model_sampling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule 4 | import math 5 | 6 | class EPS: 7 | def calculate_input(self, sigma, noise): 8 | sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1)) 9 | return noise / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 10 | 11 | def calculate_denoised(self, sigma, model_output, model_input): 12 | sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) 13 | return model_input - model_output * sigma 14 | 15 | 16 | class V_PREDICTION(EPS): 17 | def calculate_denoised(self, sigma, model_output, model_input): 18 | sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) 19 | return model_input * self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) - model_output * sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 20 | 21 | 22 | class ModelSamplingDiscrete(torch.nn.Module): 23 | def __init__(self, model_config=None): 24 | super().__init__() 25 | 26 | if model_config is not None: 27 | sampling_settings = model_config.sampling_settings 28 | else: 29 | sampling_settings = {} 30 | 31 | beta_schedule = sampling_settings.get("beta_schedule", "linear") 32 | linear_start = sampling_settings.get("linear_start", 0.00085) 33 | linear_end = sampling_settings.get("linear_end", 0.012) 34 | 35 | self._register_schedule(given_betas=None, beta_schedule=beta_schedule, timesteps=1000, linear_start=linear_start, linear_end=linear_end, cosine_s=8e-3) 36 | self.sigma_data = 1.0 37 | 38 | def _register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, 39 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 40 | if given_betas is not None: 41 | betas = given_betas 42 | else: 43 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s) 44 | alphas = 1. 
- betas 45 | alphas_cumprod = torch.tensor(np.cumprod(alphas, axis=0), dtype=torch.float32) 46 | # alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 47 | 48 | timesteps, = betas.shape 49 | self.num_timesteps = int(timesteps) 50 | self.linear_start = linear_start 51 | self.linear_end = linear_end 52 | 53 | # self.register_buffer('betas', torch.tensor(betas, dtype=torch.float32)) 54 | # self.register_buffer('alphas_cumprod', torch.tensor(alphas_cumprod, dtype=torch.float32)) 55 | # self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32)) 56 | 57 | sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5 58 | self.set_sigmas(sigmas) 59 | 60 | def set_sigmas(self, sigmas): 61 | self.register_buffer('sigmas', sigmas) 62 | self.register_buffer('log_sigmas', sigmas.log()) 63 | 64 | @property 65 | def sigma_min(self): 66 | return self.sigmas[0] 67 | 68 | @property 69 | def sigma_max(self): 70 | return self.sigmas[-1] 71 | 72 | def timestep(self, sigma): 73 | log_sigma = sigma.log() 74 | dists = log_sigma.to(self.log_sigmas.device) - self.log_sigmas[:, None] 75 | return dists.abs().argmin(dim=0).view(sigma.shape).to(sigma.device) 76 | 77 | def sigma(self, timestep): 78 | t = torch.clamp(timestep.float().to(self.log_sigmas.device), min=0, max=(len(self.sigmas) - 1)) 79 | low_idx = t.floor().long() 80 | high_idx = t.ceil().long() 81 | w = t.frac() 82 | log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] 83 | return log_sigma.exp().to(timestep.device) 84 | 85 | def percent_to_sigma(self, percent): 86 | if percent <= 0.0: 87 | return 999999999.9 88 | if percent >= 1.0: 89 | return 0.0 90 | percent = 1.0 - percent 91 | return self.sigma(torch.tensor(percent * 999.0)).item() 92 | 93 | 94 | class ModelSamplingContinuousEDM(torch.nn.Module): 95 | def __init__(self, model_config=None): 96 | super().__init__() 97 | self.sigma_data = 1.0 98 | 99 | if model_config is not None: 100 | sampling_settings = model_config.sampling_settings 101 | else: 102 | sampling_settings = {} 103 | 104 | sigma_min = sampling_settings.get("sigma_min", 0.002) 105 | sigma_max = sampling_settings.get("sigma_max", 120.0) 106 | self.set_sigma_range(sigma_min, sigma_max) 107 | 108 | def set_sigma_range(self, sigma_min, sigma_max): 109 | sigmas = torch.linspace(math.log(sigma_min), math.log(sigma_max), 1000).exp() 110 | 111 | self.register_buffer('sigmas', sigmas) #for compatibility with some schedulers 112 | self.register_buffer('log_sigmas', sigmas.log()) 113 | 114 | @property 115 | def sigma_min(self): 116 | return self.sigmas[0] 117 | 118 | @property 119 | def sigma_max(self): 120 | return self.sigmas[-1] 121 | 122 | def timestep(self, sigma): 123 | return 0.25 * sigma.log() 124 | 125 | def sigma(self, timestep): 126 | return (timestep / 0.25).exp() 127 | 128 | def percent_to_sigma(self, percent): 129 | if percent <= 0.0: 130 | return 999999999.9 131 | if percent >= 1.0: 132 | return 0.0 133 | percent = 1.0 - percent 134 | 135 | log_sigma_min = math.log(self.sigma_min) 136 | return math.exp((math.log(self.sigma_max) - log_sigma_min) * percent + log_sigma_min) 137 | -------------------------------------------------------------------------------- /ldm_patched/modules/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import ldm_patched.modules.model_management 3 | 4 | def cast_bias_weight(s, input): 5 | bias = None 6 | non_blocking = 
ldm_patched.modules.model_management.device_supports_non_blocking(input.device) 7 | if s.bias is not None: 8 | bias = s.bias.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) 9 | weight = s.weight.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) 10 | return weight, bias 11 | 12 | 13 | class disable_weight_init: 14 | class Linear(torch.nn.Linear): 15 | ldm_patched_cast_weights = False 16 | def reset_parameters(self): 17 | return None 18 | 19 | def forward_ldm_patched_cast_weights(self, input): 20 | weight, bias = cast_bias_weight(self, input) 21 | return torch.nn.functional.linear(input, weight, bias) 22 | 23 | def forward(self, *args, **kwargs): 24 | if self.ldm_patched_cast_weights: 25 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 26 | else: 27 | return super().forward(*args, **kwargs) 28 | 29 | class Conv2d(torch.nn.Conv2d): 30 | ldm_patched_cast_weights = False 31 | def reset_parameters(self): 32 | return None 33 | 34 | def forward_ldm_patched_cast_weights(self, input): 35 | weight, bias = cast_bias_weight(self, input) 36 | return self._conv_forward(input, weight, bias) 37 | 38 | def forward(self, *args, **kwargs): 39 | if self.ldm_patched_cast_weights: 40 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 41 | else: 42 | return super().forward(*args, **kwargs) 43 | 44 | class Conv3d(torch.nn.Conv3d): 45 | ldm_patched_cast_weights = False 46 | def reset_parameters(self): 47 | return None 48 | 49 | def forward_ldm_patched_cast_weights(self, input): 50 | weight, bias = cast_bias_weight(self, input) 51 | return self._conv_forward(input, weight, bias) 52 | 53 | def forward(self, *args, **kwargs): 54 | if self.ldm_patched_cast_weights: 55 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 56 | else: 57 | return super().forward(*args, **kwargs) 58 | 59 | class GroupNorm(torch.nn.GroupNorm): 60 | ldm_patched_cast_weights = False 61 | def reset_parameters(self): 62 | return None 63 | 64 | def forward_ldm_patched_cast_weights(self, input): 65 | weight, bias = cast_bias_weight(self, input) 66 | return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps) 67 | 68 | def forward(self, *args, **kwargs): 69 | if self.ldm_patched_cast_weights: 70 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 71 | else: 72 | return super().forward(*args, **kwargs) 73 | 74 | 75 | class LayerNorm(torch.nn.LayerNorm): 76 | ldm_patched_cast_weights = False 77 | def reset_parameters(self): 78 | return None 79 | 80 | def forward_ldm_patched_cast_weights(self, input): 81 | weight, bias = cast_bias_weight(self, input) 82 | return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps) 83 | 84 | def forward(self, *args, **kwargs): 85 | if self.ldm_patched_cast_weights: 86 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 87 | else: 88 | return super().forward(*args, **kwargs) 89 | 90 | @classmethod 91 | def conv_nd(s, dims, *args, **kwargs): 92 | if dims == 2: 93 | return s.Conv2d(*args, **kwargs) 94 | elif dims == 3: 95 | return s.Conv3d(*args, **kwargs) 96 | else: 97 | raise ValueError(f"unsupported dimensions: {dims}") 98 | 99 | 100 | class manual_cast(disable_weight_init): 101 | class Linear(disable_weight_init.Linear): 102 | ldm_patched_cast_weights = True 103 | 104 | class Conv2d(disable_weight_init.Conv2d): 105 | ldm_patched_cast_weights = True 106 | 107 | class Conv3d(disable_weight_init.Conv3d): 108 | ldm_patched_cast_weights = True 109 | 110 | class 
GroupNorm(disable_weight_init.GroupNorm): 111 | ldm_patched_cast_weights = True 112 | 113 | class LayerNorm(disable_weight_init.LayerNorm): 114 | ldm_patched_cast_weights = True 115 | -------------------------------------------------------------------------------- /ldm_patched/modules/options.py: -------------------------------------------------------------------------------- 1 | 2 | args_parsing = False 3 | 4 | def enable_args_parsing(enable=True): 5 | global args_parsing 6 | args_parsing = enable 7 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd1_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "openai/clip-vit-large-patch14", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "quick_gelu", 11 | "hidden_size": 768, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 3072, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 12, 19 | "num_hidden_layers": 12, 20 | "pad_token_id": 1, 21 | "projection_dim": 768, 22 | "torch_dtype": "float32", 23 | "transformers_version": "4.24.0", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd1_tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "<|startoftext|>", 4 | "lstrip": false, 5 | "normalized": true, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": true, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "<|endoftext|>", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": true, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd1_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": { 4 | "__type": "AddedToken", 5 | "content": "<|startoftext|>", 6 | "lstrip": false, 7 | "normalized": true, 8 | "rstrip": false, 9 | "single_word": false 10 | }, 11 | "do_lower_case": true, 12 | "eos_token": { 13 | "__type": "AddedToken", 14 | "content": "<|endoftext|>", 15 | "lstrip": false, 16 | "normalized": true, 17 | "rstrip": false, 18 | "single_word": false 19 | }, 20 | "errors": "replace", 21 | "model_max_length": 77, 22 | "name_or_path": "openai/clip-vit-large-patch14", 23 | "pad_token": "<|endoftext|>", 24 | "special_tokens_map_file": "./special_tokens_map.json", 25 | "tokenizer_class": "CLIPTokenizer", 26 | "unk_token": { 27 | "__type": "AddedToken", 28 | "content": "<|endoftext|>", 29 | "lstrip": false, 30 | "normalized": true, 31 | "rstrip": false, 32 | "single_word": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd2_clip.py: -------------------------------------------------------------------------------- 1 | from ldm_patched.modules import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SD2ClipHModel(sd1_clip.SDClipModel): 6 | 
def __init__(self, arch="ViT-H-14", device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd2_clip_config.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 49406, "end": 49407, "pad": 0}) 13 | 14 | class SD2ClipHTokenizer(sd1_clip.SDTokenizer): 15 | def __init__(self, tokenizer_path=None, embedding_directory=None): 16 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024) 17 | 18 | class SD2Tokenizer(sd1_clip.SD1Tokenizer): 19 | def __init__(self, embedding_directory=None): 20 | super().__init__(embedding_directory=embedding_directory, clip_name="h", tokenizer=SD2ClipHTokenizer) 21 | 22 | class SD2ClipModel(sd1_clip.SD1ClipModel): 23 | def __init__(self, device="cpu", dtype=None, **kwargs): 24 | super().__init__(device=device, dtype=dtype, clip_name="h", clip_model=SD2ClipHModel, **kwargs) 25 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd2_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1024, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 4096, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 16, 18 | "num_hidden_layers": 24, 19 | "pad_token_id": 1, 20 | "projection_dim": 1024, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /ldm_patched/modules/sdxl_clip.py: -------------------------------------------------------------------------------- 1 | from ldm_patched.modules import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SDXLClipG(sd1_clip.SDClipModel): 6 | def __init__(self, device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_config_bigg.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, 13 | special_tokens={"start": 49406, "end": 49407, "pad": 0}, layer_norm_hidden_state=False) 14 | 15 | def load_sd(self, sd): 16 | return super().load_sd(sd) 17 | 18 | class SDXLClipGTokenizer(sd1_clip.SDTokenizer): 19 | def __init__(self, tokenizer_path=None, embedding_directory=None): 20 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g') 21 | 22 | 23 | class SDXLTokenizer: 24 | def __init__(self, embedding_directory=None): 25 | self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory) 26 | self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory) 27 | 28 | def tokenize_with_weights(self, text:str, return_word_ids=False): 29 | out = {} 30 | out["g"] = 
self.clip_g.tokenize_with_weights(text, return_word_ids) 31 | out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) 32 | return out 33 | 34 | def untokenize(self, token_weight_pair): 35 | return self.clip_g.untokenize(token_weight_pair) 36 | 37 | class SDXLClipModel(torch.nn.Module): 38 | def __init__(self, device="cpu", dtype=None): 39 | super().__init__() 40 | self.clip_l = sd1_clip.SDClipModel(layer="hidden", layer_idx=-2, device=device, dtype=dtype, layer_norm_hidden_state=False) 41 | self.clip_g = SDXLClipG(device=device, dtype=dtype) 42 | 43 | def clip_layer(self, layer_idx): 44 | self.clip_l.clip_layer(layer_idx) 45 | self.clip_g.clip_layer(layer_idx) 46 | 47 | def reset_clip_layer(self): 48 | self.clip_g.reset_clip_layer() 49 | self.clip_l.reset_clip_layer() 50 | 51 | def encode_token_weights(self, token_weight_pairs): 52 | token_weight_pairs_g = token_weight_pairs["g"] 53 | token_weight_pairs_l = token_weight_pairs["l"] 54 | g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs_g) 55 | l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l) 56 | return torch.cat([l_out, g_out], dim=-1), g_pooled 57 | 58 | def load_sd(self, sd): 59 | if "text_model.encoder.layers.30.mlp.fc1.weight" in sd: 60 | return self.clip_g.load_sd(sd) 61 | else: 62 | return self.clip_l.load_sd(sd) 63 | 64 | class SDXLRefinerClipModel(sd1_clip.SD1ClipModel): 65 | def __init__(self, device="cpu", dtype=None): 66 | super().__init__(device=device, dtype=dtype, clip_name="g", clip_model=SDXLClipG) 67 | -------------------------------------------------------------------------------- /ldm_patched/modules/supported_models_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . import model_base 3 | from . import utils 4 | from . 
import latent_formats 5 | 6 | class ClipTarget: 7 | def __init__(self, tokenizer, clip): 8 | self.clip = clip 9 | self.tokenizer = tokenizer 10 | self.params = {} 11 | 12 | class BASE: 13 | unet_config = {} 14 | unet_extra_config = { 15 | "num_heads": -1, 16 | "num_head_channels": 64, 17 | } 18 | 19 | clip_prefix = [] 20 | clip_vision_prefix = None 21 | noise_aug_config = None 22 | sampling_settings = {} 23 | latent_format = latent_formats.LatentFormat 24 | 25 | manual_cast_dtype = None 26 | 27 | @classmethod 28 | def matches(s, unet_config): 29 | for k in s.unet_config: 30 | if s.unet_config[k] != unet_config[k]: 31 | return False 32 | return True 33 | 34 | def model_type(self, state_dict, prefix=""): 35 | return model_base.ModelType.EPS 36 | 37 | def inpaint_model(self): 38 | return self.unet_config["in_channels"] > 4 39 | 40 | def __init__(self, unet_config): 41 | self.unet_config = unet_config 42 | self.latent_format = self.latent_format() 43 | for x in self.unet_extra_config: 44 | self.unet_config[x] = self.unet_extra_config[x] 45 | 46 | def get_model(self, state_dict, prefix="", device=None): 47 | if self.noise_aug_config is not None: 48 | out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=self.model_type(state_dict, prefix), device=device) 49 | else: 50 | out = model_base.BaseModel(self, model_type=self.model_type(state_dict, prefix), device=device) 51 | if self.inpaint_model(): 52 | out.set_inpaint() 53 | return out 54 | 55 | def process_clip_state_dict(self, state_dict): 56 | return state_dict 57 | 58 | def process_unet_state_dict(self, state_dict): 59 | return state_dict 60 | 61 | def process_vae_state_dict(self, state_dict): 62 | return state_dict 63 | 64 | def process_clip_state_dict_for_saving(self, state_dict): 65 | replace_prefix = {"": "cond_stage_model."} 66 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 67 | 68 | def process_clip_vision_state_dict_for_saving(self, state_dict): 69 | replace_prefix = {} 70 | if self.clip_vision_prefix is not None: 71 | replace_prefix[""] = self.clip_vision_prefix 72 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 73 | 74 | def process_unet_state_dict_for_saving(self, state_dict): 75 | replace_prefix = {"": "model.diffusion_model."} 76 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 77 | 78 | def process_vae_state_dict_for_saving(self, state_dict): 79 | replace_prefix = {"": "first_stage_model."} 80 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 81 | 82 | def set_manual_cast(self, manual_cast_dtype): 83 | self.manual_cast_dtype = manual_cast_dtype 84 | -------------------------------------------------------------------------------- /ldm_patched/pfn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/__init__.py -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/LICENSE-HAT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Xiangyu Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | 
copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/LICENSE-RealESRGAN: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Xintao Wang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/ChannelAttention.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class CA_layer(nn.Module): 7 | def __init__(self, channel, reduction=16): 8 | super(CA_layer, self).__init__() 9 | # global average pooling 10 | self.gap = nn.AdaptiveAvgPool2d(1) 11 | self.fc = nn.Sequential( 12 | nn.Conv2d(channel, channel // reduction, kernel_size=(1, 1), bias=False), 13 | nn.GELU(), 14 | nn.Conv2d(channel // reduction, channel, kernel_size=(1, 1), bias=False), 15 | # nn.Sigmoid() 16 | ) 17 | 18 | def forward(self, x): 19 | y = self.fc(self.gap(x)) 20 | return x * y.expand_as(x) 21 | 22 | 23 | class Simple_CA_layer(nn.Module): 24 | def __init__(self, channel): 25 | super(Simple_CA_layer, self).__init__() 26 | self.gap = nn.AdaptiveAvgPool2d(1) 27 | self.fc = nn.Conv2d( 28 | in_channels=channel, 29 | out_channels=channel, 30 | kernel_size=1, 31 | padding=0, 32 | stride=1, 33 | groups=1, 34 | bias=True, 35 | ) 36 | 37 | def forward(self, x): 38 | return x * self.fc(self.gap(x)) 39 | 40 | 41 | class ECA_layer(nn.Module): 42 | """Constructs a ECA module. 43 | Args: 44 | channel: Number of channels of the input feature map 45 | k_size: Adaptive selection of kernel size 46 | """ 47 | 48 | def __init__(self, channel): 49 | super(ECA_layer, self).__init__() 50 | 51 | b = 1 52 | gamma = 2 53 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 54 | k_size = k_size if k_size % 2 else k_size + 1 55 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 56 | self.conv = nn.Conv1d( 57 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 58 | ) 59 | # self.sigmoid = nn.Sigmoid() 60 | 61 | def forward(self, x): 62 | # x: input features with shape [b, c, h, w] 63 | # b, c, h, w = x.size() 64 | 65 | # feature descriptor on the global spatial information 66 | y = self.avg_pool(x) 67 | 68 | # Two different branches of ECA module 69 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 70 | 71 | # Multi-scale information fusion 72 | # y = self.sigmoid(y) 73 | 74 | return x * y.expand_as(x) 75 | 76 | 77 | class ECA_MaxPool_layer(nn.Module): 78 | """Constructs a ECA module. 
79 | Args: 80 | channel: Number of channels of the input feature map 81 | k_size: Adaptive selection of kernel size 82 | """ 83 | 84 | def __init__(self, channel): 85 | super(ECA_MaxPool_layer, self).__init__() 86 | 87 | b = 1 88 | gamma = 2 89 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 90 | k_size = k_size if k_size % 2 else k_size + 1 91 | self.max_pool = nn.AdaptiveMaxPool2d(1) 92 | self.conv = nn.Conv1d( 93 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 94 | ) 95 | # self.sigmoid = nn.Sigmoid() 96 | 97 | def forward(self, x): 98 | # x: input features with shape [b, c, h, w] 99 | # b, c, h, w = x.size() 100 | 101 | # feature descriptor on the global spatial information 102 | y = self.max_pool(x) 103 | 104 | # Two different branches of ECA module 105 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 106 | 107 | # Multi-scale information fusion 108 | # y = self.sigmoid(y) 109 | 110 | return x * y.expand_as(x) 111 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/OSAG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OSAG.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:08:49 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | 14 | import torch.nn as nn 15 | 16 | from .esa import ESA 17 | from .OSA import OSA_Block 18 | 19 | 20 | class OSAG(nn.Module): 21 | def __init__( 22 | self, 23 | channel_num=64, 24 | bias=True, 25 | block_num=4, 26 | ffn_bias=False, 27 | window_size=0, 28 | pe=False, 29 | ): 30 | super(OSAG, self).__init__() 31 | 32 | # print("window_size: %d" % (window_size)) 33 | # print("with_pe", pe) 34 | # print("ffn_bias: %d" % (ffn_bias)) 35 | 36 | # block_script_name = kwargs.get("block_script_name", "OSA") 37 | # block_class_name = kwargs.get("block_class_name", "OSA_Block") 38 | 39 | # script_name = "." 
+ block_script_name 40 | # package = __import__(script_name, fromlist=True) 41 | block_class = OSA_Block # getattr(package, block_class_name) 42 | group_list = [] 43 | for _ in range(block_num): 44 | temp_res = block_class( 45 | channel_num, 46 | bias, 47 | ffn_bias=ffn_bias, 48 | window_size=window_size, 49 | with_pe=pe, 50 | ) 51 | group_list.append(temp_res) 52 | group_list.append(nn.Conv2d(channel_num, channel_num, 1, 1, 0, bias=bias)) 53 | self.residual_layer = nn.Sequential(*group_list) 54 | esa_channel = max(channel_num // 4, 16) 55 | self.esa = ESA(esa_channel, channel_num) 56 | 57 | def forward(self, x): 58 | out = self.residual_layer(x) 59 | out = out + x 60 | return self.esa(out) 61 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/OmniSR.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OmniSR.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:06:36 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import math 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | 19 | from .OSAG import OSAG 20 | from .pixelshuffle import pixelshuffle_block 21 | 22 | 23 | class OmniSR(nn.Module): 24 | def __init__( 25 | self, 26 | state_dict, 27 | **kwargs, 28 | ): 29 | super(OmniSR, self).__init__() 30 | self.state = state_dict 31 | 32 | bias = True # Fine to assume this for now 33 | block_num = 1 # Fine to assume this for now 34 | ffn_bias = True 35 | pe = True 36 | 37 | num_feat = state_dict["input.weight"].shape[0] or 64 38 | num_in_ch = state_dict["input.weight"].shape[1] or 3 39 | num_out_ch = num_in_ch # we can just assume this for now. 
pixelshuffle smh 40 | 41 | pixelshuffle_shape = state_dict["up.0.weight"].shape[0] 42 | up_scale = math.sqrt(pixelshuffle_shape / num_out_ch) 43 | if up_scale - int(up_scale) > 0: 44 | print( 45 | "out_nc is probably different than in_nc, scale calculation might be wrong" 46 | ) 47 | up_scale = int(up_scale) 48 | res_num = 0 49 | for key in state_dict.keys(): 50 | if "residual_layer" in key: 51 | temp_res_num = int(key.split(".")[1]) 52 | if temp_res_num > res_num: 53 | res_num = temp_res_num 54 | res_num = res_num + 1 # zero-indexed 55 | 56 | residual_layer = [] 57 | self.res_num = res_num 58 | 59 | if ( 60 | "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" 61 | in state_dict.keys() 62 | ): 63 | rel_pos_bias_weight = state_dict[ 64 | "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" 65 | ].shape[0] 66 | self.window_size = int((math.sqrt(rel_pos_bias_weight) + 1) / 2) 67 | else: 68 | self.window_size = 8 69 | 70 | self.up_scale = up_scale 71 | 72 | for _ in range(res_num): 73 | temp_res = OSAG( 74 | channel_num=num_feat, 75 | bias=bias, 76 | block_num=block_num, 77 | ffn_bias=ffn_bias, 78 | window_size=self.window_size, 79 | pe=pe, 80 | ) 81 | residual_layer.append(temp_res) 82 | self.residual_layer = nn.Sequential(*residual_layer) 83 | self.input = nn.Conv2d( 84 | in_channels=num_in_ch, 85 | out_channels=num_feat, 86 | kernel_size=3, 87 | stride=1, 88 | padding=1, 89 | bias=bias, 90 | ) 91 | self.output = nn.Conv2d( 92 | in_channels=num_feat, 93 | out_channels=num_feat, 94 | kernel_size=3, 95 | stride=1, 96 | padding=1, 97 | bias=bias, 98 | ) 99 | self.up = pixelshuffle_block(num_feat, num_out_ch, up_scale, bias=bias) 100 | 101 | # self.tail = pixelshuffle_block(num_feat,num_out_ch,up_scale,bias=bias) 102 | 103 | # for m in self.modules(): 104 | # if isinstance(m, nn.Conv2d): 105 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 106 | # m.weight.data.normal_(0, sqrt(2. 
/ n)) 107 | 108 | # chaiNNer specific stuff 109 | self.model_arch = "OmniSR" 110 | self.sub_type = "SR" 111 | self.in_nc = num_in_ch 112 | self.out_nc = num_out_ch 113 | self.num_feat = num_feat 114 | self.scale = up_scale 115 | 116 | self.supports_fp16 = True # TODO: Test this 117 | self.supports_bfp16 = True 118 | self.min_size_restriction = 16 119 | 120 | self.load_state_dict(state_dict, strict=False) 121 | 122 | def check_image_size(self, x): 123 | _, _, h, w = x.size() 124 | # import pdb; pdb.set_trace() 125 | mod_pad_h = (self.window_size - h % self.window_size) % self.window_size 126 | mod_pad_w = (self.window_size - w % self.window_size) % self.window_size 127 | # x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') 128 | x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "constant", 0) 129 | return x 130 | 131 | def forward(self, x): 132 | H, W = x.shape[2:] 133 | x = self.check_image_size(x) 134 | 135 | residual = self.input(x) 136 | out = self.residual_layer(residual) 137 | 138 | # origin 139 | out = torch.add(self.output(out), residual) 140 | out = self.up(out) 141 | 142 | out = out[:, :, : H * self.up_scale, : W * self.up_scale] 143 | return out 144 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/OSA.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/OSA.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/OSAG.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/OSAG.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/OmniSR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/OmniSR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/esa.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/esa.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/layernorm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/layernorm.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/pixelshuffle.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/pixelshuffle.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/layernorm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: layernorm.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Thursday, 20th April 2023 9:28:20 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | 17 | class LayerNormFunction(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, weight, bias, eps): 20 | ctx.eps = eps 21 | N, C, H, W = x.size() 22 | mu = x.mean(1, keepdim=True) 23 | var = (x - mu).pow(2).mean(1, keepdim=True) 24 | y = (x - mu) / (var + eps).sqrt() 25 | ctx.save_for_backward(y, var, weight) 26 | y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) 27 | return y 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | eps = ctx.eps 32 | 33 | N, C, H, W = grad_output.size() 34 | y, var, weight = ctx.saved_variables 35 | g = grad_output * weight.view(1, C, 1, 1) 36 | mean_g = g.mean(dim=1, keepdim=True) 37 | 38 | mean_gy = (g * y).mean(dim=1, keepdim=True) 39 | gx = 1.0 / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) 40 | return ( 41 | gx, 42 | (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), 43 | grad_output.sum(dim=3).sum(dim=2).sum(dim=0), 44 | None, 45 | ) 46 | 47 | 48 | class LayerNorm2d(nn.Module): 49 | def __init__(self, channels, eps=1e-6): 50 | super(LayerNorm2d, self).__init__() 51 | self.register_parameter("weight", nn.Parameter(torch.ones(channels))) 52 | self.register_parameter("bias", nn.Parameter(torch.zeros(channels))) 53 | self.eps = eps 54 | 55 | def forward(self, x): 56 | return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) 57 | 58 | 59 | class GRN(nn.Module): 60 | """GRN (Global Response Normalization) layer""" 61 | 62 | def __init__(self, dim): 63 | super().__init__() 64 | self.gamma = nn.Parameter(torch.zeros(1, dim, 1, 1)) 65 | self.beta = nn.Parameter(torch.zeros(1, dim, 1, 1)) 66 | 67 | def forward(self, x): 68 | Gx = torch.norm(x, p=2, dim=(2, 3), keepdim=True) 69 | Nx = Gx / (Gx.mean(dim=1, keepdim=True) + 1e-6) 70 | return self.gamma * (x * Nx) + self.beta + x 71 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/pixelshuffle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: pixelshuffle.py 5 | # Created Date: Friday July 1st 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Friday, 1st July 2022 10:18:39 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2022 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch.nn as nn 14 | 15 | 16 | def pixelshuffle_block( 17 | in_channels, out_channels, upscale_factor=2, kernel_size=3, bias=False 18 | ): 19 | """ 
20 | Upsample features according to `upscale_factor`. 21 | """ 22 | padding = kernel_size // 2 23 | conv = nn.Conv2d( 24 | in_channels, 25 | out_channels * (upscale_factor**2), 26 | kernel_size, 27 | padding=1, 28 | bias=bias, 29 | ) 30 | pixel_shuffle = nn.PixelShuffle(upscale_factor) 31 | return nn.Sequential(*[conv, pixel_shuffle]) 32 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/SRVGG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import math 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class SRVGGNetCompact(nn.Module): 11 | """A compact VGG-style network structure for super-resolution. 12 | It is a compact network structure, which performs upsampling in the last layer and no convolution is 13 | conducted on the HR feature space. 14 | Args: 15 | num_in_ch (int): Channel number of inputs. Default: 3. 16 | num_out_ch (int): Channel number of outputs. Default: 3. 17 | num_feat (int): Channel number of intermediate features. Default: 64. 18 | num_conv (int): Number of convolution layers in the body network. Default: 16. 19 | upscale (int): Upsampling factor. Default: 4. 20 | act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu. 21 | """ 22 | 23 | def __init__( 24 | self, 25 | state_dict, 26 | act_type: str = "prelu", 27 | ): 28 | super(SRVGGNetCompact, self).__init__() 29 | self.model_arch = "SRVGG (RealESRGAN)" 30 | self.sub_type = "SR" 31 | 32 | self.act_type = act_type 33 | 34 | self.state = state_dict 35 | 36 | if "params" in self.state: 37 | self.state = self.state["params"] 38 | 39 | self.key_arr = list(self.state.keys()) 40 | 41 | self.in_nc = self.get_in_nc() 42 | self.num_feat = self.get_num_feats() 43 | self.num_conv = self.get_num_conv() 44 | self.out_nc = self.in_nc # :( 45 | self.pixelshuffle_shape = None # Defined in get_scale() 46 | self.scale = self.get_scale() 47 | 48 | self.supports_fp16 = True 49 | self.supports_bfp16 = True 50 | self.min_size_restriction = None 51 | 52 | self.body = nn.ModuleList() 53 | # the first conv 54 | self.body.append(nn.Conv2d(self.in_nc, self.num_feat, 3, 1, 1)) 55 | # the first activation 56 | if act_type == "relu": 57 | activation = nn.ReLU(inplace=True) 58 | elif act_type == "prelu": 59 | activation = nn.PReLU(num_parameters=self.num_feat) 60 | elif act_type == "leakyrelu": 61 | activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) 62 | self.body.append(activation) # type: ignore 63 | 64 | # the body structure 65 | for _ in range(self.num_conv): 66 | self.body.append(nn.Conv2d(self.num_feat, self.num_feat, 3, 1, 1)) 67 | # activation 68 | if act_type == "relu": 69 | activation = nn.ReLU(inplace=True) 70 | elif act_type == "prelu": 71 | activation = nn.PReLU(num_parameters=self.num_feat) 72 | elif act_type == "leakyrelu": 73 | activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) 74 | self.body.append(activation) # type: ignore 75 | 76 | # the last conv 77 | self.body.append(nn.Conv2d(self.num_feat, self.pixelshuffle_shape, 3, 1, 1)) # type: ignore 78 | # upsample 79 | self.upsampler = nn.PixelShuffle(self.scale) 80 | 81 | self.load_state_dict(self.state, strict=False) 82 | 83 | def get_num_conv(self) -> int: 84 | return (int(self.key_arr[-1].split(".")[1]) - 2) // 2 85 | 86 | def get_num_feats(self) -> int: 87 | return self.state[self.key_arr[0]].shape[0] 88 | 89 | def get_in_nc(self) -> int: 90 | 
return self.state[self.key_arr[0]].shape[1] 91 | 92 | def get_scale(self) -> int: 93 | self.pixelshuffle_shape = self.state[self.key_arr[-1]].shape[0] 94 | # Assume out_nc is the same as in_nc 95 | # I cant think of a better way to do that 96 | self.out_nc = self.in_nc 97 | scale = math.sqrt(self.pixelshuffle_shape / self.out_nc) 98 | if scale - int(scale) > 0: 99 | print( 100 | "out_nc is probably different than in_nc, scale calculation might be wrong" 101 | ) 102 | scale = int(scale) 103 | return scale 104 | 105 | def forward(self, x): 106 | out = x 107 | for i in range(0, len(self.body)): 108 | out = self.body[i](out) 109 | 110 | out = self.upsampler(out) 111 | # add the nearest upsampled image, so that the network learns the residual 112 | base = F.interpolate(x, scale_factor=self.scale, mode="nearest") 113 | out += base 114 | return out 115 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/SwiftSRGAN.py: -------------------------------------------------------------------------------- 1 | # From https://github.com/Koushik0901/Swift-SRGAN/blob/master/swift-srgan/models.py 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | class SeperableConv2d(nn.Module): 8 | def __init__( 9 | self, in_channels, out_channels, kernel_size, stride=1, padding=1, bias=True 10 | ): 11 | super(SeperableConv2d, self).__init__() 12 | self.depthwise = nn.Conv2d( 13 | in_channels, 14 | in_channels, 15 | kernel_size=kernel_size, 16 | stride=stride, 17 | groups=in_channels, 18 | bias=bias, 19 | padding=padding, 20 | ) 21 | self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) 22 | 23 | def forward(self, x): 24 | return self.pointwise(self.depthwise(x)) 25 | 26 | 27 | class ConvBlock(nn.Module): 28 | def __init__( 29 | self, 30 | in_channels, 31 | out_channels, 32 | use_act=True, 33 | use_bn=True, 34 | discriminator=False, 35 | **kwargs, 36 | ): 37 | super(ConvBlock, self).__init__() 38 | 39 | self.use_act = use_act 40 | self.cnn = SeperableConv2d(in_channels, out_channels, **kwargs, bias=not use_bn) 41 | self.bn = nn.BatchNorm2d(out_channels) if use_bn else nn.Identity() 42 | self.act = ( 43 | nn.LeakyReLU(0.2, inplace=True) 44 | if discriminator 45 | else nn.PReLU(num_parameters=out_channels) 46 | ) 47 | 48 | def forward(self, x): 49 | return self.act(self.bn(self.cnn(x))) if self.use_act else self.bn(self.cnn(x)) 50 | 51 | 52 | class UpsampleBlock(nn.Module): 53 | def __init__(self, in_channels, scale_factor): 54 | super(UpsampleBlock, self).__init__() 55 | 56 | self.conv = SeperableConv2d( 57 | in_channels, 58 | in_channels * scale_factor**2, 59 | kernel_size=3, 60 | stride=1, 61 | padding=1, 62 | ) 63 | self.ps = nn.PixelShuffle( 64 | scale_factor 65 | ) # (in_channels * 4, H, W) -> (in_channels, H*2, W*2) 66 | self.act = nn.PReLU(num_parameters=in_channels) 67 | 68 | def forward(self, x): 69 | return self.act(self.ps(self.conv(x))) 70 | 71 | 72 | class ResidualBlock(nn.Module): 73 | def __init__(self, in_channels): 74 | super(ResidualBlock, self).__init__() 75 | 76 | self.block1 = ConvBlock( 77 | in_channels, in_channels, kernel_size=3, stride=1, padding=1 78 | ) 79 | self.block2 = ConvBlock( 80 | in_channels, in_channels, kernel_size=3, stride=1, padding=1, use_act=False 81 | ) 82 | 83 | def forward(self, x): 84 | out = self.block1(x) 85 | out = self.block2(out) 86 | return out + x 87 | 88 | 89 | class Generator(nn.Module): 90 | """Swift-SRGAN Generator 91 | Args: 92 | in_channels (int): number of input image 
channels. 93 | num_channels (int): number of hidden channels. 94 | num_blocks (int): number of residual blocks. 95 | upscale_factor (int): factor to upscale the image [2x, 4x, 8x]. 96 | Returns: 97 | torch.Tensor: super resolution image 98 | """ 99 | 100 | def __init__( 101 | self, 102 | state_dict, 103 | ): 104 | super(Generator, self).__init__() 105 | self.model_arch = "Swift-SRGAN" 106 | self.sub_type = "SR" 107 | self.state = state_dict 108 | if "model" in self.state: 109 | self.state = self.state["model"] 110 | 111 | self.in_nc: int = self.state["initial.cnn.depthwise.weight"].shape[0] 112 | self.out_nc: int = self.state["final_conv.pointwise.weight"].shape[0] 113 | self.num_filters: int = self.state["initial.cnn.pointwise.weight"].shape[0] 114 | self.num_blocks = len( 115 | set([x.split(".")[1] for x in self.state.keys() if "residual" in x]) 116 | ) 117 | self.scale: int = 2 ** len( 118 | set([x.split(".")[1] for x in self.state.keys() if "upsampler" in x]) 119 | ) 120 | 121 | in_channels = self.in_nc 122 | num_channels = self.num_filters 123 | num_blocks = self.num_blocks 124 | upscale_factor = self.scale 125 | 126 | self.supports_fp16 = True 127 | self.supports_bfp16 = True 128 | self.min_size_restriction = None 129 | 130 | self.initial = ConvBlock( 131 | in_channels, num_channels, kernel_size=9, stride=1, padding=4, use_bn=False 132 | ) 133 | self.residual = nn.Sequential( 134 | *[ResidualBlock(num_channels) for _ in range(num_blocks)] 135 | ) 136 | self.convblock = ConvBlock( 137 | num_channels, 138 | num_channels, 139 | kernel_size=3, 140 | stride=1, 141 | padding=1, 142 | use_act=False, 143 | ) 144 | self.upsampler = nn.Sequential( 145 | *[ 146 | UpsampleBlock(num_channels, scale_factor=2) 147 | for _ in range(upscale_factor // 2) 148 | ] 149 | ) 150 | self.final_conv = SeperableConv2d( 151 | num_channels, in_channels, kernel_size=9, stride=1, padding=4 152 | ) 153 | 154 | self.load_state_dict(self.state, strict=False) 155 | 156 | def forward(self, x): 157 | initial = self.initial(x) 158 | x = self.residual(initial) 159 | x = self.convblock(x) + initial 160 | x = self.upsampler(x) 161 | return (torch.tanh(self.final_conv(x)) + 1) / 2 162 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__init__.py -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/DAT.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/DAT.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/HAT.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/HAT.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/LaMa.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/LaMa.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/RRDB.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/RRDB.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SCUNet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SCUNet.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SPSR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SPSR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SRVGG.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SRVGG.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SwiftSRGAN.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SwiftSRGAN.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/Swin2SR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/Swin2SR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SwinIR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SwinIR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/block.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/block.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/LICENSE-codeformer: -------------------------------------------------------------------------------- 1 | S-Lab License 1.0 2 | 3 | Copyright 2022 S-Lab 4 | 5 | Redistribution and use for non-commercial purpose in source and 6 | binary forms, with or without modification, are permitted provided 7 | that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | In the event that redistribution and/or use for commercial purpose in 34 | source or binary forms, with or without modification is required, 35 | please contact the contributor(s) of the work. 
36 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/codeformer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/codeformer.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/gfpganv1_clean_arch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/gfpganv1_clean_arch.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/restoreformer_arch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/restoreformer_arch.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/stylegan2_clean_arch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/stylegan2_clean_arch.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/fused_act.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # type: ignore 3 | # modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 4 | 5 | import torch 6 | from torch import nn 7 | from torch.autograd import Function 8 | 9 | fused_act_ext = None 10 | 11 | 12 | class FusedLeakyReLUFunctionBackward(Function): 13 | @staticmethod 14 | def forward(ctx, grad_output, out, negative_slope, scale): 15 | ctx.save_for_backward(out) 16 | ctx.negative_slope = negative_slope 17 | ctx.scale = scale 18 | 19 | empty = grad_output.new_empty(0) 20 | 21 | grad_input = fused_act_ext.fused_bias_act( 22 | grad_output, empty, out, 3, 1, negative_slope, scale 23 | ) 24 | 25 | dim = [0] 26 | 27 | if grad_input.ndim > 2: 28 | dim += list(range(2, grad_input.ndim)) 29 | 30 | grad_bias = grad_input.sum(dim).detach() 31 | 32 | return grad_input, grad_bias 33 | 34 | @staticmethod 35 | def backward(ctx, gradgrad_input, gradgrad_bias): 36 | (out,) = ctx.saved_tensors 37 | gradgrad_out = fused_act_ext.fused_bias_act( 38 | gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale 39 | ) 40 | 41 | return gradgrad_out, None, None, None 42 | 43 | 44 | class FusedLeakyReLUFunction(Function): 45 | @staticmethod 46 | def forward(ctx, input, bias, negative_slope, scale): 47 | empty = input.new_empty(0) 48 | out = fused_act_ext.fused_bias_act( 49 | input, bias, empty, 3, 0, negative_slope, scale 50 | ) 51 | ctx.save_for_backward(out) 52 | ctx.negative_slope = negative_slope 53 | ctx.scale = scale 54 | 55 | return out 56 | 57 | @staticmethod 58 | def backward(ctx, grad_output): 59 | (out,) = ctx.saved_tensors 60 | 61 | grad_input, grad_bias = 
FusedLeakyReLUFunctionBackward.apply( 62 | grad_output, out, ctx.negative_slope, ctx.scale 63 | ) 64 | 65 | return grad_input, grad_bias, None, None 66 | 67 | 68 | class FusedLeakyReLU(nn.Module): 69 | def __init__(self, channel, negative_slope=0.2, scale=2**0.5): 70 | super().__init__() 71 | 72 | self.bias = nn.Parameter(torch.zeros(channel)) 73 | self.negative_slope = negative_slope 74 | self.scale = scale 75 | 76 | def forward(self, input): 77 | return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale) 78 | 79 | 80 | def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5): 81 | return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) 82 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/__pycache__/drop.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/timm/__pycache__/drop.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/__pycache__/helpers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/timm/__pycache__/helpers.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/__pycache__/weight_init.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/timm/__pycache__/weight_init.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import collections.abc 5 | from itertools import repeat 6 | 7 | 8 | # From PyTorch internals 9 | def _ntuple(n): 10 | def parse(x): 11 | if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): 12 | return x 13 | return tuple(repeat(x, n)) 14 | 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=0.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # Make sure that round down does not go down by more than 10%. 
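# e.g. v=10, divisor=8: the nearest multiple is 8, which is below 0.9 * 10, so it is bumped up to 16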
29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/weight_init.py: -------------------------------------------------------------------------------- 1 | import math 2 | import warnings 3 | 4 | import torch 5 | from torch.nn.init import _calculate_fan_in_and_fan_out 6 | 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn( 17 | "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 18 | "The distribution of values may be incorrect.", 19 | stacklevel=2, 20 | ) 21 | 22 | with torch.no_grad(): 23 | # Values are generated by using a truncated uniform distribution and 24 | # then using the inverse CDF for the normal distribution. 25 | # Get upper and lower cdf values 26 | l = norm_cdf((a - mean) / std) 27 | u = norm_cdf((b - mean) / std) 28 | 29 | # Uniformly fill tensor with values from [l, u], then translate to 30 | # [2l-1, 2u-1]. 31 | tensor.uniform_(2 * l - 1, 2 * u - 1) 32 | 33 | # Use inverse cdf transform for normal distribution to get truncated 34 | # standard normal 35 | tensor.erfinv_() 36 | 37 | # Transform to proper mean, std 38 | tensor.mul_(std * math.sqrt(2.0)) 39 | tensor.add_(mean) 40 | 41 | # Clamp to ensure it's in the proper range 42 | tensor.clamp_(min=a, max=b) 43 | return tensor 44 | 45 | 46 | def trunc_normal_( 47 | tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 48 | ) -> torch.Tensor: 49 | r"""Fills the input Tensor with values drawn from a truncated 50 | normal distribution. The values are effectively drawn from the 51 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 52 | with values outside :math:`[a, b]` redrawn until they are within 53 | the bounds. The method used for generating the random values works 54 | best when :math:`a \leq \text{mean} \leq b`. 55 | 56 | NOTE: this impl is similar to the PyTorch trunc_normal_, the bounds [a, b] are 57 | applied while sampling the normal with mean/std applied, therefore a, b args 58 | should be adjusted to match the range of mean, std args. 59 | 60 | Args: 61 | tensor: an n-dimensional `torch.Tensor` 62 | mean: the mean of the normal distribution 63 | std: the standard deviation of the normal distribution 64 | a: the minimum cutoff value 65 | b: the maximum cutoff value 66 | Examples: 67 | >>> w = torch.empty(3, 5) 68 | >>> nn.init.trunc_normal_(w) 69 | """ 70 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 71 | 72 | 73 | def trunc_normal_tf_( 74 | tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 75 | ) -> torch.Tensor: 76 | r"""Fills the input Tensor with values drawn from a truncated 77 | normal distribution. The values are effectively drawn from the 78 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 79 | with values outside :math:`[a, b]` redrawn until they are within 80 | the bounds. The method used for generating the random values works 81 | best when :math:`a \leq \text{mean} \leq b`. 
82 | 83 | NOTE: this 'tf' variant behaves closer to Tensorflow / JAX impl where the 84 | bounds [a, b] are applied when sampling the normal distribution with mean=0, std=1.0 85 | and the result is subsquently scaled and shifted by the mean and std args. 86 | 87 | Args: 88 | tensor: an n-dimensional `torch.Tensor` 89 | mean: the mean of the normal distribution 90 | std: the standard deviation of the normal distribution 91 | a: the minimum cutoff value 92 | b: the maximum cutoff value 93 | Examples: 94 | >>> w = torch.empty(3, 5) 95 | >>> nn.init.trunc_normal_(w) 96 | """ 97 | _no_grad_trunc_normal_(tensor, 0, 1.0, a, b) 98 | with torch.no_grad(): 99 | tensor.mul_(std).add_(mean) 100 | return tensor 101 | 102 | 103 | def variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal"): 104 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 105 | if mode == "fan_in": 106 | denom = fan_in 107 | elif mode == "fan_out": 108 | denom = fan_out 109 | elif mode == "fan_avg": 110 | denom = (fan_in + fan_out) / 2 111 | 112 | variance = scale / denom # type: ignore 113 | 114 | if distribution == "truncated_normal": 115 | # constant is stddev of standard normal truncated to (-2, 2) 116 | trunc_normal_tf_(tensor, std=math.sqrt(variance) / 0.87962566103423978) 117 | elif distribution == "normal": 118 | tensor.normal_(std=math.sqrt(variance)) 119 | elif distribution == "uniform": 120 | bound = math.sqrt(3 * variance) 121 | # pylint: disable=invalid-unary-operand-type 122 | tensor.uniform_(-bound, bound) 123 | else: 124 | raise ValueError(f"invalid distribution {distribution}") 125 | 126 | 127 | def lecun_normal_(tensor): 128 | variance_scaling_(tensor, mode="fan_in", distribution="truncated_normal") 129 | -------------------------------------------------------------------------------- /ldm_patched/pfn/model_loading.py: -------------------------------------------------------------------------------- 1 | import logging as logger 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | from .types import PyTorchModel 18 | 19 | 20 | class UnsupportedModel(Exception): 21 | pass 22 | 23 | 24 | def load_state_dict(state_dict) -> PyTorchModel: 25 | logger.debug(f"Loading state dict into pytorch model arch") 26 | 27 | state_dict_keys = list(state_dict.keys()) 28 | 29 | if "params_ema" in state_dict_keys: 30 | state_dict = state_dict["params_ema"] 31 | elif "params-ema" in state_dict_keys: 32 | state_dict = state_dict["params-ema"] 33 | elif "params" in state_dict_keys: 34 | state_dict = state_dict["params"] 35 | 36 | state_dict_keys = list(state_dict.keys()) 37 | # SRVGGNet Real-ESRGAN (v2) 38 | if "body.0.weight" in state_dict_keys and "body.1.weight" in state_dict_keys: 39 | model = RealESRGANv2(state_dict) 40 | # SPSR (ESRGAN with lots of extra layers) 41 | elif "f_HR_conv1.0.weight" in state_dict: 42 | model = 
SPSR(state_dict) 43 | # Swift-SRGAN 44 | elif ( 45 | "model" in state_dict_keys 46 | and "initial.cnn.depthwise.weight" in state_dict["model"].keys() 47 | ): 48 | model = SwiftSRGAN(state_dict) 49 | # SwinIR, Swin2SR, HAT 50 | elif "layers.0.residual_group.blocks.0.norm1.weight" in state_dict_keys: 51 | if ( 52 | "layers.0.residual_group.blocks.0.conv_block.cab.0.weight" 53 | in state_dict_keys 54 | ): 55 | model = HAT(state_dict) 56 | elif "patch_embed.proj.weight" in state_dict_keys: 57 | model = Swin2SR(state_dict) 58 | else: 59 | model = SwinIR(state_dict) 60 | # GFPGAN 61 | elif ( 62 | "toRGB.0.weight" in state_dict_keys 63 | and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys 64 | ): 65 | model = GFPGANv1Clean(state_dict) 66 | # RestoreFormer 67 | elif ( 68 | "encoder.conv_in.weight" in state_dict_keys 69 | and "encoder.down.0.block.0.norm1.weight" in state_dict_keys 70 | ): 71 | model = RestoreFormer(state_dict) 72 | elif ( 73 | "encoder.blocks.0.weight" in state_dict_keys 74 | and "quantize.embedding.weight" in state_dict_keys 75 | ): 76 | model = CodeFormer(state_dict) 77 | # LaMa 78 | elif ( 79 | "model.model.1.bn_l.running_mean" in state_dict_keys 80 | or "generator.model.1.bn_l.running_mean" in state_dict_keys 81 | ): 82 | model = LaMa(state_dict) 83 | # Omni-SR 84 | elif "residual_layer.0.residual_layer.0.layer.0.fn.0.weight" in state_dict_keys: 85 | model = OmniSR(state_dict) 86 | # SCUNet 87 | elif "m_head.0.weight" in state_dict_keys and "m_tail.0.weight" in state_dict_keys: 88 | model = SCUNet(state_dict) 89 | # DAT 90 | elif "layers.0.blocks.2.attn.attn_mask_0" in state_dict_keys: 91 | model = DAT(state_dict) 92 | # Regular ESRGAN, "new-arch" ESRGAN, Real-ESRGAN v1 93 | else: 94 | try: 95 | model = ESRGAN(state_dict) 96 | except: 97 | # pylint: disable=raise-missing-from 98 | raise UnsupportedModel 99 | return model 100 | -------------------------------------------------------------------------------- /ldm_patched/pfn/types.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | 18 | PyTorchSRModels = ( 19 | RealESRGANv2, 20 | SPSR, 21 | SwiftSRGAN, 22 | ESRGAN, 23 | SwinIR, 24 | Swin2SR, 25 | HAT, 26 | OmniSR, 27 | SCUNet, 28 | DAT, 29 | ) 30 | PyTorchSRModel = Union[ 31 | RealESRGANv2, 32 | SPSR, 33 | SwiftSRGAN, 34 | ESRGAN, 35 | SwinIR, 36 | Swin2SR, 37 | HAT, 38 | OmniSR, 39 | SCUNet, 40 | DAT, 41 | ] 42 | 43 | 44 | def is_pytorch_sr_model(model: object): 45 | return isinstance(model, PyTorchSRModels) 46 | 47 | 48 | PyTorchFaceModels = (GFPGANv1Clean, RestoreFormer, CodeFormer) 49 | PyTorchFaceModel = Union[GFPGANv1Clean, RestoreFormer, CodeFormer] 50 | 51 | 52 | def is_pytorch_face_model(model: object): 53 | return isinstance(model, PyTorchFaceModels) 54 | 55 | 56 | 
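# Inpainting architectures (currently only LaMa); extend this tuple and the Union below when adding new inpaint models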
PyTorchInpaintModels = (LaMa,) 57 | PyTorchInpaintModel = Union[LaMa] 58 | 59 | 60 | def is_pytorch_inpaint_model(model: object): 61 | return isinstance(model, PyTorchInpaintModels) 62 | 63 | 64 | PyTorchModels = (*PyTorchSRModels, *PyTorchFaceModels, *PyTorchInpaintModels) 65 | PyTorchModel = Union[PyTorchSRModel, PyTorchFaceModel, PyTorchInpaintModel] 66 | 67 | 68 | def is_pytorch_model(model: object): 69 | return isinstance(model, PyTorchModels) 70 | -------------------------------------------------------------------------------- /ldm_patched/taesd/taesd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Tiny AutoEncoder for Stable Diffusion 4 | (DNN for encoding / decoding SD's latent space) 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | 9 | import ldm_patched.modules.utils 10 | import ldm_patched.modules.ops 11 | 12 | def conv(n_in, n_out, **kwargs): 13 | return ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 3, padding=1, **kwargs) 14 | 15 | class Clamp(nn.Module): 16 | def forward(self, x): 17 | return torch.tanh(x / 3) * 3 18 | 19 | class Block(nn.Module): 20 | def __init__(self, n_in, n_out): 21 | super().__init__() 22 | self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out)) 23 | self.skip = ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity() 24 | self.fuse = nn.ReLU() 25 | def forward(self, x): 26 | return self.fuse(self.conv(x) + self.skip(x)) 27 | 28 | def Encoder(): 29 | return nn.Sequential( 30 | conv(3, 64), Block(64, 64), 31 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 32 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 33 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 34 | conv(64, 4), 35 | ) 36 | 37 | def Decoder(): 38 | return nn.Sequential( 39 | Clamp(), conv(4, 64), nn.ReLU(), 40 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 41 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 42 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 43 | Block(64, 64), conv(64, 3), 44 | ) 45 | 46 | class TAESD(nn.Module): 47 | latent_magnitude = 3 48 | latent_shift = 0.5 49 | 50 | def __init__(self, encoder_path=None, decoder_path=None): 51 | """Initialize pretrained TAESD on the given device from the given checkpoints.""" 52 | super().__init__() 53 | self.taesd_encoder = Encoder() 54 | self.taesd_decoder = Decoder() 55 | self.vae_scale = torch.nn.Parameter(torch.tensor(1.0)) 56 | if encoder_path is not None: 57 | self.taesd_encoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(encoder_path, safe_load=True)) 58 | if decoder_path is not None: 59 | self.taesd_decoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(decoder_path, safe_load=True)) 60 | 61 | @staticmethod 62 | def scale_latents(x): 63 | """raw latents -> [0, 1]""" 64 | return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) 65 | 66 | @staticmethod 67 | def unscale_latents(x): 68 | """[0, 1] -> raw latents""" 69 | return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) 70 | 71 | def decode(self, x): 72 | x_sample = self.taesd_decoder(x * self.vae_scale) 73 | x_sample = x_sample.sub(0.5).mul(2) 74 | return 
x_sample 75 | 76 | def encode(self, x): 77 | return self.taesd_encoder(x * 0.5 + 0.5) / self.vae_scale 78 | -------------------------------------------------------------------------------- /ldm_patched/utils/latent_visualization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | import struct 4 | import numpy as np 5 | from ldm_patched.modules.args_parser import args, LatentPreviewMethod 6 | from ldm_patched.taesd.taesd import TAESD 7 | import ldm_patched.utils.path_utils 8 | import ldm_patched.modules.utils 9 | 10 | MAX_PREVIEW_RESOLUTION = 512 11 | 12 | class LatentPreviewer: 13 | def decode_latent_to_preview(self, x0): 14 | pass 15 | 16 | def decode_latent_to_preview_image(self, preview_format, x0): 17 | preview_image = self.decode_latent_to_preview(x0) 18 | return ("JPEG", preview_image, MAX_PREVIEW_RESOLUTION) 19 | 20 | class TAESDPreviewerImpl(LatentPreviewer): 21 | def __init__(self, taesd): 22 | self.taesd = taesd 23 | 24 | def decode_latent_to_preview(self, x0): 25 | x_sample = self.taesd.decode(x0[:1])[0].detach() 26 | x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) 27 | x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) 28 | x_sample = x_sample.astype(np.uint8) 29 | 30 | preview_image = Image.fromarray(x_sample) 31 | return preview_image 32 | 33 | 34 | class Latent2RGBPreviewer(LatentPreviewer): 35 | def __init__(self, latent_rgb_factors): 36 | self.latent_rgb_factors = torch.tensor(latent_rgb_factors, device="cpu") 37 | 38 | def decode_latent_to_preview(self, x0): 39 | latent_image = x0[0].permute(1, 2, 0).cpu() @ self.latent_rgb_factors 40 | 41 | latents_ubyte = (((latent_image + 1) / 2) 42 | .clamp(0, 1) # change scale from -1..1 to 0..1 43 | .mul(0xFF) # to 0..255 44 | .byte()).cpu() 45 | 46 | return Image.fromarray(latents_ubyte.numpy()) 47 | 48 | 49 | def get_previewer(device, latent_format): 50 | previewer = None 51 | method = args.preview_option 52 | if method != LatentPreviewMethod.NoPreviews: 53 | # TODO previewer methods 54 | taesd_decoder_path = None 55 | if latent_format.taesd_decoder_name is not None: 56 | taesd_decoder_path = next( 57 | (fn for fn in ldm_patched.utils.path_utils.get_filename_list("vae_approx") 58 | if fn.startswith(latent_format.taesd_decoder_name)), 59 | "" 60 | ) 61 | taesd_decoder_path = ldm_patched.utils.path_utils.get_full_path("vae_approx", taesd_decoder_path) 62 | 63 | if method == LatentPreviewMethod.Auto: 64 | method = LatentPreviewMethod.Latent2RGB 65 | if taesd_decoder_path: 66 | method = LatentPreviewMethod.TAESD 67 | 68 | if method == LatentPreviewMethod.TAESD: 69 | if taesd_decoder_path: 70 | taesd = TAESD(None, taesd_decoder_path).to(device) 71 | previewer = TAESDPreviewerImpl(taesd) 72 | else: 73 | print("Warning: TAESD previews enabled, but could not find models/vae_approx/{}".format(latent_format.taesd_decoder_name)) 74 | 75 | if previewer is None: 76 | if latent_format.latent_rgb_factors is not None: 77 | previewer = Latent2RGBPreviewer(latent_format.latent_rgb_factors) 78 | return previewer 79 | 80 | def prepare_callback(model, steps, x0_output_dict=None): 81 | preview_format = "JPEG" 82 | if preview_format not in ["JPEG", "PNG"]: 83 | preview_format = "JPEG" 84 | 85 | previewer = get_previewer(model.load_device, model.model.latent_format) 86 | 87 | pbar = ldm_patched.modules.utils.ProgressBar(steps) 88 | def callback(step, x0, x, total_steps): 89 | if x0_output_dict is not None: 90 | x0_output_dict["x0"] = x0 91 | 92 | 
preview_bytes = None 93 | if previewer: 94 | preview_bytes = previewer.decode_latent_to_preview_image(preview_format, x0) 95 | pbar.update_absolute(step + 1, total_steps, preview_bytes) 96 | return callback 97 | 98 | -------------------------------------------------------------------------------- /make_img.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "import random\n", 11 | "import os\n", 12 | "import shutil\n", 13 | "from tqdm import tqdm\n", 14 | "from diffusers import StableDiffusionXLPipeline\n", 15 | "\n", 16 | "regular_prompts_list = [\n", 17 | " ...\n", 18 | "]\n", 19 | "object_name = \"teapot\"\n", 20 | "save_dir = \"regular_teapot\"\n", 21 | "\n", 22 | "\n", 23 | "repeat_times = 30\n", 24 | "\n", 25 | "DEVICE = \"cuda:0\"\n", 26 | "torch.cuda.set_device(DEVICE)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "pipeline = StableDiffusionXLPipeline.from_pretrained(\n", 36 | " \"stabilityai/stable-diffusion-xl-base-1.0\",\n", 37 | " torch_dtype=torch.float16,\n", 38 | " use_safetensors=True,\n", 39 | " variant=\"fp16\",\n", 40 | ").to(DEVICE)\n", 41 | "pipeline.set_progress_bar_config(disable=True)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# 使用lang-sam完成分割任务 python3.10装包\n", 51 | "# git clone https://github.com/mycfhs/lang-segment-anything && cd lang-segment-anything\n", 52 | "# python -m pip install -e . --ignore-installed\n", 53 | "from lang_sam import LangSAM\n", 54 | "\n", 55 | "model = LangSAM(sam_type=\"vit_h\") # b, l, h" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "from torchvision.transforms import ToPILImage\n", 65 | "import gc\n", 66 | "\n", 67 | "to_pil_image = ToPILImage()\n", 68 | "\n", 69 | "if os.path.exists(save_dir):\n", 70 | " shutil.rmtree(save_dir)\n", 71 | "\n", 72 | "os.makedirs(save_dir)\n", 73 | "for prompt in regular_prompts_list:\n", 74 | " prompt = prompt.replace(\" \", \"_\")\n", 75 | " os.makedirs(f\"{save_dir}/{prompt}\")\n", 76 | "\n", 77 | "for _ in tqdm(range(repeat_times)):\n", 78 | " random_seed = random.randint(0, 1000000)\n", 79 | " images = pipeline(regular_prompts_list, seed=random_seed).images\n", 80 | "\n", 81 | " gc.collect()\n", 82 | " if torch.cuda.is_available():\n", 83 | " torch.cuda.empty_cache()\n", 84 | "\n", 85 | " for image, prompt in zip(images, regular_prompts_list):\n", 86 | " prompt = prompt.replace(\" \", \"_\")\n", 87 | "\n", 88 | " masks, boxes, phrases, logits = model.predict(image, object_name)\n", 89 | " mask = masks.to(torch.uint8) * 255\n", 90 | "\n", 91 | " try:\n", 92 | " mask_img = to_pil_image(mask[0])\n", 93 | " mask_img.save(f\"{save_dir}/{prompt}/{random_seed}-mask.png\")\n", 94 | " image.save(f\"{save_dir}/{prompt}/{random_seed}-image.png\")\n", 95 | " except:\n", 96 | " print(f\"Error img, ignore\")\n", 97 | " continue\n", 98 | "\n", 99 | " gc.collect()\n", 100 | " if torch.cuda.is_available():\n", 101 | " torch.cuda.empty_cache()" 102 | ] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "DreamMix", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | 
"codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.10.15" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /models/inpaint/put_inpaint_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/models/inpaint/put_inpaint_here -------------------------------------------------------------------------------- /models/loras/put_loras_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/models/loras/put_loras_here -------------------------------------------------------------------------------- /models/upscale_models/put_esrgan_and_other_upscale_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/models/upscale_models/put_esrgan_and_other_upscale_models_here -------------------------------------------------------------------------------- /modules/auth.py: -------------------------------------------------------------------------------- 1 | import json 2 | import hashlib 3 | import modules.constants as constants 4 | 5 | from os.path import exists 6 | 7 | 8 | def auth_list_to_dict(auth_list): 9 | auth_dict = {} 10 | for auth_data in auth_list: 11 | if 'user' in auth_data: 12 | if 'hash' in auth_data: 13 | auth_dict |= {auth_data['user']: auth_data['hash']} 14 | elif 'pass' in auth_data: 15 | auth_dict |= {auth_data['user']: hashlib.sha256(bytes(auth_data['pass'], encoding='utf-8')).hexdigest()} 16 | return auth_dict 17 | 18 | 19 | def load_auth_data(filename=None): 20 | auth_dict = None 21 | if filename != None and exists(filename): 22 | with open(filename, encoding='utf-8') as auth_file: 23 | try: 24 | auth_obj = json.load(auth_file) 25 | if isinstance(auth_obj, list) and len(auth_obj) > 0: 26 | auth_dict = auth_list_to_dict(auth_obj) 27 | except Exception as e: 28 | print('load_auth_data, e: ' + str(e)) 29 | return auth_dict 30 | 31 | 32 | auth_dict = load_auth_data(constants.AUTH_FILENAME) 33 | 34 | auth_enabled = auth_dict != None 35 | 36 | 37 | def check_auth(user, password): 38 | if user not in auth_dict: 39 | return False 40 | else: 41 | return hashlib.sha256(bytes(password, encoding='utf-8')).hexdigest() == auth_dict[user] 42 | -------------------------------------------------------------------------------- /modules/constants.py: -------------------------------------------------------------------------------- 1 | # as in k-diffusion (sampling.py) 2 | MIN_SEED = 0 3 | MAX_SEED = 2**63 - 1 4 | 5 | AUTH_FILENAME = 'auth.json' 6 | -------------------------------------------------------------------------------- /modules/flags.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum, Enum 2 | 3 | disabled = 'Disabled' 4 | enabled = 'Enabled' 5 | subtle_variation = 'Vary (Subtle)' 6 | strong_variation = 'Vary (Strong)' 7 | upscale_15 = 'Upscale (1.5x)' 8 | upscale_2 = 'Upscale (2x)' 9 | upscale_fast = 'Upscale (Fast 2x)' 10 | 11 | uov_list = [ 12 | disabled, 
subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast 13 | ] 14 | 15 | CIVITAI_NO_KARRAS = ["euler", "euler_ancestral", "heun", "dpm_fast", "dpm_adaptive", "ddim", "uni_pc"] 16 | 17 | # fooocus: a1111 (Civitai) 18 | KSAMPLER = { 19 | "euler": "Euler", 20 | "euler_ancestral": "Euler a", 21 | "heun": "Heun", 22 | "heunpp2": "", 23 | "dpm_2": "DPM2", 24 | "dpm_2_ancestral": "DPM2 a", 25 | "lms": "LMS", 26 | "dpm_fast": "DPM fast", 27 | "dpm_adaptive": "DPM adaptive", 28 | "dpmpp_2s_ancestral": "DPM++ 2S a", 29 | "dpmpp_sde": "DPM++ SDE", 30 | "dpmpp_sde_gpu": "DPM++ SDE", 31 | "dpmpp_2m": "DPM++ 2M", 32 | "dpmpp_2m_sde": "DPM++ 2M SDE", 33 | "dpmpp_2m_sde_gpu": "DPM++ 2M SDE", 34 | "dpmpp_3m_sde": "", 35 | "dpmpp_3m_sde_gpu": "", 36 | "ddpm": "", 37 | "lcm": "LCM" 38 | } 39 | 40 | SAMPLER_EXTRA = { 41 | "ddim": "DDIM", 42 | "uni_pc": "UniPC", 43 | "uni_pc_bh2": "" 44 | } 45 | 46 | SAMPLERS = KSAMPLER | SAMPLER_EXTRA 47 | 48 | KSAMPLER_NAMES = list(KSAMPLER.keys()) 49 | 50 | SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", "lcm", "turbo"] 51 | SAMPLER_NAMES = KSAMPLER_NAMES + list(SAMPLER_EXTRA.keys()) 52 | 53 | sampler_list = SAMPLER_NAMES 54 | scheduler_list = SCHEDULER_NAMES 55 | 56 | refiner_swap_method = 'joint' 57 | 58 | cn_ip = "ImagePrompt" 59 | cn_ip_face = "FaceSwap" 60 | cn_canny = "PyraCanny" 61 | cn_cpds = "CPDS" 62 | 63 | ip_list = [cn_ip, cn_canny, cn_cpds, cn_ip_face] 64 | default_ip = cn_ip 65 | 66 | default_parameters = { 67 | cn_ip: (0.5, 0.6), cn_ip_face: (0.9, 0.75), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0) 68 | } # stop, weight 69 | 70 | output_formats = ['png', 'jpeg', 'webp'] 71 | 72 | inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6'] 73 | inpaint_option_default = 'Inpaint or Outpaint (default)' 74 | inpaint_option_detail = 'Improve Detail (face, hand, eyes, etc.)' 75 | inpaint_option_modify = 'Modify Content (add objects, change background, etc.)' 76 | inpaint_options = [inpaint_option_default, inpaint_option_detail, inpaint_option_modify] 77 | 78 | desc_type_photo = 'Photograph' 79 | desc_type_anime = 'Art/Anime' 80 | 81 | 82 | class MetadataScheme(Enum): 83 | FOOOCUS = 'fooocus' 84 | A1111 = 'a1111' 85 | 86 | 87 | metadata_scheme = [ 88 | (f'{MetadataScheme.FOOOCUS.value} (json)', MetadataScheme.FOOOCUS.value), 89 | (f'{MetadataScheme.A1111.value} (plain text)', MetadataScheme.A1111.value), 90 | ] 91 | 92 | controlnet_image_count = 4 93 | 94 | 95 | class OutputFormat(Enum): 96 | PNG = 'png' 97 | JPEG = 'jpeg' 98 | WEBP = 'webp' 99 | 100 | @classmethod 101 | def list(cls) -> list: 102 | return list(map(lambda c: c.value, cls)) 103 | 104 | 105 | class Steps(IntEnum): 106 | QUALITY = 60 107 | SPEED = 30 108 | EXTREME_SPEED = 8 109 | LIGHTNING = 4 110 | 111 | 112 | class StepsUOV(IntEnum): 113 | QUALITY = 36 114 | SPEED = 18 115 | EXTREME_SPEED = 8 116 | LIGHTNING = 4 117 | 118 | 119 | class Performance(Enum): 120 | QUALITY = 'Quality' 121 | SPEED = 'Speed' 122 | EXTREME_SPEED = 'Extreme Speed' 123 | LIGHTNING = 'Lightning' 124 | 125 | @classmethod 126 | def list(cls) -> list: 127 | return list(map(lambda c: c.value, cls)) 128 | 129 | @classmethod 130 | def has_restricted_features(cls, x) -> bool: 131 | if isinstance(x, Performance): 132 | x = x.value 133 | return x in [cls.EXTREME_SPEED.value, cls.LIGHTNING.value] 134 | 135 | def steps(self) -> int | None: 136 | return Steps[self.name].value if Steps[self.name] else None 137 | 138 | def steps_uov(self) -> int | None: 139 | return 
StepsUOV[self.name].value if Steps[self.name] else None
140 | 
-------------------------------------------------------------------------------- /modules/html.py: --------------------------------------------------------------------------------
1 | progress_html = '''
2 | <div class="loader-container">
3 |   <div class="loader"></div>
4 |   <div class="progress-container">
5 |     <progress value="*number*" max="100"></progress>
6 |   </div>
7 |   <span>*text*</span>
8 | </div>
9 | ''' 10 | 11 | 12 | def make_progress_html(number, text): 13 | return progress_html.replace('*number*', str(number)).replace('*text*', text) 14 | -------------------------------------------------------------------------------- /modules/launch_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import importlib 3 | import importlib.util 4 | import shutil 5 | import subprocess 6 | import sys 7 | import re 8 | import logging 9 | import importlib.metadata 10 | import packaging.version 11 | from packaging.requirements import Requirement 12 | 13 | logging.getLogger("torch.distributed.nn").setLevel(logging.ERROR) # sshh... 14 | logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage()) 15 | 16 | re_requirement = re.compile(r"\s*([-\w]+)\s*(?:==\s*([-+.\w]+))?\s*") 17 | 18 | python = sys.executable 19 | default_command_live = (os.environ.get('LAUNCH_LIVE_OUTPUT') == "1") 20 | index_url = os.environ.get('INDEX_URL', "") 21 | 22 | modules_path = os.path.dirname(os.path.realpath(__file__)) 23 | script_path = os.path.dirname(modules_path) 24 | 25 | 26 | def is_installed(package): 27 | try: 28 | spec = importlib.util.find_spec(package) 29 | except ModuleNotFoundError: 30 | return False 31 | 32 | return spec is not None 33 | 34 | 35 | def run(command, desc=None, errdesc=None, custom_env=None, live: bool = default_command_live) -> str: 36 | if desc is not None: 37 | print(desc) 38 | 39 | run_kwargs = { 40 | "args": command, 41 | "shell": True, 42 | "env": os.environ if custom_env is None else custom_env, 43 | "encoding": 'utf8', 44 | "errors": 'ignore', 45 | } 46 | 47 | if not live: 48 | run_kwargs["stdout"] = run_kwargs["stderr"] = subprocess.PIPE 49 | 50 | result = subprocess.run(**run_kwargs) 51 | 52 | if result.returncode != 0: 53 | error_bits = [ 54 | f"{errdesc or 'Error running command'}.", 55 | f"Command: {command}", 56 | f"Error code: {result.returncode}", 57 | ] 58 | if result.stdout: 59 | error_bits.append(f"stdout: {result.stdout}") 60 | if result.stderr: 61 | error_bits.append(f"stderr: {result.stderr}") 62 | raise RuntimeError("\n".join(error_bits)) 63 | 64 | return (result.stdout or "") 65 | 66 | 67 | def run_pip(command, desc=None, live=default_command_live): 68 | try: 69 | index_url_line = f' --index-url {index_url}' if index_url != '' else '' 70 | return run(f'"{python}" -m pip {command} --prefer-binary{index_url_line}', desc=f"Installing {desc}", 71 | errdesc=f"Couldn't install {desc}", live=live) 72 | except Exception as e: 73 | print(e) 74 | print(f'CMD Failed {desc}: {command}') 75 | return None 76 | 77 | 78 | def requirements_met(requirements_file): 79 | with open(requirements_file, "r", encoding="utf8") as file: 80 | for line in file: 81 | line = line.strip() 82 | if line == "" or line.startswith('#'): 83 | continue 84 | 85 | requirement = Requirement(line) 86 | package = requirement.name 87 | 88 | try: 89 | version_installed = importlib.metadata.version(package) 90 | installed_version = packaging.version.parse(version_installed) 91 | 92 | # Check if the installed version satisfies the requirement 93 | if installed_version not in requirement.specifier: 94 | print(f"Version mismatch for {package}: Installed version {version_installed} does not meet requirement {requirement}") 95 | return False 96 | except Exception as e: 97 | print(f"Error checking version for {package}: {e}") 98 | return False 99 | 100 | return True 101 | 102 | 103 | def 
delete_folder_content(folder, prefix=None): 104 | result = True 105 | 106 | for filename in os.listdir(folder): 107 | file_path = os.path.join(folder, filename) 108 | try: 109 | if os.path.isfile(file_path) or os.path.islink(file_path): 110 | os.unlink(file_path) 111 | elif os.path.isdir(file_path): 112 | shutil.rmtree(file_path) 113 | except Exception as e: 114 | print(f'{prefix}Failed to delete {file_path}. Reason: {e}') 115 | result = False 116 | 117 | return result -------------------------------------------------------------------------------- /modules/localization.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | current_translation = {} 6 | localization_root = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'language') 7 | 8 | 9 | def localization_js(filename): 10 | global current_translation 11 | 12 | if isinstance(filename, str): 13 | full_name = os.path.abspath(os.path.join(localization_root, filename + '.json')) 14 | if os.path.exists(full_name): 15 | try: 16 | with open(full_name, encoding='utf-8') as f: 17 | current_translation = json.load(f) 18 | assert isinstance(current_translation, dict) 19 | for k, v in current_translation.items(): 20 | assert isinstance(k, str) 21 | assert isinstance(v, str) 22 | except Exception as e: 23 | print(str(e)) 24 | print(f'Failed to load localization file {full_name}') 25 | 26 | # current_translation = {k: 'XXX' for k in current_translation.keys()} # use this to see if all texts are covered 27 | 28 | return f"window.localization = {json.dumps(current_translation)}" 29 | 30 | 31 | def dump_english_config(components): 32 | all_texts = [] 33 | for c in components: 34 | label = getattr(c, 'label', None) 35 | value = getattr(c, 'value', None) 36 | choices = getattr(c, 'choices', None) 37 | info = getattr(c, 'info', None) 38 | 39 | if isinstance(label, str): 40 | all_texts.append(label) 41 | if isinstance(value, str): 42 | all_texts.append(value) 43 | if isinstance(info, str): 44 | all_texts.append(info) 45 | if isinstance(choices, list): 46 | for x in choices: 47 | if isinstance(x, str): 48 | all_texts.append(x) 49 | if isinstance(x, tuple): 50 | for y in x: 51 | if isinstance(y, str): 52 | all_texts.append(y) 53 | 54 | config_dict = {k: k for k in all_texts if k != "" and 'progress-container' not in k} 55 | full_name = os.path.abspath(os.path.join(localization_root, 'en.json')) 56 | 57 | with open(full_name, "w", encoding="utf-8") as json_file: 58 | json.dump(config_dict, json_file, indent=4) 59 | 60 | return 61 | -------------------------------------------------------------------------------- /modules/model_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib.parse import urlparse 3 | from typing import Optional 4 | 5 | 6 | def load_file_from_url( 7 | url: str, 8 | *, 9 | model_dir: str, 10 | progress: bool = True, 11 | file_name: Optional[str] = None, 12 | ) -> str: 13 | """Download a file from `url` into `model_dir`, using the file present if possible. 14 | 15 | Returns the path to the downloaded file. 
16 | """ 17 | os.makedirs(model_dir, exist_ok=True) 18 | if not file_name: 19 | parts = urlparse(url) 20 | file_name = os.path.basename(parts.path) 21 | cached_file = os.path.abspath(os.path.join(model_dir, file_name)) 22 | if not os.path.exists(cached_file): 23 | print(f'Downloading: "{url}" to {cached_file}\n') 24 | from torch.hub import download_url_to_file 25 | download_url_to_file(url, cached_file, progress=progress) 26 | return cached_file 27 | -------------------------------------------------------------------------------- /modules/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import contextlib 3 | 4 | 5 | @contextlib.contextmanager 6 | def use_patched_ops(operations): 7 | op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm'] 8 | backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names} 9 | 10 | try: 11 | for op_name in op_names: 12 | setattr(torch.nn, op_name, getattr(operations, op_name)) 13 | 14 | yield 15 | 16 | finally: 17 | for op_name in op_names: 18 | setattr(torch.nn, op_name, backups[op_name]) 19 | return 20 | -------------------------------------------------------------------------------- /modules/patch_precision.py: -------------------------------------------------------------------------------- 1 | # Consistent with Kohya to reduce differences between model training and inference. 2 | 3 | import torch 4 | import math 5 | import einops 6 | import numpy as np 7 | 8 | import ldm_patched.ldm.modules.diffusionmodules.openaimodel 9 | import ldm_patched.modules.model_sampling 10 | import ldm_patched.modules.sd1_clip 11 | 12 | from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule 13 | 14 | 15 | def patched_timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): 16 | # Consistent with Kohya to reduce differences between model training and inference. 17 | 18 | if not repeat_only: 19 | half = dim // 2 20 | freqs = torch.exp( 21 | -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half 22 | ).to(device=timesteps.device) 23 | args = timesteps[:, None].float() * freqs[None] 24 | embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) 25 | if dim % 2: 26 | embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) 27 | else: 28 | embedding = einops.repeat(timesteps, 'b -> b d', d=dim) 29 | return embedding 30 | 31 | 32 | def patched_register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, 33 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 34 | # Consistent with Kohya to reduce differences between model training and inference. 35 | 36 | if given_betas is not None: 37 | betas = given_betas 38 | else: 39 | betas = make_beta_schedule( 40 | beta_schedule, 41 | timesteps, 42 | linear_start=linear_start, 43 | linear_end=linear_end, 44 | cosine_s=cosine_s) 45 | 46 | alphas = 1. 
- betas
47 |     alphas_cumprod = np.cumprod(alphas, axis=0)
48 |     timesteps, = betas.shape
49 |     self.num_timesteps = int(timesteps)
50 |     self.linear_start = linear_start
51 |     self.linear_end = linear_end
52 |     sigmas = torch.tensor(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, dtype=torch.float32)
53 |     self.set_sigmas(sigmas)
54 |     return
55 | 
56 | 
57 | def patch_all_precision():
58 |     ldm_patched.ldm.modules.diffusionmodules.openaimodel.timestep_embedding = patched_timestep_embedding
59 |     ldm_patched.modules.model_sampling.ModelSamplingDiscrete._register_schedule = patched_register_schedule
60 |     return
61 | 
-------------------------------------------------------------------------------- /modules/sdxl_styles.py: --------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import json
4 | import math
5 | # import modules.config
6 | # TODO: wildcard lookups are disabled for now
7 | 
8 | from modules.util import get_files_from_folder
9 | 
10 | # cannot use modules.config - validators causing circular imports
11 | styles_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../sdxl_styles/'))
12 | wildcards_max_bfs_depth = 64
13 | 
14 | 
15 | def normalize_key(k):
16 |     k = k.replace('-', ' ')
17 |     words = k.split(' ')
18 |     words = [w[:1].upper() + w[1:].lower() for w in words]
19 |     k = ' '.join(words)
20 |     k = k.replace('3d', '3D')
21 |     k = k.replace('Sai', 'SAI')
22 |     k = k.replace('Mre', 'MRE')
23 |     k = k.replace('(s', '(S')
24 |     return k
25 | 
26 | 
27 | styles = {}
28 | 
29 | styles_files = get_files_from_folder(styles_path, ['.json'])
30 | 
31 | for x in ['sdxl_styles_fooocus.json',
32 |           'sdxl_styles_sai.json',
33 |           'sdxl_styles_mre.json',
34 |           'sdxl_styles_twri.json',
35 |           'sdxl_styles_diva.json',
36 |           'sdxl_styles_marc_k3nt3l.json']:
37 |     if x in styles_files:
38 |         styles_files.remove(x)
39 |         styles_files.append(x)
40 | 
41 | for styles_file in styles_files:
42 |     try:
43 |         with open(os.path.join(styles_path, styles_file), encoding='utf-8') as f:
44 |             for entry in json.load(f):
45 |                 name = normalize_key(entry['name'])
46 |                 prompt = entry['prompt'] if 'prompt' in entry else ''
47 |                 negative_prompt = entry['negative_prompt'] if 'negative_prompt' in entry else ''
48 |                 styles[name] = (prompt, negative_prompt)
49 |     except Exception as e:
50 |         print(str(e))
51 |         print(f'Failed to load style file {styles_file}')
52 | 
53 | style_keys = list(styles.keys())
54 | fooocus_expansion = "Fooocus V2"
55 | legal_style_names = [fooocus_expansion] + style_keys
56 | 
57 | 
58 | def apply_style(style, positive):
59 |     p, n = styles[style]
60 |     return p.replace('{prompt}', positive).splitlines(), n.splitlines()
61 | 
62 | 
63 | def apply_wildcards(wildcard_text, rng, i, read_wildcards_in_order):
64 |     for _ in range(wildcards_max_bfs_depth):
65 |         placeholders = re.findall(r'__([\w-]+)__', wildcard_text)
66 |         if len(placeholders) == 0:
67 |             return wildcard_text
68 | 
69 |         print(f'[Wildcards] processing: {wildcard_text}')
70 |         for placeholder in placeholders:
71 |             try:
72 |                 matches = [x for x in modules.config.wildcard_filenames if os.path.splitext(os.path.basename(x))[0] == placeholder]
73 |                 words = open(os.path.join(modules.config.path_wildcards, matches[0]), encoding='utf-8').read().splitlines()
74 |                 words = [x for x in words if x != '']
75 |                 assert len(words) > 0
76 |                 if read_wildcards_in_order:
77 |                     wildcard_text = wildcard_text.replace(f'__{placeholder}__', words[i % len(words)], 1)
78 |                 else:
79 |                     wildcard_text = wildcard_text.replace(f'__{placeholder}__', rng.choice(words), 1)
80 | 
except: 81 | print(f'[Wildcards] Warning: {placeholder}.txt missing or empty. ' 82 | f'Using "{placeholder}" as a normal word.') 83 | wildcard_text = wildcard_text.replace(f'__{placeholder}__', placeholder) 84 | print(f'[Wildcards] {wildcard_text}') 85 | 86 | print(f'[Wildcards] BFS stack overflow. Current text: {wildcard_text}') 87 | return wildcard_text 88 | 89 | 90 | def get_words(arrays, totalMult, index): 91 | if len(arrays) == 1: 92 | return [arrays[0].split(',')[index]] 93 | else: 94 | words = arrays[0].split(',') 95 | word = words[index % len(words)] 96 | index -= index % len(words) 97 | index /= len(words) 98 | index = math.floor(index) 99 | return [word] + get_words(arrays[1:], math.floor(totalMult/len(words)), index) 100 | 101 | 102 | def apply_arrays(text, index): 103 | arrays = re.findall(r'\[\[(.*?)\]\]', text) 104 | if len(arrays) == 0: 105 | return text 106 | 107 | print(f'[Arrays] processing: {text}') 108 | mult = 1 109 | for arr in arrays: 110 | words = arr.split(',') 111 | mult *= len(words) 112 | 113 | index %= mult 114 | chosen_words = get_words(arrays, mult, index) 115 | 116 | i = 0 117 | for arr in arrays: 118 | text = text.replace(f'[[{arr}]]', chosen_words[i], 1) 119 | i = i+1 120 | 121 | return text 122 | 123 | -------------------------------------------------------------------------------- /modules/style_sorter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gradio as gr 3 | import modules.localization as localization 4 | import json 5 | 6 | 7 | all_styles = [] 8 | 9 | 10 | def try_load_sorted_styles(style_names, default_selected): 11 | global all_styles 12 | 13 | all_styles = style_names 14 | 15 | try: 16 | if os.path.exists('sorted_styles.json'): 17 | with open('sorted_styles.json', 'rt', encoding='utf-8') as fp: 18 | sorted_styles = [] 19 | for x in json.load(fp): 20 | if x in all_styles: 21 | sorted_styles.append(x) 22 | for x in all_styles: 23 | if x not in sorted_styles: 24 | sorted_styles.append(x) 25 | all_styles = sorted_styles 26 | except Exception as e: 27 | print('Load style sorting failed.') 28 | print(e) 29 | 30 | unselected = [y for y in all_styles if y not in default_selected] 31 | all_styles = default_selected + unselected 32 | 33 | return 34 | 35 | 36 | def sort_styles(selected): 37 | global all_styles 38 | unselected = [y for y in all_styles if y not in selected] 39 | sorted_styles = selected + unselected 40 | try: 41 | with open('sorted_styles.json', 'wt', encoding='utf-8') as fp: 42 | json.dump(sorted_styles, fp, indent=4) 43 | except Exception as e: 44 | print('Write style sorting failed.') 45 | print(e) 46 | all_styles = sorted_styles 47 | return gr.CheckboxGroup.update(choices=sorted_styles) 48 | 49 | 50 | def localization_key(x): 51 | return x + localization.current_translation.get(x, '') 52 | 53 | 54 | def search_styles(selected, query): 55 | unselected = [y for y in all_styles if y not in selected] 56 | matched = [y for y in unselected if query.lower() in localization_key(y).lower()] if len(query.replace(' ', '')) > 0 else [] 57 | unmatched = [y for y in unselected if y not in matched] 58 | sorted_styles = matched + selected + unmatched 59 | return gr.CheckboxGroup.update(choices=sorted_styles) 60 | -------------------------------------------------------------------------------- /modules/ui_gradio_extensions.py: -------------------------------------------------------------------------------- 1 | # based on 
https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/modules/ui_gradio_extensions.py 2 | 3 | import os 4 | import gradio as gr 5 | import args_manager 6 | 7 | from modules.localization import localization_js 8 | 9 | 10 | GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse 11 | 12 | modules_path = os.path.dirname(os.path.realpath(__file__)) 13 | script_path = os.path.dirname(modules_path) 14 | 15 | 16 | def webpath(fn): 17 | if fn.startswith(script_path): 18 | web_path = os.path.relpath(fn, script_path).replace('\\', '/') 19 | else: 20 | web_path = os.path.abspath(fn) 21 | 22 | return f'file={web_path}?{os.path.getmtime(fn)}' 23 | 24 | 25 | def javascript_html(): 26 | script_js_path = webpath('javascript/script.js') 27 | context_menus_js_path = webpath('javascript/contextMenus.js') 28 | localization_js_path = webpath('javascript/localization.js') 29 | zoom_js_path = webpath('javascript/zoom.js') 30 | edit_attention_js_path = webpath('javascript/edit-attention.js') 31 | viewer_js_path = webpath('javascript/viewer.js') 32 | image_viewer_js_path = webpath('javascript/imageviewer.js') 33 | samples_path = webpath(os.path.abspath('./sdxl_styles/samples/fooocus_v2.jpg')) 34 | head = f'\n' 35 | head += f'\n' 36 | head += f'\n' 37 | head += f'\n' 38 | head += f'\n' 39 | head += f'\n' 40 | head += f'\n' 41 | head += f'\n' 42 | head += f'\n' 43 | 44 | if args_manager.args.theme: 45 | head += f'\n' 46 | 47 | return head 48 | 49 | 50 | def css_html(): 51 | style_css_path = webpath('css/style.css') 52 | head = f'' 53 | return head 54 | 55 | 56 | def reload_javascript(): 57 | js = javascript_html() 58 | css = css_html() 59 | 60 | def template_response(*args, **kwargs): 61 | res = GradioTemplateResponseOriginal(*args, **kwargs) 62 | res.body = res.body.replace(b'', f'{js}'.encode("utf8")) 63 | res.body = res.body.replace(b'', f'{css}'.encode("utf8")) 64 | res.init_headers() 65 | return res 66 | 67 | gr.routes.templates.TemplateResponse = template_response 68 | -------------------------------------------------------------------------------- /modules/upscaler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import modules.core as core 4 | 5 | from ldm_patched.pfn.architecture.RRDB import RRDBNet as ESRGAN 6 | from ldm_patched.contrib.external_upscale_model import ImageUpscaleWithModel 7 | from collections import OrderedDict 8 | from modules.config import path_upscale_models 9 | 10 | model_filename = os.path.join(path_upscale_models, 'fooocus_upscaler_s409985e5.bin') 11 | opImageUpscaleWithModel = ImageUpscaleWithModel() 12 | model = None 13 | 14 | 15 | def perform_upscale(img): 16 | global model 17 | 18 | print(f'Upscaling image with shape {str(img.shape)} ...') 19 | 20 | if model is None: 21 | sd = torch.load(model_filename) 22 | sdo = OrderedDict() 23 | for k, v in sd.items(): 24 | sdo[k.replace('residual_block_', 'RDB')] = v 25 | del sd 26 | model = ESRGAN(sdo) 27 | model.cpu() 28 | model.eval() 29 | 30 | img = core.numpy_to_pytorch(img) 31 | img = opImageUpscaleWithModel.upscale(model, img)[0] 32 | img = core.pytorch_to_numpy(img)[0] 33 | 34 | return img 35 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | # python=3.10.14 3 | diffusers==0.30.2 4 | transformers==4.46.0 5 | accelerate==0.29.1 6 | tqdm==4.66.5 7 | matplotlib==3.8.4 8 | torch==2.2.2 9 | 
torchvision==0.17.2 10 | xformers==0.0.25.post1 11 | huggingface_hub==0.23.5 12 | peft==0.10.0 13 | bezier 14 | notebook==7.1.2 15 | opencv-python==4.9.0.80 16 | numpy==1.25.1 17 | scipy==1.13.0 18 | torchsde==0.2.6 19 | einops==0.7.0 20 | -------------------------------------------------------------------------------- /sdxl_styles/sdxl_styles_fooocus.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Fooocus Enhance", 4 | "negative_prompt": "(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)" 5 | }, 6 | { 7 | "name": "Fooocus Semi Realistic", 8 | "negative_prompt": "(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)" 9 | }, 10 | { 11 | "name": "Fooocus Sharp", 12 | "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo, sharp focus, high budget, cinemascope, moody, epic, gorgeous, film grain, grainy", 13 | "negative_prompt": "anime, cartoon, graphic, (blur, blurry, bokeh), text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 14 | }, 15 | { 16 | "name": "Fooocus Masterpiece", 17 | "prompt": "(masterpiece), (best quality), (ultra-detailed), {prompt}, illustration, disheveled hair, detailed eyes, perfect composition, moist skin, intricate details, earrings, by wlop", 18 | "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair,extra digit, fewer digits, cropped, worst quality, low quality" 19 | }, 20 | { 21 | "name": "Fooocus Photograph", 22 | "prompt": "photograph {prompt}, 50mm . 
cinematic 4k epic detailed 4k epic detailed photograph shot on kodak detailed cinematic hbo dark moody, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage", 23 | "negative_prompt": "Brad Pitt, bokeh, depth of field, blurry, cropped, regular face, saturated, contrast, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck" 24 | }, 25 | { 26 | "name": "Fooocus Negative", 27 | "negative_prompt": "deformed, bad anatomy, disfigured, poorly drawn face, mutated, extra limb, ugly, poorly drawn hands, missing limb, floating limbs, disconnected limbs, disconnected head, malformed hands, long neck, mutated hands and fingers, bad hands, missing fingers, cropped, worst quality, low quality, mutation, poorly drawn, huge calf, bad hands, fused hand, missing hand, disappearing arms, disappearing thigh, disappearing calf, disappearing legs, missing fingers, fused fingers, abnormal eye proportion, Abnormal hands, abnormal legs, abnormal feet, abnormal fingers, drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly, anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch" 28 | }, 29 | { 30 | "name": "Fooocus Cinematic", 31 | "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", 32 | "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 33 | } 34 | ] 35 | -------------------------------------------------------------------------------- /sdxl_styles/sdxl_styles_sai.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "sai-3d-model", 4 | "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting", 5 | "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting" 6 | }, 7 | { 8 | "name": "sai-analog film", 9 | "prompt": "analog film photo {prompt} . faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage", 10 | "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured" 11 | }, 12 | { 13 | "name": "sai-anime", 14 | "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed", 15 | "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast" 16 | }, 17 | { 18 | "name": "sai-cinematic", 19 | "prompt": "cinematic film still {prompt} . shallow depth of field, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", 20 | "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 21 | }, 22 | { 23 | "name": "sai-comic book", 24 | "prompt": "comic {prompt} . 
graphic illustration, comic art, graphic novel art, vibrant, highly detailed", 25 | "negative_prompt": "photograph, deformed, glitch, noisy, realistic, stock photo" 26 | }, 27 | { 28 | "name": "sai-craft clay", 29 | "prompt": "play-doh style {prompt} . sculpture, clay art, centered composition, Claymation", 30 | "negative_prompt": "sloppy, messy, grainy, highly detailed, ultra textured, photo" 31 | }, 32 | { 33 | "name": "sai-digital art", 34 | "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed", 35 | "negative_prompt": "photo, photorealistic, realism, ugly" 36 | }, 37 | { 38 | "name": "sai-enhance", 39 | "prompt": "breathtaking {prompt} . award-winning, professional, highly detailed", 40 | "negative_prompt": "ugly, deformed, noisy, blurry, distorted, grainy" 41 | }, 42 | { 43 | "name": "sai-fantasy art", 44 | "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy", 45 | "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white" 46 | }, 47 | { 48 | "name": "sai-isometric", 49 | "prompt": "isometric style {prompt} . vibrant, beautiful, crisp, detailed, ultra detailed, intricate", 50 | "negative_prompt": "deformed, mutated, ugly, disfigured, blur, blurry, noise, noisy, realistic, photographic" 51 | }, 52 | { 53 | "name": "sai-line art", 54 | "prompt": "line art drawing {prompt} . professional, sleek, modern, minimalist, graphic, line art, vector graphics", 55 | "negative_prompt": "anime, photorealistic, 35mm film, deformed, glitch, blurry, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, mutated, realism, realistic, impressionism, expressionism, oil, acrylic" 56 | }, 57 | { 58 | "name": "sai-lowpoly", 59 | "prompt": "low-poly style {prompt} . low-poly game art, polygon mesh, jagged, blocky, wireframe edges, centered composition", 60 | "negative_prompt": "noisy, sloppy, messy, grainy, highly detailed, ultra textured, photo" 61 | }, 62 | { 63 | "name": "sai-neonpunk", 64 | "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional", 65 | "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured" 66 | }, 67 | { 68 | "name": "sai-origami", 69 | "prompt": "origami style {prompt} . paper art, pleated paper, folded, origami art, pleats, cut and fold, centered composition", 70 | "negative_prompt": "noisy, sloppy, messy, grainy, highly detailed, ultra textured, photo" 71 | }, 72 | { 73 | "name": "sai-photographic", 74 | "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed", 75 | "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly" 76 | }, 77 | { 78 | "name": "sai-pixel art", 79 | "prompt": "pixel-art {prompt} . 
low-res, blocky, pixel art style, 8-bit graphics", 80 | "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic" 81 | }, 82 | { 83 | "name": "sai-texture", 84 | "prompt": "texture {prompt} top down close-up", 85 | "negative_prompt": "ugly, deformed, noisy, blurry" 86 | } 87 | ] -------------------------------------------------------------------------------- /utils/FooocusDpmpp2mSdeGpuKarras.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ldm_patched.k_diffusion.sampling import BrownianTreeNoiseSampler 3 | from ldm_patched.modules.model_sampling import EPS, ModelSamplingDiscrete 4 | 5 | 6 | class ModelSampling(EPS, ModelSamplingDiscrete): 7 | pass 8 | 9 | def append_zero(x): 10 | return torch.cat([x, x.new_zeros([1])]) 11 | 12 | 13 | def get_sigmas_karras(n, sigma_min, sigma_max, rho=7., device='cpu'): 14 | 15 | """Constructs the noise schedule of Karras et al. (2022).""" 16 | ramp = torch.linspace(0, 1, n, device=device) 17 | min_inv_rho = sigma_min ** (1 / rho) 18 | max_inv_rho = sigma_max ** (1 / rho) 19 | sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho 20 | return append_zero(sigmas).to(device) 21 | 22 | class KSampler: 23 | 24 | def __init__(self, 25 | latent, 26 | steps, 27 | device, 28 | sampler='dpmpp_2m_sde_gpu', 29 | scheduler='karras', 30 | denoise=1, 31 | model_options={}, 32 | start_step=0, 33 | last_step=30, 34 | force_full_denoise=False, 35 | seed = None): 36 | self.device = device 37 | self.scheduler = scheduler 38 | self.sampler = sampler 39 | self.set_steps(steps, denoise) 40 | self.denoise = denoise # denoising_strength 41 | self.model_options = model_options 42 | 43 | # step param 44 | self.old_denoised = None 45 | self.h_last = None 46 | 47 | self.model_sampling = ModelSampling() 48 | 49 | 50 | sigmas = self.sigmas 51 | 52 | if last_step is not None and last_step < (len(sigmas) - 1): 53 | sigmas = sigmas[:last_step + 1] 54 | if force_full_denoise: 55 | sigmas[-1] = 0 56 | 57 | if start_step is not None: 58 | assert start_step < (len(sigmas) - 1) 59 | sigmas = sigmas[start_step:] 60 | 61 | # if start_step < (len(sigmas) - 1): 62 | # sigmas = sigmas[start_step:] 63 | # else: 64 | # if latent_image is not None: 65 | # return latent_image 66 | # else: 67 | # return torch.zeros_like(noise) 68 | sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() 69 | self.noise_sampler = BrownianTreeNoiseSampler(latent, sigma_min, sigma_max, seed=seed) 70 | self.sigmas = sigmas 71 | self.log_sigmas = sigmas.log() 72 | 73 | def calculate_sigmas(self, steps): 74 | sigmas = None 75 | 76 | discard_penultimate_sigma = False 77 | if self.sampler in ['dpm_2', 'dpm_2_ancestral', 'uni_pc', 'uni_pc_bh2']: 78 | steps += 1 79 | discard_penultimate_sigma = True 80 | 81 | sigmas = get_sigmas_karras(n=steps, sigma_min=0.0292, sigma_max=14.6146) 82 | # sigmas = get_sigmas_karras(n=steps, sigma_min=0.0291675, sigma_max=14.614642) 83 | 84 | if discard_penultimate_sigma: 85 | sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) 86 | return sigmas 87 | 88 | def set_steps(self, steps, denoise=None): 89 | self.steps = steps 90 | if denoise is None or denoise > 0.9999: 91 | self.sigmas = self.calculate_sigmas(steps).to(self.device) 92 | else: 93 | new_steps = int(steps/denoise) 94 | sigmas = self.calculate_sigmas(new_steps).to(self.device) 95 | self.sigmas = sigmas[-(steps + 1):] 96 | 97 | @torch.no_grad() 98 | def step(self, i, pred_x0, x, t=None, eta=1., s_noise=1., 
solver_type='midpoint'): 99 | """DPM-Solver++(2M) SDE.""" 100 | 101 | if solver_type not in {'heun', 'midpoint'}: 102 | raise ValueError('solver_type must be \'heun\' or \'midpoint\'') 103 | sigmas = self.sigmas 104 | 105 | denoised = pred_x0 106 | if sigmas[i + 1] == 0: 107 | x = denoised 108 | else: 109 | # DPM-Solver++(2M) SDE 110 | t, s = -sigmas[i].log(), -sigmas[i + 1].log() 111 | h = s - t 112 | eta_h = eta * h 113 | 114 | x = sigmas[i + 1] / sigmas[i] * (-eta_h).exp() * x + (-h - eta_h).expm1().neg() * denoised 115 | 116 | if self.old_denoised is not None: 117 | r = self.h_last / h 118 | if solver_type == 'heun': 119 | x = x + ((-h - eta_h).expm1().neg() / (-h - eta_h) + 1) * (1 / r) * (denoised - self.old_denoised) 120 | elif solver_type == 'midpoint': 121 | x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - self.old_denoised) 122 | 123 | if eta: 124 | x = x + self.noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise 125 | 126 | self.old_denoised = denoised 127 | self.h_last = h 128 | return x 129 | 130 | def timestep(self, i): 131 | sigma = self.sigmas[i] 132 | t = self.model_sampling.timestep(sigma).float() 133 | return t 134 | 135 | def calculate_input(self, i, x): 136 | sigma = self.sigmas[i] 137 | return self.model_sampling.calculate_input(sigma, x) 138 | 139 | def calculate_denoised(self, i, model_output, model_input): 140 | sigma = self.sigmas[i] 141 | return self.model_sampling.calculate_denoised(sigma, model_output, model_input) 142 | 143 | 144 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .add_fooocus_inpaint_patch import add_fooocus_inpaint_patch 2 | from .add_fooocus_inpaint_head_patch import ( 3 | add_fooocus_inpaint_head_patch_with_work, 4 | inject_fooocus_inpaint_head, 5 | ) 6 | from .prompt_style_enhance import enhance_prompt 7 | from .FooocusDpmpp2mSdeGpuKarras import KSampler 8 | from .mask_aug import extend_mask_with_bezier, mask_paint2bbox 9 | from .orthogonal_decomposition import sks_decompose, orthogonal_decomposition 10 | -------------------------------------------------------------------------------- /utils/mask_aug.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import bezier 3 | import random 4 | import numpy as np 5 | 6 | 7 | def extend_mask_with_bezier(mask, extend_ratio=0.2, random_width=5): 8 | 9 | H, W = mask.shape 10 | 11 | contours, _ = cv2.findContours( 12 | mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE 13 | ) 14 | 15 | extended_mask = np.zeros((H, W), dtype=np.uint8) 16 | 17 | for contour in contours: 18 | bbox = cv2.boundingRect(contour) 19 | x, y, w, h = bbox 20 | 21 | extended_bbox = [ 22 | x - int(extend_ratio * w), 23 | y - int(extend_ratio * h), 24 | x + w + int(extend_ratio * w), 25 | y + h + int(extend_ratio * h), 26 | ] 27 | 28 | extended_bbox[0] = max(0, extended_bbox[0]) 29 | extended_bbox[1] = max(0, extended_bbox[1]) 30 | extended_bbox[2] = min(W, extended_bbox[2]) 31 | extended_bbox[3] = min(H, extended_bbox[3]) 32 | 33 | top_nodes = np.asfortranarray( 34 | [[x, (x + x + w) // 2, x + w], [y, extended_bbox[1], y]] 35 | ) 36 | down_nodes = np.asfortranarray( 37 | [[x + w, (x + x + w) // 2, x], [y + h, extended_bbox[3], y + h]] 38 | ) 39 | left_nodes = np.asfortranarray( 40 | [[x, extended_bbox[0], x], [y + h, (y + y + h) // 2, y]] 41 | ) 42 | right_nodes 
= np.asfortranarray( 43 | [[x + w, extended_bbox[2], x + w], [y, (y + y + h) // 2, y + h]] 44 | ) 45 | 46 | top_curve = bezier.Curve(top_nodes, degree=2) 47 | right_curve = bezier.Curve(right_nodes, degree=2) 48 | down_curve = bezier.Curve(down_nodes, degree=2) 49 | left_curve = bezier.Curve(left_nodes, degree=2) 50 | 51 | pt_list = [] 52 | for curve in [top_curve, right_curve, down_curve, left_curve]: 53 | for i in range(1, 20): 54 | pt = curve.evaluate(i * 0.05) 55 | pt_list.append( 56 | ( 57 | int(pt[0, 0] + random.randint(-random_width, random_width)), 58 | int(pt[1, 0] + random.randint(-random_width, random_width)), 59 | ) 60 | ) 61 | cv2.fillPoly(extended_mask, [np.array(pt_list)], 1) 62 | 63 | return extended_mask * 255 64 | 65 | 66 | def mask_paint2bbox(mask, random_drop=0.0): 67 | contours, _ = cv2.findContours( 68 | mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE 69 | ) 70 | x, y, w, h = cv2.boundingRect(contours[0]) 71 | new_mask = np.zeros_like(mask) 72 | # if random_drop > 0 and random.random() < random_drop: 73 | # w = w * (random.random() + 0.5) 74 | # h = h * (random.random() + 0.5) 75 | 76 | cv2.rectangle(new_mask, (x, y), (x + w, y + h), (255, 255, 255), -1) 77 | return new_mask 78 | -------------------------------------------------------------------------------- /utils/orthogonal_decomposition.py: -------------------------------------------------------------------------------- 1 | import re 2 | import torch 3 | 4 | def normalize_spaces(text: str) -> str: 5 | return re.sub(r"\s+", " ", text) 6 | 7 | 8 | def orthogonal_decomposition(raw_emb: torch.Tensor, remove_emb: torch.Tensor) -> torch.Tensor: 9 | projected_vector_magnitude = raw_emb.dot(remove_emb) / remove_emb.norm() 10 | projected_vector = projected_vector_magnitude * remove_emb / remove_emb.norm() 11 | return raw_emb - projected_vector 12 | 13 | 14 | def sks_decompose( 15 | prompt: str, 16 | prompt_emb: torch.Tensor, 17 | to_decopose_embeds: torch.Tensor, 18 | decompose_words_num: int, 19 | prefix_prompt:str = "", 20 | ) -> torch.Tensor: 21 | 22 | prompt = normalize_spaces(prompt.lower().strip()) 23 | prompt_words = prompt.split(" ") 24 | 25 | prefix_prompt = normalize_spaces(prefix_prompt.lower().strip()) 26 | 27 | if prefix_prompt == "": 28 | prefix_prompt_len = 0 + 1 29 | else: 30 | prefix_prompt_len = len(prefix_prompt.split(" ")) + 1 31 | 32 | # get index of "sks" 33 | for i in range(len(prompt_words)): 34 | if prompt_words[i] == "sks": 35 | ind_sks = i + 1 36 | break 37 | else: 38 | raise ValueError(f"Prompt {prompt} does not contain 'sks'") 39 | 40 | # # get index of remove_words 41 | inds_replace = [] 42 | # for word in remove_words: 43 | # word = word.lower() 44 | # for i in range(len(prompt_words)): 45 | # if prompt_words[i] == word: 46 | # inds_replace.append(i + 1) 47 | # break 48 | 49 | # for ind_replace in inds_replace: 50 | # prompt_emb[ind_sks, ...] = orthogonal_decomposition( 51 | # prompt_emb[ind_sks, ...], raw_prompt_embeds[ind_replace, ...] 52 | # ) 53 | 54 | for ind_de in range(prefix_prompt_len, decompose_words_num + prefix_prompt_len): 55 | # for i in range(decompose_words_num): 56 | for ind in range(1, len(prompt_words) + 1): 57 | prompt_emb[ind, ...] = orthogonal_decomposition( 58 | prompt_emb[ind, ...], to_decopose_embeds[ind_de, ...] 59 | ) 60 | 61 | return prompt_emb 62 | --------------------------------------------------------------------------------
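A minimal usage sketch (illustrative only, not a file in this repository) of utils/orthogonal_decomposition.py: orthogonal_decomposition subtracts from raw_emb its projection onto remove_emb, so the returned vector has (numerically) zero component along the removed direction, and sks_decompose applies the same operation token by token to a prompt embedding. The sketch assumes the repository root is on PYTHONPATH (the helpers are re-exported by utils/__init__.py) and uses an arbitrary embedding width of 2048 as a stand-in for a real text-encoder dimension.

import torch

from utils import orthogonal_decomposition  # re-exported by utils/__init__.py

torch.manual_seed(0)
raw_emb = torch.randn(2048)     # stand-in for one token embedding of the prompt
remove_emb = torch.randn(2048)  # stand-in for the embedding of the concept to strip out

cleaned = orthogonal_decomposition(raw_emb, remove_emb)

print(float(cleaned.dot(remove_emb)))     # ~0: no component left along remove_emb
print(float((raw_emb - cleaned).norm()))  # magnitude of the projection that was removed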
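In the same spirit, a small hypothetical call (also not part of the repository) for mask_paint2bbox from utils/mask_aug.py, which collapses a painted mask to the filled bounding rectangle of its first contour; extend_mask_with_bezier is its randomized counterpart, inflating that box with jittered Bezier outlines for augmentation. The toy mask below is made up for illustration; the call assumes opencv-python from requirements.txt is installed and the utils package is importable.

import numpy as np

from utils import mask_paint2bbox

# Hypothetical 256x256 user scribble: a filled blob marking the edit region.
mask = np.zeros((256, 256), dtype=np.uint8)
mask[100:160, 80:160] = 255

bbox_mask = mask_paint2bbox(mask)

print(bbox_mask.shape, bbox_mask.dtype)             # (256, 256) uint8
print(int(bbox_mask.max()), int(bbox_mask.min()))   # 255 0 -- tight box filled with 255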