├── FooocusSDXLInpaintAllInOnePipeline.py ├── assets └── teaser.png ├── data ├── 1_1.jpg ├── 1_2.jpg ├── 2_1.jpg ├── 2_2.jpg ├── 3_1.jpg ├── 3_2.jpg ├── 4_1.jpg └── 4_2.jpg ├── infer.ipynb ├── ldm_patched ├── contrib │ ├── external.py │ ├── external_canny.py │ ├── external_clip_sdxl.py │ ├── external_compositing.py │ ├── external_custom_sampler.py │ ├── external_freelunch.py │ ├── external_hypernetwork.py │ ├── external_hypertile.py │ ├── external_images.py │ ├── external_latent.py │ ├── external_mask.py │ ├── external_model_advanced.py │ ├── external_model_downscale.py │ ├── external_model_merging.py │ ├── external_perpneg.py │ ├── external_photomaker.py │ ├── external_post_processing.py │ ├── external_rebatch.py │ ├── external_sag.py │ ├── external_sdupscale.py │ ├── external_stable3d.py │ ├── external_tomesd.py │ ├── external_upscale_model.py │ └── external_video_model.py ├── controlnet │ └── cldm.py ├── k_diffusion │ ├── sampling.py │ └── utils.py ├── ldm │ ├── models │ │ ├── __pycache__ │ │ │ └── autoencoder.cpython-310.pyc │ │ └── autoencoder.py │ ├── modules │ │ ├── __pycache__ │ │ │ ├── attention.cpython-310.pyc │ │ │ ├── ema.cpython-310.pyc │ │ │ └── sub_quadratic_attention.cpython-310.pyc │ │ ├── attention.py │ │ ├── diffusionmodules │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ ├── model.cpython-310.pyc │ │ │ │ ├── openaimodel.cpython-310.pyc │ │ │ │ ├── upscaling.cpython-310.pyc │ │ │ │ └── util.cpython-310.pyc │ │ │ ├── model.py │ │ │ ├── openaimodel.py │ │ │ ├── upscaling.py │ │ │ └── util.py │ │ ├── distributions │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ └── distributions.cpython-310.pyc │ │ │ └── distributions.py │ │ ├── ema.py │ │ ├── encoders │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-310.pyc │ │ │ │ └── noise_aug_modules.cpython-310.pyc │ │ │ └── noise_aug_modules.py │ │ ├── sub_quadratic_attention.py │ │ └── temporal_ae.py │ └── util.py ├── licenses-3rd │ ├── chainer │ ├── comfyui │ ├── diffusers │ ├── kdiffusion │ ├── ldm │ ├── taesd │ └── transformers ├── modules │ ├── args_parser.py │ ├── checkpoint_pickle.py │ ├── clip_config_bigg.json │ ├── clip_model.py │ ├── clip_vision.py │ ├── clip_vision_config_g.json │ ├── clip_vision_config_h.json │ ├── clip_vision_config_vitl.json │ ├── conds.py │ ├── controlnet.py │ ├── diffusers_convert.py │ ├── diffusers_load.py │ ├── gligen.py │ ├── latent_formats.py │ ├── lora.py │ ├── model_base.py │ ├── model_detection.py │ ├── model_management.py │ ├── model_patcher.py │ ├── model_sampling.py │ ├── ops.py │ ├── options.py │ ├── sample.py │ ├── samplers.py │ ├── sd.py │ ├── sd1_clip.py │ ├── sd1_clip_config.json │ ├── sd1_tokenizer │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ ├── sd2_clip.py │ ├── sd2_clip_config.json │ ├── sdxl_clip.py │ ├── supported_models.py │ ├── supported_models_base.py │ └── utils.py ├── pfn │ ├── __init__.py │ ├── architecture │ │ ├── DAT.py │ │ ├── HAT.py │ │ ├── LICENSE-DAT │ │ ├── LICENSE-ESRGAN │ │ ├── LICENSE-HAT │ │ ├── LICENSE-RealESRGAN │ │ ├── LICENSE-SCUNet │ │ ├── LICENSE-SPSR │ │ ├── LICENSE-SwiftSRGAN │ │ ├── LICENSE-Swin2SR │ │ ├── LICENSE-SwinIR │ │ ├── LICENSE-lama │ │ ├── LaMa.py │ │ ├── OmniSR │ │ │ ├── ChannelAttention.py │ │ │ ├── LICENSE │ │ │ ├── OSA.py │ │ │ ├── OSAG.py │ │ │ ├── OmniSR.py │ │ │ ├── __pycache__ │ │ │ │ ├── OSA.cpython-310.pyc │ │ │ │ ├── OSAG.cpython-310.pyc │ │ │ │ ├── OmniSR.cpython-310.pyc │ │ 
│ │ ├── esa.cpython-310.pyc │ │ │ │ ├── layernorm.cpython-310.pyc │ │ │ │ └── pixelshuffle.cpython-310.pyc │ │ │ ├── esa.py │ │ │ ├── layernorm.py │ │ │ └── pixelshuffle.py │ │ ├── RRDB.py │ │ ├── SCUNet.py │ │ ├── SPSR.py │ │ ├── SRVGG.py │ │ ├── SwiftSRGAN.py │ │ ├── Swin2SR.py │ │ ├── SwinIR.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── DAT.cpython-310.pyc │ │ │ ├── HAT.cpython-310.pyc │ │ │ ├── LaMa.cpython-310.pyc │ │ │ ├── RRDB.cpython-310.pyc │ │ │ ├── SCUNet.cpython-310.pyc │ │ │ ├── SPSR.cpython-310.pyc │ │ │ ├── SRVGG.cpython-310.pyc │ │ │ ├── SwiftSRGAN.cpython-310.pyc │ │ │ ├── Swin2SR.cpython-310.pyc │ │ │ ├── SwinIR.cpython-310.pyc │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── block.cpython-310.pyc │ │ ├── block.py │ │ ├── face │ │ │ ├── LICENSE-GFPGAN │ │ │ ├── LICENSE-RestoreFormer │ │ │ ├── LICENSE-codeformer │ │ │ ├── __pycache__ │ │ │ │ ├── codeformer.cpython-310.pyc │ │ │ │ ├── gfpganv1_clean_arch.cpython-310.pyc │ │ │ │ ├── restoreformer_arch.cpython-310.pyc │ │ │ │ └── stylegan2_clean_arch.cpython-310.pyc │ │ │ ├── arcface_arch.py │ │ │ ├── codeformer.py │ │ │ ├── fused_act.py │ │ │ ├── gfpgan_bilinear_arch.py │ │ │ ├── gfpganv1_arch.py │ │ │ ├── gfpganv1_clean_arch.py │ │ │ ├── restoreformer_arch.py │ │ │ ├── stylegan2_arch.py │ │ │ ├── stylegan2_bilinear_arch.py │ │ │ ├── stylegan2_clean_arch.py │ │ │ └── upfirdn2d.py │ │ └── timm │ │ │ ├── LICENSE │ │ │ ├── __pycache__ │ │ │ ├── drop.cpython-310.pyc │ │ │ ├── helpers.cpython-310.pyc │ │ │ └── weight_init.cpython-310.pyc │ │ │ ├── drop.py │ │ │ ├── helpers.py │ │ │ └── weight_init.py │ ├── model_loading.py │ └── types.py ├── t2ia │ └── adapter.py ├── taesd │ └── taesd.py ├── unipc │ └── uni_pc.py └── utils │ ├── latent_visualization.py │ └── path_utils.py ├── make_img.ipynb ├── models ├── inpaint │ └── put_inpaint_here ├── loras │ └── put_loras_here └── upscale_models │ └── put_esrgan_and_other_upscale_models_here ├── modules ├── anisotropic.py ├── async_worker.py ├── auth.py ├── config.py ├── constants.py ├── core.py ├── default_pipeline.py ├── flags.py ├── gradio_hijack.py ├── html.py ├── inpaint_worker.py ├── launch_util.py ├── localization.py ├── lora.py ├── meta_parser.py ├── model_loader.py ├── ops.py ├── patch.py ├── patch_clip.py ├── patch_precision.py ├── private_logger.py ├── sample_hijack.py ├── sdxl_styles.py ├── style_sorter.py ├── ui_gradio_extensions.py ├── upscaler.py └── util.py ├── positive.txt ├── readme.md ├── requirements.txt ├── sdxl_styles ├── sdxl_styles_diva.json ├── sdxl_styles_fooocus.json ├── sdxl_styles_marc_k3nt3l.json ├── sdxl_styles_mre.json ├── sdxl_styles_sai.json └── sdxl_styles_twri.json ├── train.py └── utils ├── FooocusDpmpp2mSdeGpuKarras.py ├── __init__.py ├── add_fooocus_inpaint_head_patch.py ├── add_fooocus_inpaint_patch.py ├── mask_aug.py ├── orthogonal_decomposition.py └── prompt_style_enhance.py /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/assets/teaser.png -------------------------------------------------------------------------------- /data/1_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/1_1.jpg -------------------------------------------------------------------------------- /data/1_2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/1_2.jpg -------------------------------------------------------------------------------- /data/2_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/2_1.jpg -------------------------------------------------------------------------------- /data/2_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/2_2.jpg -------------------------------------------------------------------------------- /data/3_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/3_1.jpg -------------------------------------------------------------------------------- /data/3_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/3_2.jpg -------------------------------------------------------------------------------- /data/4_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/4_1.jpg -------------------------------------------------------------------------------- /data/4_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/4_2.jpg -------------------------------------------------------------------------------- /infer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "\n", 11 | "DEVICE = \"cuda:1\"\n", 12 | "torch.cuda.set_device(DEVICE)\n", 13 | "\n", 14 | "from FooocusSDXLInpaintAllInOnePipeline import FooocusSDXLInpaintPipeline\n", 15 | "\n", 16 | "pipe = FooocusSDXLInpaintPipeline.from_pretrained(\n", 17 | " \"frankjoshua/juggernautXL_v8Rundiffusion\",\n", 18 | " torch_dtype=torch.float16,\n", 19 | " use_safetensors=True,\n", 20 | ").to(DEVICE)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "notebookRunGroups": { 28 | "groupValue": "1" 29 | } 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "lora_config = [\n", 34 | " {\n", 35 | " \"model_path\": f\"lora/xxx\",\n", 36 | " \"scale\": 1,\n", 37 | " \"for_raw_unet\": False,\n", 38 | " \"for_fooocus_unet\": True,\n", 39 | " },\n", 40 | "]\n", 41 | "\n", 42 | "pipe.preload_fooocus_unet(\n", 43 | " fooocus_model_path=\"./models/fooocus_inpaint/inpaint_v26.fooocus.patch\",\n", 44 | " lora_configs=lora_config,\n", 45 | " add_double_sa=False,\n", 46 | ")" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from diffusers.utils import load_image\n", 56 | "from PIL import Image\n", 57 | "\n", 58 | "\n", 59 | "img_url = f\"data/1_1.jpg\"\n", 60 | "mask_url = f\"data/1_2.jpg\"\n", 61 | "\n", 62 | "init_image = load_image(img_url).convert(\"RGB\")\n", 63 | "mask_image = 
load_image(mask_url).convert(\"RGB\")\n", 64 | "\n", 65 | "prompt = \"\"\n", 66 | "negative_prompt = \"\"" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# Infer!\n", 76 | "image = pipe(\n", 77 | " isf_global_time=20,\n", 78 | " isf_global_ia=1,\n", 79 | " decompose_prefix_prompt=\"a photo of a sks\",\n", 80 | " sks_decompose_words=[\"\"],\n", 81 | " fooocus_model_head_path=\"./models/fooocus_inpaint/fooocus_inpaint_head.pth\",\n", 82 | " fooocus_model_head_upscale_path=\"./models/upscale_models/fooocus_upscaler_s409985e5.bin\",\n", 83 | " pag_scale=1,\n", 84 | " guidance_scale=4,\n", 85 | " ref_image_type=\"no\", \n", 86 | " double_sa_alpha=1,\n", 87 | " save_self_attn=False,\n", 88 | " save_cross_attn=False,\n", 89 | " fooocus_time=0.8,\n", 90 | " inpaint_respective_field=0.5, \n", 91 | " sharpness=1, \n", 92 | " adm_scaler_positive=1.5, \n", 93 | " adm_scaler_negative=0.8, \n", 94 | " adm_scaler_end=0.3,\n", 95 | " seed=42,\n", 96 | " image=init_image,\n", 97 | " mask_image=mask_image,\n", 98 | " prompt=prompt,\n", 99 | " negative_prompt=negative_prompt,\n", 100 | " num_inference_steps=30,\n", 101 | " strength=1,\n", 102 | ")\n", 103 | "image.resize((512, 512))\n", 104 | "image" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "DreamMix", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.10.15" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_clip_sdxl.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | from ldm_patched.contrib.external import MAX_RESOLUTION 5 | 6 | class CLIPTextEncodeSDXLRefiner: 7 | @classmethod 8 | def INPUT_TYPES(s): 9 | return {"required": { 10 | "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}), 11 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 12 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 13 | "text": ("STRING", {"multiline": True}), "clip": ("CLIP", ), 14 | }} 15 | RETURN_TYPES = ("CONDITIONING",) 16 | FUNCTION = "encode" 17 | 18 | CATEGORY = "advanced/conditioning" 19 | 20 | def encode(self, clip, ascore, width, height, text): 21 | tokens = clip.tokenize(text) 22 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 23 | return ([[cond, {"pooled_output": pooled, "aesthetic_score": ascore, "width": width,"height": height}]], ) 24 | 25 | class CLIPTextEncodeSDXL: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { 29 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 30 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 31 | "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 32 | "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 33 | "target_width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 34 | "target_height": ("INT", {"default": 1024.0, "min": 0, 
"max": MAX_RESOLUTION}), 35 | "text_g": ("STRING", {"multiline": True, "default": "CLIP_G"}), "clip": ("CLIP", ), 36 | "text_l": ("STRING", {"multiline": True, "default": "CLIP_L"}), "clip": ("CLIP", ), 37 | }} 38 | RETURN_TYPES = ("CONDITIONING",) 39 | FUNCTION = "encode" 40 | 41 | CATEGORY = "advanced/conditioning" 42 | 43 | def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l): 44 | tokens = clip.tokenize(text_g) 45 | tokens["l"] = clip.tokenize(text_l)["l"] 46 | if len(tokens["l"]) != len(tokens["g"]): 47 | empty = clip.tokenize("") 48 | while len(tokens["l"]) < len(tokens["g"]): 49 | tokens["l"] += empty["l"] 50 | while len(tokens["l"]) > len(tokens["g"]): 51 | tokens["g"] += empty["g"] 52 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 53 | return ([[cond, {"pooled_output": pooled, "width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}]], ) 54 | 55 | NODE_CLASS_MAPPINGS = { 56 | "CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner, 57 | "CLIPTextEncodeSDXL": CLIPTextEncodeSDXL, 58 | } 59 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_freelunch.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | #code originally taken from: https://github.com/ChenyangSi/FreeU (under MIT License) 4 | 5 | import torch 6 | 7 | 8 | def Fourier_filter(x, threshold, scale): 9 | # FFT 10 | x_freq = torch.fft.fftn(x.float(), dim=(-2, -1)) 11 | x_freq = torch.fft.fftshift(x_freq, dim=(-2, -1)) 12 | 13 | B, C, H, W = x_freq.shape 14 | mask = torch.ones((B, C, H, W), device=x.device) 15 | 16 | crow, ccol = H // 2, W //2 17 | mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale 18 | x_freq = x_freq * mask 19 | 20 | # IFFT 21 | x_freq = torch.fft.ifftshift(x_freq, dim=(-2, -1)) 22 | x_filtered = torch.fft.ifftn(x_freq, dim=(-2, -1)).real 23 | 24 | return x_filtered.to(x.dtype) 25 | 26 | 27 | class FreeU: 28 | @classmethod 29 | def INPUT_TYPES(s): 30 | return {"required": { "model": ("MODEL",), 31 | "b1": ("FLOAT", {"default": 1.1, "min": 0.0, "max": 10.0, "step": 0.01}), 32 | "b2": ("FLOAT", {"default": 1.2, "min": 0.0, "max": 10.0, "step": 0.01}), 33 | "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), 34 | "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), 35 | }} 36 | RETURN_TYPES = ("MODEL",) 37 | FUNCTION = "patch" 38 | 39 | CATEGORY = "model_patches" 40 | 41 | def patch(self, model, b1, b2, s1, s2): 42 | model_channels = model.model.model_config.unet_config["model_channels"] 43 | scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} 44 | on_cpu_devices = {} 45 | 46 | def output_block_patch(h, hsp, transformer_options): 47 | scale = scale_dict.get(h.shape[1], None) 48 | if scale is not None: 49 | h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * scale[0] 50 | if hsp.device not in on_cpu_devices: 51 | try: 52 | hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) 53 | except: 54 | print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") 55 | on_cpu_devices[hsp.device] = True 56 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 57 | else: 58 | hsp = Fourier_filter(hsp.cpu(), threshold=1, 
scale=scale[1]).to(hsp.device) 59 | 60 | return h, hsp 61 | 62 | m = model.clone() 63 | m.set_model_output_block_patch(output_block_patch) 64 | return (m, ) 65 | 66 | class FreeU_V2: 67 | @classmethod 68 | def INPUT_TYPES(s): 69 | return {"required": { "model": ("MODEL",), 70 | "b1": ("FLOAT", {"default": 1.3, "min": 0.0, "max": 10.0, "step": 0.01}), 71 | "b2": ("FLOAT", {"default": 1.4, "min": 0.0, "max": 10.0, "step": 0.01}), 72 | "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), 73 | "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), 74 | }} 75 | RETURN_TYPES = ("MODEL",) 76 | FUNCTION = "patch" 77 | 78 | CATEGORY = "model_patches" 79 | 80 | def patch(self, model, b1, b2, s1, s2): 81 | model_channels = model.model.model_config.unet_config["model_channels"] 82 | scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} 83 | on_cpu_devices = {} 84 | 85 | def output_block_patch(h, hsp, transformer_options): 86 | scale = scale_dict.get(h.shape[1], None) 87 | if scale is not None: 88 | hidden_mean = h.mean(1).unsqueeze(1) 89 | B = hidden_mean.shape[0] 90 | hidden_max, _ = torch.max(hidden_mean.view(B, -1), dim=-1, keepdim=True) 91 | hidden_min, _ = torch.min(hidden_mean.view(B, -1), dim=-1, keepdim=True) 92 | hidden_mean = (hidden_mean - hidden_min.unsqueeze(2).unsqueeze(3)) / (hidden_max - hidden_min).unsqueeze(2).unsqueeze(3) 93 | 94 | h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * ((scale[0] - 1 ) * hidden_mean + 1) 95 | 96 | if hsp.device not in on_cpu_devices: 97 | try: 98 | hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) 99 | except: 100 | print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") 101 | on_cpu_devices[hsp.device] = True 102 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 103 | else: 104 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 105 | 106 | return h, hsp 107 | 108 | m = model.clone() 109 | m.set_model_output_block_patch(output_block_patch) 110 | return (m, ) 111 | 112 | NODE_CLASS_MAPPINGS = { 113 | "FreeU": FreeU, 114 | "FreeU_V2": FreeU_V2, 115 | } 116 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_hypernetwork.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.modules.utils 4 | import ldm_patched.utils.path_utils 5 | import torch 6 | 7 | def load_hypernetwork_patch(path, strength): 8 | sd = ldm_patched.modules.utils.load_torch_file(path, safe_load=True) 9 | activation_func = sd.get('activation_func', 'linear') 10 | is_layer_norm = sd.get('is_layer_norm', False) 11 | use_dropout = sd.get('use_dropout', False) 12 | activate_output = sd.get('activate_output', False) 13 | last_layer_dropout = sd.get('last_layer_dropout', False) 14 | 15 | valid_activation = { 16 | "linear": torch.nn.Identity, 17 | "relu": torch.nn.ReLU, 18 | "leakyrelu": torch.nn.LeakyReLU, 19 | "elu": torch.nn.ELU, 20 | "swish": torch.nn.Hardswish, 21 | "tanh": torch.nn.Tanh, 22 | "sigmoid": torch.nn.Sigmoid, 23 | "softsign": torch.nn.Softsign, 24 | "mish": torch.nn.Mish, 25 | } 26 | 27 | if activation_func not in valid_activation: 28 | print("Unsupported Hypernetwork format, if you report it I might implement it.", path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout) 29 | return None 
30 | 31 | out = {} 32 | 33 | for d in sd: 34 | try: 35 | dim = int(d) 36 | except: 37 | continue 38 | 39 | output = [] 40 | for index in [0, 1]: 41 | attn_weights = sd[dim][index] 42 | keys = attn_weights.keys() 43 | 44 | linears = filter(lambda a: a.endswith(".weight"), keys) 45 | linears = list(map(lambda a: a[:-len(".weight")], linears)) 46 | layers = [] 47 | 48 | i = 0 49 | while i < len(linears): 50 | lin_name = linears[i] 51 | last_layer = (i == (len(linears) - 1)) 52 | penultimate_layer = (i == (len(linears) - 2)) 53 | 54 | lin_weight = attn_weights['{}.weight'.format(lin_name)] 55 | lin_bias = attn_weights['{}.bias'.format(lin_name)] 56 | layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0]) 57 | layer.load_state_dict({"weight": lin_weight, "bias": lin_bias}) 58 | layers.append(layer) 59 | if activation_func != "linear": 60 | if (not last_layer) or (activate_output): 61 | layers.append(valid_activation[activation_func]()) 62 | if is_layer_norm: 63 | i += 1 64 | ln_name = linears[i] 65 | ln_weight = attn_weights['{}.weight'.format(ln_name)] 66 | ln_bias = attn_weights['{}.bias'.format(ln_name)] 67 | ln = torch.nn.LayerNorm(ln_weight.shape[0]) 68 | ln.load_state_dict({"weight": ln_weight, "bias": ln_bias}) 69 | layers.append(ln) 70 | if use_dropout: 71 | if (not last_layer) and (not penultimate_layer or last_layer_dropout): 72 | layers.append(torch.nn.Dropout(p=0.3)) 73 | i += 1 74 | 75 | output.append(torch.nn.Sequential(*layers)) 76 | out[dim] = torch.nn.ModuleList(output) 77 | 78 | class hypernetwork_patch: 79 | def __init__(self, hypernet, strength): 80 | self.hypernet = hypernet 81 | self.strength = strength 82 | def __call__(self, q, k, v, extra_options): 83 | dim = k.shape[-1] 84 | if dim in self.hypernet: 85 | hn = self.hypernet[dim] 86 | k = k + hn[0](k) * self.strength 87 | v = v + hn[1](v) * self.strength 88 | 89 | return q, k, v 90 | 91 | def to(self, device): 92 | for d in self.hypernet.keys(): 93 | self.hypernet[d] = self.hypernet[d].to(device) 94 | return self 95 | 96 | return hypernetwork_patch(out, strength) 97 | 98 | class HypernetworkLoader: 99 | @classmethod 100 | def INPUT_TYPES(s): 101 | return {"required": { "model": ("MODEL",), 102 | "hypernetwork_name": (ldm_patched.utils.path_utils.get_filename_list("hypernetworks"), ), 103 | "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), 104 | }} 105 | RETURN_TYPES = ("MODEL",) 106 | FUNCTION = "load_hypernetwork" 107 | 108 | CATEGORY = "loaders" 109 | 110 | def load_hypernetwork(self, model, hypernetwork_name, strength): 111 | hypernetwork_path = ldm_patched.utils.path_utils.get_full_path("hypernetworks", hypernetwork_name) 112 | model_hypernetwork = model.clone() 113 | patch = load_hypernetwork_patch(hypernetwork_path, strength) 114 | if patch is not None: 115 | model_hypernetwork.set_model_attn1_patch(patch) 116 | model_hypernetwork.set_model_attn2_patch(patch) 117 | return (model_hypernetwork,) 118 | 119 | NODE_CLASS_MAPPINGS = { 120 | "HypernetworkLoader": HypernetworkLoader 121 | } 122 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_hypertile.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | #Taken from: https://github.com/tfernd/HyperTile/ 4 | 5 | import math 6 | from einops import rearrange 7 | # Use torch rng for consistency across generations 8 | from torch import randint 9 | 10 | def 
random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: 11 | min_value = min(min_value, value) 12 | 13 | # All big divisors of value (inclusive) 14 | divisors = [i for i in range(min_value, value + 1) if value % i == 0] 15 | 16 | ns = [value // i for i in divisors[:max_options]] # has at least 1 element 17 | 18 | if len(ns) - 1 > 0: 19 | idx = randint(low=0, high=len(ns) - 1, size=(1,)).item() 20 | else: 21 | idx = 0 22 | 23 | return ns[idx] 24 | 25 | class HyperTile: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { "model": ("MODEL",), 29 | "tile_size": ("INT", {"default": 256, "min": 1, "max": 2048}), 30 | "swap_size": ("INT", {"default": 2, "min": 1, "max": 128}), 31 | "max_depth": ("INT", {"default": 0, "min": 0, "max": 10}), 32 | "scale_depth": ("BOOLEAN", {"default": False}), 33 | }} 34 | RETURN_TYPES = ("MODEL",) 35 | FUNCTION = "patch" 36 | 37 | CATEGORY = "model_patches" 38 | 39 | def patch(self, model, tile_size, swap_size, max_depth, scale_depth): 40 | model_channels = model.model.model_config.unet_config["model_channels"] 41 | 42 | latent_tile_size = max(32, tile_size) // 8 43 | self.temp = None 44 | 45 | def hypertile_in(q, k, v, extra_options): 46 | model_chans = q.shape[-2] 47 | orig_shape = extra_options['original_shape'] 48 | apply_to = [] 49 | for i in range(max_depth + 1): 50 | apply_to.append((orig_shape[-2] / (2 ** i)) * (orig_shape[-1] / (2 ** i))) 51 | 52 | if model_chans in apply_to: 53 | shape = extra_options["original_shape"] 54 | aspect_ratio = shape[-1] / shape[-2] 55 | 56 | hw = q.size(1) 57 | h, w = round(math.sqrt(hw * aspect_ratio)), round(math.sqrt(hw / aspect_ratio)) 58 | 59 | factor = (2 ** apply_to.index(model_chans)) if scale_depth else 1 60 | nh = random_divisor(h, latent_tile_size * factor, swap_size) 61 | nw = random_divisor(w, latent_tile_size * factor, swap_size) 62 | 63 | if nh * nw > 1: 64 | q = rearrange(q, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) 65 | self.temp = (nh, nw, h, w) 66 | return q, k, v 67 | 68 | return q, k, v 69 | def hypertile_out(out, extra_options): 70 | if self.temp is not None: 71 | nh, nw, h, w = self.temp 72 | self.temp = None 73 | out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) 74 | out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) 75 | return out 76 | 77 | 78 | m = model.clone() 79 | m.set_model_attn1_patch(hypertile_in) 80 | m.set_model_attn1_output_patch(hypertile_out) 81 | return (m, ) 82 | 83 | NODE_CLASS_MAPPINGS = { 84 | "HyperTile": HyperTile, 85 | } 86 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_latent.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.modules.utils 4 | import torch 5 | 6 | def reshape_latent_to(target_shape, latent): 7 | if latent.shape[1:] != target_shape[1:]: 8 | latent = ldm_patched.modules.utils.common_upscale(latent, target_shape[3], target_shape[2], "bilinear", "center") 9 | return ldm_patched.modules.utils.repeat_to_batch_size(latent, target_shape[0]) 10 | 11 | 12 | class LatentAdd: 13 | @classmethod 14 | def INPUT_TYPES(s): 15 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 16 | 17 | RETURN_TYPES = ("LATENT",) 18 | FUNCTION = "op" 19 | 20 | CATEGORY = "latent/advanced" 21 | 22 | def op(self, samples1, samples2): 23 | samples_out = 
samples1.copy() 24 | 25 | s1 = samples1["samples"] 26 | s2 = samples2["samples"] 27 | 28 | s2 = reshape_latent_to(s1.shape, s2) 29 | samples_out["samples"] = s1 + s2 30 | return (samples_out,) 31 | 32 | class LatentSubtract: 33 | @classmethod 34 | def INPUT_TYPES(s): 35 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 36 | 37 | RETURN_TYPES = ("LATENT",) 38 | FUNCTION = "op" 39 | 40 | CATEGORY = "latent/advanced" 41 | 42 | def op(self, samples1, samples2): 43 | samples_out = samples1.copy() 44 | 45 | s1 = samples1["samples"] 46 | s2 = samples2["samples"] 47 | 48 | s2 = reshape_latent_to(s1.shape, s2) 49 | samples_out["samples"] = s1 - s2 50 | return (samples_out,) 51 | 52 | class LatentMultiply: 53 | @classmethod 54 | def INPUT_TYPES(s): 55 | return {"required": { "samples": ("LATENT",), 56 | "multiplier": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), 57 | }} 58 | 59 | RETURN_TYPES = ("LATENT",) 60 | FUNCTION = "op" 61 | 62 | CATEGORY = "latent/advanced" 63 | 64 | def op(self, samples, multiplier): 65 | samples_out = samples.copy() 66 | 67 | s1 = samples["samples"] 68 | samples_out["samples"] = s1 * multiplier 69 | return (samples_out,) 70 | 71 | class LatentInterpolate: 72 | @classmethod 73 | def INPUT_TYPES(s): 74 | return {"required": { "samples1": ("LATENT",), 75 | "samples2": ("LATENT",), 76 | "ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), 77 | }} 78 | 79 | RETURN_TYPES = ("LATENT",) 80 | FUNCTION = "op" 81 | 82 | CATEGORY = "latent/advanced" 83 | 84 | def op(self, samples1, samples2, ratio): 85 | samples_out = samples1.copy() 86 | 87 | s1 = samples1["samples"] 88 | s2 = samples2["samples"] 89 | 90 | s2 = reshape_latent_to(s1.shape, s2) 91 | 92 | m1 = torch.linalg.vector_norm(s1, dim=(1)) 93 | m2 = torch.linalg.vector_norm(s2, dim=(1)) 94 | 95 | s1 = torch.nan_to_num(s1 / m1) 96 | s2 = torch.nan_to_num(s2 / m2) 97 | 98 | t = (s1 * ratio + s2 * (1.0 - ratio)) 99 | mt = torch.linalg.vector_norm(t, dim=(1)) 100 | st = torch.nan_to_num(t / mt) 101 | 102 | samples_out["samples"] = st * (m1 * ratio + m2 * (1.0 - ratio)) 103 | return (samples_out,) 104 | 105 | class LatentBatch: 106 | @classmethod 107 | def INPUT_TYPES(s): 108 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 109 | 110 | RETURN_TYPES = ("LATENT",) 111 | FUNCTION = "batch" 112 | 113 | CATEGORY = "latent/batch" 114 | 115 | def batch(self, samples1, samples2): 116 | samples_out = samples1.copy() 117 | s1 = samples1["samples"] 118 | s2 = samples2["samples"] 119 | 120 | if s1.shape[1:] != s2.shape[1:]: 121 | s2 = ldm_patched.modules.utils.common_upscale(s2, s1.shape[3], s1.shape[2], "bilinear", "center") 122 | s = torch.cat((s1, s2), dim=0) 123 | samples_out["samples"] = s 124 | samples_out["batch_index"] = samples1.get("batch_index", [x for x in range(0, s1.shape[0])]) + samples2.get("batch_index", [x for x in range(0, s2.shape[0])]) 125 | return (samples_out,) 126 | 127 | class LatentBatchSeedBehavior: 128 | @classmethod 129 | def INPUT_TYPES(s): 130 | return {"required": { "samples": ("LATENT",), 131 | "seed_behavior": (["random", "fixed"],),}} 132 | 133 | RETURN_TYPES = ("LATENT",) 134 | FUNCTION = "op" 135 | 136 | CATEGORY = "latent/advanced" 137 | 138 | def op(self, samples, seed_behavior): 139 | samples_out = samples.copy() 140 | latent = samples["samples"] 141 | if seed_behavior == "random": 142 | if 'batch_index' in samples_out: 143 | samples_out.pop('batch_index') 144 | elif seed_behavior == "fixed": 145 | 
batch_number = samples_out.get("batch_index", [0])[0] 146 | samples_out["batch_index"] = [batch_number] * latent.shape[0] 147 | 148 | return (samples_out,) 149 | 150 | NODE_CLASS_MAPPINGS = { 151 | "LatentAdd": LatentAdd, 152 | "LatentSubtract": LatentSubtract, 153 | "LatentMultiply": LatentMultiply, 154 | "LatentInterpolate": LatentInterpolate, 155 | "LatentBatch": LatentBatch, 156 | "LatentBatchSeedBehavior": LatentBatchSeedBehavior, 157 | } 158 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_model_downscale.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.modules.utils 5 | 6 | class PatchModelAddDownscale: 7 | upscale_methods = ["bicubic", "nearest-exact", "bilinear", "area", "bislerp"] 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | return {"required": { "model": ("MODEL",), 11 | "block_number": ("INT", {"default": 3, "min": 1, "max": 32, "step": 1}), 12 | "downscale_factor": ("FLOAT", {"default": 2.0, "min": 0.1, "max": 9.0, "step": 0.001}), 13 | "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), 14 | "end_percent": ("FLOAT", {"default": 0.35, "min": 0.0, "max": 1.0, "step": 0.001}), 15 | "downscale_after_skip": ("BOOLEAN", {"default": True}), 16 | "downscale_method": (s.upscale_methods,), 17 | "upscale_method": (s.upscale_methods,), 18 | }} 19 | RETURN_TYPES = ("MODEL",) 20 | FUNCTION = "patch" 21 | 22 | CATEGORY = "_for_testing" 23 | 24 | def patch(self, model, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method): 25 | sigma_start = model.model.model_sampling.percent_to_sigma(start_percent) 26 | sigma_end = model.model.model_sampling.percent_to_sigma(end_percent) 27 | 28 | def input_block_patch(h, transformer_options): 29 | if transformer_options["block"][1] == block_number: 30 | sigma = transformer_options["sigmas"][0].item() 31 | if sigma <= sigma_start and sigma >= sigma_end: 32 | h = ldm_patched.modules.utils.common_upscale(h, round(h.shape[-1] * (1.0 / downscale_factor)), round(h.shape[-2] * (1.0 / downscale_factor)), downscale_method, "disabled") 33 | return h 34 | 35 | def output_block_patch(h, hsp, transformer_options): 36 | if h.shape[2] != hsp.shape[2]: 37 | h = ldm_patched.modules.utils.common_upscale(h, hsp.shape[-1], hsp.shape[-2], upscale_method, "disabled") 38 | return h, hsp 39 | 40 | m = model.clone() 41 | if downscale_after_skip: 42 | m.set_model_input_block_patch_after_skip(input_block_patch) 43 | else: 44 | m.set_model_input_block_patch(input_block_patch) 45 | m.set_model_output_block_patch(output_block_patch) 46 | return (m, ) 47 | 48 | NODE_CLASS_MAPPINGS = { 49 | "PatchModelAddDownscale": PatchModelAddDownscale, 50 | } 51 | 52 | NODE_DISPLAY_NAME_MAPPINGS = { 53 | # Sampling 54 | "PatchModelAddDownscale": "PatchModelAddDownscale (Kohya Deep Shrink)", 55 | } 56 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_perpneg.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.modules.model_management 5 | import ldm_patched.modules.sample 6 | import ldm_patched.modules.samplers 7 | import ldm_patched.modules.utils 8 | 9 | 10 | class PerpNeg: 11 | @classmethod 
12 | def INPUT_TYPES(s): 13 | return {"required": {"model": ("MODEL", ), 14 | "empty_conditioning": ("CONDITIONING", ), 15 | "neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}), 16 | }} 17 | RETURN_TYPES = ("MODEL",) 18 | FUNCTION = "patch" 19 | 20 | CATEGORY = "_for_testing" 21 | 22 | def patch(self, model, empty_conditioning, neg_scale): 23 | m = model.clone() 24 | nocond = ldm_patched.modules.sample.convert_cond(empty_conditioning) 25 | 26 | def cfg_function(args): 27 | model = args["model"] 28 | noise_pred_pos = args["cond_denoised"] 29 | noise_pred_neg = args["uncond_denoised"] 30 | cond_scale = args["cond_scale"] 31 | x = args["input"] 32 | sigma = args["sigma"] 33 | model_options = args["model_options"] 34 | nocond_processed = ldm_patched.modules.samplers.encode_model_conds(model.extra_conds, nocond, x, x.device, "negative") 35 | 36 | (noise_pred_nocond, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, nocond_processed, None, x, sigma, model_options) 37 | 38 | pos = noise_pred_pos - noise_pred_nocond 39 | neg = noise_pred_neg - noise_pred_nocond 40 | perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg 41 | perp_neg = perp * neg_scale 42 | cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg) 43 | cfg_result = x - cfg_result 44 | return cfg_result 45 | 46 | m.set_model_sampler_cfg_function(cfg_function) 47 | 48 | return (m, ) 49 | 50 | 51 | NODE_CLASS_MAPPINGS = { 52 | "PerpNeg": PerpNeg, 53 | } 54 | 55 | NODE_DISPLAY_NAME_MAPPINGS = { 56 | "PerpNeg": "Perp-Neg", 57 | } 58 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_sdupscale.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.contrib.external 5 | import ldm_patched.modules.utils 6 | 7 | class SD_4XUpscale_Conditioning: 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | return {"required": { "images": ("IMAGE",), 11 | "positive": ("CONDITIONING",), 12 | "negative": ("CONDITIONING",), 13 | "scale_ratio": ("FLOAT", {"default": 4.0, "min": 0.0, "max": 10.0, "step": 0.01}), 14 | "noise_augmentation": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), 15 | }} 16 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 17 | RETURN_NAMES = ("positive", "negative", "latent") 18 | 19 | FUNCTION = "encode" 20 | 21 | CATEGORY = "conditioning/upscale_diffusion" 22 | 23 | def encode(self, images, positive, negative, scale_ratio, noise_augmentation): 24 | width = max(1, round(images.shape[-2] * scale_ratio)) 25 | height = max(1, round(images.shape[-3] * scale_ratio)) 26 | 27 | pixels = ldm_patched.modules.utils.common_upscale((images.movedim(-1,1) * 2.0) - 1.0, width // 4, height // 4, "bilinear", "center") 28 | 29 | out_cp = [] 30 | out_cn = [] 31 | 32 | for t in positive: 33 | n = [t[0], t[1].copy()] 34 | n[1]['concat_image'] = pixels 35 | n[1]['noise_augmentation'] = noise_augmentation 36 | out_cp.append(n) 37 | 38 | for t in negative: 39 | n = [t[0], t[1].copy()] 40 | n[1]['concat_image'] = pixels 41 | n[1]['noise_augmentation'] = noise_augmentation 42 | out_cn.append(n) 43 | 44 | latent = torch.zeros([images.shape[0], 4, height // 4, width // 4]) 45 | return (out_cp, out_cn, {"samples":latent}) 46 | 47 | NODE_CLASS_MAPPINGS = { 48 | "SD_4XUpscale_Conditioning": SD_4XUpscale_Conditioning, 49 | } 50 | 
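A minimal standalone sketch (not part of the repository) of the perpendicular-negative projection implemented in PerpNeg.cfg_function (ldm_patched/contrib/external_perpneg.py, above); the tensors and scales below are illustrative assumptions standing in for the sampler's denoised predictions:

import torch

# Toy stand-ins for the sampler's denoised predictions (shapes and values are assumptions).
x = torch.randn(1, 4, 64, 64)                 # current latent input
noise_pred_pos = torch.randn_like(x)          # prompt-conditioned prediction
noise_pred_neg = torch.randn_like(x)          # negative-prompt prediction
noise_pred_nocond = torch.randn_like(x)       # empty-prompt prediction
cond_scale, neg_scale = 7.0, 1.0

pos = noise_pred_pos - noise_pred_nocond
neg = noise_pred_neg - noise_pred_nocond
# Projection of the positive direction onto the negative one; subtracting it (scaled by
# neg_scale) keeps only the component of `pos` perpendicular to `neg`.
perp = (torch.mul(pos, neg).sum() / torch.norm(neg) ** 2) * neg
cfg_result = noise_pred_nocond + cond_scale * (pos - perp * neg_scale)
cfg_result = x - cfg_result                   # same final reparameterization as the node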
-------------------------------------------------------------------------------- /ldm_patched/contrib/external_stable3d.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.contrib.external 5 | import ldm_patched.modules.utils 6 | 7 | def camera_embeddings(elevation, azimuth): 8 | elevation = torch.as_tensor([elevation]) 9 | azimuth = torch.as_tensor([azimuth]) 10 | embeddings = torch.stack( 11 | [ 12 | torch.deg2rad( 13 | (90 - elevation) - (90) 14 | ), # Zero123 polar is 90-elevation 15 | torch.sin(torch.deg2rad(azimuth)), 16 | torch.cos(torch.deg2rad(azimuth)), 17 | torch.deg2rad( 18 | 90 - torch.full_like(elevation, 0) 19 | ), 20 | ], dim=-1).unsqueeze(1) 21 | 22 | return embeddings 23 | 24 | 25 | class StableZero123_Conditioning: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { "clip_vision": ("CLIP_VISION",), 29 | "init_image": ("IMAGE",), 30 | "vae": ("VAE",), 31 | "width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 32 | "height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 33 | "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), 34 | "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 35 | "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 36 | }} 37 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 38 | RETURN_NAMES = ("positive", "negative", "latent") 39 | 40 | FUNCTION = "encode" 41 | 42 | CATEGORY = "conditioning/3d_models" 43 | 44 | def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth): 45 | output = clip_vision.encode_image(init_image) 46 | pooled = output.image_embeds.unsqueeze(0) 47 | pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) 48 | encode_pixels = pixels[:,:,:,:3] 49 | t = vae.encode(encode_pixels) 50 | cam_embeds = camera_embeddings(elevation, azimuth) 51 | cond = torch.cat([pooled, cam_embeds.to(pooled.device).repeat((pooled.shape[0], 1, 1))], dim=-1) 52 | 53 | positive = [[cond, {"concat_latent_image": t}]] 54 | negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]] 55 | latent = torch.zeros([batch_size, 4, height // 8, width // 8]) 56 | return (positive, negative, {"samples":latent}) 57 | 58 | class StableZero123_Conditioning_Batched: 59 | @classmethod 60 | def INPUT_TYPES(s): 61 | return {"required": { "clip_vision": ("CLIP_VISION",), 62 | "init_image": ("IMAGE",), 63 | "vae": ("VAE",), 64 | "width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 65 | "height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 66 | "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), 67 | "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 68 | "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 69 | "elevation_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 70 | "azimuth_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 71 | }} 72 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 73 | RETURN_NAMES = ("positive", "negative", "latent") 74 | 75 | FUNCTION = "encode" 76 | 77 | CATEGORY = 
"conditioning/3d_models" 78 | 79 | def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment): 80 | output = clip_vision.encode_image(init_image) 81 | pooled = output.image_embeds.unsqueeze(0) 82 | pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) 83 | encode_pixels = pixels[:,:,:,:3] 84 | t = vae.encode(encode_pixels) 85 | 86 | cam_embeds = [] 87 | for i in range(batch_size): 88 | cam_embeds.append(camera_embeddings(elevation, azimuth)) 89 | elevation += elevation_batch_increment 90 | azimuth += azimuth_batch_increment 91 | 92 | cam_embeds = torch.cat(cam_embeds, dim=0) 93 | cond = torch.cat([ldm_patched.modules.utils.repeat_to_batch_size(pooled, batch_size), cam_embeds], dim=-1) 94 | 95 | positive = [[cond, {"concat_latent_image": t}]] 96 | negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]] 97 | latent = torch.zeros([batch_size, 4, height // 8, width // 8]) 98 | return (positive, negative, {"samples":latent, "batch_index": [0] * batch_size}) 99 | 100 | 101 | NODE_CLASS_MAPPINGS = { 102 | "StableZero123_Conditioning": StableZero123_Conditioning, 103 | "StableZero123_Conditioning_Batched": StableZero123_Conditioning_Batched, 104 | } 105 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_upscale_model.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import os 4 | from ldm_patched.pfn import model_loading 5 | from ldm_patched.modules import model_management 6 | import torch 7 | import ldm_patched.modules.utils 8 | import ldm_patched.utils.path_utils 9 | 10 | class UpscaleModelLoader: 11 | @classmethod 12 | def INPUT_TYPES(s): 13 | return {"required": { "model_name": (ldm_patched.utils.path_utils.get_filename_list("upscale_models"), ), 14 | }} 15 | RETURN_TYPES = ("UPSCALE_MODEL",) 16 | FUNCTION = "load_model" 17 | 18 | CATEGORY = "loaders" 19 | 20 | def load_model(self, model_name): 21 | model_path = ldm_patched.utils.path_utils.get_full_path("upscale_models", model_name) 22 | sd = ldm_patched.modules.utils.load_torch_file(model_path, safe_load=True) 23 | if "module.layers.0.residual_group.blocks.0.norm1.weight" in sd: 24 | sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"module.":""}) 25 | out = model_loading.load_state_dict(sd).eval() 26 | return (out, ) 27 | 28 | 29 | class ImageUpscaleWithModel: 30 | @classmethod 31 | def INPUT_TYPES(s): 32 | return {"required": { "upscale_model": ("UPSCALE_MODEL",), 33 | "image": ("IMAGE",), 34 | }} 35 | RETURN_TYPES = ("IMAGE",) 36 | FUNCTION = "upscale" 37 | 38 | CATEGORY = "image/upscaling" 39 | 40 | def upscale(self, upscale_model, image): 41 | device = model_management.get_torch_device() 42 | upscale_model.to(device) 43 | in_img = image.movedim(-1,-3).to(device) 44 | free_memory = model_management.get_free_memory(device) 45 | 46 | tile = 512 47 | overlap = 32 48 | 49 | oom = True 50 | while oom: 51 | try: 52 | steps = in_img.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(in_img.shape[3], in_img.shape[2], tile_x=tile, tile_y=tile, overlap=overlap) 53 | pbar = ldm_patched.modules.utils.ProgressBar(steps) 54 | s = ldm_patched.modules.utils.tiled_scale(in_img, lambda a: upscale_model(a), tile_x=tile, tile_y=tile, overlap=overlap, 
upscale_amount=upscale_model.scale, pbar=pbar) 55 | oom = False 56 | except model_management.OOM_EXCEPTION as e: 57 | tile //= 2 58 | if tile < 128: 59 | raise e 60 | 61 | upscale_model.cpu() 62 | s = torch.clamp(s.movedim(-3,-1), min=0, max=1.0) 63 | return (s,) 64 | 65 | NODE_CLASS_MAPPINGS = { 66 | "UpscaleModelLoader": UpscaleModelLoader, 67 | "ImageUpscaleWithModel": ImageUpscaleWithModel 68 | } 69 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_video_model.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.contrib.external 4 | import torch 5 | import ldm_patched.modules.utils 6 | import ldm_patched.modules.sd 7 | import ldm_patched.utils.path_utils 8 | import ldm_patched.contrib.external_model_merging 9 | 10 | 11 | class ImageOnlyCheckpointLoader: 12 | @classmethod 13 | def INPUT_TYPES(s): 14 | return {"required": { "ckpt_name": (ldm_patched.utils.path_utils.get_filename_list("checkpoints"), ), 15 | }} 16 | RETURN_TYPES = ("MODEL", "CLIP_VISION", "VAE") 17 | FUNCTION = "load_checkpoint" 18 | 19 | CATEGORY = "loaders/video_models" 20 | 21 | def load_checkpoint(self, ckpt_name, output_vae=True, output_clip=True): 22 | ckpt_path = ldm_patched.utils.path_utils.get_full_path("checkpoints", ckpt_name) 23 | out = ldm_patched.modules.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=False, output_clipvision=True, embedding_directory=ldm_patched.utils.path_utils.get_folder_paths("embeddings")) 24 | return (out[0], out[3], out[2]) 25 | 26 | 27 | class SVD_img2vid_Conditioning: 28 | @classmethod 29 | def INPUT_TYPES(s): 30 | return {"required": { "clip_vision": ("CLIP_VISION",), 31 | "init_image": ("IMAGE",), 32 | "vae": ("VAE",), 33 | "width": ("INT", {"default": 1024, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 34 | "height": ("INT", {"default": 576, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 35 | "video_frames": ("INT", {"default": 14, "min": 1, "max": 4096}), 36 | "motion_bucket_id": ("INT", {"default": 127, "min": 1, "max": 1023}), 37 | "fps": ("INT", {"default": 6, "min": 1, "max": 1024}), 38 | "augmentation_level": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 10.0, "step": 0.01}) 39 | }} 40 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 41 | RETURN_NAMES = ("positive", "negative", "latent") 42 | 43 | FUNCTION = "encode" 44 | 45 | CATEGORY = "conditioning/video_models" 46 | 47 | def encode(self, clip_vision, init_image, vae, width, height, video_frames, motion_bucket_id, fps, augmentation_level): 48 | output = clip_vision.encode_image(init_image) 49 | pooled = output.image_embeds.unsqueeze(0) 50 | pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) 51 | encode_pixels = pixels[:,:,:,:3] 52 | if augmentation_level > 0: 53 | encode_pixels += torch.randn_like(pixels) * augmentation_level 54 | t = vae.encode(encode_pixels) 55 | positive = [[pooled, {"motion_bucket_id": motion_bucket_id, "fps": fps, "augmentation_level": augmentation_level, "concat_latent_image": t}]] 56 | negative = [[torch.zeros_like(pooled), {"motion_bucket_id": motion_bucket_id, "fps": fps, "augmentation_level": augmentation_level, "concat_latent_image": torch.zeros_like(t)}]] 57 | latent = torch.zeros([video_frames, 4, height // 8, width // 8]) 58 | return 
(positive, negative, {"samples":latent}) 59 | 60 | class VideoLinearCFGGuidance: 61 | @classmethod 62 | def INPUT_TYPES(s): 63 | return {"required": { "model": ("MODEL",), 64 | "min_cfg": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.5, "round": 0.01}), 65 | }} 66 | RETURN_TYPES = ("MODEL",) 67 | FUNCTION = "patch" 68 | 69 | CATEGORY = "sampling/video_models" 70 | 71 | def patch(self, model, min_cfg): 72 | def linear_cfg(args): 73 | cond = args["cond"] 74 | uncond = args["uncond"] 75 | cond_scale = args["cond_scale"] 76 | 77 | scale = torch.linspace(min_cfg, cond_scale, cond.shape[0], device=cond.device).reshape((cond.shape[0], 1, 1, 1)) 78 | return uncond + scale * (cond - uncond) 79 | 80 | m = model.clone() 81 | m.set_model_sampler_cfg_function(linear_cfg) 82 | return (m, ) 83 | 84 | class ImageOnlyCheckpointSave(ldm_patched.contrib.external_model_merging.CheckpointSave): 85 | CATEGORY = "_for_testing" 86 | 87 | @classmethod 88 | def INPUT_TYPES(s): 89 | return {"required": { "model": ("MODEL",), 90 | "clip_vision": ("CLIP_VISION",), 91 | "vae": ("VAE",), 92 | "filename_prefix": ("STRING", {"default": "checkpoints/ldm_patched"}),}, 93 | "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},} 94 | 95 | def save(self, model, clip_vision, vae, filename_prefix, prompt=None, extra_pnginfo=None): 96 | ldm_patched.contrib.external_model_merging.save_checkpoint(model, clip_vision=clip_vision, vae=vae, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo) 97 | return {} 98 | 99 | NODE_CLASS_MAPPINGS = { 100 | "ImageOnlyCheckpointLoader": ImageOnlyCheckpointLoader, 101 | "SVD_img2vid_Conditioning": SVD_img2vid_Conditioning, 102 | "VideoLinearCFGGuidance": VideoLinearCFGGuidance, 103 | "ImageOnlyCheckpointSave": ImageOnlyCheckpointSave, 104 | } 105 | 106 | NODE_DISPLAY_NAME_MAPPINGS = { 107 | "ImageOnlyCheckpointLoader": "Image Only Checkpoint Loader (img2vid model)", 108 | } 109 | -------------------------------------------------------------------------------- /ldm_patched/ldm/models/__pycache__/autoencoder.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/models/__pycache__/autoencoder.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/__pycache__/attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/__pycache__/attention.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/__pycache__/ema.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/__pycache__/ema.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/__pycache__/sub_quadratic_attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/__pycache__/sub_quadratic_attention.cpython-310.pyc 
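A short illustrative sketch (a toy under assumed shapes, not repository code) of the per-frame linear CFG schedule that VideoLinearCFGGuidance.linear_cfg (ldm_patched/contrib/external_video_model.py, above) applies across the video batch:

import torch

# Toy conditional/unconditional predictions for a 14-frame video batch (shapes assumed).
min_cfg, cond_scale, video_frames = 1.0, 2.5, 14
cond = torch.randn(video_frames, 4, 72, 128)
uncond = torch.randn(video_frames, 4, 72, 128)

# One guidance weight per frame, ramping linearly from min_cfg to cond_scale.
scale = torch.linspace(min_cfg, cond_scale, video_frames).reshape(video_frames, 1, 1, 1)
guided = uncond + scale * (cond - uncond)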
-------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/upscaling.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/upscaling.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/util.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/upscaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from functools import partial 5 | 6 | from .util import extract_into_tensor, make_beta_schedule 7 | from ldm_patched.ldm.util import default 8 | 9 | 10 | class AbstractLowScaleModel(nn.Module): 11 | # for concatenating a downsampled image to the latent representation 12 | def __init__(self, noise_schedule_config=None): 13 | super(AbstractLowScaleModel, self).__init__() 14 | if noise_schedule_config is not None: 15 | self.register_schedule(**noise_schedule_config) 16 | 17 | def register_schedule(self, beta_schedule="linear", timesteps=1000, 18 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 19 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, 20 | cosine_s=cosine_s) 21 | alphas = 1. 
- betas 22 | alphas_cumprod = np.cumprod(alphas, axis=0) 23 | alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 24 | 25 | timesteps, = betas.shape 26 | self.num_timesteps = int(timesteps) 27 | self.linear_start = linear_start 28 | self.linear_end = linear_end 29 | assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' 30 | 31 | to_torch = partial(torch.tensor, dtype=torch.float32) 32 | 33 | self.register_buffer('betas', to_torch(betas)) 34 | self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) 35 | self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) 36 | 37 | # calculations for diffusion q(x_t | x_{t-1}) and others 38 | self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) 39 | self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) 40 | self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) 41 | self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) 42 | self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) 43 | 44 | def q_sample(self, x_start, t, noise=None, seed=None): 45 | if noise is None: 46 | if seed is None: 47 | noise = torch.randn_like(x_start) 48 | else: 49 | noise = torch.randn(x_start.size(), dtype=x_start.dtype, layout=x_start.layout, generator=torch.manual_seed(seed)).to(x_start.device) 50 | return (extract_into_tensor(self.sqrt_alphas_cumprod.to(x_start.device), t, x_start.shape) * x_start + 51 | extract_into_tensor(self.sqrt_one_minus_alphas_cumprod.to(x_start.device), t, x_start.shape) * noise) 52 | 53 | def forward(self, x): 54 | return x, None 55 | 56 | def decode(self, x): 57 | return x 58 | 59 | 60 | class SimpleImageConcat(AbstractLowScaleModel): 61 | # no noise level conditioning 62 | def __init__(self): 63 | super(SimpleImageConcat, self).__init__(noise_schedule_config=None) 64 | self.max_noise_level = 0 65 | 66 | def forward(self, x): 67 | # fix to constant noise level 68 | return x, torch.zeros(x.shape[0], device=x.device).long() 69 | 70 | 71 | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): 72 | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cuda=False): 73 | super().__init__(noise_schedule_config=noise_schedule_config) 74 | self.max_noise_level = max_noise_level 75 | 76 | def forward(self, x, noise_level=None, seed=None): 77 | if noise_level is None: 78 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 79 | else: 80 | assert isinstance(noise_level, torch.Tensor) 81 | z = self.q_sample(x, noise_level, seed=seed) 82 | return z, noise_level 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/distributions/__pycache__/__init__.cpython-310.pyc 
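For reference, a hedged standalone sketch of the closed-form forward-diffusion step computed by AbstractLowScaleModel.q_sample (ldm_patched/ldm/modules/diffusionmodules/upscaling.py, above), i.e. x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise; the simple linear beta schedule below is an illustrative assumption only (the repository builds its schedule via make_beta_schedule):

import torch

timesteps = 1000
betas = torch.linspace(1e-4, 2e-2, timesteps)        # toy schedule (assumption)
alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)   # cumulative product of alphas

x_start = torch.randn(1, 4, 32, 32)                  # toy latent x_0
t = torch.tensor([250])                              # noise level to sample at
noise = torch.randn_like(x_start)

x_t = (alphas_cumprod[t].sqrt().view(-1, 1, 1, 1) * x_start
       + (1.0 - alphas_cumprod[t]).sqrt().view(-1, 1, 1, 1) * noise)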
-------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/__pycache__/distributions.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/distributions/__pycache__/distributions.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 | def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 
81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 
78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) 81 | -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/encoders/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/__pycache__/noise_aug_modules.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/encoders/__pycache__/noise_aug_modules.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/noise_aug_modules.py: -------------------------------------------------------------------------------- 1 | from ..diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation 2 | from ..diffusionmodules.openaimodel import Timestep 3 | import torch 4 | 5 | class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation): 6 | def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | if clip_stats_path is None: 9 | clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) 10 | else: 11 | clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu") 12 | self.register_buffer("data_mean", clip_mean[None, :], persistent=False) 13 | self.register_buffer("data_std", clip_std[None, :], persistent=False) 14 | self.time_embed = Timestep(timestep_dim) 15 | 16 | def scale(self, x): 17 | # re-normalize to centered mean and unit variance 18 | x = (x - self.data_mean.to(x.device)) * 1. / self.data_std.to(x.device) 19 | return x 20 | 21 | def unscale(self, x): 22 | # back to original data stats 23 | x = (x * self.data_std.to(x.device)) + self.data_mean.to(x.device) 24 | return x 25 | 26 | def forward(self, x, noise_level=None, seed=None): 27 | if noise_level is None: 28 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 29 | else: 30 | assert isinstance(noise_level, torch.Tensor) 31 | x = self.scale(x) 32 | z = self.q_sample(x, noise_level, seed=seed) 33 | z = self.unscale(z) 34 | noise_level = self.time_embed(noise_level) 35 | return z, noise_level 36 | -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/chainer: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Preferred Infrastructure, Inc. 2 | Copyright (c) 2015 Preferred Networks, Inc. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/kdiffusion: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022 Katherine Crowson 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/ldm: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/taesd: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ollin Boer Bohan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
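A minimal usage sketch (not part of the repository) for DiagonalGaussianDistribution from ldm_patched/ldm/modules/distributions/distributions.py above, assuming torch is installed and the repository root is importable; the 8-channel parameter tensor is an arbitrary illustrative shape:

import torch
from ldm_patched.ldm.modules.distributions.distributions import DiagonalGaussianDistribution

# The constructor chunks the parameters along dim=1 into mean and log-variance,
# so 8 parameter channels yield a 4-channel latent distribution.
params = torch.randn(1, 8, 32, 32)
posterior = DiagonalGaussianDistribution(params)

z = posterior.sample()   # reparameterized sample, shape (1, 4, 32, 32)
kl = posterior.kl()      # KL divergence against a standard normal, shape (1,)
nll = posterior.nll(z)   # negative log-likelihood of the sample, shape (1,)
print(z.shape, kl.shape, nll.shape)

With deterministic=True the variance is zeroed, so sample() reduces to mode() and both kl() and nll() return zero tensors, matching the guard clauses in the class.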
-------------------------------------------------------------------------------- /ldm_patched/modules/checkpoint_pickle.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | load = pickle.load 4 | 5 | class Empty: 6 | pass 7 | 8 | class Unpickler(pickle.Unpickler): 9 | def find_class(self, module, name): 10 | #TODO: safe unpickle 11 | if module.startswith("pytorch_lightning"): 12 | return Empty 13 | return super().find_class(module, name) 14 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_config_bigg.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1280, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 5120, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 20, 18 | "num_hidden_layers": 32, 19 | "pad_token_id": 1, 20 | "projection_dim": 1280, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision.py: -------------------------------------------------------------------------------- 1 | from .utils import load_torch_file, transformers_convert, state_dict_prefix_replace 2 | import os 3 | import torch 4 | import json 5 | 6 | import ldm_patched.modules.ops 7 | import ldm_patched.modules.model_patcher 8 | import ldm_patched.modules.model_management 9 | import ldm_patched.modules.utils 10 | import ldm_patched.modules.clip_model 11 | 12 | class Output: 13 | def __getitem__(self, key): 14 | return getattr(self, key) 15 | def __setitem__(self, key, item): 16 | setattr(self, key, item) 17 | 18 | def clip_preprocess(image, size=224): 19 | mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype) 20 | std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype) 21 | image = image.movedim(-1, 1) 22 | if not (image.shape[2] == size and image.shape[3] == size): 23 | scale = (size / min(image.shape[2], image.shape[3])) 24 | image = torch.nn.functional.interpolate(image, size=(round(scale * image.shape[2]), round(scale * image.shape[3])), mode="bicubic", antialias=True) 25 | h = (image.shape[2] - size)//2 26 | w = (image.shape[3] - size)//2 27 | image = image[:,:,h:h+size,w:w+size] 28 | image = torch.clip((255. 
* image), 0, 255).round() / 255.0 29 | return (image - mean.view([3,1,1])) / std.view([3,1,1]) 30 | 31 | class ClipVisionModel(): 32 | def __init__(self, json_config): 33 | with open(json_config) as f: 34 | config = json.load(f) 35 | 36 | self.load_device = ldm_patched.modules.model_management.text_encoder_device() 37 | offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() 38 | self.dtype = ldm_patched.modules.model_management.text_encoder_dtype(self.load_device) 39 | self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.manual_cast) 40 | self.model.eval() 41 | 42 | self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) 43 | 44 | def load_sd(self, sd): 45 | return self.model.load_state_dict(sd, strict=False) 46 | 47 | def get_sd(self): 48 | return self.model.state_dict() 49 | 50 | def encode_image(self, image): 51 | ldm_patched.modules.model_management.load_model_gpu(self.patcher) 52 | pixel_values = clip_preprocess(image.to(self.load_device)).float() 53 | out = self.model(pixel_values=pixel_values, intermediate_output=-2) 54 | 55 | outputs = Output() 56 | outputs["last_hidden_state"] = out[0].to(ldm_patched.modules.model_management.intermediate_device()) 57 | outputs["image_embeds"] = out[2].to(ldm_patched.modules.model_management.intermediate_device()) 58 | outputs["penultimate_hidden_states"] = out[1].to(ldm_patched.modules.model_management.intermediate_device()) 59 | return outputs 60 | 61 | def convert_to_transformers(sd, prefix): 62 | sd_k = sd.keys() 63 | if "{}transformer.resblocks.0.attn.in_proj_weight".format(prefix) in sd_k: 64 | keys_to_replace = { 65 | "{}class_embedding".format(prefix): "vision_model.embeddings.class_embedding", 66 | "{}conv1.weight".format(prefix): "vision_model.embeddings.patch_embedding.weight", 67 | "{}positional_embedding".format(prefix): "vision_model.embeddings.position_embedding.weight", 68 | "{}ln_post.bias".format(prefix): "vision_model.post_layernorm.bias", 69 | "{}ln_post.weight".format(prefix): "vision_model.post_layernorm.weight", 70 | "{}ln_pre.bias".format(prefix): "vision_model.pre_layrnorm.bias", 71 | "{}ln_pre.weight".format(prefix): "vision_model.pre_layrnorm.weight", 72 | } 73 | 74 | for x in keys_to_replace: 75 | if x in sd_k: 76 | sd[keys_to_replace[x]] = sd.pop(x) 77 | 78 | if "{}proj".format(prefix) in sd_k: 79 | sd['visual_projection.weight'] = sd.pop("{}proj".format(prefix)).transpose(0, 1) 80 | 81 | sd = transformers_convert(sd, prefix, "vision_model.", 48) 82 | else: 83 | replace_prefix = {prefix: ""} 84 | sd = state_dict_prefix_replace(sd, replace_prefix) 85 | return sd 86 | 87 | def load_clipvision_from_sd(sd, prefix="", convert_keys=False): 88 | if convert_keys: 89 | sd = convert_to_transformers(sd, prefix) 90 | if "vision_model.encoder.layers.47.layer_norm1.weight" in sd: 91 | json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_g.json") 92 | elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd: 93 | json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json") 94 | elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd: 95 | json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json") 96 | else: 97 | return None 98 | 99 | clip = ClipVisionModel(json_config) 100 | m, u = clip.load_sd(sd) 101 | if len(m) > 0: 
102 | print("extra clip vision:", m) 103 | u = set(u) 104 | keys = list(sd.keys()) 105 | for k in keys: 106 | if k not in u: 107 | t = sd.pop(k) 108 | del t 109 | return clip 110 | 111 | def load(ckpt_path): 112 | sd = load_torch_file(ckpt_path) 113 | if "visual.transformer.resblocks.0.attn.in_proj_weight" in sd: 114 | return load_clipvision_from_sd(sd, prefix="visual.", convert_keys=True) 115 | else: 116 | return load_clipvision_from_sd(sd) 117 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision_config_g.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1664, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 8192, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 48, 15 | "patch_size": 14, 16 | "projection_dim": 1280, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision_config_h.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1280, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 5120, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 32, 15 | "patch_size": 14, 16 | "projection_dim": 1024, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision_config_vitl.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "quick_gelu", 5 | "hidden_size": 1024, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 4096, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 24, 15 | "patch_size": 14, 16 | "projection_dim": 768, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /ldm_patched/modules/conds.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import ldm_patched.modules.utils 4 | 5 | 6 | 7 | class CONDRegular: 8 | def __init__(self, cond): 9 | self.cond = cond 10 | 11 | def _copy_with(self, cond): 12 | return self.__class__(cond) 13 | 14 | def process_cond(self, batch_size, device, **kwargs): 15 | return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(self.cond, batch_size).to(device)) 16 | 17 | def can_concat(self, other): 18 | if self.cond.shape != other.cond.shape: 19 | return False 20 | return True 21 | 22 | def concat(self, others): 23 | conds = [self.cond] 24 | for x in others: 25 | conds.append(x.cond) 26 | return torch.cat(conds) 27 | 28 | class CONDNoiseShape(CONDRegular): 29 | def process_cond(self, batch_size, device, area, **kwargs): 30 | data = self.cond[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] 31 | 
return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(data, batch_size).to(device)) 32 | 33 | 34 | class CONDCrossAttn(CONDRegular): 35 | def can_concat(self, other): 36 | s1 = self.cond.shape 37 | s2 = other.cond.shape 38 | if s1 != s2: 39 | if s1[0] != s2[0] or s1[2] != s2[2]: #these 2 cases should not happen 40 | return False 41 | 42 | mult_min = math.lcm(s1[1], s2[1]) 43 | diff = mult_min // min(s1[1], s2[1]) 44 | if diff > 4: #arbitrary limit on the padding because it's probably going to impact performance negatively if it's too much 45 | return False 46 | return True 47 | 48 | def concat(self, others): 49 | conds = [self.cond] 50 | crossattn_max_len = self.cond.shape[1] 51 | for x in others: 52 | c = x.cond 53 | crossattn_max_len = math.lcm(crossattn_max_len, c.shape[1]) 54 | conds.append(c) 55 | 56 | out = [] 57 | for c in conds: 58 | if c.shape[1] < crossattn_max_len: 59 | c = c.repeat(1, crossattn_max_len // c.shape[1], 1) #padding with repeat doesn't change result 60 | out.append(c) 61 | return torch.cat(out) 62 | 63 | class CONDConstant(CONDRegular): 64 | def __init__(self, cond): 65 | self.cond = cond 66 | 67 | def process_cond(self, batch_size, device, **kwargs): 68 | return self._copy_with(self.cond) 69 | 70 | def can_concat(self, other): 71 | if self.cond != other.cond: 72 | return False 73 | return True 74 | 75 | def concat(self, others): 76 | return self.cond 77 | -------------------------------------------------------------------------------- /ldm_patched/modules/diffusers_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import ldm_patched.modules.sd 4 | 5 | def first_file(path, filenames): 6 | for f in filenames: 7 | p = os.path.join(path, f) 8 | if os.path.exists(p): 9 | return p 10 | return None 11 | 12 | def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None): 13 | diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"] 14 | unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names) 15 | vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names) 16 | 17 | text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"] 18 | text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names) 19 | text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names) 20 | 21 | text_encoder_paths = [text_encoder1_path] 22 | if text_encoder2_path is not None: 23 | text_encoder_paths.append(text_encoder2_path) 24 | 25 | unet = ldm_patched.modules.sd.load_unet(unet_path) 26 | 27 | clip = None 28 | if output_clip: 29 | clip = ldm_patched.modules.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory) 30 | 31 | vae = None 32 | if output_vae: 33 | sd = ldm_patched.modules.utils.load_torch_file(vae_path) 34 | vae = ldm_patched.modules.sd.VAE(sd=sd) 35 | 36 | return (unet, clip, vae) 37 | -------------------------------------------------------------------------------- /ldm_patched/modules/latent_formats.py: -------------------------------------------------------------------------------- 1 | 2 | class LatentFormat: 3 | scale_factor = 1.0 4 | latent_rgb_factors = None 5 | taesd_decoder_name = None 6 | 7 | def process_in(self, latent): 8 | return latent * self.scale_factor 9 | 
10 | def process_out(self, latent): 11 | return latent / self.scale_factor 12 | 13 | class SD15(LatentFormat): 14 | def __init__(self, scale_factor=0.18215): 15 | self.scale_factor = scale_factor 16 | self.latent_rgb_factors = [ 17 | # R G B 18 | [ 0.3512, 0.2297, 0.3227], 19 | [ 0.3250, 0.4974, 0.2350], 20 | [-0.2829, 0.1762, 0.2721], 21 | [-0.2120, -0.2616, -0.7177] 22 | ] 23 | self.taesd_decoder_name = "taesd_decoder" 24 | 25 | class SDXL(LatentFormat): 26 | def __init__(self): 27 | self.scale_factor = 0.13025 28 | self.latent_rgb_factors = [ 29 | # R G B 30 | [ 0.3920, 0.4054, 0.4549], 31 | [-0.2634, -0.0196, 0.0653], 32 | [ 0.0568, 0.1687, -0.0755], 33 | [-0.3112, -0.2359, -0.2076] 34 | ] 35 | self.taesd_decoder_name = "taesdxl_decoder" 36 | 37 | class SD_X4(LatentFormat): 38 | def __init__(self): 39 | self.scale_factor = 0.08333 40 | -------------------------------------------------------------------------------- /ldm_patched/modules/model_sampling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule 4 | import math 5 | 6 | class EPS: 7 | def calculate_input(self, sigma, noise): 8 | sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1)) 9 | return noise / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 10 | 11 | def calculate_denoised(self, sigma, model_output, model_input): 12 | sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) 13 | return model_input - model_output * sigma 14 | 15 | 16 | class V_PREDICTION(EPS): 17 | def calculate_denoised(self, sigma, model_output, model_input): 18 | sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) 19 | return model_input * self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) - model_output * sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 20 | 21 | 22 | class ModelSamplingDiscrete(torch.nn.Module): 23 | def __init__(self, model_config=None): 24 | super().__init__() 25 | 26 | if model_config is not None: 27 | sampling_settings = model_config.sampling_settings 28 | else: 29 | sampling_settings = {} 30 | 31 | beta_schedule = sampling_settings.get("beta_schedule", "linear") 32 | linear_start = sampling_settings.get("linear_start", 0.00085) 33 | linear_end = sampling_settings.get("linear_end", 0.012) 34 | 35 | self._register_schedule(given_betas=None, beta_schedule=beta_schedule, timesteps=1000, linear_start=linear_start, linear_end=linear_end, cosine_s=8e-3) 36 | self.sigma_data = 1.0 37 | 38 | def _register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, 39 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 40 | if given_betas is not None: 41 | betas = given_betas 42 | else: 43 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s) 44 | alphas = 1. 
- betas 45 | alphas_cumprod = torch.tensor(np.cumprod(alphas, axis=0), dtype=torch.float32) 46 | # alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 47 | 48 | timesteps, = betas.shape 49 | self.num_timesteps = int(timesteps) 50 | self.linear_start = linear_start 51 | self.linear_end = linear_end 52 | 53 | # self.register_buffer('betas', torch.tensor(betas, dtype=torch.float32)) 54 | # self.register_buffer('alphas_cumprod', torch.tensor(alphas_cumprod, dtype=torch.float32)) 55 | # self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32)) 56 | 57 | sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5 58 | self.set_sigmas(sigmas) 59 | 60 | def set_sigmas(self, sigmas): 61 | self.register_buffer('sigmas', sigmas) 62 | self.register_buffer('log_sigmas', sigmas.log()) 63 | 64 | @property 65 | def sigma_min(self): 66 | return self.sigmas[0] 67 | 68 | @property 69 | def sigma_max(self): 70 | return self.sigmas[-1] 71 | 72 | def timestep(self, sigma): 73 | log_sigma = sigma.log() 74 | dists = log_sigma.to(self.log_sigmas.device) - self.log_sigmas[:, None] 75 | return dists.abs().argmin(dim=0).view(sigma.shape).to(sigma.device) 76 | 77 | def sigma(self, timestep): 78 | t = torch.clamp(timestep.float().to(self.log_sigmas.device), min=0, max=(len(self.sigmas) - 1)) 79 | low_idx = t.floor().long() 80 | high_idx = t.ceil().long() 81 | w = t.frac() 82 | log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] 83 | return log_sigma.exp().to(timestep.device) 84 | 85 | def percent_to_sigma(self, percent): 86 | if percent <= 0.0: 87 | return 999999999.9 88 | if percent >= 1.0: 89 | return 0.0 90 | percent = 1.0 - percent 91 | return self.sigma(torch.tensor(percent * 999.0)).item() 92 | 93 | 94 | class ModelSamplingContinuousEDM(torch.nn.Module): 95 | def __init__(self, model_config=None): 96 | super().__init__() 97 | self.sigma_data = 1.0 98 | 99 | if model_config is not None: 100 | sampling_settings = model_config.sampling_settings 101 | else: 102 | sampling_settings = {} 103 | 104 | sigma_min = sampling_settings.get("sigma_min", 0.002) 105 | sigma_max = sampling_settings.get("sigma_max", 120.0) 106 | self.set_sigma_range(sigma_min, sigma_max) 107 | 108 | def set_sigma_range(self, sigma_min, sigma_max): 109 | sigmas = torch.linspace(math.log(sigma_min), math.log(sigma_max), 1000).exp() 110 | 111 | self.register_buffer('sigmas', sigmas) #for compatibility with some schedulers 112 | self.register_buffer('log_sigmas', sigmas.log()) 113 | 114 | @property 115 | def sigma_min(self): 116 | return self.sigmas[0] 117 | 118 | @property 119 | def sigma_max(self): 120 | return self.sigmas[-1] 121 | 122 | def timestep(self, sigma): 123 | return 0.25 * sigma.log() 124 | 125 | def sigma(self, timestep): 126 | return (timestep / 0.25).exp() 127 | 128 | def percent_to_sigma(self, percent): 129 | if percent <= 0.0: 130 | return 999999999.9 131 | if percent >= 1.0: 132 | return 0.0 133 | percent = 1.0 - percent 134 | 135 | log_sigma_min = math.log(self.sigma_min) 136 | return math.exp((math.log(self.sigma_max) - log_sigma_min) * percent + log_sigma_min) 137 | -------------------------------------------------------------------------------- /ldm_patched/modules/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import ldm_patched.modules.model_management 3 | 4 | def cast_bias_weight(s, input): 5 | bias = None 6 | non_blocking = 
ldm_patched.modules.model_management.device_supports_non_blocking(input.device) 7 | if s.bias is not None: 8 | bias = s.bias.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) 9 | weight = s.weight.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) 10 | return weight, bias 11 | 12 | 13 | class disable_weight_init: 14 | class Linear(torch.nn.Linear): 15 | ldm_patched_cast_weights = False 16 | def reset_parameters(self): 17 | return None 18 | 19 | def forward_ldm_patched_cast_weights(self, input): 20 | weight, bias = cast_bias_weight(self, input) 21 | return torch.nn.functional.linear(input, weight, bias) 22 | 23 | def forward(self, *args, **kwargs): 24 | if self.ldm_patched_cast_weights: 25 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 26 | else: 27 | return super().forward(*args, **kwargs) 28 | 29 | class Conv2d(torch.nn.Conv2d): 30 | ldm_patched_cast_weights = False 31 | def reset_parameters(self): 32 | return None 33 | 34 | def forward_ldm_patched_cast_weights(self, input): 35 | weight, bias = cast_bias_weight(self, input) 36 | return self._conv_forward(input, weight, bias) 37 | 38 | def forward(self, *args, **kwargs): 39 | if self.ldm_patched_cast_weights: 40 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 41 | else: 42 | return super().forward(*args, **kwargs) 43 | 44 | class Conv3d(torch.nn.Conv3d): 45 | ldm_patched_cast_weights = False 46 | def reset_parameters(self): 47 | return None 48 | 49 | def forward_ldm_patched_cast_weights(self, input): 50 | weight, bias = cast_bias_weight(self, input) 51 | return self._conv_forward(input, weight, bias) 52 | 53 | def forward(self, *args, **kwargs): 54 | if self.ldm_patched_cast_weights: 55 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 56 | else: 57 | return super().forward(*args, **kwargs) 58 | 59 | class GroupNorm(torch.nn.GroupNorm): 60 | ldm_patched_cast_weights = False 61 | def reset_parameters(self): 62 | return None 63 | 64 | def forward_ldm_patched_cast_weights(self, input): 65 | weight, bias = cast_bias_weight(self, input) 66 | return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps) 67 | 68 | def forward(self, *args, **kwargs): 69 | if self.ldm_patched_cast_weights: 70 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 71 | else: 72 | return super().forward(*args, **kwargs) 73 | 74 | 75 | class LayerNorm(torch.nn.LayerNorm): 76 | ldm_patched_cast_weights = False 77 | def reset_parameters(self): 78 | return None 79 | 80 | def forward_ldm_patched_cast_weights(self, input): 81 | weight, bias = cast_bias_weight(self, input) 82 | return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps) 83 | 84 | def forward(self, *args, **kwargs): 85 | if self.ldm_patched_cast_weights: 86 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 87 | else: 88 | return super().forward(*args, **kwargs) 89 | 90 | @classmethod 91 | def conv_nd(s, dims, *args, **kwargs): 92 | if dims == 2: 93 | return s.Conv2d(*args, **kwargs) 94 | elif dims == 3: 95 | return s.Conv3d(*args, **kwargs) 96 | else: 97 | raise ValueError(f"unsupported dimensions: {dims}") 98 | 99 | 100 | class manual_cast(disable_weight_init): 101 | class Linear(disable_weight_init.Linear): 102 | ldm_patched_cast_weights = True 103 | 104 | class Conv2d(disable_weight_init.Conv2d): 105 | ldm_patched_cast_weights = True 106 | 107 | class Conv3d(disable_weight_init.Conv3d): 108 | ldm_patched_cast_weights = True 109 | 110 | class 
GroupNorm(disable_weight_init.GroupNorm): 111 | ldm_patched_cast_weights = True 112 | 113 | class LayerNorm(disable_weight_init.LayerNorm): 114 | ldm_patched_cast_weights = True 115 | -------------------------------------------------------------------------------- /ldm_patched/modules/options.py: -------------------------------------------------------------------------------- 1 | 2 | args_parsing = False 3 | 4 | def enable_args_parsing(enable=True): 5 | global args_parsing 6 | args_parsing = enable 7 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd1_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "openai/clip-vit-large-patch14", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "quick_gelu", 11 | "hidden_size": 768, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 3072, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 12, 19 | "num_hidden_layers": 12, 20 | "pad_token_id": 1, 21 | "projection_dim": 768, 22 | "torch_dtype": "float32", 23 | "transformers_version": "4.24.0", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd1_tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "<|startoftext|>", 4 | "lstrip": false, 5 | "normalized": true, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": true, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "<|endoftext|>", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": true, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd1_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": { 4 | "__type": "AddedToken", 5 | "content": "<|startoftext|>", 6 | "lstrip": false, 7 | "normalized": true, 8 | "rstrip": false, 9 | "single_word": false 10 | }, 11 | "do_lower_case": true, 12 | "eos_token": { 13 | "__type": "AddedToken", 14 | "content": "<|endoftext|>", 15 | "lstrip": false, 16 | "normalized": true, 17 | "rstrip": false, 18 | "single_word": false 19 | }, 20 | "errors": "replace", 21 | "model_max_length": 77, 22 | "name_or_path": "openai/clip-vit-large-patch14", 23 | "pad_token": "<|endoftext|>", 24 | "special_tokens_map_file": "./special_tokens_map.json", 25 | "tokenizer_class": "CLIPTokenizer", 26 | "unk_token": { 27 | "__type": "AddedToken", 28 | "content": "<|endoftext|>", 29 | "lstrip": false, 30 | "normalized": true, 31 | "rstrip": false, 32 | "single_word": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd2_clip.py: -------------------------------------------------------------------------------- 1 | from ldm_patched.modules import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SD2ClipHModel(sd1_clip.SDClipModel): 6 | 
def __init__(self, arch="ViT-H-14", device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd2_clip_config.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 49406, "end": 49407, "pad": 0}) 13 | 14 | class SD2ClipHTokenizer(sd1_clip.SDTokenizer): 15 | def __init__(self, tokenizer_path=None, embedding_directory=None): 16 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024) 17 | 18 | class SD2Tokenizer(sd1_clip.SD1Tokenizer): 19 | def __init__(self, embedding_directory=None): 20 | super().__init__(embedding_directory=embedding_directory, clip_name="h", tokenizer=SD2ClipHTokenizer) 21 | 22 | class SD2ClipModel(sd1_clip.SD1ClipModel): 23 | def __init__(self, device="cpu", dtype=None, **kwargs): 24 | super().__init__(device=device, dtype=dtype, clip_name="h", clip_model=SD2ClipHModel, **kwargs) 25 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd2_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1024, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 4096, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 16, 18 | "num_hidden_layers": 24, 19 | "pad_token_id": 1, 20 | "projection_dim": 1024, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /ldm_patched/modules/sdxl_clip.py: -------------------------------------------------------------------------------- 1 | from ldm_patched.modules import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SDXLClipG(sd1_clip.SDClipModel): 6 | def __init__(self, device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_config_bigg.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, 13 | special_tokens={"start": 49406, "end": 49407, "pad": 0}, layer_norm_hidden_state=False) 14 | 15 | def load_sd(self, sd): 16 | return super().load_sd(sd) 17 | 18 | class SDXLClipGTokenizer(sd1_clip.SDTokenizer): 19 | def __init__(self, tokenizer_path=None, embedding_directory=None): 20 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g') 21 | 22 | 23 | class SDXLTokenizer: 24 | def __init__(self, embedding_directory=None): 25 | self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory) 26 | self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory) 27 | 28 | def tokenize_with_weights(self, text:str, return_word_ids=False): 29 | out = {} 30 | out["g"] = 
self.clip_g.tokenize_with_weights(text, return_word_ids) 31 | out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) 32 | return out 33 | 34 | def untokenize(self, token_weight_pair): 35 | return self.clip_g.untokenize(token_weight_pair) 36 | 37 | class SDXLClipModel(torch.nn.Module): 38 | def __init__(self, device="cpu", dtype=None): 39 | super().__init__() 40 | self.clip_l = sd1_clip.SDClipModel(layer="hidden", layer_idx=-2, device=device, dtype=dtype, layer_norm_hidden_state=False) 41 | self.clip_g = SDXLClipG(device=device, dtype=dtype) 42 | 43 | def clip_layer(self, layer_idx): 44 | self.clip_l.clip_layer(layer_idx) 45 | self.clip_g.clip_layer(layer_idx) 46 | 47 | def reset_clip_layer(self): 48 | self.clip_g.reset_clip_layer() 49 | self.clip_l.reset_clip_layer() 50 | 51 | def encode_token_weights(self, token_weight_pairs): 52 | token_weight_pairs_g = token_weight_pairs["g"] 53 | token_weight_pairs_l = token_weight_pairs["l"] 54 | g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs_g) 55 | l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l) 56 | return torch.cat([l_out, g_out], dim=-1), g_pooled 57 | 58 | def load_sd(self, sd): 59 | if "text_model.encoder.layers.30.mlp.fc1.weight" in sd: 60 | return self.clip_g.load_sd(sd) 61 | else: 62 | return self.clip_l.load_sd(sd) 63 | 64 | class SDXLRefinerClipModel(sd1_clip.SD1ClipModel): 65 | def __init__(self, device="cpu", dtype=None): 66 | super().__init__(device=device, dtype=dtype, clip_name="g", clip_model=SDXLClipG) 67 | -------------------------------------------------------------------------------- /ldm_patched/modules/supported_models_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . import model_base 3 | from . import utils 4 | from . 
import latent_formats 5 | 6 | class ClipTarget: 7 | def __init__(self, tokenizer, clip): 8 | self.clip = clip 9 | self.tokenizer = tokenizer 10 | self.params = {} 11 | 12 | class BASE: 13 | unet_config = {} 14 | unet_extra_config = { 15 | "num_heads": -1, 16 | "num_head_channels": 64, 17 | } 18 | 19 | clip_prefix = [] 20 | clip_vision_prefix = None 21 | noise_aug_config = None 22 | sampling_settings = {} 23 | latent_format = latent_formats.LatentFormat 24 | 25 | manual_cast_dtype = None 26 | 27 | @classmethod 28 | def matches(s, unet_config): 29 | for k in s.unet_config: 30 | if s.unet_config[k] != unet_config[k]: 31 | return False 32 | return True 33 | 34 | def model_type(self, state_dict, prefix=""): 35 | return model_base.ModelType.EPS 36 | 37 | def inpaint_model(self): 38 | return self.unet_config["in_channels"] > 4 39 | 40 | def __init__(self, unet_config): 41 | self.unet_config = unet_config 42 | self.latent_format = self.latent_format() 43 | for x in self.unet_extra_config: 44 | self.unet_config[x] = self.unet_extra_config[x] 45 | 46 | def get_model(self, state_dict, prefix="", device=None): 47 | if self.noise_aug_config is not None: 48 | out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=self.model_type(state_dict, prefix), device=device) 49 | else: 50 | out = model_base.BaseModel(self, model_type=self.model_type(state_dict, prefix), device=device) 51 | if self.inpaint_model(): 52 | out.set_inpaint() 53 | return out 54 | 55 | def process_clip_state_dict(self, state_dict): 56 | return state_dict 57 | 58 | def process_unet_state_dict(self, state_dict): 59 | return state_dict 60 | 61 | def process_vae_state_dict(self, state_dict): 62 | return state_dict 63 | 64 | def process_clip_state_dict_for_saving(self, state_dict): 65 | replace_prefix = {"": "cond_stage_model."} 66 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 67 | 68 | def process_clip_vision_state_dict_for_saving(self, state_dict): 69 | replace_prefix = {} 70 | if self.clip_vision_prefix is not None: 71 | replace_prefix[""] = self.clip_vision_prefix 72 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 73 | 74 | def process_unet_state_dict_for_saving(self, state_dict): 75 | replace_prefix = {"": "model.diffusion_model."} 76 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 77 | 78 | def process_vae_state_dict_for_saving(self, state_dict): 79 | replace_prefix = {"": "first_stage_model."} 80 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 81 | 82 | def set_manual_cast(self, manual_cast_dtype): 83 | self.manual_cast_dtype = manual_cast_dtype 84 | -------------------------------------------------------------------------------- /ldm_patched/pfn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/__init__.py -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/LICENSE-HAT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Xiangyu Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | 
copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/LICENSE-RealESRGAN: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Xintao Wang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/ChannelAttention.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class CA_layer(nn.Module): 7 | def __init__(self, channel, reduction=16): 8 | super(CA_layer, self).__init__() 9 | # global average pooling 10 | self.gap = nn.AdaptiveAvgPool2d(1) 11 | self.fc = nn.Sequential( 12 | nn.Conv2d(channel, channel // reduction, kernel_size=(1, 1), bias=False), 13 | nn.GELU(), 14 | nn.Conv2d(channel // reduction, channel, kernel_size=(1, 1), bias=False), 15 | # nn.Sigmoid() 16 | ) 17 | 18 | def forward(self, x): 19 | y = self.fc(self.gap(x)) 20 | return x * y.expand_as(x) 21 | 22 | 23 | class Simple_CA_layer(nn.Module): 24 | def __init__(self, channel): 25 | super(Simple_CA_layer, self).__init__() 26 | self.gap = nn.AdaptiveAvgPool2d(1) 27 | self.fc = nn.Conv2d( 28 | in_channels=channel, 29 | out_channels=channel, 30 | kernel_size=1, 31 | padding=0, 32 | stride=1, 33 | groups=1, 34 | bias=True, 35 | ) 36 | 37 | def forward(self, x): 38 | return x * self.fc(self.gap(x)) 39 | 40 | 41 | class ECA_layer(nn.Module): 42 | """Constructs a ECA module. 43 | Args: 44 | channel: Number of channels of the input feature map 45 | k_size: Adaptive selection of kernel size 46 | """ 47 | 48 | def __init__(self, channel): 49 | super(ECA_layer, self).__init__() 50 | 51 | b = 1 52 | gamma = 2 53 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 54 | k_size = k_size if k_size % 2 else k_size + 1 55 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 56 | self.conv = nn.Conv1d( 57 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 58 | ) 59 | # self.sigmoid = nn.Sigmoid() 60 | 61 | def forward(self, x): 62 | # x: input features with shape [b, c, h, w] 63 | # b, c, h, w = x.size() 64 | 65 | # feature descriptor on the global spatial information 66 | y = self.avg_pool(x) 67 | 68 | # Two different branches of ECA module 69 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 70 | 71 | # Multi-scale information fusion 72 | # y = self.sigmoid(y) 73 | 74 | return x * y.expand_as(x) 75 | 76 | 77 | class ECA_MaxPool_layer(nn.Module): 78 | """Constructs a ECA module. 
79 | Args: 80 | channel: Number of channels of the input feature map 81 | k_size: Adaptive selection of kernel size 82 | """ 83 | 84 | def __init__(self, channel): 85 | super(ECA_MaxPool_layer, self).__init__() 86 | 87 | b = 1 88 | gamma = 2 89 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 90 | k_size = k_size if k_size % 2 else k_size + 1 91 | self.max_pool = nn.AdaptiveMaxPool2d(1) 92 | self.conv = nn.Conv1d( 93 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 94 | ) 95 | # self.sigmoid = nn.Sigmoid() 96 | 97 | def forward(self, x): 98 | # x: input features with shape [b, c, h, w] 99 | # b, c, h, w = x.size() 100 | 101 | # feature descriptor on the global spatial information 102 | y = self.max_pool(x) 103 | 104 | # Two different branches of ECA module 105 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 106 | 107 | # Multi-scale information fusion 108 | # y = self.sigmoid(y) 109 | 110 | return x * y.expand_as(x) 111 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/OSAG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OSAG.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:08:49 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | 14 | import torch.nn as nn 15 | 16 | from .esa import ESA 17 | from .OSA import OSA_Block 18 | 19 | 20 | class OSAG(nn.Module): 21 | def __init__( 22 | self, 23 | channel_num=64, 24 | bias=True, 25 | block_num=4, 26 | ffn_bias=False, 27 | window_size=0, 28 | pe=False, 29 | ): 30 | super(OSAG, self).__init__() 31 | 32 | # print("window_size: %d" % (window_size)) 33 | # print("with_pe", pe) 34 | # print("ffn_bias: %d" % (ffn_bias)) 35 | 36 | # block_script_name = kwargs.get("block_script_name", "OSA") 37 | # block_class_name = kwargs.get("block_class_name", "OSA_Block") 38 | 39 | # script_name = "." 
+ block_script_name 40 | # package = __import__(script_name, fromlist=True) 41 | block_class = OSA_Block # getattr(package, block_class_name) 42 | group_list = [] 43 | for _ in range(block_num): 44 | temp_res = block_class( 45 | channel_num, 46 | bias, 47 | ffn_bias=ffn_bias, 48 | window_size=window_size, 49 | with_pe=pe, 50 | ) 51 | group_list.append(temp_res) 52 | group_list.append(nn.Conv2d(channel_num, channel_num, 1, 1, 0, bias=bias)) 53 | self.residual_layer = nn.Sequential(*group_list) 54 | esa_channel = max(channel_num // 4, 16) 55 | self.esa = ESA(esa_channel, channel_num) 56 | 57 | def forward(self, x): 58 | out = self.residual_layer(x) 59 | out = out + x 60 | return self.esa(out) 61 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/OmniSR.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OmniSR.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:06:36 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import math 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | 19 | from .OSAG import OSAG 20 | from .pixelshuffle import pixelshuffle_block 21 | 22 | 23 | class OmniSR(nn.Module): 24 | def __init__( 25 | self, 26 | state_dict, 27 | **kwargs, 28 | ): 29 | super(OmniSR, self).__init__() 30 | self.state = state_dict 31 | 32 | bias = True # Fine to assume this for now 33 | block_num = 1 # Fine to assume this for now 34 | ffn_bias = True 35 | pe = True 36 | 37 | num_feat = state_dict["input.weight"].shape[0] or 64 38 | num_in_ch = state_dict["input.weight"].shape[1] or 3 39 | num_out_ch = num_in_ch # we can just assume this for now. 
pixelshuffle smh 40 | 41 | pixelshuffle_shape = state_dict["up.0.weight"].shape[0] 42 | up_scale = math.sqrt(pixelshuffle_shape / num_out_ch) 43 | if up_scale - int(up_scale) > 0: 44 | print( 45 | "out_nc is probably different than in_nc, scale calculation might be wrong" 46 | ) 47 | up_scale = int(up_scale) 48 | res_num = 0 49 | for key in state_dict.keys(): 50 | if "residual_layer" in key: 51 | temp_res_num = int(key.split(".")[1]) 52 | if temp_res_num > res_num: 53 | res_num = temp_res_num 54 | res_num = res_num + 1 # zero-indexed 55 | 56 | residual_layer = [] 57 | self.res_num = res_num 58 | 59 | if ( 60 | "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" 61 | in state_dict.keys() 62 | ): 63 | rel_pos_bias_weight = state_dict[ 64 | "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" 65 | ].shape[0] 66 | self.window_size = int((math.sqrt(rel_pos_bias_weight) + 1) / 2) 67 | else: 68 | self.window_size = 8 69 | 70 | self.up_scale = up_scale 71 | 72 | for _ in range(res_num): 73 | temp_res = OSAG( 74 | channel_num=num_feat, 75 | bias=bias, 76 | block_num=block_num, 77 | ffn_bias=ffn_bias, 78 | window_size=self.window_size, 79 | pe=pe, 80 | ) 81 | residual_layer.append(temp_res) 82 | self.residual_layer = nn.Sequential(*residual_layer) 83 | self.input = nn.Conv2d( 84 | in_channels=num_in_ch, 85 | out_channels=num_feat, 86 | kernel_size=3, 87 | stride=1, 88 | padding=1, 89 | bias=bias, 90 | ) 91 | self.output = nn.Conv2d( 92 | in_channels=num_feat, 93 | out_channels=num_feat, 94 | kernel_size=3, 95 | stride=1, 96 | padding=1, 97 | bias=bias, 98 | ) 99 | self.up = pixelshuffle_block(num_feat, num_out_ch, up_scale, bias=bias) 100 | 101 | # self.tail = pixelshuffle_block(num_feat,num_out_ch,up_scale,bias=bias) 102 | 103 | # for m in self.modules(): 104 | # if isinstance(m, nn.Conv2d): 105 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 106 | # m.weight.data.normal_(0, sqrt(2. 
/ n)) 107 | 108 | # chaiNNer specific stuff 109 | self.model_arch = "OmniSR" 110 | self.sub_type = "SR" 111 | self.in_nc = num_in_ch 112 | self.out_nc = num_out_ch 113 | self.num_feat = num_feat 114 | self.scale = up_scale 115 | 116 | self.supports_fp16 = True # TODO: Test this 117 | self.supports_bfp16 = True 118 | self.min_size_restriction = 16 119 | 120 | self.load_state_dict(state_dict, strict=False) 121 | 122 | def check_image_size(self, x): 123 | _, _, h, w = x.size() 124 | # import pdb; pdb.set_trace() 125 | mod_pad_h = (self.window_size - h % self.window_size) % self.window_size 126 | mod_pad_w = (self.window_size - w % self.window_size) % self.window_size 127 | # x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') 128 | x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "constant", 0) 129 | return x 130 | 131 | def forward(self, x): 132 | H, W = x.shape[2:] 133 | x = self.check_image_size(x) 134 | 135 | residual = self.input(x) 136 | out = self.residual_layer(residual) 137 | 138 | # origin 139 | out = torch.add(self.output(out), residual) 140 | out = self.up(out) 141 | 142 | out = out[:, :, : H * self.up_scale, : W * self.up_scale] 143 | return out 144 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/OSA.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/OSA.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/OSAG.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/OSAG.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/OmniSR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/OmniSR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/esa.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/esa.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/layernorm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/layernorm.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/pixelshuffle.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/pixelshuffle.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/layernorm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: layernorm.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Thursday, 20th April 2023 9:28:20 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | 17 | class LayerNormFunction(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, weight, bias, eps): 20 | ctx.eps = eps 21 | N, C, H, W = x.size() 22 | mu = x.mean(1, keepdim=True) 23 | var = (x - mu).pow(2).mean(1, keepdim=True) 24 | y = (x - mu) / (var + eps).sqrt() 25 | ctx.save_for_backward(y, var, weight) 26 | y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) 27 | return y 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | eps = ctx.eps 32 | 33 | N, C, H, W = grad_output.size() 34 | y, var, weight = ctx.saved_variables 35 | g = grad_output * weight.view(1, C, 1, 1) 36 | mean_g = g.mean(dim=1, keepdim=True) 37 | 38 | mean_gy = (g * y).mean(dim=1, keepdim=True) 39 | gx = 1.0 / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) 40 | return ( 41 | gx, 42 | (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), 43 | grad_output.sum(dim=3).sum(dim=2).sum(dim=0), 44 | None, 45 | ) 46 | 47 | 48 | class LayerNorm2d(nn.Module): 49 | def __init__(self, channels, eps=1e-6): 50 | super(LayerNorm2d, self).__init__() 51 | self.register_parameter("weight", nn.Parameter(torch.ones(channels))) 52 | self.register_parameter("bias", nn.Parameter(torch.zeros(channels))) 53 | self.eps = eps 54 | 55 | def forward(self, x): 56 | return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) 57 | 58 | 59 | class GRN(nn.Module): 60 | """GRN (Global Response Normalization) layer""" 61 | 62 | def __init__(self, dim): 63 | super().__init__() 64 | self.gamma = nn.Parameter(torch.zeros(1, dim, 1, 1)) 65 | self.beta = nn.Parameter(torch.zeros(1, dim, 1, 1)) 66 | 67 | def forward(self, x): 68 | Gx = torch.norm(x, p=2, dim=(2, 3), keepdim=True) 69 | Nx = Gx / (Gx.mean(dim=1, keepdim=True) + 1e-6) 70 | return self.gamma * (x * Nx) + self.beta + x 71 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/pixelshuffle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: pixelshuffle.py 5 | # Created Date: Friday July 1st 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Friday, 1st July 2022 10:18:39 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2022 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch.nn as nn 14 | 15 | 16 | def pixelshuffle_block( 17 | in_channels, out_channels, upscale_factor=2, kernel_size=3, bias=False 18 | ): 19 | """ 
20 | Upsample features according to `upscale_factor`. 21 | """ 22 | padding = kernel_size // 2 23 | conv = nn.Conv2d( 24 | in_channels, 25 | out_channels * (upscale_factor**2), 26 | kernel_size, 27 | padding=1, 28 | bias=bias, 29 | ) 30 | pixel_shuffle = nn.PixelShuffle(upscale_factor) 31 | return nn.Sequential(*[conv, pixel_shuffle]) 32 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/SRVGG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import math 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class SRVGGNetCompact(nn.Module): 11 | """A compact VGG-style network structure for super-resolution. 12 | It is a compact network structure, which performs upsampling in the last layer and no convolution is 13 | conducted on the HR feature space. 14 | Args: 15 | num_in_ch (int): Channel number of inputs. Default: 3. 16 | num_out_ch (int): Channel number of outputs. Default: 3. 17 | num_feat (int): Channel number of intermediate features. Default: 64. 18 | num_conv (int): Number of convolution layers in the body network. Default: 16. 19 | upscale (int): Upsampling factor. Default: 4. 20 | act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu. 21 | """ 22 | 23 | def __init__( 24 | self, 25 | state_dict, 26 | act_type: str = "prelu", 27 | ): 28 | super(SRVGGNetCompact, self).__init__() 29 | self.model_arch = "SRVGG (RealESRGAN)" 30 | self.sub_type = "SR" 31 | 32 | self.act_type = act_type 33 | 34 | self.state = state_dict 35 | 36 | if "params" in self.state: 37 | self.state = self.state["params"] 38 | 39 | self.key_arr = list(self.state.keys()) 40 | 41 | self.in_nc = self.get_in_nc() 42 | self.num_feat = self.get_num_feats() 43 | self.num_conv = self.get_num_conv() 44 | self.out_nc = self.in_nc # :( 45 | self.pixelshuffle_shape = None # Defined in get_scale() 46 | self.scale = self.get_scale() 47 | 48 | self.supports_fp16 = True 49 | self.supports_bfp16 = True 50 | self.min_size_restriction = None 51 | 52 | self.body = nn.ModuleList() 53 | # the first conv 54 | self.body.append(nn.Conv2d(self.in_nc, self.num_feat, 3, 1, 1)) 55 | # the first activation 56 | if act_type == "relu": 57 | activation = nn.ReLU(inplace=True) 58 | elif act_type == "prelu": 59 | activation = nn.PReLU(num_parameters=self.num_feat) 60 | elif act_type == "leakyrelu": 61 | activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) 62 | self.body.append(activation) # type: ignore 63 | 64 | # the body structure 65 | for _ in range(self.num_conv): 66 | self.body.append(nn.Conv2d(self.num_feat, self.num_feat, 3, 1, 1)) 67 | # activation 68 | if act_type == "relu": 69 | activation = nn.ReLU(inplace=True) 70 | elif act_type == "prelu": 71 | activation = nn.PReLU(num_parameters=self.num_feat) 72 | elif act_type == "leakyrelu": 73 | activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) 74 | self.body.append(activation) # type: ignore 75 | 76 | # the last conv 77 | self.body.append(nn.Conv2d(self.num_feat, self.pixelshuffle_shape, 3, 1, 1)) # type: ignore 78 | # upsample 79 | self.upsampler = nn.PixelShuffle(self.scale) 80 | 81 | self.load_state_dict(self.state, strict=False) 82 | 83 | def get_num_conv(self) -> int: 84 | return (int(self.key_arr[-1].split(".")[1]) - 2) // 2 85 | 86 | def get_num_feats(self) -> int: 87 | return self.state[self.key_arr[0]].shape[0] 88 | 89 | def get_in_nc(self) -> int: 90 | 
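# self.key_arr[0] is the first body conv weight with shape (num_feat, in_nc, 3, 3), so dim 1 gives the input channel count.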
return self.state[self.key_arr[0]].shape[1] 91 | 92 | def get_scale(self) -> int: 93 | self.pixelshuffle_shape = self.state[self.key_arr[-1]].shape[0] 94 | # Assume out_nc is the same as in_nc 95 | # I cant think of a better way to do that 96 | self.out_nc = self.in_nc 97 | scale = math.sqrt(self.pixelshuffle_shape / self.out_nc) 98 | if scale - int(scale) > 0: 99 | print( 100 | "out_nc is probably different than in_nc, scale calculation might be wrong" 101 | ) 102 | scale = int(scale) 103 | return scale 104 | 105 | def forward(self, x): 106 | out = x 107 | for i in range(0, len(self.body)): 108 | out = self.body[i](out) 109 | 110 | out = self.upsampler(out) 111 | # add the nearest upsampled image, so that the network learns the residual 112 | base = F.interpolate(x, scale_factor=self.scale, mode="nearest") 113 | out += base 114 | return out 115 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/SwiftSRGAN.py: -------------------------------------------------------------------------------- 1 | # From https://github.com/Koushik0901/Swift-SRGAN/blob/master/swift-srgan/models.py 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | class SeperableConv2d(nn.Module): 8 | def __init__( 9 | self, in_channels, out_channels, kernel_size, stride=1, padding=1, bias=True 10 | ): 11 | super(SeperableConv2d, self).__init__() 12 | self.depthwise = nn.Conv2d( 13 | in_channels, 14 | in_channels, 15 | kernel_size=kernel_size, 16 | stride=stride, 17 | groups=in_channels, 18 | bias=bias, 19 | padding=padding, 20 | ) 21 | self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) 22 | 23 | def forward(self, x): 24 | return self.pointwise(self.depthwise(x)) 25 | 26 | 27 | class ConvBlock(nn.Module): 28 | def __init__( 29 | self, 30 | in_channels, 31 | out_channels, 32 | use_act=True, 33 | use_bn=True, 34 | discriminator=False, 35 | **kwargs, 36 | ): 37 | super(ConvBlock, self).__init__() 38 | 39 | self.use_act = use_act 40 | self.cnn = SeperableConv2d(in_channels, out_channels, **kwargs, bias=not use_bn) 41 | self.bn = nn.BatchNorm2d(out_channels) if use_bn else nn.Identity() 42 | self.act = ( 43 | nn.LeakyReLU(0.2, inplace=True) 44 | if discriminator 45 | else nn.PReLU(num_parameters=out_channels) 46 | ) 47 | 48 | def forward(self, x): 49 | return self.act(self.bn(self.cnn(x))) if self.use_act else self.bn(self.cnn(x)) 50 | 51 | 52 | class UpsampleBlock(nn.Module): 53 | def __init__(self, in_channels, scale_factor): 54 | super(UpsampleBlock, self).__init__() 55 | 56 | self.conv = SeperableConv2d( 57 | in_channels, 58 | in_channels * scale_factor**2, 59 | kernel_size=3, 60 | stride=1, 61 | padding=1, 62 | ) 63 | self.ps = nn.PixelShuffle( 64 | scale_factor 65 | ) # (in_channels * 4, H, W) -> (in_channels, H*2, W*2) 66 | self.act = nn.PReLU(num_parameters=in_channels) 67 | 68 | def forward(self, x): 69 | return self.act(self.ps(self.conv(x))) 70 | 71 | 72 | class ResidualBlock(nn.Module): 73 | def __init__(self, in_channels): 74 | super(ResidualBlock, self).__init__() 75 | 76 | self.block1 = ConvBlock( 77 | in_channels, in_channels, kernel_size=3, stride=1, padding=1 78 | ) 79 | self.block2 = ConvBlock( 80 | in_channels, in_channels, kernel_size=3, stride=1, padding=1, use_act=False 81 | ) 82 | 83 | def forward(self, x): 84 | out = self.block1(x) 85 | out = self.block2(out) 86 | return out + x 87 | 88 | 89 | class Generator(nn.Module): 90 | """Swift-SRGAN Generator 91 | Args: 92 | in_channels (int): number of input image 
channels. 93 | num_channels (int): number of hidden channels. 94 | num_blocks (int): number of residual blocks. 95 | upscale_factor (int): factor to upscale the image [2x, 4x, 8x]. 96 | Returns: 97 | torch.Tensor: super resolution image 98 | """ 99 | 100 | def __init__( 101 | self, 102 | state_dict, 103 | ): 104 | super(Generator, self).__init__() 105 | self.model_arch = "Swift-SRGAN" 106 | self.sub_type = "SR" 107 | self.state = state_dict 108 | if "model" in self.state: 109 | self.state = self.state["model"] 110 | 111 | self.in_nc: int = self.state["initial.cnn.depthwise.weight"].shape[0] 112 | self.out_nc: int = self.state["final_conv.pointwise.weight"].shape[0] 113 | self.num_filters: int = self.state["initial.cnn.pointwise.weight"].shape[0] 114 | self.num_blocks = len( 115 | set([x.split(".")[1] for x in self.state.keys() if "residual" in x]) 116 | ) 117 | self.scale: int = 2 ** len( 118 | set([x.split(".")[1] for x in self.state.keys() if "upsampler" in x]) 119 | ) 120 | 121 | in_channels = self.in_nc 122 | num_channels = self.num_filters 123 | num_blocks = self.num_blocks 124 | upscale_factor = self.scale 125 | 126 | self.supports_fp16 = True 127 | self.supports_bfp16 = True 128 | self.min_size_restriction = None 129 | 130 | self.initial = ConvBlock( 131 | in_channels, num_channels, kernel_size=9, stride=1, padding=4, use_bn=False 132 | ) 133 | self.residual = nn.Sequential( 134 | *[ResidualBlock(num_channels) for _ in range(num_blocks)] 135 | ) 136 | self.convblock = ConvBlock( 137 | num_channels, 138 | num_channels, 139 | kernel_size=3, 140 | stride=1, 141 | padding=1, 142 | use_act=False, 143 | ) 144 | self.upsampler = nn.Sequential( 145 | *[ 146 | UpsampleBlock(num_channels, scale_factor=2) 147 | for _ in range(upscale_factor // 2) 148 | ] 149 | ) 150 | self.final_conv = SeperableConv2d( 151 | num_channels, in_channels, kernel_size=9, stride=1, padding=4 152 | ) 153 | 154 | self.load_state_dict(self.state, strict=False) 155 | 156 | def forward(self, x): 157 | initial = self.initial(x) 158 | x = self.residual(initial) 159 | x = self.convblock(x) + initial 160 | x = self.upsampler(x) 161 | return (torch.tanh(self.final_conv(x)) + 1) / 2 162 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__init__.py -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/DAT.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/DAT.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/HAT.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/HAT.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/LaMa.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/LaMa.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/RRDB.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/RRDB.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SCUNet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SCUNet.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SPSR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SPSR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SRVGG.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SRVGG.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SwiftSRGAN.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SwiftSRGAN.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/Swin2SR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/Swin2SR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SwinIR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SwinIR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/block.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/block.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/LICENSE-codeformer: -------------------------------------------------------------------------------- 1 | S-Lab License 1.0 2 | 3 | Copyright 2022 S-Lab 4 | 5 | Redistribution and use for non-commercial purpose in source and 6 | binary forms, with or without modification, are permitted provided 7 | that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | In the event that redistribution and/or use for commercial purpose in 34 | source or binary forms, with or without modification is required, 35 | please contact the contributor(s) of the work. 
36 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/codeformer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/codeformer.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/gfpganv1_clean_arch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/gfpganv1_clean_arch.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/restoreformer_arch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/restoreformer_arch.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/stylegan2_clean_arch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/stylegan2_clean_arch.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/fused_act.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # type: ignore 3 | # modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 4 | 5 | import torch 6 | from torch import nn 7 | from torch.autograd import Function 8 | 9 | fused_act_ext = None 10 | 11 | 12 | class FusedLeakyReLUFunctionBackward(Function): 13 | @staticmethod 14 | def forward(ctx, grad_output, out, negative_slope, scale): 15 | ctx.save_for_backward(out) 16 | ctx.negative_slope = negative_slope 17 | ctx.scale = scale 18 | 19 | empty = grad_output.new_empty(0) 20 | 21 | grad_input = fused_act_ext.fused_bias_act( 22 | grad_output, empty, out, 3, 1, negative_slope, scale 23 | ) 24 | 25 | dim = [0] 26 | 27 | if grad_input.ndim > 2: 28 | dim += list(range(2, grad_input.ndim)) 29 | 30 | grad_bias = grad_input.sum(dim).detach() 31 | 32 | return grad_input, grad_bias 33 | 34 | @staticmethod 35 | def backward(ctx, gradgrad_input, gradgrad_bias): 36 | (out,) = ctx.saved_tensors 37 | gradgrad_out = fused_act_ext.fused_bias_act( 38 | gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale 39 | ) 40 | 41 | return gradgrad_out, None, None, None 42 | 43 | 44 | class FusedLeakyReLUFunction(Function): 45 | @staticmethod 46 | def forward(ctx, input, bias, negative_slope, scale): 47 | empty = input.new_empty(0) 48 | out = fused_act_ext.fused_bias_act( 49 | input, bias, empty, 3, 0, negative_slope, scale 50 | ) 51 | ctx.save_for_backward(out) 52 | ctx.negative_slope = negative_slope 53 | ctx.scale = scale 54 | 55 | return out 56 | 57 | @staticmethod 58 | def backward(ctx, grad_output): 59 | (out,) = ctx.saved_tensors 60 | 61 | grad_input, grad_bias = 
FusedLeakyReLUFunctionBackward.apply( 62 | grad_output, out, ctx.negative_slope, ctx.scale 63 | ) 64 | 65 | return grad_input, grad_bias, None, None 66 | 67 | 68 | class FusedLeakyReLU(nn.Module): 69 | def __init__(self, channel, negative_slope=0.2, scale=2**0.5): 70 | super().__init__() 71 | 72 | self.bias = nn.Parameter(torch.zeros(channel)) 73 | self.negative_slope = negative_slope 74 | self.scale = scale 75 | 76 | def forward(self, input): 77 | return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale) 78 | 79 | 80 | def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5): 81 | return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) 82 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/__pycache__/drop.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/timm/__pycache__/drop.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/__pycache__/helpers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/timm/__pycache__/helpers.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/__pycache__/weight_init.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/timm/__pycache__/weight_init.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import collections.abc 5 | from itertools import repeat 6 | 7 | 8 | # From PyTorch internals 9 | def _ntuple(n): 10 | def parse(x): 11 | if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): 12 | return x 13 | return tuple(repeat(x, n)) 14 | 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=0.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # Make sure that round down does not go down by more than 10%. 
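# round_limit defaults to 0.9, i.e. the 10% bound above; if rounding fell below it, add one more divisor step.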
29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/weight_init.py: -------------------------------------------------------------------------------- 1 | import math 2 | import warnings 3 | 4 | import torch 5 | from torch.nn.init import _calculate_fan_in_and_fan_out 6 | 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn( 17 | "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 18 | "The distribution of values may be incorrect.", 19 | stacklevel=2, 20 | ) 21 | 22 | with torch.no_grad(): 23 | # Values are generated by using a truncated uniform distribution and 24 | # then using the inverse CDF for the normal distribution. 25 | # Get upper and lower cdf values 26 | l = norm_cdf((a - mean) / std) 27 | u = norm_cdf((b - mean) / std) 28 | 29 | # Uniformly fill tensor with values from [l, u], then translate to 30 | # [2l-1, 2u-1]. 31 | tensor.uniform_(2 * l - 1, 2 * u - 1) 32 | 33 | # Use inverse cdf transform for normal distribution to get truncated 34 | # standard normal 35 | tensor.erfinv_() 36 | 37 | # Transform to proper mean, std 38 | tensor.mul_(std * math.sqrt(2.0)) 39 | tensor.add_(mean) 40 | 41 | # Clamp to ensure it's in the proper range 42 | tensor.clamp_(min=a, max=b) 43 | return tensor 44 | 45 | 46 | def trunc_normal_( 47 | tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 48 | ) -> torch.Tensor: 49 | r"""Fills the input Tensor with values drawn from a truncated 50 | normal distribution. The values are effectively drawn from the 51 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 52 | with values outside :math:`[a, b]` redrawn until they are within 53 | the bounds. The method used for generating the random values works 54 | best when :math:`a \leq \text{mean} \leq b`. 55 | 56 | NOTE: this impl is similar to the PyTorch trunc_normal_, the bounds [a, b] are 57 | applied while sampling the normal with mean/std applied, therefore a, b args 58 | should be adjusted to match the range of mean, std args. 59 | 60 | Args: 61 | tensor: an n-dimensional `torch.Tensor` 62 | mean: the mean of the normal distribution 63 | std: the standard deviation of the normal distribution 64 | a: the minimum cutoff value 65 | b: the maximum cutoff value 66 | Examples: 67 | >>> w = torch.empty(3, 5) 68 | >>> nn.init.trunc_normal_(w) 69 | """ 70 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 71 | 72 | 73 | def trunc_normal_tf_( 74 | tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 75 | ) -> torch.Tensor: 76 | r"""Fills the input Tensor with values drawn from a truncated 77 | normal distribution. The values are effectively drawn from the 78 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 79 | with values outside :math:`[a, b]` redrawn until they are within 80 | the bounds. The method used for generating the random values works 81 | best when :math:`a \leq \text{mean} \leq b`. 
82 | 83 | NOTE: this 'tf' variant behaves closer to Tensorflow / JAX impl where the 84 | bounds [a, b] are applied when sampling the normal distribution with mean=0, std=1.0 85 | and the result is subsquently scaled and shifted by the mean and std args. 86 | 87 | Args: 88 | tensor: an n-dimensional `torch.Tensor` 89 | mean: the mean of the normal distribution 90 | std: the standard deviation of the normal distribution 91 | a: the minimum cutoff value 92 | b: the maximum cutoff value 93 | Examples: 94 | >>> w = torch.empty(3, 5) 95 | >>> nn.init.trunc_normal_(w) 96 | """ 97 | _no_grad_trunc_normal_(tensor, 0, 1.0, a, b) 98 | with torch.no_grad(): 99 | tensor.mul_(std).add_(mean) 100 | return tensor 101 | 102 | 103 | def variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal"): 104 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 105 | if mode == "fan_in": 106 | denom = fan_in 107 | elif mode == "fan_out": 108 | denom = fan_out 109 | elif mode == "fan_avg": 110 | denom = (fan_in + fan_out) / 2 111 | 112 | variance = scale / denom # type: ignore 113 | 114 | if distribution == "truncated_normal": 115 | # constant is stddev of standard normal truncated to (-2, 2) 116 | trunc_normal_tf_(tensor, std=math.sqrt(variance) / 0.87962566103423978) 117 | elif distribution == "normal": 118 | tensor.normal_(std=math.sqrt(variance)) 119 | elif distribution == "uniform": 120 | bound = math.sqrt(3 * variance) 121 | # pylint: disable=invalid-unary-operand-type 122 | tensor.uniform_(-bound, bound) 123 | else: 124 | raise ValueError(f"invalid distribution {distribution}") 125 | 126 | 127 | def lecun_normal_(tensor): 128 | variance_scaling_(tensor, mode="fan_in", distribution="truncated_normal") 129 | -------------------------------------------------------------------------------- /ldm_patched/pfn/model_loading.py: -------------------------------------------------------------------------------- 1 | import logging as logger 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | from .types import PyTorchModel 18 | 19 | 20 | class UnsupportedModel(Exception): 21 | pass 22 | 23 | 24 | def load_state_dict(state_dict) -> PyTorchModel: 25 | logger.debug(f"Loading state dict into pytorch model arch") 26 | 27 | state_dict_keys = list(state_dict.keys()) 28 | 29 | if "params_ema" in state_dict_keys: 30 | state_dict = state_dict["params_ema"] 31 | elif "params-ema" in state_dict_keys: 32 | state_dict = state_dict["params-ema"] 33 | elif "params" in state_dict_keys: 34 | state_dict = state_dict["params"] 35 | 36 | state_dict_keys = list(state_dict.keys()) 37 | # SRVGGNet Real-ESRGAN (v2) 38 | if "body.0.weight" in state_dict_keys and "body.1.weight" in state_dict_keys: 39 | model = RealESRGANv2(state_dict) 40 | # SPSR (ESRGAN with lots of extra layers) 41 | elif "f_HR_conv1.0.weight" in state_dict: 42 | model = 
SPSR(state_dict) 43 | # Swift-SRGAN 44 | elif ( 45 | "model" in state_dict_keys 46 | and "initial.cnn.depthwise.weight" in state_dict["model"].keys() 47 | ): 48 | model = SwiftSRGAN(state_dict) 49 | # SwinIR, Swin2SR, HAT 50 | elif "layers.0.residual_group.blocks.0.norm1.weight" in state_dict_keys: 51 | if ( 52 | "layers.0.residual_group.blocks.0.conv_block.cab.0.weight" 53 | in state_dict_keys 54 | ): 55 | model = HAT(state_dict) 56 | elif "patch_embed.proj.weight" in state_dict_keys: 57 | model = Swin2SR(state_dict) 58 | else: 59 | model = SwinIR(state_dict) 60 | # GFPGAN 61 | elif ( 62 | "toRGB.0.weight" in state_dict_keys 63 | and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys 64 | ): 65 | model = GFPGANv1Clean(state_dict) 66 | # RestoreFormer 67 | elif ( 68 | "encoder.conv_in.weight" in state_dict_keys 69 | and "encoder.down.0.block.0.norm1.weight" in state_dict_keys 70 | ): 71 | model = RestoreFormer(state_dict) 72 | elif ( 73 | "encoder.blocks.0.weight" in state_dict_keys 74 | and "quantize.embedding.weight" in state_dict_keys 75 | ): 76 | model = CodeFormer(state_dict) 77 | # LaMa 78 | elif ( 79 | "model.model.1.bn_l.running_mean" in state_dict_keys 80 | or "generator.model.1.bn_l.running_mean" in state_dict_keys 81 | ): 82 | model = LaMa(state_dict) 83 | # Omni-SR 84 | elif "residual_layer.0.residual_layer.0.layer.0.fn.0.weight" in state_dict_keys: 85 | model = OmniSR(state_dict) 86 | # SCUNet 87 | elif "m_head.0.weight" in state_dict_keys and "m_tail.0.weight" in state_dict_keys: 88 | model = SCUNet(state_dict) 89 | # DAT 90 | elif "layers.0.blocks.2.attn.attn_mask_0" in state_dict_keys: 91 | model = DAT(state_dict) 92 | # Regular ESRGAN, "new-arch" ESRGAN, Real-ESRGAN v1 93 | else: 94 | try: 95 | model = ESRGAN(state_dict) 96 | except: 97 | # pylint: disable=raise-missing-from 98 | raise UnsupportedModel 99 | return model 100 | -------------------------------------------------------------------------------- /ldm_patched/pfn/types.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | 18 | PyTorchSRModels = ( 19 | RealESRGANv2, 20 | SPSR, 21 | SwiftSRGAN, 22 | ESRGAN, 23 | SwinIR, 24 | Swin2SR, 25 | HAT, 26 | OmniSR, 27 | SCUNet, 28 | DAT, 29 | ) 30 | PyTorchSRModel = Union[ 31 | RealESRGANv2, 32 | SPSR, 33 | SwiftSRGAN, 34 | ESRGAN, 35 | SwinIR, 36 | Swin2SR, 37 | HAT, 38 | OmniSR, 39 | SCUNet, 40 | DAT, 41 | ] 42 | 43 | 44 | def is_pytorch_sr_model(model: object): 45 | return isinstance(model, PyTorchSRModels) 46 | 47 | 48 | PyTorchFaceModels = (GFPGANv1Clean, RestoreFormer, CodeFormer) 49 | PyTorchFaceModel = Union[GFPGANv1Clean, RestoreFormer, CodeFormer] 50 | 51 | 52 | def is_pytorch_face_model(model: object): 53 | return isinstance(model, PyTorchFaceModels) 54 | 55 | 56 | 
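# Inpainting currently covers LaMa only; Union[LaMa] collapses to LaMa but keeps the alias pattern consistent with the SR and face groups.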
PyTorchInpaintModels = (LaMa,) 57 | PyTorchInpaintModel = Union[LaMa] 58 | 59 | 60 | def is_pytorch_inpaint_model(model: object): 61 | return isinstance(model, PyTorchInpaintModels) 62 | 63 | 64 | PyTorchModels = (*PyTorchSRModels, *PyTorchFaceModels, *PyTorchInpaintModels) 65 | PyTorchModel = Union[PyTorchSRModel, PyTorchFaceModel, PyTorchInpaintModel] 66 | 67 | 68 | def is_pytorch_model(model: object): 69 | return isinstance(model, PyTorchModels) 70 | -------------------------------------------------------------------------------- /ldm_patched/taesd/taesd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Tiny AutoEncoder for Stable Diffusion 4 | (DNN for encoding / decoding SD's latent space) 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | 9 | import ldm_patched.modules.utils 10 | import ldm_patched.modules.ops 11 | 12 | def conv(n_in, n_out, **kwargs): 13 | return ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 3, padding=1, **kwargs) 14 | 15 | class Clamp(nn.Module): 16 | def forward(self, x): 17 | return torch.tanh(x / 3) * 3 18 | 19 | class Block(nn.Module): 20 | def __init__(self, n_in, n_out): 21 | super().__init__() 22 | self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out)) 23 | self.skip = ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity() 24 | self.fuse = nn.ReLU() 25 | def forward(self, x): 26 | return self.fuse(self.conv(x) + self.skip(x)) 27 | 28 | def Encoder(): 29 | return nn.Sequential( 30 | conv(3, 64), Block(64, 64), 31 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 32 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 33 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 34 | conv(64, 4), 35 | ) 36 | 37 | def Decoder(): 38 | return nn.Sequential( 39 | Clamp(), conv(4, 64), nn.ReLU(), 40 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 41 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 42 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 43 | Block(64, 64), conv(64, 3), 44 | ) 45 | 46 | class TAESD(nn.Module): 47 | latent_magnitude = 3 48 | latent_shift = 0.5 49 | 50 | def __init__(self, encoder_path=None, decoder_path=None): 51 | """Initialize pretrained TAESD on the given device from the given checkpoints.""" 52 | super().__init__() 53 | self.taesd_encoder = Encoder() 54 | self.taesd_decoder = Decoder() 55 | self.vae_scale = torch.nn.Parameter(torch.tensor(1.0)) 56 | if encoder_path is not None: 57 | self.taesd_encoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(encoder_path, safe_load=True)) 58 | if decoder_path is not None: 59 | self.taesd_decoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(decoder_path, safe_load=True)) 60 | 61 | @staticmethod 62 | def scale_latents(x): 63 | """raw latents -> [0, 1]""" 64 | return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) 65 | 66 | @staticmethod 67 | def unscale_latents(x): 68 | """[0, 1] -> raw latents""" 69 | return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) 70 | 71 | def decode(self, x): 72 | x_sample = self.taesd_decoder(x * self.vae_scale) 73 | x_sample = x_sample.sub(0.5).mul(2) 74 | return 
x_sample 75 | 76 | def encode(self, x): 77 | return self.taesd_encoder(x * 0.5 + 0.5) / self.vae_scale 78 | -------------------------------------------------------------------------------- /ldm_patched/utils/latent_visualization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | import struct 4 | import numpy as np 5 | from ldm_patched.modules.args_parser import args, LatentPreviewMethod 6 | from ldm_patched.taesd.taesd import TAESD 7 | import ldm_patched.utils.path_utils 8 | import ldm_patched.modules.utils 9 | 10 | MAX_PREVIEW_RESOLUTION = 512 11 | 12 | class LatentPreviewer: 13 | def decode_latent_to_preview(self, x0): 14 | pass 15 | 16 | def decode_latent_to_preview_image(self, preview_format, x0): 17 | preview_image = self.decode_latent_to_preview(x0) 18 | return ("JPEG", preview_image, MAX_PREVIEW_RESOLUTION) 19 | 20 | class TAESDPreviewerImpl(LatentPreviewer): 21 | def __init__(self, taesd): 22 | self.taesd = taesd 23 | 24 | def decode_latent_to_preview(self, x0): 25 | x_sample = self.taesd.decode(x0[:1])[0].detach() 26 | x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) 27 | x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) 28 | x_sample = x_sample.astype(np.uint8) 29 | 30 | preview_image = Image.fromarray(x_sample) 31 | return preview_image 32 | 33 | 34 | class Latent2RGBPreviewer(LatentPreviewer): 35 | def __init__(self, latent_rgb_factors): 36 | self.latent_rgb_factors = torch.tensor(latent_rgb_factors, device="cpu") 37 | 38 | def decode_latent_to_preview(self, x0): 39 | latent_image = x0[0].permute(1, 2, 0).cpu() @ self.latent_rgb_factors 40 | 41 | latents_ubyte = (((latent_image + 1) / 2) 42 | .clamp(0, 1) # change scale from -1..1 to 0..1 43 | .mul(0xFF) # to 0..255 44 | .byte()).cpu() 45 | 46 | return Image.fromarray(latents_ubyte.numpy()) 47 | 48 | 49 | def get_previewer(device, latent_format): 50 | previewer = None 51 | method = args.preview_option 52 | if method != LatentPreviewMethod.NoPreviews: 53 | # TODO previewer methods 54 | taesd_decoder_path = None 55 | if latent_format.taesd_decoder_name is not None: 56 | taesd_decoder_path = next( 57 | (fn for fn in ldm_patched.utils.path_utils.get_filename_list("vae_approx") 58 | if fn.startswith(latent_format.taesd_decoder_name)), 59 | "" 60 | ) 61 | taesd_decoder_path = ldm_patched.utils.path_utils.get_full_path("vae_approx", taesd_decoder_path) 62 | 63 | if method == LatentPreviewMethod.Auto: 64 | method = LatentPreviewMethod.Latent2RGB 65 | if taesd_decoder_path: 66 | method = LatentPreviewMethod.TAESD 67 | 68 | if method == LatentPreviewMethod.TAESD: 69 | if taesd_decoder_path: 70 | taesd = TAESD(None, taesd_decoder_path).to(device) 71 | previewer = TAESDPreviewerImpl(taesd) 72 | else: 73 | print("Warning: TAESD previews enabled, but could not find models/vae_approx/{}".format(latent_format.taesd_decoder_name)) 74 | 75 | if previewer is None: 76 | if latent_format.latent_rgb_factors is not None: 77 | previewer = Latent2RGBPreviewer(latent_format.latent_rgb_factors) 78 | return previewer 79 | 80 | def prepare_callback(model, steps, x0_output_dict=None): 81 | preview_format = "JPEG" 82 | if preview_format not in ["JPEG", "PNG"]: 83 | preview_format = "JPEG" 84 | 85 | previewer = get_previewer(model.load_device, model.model.latent_format) 86 | 87 | pbar = ldm_patched.modules.utils.ProgressBar(steps) 88 | def callback(step, x0, x, total_steps): 89 | if x0_output_dict is not None: 90 | x0_output_dict["x0"] = x0 91 | 92 | 
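# Decode a lightweight preview of the current latent only when a previewer is available, then hand it to the progress bar.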
preview_bytes = None 93 | if previewer: 94 | preview_bytes = previewer.decode_latent_to_preview_image(preview_format, x0) 95 | pbar.update_absolute(step + 1, total_steps, preview_bytes) 96 | return callback 97 | 98 | -------------------------------------------------------------------------------- /make_img.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "import random\n", 11 | "import os\n", 12 | "import shutil\n", 13 | "from tqdm import tqdm\n", 14 | "from diffusers import StableDiffusionXLPipeline\n", 15 | "\n", 16 | "regular_prompts_list = [\n", 17 | " ...\n", 18 | "]\n", 19 | "object_name = \"teapot\"\n", 20 | "save_dir = \"regular_teapot\"\n", 21 | "\n", 22 | "\n", 23 | "repeat_times = 30\n", 24 | "\n", 25 | "DEVICE = \"cuda:0\"\n", 26 | "torch.cuda.set_device(DEVICE)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "pipeline = StableDiffusionXLPipeline.from_pretrained(\n", 36 | " \"stabilityai/stable-diffusion-xl-base-1.0\",\n", 37 | " torch_dtype=torch.float16,\n", 38 | " use_safetensors=True,\n", 39 | " variant=\"fp16\",\n", 40 | ").to(DEVICE)\n", 41 | "pipeline.set_progress_bar_config(disable=True)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# 使用lang-sam完成分割任务 python3.10装包\n", 51 | "# git clone https://github.com/mycfhs/lang-segment-anything && cd lang-segment-anything\n", 52 | "# python -m pip install -e . --ignore-installed\n", 53 | "from lang_sam import LangSAM\n", 54 | "\n", 55 | "model = LangSAM(sam_type=\"vit_h\") # b, l, h" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "from torchvision.transforms import ToPILImage\n", 65 | "import gc\n", 66 | "\n", 67 | "to_pil_image = ToPILImage()\n", 68 | "\n", 69 | "if os.path.exists(save_dir):\n", 70 | " shutil.rmtree(save_dir)\n", 71 | "\n", 72 | "os.makedirs(save_dir)\n", 73 | "for prompt in regular_prompts_list:\n", 74 | " prompt = prompt.replace(\" \", \"_\")\n", 75 | " os.makedirs(f\"{save_dir}/{prompt}\")\n", 76 | "\n", 77 | "for _ in tqdm(range(repeat_times)):\n", 78 | " random_seed = random.randint(0, 1000000)\n", 79 | " images = pipeline(regular_prompts_list, seed=random_seed).images\n", 80 | "\n", 81 | " gc.collect()\n", 82 | " if torch.cuda.is_available():\n", 83 | " torch.cuda.empty_cache()\n", 84 | "\n", 85 | " for image, prompt in zip(images, regular_prompts_list):\n", 86 | " prompt = prompt.replace(\" \", \"_\")\n", 87 | "\n", 88 | " masks, boxes, phrases, logits = model.predict(image, object_name)\n", 89 | " mask = masks.to(torch.uint8) * 255\n", 90 | "\n", 91 | " try:\n", 92 | " mask_img = to_pil_image(mask[0])\n", 93 | " mask_img.save(f\"{save_dir}/{prompt}/{random_seed}-mask.png\")\n", 94 | " image.save(f\"{save_dir}/{prompt}/{random_seed}-image.png\")\n", 95 | " except:\n", 96 | " print(f\"Error img, ignore\")\n", 97 | " continue\n", 98 | "\n", 99 | " gc.collect()\n", 100 | " if torch.cuda.is_available():\n", 101 | " torch.cuda.empty_cache()" 102 | ] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "DreamMix", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | 
"codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.10.15" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /models/inpaint/put_inpaint_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/models/inpaint/put_inpaint_here -------------------------------------------------------------------------------- /models/loras/put_loras_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/models/loras/put_loras_here -------------------------------------------------------------------------------- /models/upscale_models/put_esrgan_and_other_upscale_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/models/upscale_models/put_esrgan_and_other_upscale_models_here -------------------------------------------------------------------------------- /modules/auth.py: -------------------------------------------------------------------------------- 1 | import json 2 | import hashlib 3 | import modules.constants as constants 4 | 5 | from os.path import exists 6 | 7 | 8 | def auth_list_to_dict(auth_list): 9 | auth_dict = {} 10 | for auth_data in auth_list: 11 | if 'user' in auth_data: 12 | if 'hash' in auth_data: 13 | auth_dict |= {auth_data['user']: auth_data['hash']} 14 | elif 'pass' in auth_data: 15 | auth_dict |= {auth_data['user']: hashlib.sha256(bytes(auth_data['pass'], encoding='utf-8')).hexdigest()} 16 | return auth_dict 17 | 18 | 19 | def load_auth_data(filename=None): 20 | auth_dict = None 21 | if filename != None and exists(filename): 22 | with open(filename, encoding='utf-8') as auth_file: 23 | try: 24 | auth_obj = json.load(auth_file) 25 | if isinstance(auth_obj, list) and len(auth_obj) > 0: 26 | auth_dict = auth_list_to_dict(auth_obj) 27 | except Exception as e: 28 | print('load_auth_data, e: ' + str(e)) 29 | return auth_dict 30 | 31 | 32 | auth_dict = load_auth_data(constants.AUTH_FILENAME) 33 | 34 | auth_enabled = auth_dict != None 35 | 36 | 37 | def check_auth(user, password): 38 | if user not in auth_dict: 39 | return False 40 | else: 41 | return hashlib.sha256(bytes(password, encoding='utf-8')).hexdigest() == auth_dict[user] 42 | -------------------------------------------------------------------------------- /modules/constants.py: -------------------------------------------------------------------------------- 1 | # as in k-diffusion (sampling.py) 2 | MIN_SEED = 0 3 | MAX_SEED = 2**63 - 1 4 | 5 | AUTH_FILENAME = 'auth.json' 6 | -------------------------------------------------------------------------------- /modules/flags.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum, Enum 2 | 3 | disabled = 'Disabled' 4 | enabled = 'Enabled' 5 | subtle_variation = 'Vary (Subtle)' 6 | strong_variation = 'Vary (Strong)' 7 | upscale_15 = 'Upscale (1.5x)' 8 | upscale_2 = 'Upscale (2x)' 9 | upscale_fast = 'Upscale (Fast 2x)' 10 | 11 | uov_list = [ 12 | disabled, 
subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast 13 | ] 14 | 15 | CIVITAI_NO_KARRAS = ["euler", "euler_ancestral", "heun", "dpm_fast", "dpm_adaptive", "ddim", "uni_pc"] 16 | 17 | # fooocus: a1111 (Civitai) 18 | KSAMPLER = { 19 | "euler": "Euler", 20 | "euler_ancestral": "Euler a", 21 | "heun": "Heun", 22 | "heunpp2": "", 23 | "dpm_2": "DPM2", 24 | "dpm_2_ancestral": "DPM2 a", 25 | "lms": "LMS", 26 | "dpm_fast": "DPM fast", 27 | "dpm_adaptive": "DPM adaptive", 28 | "dpmpp_2s_ancestral": "DPM++ 2S a", 29 | "dpmpp_sde": "DPM++ SDE", 30 | "dpmpp_sde_gpu": "DPM++ SDE", 31 | "dpmpp_2m": "DPM++ 2M", 32 | "dpmpp_2m_sde": "DPM++ 2M SDE", 33 | "dpmpp_2m_sde_gpu": "DPM++ 2M SDE", 34 | "dpmpp_3m_sde": "", 35 | "dpmpp_3m_sde_gpu": "", 36 | "ddpm": "", 37 | "lcm": "LCM" 38 | } 39 | 40 | SAMPLER_EXTRA = { 41 | "ddim": "DDIM", 42 | "uni_pc": "UniPC", 43 | "uni_pc_bh2": "" 44 | } 45 | 46 | SAMPLERS = KSAMPLER | SAMPLER_EXTRA 47 | 48 | KSAMPLER_NAMES = list(KSAMPLER.keys()) 49 | 50 | SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", "lcm", "turbo"] 51 | SAMPLER_NAMES = KSAMPLER_NAMES + list(SAMPLER_EXTRA.keys()) 52 | 53 | sampler_list = SAMPLER_NAMES 54 | scheduler_list = SCHEDULER_NAMES 55 | 56 | refiner_swap_method = 'joint' 57 | 58 | cn_ip = "ImagePrompt" 59 | cn_ip_face = "FaceSwap" 60 | cn_canny = "PyraCanny" 61 | cn_cpds = "CPDS" 62 | 63 | ip_list = [cn_ip, cn_canny, cn_cpds, cn_ip_face] 64 | default_ip = cn_ip 65 | 66 | default_parameters = { 67 | cn_ip: (0.5, 0.6), cn_ip_face: (0.9, 0.75), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0) 68 | } # stop, weight 69 | 70 | output_formats = ['png', 'jpeg', 'webp'] 71 | 72 | inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6'] 73 | inpaint_option_default = 'Inpaint or Outpaint (default)' 74 | inpaint_option_detail = 'Improve Detail (face, hand, eyes, etc.)' 75 | inpaint_option_modify = 'Modify Content (add objects, change background, etc.)' 76 | inpaint_options = [inpaint_option_default, inpaint_option_detail, inpaint_option_modify] 77 | 78 | desc_type_photo = 'Photograph' 79 | desc_type_anime = 'Art/Anime' 80 | 81 | 82 | class MetadataScheme(Enum): 83 | FOOOCUS = 'fooocus' 84 | A1111 = 'a1111' 85 | 86 | 87 | metadata_scheme = [ 88 | (f'{MetadataScheme.FOOOCUS.value} (json)', MetadataScheme.FOOOCUS.value), 89 | (f'{MetadataScheme.A1111.value} (plain text)', MetadataScheme.A1111.value), 90 | ] 91 | 92 | controlnet_image_count = 4 93 | 94 | 95 | class OutputFormat(Enum): 96 | PNG = 'png' 97 | JPEG = 'jpeg' 98 | WEBP = 'webp' 99 | 100 | @classmethod 101 | def list(cls) -> list: 102 | return list(map(lambda c: c.value, cls)) 103 | 104 | 105 | class Steps(IntEnum): 106 | QUALITY = 60 107 | SPEED = 30 108 | EXTREME_SPEED = 8 109 | LIGHTNING = 4 110 | 111 | 112 | class StepsUOV(IntEnum): 113 | QUALITY = 36 114 | SPEED = 18 115 | EXTREME_SPEED = 8 116 | LIGHTNING = 4 117 | 118 | 119 | class Performance(Enum): 120 | QUALITY = 'Quality' 121 | SPEED = 'Speed' 122 | EXTREME_SPEED = 'Extreme Speed' 123 | LIGHTNING = 'Lightning' 124 | 125 | @classmethod 126 | def list(cls) -> list: 127 | return list(map(lambda c: c.value, cls)) 128 | 129 | @classmethod 130 | def has_restricted_features(cls, x) -> bool: 131 | if isinstance(x, Performance): 132 | x = x.value 133 | return x in [cls.EXTREME_SPEED.value, cls.LIGHTNING.value] 134 | 135 | def steps(self) -> int | None: 136 | return Steps[self.name].value if Steps[self.name] else None 137 | 138 | def steps_uov(self) -> int | None: 139 | return 
StepsUOV[self.name].value if Steps[self.name] else None 140 | -------------------------------------------------------------------------------- /modules/html.py: -------------------------------------------------------------------------------- 1 | progress_html = ''' 2 |