├── FooocusSDXLInpaintAllInOnePipeline.py
├── assets
    └── teaser.png
├── data
    ├── 1_1.jpg
    ├── 1_2.jpg
    ├── 2_1.jpg
    ├── 2_2.jpg
    ├── 3_1.jpg
    ├── 3_2.jpg
    ├── 4_1.jpg
    └── 4_2.jpg
├── infer.ipynb
├── ldm_patched
    ├── contrib
    │   ├── external.py
    │   ├── external_canny.py
    │   ├── external_clip_sdxl.py
    │   ├── external_compositing.py
    │   ├── external_custom_sampler.py
    │   ├── external_freelunch.py
    │   ├── external_hypernetwork.py
    │   ├── external_hypertile.py
    │   ├── external_images.py
    │   ├── external_latent.py
    │   ├── external_mask.py
    │   ├── external_model_advanced.py
    │   ├── external_model_downscale.py
    │   ├── external_model_merging.py
    │   ├── external_perpneg.py
    │   ├── external_photomaker.py
    │   ├── external_post_processing.py
    │   ├── external_rebatch.py
    │   ├── external_sag.py
    │   ├── external_sdupscale.py
    │   ├── external_stable3d.py
    │   ├── external_tomesd.py
    │   ├── external_upscale_model.py
    │   └── external_video_model.py
    ├── controlnet
    │   └── cldm.py
    ├── k_diffusion
    │   ├── sampling.py
    │   └── utils.py
    ├── ldm
    │   ├── models
    │   │   ├── __pycache__
    │   │   │   └── autoencoder.cpython-310.pyc
    │   │   └── autoencoder.py
    │   ├── modules
    │   │   ├── __pycache__
    │   │   │   ├── attention.cpython-310.pyc
    │   │   │   ├── ema.cpython-310.pyc
    │   │   │   └── sub_quadratic_attention.cpython-310.pyc
    │   │   ├── attention.py
    │   │   ├── diffusionmodules
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   ├── model.cpython-310.pyc
    │   │   │   │   ├── openaimodel.cpython-310.pyc
    │   │   │   │   ├── upscaling.cpython-310.pyc
    │   │   │   │   └── util.cpython-310.pyc
    │   │   │   ├── model.py
    │   │   │   ├── openaimodel.py
    │   │   │   ├── upscaling.py
    │   │   │   └── util.py
    │   │   ├── distributions
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   └── distributions.cpython-310.pyc
    │   │   │   └── distributions.py
    │   │   ├── ema.py
    │   │   ├── encoders
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   └── noise_aug_modules.cpython-310.pyc
    │   │   │   └── noise_aug_modules.py
    │   │   ├── sub_quadratic_attention.py
    │   │   └── temporal_ae.py
    │   └── util.py
    ├── licenses-3rd
    │   ├── chainer
    │   ├── comfyui
    │   ├── diffusers
    │   ├── kdiffusion
    │   ├── ldm
    │   ├── taesd
    │   └── transformers
    ├── modules
    │   ├── args_parser.py
    │   ├── checkpoint_pickle.py
    │   ├── clip_config_bigg.json
    │   ├── clip_model.py
    │   ├── clip_vision.py
    │   ├── clip_vision_config_g.json
    │   ├── clip_vision_config_h.json
    │   ├── clip_vision_config_vitl.json
    │   ├── conds.py
    │   ├── controlnet.py
    │   ├── diffusers_convert.py
    │   ├── diffusers_load.py
    │   ├── gligen.py
    │   ├── latent_formats.py
    │   ├── lora.py
    │   ├── model_base.py
    │   ├── model_detection.py
    │   ├── model_management.py
    │   ├── model_patcher.py
    │   ├── model_sampling.py
    │   ├── ops.py
    │   ├── options.py
    │   ├── sample.py
    │   ├── samplers.py
    │   ├── sd.py
    │   ├── sd1_clip.py
    │   ├── sd1_clip_config.json
    │   ├── sd1_tokenizer
    │   │   ├── merges.txt
    │   │   ├── special_tokens_map.json
    │   │   ├── tokenizer_config.json
    │   │   └── vocab.json
    │   ├── sd2_clip.py
    │   ├── sd2_clip_config.json
    │   ├── sdxl_clip.py
    │   ├── supported_models.py
    │   ├── supported_models_base.py
    │   └── utils.py
    ├── pfn
    │   ├── __init__.py
    │   ├── architecture
    │   │   ├── DAT.py
    │   │   ├── HAT.py
    │   │   ├── LICENSE-DAT
    │   │   ├── LICENSE-ESRGAN
    │   │   ├── LICENSE-HAT
    │   │   ├── LICENSE-RealESRGAN
    │   │   ├── LICENSE-SCUNet
    │   │   ├── LICENSE-SPSR
    │   │   ├── LICENSE-SwiftSRGAN
    │   │   ├── LICENSE-Swin2SR
    │   │   ├── LICENSE-SwinIR
    │   │   ├── LICENSE-lama
    │   │   ├── LaMa.py
    │   │   ├── OmniSR
    │   │   │   ├── ChannelAttention.py
    │   │   │   ├── LICENSE
    │   │   │   ├── OSA.py
    │   │   │   ├── OSAG.py
    │   │   │   ├── OmniSR.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── OSA.cpython-310.pyc
    │   │   │   │   ├── OSAG.cpython-310.pyc
    │   │   │   │   ├── OmniSR.cpython-310.pyc
    │   │   │   │   ├── esa.cpython-310.pyc
    │   │   │   │   ├── layernorm.cpython-310.pyc
    │   │   │   │   └── pixelshuffle.cpython-310.pyc
    │   │   │   ├── esa.py
    │   │   │   ├── layernorm.py
    │   │   │   └── pixelshuffle.py
    │   │   ├── RRDB.py
    │   │   ├── SCUNet.py
    │   │   ├── SPSR.py
    │   │   ├── SRVGG.py
    │   │   ├── SwiftSRGAN.py
    │   │   ├── Swin2SR.py
    │   │   ├── SwinIR.py
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── DAT.cpython-310.pyc
    │   │   │   ├── HAT.cpython-310.pyc
    │   │   │   ├── LaMa.cpython-310.pyc
    │   │   │   ├── RRDB.cpython-310.pyc
    │   │   │   ├── SCUNet.cpython-310.pyc
    │   │   │   ├── SPSR.cpython-310.pyc
    │   │   │   ├── SRVGG.cpython-310.pyc
    │   │   │   ├── SwiftSRGAN.cpython-310.pyc
    │   │   │   ├── Swin2SR.cpython-310.pyc
    │   │   │   ├── SwinIR.cpython-310.pyc
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   └── block.cpython-310.pyc
    │   │   ├── block.py
    │   │   ├── face
    │   │   │   ├── LICENSE-GFPGAN
    │   │   │   ├── LICENSE-RestoreFormer
    │   │   │   ├── LICENSE-codeformer
    │   │   │   ├── __pycache__
    │   │   │   │   ├── codeformer.cpython-310.pyc
    │   │   │   │   ├── gfpganv1_clean_arch.cpython-310.pyc
    │   │   │   │   ├── restoreformer_arch.cpython-310.pyc
    │   │   │   │   └── stylegan2_clean_arch.cpython-310.pyc
    │   │   │   ├── arcface_arch.py
    │   │   │   ├── codeformer.py
    │   │   │   ├── fused_act.py
    │   │   │   ├── gfpgan_bilinear_arch.py
    │   │   │   ├── gfpganv1_arch.py
    │   │   │   ├── gfpganv1_clean_arch.py
    │   │   │   ├── restoreformer_arch.py
    │   │   │   ├── stylegan2_arch.py
    │   │   │   ├── stylegan2_bilinear_arch.py
    │   │   │   ├── stylegan2_clean_arch.py
    │   │   │   └── upfirdn2d.py
    │   │   └── timm
    │   │   │   ├── LICENSE
    │   │   │   ├── __pycache__
    │   │   │   ├── drop.cpython-310.pyc
    │   │   │   ├── helpers.cpython-310.pyc
    │   │   │   └── weight_init.cpython-310.pyc
    │   │   │   ├── drop.py
    │   │   │   ├── helpers.py
    │   │   │   └── weight_init.py
    │   ├── model_loading.py
    │   └── types.py
    ├── t2ia
    │   └── adapter.py
    ├── taesd
    │   └── taesd.py
    ├── unipc
    │   └── uni_pc.py
    └── utils
    │   ├── latent_visualization.py
    │   └── path_utils.py
├── make_img.ipynb
├── models
    ├── inpaint
    │   └── put_inpaint_here
    ├── loras
    │   └── put_loras_here
    └── upscale_models
    │   └── put_esrgan_and_other_upscale_models_here
├── modules
    ├── anisotropic.py
    ├── async_worker.py
    ├── auth.py
    ├── config.py
    ├── constants.py
    ├── core.py
    ├── default_pipeline.py
    ├── flags.py
    ├── gradio_hijack.py
    ├── html.py
    ├── inpaint_worker.py
    ├── launch_util.py
    ├── localization.py
    ├── lora.py
    ├── meta_parser.py
    ├── model_loader.py
    ├── ops.py
    ├── patch.py
    ├── patch_clip.py
    ├── patch_precision.py
    ├── private_logger.py
    ├── sample_hijack.py
    ├── sdxl_styles.py
    ├── style_sorter.py
    ├── ui_gradio_extensions.py
    ├── upscaler.py
    └── util.py
├── positive.txt
├── readme.md
├── requirements.txt
├── sdxl_styles
    ├── sdxl_styles_diva.json
    ├── sdxl_styles_fooocus.json
    ├── sdxl_styles_marc_k3nt3l.json
    ├── sdxl_styles_mre.json
    ├── sdxl_styles_sai.json
    └── sdxl_styles_twri.json
├── train.py
└── utils
    ├── FooocusDpmpp2mSdeGpuKarras.py
    ├── __init__.py
    ├── add_fooocus_inpaint_head_patch.py
    ├── add_fooocus_inpaint_patch.py
    ├── mask_aug.py
    ├── orthogonal_decomposition.py
    └── prompt_style_enhance.py
/assets/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/assets/teaser.png
--------------------------------------------------------------------------------
/data/1_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/1_1.jpg
--------------------------------------------------------------------------------
/data/1_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/1_2.jpg -------------------------------------------------------------------------------- /data/2_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/2_1.jpg -------------------------------------------------------------------------------- /data/2_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/2_2.jpg -------------------------------------------------------------------------------- /data/3_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/3_1.jpg -------------------------------------------------------------------------------- /data/3_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/3_2.jpg -------------------------------------------------------------------------------- /data/4_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/4_1.jpg -------------------------------------------------------------------------------- /data/4_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/data/4_2.jpg -------------------------------------------------------------------------------- /infer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "\n", 11 | "DEVICE = \"cuda:1\"\n", 12 | "torch.cuda.set_device(DEVICE)\n", 13 | "\n", 14 | "from FooocusSDXLInpaintAllInOnePipeline import FooocusSDXLInpaintPipeline\n", 15 | "\n", 16 | "pipe = FooocusSDXLInpaintPipeline.from_pretrained(\n", 17 | " \"frankjoshua/juggernautXL_v8Rundiffusion\",\n", 18 | " torch_dtype=torch.float16,\n", 19 | " use_safetensors=True,\n", 20 | ").to(DEVICE)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": { 27 | "notebookRunGroups": { 28 | "groupValue": "1" 29 | } 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "lora_config = [\n", 34 | " {\n", 35 | " \"model_path\": f\"lora/xxx\",\n", 36 | " \"scale\": 1,\n", 37 | " \"for_raw_unet\": False,\n", 38 | " \"for_fooocus_unet\": True,\n", 39 | " },\n", 40 | "]\n", 41 | "\n", 42 | "pipe.preload_fooocus_unet(\n", 43 | " fooocus_model_path=\"./models/fooocus_inpaint/inpaint_v26.fooocus.patch\",\n", 44 | " lora_configs=lora_config,\n", 45 | " add_double_sa=False,\n", 46 | ")" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from diffusers.utils import load_image\n", 56 | "from PIL import Image\n", 57 | "\n", 58 | "\n", 59 | "img_url = f\"data/1_1.jpg\"\n", 60 | "mask_url = f\"data/1_2.jpg\"\n", 61 | "\n", 62 | "init_image = load_image(img_url).convert(\"RGB\")\n", 63 | "mask_image = 
load_image(mask_url).convert(\"RGB\")\n", 64 | "\n", 65 | "prompt = \"\"\n", 66 | "negative_prompt = \"\"" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# Infer!\n", 76 | "image = pipe(\n", 77 | " isf_global_time=20,\n", 78 | " isf_global_ia=1,\n", 79 | " decompose_prefix_prompt=\"a photo of a sks\",\n", 80 | " sks_decompose_words=[\"\"],\n", 81 | " fooocus_model_head_path=\"./models/fooocus_inpaint/fooocus_inpaint_head.pth\",\n", 82 | " fooocus_model_head_upscale_path=\"./models/upscale_models/fooocus_upscaler_s409985e5.bin\",\n", 83 | " pag_scale=1,\n", 84 | " guidance_scale=4,\n", 85 | " ref_image_type=\"no\", \n", 86 | " double_sa_alpha=1,\n", 87 | " save_self_attn=False,\n", 88 | " save_cross_attn=False,\n", 89 | " fooocus_time=0.8,\n", 90 | " inpaint_respective_field=0.5, \n", 91 | " sharpness=1, \n", 92 | " adm_scaler_positive=1.5, \n", 93 | " adm_scaler_negative=0.8, \n", 94 | " adm_scaler_end=0.3,\n", 95 | " seed=42,\n", 96 | " image=init_image,\n", 97 | " mask_image=mask_image,\n", 98 | " prompt=prompt,\n", 99 | " negative_prompt=negative_prompt,\n", 100 | " num_inference_steps=30,\n", 101 | " strength=1,\n", 102 | ")\n", 103 | "image.resize((512, 512))\n", 104 | "image" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "DreamMix", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.10.15" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_clip_sdxl.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | from ldm_patched.contrib.external import MAX_RESOLUTION 5 | 6 | class CLIPTextEncodeSDXLRefiner: 7 | @classmethod 8 | def INPUT_TYPES(s): 9 | return {"required": { 10 | "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}), 11 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 12 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 13 | "text": ("STRING", {"multiline": True}), "clip": ("CLIP", ), 14 | }} 15 | RETURN_TYPES = ("CONDITIONING",) 16 | FUNCTION = "encode" 17 | 18 | CATEGORY = "advanced/conditioning" 19 | 20 | def encode(self, clip, ascore, width, height, text): 21 | tokens = clip.tokenize(text) 22 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 23 | return ([[cond, {"pooled_output": pooled, "aesthetic_score": ascore, "width": width,"height": height}]], ) 24 | 25 | class CLIPTextEncodeSDXL: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { 29 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 30 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 31 | "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 32 | "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 33 | "target_width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 34 | "target_height": ("INT", {"default": 1024.0, "min": 0, 
"max": MAX_RESOLUTION}), 35 | "text_g": ("STRING", {"multiline": True, "default": "CLIP_G"}), "clip": ("CLIP", ), 36 | "text_l": ("STRING", {"multiline": True, "default": "CLIP_L"}), "clip": ("CLIP", ), 37 | }} 38 | RETURN_TYPES = ("CONDITIONING",) 39 | FUNCTION = "encode" 40 | 41 | CATEGORY = "advanced/conditioning" 42 | 43 | def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l): 44 | tokens = clip.tokenize(text_g) 45 | tokens["l"] = clip.tokenize(text_l)["l"] 46 | if len(tokens["l"]) != len(tokens["g"]): 47 | empty = clip.tokenize("") 48 | while len(tokens["l"]) < len(tokens["g"]): 49 | tokens["l"] += empty["l"] 50 | while len(tokens["l"]) > len(tokens["g"]): 51 | tokens["g"] += empty["g"] 52 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 53 | return ([[cond, {"pooled_output": pooled, "width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}]], ) 54 | 55 | NODE_CLASS_MAPPINGS = { 56 | "CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner, 57 | "CLIPTextEncodeSDXL": CLIPTextEncodeSDXL, 58 | } 59 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_freelunch.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | #code originally taken from: https://github.com/ChenyangSi/FreeU (under MIT License) 4 | 5 | import torch 6 | 7 | 8 | def Fourier_filter(x, threshold, scale): 9 | # FFT 10 | x_freq = torch.fft.fftn(x.float(), dim=(-2, -1)) 11 | x_freq = torch.fft.fftshift(x_freq, dim=(-2, -1)) 12 | 13 | B, C, H, W = x_freq.shape 14 | mask = torch.ones((B, C, H, W), device=x.device) 15 | 16 | crow, ccol = H // 2, W //2 17 | mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale 18 | x_freq = x_freq * mask 19 | 20 | # IFFT 21 | x_freq = torch.fft.ifftshift(x_freq, dim=(-2, -1)) 22 | x_filtered = torch.fft.ifftn(x_freq, dim=(-2, -1)).real 23 | 24 | return x_filtered.to(x.dtype) 25 | 26 | 27 | class FreeU: 28 | @classmethod 29 | def INPUT_TYPES(s): 30 | return {"required": { "model": ("MODEL",), 31 | "b1": ("FLOAT", {"default": 1.1, "min": 0.0, "max": 10.0, "step": 0.01}), 32 | "b2": ("FLOAT", {"default": 1.2, "min": 0.0, "max": 10.0, "step": 0.01}), 33 | "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), 34 | "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), 35 | }} 36 | RETURN_TYPES = ("MODEL",) 37 | FUNCTION = "patch" 38 | 39 | CATEGORY = "model_patches" 40 | 41 | def patch(self, model, b1, b2, s1, s2): 42 | model_channels = model.model.model_config.unet_config["model_channels"] 43 | scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} 44 | on_cpu_devices = {} 45 | 46 | def output_block_patch(h, hsp, transformer_options): 47 | scale = scale_dict.get(h.shape[1], None) 48 | if scale is not None: 49 | h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * scale[0] 50 | if hsp.device not in on_cpu_devices: 51 | try: 52 | hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) 53 | except: 54 | print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") 55 | on_cpu_devices[hsp.device] = True 56 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 57 | else: 58 | hsp = Fourier_filter(hsp.cpu(), threshold=1, 
scale=scale[1]).to(hsp.device) 59 | 60 | return h, hsp 61 | 62 | m = model.clone() 63 | m.set_model_output_block_patch(output_block_patch) 64 | return (m, ) 65 | 66 | class FreeU_V2: 67 | @classmethod 68 | def INPUT_TYPES(s): 69 | return {"required": { "model": ("MODEL",), 70 | "b1": ("FLOAT", {"default": 1.3, "min": 0.0, "max": 10.0, "step": 0.01}), 71 | "b2": ("FLOAT", {"default": 1.4, "min": 0.0, "max": 10.0, "step": 0.01}), 72 | "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), 73 | "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), 74 | }} 75 | RETURN_TYPES = ("MODEL",) 76 | FUNCTION = "patch" 77 | 78 | CATEGORY = "model_patches" 79 | 80 | def patch(self, model, b1, b2, s1, s2): 81 | model_channels = model.model.model_config.unet_config["model_channels"] 82 | scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} 83 | on_cpu_devices = {} 84 | 85 | def output_block_patch(h, hsp, transformer_options): 86 | scale = scale_dict.get(h.shape[1], None) 87 | if scale is not None: 88 | hidden_mean = h.mean(1).unsqueeze(1) 89 | B = hidden_mean.shape[0] 90 | hidden_max, _ = torch.max(hidden_mean.view(B, -1), dim=-1, keepdim=True) 91 | hidden_min, _ = torch.min(hidden_mean.view(B, -1), dim=-1, keepdim=True) 92 | hidden_mean = (hidden_mean - hidden_min.unsqueeze(2).unsqueeze(3)) / (hidden_max - hidden_min).unsqueeze(2).unsqueeze(3) 93 | 94 | h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * ((scale[0] - 1 ) * hidden_mean + 1) 95 | 96 | if hsp.device not in on_cpu_devices: 97 | try: 98 | hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) 99 | except: 100 | print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") 101 | on_cpu_devices[hsp.device] = True 102 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 103 | else: 104 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 105 | 106 | return h, hsp 107 | 108 | m = model.clone() 109 | m.set_model_output_block_patch(output_block_patch) 110 | return (m, ) 111 | 112 | NODE_CLASS_MAPPINGS = { 113 | "FreeU": FreeU, 114 | "FreeU_V2": FreeU_V2, 115 | } 116 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_hypernetwork.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.modules.utils 4 | import ldm_patched.utils.path_utils 5 | import torch 6 | 7 | def load_hypernetwork_patch(path, strength): 8 | sd = ldm_patched.modules.utils.load_torch_file(path, safe_load=True) 9 | activation_func = sd.get('activation_func', 'linear') 10 | is_layer_norm = sd.get('is_layer_norm', False) 11 | use_dropout = sd.get('use_dropout', False) 12 | activate_output = sd.get('activate_output', False) 13 | last_layer_dropout = sd.get('last_layer_dropout', False) 14 | 15 | valid_activation = { 16 | "linear": torch.nn.Identity, 17 | "relu": torch.nn.ReLU, 18 | "leakyrelu": torch.nn.LeakyReLU, 19 | "elu": torch.nn.ELU, 20 | "swish": torch.nn.Hardswish, 21 | "tanh": torch.nn.Tanh, 22 | "sigmoid": torch.nn.Sigmoid, 23 | "softsign": torch.nn.Softsign, 24 | "mish": torch.nn.Mish, 25 | } 26 | 27 | if activation_func not in valid_activation: 28 | print("Unsupported Hypernetwork format, if you report it I might implement it.", path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout) 29 | return None 
30 | 31 | out = {} 32 | 33 | for d in sd: 34 | try: 35 | dim = int(d) 36 | except: 37 | continue 38 | 39 | output = [] 40 | for index in [0, 1]: 41 | attn_weights = sd[dim][index] 42 | keys = attn_weights.keys() 43 | 44 | linears = filter(lambda a: a.endswith(".weight"), keys) 45 | linears = list(map(lambda a: a[:-len(".weight")], linears)) 46 | layers = [] 47 | 48 | i = 0 49 | while i < len(linears): 50 | lin_name = linears[i] 51 | last_layer = (i == (len(linears) - 1)) 52 | penultimate_layer = (i == (len(linears) - 2)) 53 | 54 | lin_weight = attn_weights['{}.weight'.format(lin_name)] 55 | lin_bias = attn_weights['{}.bias'.format(lin_name)] 56 | layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0]) 57 | layer.load_state_dict({"weight": lin_weight, "bias": lin_bias}) 58 | layers.append(layer) 59 | if activation_func != "linear": 60 | if (not last_layer) or (activate_output): 61 | layers.append(valid_activation[activation_func]()) 62 | if is_layer_norm: 63 | i += 1 64 | ln_name = linears[i] 65 | ln_weight = attn_weights['{}.weight'.format(ln_name)] 66 | ln_bias = attn_weights['{}.bias'.format(ln_name)] 67 | ln = torch.nn.LayerNorm(ln_weight.shape[0]) 68 | ln.load_state_dict({"weight": ln_weight, "bias": ln_bias}) 69 | layers.append(ln) 70 | if use_dropout: 71 | if (not last_layer) and (not penultimate_layer or last_layer_dropout): 72 | layers.append(torch.nn.Dropout(p=0.3)) 73 | i += 1 74 | 75 | output.append(torch.nn.Sequential(*layers)) 76 | out[dim] = torch.nn.ModuleList(output) 77 | 78 | class hypernetwork_patch: 79 | def __init__(self, hypernet, strength): 80 | self.hypernet = hypernet 81 | self.strength = strength 82 | def __call__(self, q, k, v, extra_options): 83 | dim = k.shape[-1] 84 | if dim in self.hypernet: 85 | hn = self.hypernet[dim] 86 | k = k + hn[0](k) * self.strength 87 | v = v + hn[1](v) * self.strength 88 | 89 | return q, k, v 90 | 91 | def to(self, device): 92 | for d in self.hypernet.keys(): 93 | self.hypernet[d] = self.hypernet[d].to(device) 94 | return self 95 | 96 | return hypernetwork_patch(out, strength) 97 | 98 | class HypernetworkLoader: 99 | @classmethod 100 | def INPUT_TYPES(s): 101 | return {"required": { "model": ("MODEL",), 102 | "hypernetwork_name": (ldm_patched.utils.path_utils.get_filename_list("hypernetworks"), ), 103 | "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), 104 | }} 105 | RETURN_TYPES = ("MODEL",) 106 | FUNCTION = "load_hypernetwork" 107 | 108 | CATEGORY = "loaders" 109 | 110 | def load_hypernetwork(self, model, hypernetwork_name, strength): 111 | hypernetwork_path = ldm_patched.utils.path_utils.get_full_path("hypernetworks", hypernetwork_name) 112 | model_hypernetwork = model.clone() 113 | patch = load_hypernetwork_patch(hypernetwork_path, strength) 114 | if patch is not None: 115 | model_hypernetwork.set_model_attn1_patch(patch) 116 | model_hypernetwork.set_model_attn2_patch(patch) 117 | return (model_hypernetwork,) 118 | 119 | NODE_CLASS_MAPPINGS = { 120 | "HypernetworkLoader": HypernetworkLoader 121 | } 122 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_hypertile.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | #Taken from: https://github.com/tfernd/HyperTile/ 4 | 5 | import math 6 | from einops import rearrange 7 | # Use torch rng for consistency across generations 8 | from torch import randint 9 | 10 | def 
random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: 11 | min_value = min(min_value, value) 12 | 13 | # All big divisors of value (inclusive) 14 | divisors = [i for i in range(min_value, value + 1) if value % i == 0] 15 | 16 | ns = [value // i for i in divisors[:max_options]] # has at least 1 element 17 | 18 | if len(ns) - 1 > 0: 19 | idx = randint(low=0, high=len(ns) - 1, size=(1,)).item() 20 | else: 21 | idx = 0 22 | 23 | return ns[idx] 24 | 25 | class HyperTile: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { "model": ("MODEL",), 29 | "tile_size": ("INT", {"default": 256, "min": 1, "max": 2048}), 30 | "swap_size": ("INT", {"default": 2, "min": 1, "max": 128}), 31 | "max_depth": ("INT", {"default": 0, "min": 0, "max": 10}), 32 | "scale_depth": ("BOOLEAN", {"default": False}), 33 | }} 34 | RETURN_TYPES = ("MODEL",) 35 | FUNCTION = "patch" 36 | 37 | CATEGORY = "model_patches" 38 | 39 | def patch(self, model, tile_size, swap_size, max_depth, scale_depth): 40 | model_channels = model.model.model_config.unet_config["model_channels"] 41 | 42 | latent_tile_size = max(32, tile_size) // 8 43 | self.temp = None 44 | 45 | def hypertile_in(q, k, v, extra_options): 46 | model_chans = q.shape[-2] 47 | orig_shape = extra_options['original_shape'] 48 | apply_to = [] 49 | for i in range(max_depth + 1): 50 | apply_to.append((orig_shape[-2] / (2 ** i)) * (orig_shape[-1] / (2 ** i))) 51 | 52 | if model_chans in apply_to: 53 | shape = extra_options["original_shape"] 54 | aspect_ratio = shape[-1] / shape[-2] 55 | 56 | hw = q.size(1) 57 | h, w = round(math.sqrt(hw * aspect_ratio)), round(math.sqrt(hw / aspect_ratio)) 58 | 59 | factor = (2 ** apply_to.index(model_chans)) if scale_depth else 1 60 | nh = random_divisor(h, latent_tile_size * factor, swap_size) 61 | nw = random_divisor(w, latent_tile_size * factor, swap_size) 62 | 63 | if nh * nw > 1: 64 | q = rearrange(q, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) 65 | self.temp = (nh, nw, h, w) 66 | return q, k, v 67 | 68 | return q, k, v 69 | def hypertile_out(out, extra_options): 70 | if self.temp is not None: 71 | nh, nw, h, w = self.temp 72 | self.temp = None 73 | out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) 74 | out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) 75 | return out 76 | 77 | 78 | m = model.clone() 79 | m.set_model_attn1_patch(hypertile_in) 80 | m.set_model_attn1_output_patch(hypertile_out) 81 | return (m, ) 82 | 83 | NODE_CLASS_MAPPINGS = { 84 | "HyperTile": HyperTile, 85 | } 86 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_latent.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.modules.utils 4 | import torch 5 | 6 | def reshape_latent_to(target_shape, latent): 7 | if latent.shape[1:] != target_shape[1:]: 8 | latent = ldm_patched.modules.utils.common_upscale(latent, target_shape[3], target_shape[2], "bilinear", "center") 9 | return ldm_patched.modules.utils.repeat_to_batch_size(latent, target_shape[0]) 10 | 11 | 12 | class LatentAdd: 13 | @classmethod 14 | def INPUT_TYPES(s): 15 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 16 | 17 | RETURN_TYPES = ("LATENT",) 18 | FUNCTION = "op" 19 | 20 | CATEGORY = "latent/advanced" 21 | 22 | def op(self, samples1, samples2): 23 | samples_out = 
samples1.copy() 24 | 25 | s1 = samples1["samples"] 26 | s2 = samples2["samples"] 27 | 28 | s2 = reshape_latent_to(s1.shape, s2) 29 | samples_out["samples"] = s1 + s2 30 | return (samples_out,) 31 | 32 | class LatentSubtract: 33 | @classmethod 34 | def INPUT_TYPES(s): 35 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 36 | 37 | RETURN_TYPES = ("LATENT",) 38 | FUNCTION = "op" 39 | 40 | CATEGORY = "latent/advanced" 41 | 42 | def op(self, samples1, samples2): 43 | samples_out = samples1.copy() 44 | 45 | s1 = samples1["samples"] 46 | s2 = samples2["samples"] 47 | 48 | s2 = reshape_latent_to(s1.shape, s2) 49 | samples_out["samples"] = s1 - s2 50 | return (samples_out,) 51 | 52 | class LatentMultiply: 53 | @classmethod 54 | def INPUT_TYPES(s): 55 | return {"required": { "samples": ("LATENT",), 56 | "multiplier": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), 57 | }} 58 | 59 | RETURN_TYPES = ("LATENT",) 60 | FUNCTION = "op" 61 | 62 | CATEGORY = "latent/advanced" 63 | 64 | def op(self, samples, multiplier): 65 | samples_out = samples.copy() 66 | 67 | s1 = samples["samples"] 68 | samples_out["samples"] = s1 * multiplier 69 | return (samples_out,) 70 | 71 | class LatentInterpolate: 72 | @classmethod 73 | def INPUT_TYPES(s): 74 | return {"required": { "samples1": ("LATENT",), 75 | "samples2": ("LATENT",), 76 | "ratio": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), 77 | }} 78 | 79 | RETURN_TYPES = ("LATENT",) 80 | FUNCTION = "op" 81 | 82 | CATEGORY = "latent/advanced" 83 | 84 | def op(self, samples1, samples2, ratio): 85 | samples_out = samples1.copy() 86 | 87 | s1 = samples1["samples"] 88 | s2 = samples2["samples"] 89 | 90 | s2 = reshape_latent_to(s1.shape, s2) 91 | 92 | m1 = torch.linalg.vector_norm(s1, dim=(1)) 93 | m2 = torch.linalg.vector_norm(s2, dim=(1)) 94 | 95 | s1 = torch.nan_to_num(s1 / m1) 96 | s2 = torch.nan_to_num(s2 / m2) 97 | 98 | t = (s1 * ratio + s2 * (1.0 - ratio)) 99 | mt = torch.linalg.vector_norm(t, dim=(1)) 100 | st = torch.nan_to_num(t / mt) 101 | 102 | samples_out["samples"] = st * (m1 * ratio + m2 * (1.0 - ratio)) 103 | return (samples_out,) 104 | 105 | class LatentBatch: 106 | @classmethod 107 | def INPUT_TYPES(s): 108 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 109 | 110 | RETURN_TYPES = ("LATENT",) 111 | FUNCTION = "batch" 112 | 113 | CATEGORY = "latent/batch" 114 | 115 | def batch(self, samples1, samples2): 116 | samples_out = samples1.copy() 117 | s1 = samples1["samples"] 118 | s2 = samples2["samples"] 119 | 120 | if s1.shape[1:] != s2.shape[1:]: 121 | s2 = ldm_patched.modules.utils.common_upscale(s2, s1.shape[3], s1.shape[2], "bilinear", "center") 122 | s = torch.cat((s1, s2), dim=0) 123 | samples_out["samples"] = s 124 | samples_out["batch_index"] = samples1.get("batch_index", [x for x in range(0, s1.shape[0])]) + samples2.get("batch_index", [x for x in range(0, s2.shape[0])]) 125 | return (samples_out,) 126 | 127 | class LatentBatchSeedBehavior: 128 | @classmethod 129 | def INPUT_TYPES(s): 130 | return {"required": { "samples": ("LATENT",), 131 | "seed_behavior": (["random", "fixed"],),}} 132 | 133 | RETURN_TYPES = ("LATENT",) 134 | FUNCTION = "op" 135 | 136 | CATEGORY = "latent/advanced" 137 | 138 | def op(self, samples, seed_behavior): 139 | samples_out = samples.copy() 140 | latent = samples["samples"] 141 | if seed_behavior == "random": 142 | if 'batch_index' in samples_out: 143 | samples_out.pop('batch_index') 144 | elif seed_behavior == "fixed": 145 | 
batch_number = samples_out.get("batch_index", [0])[0] 146 | samples_out["batch_index"] = [batch_number] * latent.shape[0] 147 | 148 | return (samples_out,) 149 | 150 | NODE_CLASS_MAPPINGS = { 151 | "LatentAdd": LatentAdd, 152 | "LatentSubtract": LatentSubtract, 153 | "LatentMultiply": LatentMultiply, 154 | "LatentInterpolate": LatentInterpolate, 155 | "LatentBatch": LatentBatch, 156 | "LatentBatchSeedBehavior": LatentBatchSeedBehavior, 157 | } 158 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_model_downscale.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.modules.utils 5 | 6 | class PatchModelAddDownscale: 7 | upscale_methods = ["bicubic", "nearest-exact", "bilinear", "area", "bislerp"] 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | return {"required": { "model": ("MODEL",), 11 | "block_number": ("INT", {"default": 3, "min": 1, "max": 32, "step": 1}), 12 | "downscale_factor": ("FLOAT", {"default": 2.0, "min": 0.1, "max": 9.0, "step": 0.001}), 13 | "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), 14 | "end_percent": ("FLOAT", {"default": 0.35, "min": 0.0, "max": 1.0, "step": 0.001}), 15 | "downscale_after_skip": ("BOOLEAN", {"default": True}), 16 | "downscale_method": (s.upscale_methods,), 17 | "upscale_method": (s.upscale_methods,), 18 | }} 19 | RETURN_TYPES = ("MODEL",) 20 | FUNCTION = "patch" 21 | 22 | CATEGORY = "_for_testing" 23 | 24 | def patch(self, model, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method): 25 | sigma_start = model.model.model_sampling.percent_to_sigma(start_percent) 26 | sigma_end = model.model.model_sampling.percent_to_sigma(end_percent) 27 | 28 | def input_block_patch(h, transformer_options): 29 | if transformer_options["block"][1] == block_number: 30 | sigma = transformer_options["sigmas"][0].item() 31 | if sigma <= sigma_start and sigma >= sigma_end: 32 | h = ldm_patched.modules.utils.common_upscale(h, round(h.shape[-1] * (1.0 / downscale_factor)), round(h.shape[-2] * (1.0 / downscale_factor)), downscale_method, "disabled") 33 | return h 34 | 35 | def output_block_patch(h, hsp, transformer_options): 36 | if h.shape[2] != hsp.shape[2]: 37 | h = ldm_patched.modules.utils.common_upscale(h, hsp.shape[-1], hsp.shape[-2], upscale_method, "disabled") 38 | return h, hsp 39 | 40 | m = model.clone() 41 | if downscale_after_skip: 42 | m.set_model_input_block_patch_after_skip(input_block_patch) 43 | else: 44 | m.set_model_input_block_patch(input_block_patch) 45 | m.set_model_output_block_patch(output_block_patch) 46 | return (m, ) 47 | 48 | NODE_CLASS_MAPPINGS = { 49 | "PatchModelAddDownscale": PatchModelAddDownscale, 50 | } 51 | 52 | NODE_DISPLAY_NAME_MAPPINGS = { 53 | # Sampling 54 | "PatchModelAddDownscale": "PatchModelAddDownscale (Kohya Deep Shrink)", 55 | } 56 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_perpneg.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.modules.model_management 5 | import ldm_patched.modules.sample 6 | import ldm_patched.modules.samplers 7 | import ldm_patched.modules.utils 8 | 9 | 10 | class PerpNeg: 11 | @classmethod 
12 | def INPUT_TYPES(s): 13 | return {"required": {"model": ("MODEL", ), 14 | "empty_conditioning": ("CONDITIONING", ), 15 | "neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}), 16 | }} 17 | RETURN_TYPES = ("MODEL",) 18 | FUNCTION = "patch" 19 | 20 | CATEGORY = "_for_testing" 21 | 22 | def patch(self, model, empty_conditioning, neg_scale): 23 | m = model.clone() 24 | nocond = ldm_patched.modules.sample.convert_cond(empty_conditioning) 25 | 26 | def cfg_function(args): 27 | model = args["model"] 28 | noise_pred_pos = args["cond_denoised"] 29 | noise_pred_neg = args["uncond_denoised"] 30 | cond_scale = args["cond_scale"] 31 | x = args["input"] 32 | sigma = args["sigma"] 33 | model_options = args["model_options"] 34 | nocond_processed = ldm_patched.modules.samplers.encode_model_conds(model.extra_conds, nocond, x, x.device, "negative") 35 | 36 | (noise_pred_nocond, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, nocond_processed, None, x, sigma, model_options) 37 | 38 | pos = noise_pred_pos - noise_pred_nocond 39 | neg = noise_pred_neg - noise_pred_nocond 40 | perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg 41 | perp_neg = perp * neg_scale 42 | cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg) 43 | cfg_result = x - cfg_result 44 | return cfg_result 45 | 46 | m.set_model_sampler_cfg_function(cfg_function) 47 | 48 | return (m, ) 49 | 50 | 51 | NODE_CLASS_MAPPINGS = { 52 | "PerpNeg": PerpNeg, 53 | } 54 | 55 | NODE_DISPLAY_NAME_MAPPINGS = { 56 | "PerpNeg": "Perp-Neg", 57 | } 58 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_sdupscale.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.contrib.external 5 | import ldm_patched.modules.utils 6 | 7 | class SD_4XUpscale_Conditioning: 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | return {"required": { "images": ("IMAGE",), 11 | "positive": ("CONDITIONING",), 12 | "negative": ("CONDITIONING",), 13 | "scale_ratio": ("FLOAT", {"default": 4.0, "min": 0.0, "max": 10.0, "step": 0.01}), 14 | "noise_augmentation": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), 15 | }} 16 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 17 | RETURN_NAMES = ("positive", "negative", "latent") 18 | 19 | FUNCTION = "encode" 20 | 21 | CATEGORY = "conditioning/upscale_diffusion" 22 | 23 | def encode(self, images, positive, negative, scale_ratio, noise_augmentation): 24 | width = max(1, round(images.shape[-2] * scale_ratio)) 25 | height = max(1, round(images.shape[-3] * scale_ratio)) 26 | 27 | pixels = ldm_patched.modules.utils.common_upscale((images.movedim(-1,1) * 2.0) - 1.0, width // 4, height // 4, "bilinear", "center") 28 | 29 | out_cp = [] 30 | out_cn = [] 31 | 32 | for t in positive: 33 | n = [t[0], t[1].copy()] 34 | n[1]['concat_image'] = pixels 35 | n[1]['noise_augmentation'] = noise_augmentation 36 | out_cp.append(n) 37 | 38 | for t in negative: 39 | n = [t[0], t[1].copy()] 40 | n[1]['concat_image'] = pixels 41 | n[1]['noise_augmentation'] = noise_augmentation 42 | out_cn.append(n) 43 | 44 | latent = torch.zeros([images.shape[0], 4, height // 4, width // 4]) 45 | return (out_cp, out_cn, {"samples":latent}) 46 | 47 | NODE_CLASS_MAPPINGS = { 48 | "SD_4XUpscale_Conditioning": SD_4XUpscale_Conditioning, 49 | } 50 | 
-------------------------------------------------------------------------------- /ldm_patched/contrib/external_stable3d.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.contrib.external 5 | import ldm_patched.modules.utils 6 | 7 | def camera_embeddings(elevation, azimuth): 8 | elevation = torch.as_tensor([elevation]) 9 | azimuth = torch.as_tensor([azimuth]) 10 | embeddings = torch.stack( 11 | [ 12 | torch.deg2rad( 13 | (90 - elevation) - (90) 14 | ), # Zero123 polar is 90-elevation 15 | torch.sin(torch.deg2rad(azimuth)), 16 | torch.cos(torch.deg2rad(azimuth)), 17 | torch.deg2rad( 18 | 90 - torch.full_like(elevation, 0) 19 | ), 20 | ], dim=-1).unsqueeze(1) 21 | 22 | return embeddings 23 | 24 | 25 | class StableZero123_Conditioning: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { "clip_vision": ("CLIP_VISION",), 29 | "init_image": ("IMAGE",), 30 | "vae": ("VAE",), 31 | "width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 32 | "height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 33 | "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), 34 | "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 35 | "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 36 | }} 37 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 38 | RETURN_NAMES = ("positive", "negative", "latent") 39 | 40 | FUNCTION = "encode" 41 | 42 | CATEGORY = "conditioning/3d_models" 43 | 44 | def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth): 45 | output = clip_vision.encode_image(init_image) 46 | pooled = output.image_embeds.unsqueeze(0) 47 | pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) 48 | encode_pixels = pixels[:,:,:,:3] 49 | t = vae.encode(encode_pixels) 50 | cam_embeds = camera_embeddings(elevation, azimuth) 51 | cond = torch.cat([pooled, cam_embeds.to(pooled.device).repeat((pooled.shape[0], 1, 1))], dim=-1) 52 | 53 | positive = [[cond, {"concat_latent_image": t}]] 54 | negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]] 55 | latent = torch.zeros([batch_size, 4, height // 8, width // 8]) 56 | return (positive, negative, {"samples":latent}) 57 | 58 | class StableZero123_Conditioning_Batched: 59 | @classmethod 60 | def INPUT_TYPES(s): 61 | return {"required": { "clip_vision": ("CLIP_VISION",), 62 | "init_image": ("IMAGE",), 63 | "vae": ("VAE",), 64 | "width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 65 | "height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 66 | "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), 67 | "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 68 | "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 69 | "elevation_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 70 | "azimuth_batch_increment": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}), 71 | }} 72 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 73 | RETURN_NAMES = ("positive", "negative", "latent") 74 | 75 | FUNCTION = "encode" 76 | 77 | CATEGORY = 
"conditioning/3d_models" 78 | 79 | def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth, elevation_batch_increment, azimuth_batch_increment): 80 | output = clip_vision.encode_image(init_image) 81 | pooled = output.image_embeds.unsqueeze(0) 82 | pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) 83 | encode_pixels = pixels[:,:,:,:3] 84 | t = vae.encode(encode_pixels) 85 | 86 | cam_embeds = [] 87 | for i in range(batch_size): 88 | cam_embeds.append(camera_embeddings(elevation, azimuth)) 89 | elevation += elevation_batch_increment 90 | azimuth += azimuth_batch_increment 91 | 92 | cam_embeds = torch.cat(cam_embeds, dim=0) 93 | cond = torch.cat([ldm_patched.modules.utils.repeat_to_batch_size(pooled, batch_size), cam_embeds], dim=-1) 94 | 95 | positive = [[cond, {"concat_latent_image": t}]] 96 | negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]] 97 | latent = torch.zeros([batch_size, 4, height // 8, width // 8]) 98 | return (positive, negative, {"samples":latent, "batch_index": [0] * batch_size}) 99 | 100 | 101 | NODE_CLASS_MAPPINGS = { 102 | "StableZero123_Conditioning": StableZero123_Conditioning, 103 | "StableZero123_Conditioning_Batched": StableZero123_Conditioning_Batched, 104 | } 105 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_upscale_model.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import os 4 | from ldm_patched.pfn import model_loading 5 | from ldm_patched.modules import model_management 6 | import torch 7 | import ldm_patched.modules.utils 8 | import ldm_patched.utils.path_utils 9 | 10 | class UpscaleModelLoader: 11 | @classmethod 12 | def INPUT_TYPES(s): 13 | return {"required": { "model_name": (ldm_patched.utils.path_utils.get_filename_list("upscale_models"), ), 14 | }} 15 | RETURN_TYPES = ("UPSCALE_MODEL",) 16 | FUNCTION = "load_model" 17 | 18 | CATEGORY = "loaders" 19 | 20 | def load_model(self, model_name): 21 | model_path = ldm_patched.utils.path_utils.get_full_path("upscale_models", model_name) 22 | sd = ldm_patched.modules.utils.load_torch_file(model_path, safe_load=True) 23 | if "module.layers.0.residual_group.blocks.0.norm1.weight" in sd: 24 | sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"module.":""}) 25 | out = model_loading.load_state_dict(sd).eval() 26 | return (out, ) 27 | 28 | 29 | class ImageUpscaleWithModel: 30 | @classmethod 31 | def INPUT_TYPES(s): 32 | return {"required": { "upscale_model": ("UPSCALE_MODEL",), 33 | "image": ("IMAGE",), 34 | }} 35 | RETURN_TYPES = ("IMAGE",) 36 | FUNCTION = "upscale" 37 | 38 | CATEGORY = "image/upscaling" 39 | 40 | def upscale(self, upscale_model, image): 41 | device = model_management.get_torch_device() 42 | upscale_model.to(device) 43 | in_img = image.movedim(-1,-3).to(device) 44 | free_memory = model_management.get_free_memory(device) 45 | 46 | tile = 512 47 | overlap = 32 48 | 49 | oom = True 50 | while oom: 51 | try: 52 | steps = in_img.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(in_img.shape[3], in_img.shape[2], tile_x=tile, tile_y=tile, overlap=overlap) 53 | pbar = ldm_patched.modules.utils.ProgressBar(steps) 54 | s = ldm_patched.modules.utils.tiled_scale(in_img, lambda a: upscale_model(a), tile_x=tile, tile_y=tile, overlap=overlap, 
upscale_amount=upscale_model.scale, pbar=pbar) 55 | oom = False 56 | except model_management.OOM_EXCEPTION as e: 57 | tile //= 2 58 | if tile < 128: 59 | raise e 60 | 61 | upscale_model.cpu() 62 | s = torch.clamp(s.movedim(-3,-1), min=0, max=1.0) 63 | return (s,) 64 | 65 | NODE_CLASS_MAPPINGS = { 66 | "UpscaleModelLoader": UpscaleModelLoader, 67 | "ImageUpscaleWithModel": ImageUpscaleWithModel 68 | } 69 | -------------------------------------------------------------------------------- /ldm_patched/contrib/external_video_model.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.contrib.external 4 | import torch 5 | import ldm_patched.modules.utils 6 | import ldm_patched.modules.sd 7 | import ldm_patched.utils.path_utils 8 | import ldm_patched.contrib.external_model_merging 9 | 10 | 11 | class ImageOnlyCheckpointLoader: 12 | @classmethod 13 | def INPUT_TYPES(s): 14 | return {"required": { "ckpt_name": (ldm_patched.utils.path_utils.get_filename_list("checkpoints"), ), 15 | }} 16 | RETURN_TYPES = ("MODEL", "CLIP_VISION", "VAE") 17 | FUNCTION = "load_checkpoint" 18 | 19 | CATEGORY = "loaders/video_models" 20 | 21 | def load_checkpoint(self, ckpt_name, output_vae=True, output_clip=True): 22 | ckpt_path = ldm_patched.utils.path_utils.get_full_path("checkpoints", ckpt_name) 23 | out = ldm_patched.modules.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=False, output_clipvision=True, embedding_directory=ldm_patched.utils.path_utils.get_folder_paths("embeddings")) 24 | return (out[0], out[3], out[2]) 25 | 26 | 27 | class SVD_img2vid_Conditioning: 28 | @classmethod 29 | def INPUT_TYPES(s): 30 | return {"required": { "clip_vision": ("CLIP_VISION",), 31 | "init_image": ("IMAGE",), 32 | "vae": ("VAE",), 33 | "width": ("INT", {"default": 1024, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 34 | "height": ("INT", {"default": 576, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}), 35 | "video_frames": ("INT", {"default": 14, "min": 1, "max": 4096}), 36 | "motion_bucket_id": ("INT", {"default": 127, "min": 1, "max": 1023}), 37 | "fps": ("INT", {"default": 6, "min": 1, "max": 1024}), 38 | "augmentation_level": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 10.0, "step": 0.01}) 39 | }} 40 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 41 | RETURN_NAMES = ("positive", "negative", "latent") 42 | 43 | FUNCTION = "encode" 44 | 45 | CATEGORY = "conditioning/video_models" 46 | 47 | def encode(self, clip_vision, init_image, vae, width, height, video_frames, motion_bucket_id, fps, augmentation_level): 48 | output = clip_vision.encode_image(init_image) 49 | pooled = output.image_embeds.unsqueeze(0) 50 | pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1) 51 | encode_pixels = pixels[:,:,:,:3] 52 | if augmentation_level > 0: 53 | encode_pixels += torch.randn_like(pixels) * augmentation_level 54 | t = vae.encode(encode_pixels) 55 | positive = [[pooled, {"motion_bucket_id": motion_bucket_id, "fps": fps, "augmentation_level": augmentation_level, "concat_latent_image": t}]] 56 | negative = [[torch.zeros_like(pooled), {"motion_bucket_id": motion_bucket_id, "fps": fps, "augmentation_level": augmentation_level, "concat_latent_image": torch.zeros_like(t)}]] 57 | latent = torch.zeros([video_frames, 4, height // 8, width // 8]) 58 | return 
(positive, negative, {"samples":latent}) 59 | 60 | class VideoLinearCFGGuidance: 61 | @classmethod 62 | def INPUT_TYPES(s): 63 | return {"required": { "model": ("MODEL",), 64 | "min_cfg": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.5, "round": 0.01}), 65 | }} 66 | RETURN_TYPES = ("MODEL",) 67 | FUNCTION = "patch" 68 | 69 | CATEGORY = "sampling/video_models" 70 | 71 | def patch(self, model, min_cfg): 72 | def linear_cfg(args): 73 | cond = args["cond"] 74 | uncond = args["uncond"] 75 | cond_scale = args["cond_scale"] 76 | 77 | scale = torch.linspace(min_cfg, cond_scale, cond.shape[0], device=cond.device).reshape((cond.shape[0], 1, 1, 1)) 78 | return uncond + scale * (cond - uncond) 79 | 80 | m = model.clone() 81 | m.set_model_sampler_cfg_function(linear_cfg) 82 | return (m, ) 83 | 84 | class ImageOnlyCheckpointSave(ldm_patched.contrib.external_model_merging.CheckpointSave): 85 | CATEGORY = "_for_testing" 86 | 87 | @classmethod 88 | def INPUT_TYPES(s): 89 | return {"required": { "model": ("MODEL",), 90 | "clip_vision": ("CLIP_VISION",), 91 | "vae": ("VAE",), 92 | "filename_prefix": ("STRING", {"default": "checkpoints/ldm_patched"}),}, 93 | "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},} 94 | 95 | def save(self, model, clip_vision, vae, filename_prefix, prompt=None, extra_pnginfo=None): 96 | ldm_patched.contrib.external_model_merging.save_checkpoint(model, clip_vision=clip_vision, vae=vae, filename_prefix=filename_prefix, output_dir=self.output_dir, prompt=prompt, extra_pnginfo=extra_pnginfo) 97 | return {} 98 | 99 | NODE_CLASS_MAPPINGS = { 100 | "ImageOnlyCheckpointLoader": ImageOnlyCheckpointLoader, 101 | "SVD_img2vid_Conditioning": SVD_img2vid_Conditioning, 102 | "VideoLinearCFGGuidance": VideoLinearCFGGuidance, 103 | "ImageOnlyCheckpointSave": ImageOnlyCheckpointSave, 104 | } 105 | 106 | NODE_DISPLAY_NAME_MAPPINGS = { 107 | "ImageOnlyCheckpointLoader": "Image Only Checkpoint Loader (img2vid model)", 108 | } 109 | -------------------------------------------------------------------------------- /ldm_patched/ldm/models/__pycache__/autoencoder.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/models/__pycache__/autoencoder.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/__pycache__/attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/__pycache__/attention.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/__pycache__/ema.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/__pycache__/ema.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/__pycache__/sub_quadratic_attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/__pycache__/sub_quadratic_attention.cpython-310.pyc 
-------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/upscaling.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/upscaling.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/__pycache__/util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/diffusionmodules/__pycache__/util.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/diffusionmodules/upscaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from functools import partial 5 | 6 | from .util import extract_into_tensor, make_beta_schedule 7 | from ldm_patched.ldm.util import default 8 | 9 | 10 | class AbstractLowScaleModel(nn.Module): 11 | # for concatenating a downsampled image to the latent representation 12 | def __init__(self, noise_schedule_config=None): 13 | super(AbstractLowScaleModel, self).__init__() 14 | if noise_schedule_config is not None: 15 | self.register_schedule(**noise_schedule_config) 16 | 17 | def register_schedule(self, beta_schedule="linear", timesteps=1000, 18 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 19 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, 20 | cosine_s=cosine_s) 21 | alphas = 1. 
- betas 22 | alphas_cumprod = np.cumprod(alphas, axis=0) 23 | alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 24 | 25 | timesteps, = betas.shape 26 | self.num_timesteps = int(timesteps) 27 | self.linear_start = linear_start 28 | self.linear_end = linear_end 29 | assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' 30 | 31 | to_torch = partial(torch.tensor, dtype=torch.float32) 32 | 33 | self.register_buffer('betas', to_torch(betas)) 34 | self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) 35 | self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) 36 | 37 | # calculations for diffusion q(x_t | x_{t-1}) and others 38 | self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) 39 | self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) 40 | self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) 41 | self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) 42 | self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) 43 | 44 | def q_sample(self, x_start, t, noise=None, seed=None): 45 | if noise is None: 46 | if seed is None: 47 | noise = torch.randn_like(x_start) 48 | else: 49 | noise = torch.randn(x_start.size(), dtype=x_start.dtype, layout=x_start.layout, generator=torch.manual_seed(seed)).to(x_start.device) 50 | return (extract_into_tensor(self.sqrt_alphas_cumprod.to(x_start.device), t, x_start.shape) * x_start + 51 | extract_into_tensor(self.sqrt_one_minus_alphas_cumprod.to(x_start.device), t, x_start.shape) * noise) 52 | 53 | def forward(self, x): 54 | return x, None 55 | 56 | def decode(self, x): 57 | return x 58 | 59 | 60 | class SimpleImageConcat(AbstractLowScaleModel): 61 | # no noise level conditioning 62 | def __init__(self): 63 | super(SimpleImageConcat, self).__init__(noise_schedule_config=None) 64 | self.max_noise_level = 0 65 | 66 | def forward(self, x): 67 | # fix to constant noise level 68 | return x, torch.zeros(x.shape[0], device=x.device).long() 69 | 70 | 71 | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): 72 | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cuda=False): 73 | super().__init__(noise_schedule_config=noise_schedule_config) 74 | self.max_noise_level = max_noise_level 75 | 76 | def forward(self, x, noise_level=None, seed=None): 77 | if noise_level is None: 78 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 79 | else: 80 | assert isinstance(noise_level, torch.Tensor) 81 | z = self.q_sample(x, noise_level, seed=seed) 82 | return z, noise_level 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/distributions/__pycache__/__init__.cpython-310.pyc 
-------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/__pycache__/distributions.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/distributions/__pycache__/distributions.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 | def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 
81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 
78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) 81 | -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/encoders/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/__pycache__/noise_aug_modules.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/ldm/modules/encoders/__pycache__/noise_aug_modules.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/ldm/modules/encoders/noise_aug_modules.py: -------------------------------------------------------------------------------- 1 | from ..diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation 2 | from ..diffusionmodules.openaimodel import Timestep 3 | import torch 4 | 5 | class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation): 6 | def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | if clip_stats_path is None: 9 | clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) 10 | else: 11 | clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu") 12 | self.register_buffer("data_mean", clip_mean[None, :], persistent=False) 13 | self.register_buffer("data_std", clip_std[None, :], persistent=False) 14 | self.time_embed = Timestep(timestep_dim) 15 | 16 | def scale(self, x): 17 | # re-normalize to centered mean and unit variance 18 | x = (x - self.data_mean.to(x.device)) * 1. / self.data_std.to(x.device) 19 | return x 20 | 21 | def unscale(self, x): 22 | # back to original data stats 23 | x = (x * self.data_std.to(x.device)) + self.data_mean.to(x.device) 24 | return x 25 | 26 | def forward(self, x, noise_level=None, seed=None): 27 | if noise_level is None: 28 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 29 | else: 30 | assert isinstance(noise_level, torch.Tensor) 31 | x = self.scale(x) 32 | z = self.q_sample(x, noise_level, seed=seed) 33 | z = self.unscale(z) 34 | noise_level = self.time_embed(noise_level) 35 | return z, noise_level 36 | -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/chainer: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Preferred Infrastructure, Inc. 2 | Copyright (c) 2015 Preferred Networks, Inc. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/kdiffusion: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022 Katherine Crowson 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/ldm: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /ldm_patched/licenses-3rd/taesd: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ollin Boer Bohan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
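Looking back at distributions.py above: DiagonalGaussianDistribution is the posterior a VAE encoder produces, parameterised by a mean and log-variance stacked along the channel dimension. A small, hypothetical sketch of how it is typically consumed (the shapes are chosen purely for illustration):

    import torch
    from ldm_patched.ldm.modules.distributions.distributions import DiagonalGaussianDistribution

    # An encoder emits mean and logvar concatenated on the channel axis,
    # so 8 channels here correspond to a 4-channel latent (4 mean + 4 logvar).
    params = torch.randn(1, 8, 32, 32)
    posterior = DiagonalGaussianDistribution(params)

    z = posterior.sample()    # reparameterised draw, shape (1, 4, 32, 32)
    kl = posterior.kl()       # KL against a standard normal, one value per batch item
    mode = posterior.mode()   # deterministic alternative: just the mean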
-------------------------------------------------------------------------------- /ldm_patched/modules/checkpoint_pickle.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | load = pickle.load 4 | 5 | class Empty: 6 | pass 7 | 8 | class Unpickler(pickle.Unpickler): 9 | def find_class(self, module, name): 10 | #TODO: safe unpickle 11 | if module.startswith("pytorch_lightning"): 12 | return Empty 13 | return super().find_class(module, name) 14 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_config_bigg.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1280, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 5120, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 20, 18 | "num_hidden_layers": 32, 19 | "pad_token_id": 1, 20 | "projection_dim": 1280, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision.py: -------------------------------------------------------------------------------- 1 | from .utils import load_torch_file, transformers_convert, state_dict_prefix_replace 2 | import os 3 | import torch 4 | import json 5 | 6 | import ldm_patched.modules.ops 7 | import ldm_patched.modules.model_patcher 8 | import ldm_patched.modules.model_management 9 | import ldm_patched.modules.utils 10 | import ldm_patched.modules.clip_model 11 | 12 | class Output: 13 | def __getitem__(self, key): 14 | return getattr(self, key) 15 | def __setitem__(self, key, item): 16 | setattr(self, key, item) 17 | 18 | def clip_preprocess(image, size=224): 19 | mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype) 20 | std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype) 21 | image = image.movedim(-1, 1) 22 | if not (image.shape[2] == size and image.shape[3] == size): 23 | scale = (size / min(image.shape[2], image.shape[3])) 24 | image = torch.nn.functional.interpolate(image, size=(round(scale * image.shape[2]), round(scale * image.shape[3])), mode="bicubic", antialias=True) 25 | h = (image.shape[2] - size)//2 26 | w = (image.shape[3] - size)//2 27 | image = image[:,:,h:h+size,w:w+size] 28 | image = torch.clip((255. 
* image), 0, 255).round() / 255.0 29 | return (image - mean.view([3,1,1])) / std.view([3,1,1]) 30 | 31 | class ClipVisionModel(): 32 | def __init__(self, json_config): 33 | with open(json_config) as f: 34 | config = json.load(f) 35 | 36 | self.load_device = ldm_patched.modules.model_management.text_encoder_device() 37 | offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() 38 | self.dtype = ldm_patched.modules.model_management.text_encoder_dtype(self.load_device) 39 | self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.manual_cast) 40 | self.model.eval() 41 | 42 | self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) 43 | 44 | def load_sd(self, sd): 45 | return self.model.load_state_dict(sd, strict=False) 46 | 47 | def get_sd(self): 48 | return self.model.state_dict() 49 | 50 | def encode_image(self, image): 51 | ldm_patched.modules.model_management.load_model_gpu(self.patcher) 52 | pixel_values = clip_preprocess(image.to(self.load_device)).float() 53 | out = self.model(pixel_values=pixel_values, intermediate_output=-2) 54 | 55 | outputs = Output() 56 | outputs["last_hidden_state"] = out[0].to(ldm_patched.modules.model_management.intermediate_device()) 57 | outputs["image_embeds"] = out[2].to(ldm_patched.modules.model_management.intermediate_device()) 58 | outputs["penultimate_hidden_states"] = out[1].to(ldm_patched.modules.model_management.intermediate_device()) 59 | return outputs 60 | 61 | def convert_to_transformers(sd, prefix): 62 | sd_k = sd.keys() 63 | if "{}transformer.resblocks.0.attn.in_proj_weight".format(prefix) in sd_k: 64 | keys_to_replace = { 65 | "{}class_embedding".format(prefix): "vision_model.embeddings.class_embedding", 66 | "{}conv1.weight".format(prefix): "vision_model.embeddings.patch_embedding.weight", 67 | "{}positional_embedding".format(prefix): "vision_model.embeddings.position_embedding.weight", 68 | "{}ln_post.bias".format(prefix): "vision_model.post_layernorm.bias", 69 | "{}ln_post.weight".format(prefix): "vision_model.post_layernorm.weight", 70 | "{}ln_pre.bias".format(prefix): "vision_model.pre_layrnorm.bias", 71 | "{}ln_pre.weight".format(prefix): "vision_model.pre_layrnorm.weight", 72 | } 73 | 74 | for x in keys_to_replace: 75 | if x in sd_k: 76 | sd[keys_to_replace[x]] = sd.pop(x) 77 | 78 | if "{}proj".format(prefix) in sd_k: 79 | sd['visual_projection.weight'] = sd.pop("{}proj".format(prefix)).transpose(0, 1) 80 | 81 | sd = transformers_convert(sd, prefix, "vision_model.", 48) 82 | else: 83 | replace_prefix = {prefix: ""} 84 | sd = state_dict_prefix_replace(sd, replace_prefix) 85 | return sd 86 | 87 | def load_clipvision_from_sd(sd, prefix="", convert_keys=False): 88 | if convert_keys: 89 | sd = convert_to_transformers(sd, prefix) 90 | if "vision_model.encoder.layers.47.layer_norm1.weight" in sd: 91 | json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_g.json") 92 | elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd: 93 | json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json") 94 | elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd: 95 | json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json") 96 | else: 97 | return None 98 | 99 | clip = ClipVisionModel(json_config) 100 | m, u = clip.load_sd(sd) 101 | if len(m) > 0: 
102 | print("extra clip vision:", m) 103 | u = set(u) 104 | keys = list(sd.keys()) 105 | for k in keys: 106 | if k not in u: 107 | t = sd.pop(k) 108 | del t 109 | return clip 110 | 111 | def load(ckpt_path): 112 | sd = load_torch_file(ckpt_path) 113 | if "visual.transformer.resblocks.0.attn.in_proj_weight" in sd: 114 | return load_clipvision_from_sd(sd, prefix="visual.", convert_keys=True) 115 | else: 116 | return load_clipvision_from_sd(sd) 117 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision_config_g.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1664, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 8192, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 48, 15 | "patch_size": 14, 16 | "projection_dim": 1280, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision_config_h.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1280, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 5120, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 32, 15 | "patch_size": 14, 16 | "projection_dim": 1024, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /ldm_patched/modules/clip_vision_config_vitl.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "quick_gelu", 5 | "hidden_size": 1024, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 4096, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 24, 15 | "patch_size": 14, 16 | "projection_dim": 768, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /ldm_patched/modules/conds.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import ldm_patched.modules.utils 4 | 5 | 6 | 7 | class CONDRegular: 8 | def __init__(self, cond): 9 | self.cond = cond 10 | 11 | def _copy_with(self, cond): 12 | return self.__class__(cond) 13 | 14 | def process_cond(self, batch_size, device, **kwargs): 15 | return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(self.cond, batch_size).to(device)) 16 | 17 | def can_concat(self, other): 18 | if self.cond.shape != other.cond.shape: 19 | return False 20 | return True 21 | 22 | def concat(self, others): 23 | conds = [self.cond] 24 | for x in others: 25 | conds.append(x.cond) 26 | return torch.cat(conds) 27 | 28 | class CONDNoiseShape(CONDRegular): 29 | def process_cond(self, batch_size, device, area, **kwargs): 30 | data = self.cond[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] 31 | 
return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(data, batch_size).to(device)) 32 | 33 | 34 | class CONDCrossAttn(CONDRegular): 35 | def can_concat(self, other): 36 | s1 = self.cond.shape 37 | s2 = other.cond.shape 38 | if s1 != s2: 39 | if s1[0] != s2[0] or s1[2] != s2[2]: #these 2 cases should not happen 40 | return False 41 | 42 | mult_min = math.lcm(s1[1], s2[1]) 43 | diff = mult_min // min(s1[1], s2[1]) 44 | if diff > 4: #arbitrary limit on the padding because it's probably going to impact performance negatively if it's too much 45 | return False 46 | return True 47 | 48 | def concat(self, others): 49 | conds = [self.cond] 50 | crossattn_max_len = self.cond.shape[1] 51 | for x in others: 52 | c = x.cond 53 | crossattn_max_len = math.lcm(crossattn_max_len, c.shape[1]) 54 | conds.append(c) 55 | 56 | out = [] 57 | for c in conds: 58 | if c.shape[1] < crossattn_max_len: 59 | c = c.repeat(1, crossattn_max_len // c.shape[1], 1) #padding with repeat doesn't change result 60 | out.append(c) 61 | return torch.cat(out) 62 | 63 | class CONDConstant(CONDRegular): 64 | def __init__(self, cond): 65 | self.cond = cond 66 | 67 | def process_cond(self, batch_size, device, **kwargs): 68 | return self._copy_with(self.cond) 69 | 70 | def can_concat(self, other): 71 | if self.cond != other.cond: 72 | return False 73 | return True 74 | 75 | def concat(self, others): 76 | return self.cond 77 | -------------------------------------------------------------------------------- /ldm_patched/modules/diffusers_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import ldm_patched.modules.sd 4 | 5 | def first_file(path, filenames): 6 | for f in filenames: 7 | p = os.path.join(path, f) 8 | if os.path.exists(p): 9 | return p 10 | return None 11 | 12 | def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None): 13 | diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"] 14 | unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names) 15 | vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names) 16 | 17 | text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"] 18 | text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names) 19 | text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names) 20 | 21 | text_encoder_paths = [text_encoder1_path] 22 | if text_encoder2_path is not None: 23 | text_encoder_paths.append(text_encoder2_path) 24 | 25 | unet = ldm_patched.modules.sd.load_unet(unet_path) 26 | 27 | clip = None 28 | if output_clip: 29 | clip = ldm_patched.modules.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory) 30 | 31 | vae = None 32 | if output_vae: 33 | sd = ldm_patched.modules.utils.load_torch_file(vae_path) 34 | vae = ldm_patched.modules.sd.VAE(sd=sd) 35 | 36 | return (unet, clip, vae) 37 | -------------------------------------------------------------------------------- /ldm_patched/modules/latent_formats.py: -------------------------------------------------------------------------------- 1 | 2 | class LatentFormat: 3 | scale_factor = 1.0 4 | latent_rgb_factors = None 5 | taesd_decoder_name = None 6 | 7 | def process_in(self, latent): 8 | return latent * self.scale_factor 9 | 
10 | def process_out(self, latent): 11 | return latent / self.scale_factor 12 | 13 | class SD15(LatentFormat): 14 | def __init__(self, scale_factor=0.18215): 15 | self.scale_factor = scale_factor 16 | self.latent_rgb_factors = [ 17 | # R G B 18 | [ 0.3512, 0.2297, 0.3227], 19 | [ 0.3250, 0.4974, 0.2350], 20 | [-0.2829, 0.1762, 0.2721], 21 | [-0.2120, -0.2616, -0.7177] 22 | ] 23 | self.taesd_decoder_name = "taesd_decoder" 24 | 25 | class SDXL(LatentFormat): 26 | def __init__(self): 27 | self.scale_factor = 0.13025 28 | self.latent_rgb_factors = [ 29 | # R G B 30 | [ 0.3920, 0.4054, 0.4549], 31 | [-0.2634, -0.0196, 0.0653], 32 | [ 0.0568, 0.1687, -0.0755], 33 | [-0.3112, -0.2359, -0.2076] 34 | ] 35 | self.taesd_decoder_name = "taesdxl_decoder" 36 | 37 | class SD_X4(LatentFormat): 38 | def __init__(self): 39 | self.scale_factor = 0.08333 40 | -------------------------------------------------------------------------------- /ldm_patched/modules/model_sampling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule 4 | import math 5 | 6 | class EPS: 7 | def calculate_input(self, sigma, noise): 8 | sigma = sigma.view(sigma.shape[:1] + (1,) * (noise.ndim - 1)) 9 | return noise / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 10 | 11 | def calculate_denoised(self, sigma, model_output, model_input): 12 | sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) 13 | return model_input - model_output * sigma 14 | 15 | 16 | class V_PREDICTION(EPS): 17 | def calculate_denoised(self, sigma, model_output, model_input): 18 | sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) 19 | return model_input * self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) - model_output * sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 20 | 21 | 22 | class ModelSamplingDiscrete(torch.nn.Module): 23 | def __init__(self, model_config=None): 24 | super().__init__() 25 | 26 | if model_config is not None: 27 | sampling_settings = model_config.sampling_settings 28 | else: 29 | sampling_settings = {} 30 | 31 | beta_schedule = sampling_settings.get("beta_schedule", "linear") 32 | linear_start = sampling_settings.get("linear_start", 0.00085) 33 | linear_end = sampling_settings.get("linear_end", 0.012) 34 | 35 | self._register_schedule(given_betas=None, beta_schedule=beta_schedule, timesteps=1000, linear_start=linear_start, linear_end=linear_end, cosine_s=8e-3) 36 | self.sigma_data = 1.0 37 | 38 | def _register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, 39 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 40 | if given_betas is not None: 41 | betas = given_betas 42 | else: 43 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s) 44 | alphas = 1. 
- betas 45 | alphas_cumprod = torch.tensor(np.cumprod(alphas, axis=0), dtype=torch.float32) 46 | # alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 47 | 48 | timesteps, = betas.shape 49 | self.num_timesteps = int(timesteps) 50 | self.linear_start = linear_start 51 | self.linear_end = linear_end 52 | 53 | # self.register_buffer('betas', torch.tensor(betas, dtype=torch.float32)) 54 | # self.register_buffer('alphas_cumprod', torch.tensor(alphas_cumprod, dtype=torch.float32)) 55 | # self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32)) 56 | 57 | sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5 58 | self.set_sigmas(sigmas) 59 | 60 | def set_sigmas(self, sigmas): 61 | self.register_buffer('sigmas', sigmas) 62 | self.register_buffer('log_sigmas', sigmas.log()) 63 | 64 | @property 65 | def sigma_min(self): 66 | return self.sigmas[0] 67 | 68 | @property 69 | def sigma_max(self): 70 | return self.sigmas[-1] 71 | 72 | def timestep(self, sigma): 73 | log_sigma = sigma.log() 74 | dists = log_sigma.to(self.log_sigmas.device) - self.log_sigmas[:, None] 75 | return dists.abs().argmin(dim=0).view(sigma.shape).to(sigma.device) 76 | 77 | def sigma(self, timestep): 78 | t = torch.clamp(timestep.float().to(self.log_sigmas.device), min=0, max=(len(self.sigmas) - 1)) 79 | low_idx = t.floor().long() 80 | high_idx = t.ceil().long() 81 | w = t.frac() 82 | log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] 83 | return log_sigma.exp().to(timestep.device) 84 | 85 | def percent_to_sigma(self, percent): 86 | if percent <= 0.0: 87 | return 999999999.9 88 | if percent >= 1.0: 89 | return 0.0 90 | percent = 1.0 - percent 91 | return self.sigma(torch.tensor(percent * 999.0)).item() 92 | 93 | 94 | class ModelSamplingContinuousEDM(torch.nn.Module): 95 | def __init__(self, model_config=None): 96 | super().__init__() 97 | self.sigma_data = 1.0 98 | 99 | if model_config is not None: 100 | sampling_settings = model_config.sampling_settings 101 | else: 102 | sampling_settings = {} 103 | 104 | sigma_min = sampling_settings.get("sigma_min", 0.002) 105 | sigma_max = sampling_settings.get("sigma_max", 120.0) 106 | self.set_sigma_range(sigma_min, sigma_max) 107 | 108 | def set_sigma_range(self, sigma_min, sigma_max): 109 | sigmas = torch.linspace(math.log(sigma_min), math.log(sigma_max), 1000).exp() 110 | 111 | self.register_buffer('sigmas', sigmas) #for compatibility with some schedulers 112 | self.register_buffer('log_sigmas', sigmas.log()) 113 | 114 | @property 115 | def sigma_min(self): 116 | return self.sigmas[0] 117 | 118 | @property 119 | def sigma_max(self): 120 | return self.sigmas[-1] 121 | 122 | def timestep(self, sigma): 123 | return 0.25 * sigma.log() 124 | 125 | def sigma(self, timestep): 126 | return (timestep / 0.25).exp() 127 | 128 | def percent_to_sigma(self, percent): 129 | if percent <= 0.0: 130 | return 999999999.9 131 | if percent >= 1.0: 132 | return 0.0 133 | percent = 1.0 - percent 134 | 135 | log_sigma_min = math.log(self.sigma_min) 136 | return math.exp((math.log(self.sigma_max) - log_sigma_min) * percent + log_sigma_min) 137 | -------------------------------------------------------------------------------- /ldm_patched/modules/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import ldm_patched.modules.model_management 3 | 4 | def cast_bias_weight(s, input): 5 | bias = None 6 | non_blocking = 
ldm_patched.modules.model_management.device_supports_non_blocking(input.device) 7 | if s.bias is not None: 8 | bias = s.bias.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) 9 | weight = s.weight.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) 10 | return weight, bias 11 | 12 | 13 | class disable_weight_init: 14 | class Linear(torch.nn.Linear): 15 | ldm_patched_cast_weights = False 16 | def reset_parameters(self): 17 | return None 18 | 19 | def forward_ldm_patched_cast_weights(self, input): 20 | weight, bias = cast_bias_weight(self, input) 21 | return torch.nn.functional.linear(input, weight, bias) 22 | 23 | def forward(self, *args, **kwargs): 24 | if self.ldm_patched_cast_weights: 25 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 26 | else: 27 | return super().forward(*args, **kwargs) 28 | 29 | class Conv2d(torch.nn.Conv2d): 30 | ldm_patched_cast_weights = False 31 | def reset_parameters(self): 32 | return None 33 | 34 | def forward_ldm_patched_cast_weights(self, input): 35 | weight, bias = cast_bias_weight(self, input) 36 | return self._conv_forward(input, weight, bias) 37 | 38 | def forward(self, *args, **kwargs): 39 | if self.ldm_patched_cast_weights: 40 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 41 | else: 42 | return super().forward(*args, **kwargs) 43 | 44 | class Conv3d(torch.nn.Conv3d): 45 | ldm_patched_cast_weights = False 46 | def reset_parameters(self): 47 | return None 48 | 49 | def forward_ldm_patched_cast_weights(self, input): 50 | weight, bias = cast_bias_weight(self, input) 51 | return self._conv_forward(input, weight, bias) 52 | 53 | def forward(self, *args, **kwargs): 54 | if self.ldm_patched_cast_weights: 55 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 56 | else: 57 | return super().forward(*args, **kwargs) 58 | 59 | class GroupNorm(torch.nn.GroupNorm): 60 | ldm_patched_cast_weights = False 61 | def reset_parameters(self): 62 | return None 63 | 64 | def forward_ldm_patched_cast_weights(self, input): 65 | weight, bias = cast_bias_weight(self, input) 66 | return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps) 67 | 68 | def forward(self, *args, **kwargs): 69 | if self.ldm_patched_cast_weights: 70 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 71 | else: 72 | return super().forward(*args, **kwargs) 73 | 74 | 75 | class LayerNorm(torch.nn.LayerNorm): 76 | ldm_patched_cast_weights = False 77 | def reset_parameters(self): 78 | return None 79 | 80 | def forward_ldm_patched_cast_weights(self, input): 81 | weight, bias = cast_bias_weight(self, input) 82 | return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps) 83 | 84 | def forward(self, *args, **kwargs): 85 | if self.ldm_patched_cast_weights: 86 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 87 | else: 88 | return super().forward(*args, **kwargs) 89 | 90 | @classmethod 91 | def conv_nd(s, dims, *args, **kwargs): 92 | if dims == 2: 93 | return s.Conv2d(*args, **kwargs) 94 | elif dims == 3: 95 | return s.Conv3d(*args, **kwargs) 96 | else: 97 | raise ValueError(f"unsupported dimensions: {dims}") 98 | 99 | 100 | class manual_cast(disable_weight_init): 101 | class Linear(disable_weight_init.Linear): 102 | ldm_patched_cast_weights = True 103 | 104 | class Conv2d(disable_weight_init.Conv2d): 105 | ldm_patched_cast_weights = True 106 | 107 | class Conv3d(disable_weight_init.Conv3d): 108 | ldm_patched_cast_weights = True 109 | 110 | class 
GroupNorm(disable_weight_init.GroupNorm): 111 | ldm_patched_cast_weights = True 112 | 113 | class LayerNorm(disable_weight_init.LayerNorm): 114 | ldm_patched_cast_weights = True 115 | -------------------------------------------------------------------------------- /ldm_patched/modules/options.py: -------------------------------------------------------------------------------- 1 | 2 | args_parsing = False 3 | 4 | def enable_args_parsing(enable=True): 5 | global args_parsing 6 | args_parsing = enable 7 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd1_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "openai/clip-vit-large-patch14", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "quick_gelu", 11 | "hidden_size": 768, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 3072, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 12, 19 | "num_hidden_layers": 12, 20 | "pad_token_id": 1, 21 | "projection_dim": 768, 22 | "torch_dtype": "float32", 23 | "transformers_version": "4.24.0", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd1_tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "<|startoftext|>", 4 | "lstrip": false, 5 | "normalized": true, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": true, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "<|endoftext|>", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": true, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd1_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": { 4 | "__type": "AddedToken", 5 | "content": "<|startoftext|>", 6 | "lstrip": false, 7 | "normalized": true, 8 | "rstrip": false, 9 | "single_word": false 10 | }, 11 | "do_lower_case": true, 12 | "eos_token": { 13 | "__type": "AddedToken", 14 | "content": "<|endoftext|>", 15 | "lstrip": false, 16 | "normalized": true, 17 | "rstrip": false, 18 | "single_word": false 19 | }, 20 | "errors": "replace", 21 | "model_max_length": 77, 22 | "name_or_path": "openai/clip-vit-large-patch14", 23 | "pad_token": "<|endoftext|>", 24 | "special_tokens_map_file": "./special_tokens_map.json", 25 | "tokenizer_class": "CLIPTokenizer", 26 | "unk_token": { 27 | "__type": "AddedToken", 28 | "content": "<|endoftext|>", 29 | "lstrip": false, 30 | "normalized": true, 31 | "rstrip": false, 32 | "single_word": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd2_clip.py: -------------------------------------------------------------------------------- 1 | from ldm_patched.modules import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SD2ClipHModel(sd1_clip.SDClipModel): 6 | 
def __init__(self, arch="ViT-H-14", device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd2_clip_config.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 49406, "end": 49407, "pad": 0}) 13 | 14 | class SD2ClipHTokenizer(sd1_clip.SDTokenizer): 15 | def __init__(self, tokenizer_path=None, embedding_directory=None): 16 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024) 17 | 18 | class SD2Tokenizer(sd1_clip.SD1Tokenizer): 19 | def __init__(self, embedding_directory=None): 20 | super().__init__(embedding_directory=embedding_directory, clip_name="h", tokenizer=SD2ClipHTokenizer) 21 | 22 | class SD2ClipModel(sd1_clip.SD1ClipModel): 23 | def __init__(self, device="cpu", dtype=None, **kwargs): 24 | super().__init__(device=device, dtype=dtype, clip_name="h", clip_model=SD2ClipHModel, **kwargs) 25 | -------------------------------------------------------------------------------- /ldm_patched/modules/sd2_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1024, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 4096, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 16, 18 | "num_hidden_layers": 24, 19 | "pad_token_id": 1, 20 | "projection_dim": 1024, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /ldm_patched/modules/sdxl_clip.py: -------------------------------------------------------------------------------- 1 | from ldm_patched.modules import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SDXLClipG(sd1_clip.SDClipModel): 6 | def __init__(self, device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_config_bigg.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, 13 | special_tokens={"start": 49406, "end": 49407, "pad": 0}, layer_norm_hidden_state=False) 14 | 15 | def load_sd(self, sd): 16 | return super().load_sd(sd) 17 | 18 | class SDXLClipGTokenizer(sd1_clip.SDTokenizer): 19 | def __init__(self, tokenizer_path=None, embedding_directory=None): 20 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g') 21 | 22 | 23 | class SDXLTokenizer: 24 | def __init__(self, embedding_directory=None): 25 | self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory) 26 | self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory) 27 | 28 | def tokenize_with_weights(self, text:str, return_word_ids=False): 29 | out = {} 30 | out["g"] = 
self.clip_g.tokenize_with_weights(text, return_word_ids) 31 | out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) 32 | return out 33 | 34 | def untokenize(self, token_weight_pair): 35 | return self.clip_g.untokenize(token_weight_pair) 36 | 37 | class SDXLClipModel(torch.nn.Module): 38 | def __init__(self, device="cpu", dtype=None): 39 | super().__init__() 40 | self.clip_l = sd1_clip.SDClipModel(layer="hidden", layer_idx=-2, device=device, dtype=dtype, layer_norm_hidden_state=False) 41 | self.clip_g = SDXLClipG(device=device, dtype=dtype) 42 | 43 | def clip_layer(self, layer_idx): 44 | self.clip_l.clip_layer(layer_idx) 45 | self.clip_g.clip_layer(layer_idx) 46 | 47 | def reset_clip_layer(self): 48 | self.clip_g.reset_clip_layer() 49 | self.clip_l.reset_clip_layer() 50 | 51 | def encode_token_weights(self, token_weight_pairs): 52 | token_weight_pairs_g = token_weight_pairs["g"] 53 | token_weight_pairs_l = token_weight_pairs["l"] 54 | g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs_g) 55 | l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l) 56 | return torch.cat([l_out, g_out], dim=-1), g_pooled 57 | 58 | def load_sd(self, sd): 59 | if "text_model.encoder.layers.30.mlp.fc1.weight" in sd: 60 | return self.clip_g.load_sd(sd) 61 | else: 62 | return self.clip_l.load_sd(sd) 63 | 64 | class SDXLRefinerClipModel(sd1_clip.SD1ClipModel): 65 | def __init__(self, device="cpu", dtype=None): 66 | super().__init__(device=device, dtype=dtype, clip_name="g", clip_model=SDXLClipG) 67 | -------------------------------------------------------------------------------- /ldm_patched/modules/supported_models_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . import model_base 3 | from . import utils 4 | from . 
import latent_formats 5 | 6 | class ClipTarget: 7 | def __init__(self, tokenizer, clip): 8 | self.clip = clip 9 | self.tokenizer = tokenizer 10 | self.params = {} 11 | 12 | class BASE: 13 | unet_config = {} 14 | unet_extra_config = { 15 | "num_heads": -1, 16 | "num_head_channels": 64, 17 | } 18 | 19 | clip_prefix = [] 20 | clip_vision_prefix = None 21 | noise_aug_config = None 22 | sampling_settings = {} 23 | latent_format = latent_formats.LatentFormat 24 | 25 | manual_cast_dtype = None 26 | 27 | @classmethod 28 | def matches(s, unet_config): 29 | for k in s.unet_config: 30 | if s.unet_config[k] != unet_config[k]: 31 | return False 32 | return True 33 | 34 | def model_type(self, state_dict, prefix=""): 35 | return model_base.ModelType.EPS 36 | 37 | def inpaint_model(self): 38 | return self.unet_config["in_channels"] > 4 39 | 40 | def __init__(self, unet_config): 41 | self.unet_config = unet_config 42 | self.latent_format = self.latent_format() 43 | for x in self.unet_extra_config: 44 | self.unet_config[x] = self.unet_extra_config[x] 45 | 46 | def get_model(self, state_dict, prefix="", device=None): 47 | if self.noise_aug_config is not None: 48 | out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=self.model_type(state_dict, prefix), device=device) 49 | else: 50 | out = model_base.BaseModel(self, model_type=self.model_type(state_dict, prefix), device=device) 51 | if self.inpaint_model(): 52 | out.set_inpaint() 53 | return out 54 | 55 | def process_clip_state_dict(self, state_dict): 56 | return state_dict 57 | 58 | def process_unet_state_dict(self, state_dict): 59 | return state_dict 60 | 61 | def process_vae_state_dict(self, state_dict): 62 | return state_dict 63 | 64 | def process_clip_state_dict_for_saving(self, state_dict): 65 | replace_prefix = {"": "cond_stage_model."} 66 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 67 | 68 | def process_clip_vision_state_dict_for_saving(self, state_dict): 69 | replace_prefix = {} 70 | if self.clip_vision_prefix is not None: 71 | replace_prefix[""] = self.clip_vision_prefix 72 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 73 | 74 | def process_unet_state_dict_for_saving(self, state_dict): 75 | replace_prefix = {"": "model.diffusion_model."} 76 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 77 | 78 | def process_vae_state_dict_for_saving(self, state_dict): 79 | replace_prefix = {"": "first_stage_model."} 80 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 81 | 82 | def set_manual_cast(self, manual_cast_dtype): 83 | self.manual_cast_dtype = manual_cast_dtype 84 | -------------------------------------------------------------------------------- /ldm_patched/pfn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/__init__.py -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/LICENSE-HAT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Xiangyu Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | 
copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/LICENSE-RealESRGAN: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Xintao Wang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/ChannelAttention.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class CA_layer(nn.Module): 7 | def __init__(self, channel, reduction=16): 8 | super(CA_layer, self).__init__() 9 | # global average pooling 10 | self.gap = nn.AdaptiveAvgPool2d(1) 11 | self.fc = nn.Sequential( 12 | nn.Conv2d(channel, channel // reduction, kernel_size=(1, 1), bias=False), 13 | nn.GELU(), 14 | nn.Conv2d(channel // reduction, channel, kernel_size=(1, 1), bias=False), 15 | # nn.Sigmoid() 16 | ) 17 | 18 | def forward(self, x): 19 | y = self.fc(self.gap(x)) 20 | return x * y.expand_as(x) 21 | 22 | 23 | class Simple_CA_layer(nn.Module): 24 | def __init__(self, channel): 25 | super(Simple_CA_layer, self).__init__() 26 | self.gap = nn.AdaptiveAvgPool2d(1) 27 | self.fc = nn.Conv2d( 28 | in_channels=channel, 29 | out_channels=channel, 30 | kernel_size=1, 31 | padding=0, 32 | stride=1, 33 | groups=1, 34 | bias=True, 35 | ) 36 | 37 | def forward(self, x): 38 | return x * self.fc(self.gap(x)) 39 | 40 | 41 | class ECA_layer(nn.Module): 42 | """Constructs a ECA module. 43 | Args: 44 | channel: Number of channels of the input feature map 45 | k_size: Adaptive selection of kernel size 46 | """ 47 | 48 | def __init__(self, channel): 49 | super(ECA_layer, self).__init__() 50 | 51 | b = 1 52 | gamma = 2 53 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 54 | k_size = k_size if k_size % 2 else k_size + 1 55 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 56 | self.conv = nn.Conv1d( 57 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 58 | ) 59 | # self.sigmoid = nn.Sigmoid() 60 | 61 | def forward(self, x): 62 | # x: input features with shape [b, c, h, w] 63 | # b, c, h, w = x.size() 64 | 65 | # feature descriptor on the global spatial information 66 | y = self.avg_pool(x) 67 | 68 | # Two different branches of ECA module 69 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 70 | 71 | # Multi-scale information fusion 72 | # y = self.sigmoid(y) 73 | 74 | return x * y.expand_as(x) 75 | 76 | 77 | class ECA_MaxPool_layer(nn.Module): 78 | """Constructs a ECA module. 
79 | Args: 80 | channel: Number of channels of the input feature map 81 | k_size: Adaptive selection of kernel size 82 | """ 83 | 84 | def __init__(self, channel): 85 | super(ECA_MaxPool_layer, self).__init__() 86 | 87 | b = 1 88 | gamma = 2 89 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 90 | k_size = k_size if k_size % 2 else k_size + 1 91 | self.max_pool = nn.AdaptiveMaxPool2d(1) 92 | self.conv = nn.Conv1d( 93 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 94 | ) 95 | # self.sigmoid = nn.Sigmoid() 96 | 97 | def forward(self, x): 98 | # x: input features with shape [b, c, h, w] 99 | # b, c, h, w = x.size() 100 | 101 | # feature descriptor on the global spatial information 102 | y = self.max_pool(x) 103 | 104 | # Two different branches of ECA module 105 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 106 | 107 | # Multi-scale information fusion 108 | # y = self.sigmoid(y) 109 | 110 | return x * y.expand_as(x) 111 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/OSAG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OSAG.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:08:49 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | 14 | import torch.nn as nn 15 | 16 | from .esa import ESA 17 | from .OSA import OSA_Block 18 | 19 | 20 | class OSAG(nn.Module): 21 | def __init__( 22 | self, 23 | channel_num=64, 24 | bias=True, 25 | block_num=4, 26 | ffn_bias=False, 27 | window_size=0, 28 | pe=False, 29 | ): 30 | super(OSAG, self).__init__() 31 | 32 | # print("window_size: %d" % (window_size)) 33 | # print("with_pe", pe) 34 | # print("ffn_bias: %d" % (ffn_bias)) 35 | 36 | # block_script_name = kwargs.get("block_script_name", "OSA") 37 | # block_class_name = kwargs.get("block_class_name", "OSA_Block") 38 | 39 | # script_name = "." 
+ block_script_name 40 | # package = __import__(script_name, fromlist=True) 41 | block_class = OSA_Block # getattr(package, block_class_name) 42 | group_list = [] 43 | for _ in range(block_num): 44 | temp_res = block_class( 45 | channel_num, 46 | bias, 47 | ffn_bias=ffn_bias, 48 | window_size=window_size, 49 | with_pe=pe, 50 | ) 51 | group_list.append(temp_res) 52 | group_list.append(nn.Conv2d(channel_num, channel_num, 1, 1, 0, bias=bias)) 53 | self.residual_layer = nn.Sequential(*group_list) 54 | esa_channel = max(channel_num // 4, 16) 55 | self.esa = ESA(esa_channel, channel_num) 56 | 57 | def forward(self, x): 58 | out = self.residual_layer(x) 59 | out = out + x 60 | return self.esa(out) 61 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/OmniSR.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OmniSR.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:06:36 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import math 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | 19 | from .OSAG import OSAG 20 | from .pixelshuffle import pixelshuffle_block 21 | 22 | 23 | class OmniSR(nn.Module): 24 | def __init__( 25 | self, 26 | state_dict, 27 | **kwargs, 28 | ): 29 | super(OmniSR, self).__init__() 30 | self.state = state_dict 31 | 32 | bias = True # Fine to assume this for now 33 | block_num = 1 # Fine to assume this for now 34 | ffn_bias = True 35 | pe = True 36 | 37 | num_feat = state_dict["input.weight"].shape[0] or 64 38 | num_in_ch = state_dict["input.weight"].shape[1] or 3 39 | num_out_ch = num_in_ch # we can just assume this for now. 
pixelshuffle smh 40 | 41 | pixelshuffle_shape = state_dict["up.0.weight"].shape[0] 42 | up_scale = math.sqrt(pixelshuffle_shape / num_out_ch) 43 | if up_scale - int(up_scale) > 0: 44 | print( 45 | "out_nc is probably different than in_nc, scale calculation might be wrong" 46 | ) 47 | up_scale = int(up_scale) 48 | res_num = 0 49 | for key in state_dict.keys(): 50 | if "residual_layer" in key: 51 | temp_res_num = int(key.split(".")[1]) 52 | if temp_res_num > res_num: 53 | res_num = temp_res_num 54 | res_num = res_num + 1 # zero-indexed 55 | 56 | residual_layer = [] 57 | self.res_num = res_num 58 | 59 | if ( 60 | "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" 61 | in state_dict.keys() 62 | ): 63 | rel_pos_bias_weight = state_dict[ 64 | "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" 65 | ].shape[0] 66 | self.window_size = int((math.sqrt(rel_pos_bias_weight) + 1) / 2) 67 | else: 68 | self.window_size = 8 69 | 70 | self.up_scale = up_scale 71 | 72 | for _ in range(res_num): 73 | temp_res = OSAG( 74 | channel_num=num_feat, 75 | bias=bias, 76 | block_num=block_num, 77 | ffn_bias=ffn_bias, 78 | window_size=self.window_size, 79 | pe=pe, 80 | ) 81 | residual_layer.append(temp_res) 82 | self.residual_layer = nn.Sequential(*residual_layer) 83 | self.input = nn.Conv2d( 84 | in_channels=num_in_ch, 85 | out_channels=num_feat, 86 | kernel_size=3, 87 | stride=1, 88 | padding=1, 89 | bias=bias, 90 | ) 91 | self.output = nn.Conv2d( 92 | in_channels=num_feat, 93 | out_channels=num_feat, 94 | kernel_size=3, 95 | stride=1, 96 | padding=1, 97 | bias=bias, 98 | ) 99 | self.up = pixelshuffle_block(num_feat, num_out_ch, up_scale, bias=bias) 100 | 101 | # self.tail = pixelshuffle_block(num_feat,num_out_ch,up_scale,bias=bias) 102 | 103 | # for m in self.modules(): 104 | # if isinstance(m, nn.Conv2d): 105 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 106 | # m.weight.data.normal_(0, sqrt(2. 
/ n)) 107 | 108 | # chaiNNer specific stuff 109 | self.model_arch = "OmniSR" 110 | self.sub_type = "SR" 111 | self.in_nc = num_in_ch 112 | self.out_nc = num_out_ch 113 | self.num_feat = num_feat 114 | self.scale = up_scale 115 | 116 | self.supports_fp16 = True # TODO: Test this 117 | self.supports_bfp16 = True 118 | self.min_size_restriction = 16 119 | 120 | self.load_state_dict(state_dict, strict=False) 121 | 122 | def check_image_size(self, x): 123 | _, _, h, w = x.size() 124 | # import pdb; pdb.set_trace() 125 | mod_pad_h = (self.window_size - h % self.window_size) % self.window_size 126 | mod_pad_w = (self.window_size - w % self.window_size) % self.window_size 127 | # x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') 128 | x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "constant", 0) 129 | return x 130 | 131 | def forward(self, x): 132 | H, W = x.shape[2:] 133 | x = self.check_image_size(x) 134 | 135 | residual = self.input(x) 136 | out = self.residual_layer(residual) 137 | 138 | # origin 139 | out = torch.add(self.output(out), residual) 140 | out = self.up(out) 141 | 142 | out = out[:, :, : H * self.up_scale, : W * self.up_scale] 143 | return out 144 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/OSA.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/OSA.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/OSAG.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/OSAG.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/OmniSR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/OmniSR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/esa.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/esa.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/layernorm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/layernorm.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/__pycache__/pixelshuffle.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/OmniSR/__pycache__/pixelshuffle.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/layernorm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: layernorm.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Thursday, 20th April 2023 9:28:20 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | 17 | class LayerNormFunction(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, weight, bias, eps): 20 | ctx.eps = eps 21 | N, C, H, W = x.size() 22 | mu = x.mean(1, keepdim=True) 23 | var = (x - mu).pow(2).mean(1, keepdim=True) 24 | y = (x - mu) / (var + eps).sqrt() 25 | ctx.save_for_backward(y, var, weight) 26 | y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) 27 | return y 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | eps = ctx.eps 32 | 33 | N, C, H, W = grad_output.size() 34 | y, var, weight = ctx.saved_variables 35 | g = grad_output * weight.view(1, C, 1, 1) 36 | mean_g = g.mean(dim=1, keepdim=True) 37 | 38 | mean_gy = (g * y).mean(dim=1, keepdim=True) 39 | gx = 1.0 / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) 40 | return ( 41 | gx, 42 | (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), 43 | grad_output.sum(dim=3).sum(dim=2).sum(dim=0), 44 | None, 45 | ) 46 | 47 | 48 | class LayerNorm2d(nn.Module): 49 | def __init__(self, channels, eps=1e-6): 50 | super(LayerNorm2d, self).__init__() 51 | self.register_parameter("weight", nn.Parameter(torch.ones(channels))) 52 | self.register_parameter("bias", nn.Parameter(torch.zeros(channels))) 53 | self.eps = eps 54 | 55 | def forward(self, x): 56 | return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) 57 | 58 | 59 | class GRN(nn.Module): 60 | """GRN (Global Response Normalization) layer""" 61 | 62 | def __init__(self, dim): 63 | super().__init__() 64 | self.gamma = nn.Parameter(torch.zeros(1, dim, 1, 1)) 65 | self.beta = nn.Parameter(torch.zeros(1, dim, 1, 1)) 66 | 67 | def forward(self, x): 68 | Gx = torch.norm(x, p=2, dim=(2, 3), keepdim=True) 69 | Nx = Gx / (Gx.mean(dim=1, keepdim=True) + 1e-6) 70 | return self.gamma * (x * Nx) + self.beta + x 71 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/OmniSR/pixelshuffle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: pixelshuffle.py 5 | # Created Date: Friday July 1st 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Friday, 1st July 2022 10:18:39 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2022 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch.nn as nn 14 | 15 | 16 | def pixelshuffle_block( 17 | in_channels, out_channels, upscale_factor=2, kernel_size=3, bias=False 18 | ): 19 | """ 
20 | Upsample features according to `upscale_factor`. 21 | """ 22 | padding = kernel_size // 2 23 | conv = nn.Conv2d( 24 | in_channels, 25 | out_channels * (upscale_factor**2), 26 | kernel_size, 27 | padding=1, 28 | bias=bias, 29 | ) 30 | pixel_shuffle = nn.PixelShuffle(upscale_factor) 31 | return nn.Sequential(*[conv, pixel_shuffle]) 32 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/SRVGG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import math 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class SRVGGNetCompact(nn.Module): 11 | """A compact VGG-style network structure for super-resolution. 12 | It is a compact network structure, which performs upsampling in the last layer and no convolution is 13 | conducted on the HR feature space. 14 | Args: 15 | num_in_ch (int): Channel number of inputs. Default: 3. 16 | num_out_ch (int): Channel number of outputs. Default: 3. 17 | num_feat (int): Channel number of intermediate features. Default: 64. 18 | num_conv (int): Number of convolution layers in the body network. Default: 16. 19 | upscale (int): Upsampling factor. Default: 4. 20 | act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu. 21 | """ 22 | 23 | def __init__( 24 | self, 25 | state_dict, 26 | act_type: str = "prelu", 27 | ): 28 | super(SRVGGNetCompact, self).__init__() 29 | self.model_arch = "SRVGG (RealESRGAN)" 30 | self.sub_type = "SR" 31 | 32 | self.act_type = act_type 33 | 34 | self.state = state_dict 35 | 36 | if "params" in self.state: 37 | self.state = self.state["params"] 38 | 39 | self.key_arr = list(self.state.keys()) 40 | 41 | self.in_nc = self.get_in_nc() 42 | self.num_feat = self.get_num_feats() 43 | self.num_conv = self.get_num_conv() 44 | self.out_nc = self.in_nc # :( 45 | self.pixelshuffle_shape = None # Defined in get_scale() 46 | self.scale = self.get_scale() 47 | 48 | self.supports_fp16 = True 49 | self.supports_bfp16 = True 50 | self.min_size_restriction = None 51 | 52 | self.body = nn.ModuleList() 53 | # the first conv 54 | self.body.append(nn.Conv2d(self.in_nc, self.num_feat, 3, 1, 1)) 55 | # the first activation 56 | if act_type == "relu": 57 | activation = nn.ReLU(inplace=True) 58 | elif act_type == "prelu": 59 | activation = nn.PReLU(num_parameters=self.num_feat) 60 | elif act_type == "leakyrelu": 61 | activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) 62 | self.body.append(activation) # type: ignore 63 | 64 | # the body structure 65 | for _ in range(self.num_conv): 66 | self.body.append(nn.Conv2d(self.num_feat, self.num_feat, 3, 1, 1)) 67 | # activation 68 | if act_type == "relu": 69 | activation = nn.ReLU(inplace=True) 70 | elif act_type == "prelu": 71 | activation = nn.PReLU(num_parameters=self.num_feat) 72 | elif act_type == "leakyrelu": 73 | activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) 74 | self.body.append(activation) # type: ignore 75 | 76 | # the last conv 77 | self.body.append(nn.Conv2d(self.num_feat, self.pixelshuffle_shape, 3, 1, 1)) # type: ignore 78 | # upsample 79 | self.upsampler = nn.PixelShuffle(self.scale) 80 | 81 | self.load_state_dict(self.state, strict=False) 82 | 83 | def get_num_conv(self) -> int: 84 | return (int(self.key_arr[-1].split(".")[1]) - 2) // 2 85 | 86 | def get_num_feats(self) -> int: 87 | return self.state[self.key_arr[0]].shape[0] 88 | 89 | def get_in_nc(self) -> int: 90 | 
return self.state[self.key_arr[0]].shape[1] 91 | 92 | def get_scale(self) -> int: 93 | self.pixelshuffle_shape = self.state[self.key_arr[-1]].shape[0] 94 | # Assume out_nc is the same as in_nc 95 | # I cant think of a better way to do that 96 | self.out_nc = self.in_nc 97 | scale = math.sqrt(self.pixelshuffle_shape / self.out_nc) 98 | if scale - int(scale) > 0: 99 | print( 100 | "out_nc is probably different than in_nc, scale calculation might be wrong" 101 | ) 102 | scale = int(scale) 103 | return scale 104 | 105 | def forward(self, x): 106 | out = x 107 | for i in range(0, len(self.body)): 108 | out = self.body[i](out) 109 | 110 | out = self.upsampler(out) 111 | # add the nearest upsampled image, so that the network learns the residual 112 | base = F.interpolate(x, scale_factor=self.scale, mode="nearest") 113 | out += base 114 | return out 115 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/SwiftSRGAN.py: -------------------------------------------------------------------------------- 1 | # From https://github.com/Koushik0901/Swift-SRGAN/blob/master/swift-srgan/models.py 2 | 3 | import torch 4 | from torch import nn 5 | 6 | 7 | class SeperableConv2d(nn.Module): 8 | def __init__( 9 | self, in_channels, out_channels, kernel_size, stride=1, padding=1, bias=True 10 | ): 11 | super(SeperableConv2d, self).__init__() 12 | self.depthwise = nn.Conv2d( 13 | in_channels, 14 | in_channels, 15 | kernel_size=kernel_size, 16 | stride=stride, 17 | groups=in_channels, 18 | bias=bias, 19 | padding=padding, 20 | ) 21 | self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias) 22 | 23 | def forward(self, x): 24 | return self.pointwise(self.depthwise(x)) 25 | 26 | 27 | class ConvBlock(nn.Module): 28 | def __init__( 29 | self, 30 | in_channels, 31 | out_channels, 32 | use_act=True, 33 | use_bn=True, 34 | discriminator=False, 35 | **kwargs, 36 | ): 37 | super(ConvBlock, self).__init__() 38 | 39 | self.use_act = use_act 40 | self.cnn = SeperableConv2d(in_channels, out_channels, **kwargs, bias=not use_bn) 41 | self.bn = nn.BatchNorm2d(out_channels) if use_bn else nn.Identity() 42 | self.act = ( 43 | nn.LeakyReLU(0.2, inplace=True) 44 | if discriminator 45 | else nn.PReLU(num_parameters=out_channels) 46 | ) 47 | 48 | def forward(self, x): 49 | return self.act(self.bn(self.cnn(x))) if self.use_act else self.bn(self.cnn(x)) 50 | 51 | 52 | class UpsampleBlock(nn.Module): 53 | def __init__(self, in_channels, scale_factor): 54 | super(UpsampleBlock, self).__init__() 55 | 56 | self.conv = SeperableConv2d( 57 | in_channels, 58 | in_channels * scale_factor**2, 59 | kernel_size=3, 60 | stride=1, 61 | padding=1, 62 | ) 63 | self.ps = nn.PixelShuffle( 64 | scale_factor 65 | ) # (in_channels * 4, H, W) -> (in_channels, H*2, W*2) 66 | self.act = nn.PReLU(num_parameters=in_channels) 67 | 68 | def forward(self, x): 69 | return self.act(self.ps(self.conv(x))) 70 | 71 | 72 | class ResidualBlock(nn.Module): 73 | def __init__(self, in_channels): 74 | super(ResidualBlock, self).__init__() 75 | 76 | self.block1 = ConvBlock( 77 | in_channels, in_channels, kernel_size=3, stride=1, padding=1 78 | ) 79 | self.block2 = ConvBlock( 80 | in_channels, in_channels, kernel_size=3, stride=1, padding=1, use_act=False 81 | ) 82 | 83 | def forward(self, x): 84 | out = self.block1(x) 85 | out = self.block2(out) 86 | return out + x 87 | 88 | 89 | class Generator(nn.Module): 90 | """Swift-SRGAN Generator 91 | Args: 92 | in_channels (int): number of input image 
channels. 93 | num_channels (int): number of hidden channels. 94 | num_blocks (int): number of residual blocks. 95 | upscale_factor (int): factor to upscale the image [2x, 4x, 8x]. 96 | Returns: 97 | torch.Tensor: super resolution image 98 | """ 99 | 100 | def __init__( 101 | self, 102 | state_dict, 103 | ): 104 | super(Generator, self).__init__() 105 | self.model_arch = "Swift-SRGAN" 106 | self.sub_type = "SR" 107 | self.state = state_dict 108 | if "model" in self.state: 109 | self.state = self.state["model"] 110 | 111 | self.in_nc: int = self.state["initial.cnn.depthwise.weight"].shape[0] 112 | self.out_nc: int = self.state["final_conv.pointwise.weight"].shape[0] 113 | self.num_filters: int = self.state["initial.cnn.pointwise.weight"].shape[0] 114 | self.num_blocks = len( 115 | set([x.split(".")[1] for x in self.state.keys() if "residual" in x]) 116 | ) 117 | self.scale: int = 2 ** len( 118 | set([x.split(".")[1] for x in self.state.keys() if "upsampler" in x]) 119 | ) 120 | 121 | in_channels = self.in_nc 122 | num_channels = self.num_filters 123 | num_blocks = self.num_blocks 124 | upscale_factor = self.scale 125 | 126 | self.supports_fp16 = True 127 | self.supports_bfp16 = True 128 | self.min_size_restriction = None 129 | 130 | self.initial = ConvBlock( 131 | in_channels, num_channels, kernel_size=9, stride=1, padding=4, use_bn=False 132 | ) 133 | self.residual = nn.Sequential( 134 | *[ResidualBlock(num_channels) for _ in range(num_blocks)] 135 | ) 136 | self.convblock = ConvBlock( 137 | num_channels, 138 | num_channels, 139 | kernel_size=3, 140 | stride=1, 141 | padding=1, 142 | use_act=False, 143 | ) 144 | self.upsampler = nn.Sequential( 145 | *[ 146 | UpsampleBlock(num_channels, scale_factor=2) 147 | for _ in range(upscale_factor // 2) 148 | ] 149 | ) 150 | self.final_conv = SeperableConv2d( 151 | num_channels, in_channels, kernel_size=9, stride=1, padding=4 152 | ) 153 | 154 | self.load_state_dict(self.state, strict=False) 155 | 156 | def forward(self, x): 157 | initial = self.initial(x) 158 | x = self.residual(initial) 159 | x = self.convblock(x) + initial 160 | x = self.upsampler(x) 161 | return (torch.tanh(self.final_conv(x)) + 1) / 2 162 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__init__.py -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/DAT.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/DAT.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/HAT.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/HAT.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/LaMa.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/LaMa.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/RRDB.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/RRDB.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SCUNet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SCUNet.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SPSR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SPSR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SRVGG.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SRVGG.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SwiftSRGAN.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SwiftSRGAN.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/Swin2SR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/Swin2SR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/SwinIR.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/SwinIR.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/__pycache__/block.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/__pycache__/block.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/LICENSE-codeformer: -------------------------------------------------------------------------------- 1 | S-Lab License 1.0 2 | 3 | Copyright 2022 S-Lab 4 | 5 | Redistribution and use for non-commercial purpose in source and 6 | binary forms, with or without modification, are permitted provided 7 | that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | In the event that redistribution and/or use for commercial purpose in 34 | source or binary forms, with or without modification is required, 35 | please contact the contributor(s) of the work. 
36 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/codeformer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/codeformer.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/gfpganv1_clean_arch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/gfpganv1_clean_arch.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/restoreformer_arch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/restoreformer_arch.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/__pycache__/stylegan2_clean_arch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/face/__pycache__/stylegan2_clean_arch.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/face/fused_act.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # type: ignore 3 | # modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 4 | 5 | import torch 6 | from torch import nn 7 | from torch.autograd import Function 8 | 9 | fused_act_ext = None 10 | 11 | 12 | class FusedLeakyReLUFunctionBackward(Function): 13 | @staticmethod 14 | def forward(ctx, grad_output, out, negative_slope, scale): 15 | ctx.save_for_backward(out) 16 | ctx.negative_slope = negative_slope 17 | ctx.scale = scale 18 | 19 | empty = grad_output.new_empty(0) 20 | 21 | grad_input = fused_act_ext.fused_bias_act( 22 | grad_output, empty, out, 3, 1, negative_slope, scale 23 | ) 24 | 25 | dim = [0] 26 | 27 | if grad_input.ndim > 2: 28 | dim += list(range(2, grad_input.ndim)) 29 | 30 | grad_bias = grad_input.sum(dim).detach() 31 | 32 | return grad_input, grad_bias 33 | 34 | @staticmethod 35 | def backward(ctx, gradgrad_input, gradgrad_bias): 36 | (out,) = ctx.saved_tensors 37 | gradgrad_out = fused_act_ext.fused_bias_act( 38 | gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale 39 | ) 40 | 41 | return gradgrad_out, None, None, None 42 | 43 | 44 | class FusedLeakyReLUFunction(Function): 45 | @staticmethod 46 | def forward(ctx, input, bias, negative_slope, scale): 47 | empty = input.new_empty(0) 48 | out = fused_act_ext.fused_bias_act( 49 | input, bias, empty, 3, 0, negative_slope, scale 50 | ) 51 | ctx.save_for_backward(out) 52 | ctx.negative_slope = negative_slope 53 | ctx.scale = scale 54 | 55 | return out 56 | 57 | @staticmethod 58 | def backward(ctx, grad_output): 59 | (out,) = ctx.saved_tensors 60 | 61 | grad_input, grad_bias = 
FusedLeakyReLUFunctionBackward.apply( 62 | grad_output, out, ctx.negative_slope, ctx.scale 63 | ) 64 | 65 | return grad_input, grad_bias, None, None 66 | 67 | 68 | class FusedLeakyReLU(nn.Module): 69 | def __init__(self, channel, negative_slope=0.2, scale=2**0.5): 70 | super().__init__() 71 | 72 | self.bias = nn.Parameter(torch.zeros(channel)) 73 | self.negative_slope = negative_slope 74 | self.scale = scale 75 | 76 | def forward(self, input): 77 | return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale) 78 | 79 | 80 | def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5): 81 | return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) 82 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/__pycache__/drop.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/timm/__pycache__/drop.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/__pycache__/helpers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/timm/__pycache__/helpers.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/__pycache__/weight_init.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/ldm_patched/pfn/architecture/timm/__pycache__/weight_init.cpython-310.pyc -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import collections.abc 5 | from itertools import repeat 6 | 7 | 8 | # From PyTorch internals 9 | def _ntuple(n): 10 | def parse(x): 11 | if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): 12 | return x 13 | return tuple(repeat(x, n)) 14 | 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=0.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # Make sure that round down does not go down by more than 10%. 
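# e.g. v=10, divisor=8: the nearest multiple is 8, which is below 0.9 * 10, so it is bumped up to 16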
29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /ldm_patched/pfn/architecture/timm/weight_init.py: -------------------------------------------------------------------------------- 1 | import math 2 | import warnings 3 | 4 | import torch 5 | from torch.nn.init import _calculate_fan_in_and_fan_out 6 | 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn( 17 | "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 18 | "The distribution of values may be incorrect.", 19 | stacklevel=2, 20 | ) 21 | 22 | with torch.no_grad(): 23 | # Values are generated by using a truncated uniform distribution and 24 | # then using the inverse CDF for the normal distribution. 25 | # Get upper and lower cdf values 26 | l = norm_cdf((a - mean) / std) 27 | u = norm_cdf((b - mean) / std) 28 | 29 | # Uniformly fill tensor with values from [l, u], then translate to 30 | # [2l-1, 2u-1]. 31 | tensor.uniform_(2 * l - 1, 2 * u - 1) 32 | 33 | # Use inverse cdf transform for normal distribution to get truncated 34 | # standard normal 35 | tensor.erfinv_() 36 | 37 | # Transform to proper mean, std 38 | tensor.mul_(std * math.sqrt(2.0)) 39 | tensor.add_(mean) 40 | 41 | # Clamp to ensure it's in the proper range 42 | tensor.clamp_(min=a, max=b) 43 | return tensor 44 | 45 | 46 | def trunc_normal_( 47 | tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 48 | ) -> torch.Tensor: 49 | r"""Fills the input Tensor with values drawn from a truncated 50 | normal distribution. The values are effectively drawn from the 51 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 52 | with values outside :math:`[a, b]` redrawn until they are within 53 | the bounds. The method used for generating the random values works 54 | best when :math:`a \leq \text{mean} \leq b`. 55 | 56 | NOTE: this impl is similar to the PyTorch trunc_normal_, the bounds [a, b] are 57 | applied while sampling the normal with mean/std applied, therefore a, b args 58 | should be adjusted to match the range of mean, std args. 59 | 60 | Args: 61 | tensor: an n-dimensional `torch.Tensor` 62 | mean: the mean of the normal distribution 63 | std: the standard deviation of the normal distribution 64 | a: the minimum cutoff value 65 | b: the maximum cutoff value 66 | Examples: 67 | >>> w = torch.empty(3, 5) 68 | >>> nn.init.trunc_normal_(w) 69 | """ 70 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 71 | 72 | 73 | def trunc_normal_tf_( 74 | tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 75 | ) -> torch.Tensor: 76 | r"""Fills the input Tensor with values drawn from a truncated 77 | normal distribution. The values are effectively drawn from the 78 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 79 | with values outside :math:`[a, b]` redrawn until they are within 80 | the bounds. The method used for generating the random values works 81 | best when :math:`a \leq \text{mean} \leq b`. 
82 | 83 | NOTE: this 'tf' variant behaves closer to Tensorflow / JAX impl where the 84 | bounds [a, b] are applied when sampling the normal distribution with mean=0, std=1.0 85 | and the result is subsquently scaled and shifted by the mean and std args. 86 | 87 | Args: 88 | tensor: an n-dimensional `torch.Tensor` 89 | mean: the mean of the normal distribution 90 | std: the standard deviation of the normal distribution 91 | a: the minimum cutoff value 92 | b: the maximum cutoff value 93 | Examples: 94 | >>> w = torch.empty(3, 5) 95 | >>> nn.init.trunc_normal_(w) 96 | """ 97 | _no_grad_trunc_normal_(tensor, 0, 1.0, a, b) 98 | with torch.no_grad(): 99 | tensor.mul_(std).add_(mean) 100 | return tensor 101 | 102 | 103 | def variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal"): 104 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 105 | if mode == "fan_in": 106 | denom = fan_in 107 | elif mode == "fan_out": 108 | denom = fan_out 109 | elif mode == "fan_avg": 110 | denom = (fan_in + fan_out) / 2 111 | 112 | variance = scale / denom # type: ignore 113 | 114 | if distribution == "truncated_normal": 115 | # constant is stddev of standard normal truncated to (-2, 2) 116 | trunc_normal_tf_(tensor, std=math.sqrt(variance) / 0.87962566103423978) 117 | elif distribution == "normal": 118 | tensor.normal_(std=math.sqrt(variance)) 119 | elif distribution == "uniform": 120 | bound = math.sqrt(3 * variance) 121 | # pylint: disable=invalid-unary-operand-type 122 | tensor.uniform_(-bound, bound) 123 | else: 124 | raise ValueError(f"invalid distribution {distribution}") 125 | 126 | 127 | def lecun_normal_(tensor): 128 | variance_scaling_(tensor, mode="fan_in", distribution="truncated_normal") 129 | -------------------------------------------------------------------------------- /ldm_patched/pfn/model_loading.py: -------------------------------------------------------------------------------- 1 | import logging as logger 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | from .types import PyTorchModel 18 | 19 | 20 | class UnsupportedModel(Exception): 21 | pass 22 | 23 | 24 | def load_state_dict(state_dict) -> PyTorchModel: 25 | logger.debug(f"Loading state dict into pytorch model arch") 26 | 27 | state_dict_keys = list(state_dict.keys()) 28 | 29 | if "params_ema" in state_dict_keys: 30 | state_dict = state_dict["params_ema"] 31 | elif "params-ema" in state_dict_keys: 32 | state_dict = state_dict["params-ema"] 33 | elif "params" in state_dict_keys: 34 | state_dict = state_dict["params"] 35 | 36 | state_dict_keys = list(state_dict.keys()) 37 | # SRVGGNet Real-ESRGAN (v2) 38 | if "body.0.weight" in state_dict_keys and "body.1.weight" in state_dict_keys: 39 | model = RealESRGANv2(state_dict) 40 | # SPSR (ESRGAN with lots of extra layers) 41 | elif "f_HR_conv1.0.weight" in state_dict: 42 | model = 
SPSR(state_dict) 43 | # Swift-SRGAN 44 | elif ( 45 | "model" in state_dict_keys 46 | and "initial.cnn.depthwise.weight" in state_dict["model"].keys() 47 | ): 48 | model = SwiftSRGAN(state_dict) 49 | # SwinIR, Swin2SR, HAT 50 | elif "layers.0.residual_group.blocks.0.norm1.weight" in state_dict_keys: 51 | if ( 52 | "layers.0.residual_group.blocks.0.conv_block.cab.0.weight" 53 | in state_dict_keys 54 | ): 55 | model = HAT(state_dict) 56 | elif "patch_embed.proj.weight" in state_dict_keys: 57 | model = Swin2SR(state_dict) 58 | else: 59 | model = SwinIR(state_dict) 60 | # GFPGAN 61 | elif ( 62 | "toRGB.0.weight" in state_dict_keys 63 | and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys 64 | ): 65 | model = GFPGANv1Clean(state_dict) 66 | # RestoreFormer 67 | elif ( 68 | "encoder.conv_in.weight" in state_dict_keys 69 | and "encoder.down.0.block.0.norm1.weight" in state_dict_keys 70 | ): 71 | model = RestoreFormer(state_dict) 72 | elif ( 73 | "encoder.blocks.0.weight" in state_dict_keys 74 | and "quantize.embedding.weight" in state_dict_keys 75 | ): 76 | model = CodeFormer(state_dict) 77 | # LaMa 78 | elif ( 79 | "model.model.1.bn_l.running_mean" in state_dict_keys 80 | or "generator.model.1.bn_l.running_mean" in state_dict_keys 81 | ): 82 | model = LaMa(state_dict) 83 | # Omni-SR 84 | elif "residual_layer.0.residual_layer.0.layer.0.fn.0.weight" in state_dict_keys: 85 | model = OmniSR(state_dict) 86 | # SCUNet 87 | elif "m_head.0.weight" in state_dict_keys and "m_tail.0.weight" in state_dict_keys: 88 | model = SCUNet(state_dict) 89 | # DAT 90 | elif "layers.0.blocks.2.attn.attn_mask_0" in state_dict_keys: 91 | model = DAT(state_dict) 92 | # Regular ESRGAN, "new-arch" ESRGAN, Real-ESRGAN v1 93 | else: 94 | try: 95 | model = ESRGAN(state_dict) 96 | except: 97 | # pylint: disable=raise-missing-from 98 | raise UnsupportedModel 99 | return model 100 | -------------------------------------------------------------------------------- /ldm_patched/pfn/types.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | 18 | PyTorchSRModels = ( 19 | RealESRGANv2, 20 | SPSR, 21 | SwiftSRGAN, 22 | ESRGAN, 23 | SwinIR, 24 | Swin2SR, 25 | HAT, 26 | OmniSR, 27 | SCUNet, 28 | DAT, 29 | ) 30 | PyTorchSRModel = Union[ 31 | RealESRGANv2, 32 | SPSR, 33 | SwiftSRGAN, 34 | ESRGAN, 35 | SwinIR, 36 | Swin2SR, 37 | HAT, 38 | OmniSR, 39 | SCUNet, 40 | DAT, 41 | ] 42 | 43 | 44 | def is_pytorch_sr_model(model: object): 45 | return isinstance(model, PyTorchSRModels) 46 | 47 | 48 | PyTorchFaceModels = (GFPGANv1Clean, RestoreFormer, CodeFormer) 49 | PyTorchFaceModel = Union[GFPGANv1Clean, RestoreFormer, CodeFormer] 50 | 51 | 52 | def is_pytorch_face_model(model: object): 53 | return isinstance(model, PyTorchFaceModels) 54 | 55 | 56 | 
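# Inpainting architectures (currently only LaMa); extend this tuple and the Union below when adding new inpaint models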
PyTorchInpaintModels = (LaMa,) 57 | PyTorchInpaintModel = Union[LaMa] 58 | 59 | 60 | def is_pytorch_inpaint_model(model: object): 61 | return isinstance(model, PyTorchInpaintModels) 62 | 63 | 64 | PyTorchModels = (*PyTorchSRModels, *PyTorchFaceModels, *PyTorchInpaintModels) 65 | PyTorchModel = Union[PyTorchSRModel, PyTorchFaceModel, PyTorchInpaintModel] 66 | 67 | 68 | def is_pytorch_model(model: object): 69 | return isinstance(model, PyTorchModels) 70 | -------------------------------------------------------------------------------- /ldm_patched/taesd/taesd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Tiny AutoEncoder for Stable Diffusion 4 | (DNN for encoding / decoding SD's latent space) 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | 9 | import ldm_patched.modules.utils 10 | import ldm_patched.modules.ops 11 | 12 | def conv(n_in, n_out, **kwargs): 13 | return ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 3, padding=1, **kwargs) 14 | 15 | class Clamp(nn.Module): 16 | def forward(self, x): 17 | return torch.tanh(x / 3) * 3 18 | 19 | class Block(nn.Module): 20 | def __init__(self, n_in, n_out): 21 | super().__init__() 22 | self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out)) 23 | self.skip = ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity() 24 | self.fuse = nn.ReLU() 25 | def forward(self, x): 26 | return self.fuse(self.conv(x) + self.skip(x)) 27 | 28 | def Encoder(): 29 | return nn.Sequential( 30 | conv(3, 64), Block(64, 64), 31 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 32 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 33 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 34 | conv(64, 4), 35 | ) 36 | 37 | def Decoder(): 38 | return nn.Sequential( 39 | Clamp(), conv(4, 64), nn.ReLU(), 40 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 41 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 42 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 43 | Block(64, 64), conv(64, 3), 44 | ) 45 | 46 | class TAESD(nn.Module): 47 | latent_magnitude = 3 48 | latent_shift = 0.5 49 | 50 | def __init__(self, encoder_path=None, decoder_path=None): 51 | """Initialize pretrained TAESD on the given device from the given checkpoints.""" 52 | super().__init__() 53 | self.taesd_encoder = Encoder() 54 | self.taesd_decoder = Decoder() 55 | self.vae_scale = torch.nn.Parameter(torch.tensor(1.0)) 56 | if encoder_path is not None: 57 | self.taesd_encoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(encoder_path, safe_load=True)) 58 | if decoder_path is not None: 59 | self.taesd_decoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(decoder_path, safe_load=True)) 60 | 61 | @staticmethod 62 | def scale_latents(x): 63 | """raw latents -> [0, 1]""" 64 | return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) 65 | 66 | @staticmethod 67 | def unscale_latents(x): 68 | """[0, 1] -> raw latents""" 69 | return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) 70 | 71 | def decode(self, x): 72 | x_sample = self.taesd_decoder(x * self.vae_scale) 73 | x_sample = x_sample.sub(0.5).mul(2) 74 | return 
x_sample 75 | 76 | def encode(self, x): 77 | return self.taesd_encoder(x * 0.5 + 0.5) / self.vae_scale 78 | -------------------------------------------------------------------------------- /ldm_patched/utils/latent_visualization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | import struct 4 | import numpy as np 5 | from ldm_patched.modules.args_parser import args, LatentPreviewMethod 6 | from ldm_patched.taesd.taesd import TAESD 7 | import ldm_patched.utils.path_utils 8 | import ldm_patched.modules.utils 9 | 10 | MAX_PREVIEW_RESOLUTION = 512 11 | 12 | class LatentPreviewer: 13 | def decode_latent_to_preview(self, x0): 14 | pass 15 | 16 | def decode_latent_to_preview_image(self, preview_format, x0): 17 | preview_image = self.decode_latent_to_preview(x0) 18 | return ("JPEG", preview_image, MAX_PREVIEW_RESOLUTION) 19 | 20 | class TAESDPreviewerImpl(LatentPreviewer): 21 | def __init__(self, taesd): 22 | self.taesd = taesd 23 | 24 | def decode_latent_to_preview(self, x0): 25 | x_sample = self.taesd.decode(x0[:1])[0].detach() 26 | x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) 27 | x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) 28 | x_sample = x_sample.astype(np.uint8) 29 | 30 | preview_image = Image.fromarray(x_sample) 31 | return preview_image 32 | 33 | 34 | class Latent2RGBPreviewer(LatentPreviewer): 35 | def __init__(self, latent_rgb_factors): 36 | self.latent_rgb_factors = torch.tensor(latent_rgb_factors, device="cpu") 37 | 38 | def decode_latent_to_preview(self, x0): 39 | latent_image = x0[0].permute(1, 2, 0).cpu() @ self.latent_rgb_factors 40 | 41 | latents_ubyte = (((latent_image + 1) / 2) 42 | .clamp(0, 1) # change scale from -1..1 to 0..1 43 | .mul(0xFF) # to 0..255 44 | .byte()).cpu() 45 | 46 | return Image.fromarray(latents_ubyte.numpy()) 47 | 48 | 49 | def get_previewer(device, latent_format): 50 | previewer = None 51 | method = args.preview_option 52 | if method != LatentPreviewMethod.NoPreviews: 53 | # TODO previewer methods 54 | taesd_decoder_path = None 55 | if latent_format.taesd_decoder_name is not None: 56 | taesd_decoder_path = next( 57 | (fn for fn in ldm_patched.utils.path_utils.get_filename_list("vae_approx") 58 | if fn.startswith(latent_format.taesd_decoder_name)), 59 | "" 60 | ) 61 | taesd_decoder_path = ldm_patched.utils.path_utils.get_full_path("vae_approx", taesd_decoder_path) 62 | 63 | if method == LatentPreviewMethod.Auto: 64 | method = LatentPreviewMethod.Latent2RGB 65 | if taesd_decoder_path: 66 | method = LatentPreviewMethod.TAESD 67 | 68 | if method == LatentPreviewMethod.TAESD: 69 | if taesd_decoder_path: 70 | taesd = TAESD(None, taesd_decoder_path).to(device) 71 | previewer = TAESDPreviewerImpl(taesd) 72 | else: 73 | print("Warning: TAESD previews enabled, but could not find models/vae_approx/{}".format(latent_format.taesd_decoder_name)) 74 | 75 | if previewer is None: 76 | if latent_format.latent_rgb_factors is not None: 77 | previewer = Latent2RGBPreviewer(latent_format.latent_rgb_factors) 78 | return previewer 79 | 80 | def prepare_callback(model, steps, x0_output_dict=None): 81 | preview_format = "JPEG" 82 | if preview_format not in ["JPEG", "PNG"]: 83 | preview_format = "JPEG" 84 | 85 | previewer = get_previewer(model.load_device, model.model.latent_format) 86 | 87 | pbar = ldm_patched.modules.utils.ProgressBar(steps) 88 | def callback(step, x0, x, total_steps): 89 | if x0_output_dict is not None: 90 | x0_output_dict["x0"] = x0 91 | 92 | 
preview_bytes = None 93 | if previewer: 94 | preview_bytes = previewer.decode_latent_to_preview_image(preview_format, x0) 95 | pbar.update_absolute(step + 1, total_steps, preview_bytes) 96 | return callback 97 | 98 | -------------------------------------------------------------------------------- /make_img.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "import random\n", 11 | "import os\n", 12 | "import shutil\n", 13 | "from tqdm import tqdm\n", 14 | "from diffusers import StableDiffusionXLPipeline\n", 15 | "\n", 16 | "regular_prompts_list = [\n", 17 | " ...\n", 18 | "]\n", 19 | "object_name = \"teapot\"\n", 20 | "save_dir = \"regular_teapot\"\n", 21 | "\n", 22 | "\n", 23 | "repeat_times = 30\n", 24 | "\n", 25 | "DEVICE = \"cuda:0\"\n", 26 | "torch.cuda.set_device(DEVICE)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "pipeline = StableDiffusionXLPipeline.from_pretrained(\n", 36 | " \"stabilityai/stable-diffusion-xl-base-1.0\",\n", 37 | " torch_dtype=torch.float16,\n", 38 | " use_safetensors=True,\n", 39 | " variant=\"fp16\",\n", 40 | ").to(DEVICE)\n", 41 | "pipeline.set_progress_bar_config(disable=True)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# 使用lang-sam完成分割任务 python3.10装包\n", 51 | "# git clone https://github.com/mycfhs/lang-segment-anything && cd lang-segment-anything\n", 52 | "# python -m pip install -e . --ignore-installed\n", 53 | "from lang_sam import LangSAM\n", 54 | "\n", 55 | "model = LangSAM(sam_type=\"vit_h\") # b, l, h" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "from torchvision.transforms import ToPILImage\n", 65 | "import gc\n", 66 | "\n", 67 | "to_pil_image = ToPILImage()\n", 68 | "\n", 69 | "if os.path.exists(save_dir):\n", 70 | " shutil.rmtree(save_dir)\n", 71 | "\n", 72 | "os.makedirs(save_dir)\n", 73 | "for prompt in regular_prompts_list:\n", 74 | " prompt = prompt.replace(\" \", \"_\")\n", 75 | " os.makedirs(f\"{save_dir}/{prompt}\")\n", 76 | "\n", 77 | "for _ in tqdm(range(repeat_times)):\n", 78 | " random_seed = random.randint(0, 1000000)\n", 79 | " images = pipeline(regular_prompts_list, seed=random_seed).images\n", 80 | "\n", 81 | " gc.collect()\n", 82 | " if torch.cuda.is_available():\n", 83 | " torch.cuda.empty_cache()\n", 84 | "\n", 85 | " for image, prompt in zip(images, regular_prompts_list):\n", 86 | " prompt = prompt.replace(\" \", \"_\")\n", 87 | "\n", 88 | " masks, boxes, phrases, logits = model.predict(image, object_name)\n", 89 | " mask = masks.to(torch.uint8) * 255\n", 90 | "\n", 91 | " try:\n", 92 | " mask_img = to_pil_image(mask[0])\n", 93 | " mask_img.save(f\"{save_dir}/{prompt}/{random_seed}-mask.png\")\n", 94 | " image.save(f\"{save_dir}/{prompt}/{random_seed}-image.png\")\n", 95 | " except:\n", 96 | " print(f\"Error img, ignore\")\n", 97 | " continue\n", 98 | "\n", 99 | " gc.collect()\n", 100 | " if torch.cuda.is_available():\n", 101 | " torch.cuda.empty_cache()" 102 | ] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "DreamMix", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | 
"codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.10.15" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /models/inpaint/put_inpaint_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/models/inpaint/put_inpaint_here -------------------------------------------------------------------------------- /models/loras/put_loras_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/models/loras/put_loras_here -------------------------------------------------------------------------------- /models/upscale_models/put_esrgan_and_other_upscale_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mycfhs/DreamMix/0de2f5f06ce7b87ad3fc8ffe313087d33133df9b/models/upscale_models/put_esrgan_and_other_upscale_models_here -------------------------------------------------------------------------------- /modules/auth.py: -------------------------------------------------------------------------------- 1 | import json 2 | import hashlib 3 | import modules.constants as constants 4 | 5 | from os.path import exists 6 | 7 | 8 | def auth_list_to_dict(auth_list): 9 | auth_dict = {} 10 | for auth_data in auth_list: 11 | if 'user' in auth_data: 12 | if 'hash' in auth_data: 13 | auth_dict |= {auth_data['user']: auth_data['hash']} 14 | elif 'pass' in auth_data: 15 | auth_dict |= {auth_data['user']: hashlib.sha256(bytes(auth_data['pass'], encoding='utf-8')).hexdigest()} 16 | return auth_dict 17 | 18 | 19 | def load_auth_data(filename=None): 20 | auth_dict = None 21 | if filename != None and exists(filename): 22 | with open(filename, encoding='utf-8') as auth_file: 23 | try: 24 | auth_obj = json.load(auth_file) 25 | if isinstance(auth_obj, list) and len(auth_obj) > 0: 26 | auth_dict = auth_list_to_dict(auth_obj) 27 | except Exception as e: 28 | print('load_auth_data, e: ' + str(e)) 29 | return auth_dict 30 | 31 | 32 | auth_dict = load_auth_data(constants.AUTH_FILENAME) 33 | 34 | auth_enabled = auth_dict != None 35 | 36 | 37 | def check_auth(user, password): 38 | if user not in auth_dict: 39 | return False 40 | else: 41 | return hashlib.sha256(bytes(password, encoding='utf-8')).hexdigest() == auth_dict[user] 42 | -------------------------------------------------------------------------------- /modules/constants.py: -------------------------------------------------------------------------------- 1 | # as in k-diffusion (sampling.py) 2 | MIN_SEED = 0 3 | MAX_SEED = 2**63 - 1 4 | 5 | AUTH_FILENAME = 'auth.json' 6 | -------------------------------------------------------------------------------- /modules/flags.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum, Enum 2 | 3 | disabled = 'Disabled' 4 | enabled = 'Enabled' 5 | subtle_variation = 'Vary (Subtle)' 6 | strong_variation = 'Vary (Strong)' 7 | upscale_15 = 'Upscale (1.5x)' 8 | upscale_2 = 'Upscale (2x)' 9 | upscale_fast = 'Upscale (Fast 2x)' 10 | 11 | uov_list = [ 12 | disabled, 
subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast 13 | ] 14 | 15 | CIVITAI_NO_KARRAS = ["euler", "euler_ancestral", "heun", "dpm_fast", "dpm_adaptive", "ddim", "uni_pc"] 16 | 17 | # fooocus: a1111 (Civitai) 18 | KSAMPLER = { 19 | "euler": "Euler", 20 | "euler_ancestral": "Euler a", 21 | "heun": "Heun", 22 | "heunpp2": "", 23 | "dpm_2": "DPM2", 24 | "dpm_2_ancestral": "DPM2 a", 25 | "lms": "LMS", 26 | "dpm_fast": "DPM fast", 27 | "dpm_adaptive": "DPM adaptive", 28 | "dpmpp_2s_ancestral": "DPM++ 2S a", 29 | "dpmpp_sde": "DPM++ SDE", 30 | "dpmpp_sde_gpu": "DPM++ SDE", 31 | "dpmpp_2m": "DPM++ 2M", 32 | "dpmpp_2m_sde": "DPM++ 2M SDE", 33 | "dpmpp_2m_sde_gpu": "DPM++ 2M SDE", 34 | "dpmpp_3m_sde": "", 35 | "dpmpp_3m_sde_gpu": "", 36 | "ddpm": "", 37 | "lcm": "LCM" 38 | } 39 | 40 | SAMPLER_EXTRA = { 41 | "ddim": "DDIM", 42 | "uni_pc": "UniPC", 43 | "uni_pc_bh2": "" 44 | } 45 | 46 | SAMPLERS = KSAMPLER | SAMPLER_EXTRA 47 | 48 | KSAMPLER_NAMES = list(KSAMPLER.keys()) 49 | 50 | SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", "lcm", "turbo"] 51 | SAMPLER_NAMES = KSAMPLER_NAMES + list(SAMPLER_EXTRA.keys()) 52 | 53 | sampler_list = SAMPLER_NAMES 54 | scheduler_list = SCHEDULER_NAMES 55 | 56 | refiner_swap_method = 'joint' 57 | 58 | cn_ip = "ImagePrompt" 59 | cn_ip_face = "FaceSwap" 60 | cn_canny = "PyraCanny" 61 | cn_cpds = "CPDS" 62 | 63 | ip_list = [cn_ip, cn_canny, cn_cpds, cn_ip_face] 64 | default_ip = cn_ip 65 | 66 | default_parameters = { 67 | cn_ip: (0.5, 0.6), cn_ip_face: (0.9, 0.75), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0) 68 | } # stop, weight 69 | 70 | output_formats = ['png', 'jpeg', 'webp'] 71 | 72 | inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6'] 73 | inpaint_option_default = 'Inpaint or Outpaint (default)' 74 | inpaint_option_detail = 'Improve Detail (face, hand, eyes, etc.)' 75 | inpaint_option_modify = 'Modify Content (add objects, change background, etc.)' 76 | inpaint_options = [inpaint_option_default, inpaint_option_detail, inpaint_option_modify] 77 | 78 | desc_type_photo = 'Photograph' 79 | desc_type_anime = 'Art/Anime' 80 | 81 | 82 | class MetadataScheme(Enum): 83 | FOOOCUS = 'fooocus' 84 | A1111 = 'a1111' 85 | 86 | 87 | metadata_scheme = [ 88 | (f'{MetadataScheme.FOOOCUS.value} (json)', MetadataScheme.FOOOCUS.value), 89 | (f'{MetadataScheme.A1111.value} (plain text)', MetadataScheme.A1111.value), 90 | ] 91 | 92 | controlnet_image_count = 4 93 | 94 | 95 | class OutputFormat(Enum): 96 | PNG = 'png' 97 | JPEG = 'jpeg' 98 | WEBP = 'webp' 99 | 100 | @classmethod 101 | def list(cls) -> list: 102 | return list(map(lambda c: c.value, cls)) 103 | 104 | 105 | class Steps(IntEnum): 106 | QUALITY = 60 107 | SPEED = 30 108 | EXTREME_SPEED = 8 109 | LIGHTNING = 4 110 | 111 | 112 | class StepsUOV(IntEnum): 113 | QUALITY = 36 114 | SPEED = 18 115 | EXTREME_SPEED = 8 116 | LIGHTNING = 4 117 | 118 | 119 | class Performance(Enum): 120 | QUALITY = 'Quality' 121 | SPEED = 'Speed' 122 | EXTREME_SPEED = 'Extreme Speed' 123 | LIGHTNING = 'Lightning' 124 | 125 | @classmethod 126 | def list(cls) -> list: 127 | return list(map(lambda c: c.value, cls)) 128 | 129 | @classmethod 130 | def has_restricted_features(cls, x) -> bool: 131 | if isinstance(x, Performance): 132 | x = x.value 133 | return x in [cls.EXTREME_SPEED.value, cls.LIGHTNING.value] 134 | 135 | def steps(self) -> int | None: 136 | return Steps[self.name].value if Steps[self.name] else None 137 | 138 | def steps_uov(self) -> int | None: 139 | return 
StepsUOV[self.name].value if Steps[self.name] else None
140 | 
-------------------------------------------------------------------------------- /modules/html.py: --------------------------------------------------------------------------------
1 | progress_html = '''
2 | <div class="loader-container">
3 |   <div class="loader"></div>
4 |   <div class="progress-container">
5 |     <progress value="*number*" max="100"></progress>
6 |   </div>
7 |   <span>*text*</span>
8 | </div>
9 | ''' 10 | 11 | 12 | def make_progress_html(number, text): 13 | return progress_html.replace('*number*', str(number)).replace('*text*', text) 14 | -------------------------------------------------------------------------------- /modules/launch_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import importlib 3 | import importlib.util 4 | import shutil 5 | import subprocess 6 | import sys 7 | import re 8 | import logging 9 | import importlib.metadata 10 | import packaging.version 11 | from packaging.requirements import Requirement 12 | 13 | logging.getLogger("torch.distributed.nn").setLevel(logging.ERROR) # sshh... 14 | logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage()) 15 | 16 | re_requirement = re.compile(r"\s*([-\w]+)\s*(?:==\s*([-+.\w]+))?\s*") 17 | 18 | python = sys.executable 19 | default_command_live = (os.environ.get('LAUNCH_LIVE_OUTPUT') == "1") 20 | index_url = os.environ.get('INDEX_URL', "") 21 | 22 | modules_path = os.path.dirname(os.path.realpath(__file__)) 23 | script_path = os.path.dirname(modules_path) 24 | 25 | 26 | def is_installed(package): 27 | try: 28 | spec = importlib.util.find_spec(package) 29 | except ModuleNotFoundError: 30 | return False 31 | 32 | return spec is not None 33 | 34 | 35 | def run(command, desc=None, errdesc=None, custom_env=None, live: bool = default_command_live) -> str: 36 | if desc is not None: 37 | print(desc) 38 | 39 | run_kwargs = { 40 | "args": command, 41 | "shell": True, 42 | "env": os.environ if custom_env is None else custom_env, 43 | "encoding": 'utf8', 44 | "errors": 'ignore', 45 | } 46 | 47 | if not live: 48 | run_kwargs["stdout"] = run_kwargs["stderr"] = subprocess.PIPE 49 | 50 | result = subprocess.run(**run_kwargs) 51 | 52 | if result.returncode != 0: 53 | error_bits = [ 54 | f"{errdesc or 'Error running command'}.", 55 | f"Command: {command}", 56 | f"Error code: {result.returncode}", 57 | ] 58 | if result.stdout: 59 | error_bits.append(f"stdout: {result.stdout}") 60 | if result.stderr: 61 | error_bits.append(f"stderr: {result.stderr}") 62 | raise RuntimeError("\n".join(error_bits)) 63 | 64 | return (result.stdout or "") 65 | 66 | 67 | def run_pip(command, desc=None, live=default_command_live): 68 | try: 69 | index_url_line = f' --index-url {index_url}' if index_url != '' else '' 70 | return run(f'"{python}" -m pip {command} --prefer-binary{index_url_line}', desc=f"Installing {desc}", 71 | errdesc=f"Couldn't install {desc}", live=live) 72 | except Exception as e: 73 | print(e) 74 | print(f'CMD Failed {desc}: {command}') 75 | return None 76 | 77 | 78 | def requirements_met(requirements_file): 79 | with open(requirements_file, "r", encoding="utf8") as file: 80 | for line in file: 81 | line = line.strip() 82 | if line == "" or line.startswith('#'): 83 | continue 84 | 85 | requirement = Requirement(line) 86 | package = requirement.name 87 | 88 | try: 89 | version_installed = importlib.metadata.version(package) 90 | installed_version = packaging.version.parse(version_installed) 91 | 92 | # Check if the installed version satisfies the requirement 93 | if installed_version not in requirement.specifier: 94 | print(f"Version mismatch for {package}: Installed version {version_installed} does not meet requirement {requirement}") 95 | return False 96 | except Exception as e: 97 | print(f"Error checking version for {package}: {e}") 98 | return False 99 | 100 | return True 101 | 102 | 103 | def 
delete_folder_content(folder, prefix=None): 104 | result = True 105 | 106 | for filename in os.listdir(folder): 107 | file_path = os.path.join(folder, filename) 108 | try: 109 | if os.path.isfile(file_path) or os.path.islink(file_path): 110 | os.unlink(file_path) 111 | elif os.path.isdir(file_path): 112 | shutil.rmtree(file_path) 113 | except Exception as e: 114 | print(f'{prefix}Failed to delete {file_path}. Reason: {e}') 115 | result = False 116 | 117 | return result -------------------------------------------------------------------------------- /modules/localization.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | current_translation = {} 6 | localization_root = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'language') 7 | 8 | 9 | def localization_js(filename): 10 | global current_translation 11 | 12 | if isinstance(filename, str): 13 | full_name = os.path.abspath(os.path.join(localization_root, filename + '.json')) 14 | if os.path.exists(full_name): 15 | try: 16 | with open(full_name, encoding='utf-8') as f: 17 | current_translation = json.load(f) 18 | assert isinstance(current_translation, dict) 19 | for k, v in current_translation.items(): 20 | assert isinstance(k, str) 21 | assert isinstance(v, str) 22 | except Exception as e: 23 | print(str(e)) 24 | print(f'Failed to load localization file {full_name}') 25 | 26 | # current_translation = {k: 'XXX' for k in current_translation.keys()} # use this to see if all texts are covered 27 | 28 | return f"window.localization = {json.dumps(current_translation)}" 29 | 30 | 31 | def dump_english_config(components): 32 | all_texts = [] 33 | for c in components: 34 | label = getattr(c, 'label', None) 35 | value = getattr(c, 'value', None) 36 | choices = getattr(c, 'choices', None) 37 | info = getattr(c, 'info', None) 38 | 39 | if isinstance(label, str): 40 | all_texts.append(label) 41 | if isinstance(value, str): 42 | all_texts.append(value) 43 | if isinstance(info, str): 44 | all_texts.append(info) 45 | if isinstance(choices, list): 46 | for x in choices: 47 | if isinstance(x, str): 48 | all_texts.append(x) 49 | if isinstance(x, tuple): 50 | for y in x: 51 | if isinstance(y, str): 52 | all_texts.append(y) 53 | 54 | config_dict = {k: k for k in all_texts if k != "" and 'progress-container' not in k} 55 | full_name = os.path.abspath(os.path.join(localization_root, 'en.json')) 56 | 57 | with open(full_name, "w", encoding="utf-8") as json_file: 58 | json.dump(config_dict, json_file, indent=4) 59 | 60 | return 61 | -------------------------------------------------------------------------------- /modules/model_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib.parse import urlparse 3 | from typing import Optional 4 | 5 | 6 | def load_file_from_url( 7 | url: str, 8 | *, 9 | model_dir: str, 10 | progress: bool = True, 11 | file_name: Optional[str] = None, 12 | ) -> str: 13 | """Download a file from `url` into `model_dir`, using the file present if possible. 14 | 15 | Returns the path to the downloaded file. 
16 | """ 17 | os.makedirs(model_dir, exist_ok=True) 18 | if not file_name: 19 | parts = urlparse(url) 20 | file_name = os.path.basename(parts.path) 21 | cached_file = os.path.abspath(os.path.join(model_dir, file_name)) 22 | if not os.path.exists(cached_file): 23 | print(f'Downloading: "{url}" to {cached_file}\n') 24 | from torch.hub import download_url_to_file 25 | download_url_to_file(url, cached_file, progress=progress) 26 | return cached_file 27 | -------------------------------------------------------------------------------- /modules/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import contextlib 3 | 4 | 5 | @contextlib.contextmanager 6 | def use_patched_ops(operations): 7 | op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm'] 8 | backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names} 9 | 10 | try: 11 | for op_name in op_names: 12 | setattr(torch.nn, op_name, getattr(operations, op_name)) 13 | 14 | yield 15 | 16 | finally: 17 | for op_name in op_names: 18 | setattr(torch.nn, op_name, backups[op_name]) 19 | return 20 | -------------------------------------------------------------------------------- /modules/patch_precision.py: -------------------------------------------------------------------------------- 1 | # Consistent with Kohya to reduce differences between model training and inference. 2 | 3 | import torch 4 | import math 5 | import einops 6 | import numpy as np 7 | 8 | import ldm_patched.ldm.modules.diffusionmodules.openaimodel 9 | import ldm_patched.modules.model_sampling 10 | import ldm_patched.modules.sd1_clip 11 | 12 | from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule 13 | 14 | 15 | def patched_timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): 16 | # Consistent with Kohya to reduce differences between model training and inference. 17 | 18 | if not repeat_only: 19 | half = dim // 2 20 | freqs = torch.exp( 21 | -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half 22 | ).to(device=timesteps.device) 23 | args = timesteps[:, None].float() * freqs[None] 24 | embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) 25 | if dim % 2: 26 | embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) 27 | else: 28 | embedding = einops.repeat(timesteps, 'b -> b d', d=dim) 29 | return embedding 30 | 31 | 32 | def patched_register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, 33 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 34 | # Consistent with Kohya to reduce differences between model training and inference. 35 | 36 | if given_betas is not None: 37 | betas = given_betas 38 | else: 39 | betas = make_beta_schedule( 40 | beta_schedule, 41 | timesteps, 42 | linear_start=linear_start, 43 | linear_end=linear_end, 44 | cosine_s=cosine_s) 45 | 46 | alphas = 1. 
- betas
47 |     alphas_cumprod = np.cumprod(alphas, axis=0)
48 |     timesteps, = betas.shape
49 |     self.num_timesteps = int(timesteps)
50 |     self.linear_start = linear_start
51 |     self.linear_end = linear_end
52 |     sigmas = torch.tensor(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, dtype=torch.float32)
53 |     self.set_sigmas(sigmas)
54 |     return
55 | 
56 | 
57 | def patch_all_precision():
58 |     ldm_patched.ldm.modules.diffusionmodules.openaimodel.timestep_embedding = patched_timestep_embedding
59 |     ldm_patched.modules.model_sampling.ModelSamplingDiscrete._register_schedule = patched_register_schedule
60 |     return
61 | 
-------------------------------------------------------------------------------- /modules/sdxl_styles.py: --------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import json
4 | import math
5 | # import modules.config
6 | # TODO: wildcard lookups are disabled for now
7 | 
8 | from modules.util import get_files_from_folder
9 | 
10 | # cannot use modules.config - validators causing circular imports
11 | styles_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../sdxl_styles/'))
12 | wildcards_max_bfs_depth = 64
13 | 
14 | 
15 | def normalize_key(k):
16 |     k = k.replace('-', ' ')
17 |     words = k.split(' ')
18 |     words = [w[:1].upper() + w[1:].lower() for w in words]
19 |     k = ' '.join(words)
20 |     k = k.replace('3d', '3D')
21 |     k = k.replace('Sai', 'SAI')
22 |     k = k.replace('Mre', 'MRE')
23 |     k = k.replace('(s', '(S')
24 |     return k
25 | 
26 | 
27 | styles = {}
28 | 
29 | styles_files = get_files_from_folder(styles_path, ['.json'])
30 | 
31 | for x in ['sdxl_styles_fooocus.json',
32 |           'sdxl_styles_sai.json',
33 |           'sdxl_styles_mre.json',
34 |           'sdxl_styles_twri.json',
35 |           'sdxl_styles_diva.json',
36 |           'sdxl_styles_marc_k3nt3l.json']:
37 |     if x in styles_files:
38 |         styles_files.remove(x)
39 |         styles_files.append(x)
40 | 
41 | for styles_file in styles_files:
42 |     try:
43 |         with open(os.path.join(styles_path, styles_file), encoding='utf-8') as f:
44 |             for entry in json.load(f):
45 |                 name = normalize_key(entry['name'])
46 |                 prompt = entry['prompt'] if 'prompt' in entry else ''
47 |                 negative_prompt = entry['negative_prompt'] if 'negative_prompt' in entry else ''
48 |                 styles[name] = (prompt, negative_prompt)
49 |     except Exception as e:
50 |         print(str(e))
51 |         print(f'Failed to load style file {styles_file}')
52 | 
53 | style_keys = list(styles.keys())
54 | fooocus_expansion = "Fooocus V2"
55 | legal_style_names = [fooocus_expansion] + style_keys
56 | 
57 | 
58 | def apply_style(style, positive):
59 |     p, n = styles[style]
60 |     return p.replace('{prompt}', positive).splitlines(), n.splitlines()
61 | 
62 | 
63 | def apply_wildcards(wildcard_text, rng, i, read_wildcards_in_order):
64 |     for _ in range(wildcards_max_bfs_depth):
65 |         placeholders = re.findall(r'__([\w-]+)__', wildcard_text)
66 |         if len(placeholders) == 0:
67 |             return wildcard_text
68 | 
69 |         print(f'[Wildcards] processing: {wildcard_text}')
70 |         for placeholder in placeholders:
71 |             try:
72 |                 matches = [x for x in modules.config.wildcard_filenames if os.path.splitext(os.path.basename(x))[0] == placeholder]
73 |                 words = open(os.path.join(modules.config.path_wildcards, matches[0]), encoding='utf-8').read().splitlines()
74 |                 words = [x for x in words if x != '']
75 |                 assert len(words) > 0
76 |                 if read_wildcards_in_order:
77 |                     wildcard_text = wildcard_text.replace(f'__{placeholder}__', words[i % len(words)], 1)
78 |                 else:
79 |                     wildcard_text = wildcard_text.replace(f'__{placeholder}__', rng.choice(words), 1)
80 | 
except: 81 | print(f'[Wildcards] Warning: {placeholder}.txt missing or empty. ' 82 | f'Using "{placeholder}" as a normal word.') 83 | wildcard_text = wildcard_text.replace(f'__{placeholder}__', placeholder) 84 | print(f'[Wildcards] {wildcard_text}') 85 | 86 | print(f'[Wildcards] BFS stack overflow. Current text: {wildcard_text}') 87 | return wildcard_text 88 | 89 | 90 | def get_words(arrays, totalMult, index): 91 | if len(arrays) == 1: 92 | return [arrays[0].split(',')[index]] 93 | else: 94 | words = arrays[0].split(',') 95 | word = words[index % len(words)] 96 | index -= index % len(words) 97 | index /= len(words) 98 | index = math.floor(index) 99 | return [word] + get_words(arrays[1:], math.floor(totalMult/len(words)), index) 100 | 101 | 102 | def apply_arrays(text, index): 103 | arrays = re.findall(r'\[\[(.*?)\]\]', text) 104 | if len(arrays) == 0: 105 | return text 106 | 107 | print(f'[Arrays] processing: {text}') 108 | mult = 1 109 | for arr in arrays: 110 | words = arr.split(',') 111 | mult *= len(words) 112 | 113 | index %= mult 114 | chosen_words = get_words(arrays, mult, index) 115 | 116 | i = 0 117 | for arr in arrays: 118 | text = text.replace(f'[[{arr}]]', chosen_words[i], 1) 119 | i = i+1 120 | 121 | return text 122 | 123 | -------------------------------------------------------------------------------- /modules/style_sorter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gradio as gr 3 | import modules.localization as localization 4 | import json 5 | 6 | 7 | all_styles = [] 8 | 9 | 10 | def try_load_sorted_styles(style_names, default_selected): 11 | global all_styles 12 | 13 | all_styles = style_names 14 | 15 | try: 16 | if os.path.exists('sorted_styles.json'): 17 | with open('sorted_styles.json', 'rt', encoding='utf-8') as fp: 18 | sorted_styles = [] 19 | for x in json.load(fp): 20 | if x in all_styles: 21 | sorted_styles.append(x) 22 | for x in all_styles: 23 | if x not in sorted_styles: 24 | sorted_styles.append(x) 25 | all_styles = sorted_styles 26 | except Exception as e: 27 | print('Load style sorting failed.') 28 | print(e) 29 | 30 | unselected = [y for y in all_styles if y not in default_selected] 31 | all_styles = default_selected + unselected 32 | 33 | return 34 | 35 | 36 | def sort_styles(selected): 37 | global all_styles 38 | unselected = [y for y in all_styles if y not in selected] 39 | sorted_styles = selected + unselected 40 | try: 41 | with open('sorted_styles.json', 'wt', encoding='utf-8') as fp: 42 | json.dump(sorted_styles, fp, indent=4) 43 | except Exception as e: 44 | print('Write style sorting failed.') 45 | print(e) 46 | all_styles = sorted_styles 47 | return gr.CheckboxGroup.update(choices=sorted_styles) 48 | 49 | 50 | def localization_key(x): 51 | return x + localization.current_translation.get(x, '') 52 | 53 | 54 | def search_styles(selected, query): 55 | unselected = [y for y in all_styles if y not in selected] 56 | matched = [y for y in unselected if query.lower() in localization_key(y).lower()] if len(query.replace(' ', '')) > 0 else [] 57 | unmatched = [y for y in unselected if y not in matched] 58 | sorted_styles = matched + selected + unmatched 59 | return gr.CheckboxGroup.update(choices=sorted_styles) 60 | -------------------------------------------------------------------------------- /modules/ui_gradio_extensions.py: -------------------------------------------------------------------------------- 1 | # based on 
https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/modules/ui_gradio_extensions.py 2 | 3 | import os 4 | import gradio as gr 5 | import args_manager 6 | 7 | from modules.localization import localization_js 8 | 9 | 10 | GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse 11 | 12 | modules_path = os.path.dirname(os.path.realpath(__file__)) 13 | script_path = os.path.dirname(modules_path) 14 | 15 | 16 | def webpath(fn): 17 | if fn.startswith(script_path): 18 | web_path = os.path.relpath(fn, script_path).replace('\\', '/') 19 | else: 20 | web_path = os.path.abspath(fn) 21 | 22 | return f'file={web_path}?{os.path.getmtime(fn)}' 23 | 24 | 25 | def javascript_html(): 26 | script_js_path = webpath('javascript/script.js') 27 | context_menus_js_path = webpath('javascript/contextMenus.js') 28 | localization_js_path = webpath('javascript/localization.js') 29 | zoom_js_path = webpath('javascript/zoom.js') 30 | edit_attention_js_path = webpath('javascript/edit-attention.js') 31 | viewer_js_path = webpath('javascript/viewer.js') 32 | image_viewer_js_path = webpath('javascript/imageviewer.js') 33 | samples_path = webpath(os.path.abspath('./sdxl_styles/samples/fooocus_v2.jpg')) 34 | head = f'\n' 35 | head += f'\n' 36 | head += f'\n' 37 | head += f'\n' 38 | head += f'\n' 39 | head += f'\n' 40 | head += f'\n' 41 | head += f'\n' 42 | head += f'\n' 43 | 44 | if args_manager.args.theme: 45 | head += f'\n' 46 | 47 | return head 48 | 49 | 50 | def css_html(): 51 | style_css_path = webpath('css/style.css') 52 | head = f'' 53 | return head 54 | 55 | 56 | def reload_javascript(): 57 | js = javascript_html() 58 | css = css_html() 59 | 60 | def template_response(*args, **kwargs): 61 | res = GradioTemplateResponseOriginal(*args, **kwargs) 62 | res.body = res.body.replace(b'', f'{js}'.encode("utf8")) 63 | res.body = res.body.replace(b'', f'{css}'.encode("utf8")) 64 | res.init_headers() 65 | return res 66 | 67 | gr.routes.templates.TemplateResponse = template_response 68 | -------------------------------------------------------------------------------- /modules/upscaler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import modules.core as core 4 | 5 | from ldm_patched.pfn.architecture.RRDB import RRDBNet as ESRGAN 6 | from ldm_patched.contrib.external_upscale_model import ImageUpscaleWithModel 7 | from collections import OrderedDict 8 | from modules.config import path_upscale_models 9 | 10 | model_filename = os.path.join(path_upscale_models, 'fooocus_upscaler_s409985e5.bin') 11 | opImageUpscaleWithModel = ImageUpscaleWithModel() 12 | model = None 13 | 14 | 15 | def perform_upscale(img): 16 | global model 17 | 18 | print(f'Upscaling image with shape {str(img.shape)} ...') 19 | 20 | if model is None: 21 | sd = torch.load(model_filename) 22 | sdo = OrderedDict() 23 | for k, v in sd.items(): 24 | sdo[k.replace('residual_block_', 'RDB')] = v 25 | del sd 26 | model = ESRGAN(sdo) 27 | model.cpu() 28 | model.eval() 29 | 30 | img = core.numpy_to_pytorch(img) 31 | img = opImageUpscaleWithModel.upscale(model, img)[0] 32 | img = core.pytorch_to_numpy(img)[0] 33 | 34 | return img 35 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | # python=3.10.14 3 | diffusers==0.30.2 4 | transformers==4.46.0 5 | accelerate==0.29.1 6 | tqdm==4.66.5 7 | matplotlib==3.8.4 8 | torch==2.2.2 9 | 
torchvision==0.17.2 10 | xformers==0.0.25.post1 11 | huggingface_hub==0.23.5 12 | peft==0.10.0 13 | bezier 14 | notebook==7.1.2 15 | opencv-python==4.9.0.80 16 | numpy==1.25.1 17 | scipy==1.13.0 18 | torchsde==0.2.6 19 | einops==0.7.0 20 | -------------------------------------------------------------------------------- /sdxl_styles/sdxl_styles_fooocus.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Fooocus Enhance", 4 | "negative_prompt": "(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)" 5 | }, 6 | { 7 | "name": "Fooocus Semi Realistic", 8 | "negative_prompt": "(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)" 9 | }, 10 | { 11 | "name": "Fooocus Sharp", 12 | "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo, sharp focus, high budget, cinemascope, moody, epic, gorgeous, film grain, grainy", 13 | "negative_prompt": "anime, cartoon, graphic, (blur, blurry, bokeh), text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 14 | }, 15 | { 16 | "name": "Fooocus Masterpiece", 17 | "prompt": "(masterpiece), (best quality), (ultra-detailed), {prompt}, illustration, disheveled hair, detailed eyes, perfect composition, moist skin, intricate details, earrings, by wlop", 18 | "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair,extra digit, fewer digits, cropped, worst quality, low quality" 19 | }, 20 | { 21 | "name": "Fooocus Photograph", 22 | "prompt": "photograph {prompt}, 50mm . 
cinematic 4k epic detailed 4k epic detailed photograph shot on kodak detailed cinematic hbo dark moody, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage", 23 | "negative_prompt": "Brad Pitt, bokeh, depth of field, blurry, cropped, regular face, saturated, contrast, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck" 24 | }, 25 | { 26 | "name": "Fooocus Negative", 27 | "negative_prompt": "deformed, bad anatomy, disfigured, poorly drawn face, mutated, extra limb, ugly, poorly drawn hands, missing limb, floating limbs, disconnected limbs, disconnected head, malformed hands, long neck, mutated hands and fingers, bad hands, missing fingers, cropped, worst quality, low quality, mutation, poorly drawn, huge calf, bad hands, fused hand, missing hand, disappearing arms, disappearing thigh, disappearing calf, disappearing legs, missing fingers, fused fingers, abnormal eye proportion, Abnormal hands, abnormal legs, abnormal feet, abnormal fingers, drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly, anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch" 28 | }, 29 | { 30 | "name": "Fooocus Cinematic", 31 | "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", 32 | "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 33 | } 34 | ] 35 | -------------------------------------------------------------------------------- /sdxl_styles/sdxl_styles_sai.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "sai-3d-model", 4 | "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting", 5 | "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting" 6 | }, 7 | { 8 | "name": "sai-analog film", 9 | "prompt": "analog film photo {prompt} . faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage", 10 | "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured" 11 | }, 12 | { 13 | "name": "sai-anime", 14 | "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed", 15 | "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast" 16 | }, 17 | { 18 | "name": "sai-cinematic", 19 | "prompt": "cinematic film still {prompt} . shallow depth of field, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", 20 | "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 21 | }, 22 | { 23 | "name": "sai-comic book", 24 | "prompt": "comic {prompt} . 
graphic illustration, comic art, graphic novel art, vibrant, highly detailed", 25 | "negative_prompt": "photograph, deformed, glitch, noisy, realistic, stock photo" 26 | }, 27 | { 28 | "name": "sai-craft clay", 29 | "prompt": "play-doh style {prompt} . sculpture, clay art, centered composition, Claymation", 30 | "negative_prompt": "sloppy, messy, grainy, highly detailed, ultra textured, photo" 31 | }, 32 | { 33 | "name": "sai-digital art", 34 | "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed", 35 | "negative_prompt": "photo, photorealistic, realism, ugly" 36 | }, 37 | { 38 | "name": "sai-enhance", 39 | "prompt": "breathtaking {prompt} . award-winning, professional, highly detailed", 40 | "negative_prompt": "ugly, deformed, noisy, blurry, distorted, grainy" 41 | }, 42 | { 43 | "name": "sai-fantasy art", 44 | "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy", 45 | "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white" 46 | }, 47 | { 48 | "name": "sai-isometric", 49 | "prompt": "isometric style {prompt} . vibrant, beautiful, crisp, detailed, ultra detailed, intricate", 50 | "negative_prompt": "deformed, mutated, ugly, disfigured, blur, blurry, noise, noisy, realistic, photographic" 51 | }, 52 | { 53 | "name": "sai-line art", 54 | "prompt": "line art drawing {prompt} . professional, sleek, modern, minimalist, graphic, line art, vector graphics", 55 | "negative_prompt": "anime, photorealistic, 35mm film, deformed, glitch, blurry, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, mutated, realism, realistic, impressionism, expressionism, oil, acrylic" 56 | }, 57 | { 58 | "name": "sai-lowpoly", 59 | "prompt": "low-poly style {prompt} . low-poly game art, polygon mesh, jagged, blocky, wireframe edges, centered composition", 60 | "negative_prompt": "noisy, sloppy, messy, grainy, highly detailed, ultra textured, photo" 61 | }, 62 | { 63 | "name": "sai-neonpunk", 64 | "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional", 65 | "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured" 66 | }, 67 | { 68 | "name": "sai-origami", 69 | "prompt": "origami style {prompt} . paper art, pleated paper, folded, origami art, pleats, cut and fold, centered composition", 70 | "negative_prompt": "noisy, sloppy, messy, grainy, highly detailed, ultra textured, photo" 71 | }, 72 | { 73 | "name": "sai-photographic", 74 | "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed", 75 | "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly" 76 | }, 77 | { 78 | "name": "sai-pixel art", 79 | "prompt": "pixel-art {prompt} . 
low-res, blocky, pixel art style, 8-bit graphics", 80 | "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic" 81 | }, 82 | { 83 | "name": "sai-texture", 84 | "prompt": "texture {prompt} top down close-up", 85 | "negative_prompt": "ugly, deformed, noisy, blurry" 86 | } 87 | ] -------------------------------------------------------------------------------- /utils/FooocusDpmpp2mSdeGpuKarras.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ldm_patched.k_diffusion.sampling import BrownianTreeNoiseSampler 3 | from ldm_patched.modules.model_sampling import EPS, ModelSamplingDiscrete 4 | 5 | 6 | class ModelSampling(EPS, ModelSamplingDiscrete): 7 | pass 8 | 9 | def append_zero(x): 10 | return torch.cat([x, x.new_zeros([1])]) 11 | 12 | 13 | def get_sigmas_karras(n, sigma_min, sigma_max, rho=7., device='cpu'): 14 | 15 | """Constructs the noise schedule of Karras et al. (2022).""" 16 | ramp = torch.linspace(0, 1, n, device=device) 17 | min_inv_rho = sigma_min ** (1 / rho) 18 | max_inv_rho = sigma_max ** (1 / rho) 19 | sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho 20 | return append_zero(sigmas).to(device) 21 | 22 | class KSampler: 23 | 24 | def __init__(self, 25 | latent, 26 | steps, 27 | device, 28 | sampler='dpmpp_2m_sde_gpu', 29 | scheduler='karras', 30 | denoise=1, 31 | model_options={}, 32 | start_step=0, 33 | last_step=30, 34 | force_full_denoise=False, 35 | seed = None): 36 | self.device = device 37 | self.scheduler = scheduler 38 | self.sampler = sampler 39 | self.set_steps(steps, denoise) 40 | self.denoise = denoise # denoising_strength 41 | self.model_options = model_options 42 | 43 | # step param 44 | self.old_denoised = None 45 | self.h_last = None 46 | 47 | self.model_sampling = ModelSampling() 48 | 49 | 50 | sigmas = self.sigmas 51 | 52 | if last_step is not None and last_step < (len(sigmas) - 1): 53 | sigmas = sigmas[:last_step + 1] 54 | if force_full_denoise: 55 | sigmas[-1] = 0 56 | 57 | if start_step is not None: 58 | assert start_step < (len(sigmas) - 1) 59 | sigmas = sigmas[start_step:] 60 | 61 | # if start_step < (len(sigmas) - 1): 62 | # sigmas = sigmas[start_step:] 63 | # else: 64 | # if latent_image is not None: 65 | # return latent_image 66 | # else: 67 | # return torch.zeros_like(noise) 68 | sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() 69 | self.noise_sampler = BrownianTreeNoiseSampler(latent, sigma_min, sigma_max, seed=seed) 70 | self.sigmas = sigmas 71 | self.log_sigmas = sigmas.log() 72 | 73 | def calculate_sigmas(self, steps): 74 | sigmas = None 75 | 76 | discard_penultimate_sigma = False 77 | if self.sampler in ['dpm_2', 'dpm_2_ancestral', 'uni_pc', 'uni_pc_bh2']: 78 | steps += 1 79 | discard_penultimate_sigma = True 80 | 81 | sigmas = get_sigmas_karras(n=steps, sigma_min=0.0292, sigma_max=14.6146) 82 | # sigmas = get_sigmas_karras(n=steps, sigma_min=0.0291675, sigma_max=14.614642) 83 | 84 | if discard_penultimate_sigma: 85 | sigmas = torch.cat([sigmas[:-2], sigmas[-1:]]) 86 | return sigmas 87 | 88 | def set_steps(self, steps, denoise=None): 89 | self.steps = steps 90 | if denoise is None or denoise > 0.9999: 91 | self.sigmas = self.calculate_sigmas(steps).to(self.device) 92 | else: 93 | new_steps = int(steps/denoise) 94 | sigmas = self.calculate_sigmas(new_steps).to(self.device) 95 | self.sigmas = sigmas[-(steps + 1):] 96 | 97 | @torch.no_grad() 98 | def step(self, i, pred_x0, x, t=None, eta=1., s_noise=1., 
solver_type='midpoint'): 99 | """DPM-Solver++(2M) SDE.""" 100 | 101 | if solver_type not in {'heun', 'midpoint'}: 102 | raise ValueError('solver_type must be \'heun\' or \'midpoint\'') 103 | sigmas = self.sigmas 104 | 105 | denoised = pred_x0 106 | if sigmas[i + 1] == 0: 107 | x = denoised 108 | else: 109 | # DPM-Solver++(2M) SDE 110 | t, s = -sigmas[i].log(), -sigmas[i + 1].log() 111 | h = s - t 112 | eta_h = eta * h 113 | 114 | x = sigmas[i + 1] / sigmas[i] * (-eta_h).exp() * x + (-h - eta_h).expm1().neg() * denoised 115 | 116 | if self.old_denoised is not None: 117 | r = self.h_last / h 118 | if solver_type == 'heun': 119 | x = x + ((-h - eta_h).expm1().neg() / (-h - eta_h) + 1) * (1 / r) * (denoised - self.old_denoised) 120 | elif solver_type == 'midpoint': 121 | x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - self.old_denoised) 122 | 123 | if eta: 124 | x = x + self.noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise 125 | 126 | self.old_denoised = denoised 127 | self.h_last = h 128 | return x 129 | 130 | def timestep(self, i): 131 | sigma = self.sigmas[i] 132 | t = self.model_sampling.timestep(sigma).float() 133 | return t 134 | 135 | def calculate_input(self, i, x): 136 | sigma = self.sigmas[i] 137 | return self.model_sampling.calculate_input(sigma, x) 138 | 139 | def calculate_denoised(self, i, model_output, model_input): 140 | sigma = self.sigmas[i] 141 | return self.model_sampling.calculate_denoised(sigma, model_output, model_input) 142 | 143 | 144 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .add_fooocus_inpaint_patch import add_fooocus_inpaint_patch 2 | from .add_fooocus_inpaint_head_patch import ( 3 | add_fooocus_inpaint_head_patch_with_work, 4 | inject_fooocus_inpaint_head, 5 | ) 6 | from .prompt_style_enhance import enhance_prompt 7 | from .FooocusDpmpp2mSdeGpuKarras import KSampler 8 | from .mask_aug import extend_mask_with_bezier, mask_paint2bbox 9 | from .orthogonal_decomposition import sks_decompose, orthogonal_decomposition 10 | -------------------------------------------------------------------------------- /utils/mask_aug.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import bezier 3 | import random 4 | import numpy as np 5 | 6 | 7 | def extend_mask_with_bezier(mask, extend_ratio=0.2, random_width=5): 8 | 9 | H, W = mask.shape 10 | 11 | contours, _ = cv2.findContours( 12 | mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE 13 | ) 14 | 15 | extended_mask = np.zeros((H, W), dtype=np.uint8) 16 | 17 | for contour in contours: 18 | bbox = cv2.boundingRect(contour) 19 | x, y, w, h = bbox 20 | 21 | extended_bbox = [ 22 | x - int(extend_ratio * w), 23 | y - int(extend_ratio * h), 24 | x + w + int(extend_ratio * w), 25 | y + h + int(extend_ratio * h), 26 | ] 27 | 28 | extended_bbox[0] = max(0, extended_bbox[0]) 29 | extended_bbox[1] = max(0, extended_bbox[1]) 30 | extended_bbox[2] = min(W, extended_bbox[2]) 31 | extended_bbox[3] = min(H, extended_bbox[3]) 32 | 33 | top_nodes = np.asfortranarray( 34 | [[x, (x + x + w) // 2, x + w], [y, extended_bbox[1], y]] 35 | ) 36 | down_nodes = np.asfortranarray( 37 | [[x + w, (x + x + w) // 2, x], [y + h, extended_bbox[3], y + h]] 38 | ) 39 | left_nodes = np.asfortranarray( 40 | [[x, extended_bbox[0], x], [y + h, (y + y + h) // 2, y]] 41 | ) 42 | right_nodes 
= np.asfortranarray( 43 | [[x + w, extended_bbox[2], x + w], [y, (y + y + h) // 2, y + h]] 44 | ) 45 | 46 | top_curve = bezier.Curve(top_nodes, degree=2) 47 | right_curve = bezier.Curve(right_nodes, degree=2) 48 | down_curve = bezier.Curve(down_nodes, degree=2) 49 | left_curve = bezier.Curve(left_nodes, degree=2) 50 | 51 | pt_list = [] 52 | for curve in [top_curve, right_curve, down_curve, left_curve]: 53 | for i in range(1, 20): 54 | pt = curve.evaluate(i * 0.05) 55 | pt_list.append( 56 | ( 57 | int(pt[0, 0] + random.randint(-random_width, random_width)), 58 | int(pt[1, 0] + random.randint(-random_width, random_width)), 59 | ) 60 | ) 61 | cv2.fillPoly(extended_mask, [np.array(pt_list)], 1) 62 | 63 | return extended_mask * 255 64 | 65 | 66 | def mask_paint2bbox(mask, random_drop=0.0): 67 | contours, _ = cv2.findContours( 68 | mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE 69 | ) 70 | x, y, w, h = cv2.boundingRect(contours[0]) 71 | new_mask = np.zeros_like(mask) 72 | # if random_drop > 0 and random.random() < random_drop: 73 | # w = w * (random.random() + 0.5) 74 | # h = h * (random.random() + 0.5) 75 | 76 | cv2.rectangle(new_mask, (x, y), (x + w, y + h), (255, 255, 255), -1) 77 | return new_mask 78 | -------------------------------------------------------------------------------- /utils/orthogonal_decomposition.py: -------------------------------------------------------------------------------- 1 | import re 2 | import torch 3 | 4 | def normalize_spaces(text: str) -> str: 5 | return re.sub(r"\s+", " ", text) 6 | 7 | 8 | def orthogonal_decomposition(raw_emb: torch.Tensor, remove_emb: torch.Tensor) -> torch.Tensor: 9 | projected_vector_magnitude = raw_emb.dot(remove_emb) / remove_emb.norm() 10 | projected_vector = projected_vector_magnitude * remove_emb / remove_emb.norm() 11 | return raw_emb - projected_vector 12 | 13 | 14 | def sks_decompose( 15 | prompt: str, 16 | prompt_emb: torch.Tensor, 17 | to_decopose_embeds: torch.Tensor, 18 | decompose_words_num: int, 19 | prefix_prompt:str = "", 20 | ) -> torch.Tensor: 21 | 22 | prompt = normalize_spaces(prompt.lower().strip()) 23 | prompt_words = prompt.split(" ") 24 | 25 | prefix_prompt = normalize_spaces(prefix_prompt.lower().strip()) 26 | 27 | if prefix_prompt == "": 28 | prefix_prompt_len = 0 + 1 29 | else: 30 | prefix_prompt_len = len(prefix_prompt.split(" ")) + 1 31 | 32 | # get index of "sks" 33 | for i in range(len(prompt_words)): 34 | if prompt_words[i] == "sks": 35 | ind_sks = i + 1 36 | break 37 | else: 38 | raise ValueError(f"Prompt {prompt} does not contain 'sks'") 39 | 40 | # # get index of remove_words 41 | inds_replace = [] 42 | # for word in remove_words: 43 | # word = word.lower() 44 | # for i in range(len(prompt_words)): 45 | # if prompt_words[i] == word: 46 | # inds_replace.append(i + 1) 47 | # break 48 | 49 | # for ind_replace in inds_replace: 50 | # prompt_emb[ind_sks, ...] = orthogonal_decomposition( 51 | # prompt_emb[ind_sks, ...], raw_prompt_embeds[ind_replace, ...] 52 | # ) 53 | 54 | for ind_de in range(prefix_prompt_len, decompose_words_num + prefix_prompt_len): 55 | # for i in range(decompose_words_num): 56 | for ind in range(1, len(prompt_words) + 1): 57 | prompt_emb[ind, ...] = orthogonal_decomposition( 58 | prompt_emb[ind, ...], to_decopose_embeds[ind_de, ...] 59 | ) 60 | 61 | return prompt_emb 62 | --------------------------------------------------------------------------------
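A minimal usage sketch (illustrative only, not a file in this repository) of utils/orthogonal_decomposition.py: orthogonal_decomposition subtracts from raw_emb its projection onto remove_emb, so the returned vector has (numerically) zero component along the removed direction, and sks_decompose applies the same operation token by token to a prompt embedding. The sketch assumes the repository root is on PYTHONPATH (the helpers are re-exported by utils/__init__.py) and uses an arbitrary embedding width of 2048 as a stand-in for a real text-encoder dimension.

import torch

from utils import orthogonal_decomposition  # re-exported by utils/__init__.py

torch.manual_seed(0)
raw_emb = torch.randn(2048)     # stand-in for one token embedding of the prompt
remove_emb = torch.randn(2048)  # stand-in for the embedding of the concept to strip out

cleaned = orthogonal_decomposition(raw_emb, remove_emb)

print(float(cleaned.dot(remove_emb)))     # ~0: no component left along remove_emb
print(float((raw_emb - cleaned).norm()))  # magnitude of the projection that was removed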
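In the same spirit, a small hypothetical call (also not part of the repository) for mask_paint2bbox from utils/mask_aug.py, which collapses a painted mask to the filled bounding rectangle of its first contour; extend_mask_with_bezier is its randomized counterpart, inflating that box with jittered Bezier outlines for augmentation. The toy mask below is made up for illustration; the call assumes opencv-python from requirements.txt is installed and the utils package is importable.

import numpy as np

from utils import mask_paint2bbox

# Hypothetical 256x256 user scribble: a filled blob marking the edit region.
mask = np.zeros((256, 256), dtype=np.uint8)
mask[100:160, 80:160] = 255

bbox_mask = mask_paint2bbox(mask)

print(bbox_mask.shape, bbox_mask.dtype)             # (256, 256) uint8
print(int(bbox_mask.max()), int(bbox_mask.min()))   # 255 0 -- tight box filled with 255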