├── .github └── workflows │ └── publish.yml ├── .gitignore ├── README.md ├── __init__.py ├── fooocus_expansion ├── config.json ├── merges.txt ├── positive.txt ├── special_tokens_map.json ├── tokenizer.json ├── tokenizer_config.json └── vocab.json ├── py ├── __init__.py ├── api.py ├── extras │ ├── BLIP │ │ ├── configs │ │ │ ├── bert_config.json │ │ │ ├── caption_coco.yaml │ │ │ ├── med_config.json │ │ │ ├── nlvr.yaml │ │ │ ├── nocaps.yaml │ │ │ ├── pretrain.yaml │ │ │ ├── retrieval_coco.yaml │ │ │ ├── retrieval_flickr.yaml │ │ │ ├── retrieval_msrvtt.yaml │ │ │ └── vqa.yaml │ │ └── models │ │ │ ├── bert_tokenizer │ │ │ ├── config.json │ │ │ ├── tokenizer.json │ │ │ ├── tokenizer_config.json │ │ │ └── vocab.txt │ │ │ ├── blip.py │ │ │ ├── blip_itm.py │ │ │ ├── blip_nlvr.py │ │ │ ├── blip_pretrain.py │ │ │ ├── blip_retrieval.py │ │ │ ├── blip_vqa.py │ │ │ ├── med.py │ │ │ ├── nlvr_encoder.py │ │ │ └── vit.py │ ├── expansion.py │ ├── face_crop.py │ ├── facexlib │ │ ├── detection │ │ │ ├── __init__.py │ │ │ ├── align_trans.py │ │ │ ├── matlab_cp2tform.py │ │ │ ├── retinaface.py │ │ │ ├── retinaface_net.py │ │ │ └── retinaface_utils.py │ │ ├── parsing │ │ │ ├── __init__.py │ │ │ ├── bisenet.py │ │ │ ├── parsenet.py │ │ │ └── resnet.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── face_restoration_helper.py │ │ │ ├── face_utils.py │ │ │ └── misc.py │ ├── interrogate.py │ ├── ip_adapter.py │ ├── preprocessors.py │ ├── resampler.py │ ├── vae_interpose.py │ └── wd14tagger.py ├── fooocusNodes.py ├── ldm_patched │ ├── contrib │ │ ├── external.py │ │ ├── external_canny.py │ │ ├── external_clip_sdxl.py │ │ ├── external_compositing.py │ │ ├── external_custom_sampler.py │ │ ├── external_freelunch.py │ │ ├── external_hypernetwork.py │ │ ├── external_hypertile.py │ │ ├── external_images.py │ │ ├── external_latent.py │ │ ├── external_mask.py │ │ ├── external_model_advanced.py │ │ ├── external_model_downscale.py │ │ ├── external_model_merging.py │ │ ├── external_perpneg.py │ │ ├── external_photomaker.py │ │ ├── external_post_processing.py │ │ ├── external_rebatch.py │ │ ├── external_sag.py │ │ ├── external_sdupscale.py │ │ ├── external_stable3d.py │ │ ├── external_tomesd.py │ │ ├── external_upscale_model.py │ │ └── external_video_model.py │ ├── controlnet │ │ └── cldm.py │ ├── k_diffusion │ │ ├── sampling.py │ │ └── utils.py │ ├── ldm │ │ ├── models │ │ │ └── autoencoder.py │ │ ├── modules │ │ │ ├── attention.py │ │ │ ├── diffusionmodules │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── openaimodel.py │ │ │ │ ├── upscaling.py │ │ │ │ └── util.py │ │ │ ├── distributions │ │ │ │ ├── __init__.py │ │ │ │ └── distributions.py │ │ │ ├── ema.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ └── noise_aug_modules.py │ │ │ ├── sub_quadratic_attention.py │ │ │ └── temporal_ae.py │ │ └── util.py │ ├── modules │ │ ├── args_parser.py │ │ ├── checkpoint_pickle.py │ │ ├── clip_config_bigg.json │ │ ├── clip_model.py │ │ ├── clip_vision.py │ │ ├── clip_vision_config_g.json │ │ ├── clip_vision_config_h.json │ │ ├── clip_vision_config_vitl.json │ │ ├── conds.py │ │ ├── controlnet.py │ │ ├── diffusers_convert.py │ │ ├── diffusers_load.py │ │ ├── gligen.py │ │ ├── latent_formats.py │ │ ├── lora.py │ │ ├── model_base.py │ │ ├── model_detection.py │ │ ├── model_patcher.py │ │ ├── model_sampling.py │ │ ├── ops.py │ │ ├── options.py │ │ ├── sample.py │ │ ├── samplers.py │ │ ├── sd.py │ │ ├── sd1_clip.py │ │ ├── sd1_clip_config.json │ │ ├── sd1_tokenizer │ │ │ ├── merges.txt │ │ │ ├── special_tokens_map.json │ │ │ ├── 
tokenizer_config.json │ │ │ └── vocab.json │ │ ├── sd2_clip.py │ │ ├── sd2_clip_config.json │ │ ├── sdxl_clip.py │ │ ├── supported_models.py │ │ ├── supported_models_base.py │ │ └── utils.py │ ├── pfn │ │ ├── __init__.py │ │ ├── architecture │ │ │ ├── DAT.py │ │ │ ├── HAT.py │ │ │ ├── LICENSE-DAT │ │ │ ├── LICENSE-ESRGAN │ │ │ ├── LICENSE-HAT │ │ │ ├── LICENSE-RealESRGAN │ │ │ ├── LICENSE-SCUNet │ │ │ ├── LICENSE-SPSR │ │ │ ├── LICENSE-SwiftSRGAN │ │ │ ├── LICENSE-Swin2SR │ │ │ ├── LICENSE-SwinIR │ │ │ ├── LICENSE-lama │ │ │ ├── LaMa.py │ │ │ ├── OmniSR │ │ │ │ ├── ChannelAttention.py │ │ │ │ ├── LICENSE │ │ │ │ ├── OSA.py │ │ │ │ ├── OSAG.py │ │ │ │ ├── OmniSR.py │ │ │ │ ├── esa.py │ │ │ │ ├── layernorm.py │ │ │ │ └── pixelshuffle.py │ │ │ ├── RRDB.py │ │ │ ├── SCUNet.py │ │ │ ├── SPSR.py │ │ │ ├── SRVGG.py │ │ │ ├── SwiftSRGAN.py │ │ │ ├── Swin2SR.py │ │ │ ├── SwinIR.py │ │ │ ├── __init__.py │ │ │ ├── block.py │ │ │ ├── face │ │ │ │ ├── LICENSE-GFPGAN │ │ │ │ ├── LICENSE-RestoreFormer │ │ │ │ ├── LICENSE-codeformer │ │ │ │ ├── arcface_arch.py │ │ │ │ ├── codeformer.py │ │ │ │ ├── fused_act.py │ │ │ │ ├── gfpgan_bilinear_arch.py │ │ │ │ ├── gfpganv1_arch.py │ │ │ │ ├── gfpganv1_clean_arch.py │ │ │ │ ├── restoreformer_arch.py │ │ │ │ ├── stylegan2_arch.py │ │ │ │ ├── stylegan2_bilinear_arch.py │ │ │ │ ├── stylegan2_clean_arch.py │ │ │ │ └── upfirdn2d.py │ │ │ └── timm │ │ │ │ ├── LICENSE │ │ │ │ ├── drop.py │ │ │ │ ├── helpers.py │ │ │ │ └── weight_init.py │ │ ├── model_loading.py │ │ └── types.py │ ├── t2ia │ │ └── adapter.py │ ├── taesd │ │ └── taesd.py │ ├── unipc │ │ └── uni_pc.py │ └── utils │ │ ├── latent_visualization.py │ │ └── path_utils.py ├── libs │ └── utils.py ├── log.py ├── modules │ ├── __init__.py │ ├── advanced_parameters.py │ ├── anisotropic.py │ ├── config.py │ ├── constants.py │ ├── core.py │ ├── default_pipeline.py │ ├── flags.py │ ├── inpaint_worker.py │ ├── lora.py │ ├── model_loader.py │ ├── ops.py │ ├── patch.py │ ├── patch_clip.py │ ├── patch_precision.py │ ├── sample_hijack.py │ ├── sdxl_styles.py │ ├── upscaler.py │ └── util.py └── prompt.py ├── pyproject.toml ├── requirements.txt ├── screnshot ├── Fooocus.png └── FooocusNodes.png ├── sdxl_styles ├── sdxl_styles_diva.json ├── sdxl_styles_fooocus.json ├── sdxl_styles_marc_k3nt3l.json ├── sdxl_styles_mre.json ├── sdxl_styles_sai.json └── sdxl_styles_twri.json ├── web ├── js │ ├── dynamic_widgets.js │ ├── interface.js │ └── style_selector.js └── lib │ └── fabric.js ├── wildcards ├── artist.txt ├── color.txt ├── color_flower.txt ├── extended-color.txt ├── flower.txt └── nationality.txt └── workflow ├── basic.json ├── controlnet.json ├── detailer_fix.json ├── fooocus_describe.json ├── fooocus_prompt_expansion.json ├── imagePrompt_faceSwap.json ├── inpaint_outpaint.json ├── ipadapter_plus_style_transfer.json ├── screenshot.png └── upscale.json /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "pyproject.toml" 9 | 10 | jobs: 11 | publish-node: 12 | name: Publish Custom Node to registry 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Check out code 16 | uses: actions/checkout@v4 17 | - name: Publish Custom Node 18 | uses: Comfy-Org/publish-node-action@main 19 | with: 20 | ## Add your own personal access token to your Github Repository secrets and reference it here. 
21 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | /venv 3 | .vscode 4 | *.ckpt 5 | *.safetensors 6 | *.pth 7 | types 8 | *.pyc 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI Fooocus Nodes 2 | 3 | # Installation 4 | 5 | 1. Clone the repository: 6 | `git clone https://github.com/Seedsa/Fooocus_Nodes.git` 7 | into your ComfyUI `custom_nodes` directory 8 | 9 | # Update 10 | 11 | 1. Navigate to the cloned repo, e.g. `custom_nodes/Fooocus_Nodes` 12 | 2. `git pull` 13 | 14 | # Comparisons 15 | 16 | Reproduce the same images generated by Fooocus, directly in ComfyUI. 17 | 18 | ![ComfyUIFooocusNodes](screnshot/FooocusNodes.png) 19 | 20 | ![Fooocus](screnshot/Fooocus.png) 21 | 22 | # Features 23 | 24 | - [x] Fooocus Txt2image&Img2img 25 | - [x] Fooocus Inpaint&Outpaint 26 | - [x] Fooocus Upscale 27 | - [x] Fooocus ImagePrompt&FaceSwap 28 | - [x] Fooocus Canny&CPDS 29 | - [x] Fooocus Styles&PromptExpansion 30 | - [x] Fooocus DetailerFix 31 | - [x] Fooocus Describe 32 | 33 | # Example Workflows 34 | 35 | [example workflows](./workflow/) 36 | 37 | ![basic](/workflow/screenshot.png) 38 | 39 | ## Credits 40 | 41 | - [Fooocus](https://github.com/lllyasviel/Fooocus) 42 | - [ComfyUI-Easy-Use](https://github.com/yolain/ComfyUI-Easy-Use) 43 | - [ComfyUI](https://github.com/comfyanonymous/ComfyUI) 44 | 45 | # Acknowledgments 46 | 47 | This project builds upon and extends the original work found at [ComfyUI_Fooocus](https://github.com/17Retoucher/ComfyUI_Fooocus). 48 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module initializes and sets up the Fooocus extension for ComfyUI. 3 | It handles folder creation, file downloads, and node mapping for the extension.
4 | """ 5 | 6 | import os 7 | import importlib 8 | import shutil 9 | import folder_paths 10 | import filecmp 11 | def add_folder_path_and_extensions(folder_name, full_folder_paths, extensions): 12 | for full_folder_path in full_folder_paths: 13 | folder_paths.add_model_folder_path(folder_name, full_folder_path) 14 | if folder_name in folder_paths.folder_names_and_paths: 15 | current_paths, current_extensions = folder_paths.folder_names_and_paths[folder_name] 16 | updated_extensions = current_extensions | extensions 17 | folder_paths.folder_names_and_paths[folder_name] = (current_paths, updated_extensions) 18 | else: 19 | folder_paths.folder_names_and_paths[folder_name] = (full_folder_paths, extensions) 20 | 21 | model_path = folder_paths.models_dir 22 | add_folder_path_and_extensions("ultralytics_bbox", [os.path.join(model_path, "ultralytics", "bbox")], folder_paths.supported_pt_extensions) 23 | add_folder_path_and_extensions("ultralytics_segm", [os.path.join(model_path, "ultralytics", "segm")], folder_paths.supported_pt_extensions) 24 | add_folder_path_and_extensions("ultralytics", [os.path.join(model_path, "ultralytics")], folder_paths.supported_pt_extensions) 25 | add_folder_path_and_extensions("sams", [os.path.join(model_path, "sams")], folder_paths.supported_pt_extensions) 26 | add_folder_path_and_extensions("ipadapter", [os.path.join(model_path, "ipadapter")], folder_paths.supported_pt_extensions) 27 | add_folder_path_and_extensions("inpaint", [os.path.join(model_path, "inpaint")], folder_paths.supported_pt_extensions) 28 | add_folder_path_and_extensions("fooocus_expansion", [os.path.join(model_path, "fooocus_expansion")], folder_paths.supported_pt_extensions) 29 | 30 | from .py.modules.model_loader import load_file_from_url 31 | from .py.modules.config import ( 32 | path_fooocus_expansion as fooocus_expansion_path, 33 | ) 34 | from .py import log 35 | 36 | 37 | node_list = [ 38 | "api", 39 | "fooocusNodes", 40 | "prompt" 41 | ] 42 | 43 | 44 | NODE_CLASS_MAPPINGS = {} 45 | NODE_DISPLAY_NAME_MAPPINGS = {} 46 | for module_name in node_list: 47 | imported_module = importlib.import_module( 48 | ".py.{}".format(module_name), __name__) 49 | NODE_CLASS_MAPPINGS = {**NODE_CLASS_MAPPINGS, 50 | **imported_module.NODE_CLASS_MAPPINGS} 51 | NODE_DISPLAY_NAME_MAPPINGS = { 52 | **NODE_DISPLAY_NAME_MAPPINGS, **imported_module.NODE_DISPLAY_NAME_MAPPINGS} 53 | 54 | 55 | WEB_DIRECTORY = "./web" 56 | 57 | 58 | def recursive_overwrite(src, dest, ignore=None): 59 | if os.path.isdir(src): 60 | if not os.path.isdir(dest): 61 | os.makedirs(dest) 62 | files = os.listdir(src) 63 | if ignore is not None: 64 | ignored = ignore(src, files) 65 | else: 66 | ignored = set() 67 | for f in files: 68 | if f not in ignored: 69 | recursive_overwrite(os.path.join(src, f), 70 | os.path.join(dest, f), 71 | ignore) 72 | else: 73 | if not os.path.exists(dest) or not filecmp.cmp(src, dest): 74 | shutil.copyfile(src, dest) 75 | log.log_node_info(f'Copying file from {src} to {dest}') 76 | 77 | def get_ext_dir(subpath=None, mkdir=False): 78 | dir = os.path.dirname(__file__) 79 | if subpath is not None: 80 | dir = os.path.join(dir, subpath) 81 | dir = os.path.abspath(dir) 82 | if mkdir and not os.path.exists(dir): 83 | os.makedirs(dir) 84 | return dir 85 | 86 | 87 | def install_expansion(): 88 | src_dir = get_ext_dir("fooocus_expansion") 89 | if not os.path.exists(src_dir): 90 | log.log_node_error( 91 | "prompt_expansion is not exists. 
Please reinstall the extension.") 92 | return 93 | if not os.path.exists(fooocus_expansion_path): 94 | os.makedirs(fooocus_expansion_path) 95 | recursive_overwrite(src_dir, fooocus_expansion_path) 96 | 97 | 98 | def download_models(): 99 | vae_approx_filenames = [ 100 | ('xlvaeapp.pth', 'https://huggingface.co/lllyasviel/misc/resolve/main/xlvaeapp.pth'), 101 | ('vaeapp_sd15.pth', 102 | 'https://huggingface.co/lllyasviel/misc/resolve/main/vaeapp_sd15.pt'), 103 | ('xl-to-v1_interposer-v3.1.safetensors', 104 | 'https://huggingface.co/lllyasviel/misc/resolve/main/xl-to-v1_interposer-v3.1.safetensors') 105 | ] 106 | 107 | for file_name, url in vae_approx_filenames: 108 | load_file_from_url( 109 | url=url, model_dir="vae_approx", file_name=file_name) 110 | 111 | install_expansion() 112 | load_file_from_url( 113 | url='https://huggingface.co/lllyasviel/misc/resolve/main/fooocus_expansion.bin', 114 | model_dir="fooocus_expansion", 115 | file_name='pytorch_model.bin' 116 | ) 117 | 118 | 119 | download_models() 120 | 121 | __all__ = ['NODE_CLASS_MAPPINGS', 122 | 'NODE_DISPLAY_NAME_MAPPINGS', "WEB_DIRECTORY"] 123 | print("\033[0m\033[95m ComfyUI Fooocus Nodes :\033[0m \033[32mloaded\033[0m") 124 | -------------------------------------------------------------------------------- /fooocus_expansion/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "gpt2", 3 | "activation_function": "gelu_new", 4 | "architectures": [ 5 | "GPT2LMHeadModel" 6 | ], 7 | "attn_pdrop": 0.1, 8 | "bos_token_id": 50256, 9 | "embd_pdrop": 0.1, 10 | "eos_token_id": 50256, 11 | "pad_token_id": 50256, 12 | "initializer_range": 0.02, 13 | "layer_norm_epsilon": 1e-05, 14 | "model_type": "gpt2", 15 | "n_ctx": 1024, 16 | "n_embd": 768, 17 | "n_head": 12, 18 | "n_inner": null, 19 | "n_layer": 12, 20 | "n_positions": 1024, 21 | "reorder_and_upcast_attn": false, 22 | "resid_pdrop": 0.1, 23 | "scale_attn_by_inverse_layer_idx": false, 24 | "scale_attn_weights": true, 25 | "summary_activation": null, 26 | "summary_first_dropout": 0.1, 27 | "summary_proj_to_labels": true, 28 | "summary_type": "cls_index", 29 | "summary_use_proj": true, 30 | "task_specific_params": { 31 | "text-generation": { 32 | "do_sample": true, 33 | "max_length": 50 34 | } 35 | }, 36 | "torch_dtype": "float32", 37 | "transformers_version": "4.23.0.dev0", 38 | "use_cache": true, 39 | "vocab_size": 50257 40 | } 41 | -------------------------------------------------------------------------------- /fooocus_expansion/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": "<|endoftext|>", 3 | "eos_token": "<|endoftext|>", 4 | "unk_token": "<|endoftext|>" 5 | } 6 | -------------------------------------------------------------------------------- /fooocus_expansion/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": "<|endoftext|>", 4 | "eos_token": "<|endoftext|>", 5 | "model_max_length": 1024, 6 | "name_or_path": "gpt2", 7 | "special_tokens_map_file": null, 8 | "tokenizer_class": "GPT2Tokenizer", 9 | "unk_token": "<|endoftext|>" 10 | } 11 | -------------------------------------------------------------------------------- /py/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/__init__.py 
-------------------------------------------------------------------------------- /py/api.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import sys 4 | 5 | 6 | modules_path = os.path.dirname(os.path.realpath(__file__)) 7 | sys.path.append(modules_path) 8 | from server import PromptServer 9 | from modules.sdxl_styles import legal_style_names 10 | 11 | try: 12 | import aiohttp 13 | from aiohttp import web 14 | except ImportError: 15 | print("Module 'aiohttp' not installed. Please install it via:") 16 | print("pip install aiohttp") 17 | sys.exit() 18 | 19 | 20 | @PromptServer.instance.routes.get("/fooocus/prompt/styles") 21 | async def getStylesList(request): 22 | if "name" in request.rel_url.query: 23 | name = request.rel_url.query["name"] 24 | return web.json_response(legal_style_names) 25 | 26 | 27 | NODE_CLASS_MAPPINGS = {} 28 | NODE_DISPLAY_NAME_MAPPINGS = {} 29 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/bert_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertModel" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 768, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 3072, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 12, 15 | "num_hidden_layers": 12, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 30522, 19 | "encoder_width": 768, 20 | "add_cross_attention": true 21 | } 22 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/caption_coco.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/coco/images/' 2 | ann_root: 'annotation' 3 | coco_gt_root: 'annotation/coco_gt' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth' 7 | 8 | # size of vit model; base or large 9 | vit: 'base' 10 | vit_grad_ckpt: False 11 | vit_ckpt_layer: 0 12 | batch_size: 32 13 | init_lr: 1e-5 14 | 15 | # vit: 'large' 16 | # vit_grad_ckpt: True 17 | # vit_ckpt_layer: 5 18 | # batch_size: 16 19 | # init_lr: 2e-6 20 | 21 | image_size: 384 22 | 23 | # generation configs 24 | max_length: 20 25 | min_length: 5 26 | num_beams: 3 27 | prompt: 'a picture of ' 28 | 29 | # optimizer 30 | weight_decay: 0.05 31 | min_lr: 0 32 | max_epoch: 5 33 | 34 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/med_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertModel" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 768, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 3072, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 12, 15 | "num_hidden_layers": 12, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 30524, 19 | "encoder_width": 768, 20 | "add_cross_attention": true 21 | } 22 | -------------------------------------------------------------------------------- 
/py/extras/BLIP/configs/nlvr.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/NLVR2/' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_nlvr.pth' 6 | 7 | #size of vit model; base or large 8 | vit: 'base' 9 | batch_size_train: 16 10 | batch_size_test: 64 11 | vit_grad_ckpt: False 12 | vit_ckpt_layer: 0 13 | max_epoch: 15 14 | 15 | image_size: 384 16 | 17 | # optimizer 18 | weight_decay: 0.05 19 | init_lr: 3e-5 20 | min_lr: 0 21 | 22 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/nocaps.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/nocaps/' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth' 6 | 7 | vit: 'base' 8 | batch_size: 32 9 | 10 | image_size: 384 11 | 12 | max_length: 20 13 | min_length: 5 14 | num_beams: 3 15 | prompt: 'a picture of ' -------------------------------------------------------------------------------- /py/extras/BLIP/configs/pretrain.yaml: -------------------------------------------------------------------------------- 1 | train_file: ['/export/share/junnan-li/VL_pretrain/annotation/coco_karpathy_train.json', 2 | '/export/share/junnan-li/VL_pretrain/annotation/vg_caption.json', 3 | ] 4 | laion_path: '' 5 | 6 | # size of vit model; base or large 7 | vit: 'base' 8 | vit_grad_ckpt: False 9 | vit_ckpt_layer: 0 10 | 11 | image_size: 224 12 | batch_size: 75 13 | 14 | queue_size: 57600 15 | alpha: 0.4 16 | 17 | # optimizer 18 | weight_decay: 0.05 19 | init_lr: 3e-4 20 | min_lr: 1e-6 21 | warmup_lr: 1e-6 22 | lr_decay_rate: 0.9 23 | max_epoch: 20 24 | warmup_steps: 3000 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/retrieval_coco.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/coco/images/' 2 | ann_root: 'annotation' 3 | dataset: 'coco' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth' 7 | 8 | # size of vit model; base or large 9 | 10 | vit: 'base' 11 | batch_size_train: 32 12 | batch_size_test: 64 13 | vit_grad_ckpt: True 14 | vit_ckpt_layer: 4 15 | init_lr: 1e-5 16 | 17 | # vit: 'large' 18 | # batch_size_train: 16 19 | # batch_size_test: 32 20 | # vit_grad_ckpt: True 21 | # vit_ckpt_layer: 12 22 | # init_lr: 5e-6 23 | 24 | image_size: 384 25 | queue_size: 57600 26 | alpha: 0.4 27 | k_test: 256 28 | negative_all_rank: True 29 | 30 | # optimizer 31 | weight_decay: 0.05 32 | min_lr: 0 33 | max_epoch: 6 34 | 35 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/retrieval_flickr.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/flickr30k/' 2 | ann_root: 'annotation' 3 | dataset: 'flickr' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_flickr.pth' 7 | 8 
| # size of vit model; base or large 9 | 10 | vit: 'base' 11 | batch_size_train: 32 12 | batch_size_test: 64 13 | vit_grad_ckpt: True 14 | vit_ckpt_layer: 4 15 | init_lr: 1e-5 16 | 17 | # vit: 'large' 18 | # batch_size_train: 16 19 | # batch_size_test: 32 20 | # vit_grad_ckpt: True 21 | # vit_ckpt_layer: 10 22 | # init_lr: 5e-6 23 | 24 | image_size: 384 25 | queue_size: 57600 26 | alpha: 0.4 27 | k_test: 128 28 | negative_all_rank: False 29 | 30 | # optimizer 31 | weight_decay: 0.05 32 | min_lr: 0 33 | max_epoch: 6 34 | 35 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/retrieval_msrvtt.yaml: -------------------------------------------------------------------------------- 1 | video_root: '/export/share/dongxuli/data/msrvtt_retrieval/videos' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth' 6 | 7 | # size of vit model; base or large 8 | vit: 'base' 9 | batch_size: 64 10 | k_test: 128 11 | image_size: 384 12 | num_frm_test: 8 -------------------------------------------------------------------------------- /py/extras/BLIP/configs/vqa.yaml: -------------------------------------------------------------------------------- 1 | vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' #followed by train2014/ 2 | vg_root: '/export/share/datasets/vision/visual-genome/' #followed by image/ 3 | train_files: ['vqa_train','vqa_val','vg_qa'] 4 | ann_root: 'annotation' 5 | 6 | # set pretrained as a file path or an url 7 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth' 8 | 9 | # size of vit model; base or large 10 | vit: 'base' 11 | batch_size_train: 16 12 | batch_size_test: 32 13 | vit_grad_ckpt: False 14 | vit_ckpt_layer: 0 15 | init_lr: 2e-5 16 | 17 | image_size: 480 18 | 19 | k_test: 128 20 | inference: 'rank' 21 | 22 | # optimizer 23 | weight_decay: 0.05 24 | min_lr: 0 25 | max_epoch: 10 -------------------------------------------------------------------------------- /py/extras/BLIP/models/bert_tokenizer/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForMaskedLM" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "layer_norm_eps": 1e-12, 13 | "max_position_embeddings": 512, 14 | "model_type": "bert", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "pad_token_id": 0, 18 | "position_embedding_type": "absolute", 19 | "transformers_version": "4.6.0.dev0", 20 | "type_vocab_size": 2, 21 | "use_cache": true, 22 | "vocab_size": 30522 23 | } 24 | -------------------------------------------------------------------------------- /py/extras/BLIP/models/bert_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "do_lower_case": true 3 | } 4 | -------------------------------------------------------------------------------- /py/extras/BLIP/models/blip_itm.py: -------------------------------------------------------------------------------- 1 | from extras.BLIP.models.med import BertConfig, BertModel 2 | from transformers import BertTokenizer 3 | 4 | import torch 5 | from torch import nn 6 | 
import torch.nn.functional as F 7 | 8 | from extras.BLIP.models.blip import create_vit, init_tokenizer, load_checkpoint 9 | 10 | class BLIP_ITM(nn.Module): 11 | def __init__(self, 12 | med_config = 'configs/med_config.json', 13 | image_size = 384, 14 | vit = 'base', 15 | vit_grad_ckpt = False, 16 | vit_ckpt_layer = 0, 17 | embed_dim = 256, 18 | ): 19 | """ 20 | Args: 21 | med_config (str): path for the mixture of encoder-decoder model's configuration file 22 | image_size (int): input image size 23 | vit (str): model size of vision transformer 24 | """ 25 | super().__init__() 26 | 27 | self.visual_encoder, vision_width = create_vit(vit,image_size, vit_grad_ckpt, vit_ckpt_layer) 28 | self.tokenizer = init_tokenizer() 29 | med_config = BertConfig.from_json_file(med_config) 30 | med_config.encoder_width = vision_width 31 | self.text_encoder = BertModel(config=med_config, add_pooling_layer=False) 32 | 33 | text_width = self.text_encoder.config.hidden_size 34 | 35 | self.vision_proj = nn.Linear(vision_width, embed_dim) 36 | self.text_proj = nn.Linear(text_width, embed_dim) 37 | 38 | self.itm_head = nn.Linear(text_width, 2) 39 | 40 | 41 | def forward(self, image, caption, match_head='itm'): 42 | 43 | image_embeds = self.visual_encoder(image) 44 | image_atts = torch.ones(image_embeds.size()[:-1],dtype=torch.long).to(image.device) 45 | 46 | text = self.tokenizer(caption, padding='max_length', truncation=True, max_length=35, 47 | return_tensors="pt").to(image.device) 48 | 49 | 50 | if match_head=='itm': 51 | output = self.text_encoder(text.input_ids, 52 | attention_mask = text.attention_mask, 53 | encoder_hidden_states = image_embeds, 54 | encoder_attention_mask = image_atts, 55 | return_dict = True, 56 | ) 57 | itm_output = self.itm_head(output.last_hidden_state[:,0,:]) 58 | return itm_output 59 | 60 | elif match_head=='itc': 61 | text_output = self.text_encoder(text.input_ids, attention_mask = text.attention_mask, 62 | return_dict = True, mode = 'text') 63 | image_feat = F.normalize(self.vision_proj(image_embeds[:,0,:]),dim=-1) 64 | text_feat = F.normalize(self.text_proj(text_output.last_hidden_state[:,0,:]),dim=-1) 65 | 66 | sim = image_feat @ text_feat.t() 67 | return sim 68 | 69 | 70 | def blip_itm(pretrained='',**kwargs): 71 | model = BLIP_ITM(**kwargs) 72 | if pretrained: 73 | model,msg = load_checkpoint(model,pretrained) 74 | assert(len(msg.missing_keys)==0) 75 | return model 76 | -------------------------------------------------------------------------------- /py/extras/BLIP/models/blip_nlvr.py: -------------------------------------------------------------------------------- 1 | from extras.BLIP.models.med import BertConfig 2 | from extras.BLIP.models.nlvr_encoder import BertModel 3 | from extras.BLIP.models.vit import interpolate_pos_embed 4 | from extras.BLIP.models.blip import create_vit, init_tokenizer, is_url 5 | 6 | from timm.models.hub import download_cached_file 7 | 8 | import torch 9 | from torch import nn 10 | import torch.nn.functional as F 11 | from transformers import BertTokenizer 12 | import numpy as np 13 | import os 14 | 15 | 16 | class BLIP_NLVR(nn.Module): 17 | def __init__(self, 18 | med_config = 'configs/med_config.json', 19 | image_size = 480, 20 | vit = 'base', 21 | vit_grad_ckpt = False, 22 | vit_ckpt_layer = 0, 23 | ): 24 | """ 25 | Args: 26 | med_config (str): path for the mixture of encoder-decoder model's configuration file 27 | image_size (int): input image size 28 | vit (str): model size of vision transformer 29 | """ 30 | super().__init__() 31 | 32 | 
self.visual_encoder, vision_width = create_vit(vit,image_size, vit_grad_ckpt, vit_ckpt_layer, drop_path_rate=0.1) 33 | self.tokenizer = init_tokenizer() 34 | med_config = BertConfig.from_json_file(med_config) 35 | med_config.encoder_width = vision_width 36 | self.text_encoder = BertModel(config=med_config, add_pooling_layer=False) 37 | 38 | self.cls_head = nn.Sequential( 39 | nn.Linear(self.text_encoder.config.hidden_size, self.text_encoder.config.hidden_size), 40 | nn.ReLU(), 41 | nn.Linear(self.text_encoder.config.hidden_size, 2) 42 | ) 43 | 44 | def forward(self, image, text, targets, train=True): 45 | 46 | image_embeds = self.visual_encoder(image) 47 | image_atts = torch.ones(image_embeds.size()[:-1],dtype=torch.long).to(image.device) 48 | image0_embeds, image1_embeds = torch.split(image_embeds,targets.size(0)) 49 | 50 | text = self.tokenizer(text, padding='longest', return_tensors="pt").to(image.device) 51 | text.input_ids[:,0] = self.tokenizer.enc_token_id 52 | 53 | output = self.text_encoder(text.input_ids, 54 | attention_mask = text.attention_mask, 55 | encoder_hidden_states = [image0_embeds,image1_embeds], 56 | encoder_attention_mask = [image_atts[:image0_embeds.size(0)], 57 | image_atts[image0_embeds.size(0):]], 58 | return_dict = True, 59 | ) 60 | hidden_state = output.last_hidden_state[:,0,:] 61 | prediction = self.cls_head(hidden_state) 62 | 63 | if train: 64 | loss = F.cross_entropy(prediction, targets) 65 | return loss 66 | else: 67 | return prediction 68 | 69 | def blip_nlvr(pretrained='',**kwargs): 70 | model = BLIP_NLVR(**kwargs) 71 | if pretrained: 72 | model,msg = load_checkpoint(model,pretrained) 73 | print("missing keys:") 74 | print(msg.missing_keys) 75 | return model 76 | 77 | 78 | def load_checkpoint(model,url_or_filename): 79 | if is_url(url_or_filename): 80 | cached_file = download_cached_file(url_or_filename, check_hash=False, progress=True) 81 | checkpoint = torch.load(cached_file, map_location='cpu', weights_only=True) 82 | elif os.path.isfile(url_or_filename): 83 | checkpoint = torch.load(url_or_filename, map_location='cpu', weights_only=True) 84 | else: 85 | raise RuntimeError('checkpoint url or path is invalid') 86 | state_dict = checkpoint['model'] 87 | 88 | state_dict['visual_encoder.pos_embed'] = interpolate_pos_embed(state_dict['visual_encoder.pos_embed'],model.visual_encoder) 89 | 90 | for key in list(state_dict.keys()): 91 | if 'crossattention.self.' in key: 92 | new_key0 = key.replace('self','self0') 93 | new_key1 = key.replace('self','self1') 94 | state_dict[new_key0] = state_dict[key] 95 | state_dict[new_key1] = state_dict[key] 96 | elif 'crossattention.output.dense.' in key: 97 | new_key0 = key.replace('dense','dense0') 98 | new_key1 = key.replace('dense','dense1') 99 | state_dict[new_key0] = state_dict[key] 100 | state_dict[new_key1] = state_dict[key] 101 | 102 | msg = model.load_state_dict(state_dict,strict=False) 103 | print('load checkpoint from %s'%url_or_filename) 104 | return model,msg 105 | -------------------------------------------------------------------------------- /py/extras/expansion.py: -------------------------------------------------------------------------------- 1 | # Fooocus GPT2 Expansion 2 | # Algorithm created by Lvmin Zhang at 2023, Stanford 3 | # If used inside Fooocus, any use is permitted. 4 | # If used outside Fooocus, only non-commercial use is permitted (CC-By NC 4.0). 5 | # This applies to the word list, vocab, model, and algorithm. 
6 | 7 | 8 | import os 9 | import torch 10 | import math 11 | import comfy.model_management as model_management 12 | 13 | from transformers.generation.logits_process import LogitsProcessorList 14 | from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed 15 | from modules.config import path_fooocus_expansion 16 | from ldm_patched.modules.model_patcher import FooocusModelPatcher 17 | 18 | 19 | # limitation of np.random.seed(), called from transformers.set_seed() 20 | SEED_LIMIT_NUMPY = 2**32 21 | neg_inf = - 8192.0 22 | 23 | 24 | def safe_str(x): 25 | x = str(x) 26 | for _ in range(16): 27 | x = x.replace(' ', ' ') 28 | return x.strip(",. \r\n") 29 | 30 | 31 | def remove_pattern(x, pattern): 32 | for p in pattern: 33 | x = x.replace(p, '') 34 | return x 35 | 36 | 37 | class FooocusExpansion: 38 | def __init__(self): 39 | self.tokenizer = AutoTokenizer.from_pretrained(path_fooocus_expansion) 40 | 41 | positive_words = open(os.path.join(path_fooocus_expansion, 'positive.txt'), 42 | encoding='utf-8').read().splitlines() 43 | positive_words = ['Ġ' + x.lower() for x in positive_words if x != ''] 44 | 45 | self.logits_bias = torch.zeros((1, len(self.tokenizer.vocab)), dtype=torch.float32) + neg_inf 46 | 47 | debug_list = [] 48 | for k, v in self.tokenizer.vocab.items(): 49 | if k in positive_words: 50 | self.logits_bias[0, v] = 0 51 | debug_list.append(k[1:]) 52 | 53 | print(f'Fooocus V2 Expansion: Vocab with {len(debug_list)} words.') 54 | 55 | # debug_list = '\n'.join(sorted(debug_list)) 56 | # print(debug_list) 57 | 58 | # t11 = self.tokenizer(',', return_tensors="np") 59 | # t198 = self.tokenizer('\n', return_tensors="np") 60 | # eos = self.tokenizer.eos_token_id 61 | 62 | self.model = AutoModelForCausalLM.from_pretrained(path_fooocus_expansion) 63 | self.model.eval() 64 | 65 | load_device = model_management.text_encoder_device() 66 | offload_device = model_management.text_encoder_offload_device() 67 | 68 | # MPS hack 69 | if model_management.is_device_mps(load_device): 70 | load_device = torch.device('cpu') 71 | offload_device = torch.device('cpu') 72 | 73 | use_fp16 = model_management.should_use_fp16(device=load_device) 74 | 75 | if use_fp16: 76 | self.model.half() 77 | 78 | self.patcher = FooocusModelPatcher(self.model, load_device=load_device, offload_device=offload_device) 79 | print(f'Fooocus Expansion engine loaded for {load_device}, use_fp16 = {use_fp16}.') 80 | 81 | @torch.no_grad() 82 | @torch.inference_mode() 83 | def logits_processor(self, input_ids, scores): 84 | assert scores.ndim == 2 and scores.shape[0] == 1 85 | self.logits_bias = self.logits_bias.to(scores) 86 | 87 | bias = self.logits_bias.clone() 88 | bias[0, input_ids[0].to(bias.device).long()] = neg_inf 89 | bias[0, 11] = 0 90 | 91 | return scores + bias 92 | 93 | @torch.no_grad() 94 | @torch.inference_mode() 95 | def __call__(self, prompt, seed): 96 | if prompt == '': 97 | return '' 98 | 99 | if self.patcher.current_loaded_device() != self.patcher.load_device: 100 | print('Fooocus Expansion loaded by itself.') 101 | model_management.load_model_gpu(self.patcher) 102 | 103 | seed = int(seed) % SEED_LIMIT_NUMPY 104 | set_seed(seed) 105 | prompt = safe_str(prompt) + ',' 106 | 107 | tokenized_kwargs = self.tokenizer(prompt, return_tensors="pt") 108 | tokenized_kwargs.data['input_ids'] = tokenized_kwargs.data['input_ids'].to(self.patcher.load_device) 109 | tokenized_kwargs.data['attention_mask'] = tokenized_kwargs.data['attention_mask'].to(self.patcher.load_device) 110 | 111 | current_token_length = 
int(tokenized_kwargs.data['input_ids'].shape[1]) 112 | max_token_length = 75 * int(math.ceil(float(current_token_length) / 75.0)) 113 | max_new_tokens = max_token_length - current_token_length 114 | 115 | # https://huggingface.co/blog/introducing-csearch 116 | # https://huggingface.co/docs/transformers/generation_strategies 117 | features = self.model.generate(**tokenized_kwargs, 118 | top_k=100, 119 | max_new_tokens=max_new_tokens, 120 | do_sample=True, 121 | logits_processor=LogitsProcessorList([self.logits_processor])) 122 | 123 | response = self.tokenizer.batch_decode(features, skip_special_tokens=True) 124 | result = safe_str(response[0]) 125 | 126 | return result 127 | -------------------------------------------------------------------------------- /py/extras/face_crop.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import modules.config 4 | 5 | 6 | faceRestoreHelper = None 7 | 8 | 9 | def align_warp_face(self, landmark, border_mode='constant'): 10 | affine_matrix = cv2.estimateAffinePartial2D(landmark, self.face_template, method=cv2.LMEDS)[0] 11 | self.affine_matrices.append(affine_matrix) 12 | if border_mode == 'constant': 13 | border_mode = cv2.BORDER_CONSTANT 14 | elif border_mode == 'reflect101': 15 | border_mode = cv2.BORDER_REFLECT101 16 | elif border_mode == 'reflect': 17 | border_mode = cv2.BORDER_REFLECT 18 | input_img = self.input_img 19 | cropped_face = cv2.warpAffine(input_img, affine_matrix, self.face_size, 20 | borderMode=border_mode, borderValue=(135, 133, 132)) 21 | return cropped_face 22 | 23 | 24 | def crop_image(img_rgb): 25 | global faceRestoreHelper 26 | 27 | if faceRestoreHelper is None: 28 | from extras.facexlib.utils.face_restoration_helper import FaceRestoreHelper 29 | faceRestoreHelper = FaceRestoreHelper( 30 | upscale_factor=1, 31 | model_rootpath=modules.config.path_controlnet, 32 | device='cpu' # use cpu is safer since we are out of memory management 33 | ) 34 | 35 | faceRestoreHelper.clean_all() 36 | faceRestoreHelper.read_image(np.ascontiguousarray(img_rgb[:, :, ::-1].copy())) 37 | faceRestoreHelper.get_face_landmarks_5() 38 | 39 | landmarks = faceRestoreHelper.all_landmarks_5 40 | # landmarks are already sorted with confidence. 
41 | 42 | if len(landmarks) == 0: 43 | print('No face detected') 44 | return img_rgb 45 | else: 46 | print(f'Detected {len(landmarks)} faces') 47 | 48 | result = align_warp_face(faceRestoreHelper, landmarks[0]) 49 | 50 | return np.ascontiguousarray(result[:, :, ::-1].copy()) 51 | -------------------------------------------------------------------------------- /py/extras/facexlib/detection/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from copy import deepcopy 3 | 4 | from extras.facexlib.utils import load_file_from_url 5 | from .retinaface import RetinaFace 6 | 7 | 8 | def init_detection_model(model_name, half=False, device='cuda', model_rootpath=None): 9 | if model_name == 'retinaface_resnet50': 10 | model = RetinaFace(network_name='resnet50', half=half, device=device) 11 | model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth' 12 | elif model_name == 'retinaface_mobile0.25': 13 | model = RetinaFace(network_name='mobile0.25', half=half, device=device) 14 | model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_mobilenet0.25_Final.pth' 15 | else: 16 | raise NotImplementedError(f'{model_name} is not implemented.') 17 | 18 | model_path = load_file_from_url( 19 | url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath) 20 | 21 | # TODO: clean pretrained model 22 | load_net = torch.load(model_path, map_location=lambda storage, loc: storage) 23 | # remove unnecessary 'module.' 24 | for k, v in deepcopy(load_net).items(): 25 | if k.startswith('module.'): 26 | load_net[k[7:]] = v 27 | load_net.pop(k) 28 | model.load_state_dict(load_net, strict=True) 29 | model.eval() 30 | model = model.to(device) 31 | return model 32 | -------------------------------------------------------------------------------- /py/extras/facexlib/parsing/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from extras.facexlib.utils import load_file_from_url 4 | from .bisenet import BiSeNet 5 | from .parsenet import ParseNet 6 | 7 | 8 | def init_parsing_model(model_name='bisenet', half=False, device='cuda', model_rootpath=None): 9 | if model_name == 'bisenet': 10 | model = BiSeNet(num_class=19) 11 | model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.2.0/parsing_bisenet.pth' 12 | elif model_name == 'parsenet': 13 | model = ParseNet(in_size=512, out_size=512, parsing_ch=19) 14 | model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth' 15 | else: 16 | raise NotImplementedError(f'{model_name} is not implemented.') 17 | 18 | model_path = load_file_from_url( 19 | url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath) 20 | load_net = torch.load(model_path, map_location=lambda storage, loc: storage) 21 | model.load_state_dict(load_net, strict=True) 22 | model.eval() 23 | model = model.to(device) 24 | return model 25 | -------------------------------------------------------------------------------- /py/extras/facexlib/parsing/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv3x3(in_planes, out_planes, stride=1): 6 | """3x3 convolution with padding""" 7 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 8 | 9 | 10 | 
class BasicBlock(nn.Module): 11 | 12 | def __init__(self, in_chan, out_chan, stride=1): 13 | super(BasicBlock, self).__init__() 14 | self.conv1 = conv3x3(in_chan, out_chan, stride) 15 | self.bn1 = nn.BatchNorm2d(out_chan) 16 | self.conv2 = conv3x3(out_chan, out_chan) 17 | self.bn2 = nn.BatchNorm2d(out_chan) 18 | self.relu = nn.ReLU(inplace=True) 19 | self.downsample = None 20 | if in_chan != out_chan or stride != 1: 21 | self.downsample = nn.Sequential( 22 | nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=stride, bias=False), 23 | nn.BatchNorm2d(out_chan), 24 | ) 25 | 26 | def forward(self, x): 27 | residual = self.conv1(x) 28 | residual = F.relu(self.bn1(residual)) 29 | residual = self.conv2(residual) 30 | residual = self.bn2(residual) 31 | 32 | shortcut = x 33 | if self.downsample is not None: 34 | shortcut = self.downsample(x) 35 | 36 | out = shortcut + residual 37 | out = self.relu(out) 38 | return out 39 | 40 | 41 | def create_layer_basic(in_chan, out_chan, bnum, stride=1): 42 | layers = [BasicBlock(in_chan, out_chan, stride=stride)] 43 | for i in range(bnum - 1): 44 | layers.append(BasicBlock(out_chan, out_chan, stride=1)) 45 | return nn.Sequential(*layers) 46 | 47 | 48 | class ResNet18(nn.Module): 49 | 50 | def __init__(self): 51 | super(ResNet18, self).__init__() 52 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 53 | self.bn1 = nn.BatchNorm2d(64) 54 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 55 | self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) 56 | self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) 57 | self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2) 58 | self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) 59 | 60 | def forward(self, x): 61 | x = self.conv1(x) 62 | x = F.relu(self.bn1(x)) 63 | x = self.maxpool(x) 64 | 65 | x = self.layer1(x) 66 | feat8 = self.layer2(x) # 1/8 67 | feat16 = self.layer3(feat8) # 1/16 68 | feat32 = self.layer4(feat16) # 1/32 69 | return feat8, feat16, feat32 70 | -------------------------------------------------------------------------------- /py/extras/facexlib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .face_utils import align_crop_face_landmarks, compute_increased_bbox, get_valid_bboxes, paste_face_back 2 | from .misc import img2tensor, load_file_from_url, scandir 3 | 4 | __all__ = [ 5 | 'align_crop_face_landmarks', 'compute_increased_bbox', 'get_valid_bboxes', 'load_file_from_url', 'paste_face_back', 6 | 'img2tensor', 'scandir' 7 | ] 8 | -------------------------------------------------------------------------------- /py/extras/facexlib/utils/misc.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import os.path as osp 4 | import torch 5 | from torch.hub import download_url_to_file, get_dir 6 | from urllib.parse import urlparse 7 | 8 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | 10 | 11 | def imwrite(img, file_path, params=None, auto_mkdir=True): 12 | """Write image to file. 13 | 14 | Args: 15 | img (ndarray): Image array to be written. 16 | file_path (str): Image file path. 17 | params (None or list): Same as opencv's :func:`imwrite` interface. 18 | auto_mkdir (bool): If the parent folder of `file_path` does not exist, 19 | whether to create it automatically. 20 | 21 | Returns: 22 | bool: Successful or not. 
23 | """ 24 | if auto_mkdir: 25 | dir_name = os.path.abspath(os.path.dirname(file_path)) 26 | os.makedirs(dir_name, exist_ok=True) 27 | return cv2.imwrite(file_path, img, params) 28 | 29 | 30 | def img2tensor(imgs, bgr2rgb=True, float32=True): 31 | """Numpy array to tensor. 32 | 33 | Args: 34 | imgs (list[ndarray] | ndarray): Input images. 35 | bgr2rgb (bool): Whether to change bgr to rgb. 36 | float32 (bool): Whether to change to float32. 37 | 38 | Returns: 39 | list[tensor] | tensor: Tensor images. If returned results only have 40 | one element, just return tensor. 41 | """ 42 | 43 | def _totensor(img, bgr2rgb, float32): 44 | if img.shape[2] == 3 and bgr2rgb: 45 | if img.dtype == 'float64': 46 | img = img.astype('float32') 47 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 48 | img = torch.from_numpy(img.transpose(2, 0, 1)) 49 | if float32: 50 | img = img.float() 51 | return img 52 | 53 | if isinstance(imgs, list): 54 | return [_totensor(img, bgr2rgb, float32) for img in imgs] 55 | else: 56 | return _totensor(imgs, bgr2rgb, float32) 57 | 58 | 59 | def load_file_from_url(url, model_dir=None, progress=True, file_name=None, save_dir=None): 60 | """Ref:https://github.com/1adrianb/face-alignment/blob/master/face_alignment/utils.py 61 | """ 62 | if model_dir is None: 63 | hub_dir = get_dir() 64 | model_dir = os.path.join(hub_dir, 'checkpoints') 65 | 66 | if save_dir is None: 67 | save_dir = os.path.join(ROOT_DIR, model_dir) 68 | os.makedirs(save_dir, exist_ok=True) 69 | 70 | parts = urlparse(url) 71 | filename = os.path.basename(parts.path) 72 | if file_name is not None: 73 | filename = file_name 74 | cached_file = os.path.abspath(os.path.join(save_dir, filename)) 75 | if not os.path.exists(cached_file): 76 | print(f'Downloading: "{url}" to {cached_file}\n') 77 | download_url_to_file(url, cached_file, hash_prefix=None, progress=progress) 78 | return cached_file 79 | 80 | 81 | def scandir(dir_path, suffix=None, recursive=False, full_path=False): 82 | """Scan a directory to find the interested files. 83 | Args: 84 | dir_path (str): Path of the directory. 85 | suffix (str | tuple(str), optional): File suffix that we are 86 | interested in. Default: None. 87 | recursive (bool, optional): If set to True, recursively scan the 88 | directory. Default: False. 89 | full_path (bool, optional): If set to True, include the dir_path. 90 | Default: False. 91 | Returns: 92 | A generator for all the interested files with relative paths. 
93 | """ 94 | 95 | if (suffix is not None) and not isinstance(suffix, (str, tuple)): 96 | raise TypeError('"suffix" must be a string or tuple of strings') 97 | 98 | root = dir_path 99 | 100 | def _scandir(dir_path, suffix, recursive): 101 | for entry in os.scandir(dir_path): 102 | if not entry.name.startswith('.') and entry.is_file(): 103 | if full_path: 104 | return_path = entry.path 105 | else: 106 | return_path = osp.relpath(entry.path, root) 107 | 108 | if suffix is None: 109 | yield return_path 110 | elif return_path.endswith(suffix): 111 | yield return_path 112 | else: 113 | if recursive: 114 | yield from _scandir(entry.path, suffix=suffix, recursive=recursive) 115 | else: 116 | continue 117 | 118 | return _scandir(dir_path, suffix=suffix, recursive=recursive) 119 | -------------------------------------------------------------------------------- /py/extras/interrogate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import comfy.model_management as model_management 4 | 5 | from torchvision import transforms 6 | from torchvision.transforms.functional import InterpolationMode 7 | from modules.model_loader import load_file_from_url 8 | from ldm_patched.modules.model_patcher import FooocusModelPatcher 9 | from extras.BLIP.models.blip import blip_decoder 10 | 11 | 12 | blip_image_eval_size = 384 13 | blip_repo_root = os.path.join(os.path.dirname(__file__), 'BLIP') 14 | 15 | 16 | class Interrogator: 17 | def __init__(self): 18 | self.blip_model = None 19 | self.load_device = torch.device('cpu') 20 | self.offload_device = torch.device('cpu') 21 | self.dtype = torch.float32 22 | 23 | @torch.no_grad() 24 | @torch.inference_mode() 25 | def interrogate(self, img_rgb): 26 | if self.blip_model is None: 27 | filename = load_file_from_url( 28 | url='https://huggingface.co/lllyasviel/misc/resolve/main/model_base_caption_capfilt_large.pth', 29 | model_dir="clip_vision", 30 | file_name='model_base_caption_capfilt_large.pth', 31 | ) 32 | 33 | model = blip_decoder(pretrained=filename, image_size=blip_image_eval_size, vit='base', 34 | med_config=os.path.join(blip_repo_root, "configs", "med_config.json")) 35 | model.eval() 36 | 37 | self.load_device = model_management.text_encoder_device() 38 | self.offload_device = model_management.text_encoder_offload_device() 39 | self.dtype = torch.float32 40 | 41 | model.to(self.offload_device) 42 | 43 | if model_management.should_use_fp16(device=self.load_device): 44 | model.half() 45 | self.dtype = torch.float16 46 | 47 | self.blip_model = FooocusModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device) 48 | 49 | model_management.load_model_gpu(self.blip_model) 50 | 51 | gpu_image = transforms.Compose([ 52 | transforms.ToTensor(), 53 | transforms.Resize((blip_image_eval_size, blip_image_eval_size), interpolation=InterpolationMode.BICUBIC), 54 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)) 55 | ])(img_rgb).unsqueeze(0).to(device=self.load_device, dtype=self.dtype) 56 | 57 | caption = self.blip_model.model.generate(gpu_image, sample=True, num_beams=1, max_length=75)[0] 58 | 59 | return caption 60 | 61 | 62 | default_interrogator = Interrogator().interrogate 63 | -------------------------------------------------------------------------------- /py/extras/preprocessors.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import 
modules.advanced_parameters as advanced_parameters 4 | 5 | 6 | def centered_canny(x: np.ndarray): 7 | assert isinstance(x, np.ndarray) 8 | assert x.ndim == 2 and x.dtype == np.uint8 9 | 10 | y = cv2.Canny(x, int(64), int(128)) 11 | y = y.astype(np.float32) / 255.0 12 | return y 13 | 14 | 15 | def centered_canny_color(x: np.ndarray): 16 | assert isinstance(x, np.ndarray) 17 | assert x.ndim == 3 and x.shape[2] == 3 18 | 19 | result = [centered_canny(x[..., i]) for i in range(3)] 20 | result = np.stack(result, axis=2) 21 | return result 22 | 23 | 24 | def pyramid_canny_color(x: np.ndarray): 25 | assert isinstance(x, np.ndarray) 26 | assert x.ndim == 3 and x.shape[2] == 3 27 | 28 | H, W, C = x.shape 29 | acc_edge = None 30 | 31 | for k in [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]: 32 | Hs, Ws = int(H * k), int(W * k) 33 | small = cv2.resize(x, (Ws, Hs), interpolation=cv2.INTER_AREA) 34 | edge = centered_canny_color(small) 35 | if acc_edge is None: 36 | acc_edge = edge 37 | else: 38 | acc_edge = cv2.resize(acc_edge, (edge.shape[1], edge.shape[0]), interpolation=cv2.INTER_LINEAR) 39 | acc_edge = acc_edge * 0.75 + edge * 0.25 40 | 41 | return acc_edge 42 | 43 | 44 | def norm255(x, low=4, high=96): 45 | assert isinstance(x, np.ndarray) 46 | assert x.ndim == 2 and x.dtype == np.float32 47 | 48 | v_min = np.percentile(x, low) 49 | v_max = np.percentile(x, high) 50 | 51 | x -= v_min 52 | x /= v_max - v_min 53 | 54 | return x * 255.0 55 | 56 | 57 | def canny_pyramid(x): 58 | # For some reasons, SAI's Control-lora Canny seems to be trained on canny maps with non-standard resolutions. 59 | # Then we use pyramid to use all resolutions to avoid missing any structure in specific resolutions. 60 | 61 | color_canny = pyramid_canny_color(x) 62 | result = np.sum(color_canny, axis=2) 63 | 64 | return norm255(result, low=1, high=99).clip(0, 255).astype(np.uint8) 65 | 66 | 67 | def cpds(x): 68 | # cv2.decolor is not "decolor", it is Cewu Lu's method 69 | # See http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html 70 | # See https://docs.opencv.org/3.0-beta/modules/photo/doc/decolor.html 71 | 72 | raw = cv2.GaussianBlur(x, (0, 0), 0.8) 73 | density, boost = cv2.decolor(raw) 74 | 75 | raw = raw.astype(np.float32) 76 | density = density.astype(np.float32) 77 | boost = boost.astype(np.float32) 78 | 79 | offset = np.sum((raw - boost) ** 2.0, axis=2) ** 0.5 80 | result = density + offset 81 | 82 | return norm255(result, low=4, high=96).clip(0, 255).astype(np.uint8) 83 | -------------------------------------------------------------------------------- /py/extras/resampler.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/mlfoundations/open_flamingo/blob/main/open_flamingo/src/helpers.py 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | 8 | # FFN 9 | def FeedForward(dim, mult=4): 10 | inner_dim = int(dim * mult) 11 | return nn.Sequential( 12 | nn.LayerNorm(dim), 13 | nn.Linear(dim, inner_dim, bias=False), 14 | nn.GELU(), 15 | nn.Linear(inner_dim, dim, bias=False), 16 | ) 17 | 18 | 19 | def reshape_tensor(x, heads): 20 | bs, length, width = x.shape 21 | #(bs, length, width) --> (bs, length, n_heads, dim_per_head) 22 | x = x.view(bs, length, heads, -1) 23 | # (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head) 24 | x = x.transpose(1, 2) 25 | # (bs, n_heads, length, dim_per_head) --> (bs*n_heads, length, dim_per_head) 26 | x = x.reshape(bs, heads, length, -1) 27 | return x 28 | 29 | 30 | class 
PerceiverAttention(nn.Module): 31 | def __init__(self, *, dim, dim_head=64, heads=8): 32 | super().__init__() 33 | self.scale = dim_head**-0.5 34 | self.dim_head = dim_head 35 | self.heads = heads 36 | inner_dim = dim_head * heads 37 | 38 | self.norm1 = nn.LayerNorm(dim) 39 | self.norm2 = nn.LayerNorm(dim) 40 | 41 | self.to_q = nn.Linear(dim, inner_dim, bias=False) 42 | self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False) 43 | self.to_out = nn.Linear(inner_dim, dim, bias=False) 44 | 45 | 46 | def forward(self, x, latents): 47 | """ 48 | Args: 49 | x (torch.Tensor): image features 50 | shape (b, n1, D) 51 | latent (torch.Tensor): latent features 52 | shape (b, n2, D) 53 | """ 54 | x = self.norm1(x) 55 | latents = self.norm2(latents) 56 | 57 | b, l, _ = latents.shape 58 | 59 | q = self.to_q(latents) 60 | kv_input = torch.cat((x, latents), dim=-2) 61 | k, v = self.to_kv(kv_input).chunk(2, dim=-1) 62 | 63 | q = reshape_tensor(q, self.heads) 64 | k = reshape_tensor(k, self.heads) 65 | v = reshape_tensor(v, self.heads) 66 | 67 | # attention 68 | scale = 1 / math.sqrt(math.sqrt(self.dim_head)) 69 | weight = (q * scale) @ (k * scale).transpose(-2, -1) # More stable with f16 than dividing afterwards 70 | weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) 71 | out = weight @ v 72 | 73 | out = out.permute(0, 2, 1, 3).reshape(b, l, -1) 74 | 75 | return self.to_out(out) 76 | 77 | 78 | class Resampler(nn.Module): 79 | def __init__( 80 | self, 81 | dim=1024, 82 | depth=8, 83 | dim_head=64, 84 | heads=16, 85 | num_queries=8, 86 | embedding_dim=768, 87 | output_dim=1024, 88 | ff_mult=4, 89 | ): 90 | super().__init__() 91 | 92 | self.latents = nn.Parameter(torch.randn(1, num_queries, dim) / dim**0.5) 93 | 94 | self.proj_in = nn.Linear(embedding_dim, dim) 95 | 96 | self.proj_out = nn.Linear(dim, output_dim) 97 | self.norm_out = nn.LayerNorm(output_dim) 98 | 99 | self.layers = nn.ModuleList([]) 100 | for _ in range(depth): 101 | self.layers.append( 102 | nn.ModuleList( 103 | [ 104 | PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads), 105 | FeedForward(dim=dim, mult=ff_mult), 106 | ] 107 | ) 108 | ) 109 | 110 | def forward(self, x): 111 | latents = self.latents.repeat(x.size(0), 1, 1).to(x) 112 | 113 | x = self.proj_in(x) 114 | 115 | for attn, ff in self.layers: 116 | latents = attn(x, latents) + latents 117 | latents = ff(latents) + latents 118 | 119 | latents = self.proj_out(latents) 120 | return self.norm_out(latents) 121 | -------------------------------------------------------------------------------- /py/extras/vae_interpose.py: -------------------------------------------------------------------------------- 1 | # https://github.com/city96/SD-Latent-Interposer/blob/main/interposer.py 2 | 3 | import os 4 | import torch 5 | import safetensors.torch as sf 6 | import torch.nn as nn 7 | import comfy.model_management 8 | 9 | from ldm_patched.modules.model_patcher import FooocusModelPatcher 10 | import folder_paths 11 | 12 | class Block(nn.Module): 13 | def __init__(self, size): 14 | super().__init__() 15 | self.join = nn.ReLU() 16 | self.long = nn.Sequential( 17 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 18 | nn.LeakyReLU(0.1), 19 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 20 | nn.LeakyReLU(0.1), 21 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 22 | ) 23 | 24 | def forward(self, x): 25 | y = self.long(x) 26 | z = self.join(y + x) 27 | return z 28 | 29 | 30 | class Interposer(nn.Module): 31 | def __init__(self): 32 | super().__init__() 
33 | self.chan = 4 34 | self.hid = 128 35 | 36 | self.head_join = nn.ReLU() 37 | self.head_short = nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1) 38 | self.head_long = nn.Sequential( 39 | nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1), 40 | nn.LeakyReLU(0.1), 41 | nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), 42 | nn.LeakyReLU(0.1), 43 | nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), 44 | ) 45 | self.core = nn.Sequential( 46 | Block(self.hid), 47 | Block(self.hid), 48 | Block(self.hid), 49 | ) 50 | self.tail = nn.Sequential( 51 | nn.ReLU(), 52 | nn.Conv2d(self.hid, self.chan, kernel_size=3, stride=1, padding=1) 53 | ) 54 | 55 | def forward(self, x): 56 | y = self.head_join( 57 | self.head_long(x) + 58 | self.head_short(x) 59 | ) 60 | z = self.core(y) 61 | return self.tail(z) 62 | 63 | 64 | vae_approx_model = None 65 | vae_approx_filename = folder_paths.get_full_path("vae_approx", 'xl-to-v1_interposer-v3.1.safetensors') 66 | 67 | 68 | def parse(x): 69 | global vae_approx_model 70 | 71 | x_origin = x.clone() 72 | 73 | if vae_approx_model is None: 74 | model = Interposer() 75 | model.eval() 76 | sd = sf.load_file(vae_approx_filename) 77 | model.load_state_dict(sd) 78 | fp16 = comfy.model_management.should_use_fp16() 79 | if fp16: 80 | model = model.half() 81 | vae_approx_model = FooocusModelPatcher( 82 | model=model, 83 | load_device=comfy.model_management.get_torch_device(), 84 | offload_device=torch.device('cpu') 85 | ) 86 | vae_approx_model.dtype = torch.float16 if fp16 else torch.float32 87 | 88 | comfy.model_management.load_model_gpu(vae_approx_model) 89 | 90 | x = x_origin.to(device=vae_approx_model.load_device, dtype=vae_approx_model.dtype) 91 | x = vae_approx_model.model(x).to(x_origin) 92 | return x 93 | -------------------------------------------------------------------------------- /py/extras/wd14tagger.py: -------------------------------------------------------------------------------- 1 | # https://huggingface.co/spaces/SmilingWolf/wd-v1-4-tags 2 | # https://github.com/pythongosssss/ComfyUI-WD14-Tagger/blob/main/wd14tagger.py 3 | 4 | # { 5 | # "wd-v1-4-moat-tagger-v2": "https://huggingface.co/SmilingWolf/wd-v1-4-moat-tagger-v2", 6 | # "wd-v1-4-convnextv2-tagger-v2": "https://huggingface.co/SmilingWolf/wd-v1-4-convnextv2-tagger-v2", 7 | # "wd-v1-4-convnext-tagger-v2": "https://huggingface.co/SmilingWolf/wd-v1-4-convnext-tagger-v2", 8 | # "wd-v1-4-convnext-tagger": "https://huggingface.co/SmilingWolf/wd-v1-4-convnext-tagger", 9 | # "wd-v1-4-vit-tagger-v2": "https://huggingface.co/SmilingWolf/wd-v1-4-vit-tagger-v2" 10 | # } 11 | 12 | 13 | import numpy as np 14 | import csv 15 | import onnxruntime as ort 16 | 17 | from PIL import Image 18 | from onnxruntime import InferenceSession 19 | from modules.model_loader import load_file_from_url 20 | 21 | 22 | global_model = None 23 | global_csv = None 24 | 25 | 26 | def default_interrogator(image_rgb, threshold=0.35, character_threshold=0.85, exclude_tags=""): 27 | global global_model, global_csv 28 | 29 | model_name = "wd-v1-4-moat-tagger-v2" 30 | 31 | model_onnx_filename = load_file_from_url( 32 | url=f'https://huggingface.co/lllyasviel/misc/resolve/main/{model_name}.onnx', 33 | model_dir="clip_vision", 34 | file_name=f'{model_name}.onnx', 35 | ) 36 | 37 | model_csv_filename = load_file_from_url( 38 | url=f'https://huggingface.co/lllyasviel/misc/resolve/main/{model_name}.csv', 39 | model_dir="clip_vision", 40 | file_name=f'{model_name}.csv', 41 | ) 42 | 43 | 
if global_model is not None: 44 | model = global_model 45 | else: 46 | model = InferenceSession(model_onnx_filename, providers=ort.get_available_providers()) 47 | global_model = model 48 | 49 | input = model.get_inputs()[0] 50 | height = input.shape[1] 51 | 52 | image = Image.fromarray(image_rgb) # RGB 53 | ratio = float(height)/max(image.size) 54 | new_size = tuple([int(x*ratio) for x in image.size]) 55 | image = image.resize(new_size, Image.LANCZOS) 56 | square = Image.new("RGB", (height, height), (255, 255, 255)) 57 | square.paste(image, ((height-new_size[0])//2, (height-new_size[1])//2)) 58 | 59 | image = np.array(square).astype(np.float32) 60 | image = image[:, :, ::-1] # RGB -> BGR 61 | image = np.expand_dims(image, 0) 62 | 63 | if global_csv is not None: 64 | csv_lines = global_csv 65 | else: 66 | csv_lines = [] 67 | with open(model_csv_filename) as f: 68 | reader = csv.reader(f) 69 | next(reader) 70 | for row in reader: 71 | csv_lines.append(row) 72 | global_csv = csv_lines 73 | 74 | tags = [] 75 | general_index = None 76 | character_index = None 77 | for line_num, row in enumerate(csv_lines): 78 | if general_index is None and row[2] == "0": 79 | general_index = line_num 80 | elif character_index is None and row[2] == "4": 81 | character_index = line_num 82 | tags.append(row[1]) 83 | 84 | label_name = model.get_outputs()[0].name 85 | probs = model.run([label_name], {input.name: image})[0] 86 | 87 | result = list(zip(tags, probs[0])) 88 | 89 | general = [item for item in result[general_index:character_index] if item[1] > threshold] 90 | character = [item for item in result[character_index:] if item[1] > character_threshold] 91 | 92 | all = character + general 93 | remove = [s.strip() for s in exclude_tags.lower().split(",")] 94 | all = [tag for tag in all if tag[0] not in remove] 95 | 96 | res = ", ".join((item[0].replace("(", "\\(").replace(")", "\\)") for item in all)).replace('_', ' ') 97 | return res 98 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_clip_sdxl.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | from ldm_patched.contrib.external import MAX_RESOLUTION 5 | 6 | class CLIPTextEncodeSDXLRefiner: 7 | @classmethod 8 | def INPUT_TYPES(s): 9 | return {"required": { 10 | "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}), 11 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 12 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 13 | "text": ("STRING", {"multiline": True}), "clip": ("CLIP", ), 14 | }} 15 | RETURN_TYPES = ("CONDITIONING",) 16 | FUNCTION = "encode" 17 | 18 | CATEGORY = "advanced/conditioning" 19 | 20 | def encode(self, clip, ascore, width, height, text): 21 | tokens = clip.tokenize(text) 22 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 23 | return ([[cond, {"pooled_output": pooled, "aesthetic_score": ascore, "width": width,"height": height}]], ) 24 | 25 | class CLIPTextEncodeSDXL: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { 29 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 30 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 31 | "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 32 | "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 33 | "target_width": 
("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 34 | "target_height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 35 | "text_g": ("STRING", {"multiline": True, "default": "CLIP_G"}), "clip": ("CLIP", ), 36 | "text_l": ("STRING", {"multiline": True, "default": "CLIP_L"}), "clip": ("CLIP", ), 37 | }} 38 | RETURN_TYPES = ("CONDITIONING",) 39 | FUNCTION = "encode" 40 | 41 | CATEGORY = "advanced/conditioning" 42 | 43 | def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l): 44 | tokens = clip.tokenize(text_g) 45 | tokens["l"] = clip.tokenize(text_l)["l"] 46 | if len(tokens["l"]) != len(tokens["g"]): 47 | empty = clip.tokenize("") 48 | while len(tokens["l"]) < len(tokens["g"]): 49 | tokens["l"] += empty["l"] 50 | while len(tokens["l"]) > len(tokens["g"]): 51 | tokens["g"] += empty["g"] 52 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 53 | return ([[cond, {"pooled_output": pooled, "width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}]], ) 54 | 55 | NODE_CLASS_MAPPINGS = { 56 | "CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner, 57 | "CLIPTextEncodeSDXL": CLIPTextEncodeSDXL, 58 | } 59 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_freelunch.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | #code originally taken from: https://github.com/ChenyangSi/FreeU (under MIT License) 4 | 5 | import torch 6 | 7 | 8 | def Fourier_filter(x, threshold, scale): 9 | # FFT 10 | x_freq = torch.fft.fftn(x.float(), dim=(-2, -1)) 11 | x_freq = torch.fft.fftshift(x_freq, dim=(-2, -1)) 12 | 13 | B, C, H, W = x_freq.shape 14 | mask = torch.ones((B, C, H, W), device=x.device) 15 | 16 | crow, ccol = H // 2, W //2 17 | mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale 18 | x_freq = x_freq * mask 19 | 20 | # IFFT 21 | x_freq = torch.fft.ifftshift(x_freq, dim=(-2, -1)) 22 | x_filtered = torch.fft.ifftn(x_freq, dim=(-2, -1)).real 23 | 24 | return x_filtered.to(x.dtype) 25 | 26 | 27 | class FreeU: 28 | @classmethod 29 | def INPUT_TYPES(s): 30 | return {"required": { "model": ("MODEL",), 31 | "b1": ("FLOAT", {"default": 1.1, "min": 0.0, "max": 10.0, "step": 0.01}), 32 | "b2": ("FLOAT", {"default": 1.2, "min": 0.0, "max": 10.0, "step": 0.01}), 33 | "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), 34 | "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), 35 | }} 36 | RETURN_TYPES = ("MODEL",) 37 | FUNCTION = "patch" 38 | 39 | CATEGORY = "model_patches" 40 | 41 | def patch(self, model, b1, b2, s1, s2): 42 | model_channels = model.model.model_config.unet_config["model_channels"] 43 | scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} 44 | on_cpu_devices = {} 45 | 46 | def output_block_patch(h, hsp, transformer_options): 47 | scale = scale_dict.get(h.shape[1], None) 48 | if scale is not None: 49 | h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * scale[0] 50 | if hsp.device not in on_cpu_devices: 51 | try: 52 | hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) 53 | except: 54 | print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") 55 | on_cpu_devices[hsp.device] = True 56 | hsp = Fourier_filter(hsp.cpu(), 
threshold=1, scale=scale[1]).to(hsp.device) 57 | else: 58 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 59 | 60 | return h, hsp 61 | 62 | m = model.clone() 63 | m.set_model_output_block_patch(output_block_patch) 64 | return (m, ) 65 | 66 | class FreeU_V2: 67 | @classmethod 68 | def INPUT_TYPES(s): 69 | return {"required": { "model": ("MODEL",), 70 | "b1": ("FLOAT", {"default": 1.3, "min": 0.0, "max": 10.0, "step": 0.01}), 71 | "b2": ("FLOAT", {"default": 1.4, "min": 0.0, "max": 10.0, "step": 0.01}), 72 | "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), 73 | "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), 74 | }} 75 | RETURN_TYPES = ("MODEL",) 76 | FUNCTION = "patch" 77 | 78 | CATEGORY = "model_patches" 79 | 80 | def patch(self, model, b1, b2, s1, s2): 81 | model_channels = model.model.model_config.unet_config["model_channels"] 82 | scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} 83 | on_cpu_devices = {} 84 | 85 | def output_block_patch(h, hsp, transformer_options): 86 | scale = scale_dict.get(h.shape[1], None) 87 | if scale is not None: 88 | hidden_mean = h.mean(1).unsqueeze(1) 89 | B = hidden_mean.shape[0] 90 | hidden_max, _ = torch.max(hidden_mean.view(B, -1), dim=-1, keepdim=True) 91 | hidden_min, _ = torch.min(hidden_mean.view(B, -1), dim=-1, keepdim=True) 92 | hidden_mean = (hidden_mean - hidden_min.unsqueeze(2).unsqueeze(3)) / (hidden_max - hidden_min).unsqueeze(2).unsqueeze(3) 93 | 94 | h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * ((scale[0] - 1 ) * hidden_mean + 1) 95 | 96 | if hsp.device not in on_cpu_devices: 97 | try: 98 | hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) 99 | except: 100 | print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") 101 | on_cpu_devices[hsp.device] = True 102 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 103 | else: 104 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 105 | 106 | return h, hsp 107 | 108 | m = model.clone() 109 | m.set_model_output_block_patch(output_block_patch) 110 | return (m, ) 111 | 112 | NODE_CLASS_MAPPINGS = { 113 | "FreeU": FreeU, 114 | "FreeU_V2": FreeU_V2, 115 | } 116 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_hypernetwork.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.modules.utils 4 | import ldm_patched.utils.path_utils 5 | import torch 6 | 7 | def load_hypernetwork_patch(path, strength): 8 | sd = ldm_patched.modules.utils.load_torch_file(path, safe_load=True) 9 | activation_func = sd.get('activation_func', 'linear') 10 | is_layer_norm = sd.get('is_layer_norm', False) 11 | use_dropout = sd.get('use_dropout', False) 12 | activate_output = sd.get('activate_output', False) 13 | last_layer_dropout = sd.get('last_layer_dropout', False) 14 | 15 | valid_activation = { 16 | "linear": torch.nn.Identity, 17 | "relu": torch.nn.ReLU, 18 | "leakyrelu": torch.nn.LeakyReLU, 19 | "elu": torch.nn.ELU, 20 | "swish": torch.nn.Hardswish, 21 | "tanh": torch.nn.Tanh, 22 | "sigmoid": torch.nn.Sigmoid, 23 | "softsign": torch.nn.Softsign, 24 | "mish": torch.nn.Mish, 25 | } 26 | 27 | if activation_func not in valid_activation: 28 | print("Unsupported Hypernetwork format, if you report it I might implement it.", 
path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout) 29 | return None 30 | 31 | out = {} 32 | 33 | for d in sd: 34 | try: 35 | dim = int(d) 36 | except: 37 | continue 38 | 39 | output = [] 40 | for index in [0, 1]: 41 | attn_weights = sd[dim][index] 42 | keys = attn_weights.keys() 43 | 44 | linears = filter(lambda a: a.endswith(".weight"), keys) 45 | linears = list(map(lambda a: a[:-len(".weight")], linears)) 46 | layers = [] 47 | 48 | i = 0 49 | while i < len(linears): 50 | lin_name = linears[i] 51 | last_layer = (i == (len(linears) - 1)) 52 | penultimate_layer = (i == (len(linears) - 2)) 53 | 54 | lin_weight = attn_weights['{}.weight'.format(lin_name)] 55 | lin_bias = attn_weights['{}.bias'.format(lin_name)] 56 | layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0]) 57 | layer.load_state_dict({"weight": lin_weight, "bias": lin_bias}) 58 | layers.append(layer) 59 | if activation_func != "linear": 60 | if (not last_layer) or (activate_output): 61 | layers.append(valid_activation[activation_func]()) 62 | if is_layer_norm: 63 | i += 1 64 | ln_name = linears[i] 65 | ln_weight = attn_weights['{}.weight'.format(ln_name)] 66 | ln_bias = attn_weights['{}.bias'.format(ln_name)] 67 | ln = torch.nn.LayerNorm(ln_weight.shape[0]) 68 | ln.load_state_dict({"weight": ln_weight, "bias": ln_bias}) 69 | layers.append(ln) 70 | if use_dropout: 71 | if (not last_layer) and (not penultimate_layer or last_layer_dropout): 72 | layers.append(torch.nn.Dropout(p=0.3)) 73 | i += 1 74 | 75 | output.append(torch.nn.Sequential(*layers)) 76 | out[dim] = torch.nn.ModuleList(output) 77 | 78 | class hypernetwork_patch: 79 | def __init__(self, hypernet, strength): 80 | self.hypernet = hypernet 81 | self.strength = strength 82 | def __call__(self, q, k, v, extra_options): 83 | dim = k.shape[-1] 84 | if dim in self.hypernet: 85 | hn = self.hypernet[dim] 86 | k = k + hn[0](k) * self.strength 87 | v = v + hn[1](v) * self.strength 88 | 89 | return q, k, v 90 | 91 | def to(self, device): 92 | for d in self.hypernet.keys(): 93 | self.hypernet[d] = self.hypernet[d].to(device) 94 | return self 95 | 96 | return hypernetwork_patch(out, strength) 97 | 98 | class HypernetworkLoader: 99 | @classmethod 100 | def INPUT_TYPES(s): 101 | return {"required": { "model": ("MODEL",), 102 | "hypernetwork_name": (ldm_patched.utils.path_utils.get_filename_list("hypernetworks"), ), 103 | "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), 104 | }} 105 | RETURN_TYPES = ("MODEL",) 106 | FUNCTION = "load_hypernetwork" 107 | 108 | CATEGORY = "loaders" 109 | 110 | def load_hypernetwork(self, model, hypernetwork_name, strength): 111 | hypernetwork_path = ldm_patched.utils.path_utils.get_full_path("hypernetworks", hypernetwork_name) 112 | model_hypernetwork = model.clone() 113 | patch = load_hypernetwork_patch(hypernetwork_path, strength) 114 | if patch is not None: 115 | model_hypernetwork.set_model_attn1_patch(patch) 116 | model_hypernetwork.set_model_attn2_patch(patch) 117 | return (model_hypernetwork,) 118 | 119 | NODE_CLASS_MAPPINGS = { 120 | "HypernetworkLoader": HypernetworkLoader 121 | } 122 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_hypertile.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | #Taken from: https://github.com/tfernd/HyperTile/ 4 | 5 | import math 6 | from einops import 
rearrange 7 | # Use torch rng for consistency across generations 8 | from torch import randint 9 | 10 | def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: 11 | min_value = min(min_value, value) 12 | 13 | # All big divisors of value (inclusive) 14 | divisors = [i for i in range(min_value, value + 1) if value % i == 0] 15 | 16 | ns = [value // i for i in divisors[:max_options]] # has at least 1 element 17 | 18 | if len(ns) - 1 > 0: 19 | idx = randint(low=0, high=len(ns) - 1, size=(1,)).item() 20 | else: 21 | idx = 0 22 | 23 | return ns[idx] 24 | 25 | class HyperTile: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { "model": ("MODEL",), 29 | "tile_size": ("INT", {"default": 256, "min": 1, "max": 2048}), 30 | "swap_size": ("INT", {"default": 2, "min": 1, "max": 128}), 31 | "max_depth": ("INT", {"default": 0, "min": 0, "max": 10}), 32 | "scale_depth": ("BOOLEAN", {"default": False}), 33 | }} 34 | RETURN_TYPES = ("MODEL",) 35 | FUNCTION = "patch" 36 | 37 | CATEGORY = "model_patches" 38 | 39 | def patch(self, model, tile_size, swap_size, max_depth, scale_depth): 40 | model_channels = model.model.model_config.unet_config["model_channels"] 41 | 42 | latent_tile_size = max(32, tile_size) // 8 43 | self.temp = None 44 | 45 | def hypertile_in(q, k, v, extra_options): 46 | model_chans = q.shape[-2] 47 | orig_shape = extra_options['original_shape'] 48 | apply_to = [] 49 | for i in range(max_depth + 1): 50 | apply_to.append((orig_shape[-2] / (2 ** i)) * (orig_shape[-1] / (2 ** i))) 51 | 52 | if model_chans in apply_to: 53 | shape = extra_options["original_shape"] 54 | aspect_ratio = shape[-1] / shape[-2] 55 | 56 | hw = q.size(1) 57 | h, w = round(math.sqrt(hw * aspect_ratio)), round(math.sqrt(hw / aspect_ratio)) 58 | 59 | factor = (2 ** apply_to.index(model_chans)) if scale_depth else 1 60 | nh = random_divisor(h, latent_tile_size * factor, swap_size) 61 | nw = random_divisor(w, latent_tile_size * factor, swap_size) 62 | 63 | if nh * nw > 1: 64 | q = rearrange(q, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) 65 | self.temp = (nh, nw, h, w) 66 | return q, k, v 67 | 68 | return q, k, v 69 | def hypertile_out(out, extra_options): 70 | if self.temp is not None: 71 | nh, nw, h, w = self.temp 72 | self.temp = None 73 | out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) 74 | out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) 75 | return out 76 | 77 | 78 | m = model.clone() 79 | m.set_model_attn1_patch(hypertile_in) 80 | m.set_model_attn1_output_patch(hypertile_out) 81 | return (m, ) 82 | 83 | NODE_CLASS_MAPPINGS = { 84 | "HyperTile": HyperTile, 85 | } 86 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_model_downscale.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.modules.utils 5 | 6 | class PatchModelAddDownscale: 7 | upscale_methods = ["bicubic", "nearest-exact", "bilinear", "area", "bislerp"] 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | return {"required": { "model": ("MODEL",), 11 | "block_number": ("INT", {"default": 3, "min": 1, "max": 32, "step": 1}), 12 | "downscale_factor": ("FLOAT", {"default": 2.0, "min": 0.1, "max": 9.0, "step": 0.001}), 13 | "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), 14 | "end_percent": ("FLOAT", 
{"default": 0.35, "min": 0.0, "max": 1.0, "step": 0.001}), 15 | "downscale_after_skip": ("BOOLEAN", {"default": True}), 16 | "downscale_method": (s.upscale_methods,), 17 | "upscale_method": (s.upscale_methods,), 18 | }} 19 | RETURN_TYPES = ("MODEL",) 20 | FUNCTION = "patch" 21 | 22 | CATEGORY = "_for_testing" 23 | 24 | def patch(self, model, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method): 25 | sigma_start = model.model.model_sampling.percent_to_sigma(start_percent) 26 | sigma_end = model.model.model_sampling.percent_to_sigma(end_percent) 27 | 28 | def input_block_patch(h, transformer_options): 29 | if transformer_options["block"][1] == block_number: 30 | sigma = transformer_options["sigmas"][0].item() 31 | if sigma <= sigma_start and sigma >= sigma_end: 32 | h = ldm_patched.modules.utils.common_upscale(h, round(h.shape[-1] * (1.0 / downscale_factor)), round(h.shape[-2] * (1.0 / downscale_factor)), downscale_method, "disabled") 33 | return h 34 | 35 | def output_block_patch(h, hsp, transformer_options): 36 | if h.shape[2] != hsp.shape[2]: 37 | h = ldm_patched.modules.utils.common_upscale(h, hsp.shape[-1], hsp.shape[-2], upscale_method, "disabled") 38 | return h, hsp 39 | 40 | m = model.clone() 41 | if downscale_after_skip: 42 | m.set_model_input_block_patch_after_skip(input_block_patch) 43 | else: 44 | m.set_model_input_block_patch(input_block_patch) 45 | m.set_model_output_block_patch(output_block_patch) 46 | return (m, ) 47 | 48 | NODE_CLASS_MAPPINGS = { 49 | "PatchModelAddDownscale": PatchModelAddDownscale, 50 | } 51 | 52 | NODE_DISPLAY_NAME_MAPPINGS = { 53 | # Sampling 54 | "PatchModelAddDownscale": "PatchModelAddDownscale (Kohya Deep Shrink)", 55 | } 56 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_perpneg.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import comfy.model_management 5 | import ldm_patched.modules.sample 6 | import ldm_patched.modules.samplers 7 | import ldm_patched.modules.utils 8 | 9 | 10 | class PerpNeg: 11 | @classmethod 12 | def INPUT_TYPES(s): 13 | return {"required": {"model": ("MODEL", ), 14 | "empty_conditioning": ("CONDITIONING", ), 15 | "neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}), 16 | }} 17 | RETURN_TYPES = ("MODEL",) 18 | FUNCTION = "patch" 19 | 20 | CATEGORY = "_for_testing" 21 | 22 | def patch(self, model, empty_conditioning, neg_scale): 23 | m = model.clone() 24 | nocond = ldm_patched.modules.sample.convert_cond(empty_conditioning) 25 | 26 | def cfg_function(args): 27 | model = args["model"] 28 | noise_pred_pos = args["cond_denoised"] 29 | noise_pred_neg = args["uncond_denoised"] 30 | cond_scale = args["cond_scale"] 31 | x = args["input"] 32 | sigma = args["sigma"] 33 | model_options = args["model_options"] 34 | nocond_processed = ldm_patched.modules.samplers.encode_model_conds(model.extra_conds, nocond, x, x.device, "negative") 35 | 36 | (noise_pred_nocond, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, nocond_processed, None, x, sigma, model_options) 37 | 38 | pos = noise_pred_pos - noise_pred_nocond 39 | neg = noise_pred_neg - noise_pred_nocond 40 | perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg 41 | perp_neg = perp * neg_scale 42 | cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg) 43 | cfg_result = x - cfg_result 44 | 
return cfg_result 45 | 46 | m.set_model_sampler_cfg_function(cfg_function) 47 | 48 | return (m, ) 49 | 50 | 51 | NODE_CLASS_MAPPINGS = { 52 | "PerpNeg": PerpNeg, 53 | } 54 | 55 | NODE_DISPLAY_NAME_MAPPINGS = { 56 | "PerpNeg": "Perp-Neg", 57 | } 58 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_sdupscale.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.contrib.external 5 | import ldm_patched.modules.utils 6 | 7 | class SD_4XUpscale_Conditioning: 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | return {"required": { "images": ("IMAGE",), 11 | "positive": ("CONDITIONING",), 12 | "negative": ("CONDITIONING",), 13 | "scale_ratio": ("FLOAT", {"default": 4.0, "min": 0.0, "max": 10.0, "step": 0.01}), 14 | "noise_augmentation": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), 15 | }} 16 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 17 | RETURN_NAMES = ("positive", "negative", "latent") 18 | 19 | FUNCTION = "encode" 20 | 21 | CATEGORY = "conditioning/upscale_diffusion" 22 | 23 | def encode(self, images, positive, negative, scale_ratio, noise_augmentation): 24 | width = max(1, round(images.shape[-2] * scale_ratio)) 25 | height = max(1, round(images.shape[-3] * scale_ratio)) 26 | 27 | pixels = ldm_patched.modules.utils.common_upscale((images.movedim(-1,1) * 2.0) - 1.0, width // 4, height // 4, "bilinear", "center") 28 | 29 | out_cp = [] 30 | out_cn = [] 31 | 32 | for t in positive: 33 | n = [t[0], t[1].copy()] 34 | n[1]['concat_image'] = pixels 35 | n[1]['noise_augmentation'] = noise_augmentation 36 | out_cp.append(n) 37 | 38 | for t in negative: 39 | n = [t[0], t[1].copy()] 40 | n[1]['concat_image'] = pixels 41 | n[1]['noise_augmentation'] = noise_augmentation 42 | out_cn.append(n) 43 | 44 | latent = torch.zeros([images.shape[0], 4, height // 4, width // 4]) 45 | return (out_cp, out_cn, {"samples":latent}) 46 | 47 | NODE_CLASS_MAPPINGS = { 48 | "SD_4XUpscale_Conditioning": SD_4XUpscale_Conditioning, 49 | } 50 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_upscale_model.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import os 4 | from ldm_patched.pfn import model_loading 5 | from comfy import model_management 6 | import torch 7 | import ldm_patched.modules.utils 8 | import ldm_patched.utils.path_utils 9 | 10 | class UpscaleModelLoader: 11 | @classmethod 12 | def INPUT_TYPES(s): 13 | return {"required": { "model_name": (ldm_patched.utils.path_utils.get_filename_list("upscale_models"), ), 14 | }} 15 | RETURN_TYPES = ("UPSCALE_MODEL",) 16 | FUNCTION = "load_model" 17 | 18 | CATEGORY = "loaders" 19 | 20 | def load_model(self, model_name): 21 | model_path = ldm_patched.utils.path_utils.get_full_path("upscale_models", model_name) 22 | sd = ldm_patched.modules.utils.load_torch_file(model_path, safe_load=True) 23 | if "module.layers.0.residual_group.blocks.0.norm1.weight" in sd: 24 | sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"module.":""}) 25 | out = model_loading.load_state_dict(sd).eval() 26 | return (out, ) 27 | 28 | 29 | class ImageUpscaleWithModel: 30 | @classmethod 31 | def INPUT_TYPES(s): 32 | return {"required": { "upscale_model": ("UPSCALE_MODEL",), 33 | 
"image": ("IMAGE",), 34 | }} 35 | RETURN_TYPES = ("IMAGE",) 36 | FUNCTION = "upscale" 37 | 38 | CATEGORY = "image/upscaling" 39 | 40 | def upscale(self, upscale_model, image): 41 | device = model_management.get_torch_device() 42 | upscale_model.to(device) 43 | in_img = image.movedim(-1,-3).to(device) 44 | free_memory = model_management.get_free_memory(device) 45 | 46 | tile = 512 47 | overlap = 32 48 | 49 | oom = True 50 | while oom: 51 | try: 52 | steps = in_img.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(in_img.shape[3], in_img.shape[2], tile_x=tile, tile_y=tile, overlap=overlap) 53 | pbar = ldm_patched.modules.utils.ProgressBar(steps) 54 | s = ldm_patched.modules.utils.tiled_scale(in_img, lambda a: upscale_model(a), tile_x=tile, tile_y=tile, overlap=overlap, upscale_amount=upscale_model.scale, pbar=pbar) 55 | oom = False 56 | except model_management.OOM_EXCEPTION as e: 57 | tile //= 2 58 | if tile < 128: 59 | raise e 60 | 61 | upscale_model.cpu() 62 | s = torch.clamp(s.movedim(-3,-1), min=0, max=1.0) 63 | return (s,) 64 | 65 | NODE_CLASS_MAPPINGS = { 66 | "UpscaleModelLoader": UpscaleModelLoader, 67 | "ImageUpscaleWithModel": ImageUpscaleWithModel 68 | } 69 | -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/ldm_patched/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/diffusionmodules/upscaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from functools import partial 5 | 6 | from .util import extract_into_tensor, make_beta_schedule 7 | from ldm_patched.ldm.util import default 8 | 9 | 10 | class AbstractLowScaleModel(nn.Module): 11 | # for concatenating a downsampled image to the latent representation 12 | def __init__(self, noise_schedule_config=None): 13 | super(AbstractLowScaleModel, self).__init__() 14 | if noise_schedule_config is not None: 15 | self.register_schedule(**noise_schedule_config) 16 | 17 | def register_schedule(self, beta_schedule="linear", timesteps=1000, 18 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 19 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, 20 | cosine_s=cosine_s) 21 | alphas = 1. - betas 22 | alphas_cumprod = np.cumprod(alphas, axis=0) 23 | alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 24 | 25 | timesteps, = betas.shape 26 | self.num_timesteps = int(timesteps) 27 | self.linear_start = linear_start 28 | self.linear_end = linear_end 29 | assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' 30 | 31 | to_torch = partial(torch.tensor, dtype=torch.float32) 32 | 33 | self.register_buffer('betas', to_torch(betas)) 34 | self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) 35 | self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) 36 | 37 | # calculations for diffusion q(x_t | x_{t-1}) and others 38 | self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) 39 | self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. 
- alphas_cumprod))) 40 | self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) 41 | self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) 42 | self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) 43 | 44 | def q_sample(self, x_start, t, noise=None, seed=None): 45 | if noise is None: 46 | if seed is None: 47 | noise = torch.randn_like(x_start) 48 | else: 49 | noise = torch.randn(x_start.size(), dtype=x_start.dtype, layout=x_start.layout, generator=torch.manual_seed(seed)).to(x_start.device) 50 | return (extract_into_tensor(self.sqrt_alphas_cumprod.to(x_start.device), t, x_start.shape) * x_start + 51 | extract_into_tensor(self.sqrt_one_minus_alphas_cumprod.to(x_start.device), t, x_start.shape) * noise) 52 | 53 | def forward(self, x): 54 | return x, None 55 | 56 | def decode(self, x): 57 | return x 58 | 59 | 60 | class SimpleImageConcat(AbstractLowScaleModel): 61 | # no noise level conditioning 62 | def __init__(self): 63 | super(SimpleImageConcat, self).__init__(noise_schedule_config=None) 64 | self.max_noise_level = 0 65 | 66 | def forward(self, x): 67 | # fix to constant noise level 68 | return x, torch.zeros(x.shape[0], device=x.device).long() 69 | 70 | 71 | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): 72 | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cuda=False): 73 | super().__init__(noise_schedule_config=noise_schedule_config) 74 | self.max_noise_level = max_noise_level 75 | 76 | def forward(self, x, noise_level=None, seed=None): 77 | if noise_level is None: 78 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 79 | else: 80 | assert isinstance(noise_level, torch.Tensor) 81 | z = self.q_sample(x, noise_level, seed=seed) 82 | return z, noise_level 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/ldm_patched/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 
| def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not 
key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) 81 | -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/ldm_patched/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/encoders/noise_aug_modules.py: -------------------------------------------------------------------------------- 1 | from ..diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation 2 | from ..diffusionmodules.openaimodel import Timestep 3 | import torch 4 | 5 | class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation): 6 | def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | if clip_stats_path is None: 9 | clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) 10 | else: 11 | clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu") 12 | self.register_buffer("data_mean", clip_mean[None, :], persistent=False) 13 | self.register_buffer("data_std", clip_std[None, :], persistent=False) 14 | self.time_embed = Timestep(timestep_dim) 15 | 16 | def scale(self, x): 17 | # re-normalize to centered mean and unit variance 18 | x = (x - self.data_mean.to(x.device)) * 1. 
/ self.data_std.to(x.device) 19 | return x 20 | 21 | def unscale(self, x): 22 | # back to original data stats 23 | x = (x * self.data_std.to(x.device)) + self.data_mean.to(x.device) 24 | return x 25 | 26 | def forward(self, x, noise_level=None, seed=None): 27 | if noise_level is None: 28 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 29 | else: 30 | assert isinstance(noise_level, torch.Tensor) 31 | x = self.scale(x) 32 | z = self.q_sample(x, noise_level, seed=seed) 33 | z = self.unscale(z) 34 | noise_level = self.time_embed(noise_level) 35 | return z, noise_level 36 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/checkpoint_pickle.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | load = pickle.load 4 | 5 | class Empty: 6 | pass 7 | 8 | class Unpickler(pickle.Unpickler): 9 | def find_class(self, module, name): 10 | #TODO: safe unpickle 11 | if module.startswith("pytorch_lightning"): 12 | return Empty 13 | return super().find_class(module, name) 14 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/clip_config_bigg.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1280, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 5120, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 20, 18 | "num_hidden_layers": 32, 19 | "pad_token_id": 1, 20 | "projection_dim": 1280, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/clip_vision_config_g.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1664, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 8192, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 48, 15 | "patch_size": 14, 16 | "projection_dim": 1280, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/clip_vision_config_h.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1280, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 5120, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 32, 15 | "patch_size": 14, 16 | "projection_dim": 1024, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/clip_vision_config_vitl.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | 
"dropout": 0.0, 4 | "hidden_act": "quick_gelu", 5 | "hidden_size": 1024, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 4096, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 24, 15 | "patch_size": 14, 16 | "projection_dim": 768, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/conds.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import ldm_patched.modules.utils 4 | 5 | 6 | 7 | class CONDRegular: 8 | def __init__(self, cond): 9 | self.cond = cond 10 | 11 | def _copy_with(self, cond): 12 | return self.__class__(cond) 13 | 14 | def process_cond(self, batch_size, device, **kwargs): 15 | return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(self.cond, batch_size).to(device)) 16 | 17 | def can_concat(self, other): 18 | if self.cond.shape != other.cond.shape: 19 | return False 20 | return True 21 | 22 | def concat(self, others): 23 | conds = [self.cond] 24 | for x in others: 25 | conds.append(x.cond) 26 | return torch.cat(conds) 27 | 28 | class CONDNoiseShape(CONDRegular): 29 | def process_cond(self, batch_size, device, area, **kwargs): 30 | data = self.cond[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] 31 | return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(data, batch_size).to(device)) 32 | 33 | 34 | class CONDCrossAttn(CONDRegular): 35 | def can_concat(self, other): 36 | s1 = self.cond.shape 37 | s2 = other.cond.shape 38 | if s1 != s2: 39 | if s1[0] != s2[0] or s1[2] != s2[2]: #these 2 cases should not happen 40 | return False 41 | 42 | mult_min = math.lcm(s1[1], s2[1]) 43 | diff = mult_min // min(s1[1], s2[1]) 44 | if diff > 4: #arbitrary limit on the padding because it's probably going to impact performance negatively if it's too much 45 | return False 46 | return True 47 | 48 | def concat(self, others): 49 | conds = [self.cond] 50 | crossattn_max_len = self.cond.shape[1] 51 | for x in others: 52 | c = x.cond 53 | crossattn_max_len = math.lcm(crossattn_max_len, c.shape[1]) 54 | conds.append(c) 55 | 56 | out = [] 57 | for c in conds: 58 | if c.shape[1] < crossattn_max_len: 59 | c = c.repeat(1, crossattn_max_len // c.shape[1], 1) #padding with repeat doesn't change result 60 | out.append(c) 61 | return torch.cat(out) 62 | 63 | class CONDConstant(CONDRegular): 64 | def __init__(self, cond): 65 | self.cond = cond 66 | 67 | def process_cond(self, batch_size, device, **kwargs): 68 | return self._copy_with(self.cond) 69 | 70 | def can_concat(self, other): 71 | if self.cond != other.cond: 72 | return False 73 | return True 74 | 75 | def concat(self, others): 76 | return self.cond 77 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/diffusers_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import ldm_patched.modules.sd 4 | 5 | def first_file(path, filenames): 6 | for f in filenames: 7 | p = os.path.join(path, f) 8 | if os.path.exists(p): 9 | return p 10 | return None 11 | 12 | def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None): 13 | diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", 
"diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"] 14 | unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names) 15 | vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names) 16 | 17 | text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"] 18 | text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names) 19 | text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names) 20 | 21 | text_encoder_paths = [text_encoder1_path] 22 | if text_encoder2_path is not None: 23 | text_encoder_paths.append(text_encoder2_path) 24 | 25 | unet = ldm_patched.modules.sd.load_unet(unet_path) 26 | 27 | clip = None 28 | if output_clip: 29 | clip = ldm_patched.modules.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory) 30 | 31 | vae = None 32 | if output_vae: 33 | sd = ldm_patched.modules.utils.load_torch_file(vae_path) 34 | vae = ldm_patched.modules.sd.VAE(sd=sd) 35 | 36 | return (unet, clip, vae) 37 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/latent_formats.py: -------------------------------------------------------------------------------- 1 | 2 | class LatentFormat: 3 | scale_factor = 1.0 4 | latent_channels = 4 5 | latent_dimensions = 2 6 | latent_rgb_factors = None 7 | latent_rgb_factors_bias = None 8 | taesd_decoder_name = None 9 | 10 | def process_in(self, latent): 11 | return latent * self.scale_factor 12 | 13 | def process_out(self, latent): 14 | return latent / self.scale_factor 15 | 16 | class SD15(LatentFormat): 17 | def __init__(self, scale_factor=0.18215): 18 | self.scale_factor = scale_factor 19 | self.latent_rgb_factors = [ 20 | # R G B 21 | [ 0.3512, 0.2297, 0.3227], 22 | [ 0.3250, 0.4974, 0.2350], 23 | [-0.2829, 0.1762, 0.2721], 24 | [-0.2120, -0.2616, -0.7177] 25 | ] 26 | self.taesd_decoder_name = "taesd_decoder" 27 | 28 | class SDXL(LatentFormat): 29 | scale_factor = 0.13025 30 | 31 | def __init__(self): 32 | self.latent_rgb_factors = [ 33 | # R G B 34 | [ 0.3651, 0.4232, 0.4341], 35 | [-0.2533, -0.0042, 0.1068], 36 | [ 0.1076, 0.1111, -0.0362], 37 | [-0.3165, -0.2492, -0.2188] 38 | ] 39 | self.latent_rgb_factors_bias = [ 0.1084, -0.0175, -0.0011] 40 | 41 | self.taesd_decoder_name = "taesdxl_decoder" 42 | 43 | class SD_X4(LatentFormat): 44 | def __init__(self): 45 | self.scale_factor = 0.08333 46 | self.latent_rgb_factors = [ 47 | [-0.2340, -0.3863, -0.3257], 48 | [ 0.0994, 0.0885, -0.0908], 49 | [-0.2833, -0.2349, -0.3741], 50 | [ 0.2523, -0.0055, -0.1651] 51 | ] 52 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import comfy.model_management 3 | 4 | def cast_bias_weight(s, input): 5 | bias = None 6 | non_blocking = comfy.model_management.device_supports_non_blocking(input.device) 7 | if s.bias is not None: 8 | bias = s.bias.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) 9 | weight = s.weight.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) 10 | return weight, bias 11 | 12 | 13 | class disable_weight_init: 14 | class Linear(torch.nn.Linear): 15 | ldm_patched_cast_weights = False 16 | def reset_parameters(self): 17 | return None 18 | 19 | def forward_ldm_patched_cast_weights(self, input): 20 | 
weight, bias = cast_bias_weight(self, input) 21 | return torch.nn.functional.linear(input, weight, bias) 22 | 23 | def forward(self, *args, **kwargs): 24 | if self.ldm_patched_cast_weights: 25 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 26 | else: 27 | return super().forward(*args, **kwargs) 28 | 29 | class Conv2d(torch.nn.Conv2d): 30 | ldm_patched_cast_weights = False 31 | def reset_parameters(self): 32 | return None 33 | 34 | def forward_ldm_patched_cast_weights(self, input): 35 | weight, bias = cast_bias_weight(self, input) 36 | return self._conv_forward(input, weight, bias) 37 | 38 | def forward(self, *args, **kwargs): 39 | if self.ldm_patched_cast_weights: 40 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 41 | else: 42 | return super().forward(*args, **kwargs) 43 | 44 | class Conv3d(torch.nn.Conv3d): 45 | ldm_patched_cast_weights = False 46 | def reset_parameters(self): 47 | return None 48 | 49 | def forward_ldm_patched_cast_weights(self, input): 50 | weight, bias = cast_bias_weight(self, input) 51 | return self._conv_forward(input, weight, bias) 52 | 53 | def forward(self, *args, **kwargs): 54 | if self.ldm_patched_cast_weights: 55 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 56 | else: 57 | return super().forward(*args, **kwargs) 58 | 59 | class GroupNorm(torch.nn.GroupNorm): 60 | ldm_patched_cast_weights = False 61 | def reset_parameters(self): 62 | return None 63 | 64 | def forward_ldm_patched_cast_weights(self, input): 65 | weight, bias = cast_bias_weight(self, input) 66 | return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps) 67 | 68 | def forward(self, *args, **kwargs): 69 | if self.ldm_patched_cast_weights: 70 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 71 | else: 72 | return super().forward(*args, **kwargs) 73 | 74 | 75 | class LayerNorm(torch.nn.LayerNorm): 76 | ldm_patched_cast_weights = False 77 | def reset_parameters(self): 78 | return None 79 | 80 | def forward_ldm_patched_cast_weights(self, input): 81 | weight, bias = cast_bias_weight(self, input) 82 | return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps) 83 | 84 | def forward(self, *args, **kwargs): 85 | if self.ldm_patched_cast_weights: 86 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 87 | else: 88 | return super().forward(*args, **kwargs) 89 | 90 | @classmethod 91 | def conv_nd(s, dims, *args, **kwargs): 92 | if dims == 2: 93 | return s.Conv2d(*args, **kwargs) 94 | elif dims == 3: 95 | return s.Conv3d(*args, **kwargs) 96 | else: 97 | raise ValueError(f"unsupported dimensions: {dims}") 98 | 99 | 100 | class manual_cast(disable_weight_init): 101 | class Linear(disable_weight_init.Linear): 102 | ldm_patched_cast_weights = True 103 | 104 | class Conv2d(disable_weight_init.Conv2d): 105 | ldm_patched_cast_weights = True 106 | 107 | class Conv3d(disable_weight_init.Conv3d): 108 | ldm_patched_cast_weights = True 109 | 110 | class GroupNorm(disable_weight_init.GroupNorm): 111 | ldm_patched_cast_weights = True 112 | 113 | class LayerNorm(disable_weight_init.LayerNorm): 114 | ldm_patched_cast_weights = True 115 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/options.py: -------------------------------------------------------------------------------- 1 | 2 | args_parsing = False 3 | 4 | def enable_args_parsing(enable=True): 5 | global args_parsing 6 | args_parsing = enable 7 | 
-------------------------------------------------------------------------------- /py/ldm_patched/modules/sd1_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "openai/clip-vit-large-patch14", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "quick_gelu", 11 | "hidden_size": 768, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 3072, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 12, 19 | "num_hidden_layers": 12, 20 | "pad_token_id": 1, 21 | "projection_dim": 768, 22 | "torch_dtype": "float32", 23 | "transformers_version": "4.24.0", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/sd1_tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "<|startoftext|>", 4 | "lstrip": false, 5 | "normalized": true, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": true, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "<|endoftext|>", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": true, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/sd1_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": { 4 | "__type": "AddedToken", 5 | "content": "<|startoftext|>", 6 | "lstrip": false, 7 | "normalized": true, 8 | "rstrip": false, 9 | "single_word": false 10 | }, 11 | "do_lower_case": true, 12 | "eos_token": { 13 | "__type": "AddedToken", 14 | "content": "<|endoftext|>", 15 | "lstrip": false, 16 | "normalized": true, 17 | "rstrip": false, 18 | "single_word": false 19 | }, 20 | "errors": "replace", 21 | "model_max_length": 77, 22 | "name_or_path": "openai/clip-vit-large-patch14", 23 | "pad_token": "<|endoftext|>", 24 | "special_tokens_map_file": "./special_tokens_map.json", 25 | "tokenizer_class": "CLIPTokenizer", 26 | "unk_token": { 27 | "__type": "AddedToken", 28 | "content": "<|endoftext|>", 29 | "lstrip": false, 30 | "normalized": true, 31 | "rstrip": false, 32 | "single_word": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/sd2_clip.py: -------------------------------------------------------------------------------- 1 | from ldm_patched.modules import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SD2ClipHModel(sd1_clip.SDClipModel): 6 | def __init__(self, arch="ViT-H-14", device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd2_clip_config.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 49406, "end": 
49407, "pad": 0}) 13 | 14 | class SD2ClipHTokenizer(sd1_clip.SDTokenizer): 15 | def __init__(self, tokenizer_path=None, embedding_directory=None): 16 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024) 17 | 18 | class SD2Tokenizer(sd1_clip.SD1Tokenizer): 19 | def __init__(self, embedding_directory=None): 20 | super().__init__(embedding_directory=embedding_directory, clip_name="h", tokenizer=SD2ClipHTokenizer) 21 | 22 | class SD2ClipModel(sd1_clip.SD1ClipModel): 23 | def __init__(self, device="cpu", dtype=None, **kwargs): 24 | super().__init__(device=device, dtype=dtype, clip_name="h", clip_model=SD2ClipHModel, **kwargs) 25 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/sd2_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1024, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 4096, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 16, 18 | "num_hidden_layers": 24, 19 | "pad_token_id": 1, 20 | "projection_dim": 1024, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/sdxl_clip.py: -------------------------------------------------------------------------------- 1 | from ldm_patched.modules import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SDXLClipG(sd1_clip.SDClipModel): 6 | def __init__(self, device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_config_bigg.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, 13 | special_tokens={"start": 49406, "end": 49407, "pad": 0}, layer_norm_hidden_state=False) 14 | 15 | def load_sd(self, sd): 16 | return super().load_sd(sd) 17 | 18 | class SDXLClipGTokenizer(sd1_clip.SDTokenizer): 19 | def __init__(self, tokenizer_path=None, embedding_directory=None): 20 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g') 21 | 22 | 23 | class SDXLTokenizer: 24 | def __init__(self, embedding_directory=None): 25 | self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory) 26 | self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory) 27 | 28 | def tokenize_with_weights(self, text:str, return_word_ids=False): 29 | out = {} 30 | out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) 31 | out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) 32 | return out 33 | 34 | def untokenize(self, token_weight_pair): 35 | return self.clip_g.untokenize(token_weight_pair) 36 | 37 | class SDXLClipModel(torch.nn.Module): 38 | def __init__(self, device="cpu", dtype=None): 39 | super().__init__() 40 | self.clip_l = sd1_clip.SDClipModel(layer="hidden", layer_idx=-2, device=device, dtype=dtype, layer_norm_hidden_state=False) 41 | 
self.clip_g = SDXLClipG(device=device, dtype=dtype) 42 | 43 | def clip_layer(self, layer_idx): 44 | self.clip_l.clip_layer(layer_idx) 45 | self.clip_g.clip_layer(layer_idx) 46 | 47 | def reset_clip_layer(self): 48 | self.clip_g.reset_clip_layer() 49 | self.clip_l.reset_clip_layer() 50 | 51 | def encode_token_weights(self, token_weight_pairs): 52 | token_weight_pairs_g = token_weight_pairs["g"] 53 | token_weight_pairs_l = token_weight_pairs["l"] 54 | g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs_g) 55 | l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l) 56 | return torch.cat([l_out, g_out], dim=-1), g_pooled 57 | 58 | def load_sd(self, sd): 59 | if "text_model.encoder.layers.30.mlp.fc1.weight" in sd: 60 | return self.clip_g.load_sd(sd) 61 | else: 62 | return self.clip_l.load_sd(sd) 63 | 64 | class SDXLRefinerClipModel(sd1_clip.SD1ClipModel): 65 | def __init__(self, device="cpu", dtype=None): 66 | super().__init__(device=device, dtype=dtype, clip_name="g", clip_model=SDXLClipG) 67 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/supported_models_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . import model_base 3 | from . import utils 4 | from . import latent_formats 5 | 6 | class ClipTarget: 7 | def __init__(self, tokenizer, clip): 8 | self.clip = clip 9 | self.tokenizer = tokenizer 10 | self.params = {} 11 | 12 | class BASE: 13 | unet_config = {} 14 | unet_extra_config = { 15 | "num_heads": -1, 16 | "num_head_channels": 64, 17 | } 18 | 19 | clip_prefix = [] 20 | clip_vision_prefix = None 21 | noise_aug_config = None 22 | sampling_settings = {} 23 | latent_format = latent_formats.LatentFormat 24 | 25 | manual_cast_dtype = None 26 | 27 | @classmethod 28 | def matches(s, unet_config): 29 | for k in s.unet_config: 30 | if s.unet_config[k] != unet_config[k]: 31 | return False 32 | return True 33 | 34 | def model_type(self, state_dict, prefix=""): 35 | return model_base.ModelType.EPS 36 | 37 | def inpaint_model(self): 38 | return self.unet_config["in_channels"] > 4 39 | 40 | def __init__(self, unet_config): 41 | self.unet_config = unet_config 42 | self.latent_format = self.latent_format() 43 | for x in self.unet_extra_config: 44 | self.unet_config[x] = self.unet_extra_config[x] 45 | 46 | def get_model(self, state_dict, prefix="", device=None): 47 | if self.noise_aug_config is not None: 48 | out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=self.model_type(state_dict, prefix), device=device) 49 | else: 50 | out = model_base.BaseModel(self, model_type=self.model_type(state_dict, prefix), device=device) 51 | if self.inpaint_model(): 52 | out.set_inpaint() 53 | return out 54 | 55 | def process_clip_state_dict(self, state_dict): 56 | return state_dict 57 | 58 | def process_unet_state_dict(self, state_dict): 59 | return state_dict 60 | 61 | def process_vae_state_dict(self, state_dict): 62 | return state_dict 63 | 64 | def process_clip_state_dict_for_saving(self, state_dict): 65 | replace_prefix = {"": "cond_stage_model."} 66 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 67 | 68 | def process_clip_vision_state_dict_for_saving(self, state_dict): 69 | replace_prefix = {} 70 | if self.clip_vision_prefix is not None: 71 | replace_prefix[""] = self.clip_vision_prefix 72 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 73 | 74 | def process_unet_state_dict_for_saving(self, 
state_dict): 75 | replace_prefix = {"": "model.diffusion_model."} 76 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 77 | 78 | def process_vae_state_dict_for_saving(self, state_dict): 79 | replace_prefix = {"": "first_stage_model."} 80 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 81 | 82 | def set_manual_cast(self, manual_cast_dtype): 83 | self.manual_cast_dtype = manual_cast_dtype 84 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/ldm_patched/pfn/__init__.py -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/LICENSE-HAT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Xiangyu Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/LICENSE-RealESRGAN: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Xintao Wang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/OmniSR/ChannelAttention.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class CA_layer(nn.Module): 7 | def __init__(self, channel, reduction=16): 8 | super(CA_layer, self).__init__() 9 | # global average pooling 10 | self.gap = nn.AdaptiveAvgPool2d(1) 11 | self.fc = nn.Sequential( 12 | nn.Conv2d(channel, channel // reduction, kernel_size=(1, 1), bias=False), 13 | nn.GELU(), 14 | nn.Conv2d(channel // reduction, channel, kernel_size=(1, 1), bias=False), 15 | # nn.Sigmoid() 16 | ) 17 | 18 | def forward(self, x): 19 | y = self.fc(self.gap(x)) 20 | return x * y.expand_as(x) 21 | 22 | 23 | class Simple_CA_layer(nn.Module): 24 | def __init__(self, channel): 25 | super(Simple_CA_layer, self).__init__() 26 | self.gap = nn.AdaptiveAvgPool2d(1) 27 | self.fc = nn.Conv2d( 28 | in_channels=channel, 29 | out_channels=channel, 30 | kernel_size=1, 31 | padding=0, 32 | stride=1, 33 | groups=1, 34 | bias=True, 35 | ) 36 | 37 | def forward(self, x): 38 | return x * self.fc(self.gap(x)) 39 | 40 | 41 | class ECA_layer(nn.Module): 42 | """Constructs a ECA module. 43 | Args: 44 | channel: Number of channels of the input feature map 45 | k_size: Adaptive selection of kernel size 46 | """ 47 | 48 | def __init__(self, channel): 49 | super(ECA_layer, self).__init__() 50 | 51 | b = 1 52 | gamma = 2 53 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 54 | k_size = k_size if k_size % 2 else k_size + 1 55 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 56 | self.conv = nn.Conv1d( 57 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 58 | ) 59 | # self.sigmoid = nn.Sigmoid() 60 | 61 | def forward(self, x): 62 | # x: input features with shape [b, c, h, w] 63 | # b, c, h, w = x.size() 64 | 65 | # feature descriptor on the global spatial information 66 | y = self.avg_pool(x) 67 | 68 | # Two different branches of ECA module 69 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 70 | 71 | # Multi-scale information fusion 72 | # y = self.sigmoid(y) 73 | 74 | return x * y.expand_as(x) 75 | 76 | 77 | class ECA_MaxPool_layer(nn.Module): 78 | """Constructs a ECA module. 
79 | Args: 80 | channel: Number of channels of the input feature map 81 | k_size: Adaptive selection of kernel size 82 | """ 83 | 84 | def __init__(self, channel): 85 | super(ECA_MaxPool_layer, self).__init__() 86 | 87 | b = 1 88 | gamma = 2 89 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 90 | k_size = k_size if k_size % 2 else k_size + 1 91 | self.max_pool = nn.AdaptiveMaxPool2d(1) 92 | self.conv = nn.Conv1d( 93 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 94 | ) 95 | # self.sigmoid = nn.Sigmoid() 96 | 97 | def forward(self, x): 98 | # x: input features with shape [b, c, h, w] 99 | # b, c, h, w = x.size() 100 | 101 | # feature descriptor on the global spatial information 102 | y = self.max_pool(x) 103 | 104 | # Two different branches of ECA module 105 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 106 | 107 | # Multi-scale information fusion 108 | # y = self.sigmoid(y) 109 | 110 | return x * y.expand_as(x) 111 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/OmniSR/OSAG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OSAG.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:08:49 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | 14 | import torch.nn as nn 15 | 16 | from .esa import ESA 17 | from .OSA import OSA_Block 18 | 19 | 20 | class OSAG(nn.Module): 21 | def __init__( 22 | self, 23 | channel_num=64, 24 | bias=True, 25 | block_num=4, 26 | ffn_bias=False, 27 | window_size=0, 28 | pe=False, 29 | ): 30 | super(OSAG, self).__init__() 31 | 32 | # print("window_size: %d" % (window_size)) 33 | # print("with_pe", pe) 34 | # print("ffn_bias: %d" % (ffn_bias)) 35 | 36 | # block_script_name = kwargs.get("block_script_name", "OSA") 37 | # block_class_name = kwargs.get("block_class_name", "OSA_Block") 38 | 39 | # script_name = "." 
+ block_script_name 40 | # package = __import__(script_name, fromlist=True) 41 | block_class = OSA_Block # getattr(package, block_class_name) 42 | group_list = [] 43 | for _ in range(block_num): 44 | temp_res = block_class( 45 | channel_num, 46 | bias, 47 | ffn_bias=ffn_bias, 48 | window_size=window_size, 49 | with_pe=pe, 50 | ) 51 | group_list.append(temp_res) 52 | group_list.append(nn.Conv2d(channel_num, channel_num, 1, 1, 0, bias=bias)) 53 | self.residual_layer = nn.Sequential(*group_list) 54 | esa_channel = max(channel_num // 4, 16) 55 | self.esa = ESA(esa_channel, channel_num) 56 | 57 | def forward(self, x): 58 | out = self.residual_layer(x) 59 | out = out + x 60 | return self.esa(out) 61 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/OmniSR/OmniSR.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OmniSR.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:06:36 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import math 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | 19 | from .OSAG import OSAG 20 | from .pixelshuffle import pixelshuffle_block 21 | 22 | 23 | class OmniSR(nn.Module): 24 | def __init__( 25 | self, 26 | state_dict, 27 | **kwargs, 28 | ): 29 | super(OmniSR, self).__init__() 30 | self.state = state_dict 31 | 32 | bias = True # Fine to assume this for now 33 | block_num = 1 # Fine to assume this for now 34 | ffn_bias = True 35 | pe = True 36 | 37 | num_feat = state_dict["input.weight"].shape[0] or 64 38 | num_in_ch = state_dict["input.weight"].shape[1] or 3 39 | num_out_ch = num_in_ch # we can just assume this for now. 
pixelshuffle smh 40 | 41 | pixelshuffle_shape = state_dict["up.0.weight"].shape[0] 42 | up_scale = math.sqrt(pixelshuffle_shape / num_out_ch) 43 | if up_scale - int(up_scale) > 0: 44 | print( 45 | "out_nc is probably different than in_nc, scale calculation might be wrong" 46 | ) 47 | up_scale = int(up_scale) 48 | res_num = 0 49 | for key in state_dict.keys(): 50 | if "residual_layer" in key: 51 | temp_res_num = int(key.split(".")[1]) 52 | if temp_res_num > res_num: 53 | res_num = temp_res_num 54 | res_num = res_num + 1 # zero-indexed 55 | 56 | residual_layer = [] 57 | self.res_num = res_num 58 | 59 | if ( 60 | "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" 61 | in state_dict.keys() 62 | ): 63 | rel_pos_bias_weight = state_dict[ 64 | "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" 65 | ].shape[0] 66 | self.window_size = int((math.sqrt(rel_pos_bias_weight) + 1) / 2) 67 | else: 68 | self.window_size = 8 69 | 70 | self.up_scale = up_scale 71 | 72 | for _ in range(res_num): 73 | temp_res = OSAG( 74 | channel_num=num_feat, 75 | bias=bias, 76 | block_num=block_num, 77 | ffn_bias=ffn_bias, 78 | window_size=self.window_size, 79 | pe=pe, 80 | ) 81 | residual_layer.append(temp_res) 82 | self.residual_layer = nn.Sequential(*residual_layer) 83 | self.input = nn.Conv2d( 84 | in_channels=num_in_ch, 85 | out_channels=num_feat, 86 | kernel_size=3, 87 | stride=1, 88 | padding=1, 89 | bias=bias, 90 | ) 91 | self.output = nn.Conv2d( 92 | in_channels=num_feat, 93 | out_channels=num_feat, 94 | kernel_size=3, 95 | stride=1, 96 | padding=1, 97 | bias=bias, 98 | ) 99 | self.up = pixelshuffle_block(num_feat, num_out_ch, up_scale, bias=bias) 100 | 101 | # self.tail = pixelshuffle_block(num_feat,num_out_ch,up_scale,bias=bias) 102 | 103 | # for m in self.modules(): 104 | # if isinstance(m, nn.Conv2d): 105 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 106 | # m.weight.data.normal_(0, sqrt(2. 
/ n)) 107 | 108 | # chaiNNer specific stuff 109 | self.model_arch = "OmniSR" 110 | self.sub_type = "SR" 111 | self.in_nc = num_in_ch 112 | self.out_nc = num_out_ch 113 | self.num_feat = num_feat 114 | self.scale = up_scale 115 | 116 | self.supports_fp16 = True # TODO: Test this 117 | self.supports_bfp16 = True 118 | self.min_size_restriction = 16 119 | 120 | self.load_state_dict(state_dict, strict=False) 121 | 122 | def check_image_size(self, x): 123 | _, _, h, w = x.size() 124 | # import pdb; pdb.set_trace() 125 | mod_pad_h = (self.window_size - h % self.window_size) % self.window_size 126 | mod_pad_w = (self.window_size - w % self.window_size) % self.window_size 127 | # x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') 128 | x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "constant", 0) 129 | return x 130 | 131 | def forward(self, x): 132 | H, W = x.shape[2:] 133 | x = self.check_image_size(x) 134 | 135 | residual = self.input(x) 136 | out = self.residual_layer(residual) 137 | 138 | # origin 139 | out = torch.add(self.output(out), residual) 140 | out = self.up(out) 141 | 142 | out = out[:, :, : H * self.up_scale, : W * self.up_scale] 143 | return out 144 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/OmniSR/layernorm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: layernorm.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Thursday, 20th April 2023 9:28:20 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | 17 | class LayerNormFunction(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, weight, bias, eps): 20 | ctx.eps = eps 21 | N, C, H, W = x.size() 22 | mu = x.mean(1, keepdim=True) 23 | var = (x - mu).pow(2).mean(1, keepdim=True) 24 | y = (x - mu) / (var + eps).sqrt() 25 | ctx.save_for_backward(y, var, weight) 26 | y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) 27 | return y 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | eps = ctx.eps 32 | 33 | N, C, H, W = grad_output.size() 34 | y, var, weight = ctx.saved_variables 35 | g = grad_output * weight.view(1, C, 1, 1) 36 | mean_g = g.mean(dim=1, keepdim=True) 37 | 38 | mean_gy = (g * y).mean(dim=1, keepdim=True) 39 | gx = 1.0 / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) 40 | return ( 41 | gx, 42 | (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), 43 | grad_output.sum(dim=3).sum(dim=2).sum(dim=0), 44 | None, 45 | ) 46 | 47 | 48 | class LayerNorm2d(nn.Module): 49 | def __init__(self, channels, eps=1e-6): 50 | super(LayerNorm2d, self).__init__() 51 | self.register_parameter("weight", nn.Parameter(torch.ones(channels))) 52 | self.register_parameter("bias", nn.Parameter(torch.zeros(channels))) 53 | self.eps = eps 54 | 55 | def forward(self, x): 56 | return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) 57 | 58 | 59 | class GRN(nn.Module): 60 | """GRN (Global Response Normalization) layer""" 61 | 62 | def __init__(self, dim): 63 | super().__init__() 64 | self.gamma = nn.Parameter(torch.zeros(1, dim, 1, 1)) 65 | self.beta = nn.Parameter(torch.zeros(1, dim, 1, 1)) 66 | 67 | def forward(self, x): 
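        # Global Response Normalization (the GRN layer introduced with ConvNeXt V2), computed below as:
        #   Gx  = ||x||_2 over the spatial dims (H, W), one value per channel  -> shape (N, C, 1, 1)
        #   Nx  = Gx / (mean over channels of Gx + 1e-6)                       -> relative channel response
        #   out = gamma * (x * Nx) + beta + x                                  -> learned affine plus residual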
68 | Gx = torch.norm(x, p=2, dim=(2, 3), keepdim=True) 69 | Nx = Gx / (Gx.mean(dim=1, keepdim=True) + 1e-6) 70 | return self.gamma * (x * Nx) + self.beta + x 71 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/OmniSR/pixelshuffle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: pixelshuffle.py 5 | # Created Date: Friday July 1st 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Friday, 1st July 2022 10:18:39 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2022 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch.nn as nn 14 | 15 | 16 | def pixelshuffle_block( 17 | in_channels, out_channels, upscale_factor=2, kernel_size=3, bias=False 18 | ): 19 | """ 20 | Upsample features according to `upscale_factor`. 21 | """ 22 | padding = kernel_size // 2 23 | conv = nn.Conv2d( 24 | in_channels, 25 | out_channels * (upscale_factor**2), 26 | kernel_size, 27 | padding=1, 28 | bias=bias, 29 | ) 30 | pixel_shuffle = nn.PixelShuffle(upscale_factor) 31 | return nn.Sequential(*[conv, pixel_shuffle]) 32 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/SRVGG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import math 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class SRVGGNetCompact(nn.Module): 11 | """A compact VGG-style network structure for super-resolution. 12 | It is a compact network structure, which performs upsampling in the last layer and no convolution is 13 | conducted on the HR feature space. 14 | Args: 15 | num_in_ch (int): Channel number of inputs. Default: 3. 16 | num_out_ch (int): Channel number of outputs. Default: 3. 17 | num_feat (int): Channel number of intermediate features. Default: 64. 18 | num_conv (int): Number of convolution layers in the body network. Default: 16. 19 | upscale (int): Upsampling factor. Default: 4. 20 | act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu. 
21 | """ 22 | 23 | def __init__( 24 | self, 25 | state_dict, 26 | act_type: str = "prelu", 27 | ): 28 | super(SRVGGNetCompact, self).__init__() 29 | self.model_arch = "SRVGG (RealESRGAN)" 30 | self.sub_type = "SR" 31 | 32 | self.act_type = act_type 33 | 34 | self.state = state_dict 35 | 36 | if "params" in self.state: 37 | self.state = self.state["params"] 38 | 39 | self.key_arr = list(self.state.keys()) 40 | 41 | self.in_nc = self.get_in_nc() 42 | self.num_feat = self.get_num_feats() 43 | self.num_conv = self.get_num_conv() 44 | self.out_nc = self.in_nc # :( 45 | self.pixelshuffle_shape = None # Defined in get_scale() 46 | self.scale = self.get_scale() 47 | 48 | self.supports_fp16 = True 49 | self.supports_bfp16 = True 50 | self.min_size_restriction = None 51 | 52 | self.body = nn.ModuleList() 53 | # the first conv 54 | self.body.append(nn.Conv2d(self.in_nc, self.num_feat, 3, 1, 1)) 55 | # the first activation 56 | if act_type == "relu": 57 | activation = nn.ReLU(inplace=True) 58 | elif act_type == "prelu": 59 | activation = nn.PReLU(num_parameters=self.num_feat) 60 | elif act_type == "leakyrelu": 61 | activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) 62 | self.body.append(activation) # type: ignore 63 | 64 | # the body structure 65 | for _ in range(self.num_conv): 66 | self.body.append(nn.Conv2d(self.num_feat, self.num_feat, 3, 1, 1)) 67 | # activation 68 | if act_type == "relu": 69 | activation = nn.ReLU(inplace=True) 70 | elif act_type == "prelu": 71 | activation = nn.PReLU(num_parameters=self.num_feat) 72 | elif act_type == "leakyrelu": 73 | activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) 74 | self.body.append(activation) # type: ignore 75 | 76 | # the last conv 77 | self.body.append(nn.Conv2d(self.num_feat, self.pixelshuffle_shape, 3, 1, 1)) # type: ignore 78 | # upsample 79 | self.upsampler = nn.PixelShuffle(self.scale) 80 | 81 | self.load_state_dict(self.state, strict=False) 82 | 83 | def get_num_conv(self) -> int: 84 | return (int(self.key_arr[-1].split(".")[1]) - 2) // 2 85 | 86 | def get_num_feats(self) -> int: 87 | return self.state[self.key_arr[0]].shape[0] 88 | 89 | def get_in_nc(self) -> int: 90 | return self.state[self.key_arr[0]].shape[1] 91 | 92 | def get_scale(self) -> int: 93 | self.pixelshuffle_shape = self.state[self.key_arr[-1]].shape[0] 94 | # Assume out_nc is the same as in_nc 95 | # I cant think of a better way to do that 96 | self.out_nc = self.in_nc 97 | scale = math.sqrt(self.pixelshuffle_shape / self.out_nc) 98 | if scale - int(scale) > 0: 99 | print( 100 | "out_nc is probably different than in_nc, scale calculation might be wrong" 101 | ) 102 | scale = int(scale) 103 | return scale 104 | 105 | def forward(self, x): 106 | out = x 107 | for i in range(0, len(self.body)): 108 | out = self.body[i](out) 109 | 110 | out = self.upsampler(out) 111 | # add the nearest upsampled image, so that the network learns the residual 112 | base = F.interpolate(x, scale_factor=self.scale, mode="nearest") 113 | out += base 114 | return out 115 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/ldm_patched/pfn/architecture/__init__.py -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/face/LICENSE-codeformer: 
-------------------------------------------------------------------------------- 1 | S-Lab License 1.0 2 | 3 | Copyright 2022 S-Lab 4 | 5 | Redistribution and use for non-commercial purpose in source and 6 | binary forms, with or without modification, are permitted provided 7 | that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | In the event that redistribution and/or use for commercial purpose in 34 | source or binary forms, with or without modification is required, 35 | please contact the contributor(s) of the work. 
36 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/face/fused_act.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # type: ignore 3 | # modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 4 | 5 | import torch 6 | from torch import nn 7 | from torch.autograd import Function 8 | 9 | fused_act_ext = None 10 | 11 | 12 | class FusedLeakyReLUFunctionBackward(Function): 13 | @staticmethod 14 | def forward(ctx, grad_output, out, negative_slope, scale): 15 | ctx.save_for_backward(out) 16 | ctx.negative_slope = negative_slope 17 | ctx.scale = scale 18 | 19 | empty = grad_output.new_empty(0) 20 | 21 | grad_input = fused_act_ext.fused_bias_act( 22 | grad_output, empty, out, 3, 1, negative_slope, scale 23 | ) 24 | 25 | dim = [0] 26 | 27 | if grad_input.ndim > 2: 28 | dim += list(range(2, grad_input.ndim)) 29 | 30 | grad_bias = grad_input.sum(dim).detach() 31 | 32 | return grad_input, grad_bias 33 | 34 | @staticmethod 35 | def backward(ctx, gradgrad_input, gradgrad_bias): 36 | (out,) = ctx.saved_tensors 37 | gradgrad_out = fused_act_ext.fused_bias_act( 38 | gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale 39 | ) 40 | 41 | return gradgrad_out, None, None, None 42 | 43 | 44 | class FusedLeakyReLUFunction(Function): 45 | @staticmethod 46 | def forward(ctx, input, bias, negative_slope, scale): 47 | empty = input.new_empty(0) 48 | out = fused_act_ext.fused_bias_act( 49 | input, bias, empty, 3, 0, negative_slope, scale 50 | ) 51 | ctx.save_for_backward(out) 52 | ctx.negative_slope = negative_slope 53 | ctx.scale = scale 54 | 55 | return out 56 | 57 | @staticmethod 58 | def backward(ctx, grad_output): 59 | (out,) = ctx.saved_tensors 60 | 61 | grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply( 62 | grad_output, out, ctx.negative_slope, ctx.scale 63 | ) 64 | 65 | return grad_input, grad_bias, None, None 66 | 67 | 68 | class FusedLeakyReLU(nn.Module): 69 | def __init__(self, channel, negative_slope=0.2, scale=2**0.5): 70 | super().__init__() 71 | 72 | self.bias = nn.Parameter(torch.zeros(channel)) 73 | self.negative_slope = negative_slope 74 | self.scale = scale 75 | 76 | def forward(self, input): 77 | return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale) 78 | 79 | 80 | def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5): 81 | return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) 82 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/timm/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import collections.abc 5 | from itertools import repeat 6 | 7 | 8 | # From PyTorch internals 9 | def _ntuple(n): 10 | def parse(x): 11 | if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): 12 | return x 13 | return tuple(repeat(x, n)) 14 | 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=0.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # Make sure that round down does not go down by more 
than 10%. 29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/timm/weight_init.py: -------------------------------------------------------------------------------- 1 | import math 2 | import warnings 3 | 4 | import torch 5 | from torch.nn.init import _calculate_fan_in_and_fan_out 6 | 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn( 17 | "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 18 | "The distribution of values may be incorrect.", 19 | stacklevel=2, 20 | ) 21 | 22 | with torch.no_grad(): 23 | # Values are generated by using a truncated uniform distribution and 24 | # then using the inverse CDF for the normal distribution. 25 | # Get upper and lower cdf values 26 | l = norm_cdf((a - mean) / std) 27 | u = norm_cdf((b - mean) / std) 28 | 29 | # Uniformly fill tensor with values from [l, u], then translate to 30 | # [2l-1, 2u-1]. 31 | tensor.uniform_(2 * l - 1, 2 * u - 1) 32 | 33 | # Use inverse cdf transform for normal distribution to get truncated 34 | # standard normal 35 | tensor.erfinv_() 36 | 37 | # Transform to proper mean, std 38 | tensor.mul_(std * math.sqrt(2.0)) 39 | tensor.add_(mean) 40 | 41 | # Clamp to ensure it's in the proper range 42 | tensor.clamp_(min=a, max=b) 43 | return tensor 44 | 45 | 46 | def trunc_normal_( 47 | tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 48 | ) -> torch.Tensor: 49 | r"""Fills the input Tensor with values drawn from a truncated 50 | normal distribution. The values are effectively drawn from the 51 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 52 | with values outside :math:`[a, b]` redrawn until they are within 53 | the bounds. The method used for generating the random values works 54 | best when :math:`a \leq \text{mean} \leq b`. 55 | 56 | NOTE: this impl is similar to the PyTorch trunc_normal_, the bounds [a, b] are 57 | applied while sampling the normal with mean/std applied, therefore a, b args 58 | should be adjusted to match the range of mean, std args. 59 | 60 | Args: 61 | tensor: an n-dimensional `torch.Tensor` 62 | mean: the mean of the normal distribution 63 | std: the standard deviation of the normal distribution 64 | a: the minimum cutoff value 65 | b: the maximum cutoff value 66 | Examples: 67 | >>> w = torch.empty(3, 5) 68 | >>> nn.init.trunc_normal_(w) 69 | """ 70 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 71 | 72 | 73 | def trunc_normal_tf_( 74 | tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 75 | ) -> torch.Tensor: 76 | r"""Fills the input Tensor with values drawn from a truncated 77 | normal distribution. The values are effectively drawn from the 78 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 79 | with values outside :math:`[a, b]` redrawn until they are within 80 | the bounds. The method used for generating the random values works 81 | best when :math:`a \leq \text{mean} \leq b`. 
82 | 83 | NOTE: this 'tf' variant behaves closer to Tensorflow / JAX impl where the 84 | bounds [a, b] are applied when sampling the normal distribution with mean=0, std=1.0 85 | and the result is subsquently scaled and shifted by the mean and std args. 86 | 87 | Args: 88 | tensor: an n-dimensional `torch.Tensor` 89 | mean: the mean of the normal distribution 90 | std: the standard deviation of the normal distribution 91 | a: the minimum cutoff value 92 | b: the maximum cutoff value 93 | Examples: 94 | >>> w = torch.empty(3, 5) 95 | >>> nn.init.trunc_normal_(w) 96 | """ 97 | _no_grad_trunc_normal_(tensor, 0, 1.0, a, b) 98 | with torch.no_grad(): 99 | tensor.mul_(std).add_(mean) 100 | return tensor 101 | 102 | 103 | def variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal"): 104 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 105 | if mode == "fan_in": 106 | denom = fan_in 107 | elif mode == "fan_out": 108 | denom = fan_out 109 | elif mode == "fan_avg": 110 | denom = (fan_in + fan_out) / 2 111 | 112 | variance = scale / denom # type: ignore 113 | 114 | if distribution == "truncated_normal": 115 | # constant is stddev of standard normal truncated to (-2, 2) 116 | trunc_normal_tf_(tensor, std=math.sqrt(variance) / 0.87962566103423978) 117 | elif distribution == "normal": 118 | tensor.normal_(std=math.sqrt(variance)) 119 | elif distribution == "uniform": 120 | bound = math.sqrt(3 * variance) 121 | # pylint: disable=invalid-unary-operand-type 122 | tensor.uniform_(-bound, bound) 123 | else: 124 | raise ValueError(f"invalid distribution {distribution}") 125 | 126 | 127 | def lecun_normal_(tensor): 128 | variance_scaling_(tensor, mode="fan_in", distribution="truncated_normal") 129 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/model_loading.py: -------------------------------------------------------------------------------- 1 | import logging as logger 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | from .types import PyTorchModel 18 | 19 | 20 | class UnsupportedModel(Exception): 21 | pass 22 | 23 | 24 | def load_state_dict(state_dict) -> PyTorchModel: 25 | logger.debug(f"Loading state dict into pytorch model arch") 26 | 27 | state_dict_keys = list(state_dict.keys()) 28 | 29 | if "params_ema" in state_dict_keys: 30 | state_dict = state_dict["params_ema"] 31 | elif "params-ema" in state_dict_keys: 32 | state_dict = state_dict["params-ema"] 33 | elif "params" in state_dict_keys: 34 | state_dict = state_dict["params"] 35 | 36 | state_dict_keys = list(state_dict.keys()) 37 | # SRVGGNet Real-ESRGAN (v2) 38 | if "body.0.weight" in state_dict_keys and "body.1.weight" in state_dict_keys: 39 | model = RealESRGANv2(state_dict) 40 | # SPSR (ESRGAN with lots of extra layers) 41 | elif "f_HR_conv1.0.weight" in state_dict: 42 | model = 
SPSR(state_dict) 43 | # Swift-SRGAN 44 | elif ( 45 | "model" in state_dict_keys 46 | and "initial.cnn.depthwise.weight" in state_dict["model"].keys() 47 | ): 48 | model = SwiftSRGAN(state_dict) 49 | # SwinIR, Swin2SR, HAT 50 | elif "layers.0.residual_group.blocks.0.norm1.weight" in state_dict_keys: 51 | if ( 52 | "layers.0.residual_group.blocks.0.conv_block.cab.0.weight" 53 | in state_dict_keys 54 | ): 55 | model = HAT(state_dict) 56 | elif "patch_embed.proj.weight" in state_dict_keys: 57 | model = Swin2SR(state_dict) 58 | else: 59 | model = SwinIR(state_dict) 60 | # GFPGAN 61 | elif ( 62 | "toRGB.0.weight" in state_dict_keys 63 | and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys 64 | ): 65 | model = GFPGANv1Clean(state_dict) 66 | # RestoreFormer 67 | elif ( 68 | "encoder.conv_in.weight" in state_dict_keys 69 | and "encoder.down.0.block.0.norm1.weight" in state_dict_keys 70 | ): 71 | model = RestoreFormer(state_dict) 72 | elif ( 73 | "encoder.blocks.0.weight" in state_dict_keys 74 | and "quantize.embedding.weight" in state_dict_keys 75 | ): 76 | model = CodeFormer(state_dict) 77 | # LaMa 78 | elif ( 79 | "model.model.1.bn_l.running_mean" in state_dict_keys 80 | or "generator.model.1.bn_l.running_mean" in state_dict_keys 81 | ): 82 | model = LaMa(state_dict) 83 | # Omni-SR 84 | elif "residual_layer.0.residual_layer.0.layer.0.fn.0.weight" in state_dict_keys: 85 | model = OmniSR(state_dict) 86 | # SCUNet 87 | elif "m_head.0.weight" in state_dict_keys and "m_tail.0.weight" in state_dict_keys: 88 | model = SCUNet(state_dict) 89 | # DAT 90 | elif "layers.0.blocks.2.attn.attn_mask_0" in state_dict_keys: 91 | model = DAT(state_dict) 92 | # Regular ESRGAN, "new-arch" ESRGAN, Real-ESRGAN v1 93 | else: 94 | try: 95 | model = ESRGAN(state_dict) 96 | except: 97 | # pylint: disable=raise-missing-from 98 | raise UnsupportedModel 99 | return model 100 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/types.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | 18 | PyTorchSRModels = ( 19 | RealESRGANv2, 20 | SPSR, 21 | SwiftSRGAN, 22 | ESRGAN, 23 | SwinIR, 24 | Swin2SR, 25 | HAT, 26 | OmniSR, 27 | SCUNet, 28 | DAT, 29 | ) 30 | PyTorchSRModel = Union[ 31 | RealESRGANv2, 32 | SPSR, 33 | SwiftSRGAN, 34 | ESRGAN, 35 | SwinIR, 36 | Swin2SR, 37 | HAT, 38 | OmniSR, 39 | SCUNet, 40 | DAT, 41 | ] 42 | 43 | 44 | def is_pytorch_sr_model(model: object): 45 | return isinstance(model, PyTorchSRModels) 46 | 47 | 48 | PyTorchFaceModels = (GFPGANv1Clean, RestoreFormer, CodeFormer) 49 | PyTorchFaceModel = Union[GFPGANv1Clean, RestoreFormer, CodeFormer] 50 | 51 | 52 | def is_pytorch_face_model(model: object): 53 | return isinstance(model, PyTorchFaceModels) 54 | 55 | 56 
| PyTorchInpaintModels = (LaMa,) 57 | PyTorchInpaintModel = Union[LaMa] 58 | 59 | 60 | def is_pytorch_inpaint_model(model: object): 61 | return isinstance(model, PyTorchInpaintModels) 62 | 63 | 64 | PyTorchModels = (*PyTorchSRModels, *PyTorchFaceModels, *PyTorchInpaintModels) 65 | PyTorchModel = Union[PyTorchSRModel, PyTorchFaceModel, PyTorchInpaintModel] 66 | 67 | 68 | def is_pytorch_model(model: object): 69 | return isinstance(model, PyTorchModels) 70 | -------------------------------------------------------------------------------- /py/ldm_patched/taesd/taesd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Tiny AutoEncoder for Stable Diffusion 4 | (DNN for encoding / decoding SD's latent space) 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | 9 | import ldm_patched.modules.utils 10 | import ldm_patched.modules.ops 11 | 12 | def conv(n_in, n_out, **kwargs): 13 | return ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 3, padding=1, **kwargs) 14 | 15 | class Clamp(nn.Module): 16 | def forward(self, x): 17 | return torch.tanh(x / 3) * 3 18 | 19 | class Block(nn.Module): 20 | def __init__(self, n_in, n_out): 21 | super().__init__() 22 | self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out)) 23 | self.skip = ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity() 24 | self.fuse = nn.ReLU() 25 | def forward(self, x): 26 | return self.fuse(self.conv(x) + self.skip(x)) 27 | 28 | def Encoder(): 29 | return nn.Sequential( 30 | conv(3, 64), Block(64, 64), 31 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 32 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 33 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 34 | conv(64, 4), 35 | ) 36 | 37 | def Decoder(): 38 | return nn.Sequential( 39 | Clamp(), conv(4, 64), nn.ReLU(), 40 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 41 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 42 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 43 | Block(64, 64), conv(64, 3), 44 | ) 45 | 46 | class TAESD(nn.Module): 47 | latent_magnitude = 3 48 | latent_shift = 0.5 49 | 50 | def __init__(self, encoder_path=None, decoder_path=None): 51 | """Initialize pretrained TAESD on the given device from the given checkpoints.""" 52 | super().__init__() 53 | self.taesd_encoder = Encoder() 54 | self.taesd_decoder = Decoder() 55 | self.vae_scale = torch.nn.Parameter(torch.tensor(1.0)) 56 | if encoder_path is not None: 57 | self.taesd_encoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(encoder_path, safe_load=True)) 58 | if decoder_path is not None: 59 | self.taesd_decoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(decoder_path, safe_load=True)) 60 | 61 | @staticmethod 62 | def scale_latents(x): 63 | """raw latents -> [0, 1]""" 64 | return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) 65 | 66 | @staticmethod 67 | def unscale_latents(x): 68 | """[0, 1] -> raw latents""" 69 | return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) 70 | 71 | def decode(self, x): 72 | x_sample = self.taesd_decoder(x * self.vae_scale) 73 | x_sample = x_sample.sub(0.5).mul(2) 74 | 
return x_sample 75 | 76 | def encode(self, x): 77 | return self.taesd_encoder(x * 0.5 + 0.5) / self.vae_scale 78 | -------------------------------------------------------------------------------- /py/ldm_patched/utils/latent_visualization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | import struct 4 | import numpy as np 5 | from ldm_patched.modules.args_parser import args, LatentPreviewMethod 6 | from ldm_patched.taesd.taesd import TAESD 7 | import ldm_patched.utils.path_utils 8 | import ldm_patched.modules.utils 9 | 10 | MAX_PREVIEW_RESOLUTION = 512 11 | 12 | class LatentPreviewer: 13 | def decode_latent_to_preview(self, x0): 14 | pass 15 | 16 | def decode_latent_to_preview_image(self, preview_format, x0): 17 | preview_image = self.decode_latent_to_preview(x0) 18 | return ("JPEG", preview_image, MAX_PREVIEW_RESOLUTION) 19 | 20 | class TAESDPreviewerImpl(LatentPreviewer): 21 | def __init__(self, taesd): 22 | self.taesd = taesd 23 | 24 | def decode_latent_to_preview(self, x0): 25 | x_sample = self.taesd.decode(x0[:1])[0].detach() 26 | x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) 27 | x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) 28 | x_sample = x_sample.astype(np.uint8) 29 | 30 | preview_image = Image.fromarray(x_sample) 31 | return preview_image 32 | 33 | 34 | class Latent2RGBPreviewer(LatentPreviewer): 35 | def __init__(self, latent_rgb_factors): 36 | self.latent_rgb_factors = torch.tensor(latent_rgb_factors, device="cpu") 37 | 38 | def decode_latent_to_preview(self, x0): 39 | latent_image = x0[0].permute(1, 2, 0).cpu() @ self.latent_rgb_factors 40 | 41 | latents_ubyte = (((latent_image + 1) / 2) 42 | .clamp(0, 1) # change scale from -1..1 to 0..1 43 | .mul(0xFF) # to 0..255 44 | .byte()).cpu() 45 | 46 | return Image.fromarray(latents_ubyte.numpy()) 47 | 48 | 49 | def get_previewer(device, latent_format): 50 | previewer = None 51 | method = args.preview_option 52 | if method != LatentPreviewMethod.NoPreviews: 53 | # TODO previewer methods 54 | taesd_decoder_path = None 55 | if latent_format.taesd_decoder_name is not None: 56 | taesd_decoder_path = next( 57 | (fn for fn in ldm_patched.utils.path_utils.get_filename_list("vae_approx") 58 | if fn.startswith(latent_format.taesd_decoder_name)), 59 | "" 60 | ) 61 | taesd_decoder_path = ldm_patched.utils.path_utils.get_full_path("vae_approx", taesd_decoder_path) 62 | 63 | if method == LatentPreviewMethod.Auto: 64 | method = LatentPreviewMethod.Latent2RGB 65 | if taesd_decoder_path: 66 | method = LatentPreviewMethod.TAESD 67 | 68 | if method == LatentPreviewMethod.TAESD: 69 | if taesd_decoder_path: 70 | taesd = TAESD(None, taesd_decoder_path).to(device) 71 | previewer = TAESDPreviewerImpl(taesd) 72 | else: 73 | print("Warning: TAESD previews enabled, but could not find models/vae_approx/{}".format(latent_format.taesd_decoder_name)) 74 | 75 | if previewer is None: 76 | if latent_format.latent_rgb_factors is not None: 77 | previewer = Latent2RGBPreviewer(latent_format.latent_rgb_factors) 78 | return previewer 79 | 80 | def prepare_callback(model, steps, x0_output_dict=None): 81 | preview_format = "JPEG" 82 | if preview_format not in ["JPEG", "PNG"]: 83 | preview_format = "JPEG" 84 | 85 | previewer = get_previewer(model.load_device, model.model.latent_format) 86 | 87 | pbar = ldm_patched.modules.utils.ProgressBar(steps) 88 | def callback(step, x0, x, total_steps): 89 | if x0_output_dict is not None: 90 | x0_output_dict["x0"] = x0 91 
| 92 | preview_bytes = None 93 | if previewer: 94 | preview_bytes = previewer.decode_latent_to_preview_image(preview_format, x0) 95 | pbar.update_absolute(step + 1, total_steps, preview_bytes) 96 | return callback 97 | 98 | -------------------------------------------------------------------------------- /py/libs/utils.py: -------------------------------------------------------------------------------- 1 | def easySave(images, filename_prefix, output_type, prompt=None, extra_pnginfo=None): 2 | """Save or Preview Image""" 3 | from nodes import PreviewImage, SaveImage 4 | if output_type == "Hide": 5 | return list() 6 | if output_type == "Preview": 7 | filename_prefix = 'easyPreview' 8 | results = PreviewImage().save_images(images, filename_prefix, prompt, extra_pnginfo) 9 | return results['ui']['images'] 10 | else: 11 | results = SaveImage().save_images(images, filename_prefix, prompt, extra_pnginfo) 12 | return results['ui']['images'] 13 | -------------------------------------------------------------------------------- /py/log.py: -------------------------------------------------------------------------------- 1 | COLORS_FG = { 2 | 'BLACK': '\33[30m', 3 | 'RED': '\33[31m', 4 | 'GREEN': '\33[32m', 5 | 'YELLOW': '\33[33m', 6 | 'BLUE': '\33[34m', 7 | 'MAGENTA': '\33[35m', 8 | 'CYAN': '\33[36m', 9 | 'WHITE': '\33[37m', 10 | 'GREY': '\33[90m', 11 | 'BRIGHT_RED': '\33[91m', 12 | 'BRIGHT_GREEN': '\33[92m', 13 | 'BRIGHT_YELLOW': '\33[93m', 14 | 'BRIGHT_BLUE': '\33[94m', 15 | 'BRIGHT_MAGENTA': '\33[95m', 16 | 'BRIGHT_CYAN': '\33[96m', 17 | 'BRIGHT_WHITE': '\33[97m', 18 | } 19 | COLORS_STYLE = { 20 | 'RESET': '\33[0m', 21 | 'BOLD': '\33[1m', 22 | 'NORMAL': '\33[22m', 23 | 'ITALIC': '\33[3m', 24 | 'UNDERLINE': '\33[4m', 25 | 'BLINK': '\33[5m', 26 | 'BLINK2': '\33[6m', 27 | 'SELECTED': '\33[7m', 28 | } 29 | COLORS_BG = { 30 | 'BLACK': '\33[40m', 31 | 'RED': '\33[41m', 32 | 'GREEN': '\33[42m', 33 | 'YELLOW': '\33[43m', 34 | 'BLUE': '\33[44m', 35 | 'MAGENTA': '\33[45m', 36 | 'CYAN': '\33[46m', 37 | 'WHITE': '\33[47m', 38 | 'GREY': '\33[100m', 39 | 'BRIGHT_RED': '\33[101m', 40 | 'BRIGHT_GREEN': '\33[102m', 41 | 'BRIGHT_YELLOW': '\33[103m', 42 | 'BRIGHT_BLUE': '\33[104m', 43 | 'BRIGHT_MAGENTA': '\33[105m', 44 | 'BRIGHT_CYAN': '\33[106m', 45 | 'BRIGHT_WHITE': '\33[107m', 46 | } 47 | 48 | 49 | def log_node_success(node_name, message=None): 50 | """Logs a success message.""" 51 | _log_node(COLORS_FG["GREEN"], node_name, message) 52 | 53 | 54 | def log_node_info(node_name, message=None): 55 | """Logs an info message.""" 56 | _log_node(COLORS_FG["CYAN"], node_name, message) 57 | 58 | 59 | def log_node_warn(node_name, message=None): 60 | """Logs an warn message.""" 61 | _log_node(COLORS_FG["YELLOW"], node_name, message) 62 | 63 | 64 | def log_node_error(node_name, message=None): 65 | """Logs an warn message.""" 66 | _log_node(COLORS_FG["RED"], node_name, message) 67 | 68 | 69 | def log_node(node_name, message=None): 70 | """Logs a message.""" 71 | _log_node(COLORS_FG["CYAN"], node_name, message) 72 | 73 | 74 | def _log_node(color, node_name, message=None, prefix=''): 75 | print(_get_log_msg(color, node_name, message, prefix=prefix)) 76 | 77 | 78 | def _get_log_msg(color, node_name, message=None, prefix=''): 79 | msg = f'{COLORS_STYLE["BOLD"]}{color}{prefix}[Fooocus] {node_name.replace(" (Fooocus)", "")}' 80 | msg += f':{COLORS_STYLE["RESET"]} {message}' if message is not None else f'{COLORS_STYLE["RESET"]}' 81 | return msg 82 | -------------------------------------------------------------------------------- 
/py/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/modules/__init__.py -------------------------------------------------------------------------------- /py/modules/advanced_parameters.py: -------------------------------------------------------------------------------- 1 | disable_preview, adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \ 2 | scheduler_name, generate_image_grid, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \ 3 | overwrite_vary_strength, overwrite_upscale_strength, \ 4 | mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ 5 | debugging_cn_preprocessor, skipping_cn_preprocessor, \ 6 | refiner_swap_method, \ 7 | freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2, \ 8 | debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field, \ 9 | inpaint_mask_upload_checkbox, invert_mask_checkbox, inpaint_erode_or_dilate = [None] * 32 10 | 11 | 12 | controlnet_softness = 0.25 13 | canny_low_threshold = 64 14 | canny_high_threshold = 128 15 | -------------------------------------------------------------------------------- /py/modules/constants.py: -------------------------------------------------------------------------------- 1 | # as in k-diffusion (sampling.py) 2 | MIN_SEED = 0 3 | MAX_SEED = 2**63 - 1 4 | 5 | AUTH_FILENAME = 'auth.json' 6 | -------------------------------------------------------------------------------- /py/modules/flags.py: -------------------------------------------------------------------------------- 1 | disabled = 'Disabled' 2 | enabled = 'Enabled' 3 | subtle_variation = 'Vary (Subtle)' 4 | strong_variation = 'Vary (Strong)' 5 | upscale_15 = 'Upscale (1.5x)' 6 | upscale_2 = 'Upscale (2x)' 7 | upscale_fast = 'Upscale (Fast 2x)' 8 | 9 | uov_list = [ 10 | disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast 11 | ] 12 | 13 | KSAMPLER_NAMES = ["euler", "euler_ancestral", "heun", "heunpp2", "dpm_2", "dpm_2_ancestral", 14 | "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", 15 | "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm"] 16 | 17 | SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", "lcm", "turbo"] 18 | SAMPLER_NAMES = KSAMPLER_NAMES + ["ddim", "uni_pc", "uni_pc_bh2"] 19 | 20 | sampler_list = SAMPLER_NAMES 21 | scheduler_list = SCHEDULER_NAMES 22 | 23 | cn_ip = "ImagePrompt" 24 | cn_ip_face = "FaceSwap" 25 | cn_canny = "PyraCanny" 26 | cn_cpds = "CPDS" 27 | 28 | ip_list = [cn_ip, cn_ip_face] 29 | cn_list = [cn_canny, cn_cpds] 30 | default_ip = cn_ip 31 | default_cn = cn_canny 32 | 33 | 34 | default_parameters = { 35 | cn_ip: (0.5, 0.6), cn_ip_face: (0.9, 0.75), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0) 36 | } # stop, weight 37 | 38 | inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6'] 39 | performance_selections = ['Speed', 'Quality', 'Extreme Speed'] 40 | 41 | inpaint_option_default = 'Inpaint or Outpaint (default)' 42 | inpaint_option_detail = 'Improve Detail (face, hand, eyes, etc.)' 43 | inpaint_option_modify = 'Modify Content (add objects, change background, etc.)' 44 | inpaint_options = [inpaint_option_default, inpaint_option_detail, inpaint_option_modify] 45 | 46 | desc_type_photo = 'Photograph' 47 | 
desc_type_anime = 'Art/Anime' 48 | -------------------------------------------------------------------------------- /py/modules/model_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib.parse import urlparse 3 | from typing import Optional 4 | import folder_paths 5 | 6 | def load_file_from_url( 7 | url: str, 8 | *, 9 | model_dir: str, 10 | progress: bool = True, 11 | file_name: Optional[str] = None, 12 | ) -> str: 13 | """Download a file from `url` into `model_dir`, reusing the existing file if it is already present. 14 | 15 | Returns the path to the downloaded file. 16 | """ 17 | if not file_name: 18 | parts = urlparse(url) 19 | file_name = os.path.basename(parts.path) 20 | # Search all registered model folders for an existing copy 21 | cached_file = folder_paths.get_full_path(model_dir, file_name) 22 | if cached_file is None: 23 | os.makedirs(folder_paths.get_folder_paths(model_dir)[0], exist_ok=True) 24 | cached_file = os.path.join(folder_paths.get_folder_paths(model_dir)[0],file_name) 25 | 26 | if not os.path.exists(cached_file): 27 | print(f'Downloading: "{url}" to {cached_file}\n') 28 | from torch.hub import download_url_to_file 29 | download_url_to_file(url, cached_file, progress=progress) 30 | return cached_file 31 | -------------------------------------------------------------------------------- /py/modules/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import contextlib 3 | 4 | 5 | @contextlib.contextmanager 6 | def use_patched_ops(operations): 7 | op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm'] 8 | backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names} 9 | 10 | try: 11 | for op_name in op_names: 12 | setattr(torch.nn, op_name, getattr(operations, op_name)) 13 | 14 | yield 15 | 16 | finally: 17 | for op_name in op_names: 18 | setattr(torch.nn, op_name, backups[op_name]) 19 | return 20 | -------------------------------------------------------------------------------- /py/modules/patch_precision.py: -------------------------------------------------------------------------------- 1 | # Consistent with Kohya to reduce differences between model training and inference. 2 | 3 | import torch 4 | import math 5 | import einops 6 | import numpy as np 7 | 8 | import ldm_patched.ldm.modules.diffusionmodules.openaimodel 9 | import ldm_patched.modules.model_sampling 10 | import ldm_patched.modules.sd1_clip 11 | 12 | from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule 13 | 14 | 15 | def patched_timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): 16 | # Consistent with Kohya to reduce differences between model training and inference. 17 | 18 | if not repeat_only: 19 | half = dim // 2 20 | freqs = torch.exp( 21 | -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half 22 | ).to(device=timesteps.device) 23 | args = timesteps[:, None].float() * freqs[None] 24 | embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) 25 | if dim % 2: 26 | embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) 27 | else: 28 | embedding = einops.repeat(timesteps, 'b -> b d', d=dim) 29 | return embedding 30 | 31 | 32 | def patched_register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, 33 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 34 | # Consistent with Kohya to reduce differences between model training and inference.
35 | 36 | if given_betas is not None: 37 | betas = given_betas 38 | else: 39 | betas = make_beta_schedule( 40 | beta_schedule, 41 | timesteps, 42 | linear_start=linear_start, 43 | linear_end=linear_end, 44 | cosine_s=cosine_s) 45 | 46 | alphas = 1. - betas 47 | alphas_cumprod = np.cumprod(alphas, axis=0) 48 | timesteps, = betas.shape 49 | self.num_timesteps = int(timesteps) 50 | self.linear_start = linear_start 51 | self.linear_end = linear_end 52 | sigmas = torch.tensor(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, dtype=torch.float32) 53 | self.set_sigmas(sigmas) 54 | return 55 | 56 | 57 | def patch_all_precision(): 58 | ldm_patched.ldm.modules.diffusionmodules.openaimodel.timestep_embedding = patched_timestep_embedding 59 | ldm_patched.modules.model_sampling.ModelSamplingDiscrete._register_schedule = patched_register_schedule 60 | return 61 | -------------------------------------------------------------------------------- /py/modules/sdxl_styles.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | import math 5 | 6 | from modules.util import get_files_from_folder 7 | from random import Random 8 | 9 | # cannot use modules.config - validators causing circular imports 10 | styles_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../sdxl_styles/')) 11 | 12 | 13 | def normalize_key(k): 14 | k = k.replace('-', ' ') 15 | words = k.split(' ') 16 | words = [w[:1].upper() + w[1:].lower() for w in words] 17 | k = ' '.join(words) 18 | k = k.replace('3d', '3D') 19 | k = k.replace('Sai', 'SAI') 20 | k = k.replace('Mre', 'MRE') 21 | k = k.replace('(s', '(S') 22 | return k 23 | 24 | 25 | styles = {} 26 | styles_files = get_files_from_folder(styles_path, ['.json']) 27 | 28 | for x in ['sdxl_styles_fooocus.json', 29 | 'sdxl_styles_sai.json', 30 | 'sdxl_styles_mre.json', 31 | 'sdxl_styles_twri.json', 32 | 'sdxl_styles_diva.json', 33 | 'sdxl_styles_marc_k3nt3l.json']: 34 | if x in styles_files: 35 | styles_files.remove(x) 36 | styles_files.append(x) 37 | 38 | for styles_file in styles_files: 39 | try: 40 | with open(os.path.join(styles_path, styles_file), encoding='utf-8') as f: 41 | for entry in json.load(f): 42 | name = normalize_key(entry['name']) 43 | prompt = entry['prompt'] if 'prompt' in entry else '' 44 | negative_prompt = entry['negative_prompt'] if 'negative_prompt' in entry else '' 45 | styles[name] = (prompt, negative_prompt) 46 | except Exception as e: 47 | print(str(e)) 48 | print(f'Failed to load style file {styles_file}') 49 | 50 | style_keys = list(styles.keys()) 51 | fooocus_expansion = 'Fooocus V2' 52 | random_style_name = 'Random Style' 53 | legal_style_names = [fooocus_expansion, random_style_name] + style_keys 54 | 55 | 56 | def get_random_style(rng: Random) -> str: 57 | return rng.choice(list(styles.items()))[0] 58 | 59 | 60 | def apply_style(style, positive): 61 | p, n = styles[style] 62 | return p.replace('{prompt}', positive).splitlines(), n.splitlines(), '{prompt}' in p 63 | 64 | 65 | def get_words(arrays, total_mult, index): 66 | if len(arrays) == 1: 67 | return [arrays[0].split(',')[index]] 68 | else: 69 | words = arrays[0].split(',') 70 | word = words[index % len(words)] 71 | index -= index % len(words) 72 | index /= len(words) 73 | index = math.floor(index) 74 | return [word] + get_words(arrays[1:], math.floor(total_mult / len(words)), index) 75 | 76 | 77 | def apply_arrays(text, index): 78 | arrays = re.findall(r'\[\[(.*?)\]\]', text) 79 | if len(arrays) == 0: 80 | return text 
81 | 82 | print(f'[Arrays] processing: {text}') 83 | mult = 1 84 | for arr in arrays: 85 | words = arr.split(',') 86 | mult *= len(words) 87 | 88 | index %= mult 89 | chosen_words = get_words(arrays, mult, index) 90 | 91 | i = 0 92 | for arr in arrays: 93 | text = text.replace(f'[[{arr}]]', chosen_words[i], 1) 94 | i = i+1 95 | 96 | return text 97 | 98 | -------------------------------------------------------------------------------- /py/modules/upscaler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import modules.core as core 4 | 5 | from ldm_patched.pfn.architecture.RRDB import RRDBNet as ESRGAN 6 | from ldm_patched.contrib.external_upscale_model import ImageUpscaleWithModel 7 | from collections import OrderedDict 8 | import folder_paths 9 | 10 | model_filename = folder_paths.get_full_path('upscale_models','fooocus_upscaler_s409985e5.bin') 11 | opImageUpscaleWithModel = ImageUpscaleWithModel() 12 | model = None 13 | 14 | 15 | def perform_upscale(img): 16 | global model 17 | 18 | print(f'Upscaling image with shape {str(img.shape)} ...') 19 | 20 | if model is None: 21 | sd = torch.load(model_filename) 22 | sdo = OrderedDict() 23 | for k, v in sd.items(): 24 | sdo[k.replace('residual_block_', 'RDB')] = v 25 | del sd 26 | model = ESRGAN(sdo) 27 | model.cpu() 28 | model.eval() 29 | 30 | img = core.numpy_to_pytorch(img) 31 | img = opImageUpscaleWithModel.upscale(model, img)[0] 32 | img = core.pytorch_to_numpy(img)[0] 33 | 34 | return img 35 | -------------------------------------------------------------------------------- /py/prompt.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | path_styles = os.path.abspath(os.path.join(os.path.dirname(__file__), '../sdxl_styles/')) 4 | 5 | # Style prompt selector node 6 | class FooocusStyles: 7 | @classmethod 8 | def INPUT_TYPES(s): 9 | styles = ["fooocus_styles"] 10 | styles_dir = path_styles 11 | for file_name in os.listdir(styles_dir): 12 | file = os.path.join(styles_dir, file_name) 13 | if ( 14 | os.path.isfile(file) 15 | and file_name.endswith(".json") 16 | and "styles" in file_name.split(".")[0] 17 | ): 18 | styles.append(file_name.split(".")[0]) 19 | return { 20 | "required": { 21 | "styles": (styles, {"default": "fooocus_styles"}), 22 | }, 23 | "hidden": { 24 | "prompt": "PROMPT", 25 | "extra_pnginfo": "EXTRA_PNGINFO", 26 | "my_unique_id": "UNIQUE_ID", 27 | }, 28 | } 29 | 30 | # 31 | RETURN_TYPES = ( 32 | "FOOOCUS_STYLES", 33 | ) 34 | RETURN_NAMES = ( 35 | "fooocus_styles", 36 | ) 37 | 38 | CATEGORY = "Fooocus/Prompt" 39 | FUNCTION = "run" 40 | OUTPUT_MODE = True 41 | 42 | def run( 43 | self, 44 | styles, 45 | prompt=None, 46 | extra_pnginfo=None, 47 | my_unique_id=None, 48 | ): 49 | values = [] 50 | if my_unique_id in prompt: 51 | if prompt[my_unique_id]["inputs"]["select_styles"]: 52 | values = prompt[my_unique_id]["inputs"]["select_styles"].split( 53 | ",") 54 | 55 | return (values,) 56 | 57 | 58 | # Positive prompt node 59 | class positivePrompt: 60 | def __init__(self): 61 | pass 62 | 63 | @classmethod 64 | def INPUT_TYPES(s): 65 | return { 66 | "required": {"positive": ("STRING", {"default": "", "multiline": True, "placeholder": "Positive"},), 67 | } 68 | } 69 | 70 | RETURN_TYPES = ("STRING",) 71 | RETURN_NAMES = ("positive",) 72 | FUNCTION = "main" 73 | 74 | CATEGORY = "Fooocus/Prompt" 75 | 76 | @staticmethod 77 | def main(positive): 78 | return (positive,) 79 | 80 | 81 | # Negative prompt node 82 | class negativePrompt: 83 | def __init__(self): 84 | pass 85
| 86 | @classmethod 87 | def INPUT_TYPES(s): 88 | return { 89 | "required": { 90 | "negative": ( 91 | "STRING", 92 | {"default": "", "multiline": True, "placeholder": "Negative"}, 93 | ), 94 | } 95 | } 96 | 97 | RETURN_TYPES = ("STRING",) 98 | RETURN_NAMES = ("negative",) 99 | FUNCTION = "main" 100 | 101 | CATEGORY = "Fooocus/Prompt" 102 | 103 | @staticmethod 104 | def main(negative): 105 | return (negative,) 106 | 107 | 108 | NODE_CLASS_MAPPINGS = { 109 | "Fooocus positive": positivePrompt, 110 | "Fooocus negative": negativePrompt, 111 | "Fooocus Styles": FooocusStyles, 112 | } 113 | 114 | NODE_DISPLAY_NAME_MAPPINGS = { 115 | "Fooocus positive": "Positive", 116 | "Fooocus negative": "Negative", 117 | "Fooocus stylesSelector": "stylesPromptSelector", 118 | "Fooocus Styles": "Fooocus Styles" 119 | } 120 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "fooocus_nodes" 3 | description = "This extension provides image generation features based on Fooocus." 4 | version = "1.0.2" 5 | license = "LICENSE" 6 | dependencies = ["accelerate==0.32.1", "pytorch_lightning==2.3.3", "pygit2==1.15.1", "opencv-contrib-python-headless==4.10.0.84", "httpx==0.27.0", "onnxruntime", "timm==1.0.7"] 7 | 8 | [project.urls] 9 | Repository = "https://github.com/Seedsa/Fooocus_Nodes" 10 | # Used by Comfy Registry https://comfyregistry.org 11 | 12 | [tool.comfy] 13 | PublisherId = "seed" 14 | DisplayName = "Fooocus_Nodes" 15 | Icon = "" 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.32.1 2 | pytorch_lightning==2.3.3 3 | pygit2==1.15.1 4 | opencv-contrib-python-headless==4.10.0.84 5 | httpx==0.27.0 6 | timm==1.0.7 7 | onnxruntime 8 | -------------------------------------------------------------------------------- /screnshot/Fooocus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/screnshot/Fooocus.png -------------------------------------------------------------------------------- /screnshot/FooocusNodes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/screnshot/FooocusNodes.png -------------------------------------------------------------------------------- /sdxl_styles/sdxl_styles_fooocus.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Fooocus Enhance", 4 | "negative_prompt": "(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, 
deformities:1.3)" 5 | }, 6 | { 7 | "name": "Fooocus Semi Realistic", 8 | "negative_prompt": "(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)" 9 | }, 10 | { 11 | "name": "Fooocus Sharp", 12 | "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo, sharp focus, high budget, cinemascope, moody, epic, gorgeous, film grain, grainy", 13 | "negative_prompt": "anime, cartoon, graphic, (blur, blurry, bokeh), text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 14 | }, 15 | { 16 | "name": "Fooocus Masterpiece", 17 | "prompt": "(masterpiece), (best quality), (ultra-detailed), {prompt}, illustration, disheveled hair, detailed eyes, perfect composition, moist skin, intricate details, earrings", 18 | "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair,extra digit, fewer digits, cropped, worst quality, low quality" 19 | }, 20 | { 21 | "name": "Fooocus Photograph", 22 | "prompt": "photograph {prompt}, 50mm . cinematic 4k epic detailed 4k epic detailed photograph shot on kodak detailed cinematic hbo dark moody, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage", 23 | "negative_prompt": "Brad Pitt, bokeh, depth of field, blurry, cropped, regular face, saturated, contrast, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck" 24 | }, 25 | { 26 | "name": "Fooocus Negative", 27 | "negative_prompt": "deformed, bad anatomy, disfigured, poorly drawn face, mutated, extra limb, ugly, poorly drawn hands, missing limb, floating limbs, disconnected limbs, disconnected head, malformed hands, long neck, mutated hands and fingers, bad hands, missing fingers, cropped, worst quality, low quality, mutation, poorly drawn, huge calf, bad hands, fused hand, missing hand, disappearing arms, disappearing thigh, disappearing calf, disappearing legs, missing fingers, fused fingers, abnormal eye proportion, Abnormal hands, abnormal legs, abnormal feet, abnormal fingers, drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly, anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch" 28 | }, 29 | { 30 | "name": "Fooocus Cinematic", 31 | "prompt": "cinematic still {prompt} . 
emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", 32 | "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 33 | }, 34 | { 35 | "name": "Fooocus Pony", 36 | "prompt": "score_9, score_8_up, score_7_up, {prompt}", 37 | "negative_prompt": "score_6, score_5, score_4" 38 | } 39 | ] 40 | -------------------------------------------------------------------------------- /sdxl_styles/sdxl_styles_sai.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "sai-3d-model", 4 | "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting", 5 | "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting" 6 | }, 7 | { 8 | "name": "sai-analog film", 9 | "prompt": "analog film photo {prompt} . faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage", 10 | "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured" 11 | }, 12 | { 13 | "name": "sai-anime", 14 | "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed", 15 | "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast" 16 | }, 17 | { 18 | "name": "sai-cinematic", 19 | "prompt": "cinematic film still {prompt} . shallow depth of field, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", 20 | "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 21 | }, 22 | { 23 | "name": "sai-comic book", 24 | "prompt": "comic {prompt} . graphic illustration, comic art, graphic novel art, vibrant, highly detailed", 25 | "negative_prompt": "photograph, deformed, glitch, noisy, realistic, stock photo" 26 | }, 27 | { 28 | "name": "sai-craft clay", 29 | "prompt": "play-doh style {prompt} . sculpture, clay art, centered composition, Claymation", 30 | "negative_prompt": "sloppy, messy, grainy, highly detailed, ultra textured, photo" 31 | }, 32 | { 33 | "name": "sai-digital art", 34 | "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed", 35 | "negative_prompt": "photo, photorealistic, realism, ugly" 36 | }, 37 | { 38 | "name": "sai-enhance", 39 | "prompt": "breathtaking {prompt} . award-winning, professional, highly detailed", 40 | "negative_prompt": "ugly, deformed, noisy, blurry, distorted, grainy" 41 | }, 42 | { 43 | "name": "sai-fantasy art", 44 | "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy", 45 | "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white" 46 | }, 47 | { 48 | "name": "sai-isometric", 49 | "prompt": "isometric style {prompt} . vibrant, beautiful, crisp, detailed, ultra detailed, intricate", 50 | "negative_prompt": "deformed, mutated, ugly, disfigured, blur, blurry, noise, noisy, realistic, photographic" 51 | }, 52 | { 53 | "name": "sai-line art", 54 | "prompt": "line art drawing {prompt} . 
professional, sleek, modern, minimalist, graphic, line art, vector graphics", 55 | "negative_prompt": "anime, photorealistic, 35mm film, deformed, glitch, blurry, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, mutated, realism, realistic, impressionism, expressionism, oil, acrylic" 56 | }, 57 | { 58 | "name": "sai-lowpoly", 59 | "prompt": "low-poly style {prompt} . low-poly game art, polygon mesh, jagged, blocky, wireframe edges, centered composition", 60 | "negative_prompt": "noisy, sloppy, messy, grainy, highly detailed, ultra textured, photo" 61 | }, 62 | { 63 | "name": "sai-neonpunk", 64 | "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional", 65 | "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured" 66 | }, 67 | { 68 | "name": "sai-origami", 69 | "prompt": "origami style {prompt} . paper art, pleated paper, folded, origami art, pleats, cut and fold, centered composition", 70 | "negative_prompt": "noisy, sloppy, messy, grainy, highly detailed, ultra textured, photo" 71 | }, 72 | { 73 | "name": "sai-photographic", 74 | "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed", 75 | "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly" 76 | }, 77 | { 78 | "name": "sai-pixel art", 79 | "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics", 80 | "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic" 81 | }, 82 | { 83 | "name": "sai-texture", 84 | "prompt": "texture {prompt} top down close-up", 85 | "negative_prompt": "ugly, deformed, noisy, blurry" 86 | } 87 | ] -------------------------------------------------------------------------------- /web/js/interface.js: -------------------------------------------------------------------------------- 1 | import { app } from "/scripts/app.js"; 2 | 3 | // 增加Slot颜色 4 | const customPipeLineLink = "#7737AA"; 5 | const customPipeLineSDXLLink = "#7737AA"; 6 | const customIntLink = "#29699C"; 7 | const customXYPlotLink = "#74DA5D"; 8 | const customXYLink = "#38291f"; 9 | const STRINGLink = "#00aa8c"; 10 | 11 | var customLinkColors = 12 | JSON.parse(localStorage.getItem("Comfy.Settings.ttN.customLinkColors")) || {}; 13 | if ( 14 | !customLinkColors["PIPE_LINE"] || 15 | !LGraphCanvas.link_type_colors["PIPE_LINE"] 16 | ) { 17 | customLinkColors["PIPE_LINE"] = customPipeLineLink; 18 | } 19 | if ( 20 | !customLinkColors["PIPE_LINE_SDXL"] || 21 | !LGraphCanvas.link_type_colors["PIPE_LINE_SDXL"] 22 | ) { 23 | customLinkColors["PIPE_LINE_SDXL"] = customPipeLineSDXLLink; 24 | } 25 | if (!customLinkColors["INT"] || !LGraphCanvas.link_type_colors["INT"]) { 26 | customLinkColors["INT"] = customIntLink; 27 | } 28 | if (!customLinkColors["XYPLOT"] || !LGraphCanvas.link_type_colors["XYPLOT"]) { 29 | customLinkColors["XYPLOT"] = customXYPlotLink; 30 | } 31 | if (!customLinkColors["X_Y"] || !LGraphCanvas.link_type_colors["X_Y"]) { 32 | customLinkColors["X_Y"] = customXYLink; 33 | } 34 | if (!customLinkColors["STRING"] || !LGraphCanvas.link_type_colors["STRING"]) { 35 | customLinkColors["STRING"] = STRINGLink; 36 | } 37 | 38 | localStorage.setItem( 39 | "Comfy.Settings.fooocus.customLinkColors", 40 | 
JSON.stringify(customLinkColors) 41 | ); 42 | 43 | // 节点颜色 44 | const COLOR_THEMES = LGraphCanvas.node_colors; 45 | const NODE_COLORS = { 46 | "Fooocus positive": "green", 47 | "Fooocus negative": "red", 48 | }; 49 | 50 | function setNodeColors(node, theme) { 51 | if (!theme) { 52 | return; 53 | } 54 | if (theme.color) node.color = theme.color; 55 | if (theme.bgcolor) node.bgcolor = theme.bgcolor; 56 | } 57 | 58 | app.registerExtension({ 59 | name: "comfy.fooocus.interface", 60 | setup() { 61 | Object.assign(app.canvas.default_connection_color_byType, customLinkColors); 62 | Object.assign(LGraphCanvas.link_type_colors, customLinkColors); 63 | }, 64 | 65 | nodeCreated(node) { 66 | if (NODE_COLORS.hasOwnProperty(node.comfyClass)) { 67 | const colorKey = NODE_COLORS[node.comfyClass]; 68 | const theme = COLOR_THEMES[colorKey]; 69 | setNodeColors(node, theme); 70 | } 71 | }, 72 | }); 73 | -------------------------------------------------------------------------------- /wildcards/color.txt: -------------------------------------------------------------------------------- 1 | aqua 2 | black 3 | blue 4 | fuchsia 5 | gray 6 | green 7 | lime 8 | maroon 9 | navy 10 | olive 11 | orange 12 | purple 13 | red 14 | silver 15 | teal 16 | white 17 | yellow 18 | -------------------------------------------------------------------------------- /wildcards/color_flower.txt: -------------------------------------------------------------------------------- 1 | __color__ __flower__ 2 | -------------------------------------------------------------------------------- /wildcards/extended-color.txt: -------------------------------------------------------------------------------- 1 | aliceblue 2 | antiquewhite 3 | aqua 4 | aquamarine 5 | azure 6 | beige 7 | bisque 8 | black 9 | blanchedalmond 10 | blue 11 | blueviolet 12 | brown 13 | burlywood 14 | cadetblue 15 | chartreuse 16 | chocolate 17 | coral 18 | cornflowerblue 19 | cornsilk 20 | crimson 21 | cyan 22 | darkblue 23 | darkcyan 24 | darkgoldenrod 25 | darkgray 26 | darkgreen 27 | darkgrey 28 | darkkhaki 29 | darkmagenta 30 | darkolivegreen 31 | darkorange 32 | darkorchid 33 | darkred 34 | darksalmon 35 | darkseagreen 36 | darkslateblue 37 | darkslategray 38 | darkslategrey 39 | darkturquoise 40 | darkviolet 41 | deeppink 42 | deepskyblue 43 | dimgray 44 | dimgrey 45 | dodgerblue 46 | firebrick 47 | floralwhite 48 | forestgreen 49 | fuchsia 50 | gainsboro 51 | ghostwhite 52 | gold 53 | goldenrod 54 | gray 55 | green 56 | greenyellow 57 | grey 58 | honeydew 59 | hotpink 60 | indianred 61 | indigo 62 | ivory 63 | khaki 64 | lavender 65 | lavenderblush 66 | lawngreen 67 | lemonchiffon 68 | lightblue 69 | lightcoral 70 | lightcyan 71 | lightgoldenrodyellow 72 | lightgray 73 | lightgreen 74 | lightgrey 75 | lightpink 76 | lightsalmon 77 | lightseagreen 78 | lightskyblue 79 | lightslategray 80 | lightslategrey 81 | lightsteelblue 82 | lightyellow 83 | lime 84 | limegreen 85 | linen 86 | magenta 87 | maroon 88 | mediumaquamarine 89 | mediumblue 90 | mediumorchid 91 | mediumpurple 92 | mediumseagreen 93 | mediumslateblue 94 | mediumspringgreen 95 | mediumturquoise 96 | mediumvioletred 97 | midnightblue 98 | mintcream 99 | mistyrose 100 | moccasin 101 | navajowhite 102 | navy 103 | oldlace 104 | olive 105 | olivedrab 106 | orange 107 | orangered 108 | orchid 109 | palegoldenrod 110 | palegreen 111 | paleturquoise 112 | palevioletred 113 | papayawhip 114 | peachpuff 115 | peru 116 | pink 117 | plum 118 | powderblue 119 | purple 120 | red 121 | rosybrown 122 | royalblue 123 | 
saddlebrown 124 | salmon 125 | sandybrown 126 | seagreen 127 | seashell 128 | sienna 129 | silver 130 | skyblue 131 | slateblue 132 | slategray 133 | slategrey 134 | snow 135 | springgreen 136 | steelblue 137 | tan 138 | teal 139 | thistle 140 | tomato 141 | turquoise 142 | violet 143 | wheat 144 | white 145 | whitesmoke 146 | yellow 147 | yellowgreen 148 | -------------------------------------------------------------------------------- /wildcards/flower.txt: -------------------------------------------------------------------------------- 1 | Acacia 2 | Achillea 3 | Adam's-needle 4 | African Boxwood 5 | African Lily 6 | Agapanthus 7 | Ageratum 8 | Ageratum houstonim 9 | Allium 10 | Alpina 11 | Alstroemeria 12 | Amaranthus hypochondriacus 13 | Amaryllis 14 | Ammi majus 15 | Anconitum 16 | Anemone 17 | Anigozanthus 18 | Annual Delphinium 19 | Anthurium 20 | Antirrhinum majus 21 | Artichoke thistle 22 | Asparagus 23 | Aster 24 | Astilbe 25 | Baby's Breath 26 | Bachelor's Button 27 | Banksia 28 | Bellflower 29 | Big Flax 30 | Bighead Knapweed 31 | Billy Buttons 32 | Bird of Paradise 33 | Blazing Star 34 | Blue Lace Flower 35 | Boronia 36 | Bouvardia 37 | Boxwood African 38 | Diosma 39 | Buckthorn Variegated 40 | Buddleia 41 | Bupleurum 42 | Butterfly Bush 43 | Butterfly Orchid 44 | Calla Lily 45 | Campanula 46 | Candytuft 47 | Canterbury Bells 48 | Carnation 49 | Carthamus 50 | Casa Blanca 51 | Caspia 52 | Cattleya 53 | Celosia 54 | Celosia argenta 55 | Centaurea cyanus 56 | Chamelaucium 57 | Chimney Bells 58 | Chrysanthemum 59 | Chrysanthemum x morifolium 60 | Clarkia 61 | Cockscomb Crested 62 | Coffee Bean Berry 63 | Common Myrtle 64 | Common Yarrow 65 | Cone Flower 66 | Consolida ambigua 67 | Convallaria 68 | Cordyline 69 | Cosmos 70 | Cornflower 71 | Craspedia 72 | Curly Willow 73 | Cymbidium 74 | Cymbidium Orchid 75 | Daffodil 76 | Dahlia 77 | Daisy Mums 78 | Delphinium Belladonna 79 | Delphinium Pacific Giant 80 | Dendrobium 81 | Dendrobium Orchid 82 | Dianthus barbatus 83 | Dianthus caryophyllus 84 | Dianthus caryophyllus nana 85 | Erica spp 86 | Eucalyptus seeded 87 | Eucalyptus silver dollar 88 | Eustoma grandiflorum 89 | False Bird of Paradise 90 | False Spirea 91 | Farewell-To-Spring 92 | Fernleaf Yarrow 93 | Feverfew 94 | Flamingo Flower 95 | Flax New Zealand 96 | Floss Flower 97 | Foxtail Fern 98 | Freesia 99 | Freesia x hybrida 100 | Fuji Mums 101 | Gardenia 102 | Gay Feather 103 | Genista 104 | Gerbera 105 | Gerbera Ruby Red 106 | Ginger 107 | Gladiolus 108 | Gladiolus hybrid nanus 109 | Goat's Beard 110 | Godetia 111 | Golden Rod 112 | Guersney Lily 113 | Gyp 114 | Gypsophila paniculata 115 | Hanging Helicona 116 | Heath 117 | Heather 118 | Helianthus annuus 119 | Heliconia spp. 
120 | Hippeastrum 121 | Hydrangea 122 | Iberis amara 123 | Inca Lily 124 | Iris 125 | Japhette Orchid 126 | Jonquil 127 | Knapweed 128 | Lace fern 129 | Larkspur 130 | Lathyrus odoratus 131 | Lavandula 132 | Lavender 133 | Liatris 134 | Lilac 135 | Lily 136 | Lilly-of-the-Valley 137 | Lily Casa Blanca 138 | Lily of the Field 139 | Lily of the Nile 140 | Lily Stargazer 141 | Limonium 142 | Lisianthus 143 | Marguerite daisy 144 | Mattholia incana 145 | Melaleuca 146 | Memosa 147 | Misty Blue Limonium 148 | Moluccella laevis 149 | Monkshood 150 | Montbretia 151 | Monte Cassino 152 | Moon orchid 153 | Musa 154 | Myrsine 155 | Myrtle 156 | Myrtus 157 | Nephrolepis 158 | Nerine 159 | Nerine Lily 160 | Nigella 161 | Ornithogalum 162 | Paeonia 163 | Painted Tongue 164 | Paper Reed 165 | Papyrus lion's head 166 | Peony 167 | Peruvian Lily 168 | Phalaenopsis 169 | Philodendron 170 | Phlox 171 | Pincushion Flower 172 | Pink Mink 173 | Pitt 174 | Pittosporum 175 | Pixie Carnation 176 | Polianthes tuberosa 177 | Pompon Chrysanthemum 178 | Poppy Anemone 179 | Porium 180 | Pussy Willow 181 | Queen Anne's Lace 182 | Ranunculus 183 | Red Ribbons 184 | Rice flower 185 | Rose 186 | Rose Bridal Pink 187 | Rose Bridal White 188 | Rose Champagne 189 | Rose Diadem 190 | Rose Emblem 191 | Rose Kardinal 192 | Rose Lady Liberty 193 | Rose Lavanda 194 | Rose Osiana 195 | Rose Royalty 196 | Safari Sunset 197 | Safflower 198 | Sage Perennial 199 | Salix 200 | Salmon Reagan 201 | Sansevieria 202 | Saponaria 203 | Satin Flowers 204 | Saxicola 205 | Scabiosa 206 | Schinus 207 | Sea lavender 208 | Shell Flowers 209 | Snake Plant 210 | Snapdragon 211 | Solidago 212 | Solidaster 213 | Speedwell 214 | Spider Lily 215 | Spider Mums 216 | Spray Carnation 217 | Sprengeri Fern 218 | Star of Bethlehem 219 | Statice 220 | Stenamezon 221 | Stephanotis 222 | Strawberry banksia 223 | Strawflower 224 | Summer poinsettia 225 | Summer's Darling 226 | Sunflower 227 | Sweet Pea 228 | Sweet William 229 | Sword Fern 230 | Syringa vulgaris 231 | Tailflowers 232 | Tassel flower 233 | Thouroughwax 234 | Throatwort 235 | Tracelium 236 | Tree Fern 237 | Trumpet Lily 238 | Tuberose 239 | Tulip 240 | Tulipa 241 | Veronica 242 | Wattle 243 | Waxflower 244 | Wild Plantain 245 | Willow curly 246 | Windflower 247 | Wolfsbane 248 | Zantedeschia 249 | Zinna 250 | Zinnia elegans 251 | -------------------------------------------------------------------------------- /wildcards/nationality.txt: -------------------------------------------------------------------------------- 1 | Afghan 2 | Albanian 3 | Algerian 4 | American 5 | Andorran 6 | Angolan 7 | Antiguans 8 | Argentine 9 | Armenian 10 | Australian 11 | Austrian 12 | Azerbaijani 13 | Bahamian 14 | Bahraini 15 | Bangladeshi 16 | Barbadian 17 | Barbudans 18 | Batswana 19 | Belarusian 20 | Belgian 21 | Belizean 22 | Beninese 23 | Bhutanese 24 | Bolivian 25 | Bosnian 26 | Brazilian 27 | British 28 | Bruneian 29 | Bulgarian 30 | Burkinabe 31 | Burmese 32 | Burundian 33 | Cambodian 34 | Cameroonian 35 | Canadian 36 | Cape Verdean 37 | Central African 38 | Chadian 39 | Chilean 40 | Chinese 41 | Colombian 42 | Comoran 43 | Congolese 44 | Costa Rican 45 | Croatian 46 | Cuban 47 | Cypriot 48 | Czech 49 | Danish 50 | Djibouti 51 | Dominican 52 | Dutch 53 | East Timorese 54 | Ecuadorean 55 | Egyptian 56 | Emirati 57 | Equatorial Guinean 58 | Eritrean 59 | Estonian 60 | Ethiopian 61 | Fijian 62 | Filipino 63 | Finnish 64 | French 65 | Gabonese 66 | Gambian 67 | Georgian 68 | German 69 | Ghanaian 70 | Greek 71 | 
Grenadian 72 | Guatemalan 73 | Guinea-Bissauan 74 | Guinean 75 | Guyanese 76 | Haitian 77 | Herzegovinian 78 | Honduran 79 | Hungarian 80 | Icelander 81 | Indian 82 | Indonesian 83 | Iranian 84 | Iraqi 85 | Irish 86 | Israeli 87 | Italian 88 | Ivorian 89 | Jamaican 90 | Japanese 91 | Jordanian 92 | Kazakhstani 93 | Kenyan 94 | Kittian and Nevisian 95 | Kuwaiti 96 | Kyrgyz 97 | Laotian 98 | Latvian 99 | Lebanese 100 | Liberian 101 | Libyan 102 | Liechtensteiner 103 | Lithuanian 104 | Luxembourger 105 | Macedonian 106 | Malagasy 107 | Malawian 108 | Malaysian 109 | Maldivan 110 | Malian 111 | Maltese 112 | Marshallese 113 | Mauritanian 114 | Mauritian 115 | Mexican 116 | Micronesian 117 | Moldovan 118 | Monacan 119 | Mongolian 120 | Montenegrin 121 | Moroccan 122 | Mosotho 123 | Motswana 124 | Mozambican 125 | Namibian 126 | Nauruan 127 | Nepalese 128 | New Zealander 129 | Nicaraguan 130 | Nigerian 131 | Nigerien 132 | North Korean 133 | Northern Irish 134 | Norwegian 135 | Omani 136 | Pakistani 137 | Palauan 138 | Palestinian 139 | Panamanian 140 | Papua New Guinean 141 | Paraguayan 142 | Peruvian 143 | Polish 144 | Portuguese 145 | Qatari 146 | Romanian 147 | Russian 148 | Rwandan 149 | Saint Lucian 150 | Salvadoran 151 | Samoan 152 | San Marinese 153 | Sao Tomean 154 | Saudi 155 | Scottish 156 | Senegalese 157 | Serbian 158 | Seychellois 159 | Sierra Leonean 160 | Singaporean 161 | Slovakian 162 | Slovenian 163 | Solomon Islander 164 | Somali 165 | South African 166 | South Korean 167 | Spanish 168 | Sri Lankan 169 | Sudanese 170 | Surinamer 171 | Swazi 172 | Swedish 173 | Swiss 174 | Syrian 175 | Taiwanese 176 | Tajik 177 | Tanzanian 178 | Thai 179 | Togolese 180 | Tongan 181 | Trinidadian or Tobagonian 182 | Tunisian 183 | Turkish 184 | Tuvaluan 185 | Ugandan 186 | Ukrainian 187 | Uruguayan 188 | Uzbekistani 189 | Vanuatuan 190 | Venezuelan 191 | Vietnamese 192 | Welsh 193 | Yemenite 194 | Zambian 195 | Zimbabwean 196 | -------------------------------------------------------------------------------- /workflow/fooocus_describe.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 4, 3 | "last_link_id": 4, 4 | "nodes": [ 5 | { 6 | "id": 3, 7 | "type": "Display Any (rgthree)", 8 | "pos": [ 9 | 1808, 10 | 127 11 | ], 12 | "size": { 13 | "0": 331.6216125488281, 14 | "1": 262.77801513671875 15 | }, 16 | "flags": {}, 17 | "order": 2, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "source", 22 | "type": "*", 23 | "link": 3, 24 | "dir": 3 25 | } 26 | ], 27 | "properties": { 28 | "Node name for S&R": "Display Any (rgthree)" 29 | }, 30 | "widgets_values": [ 31 | "" 32 | ] 33 | }, 34 | { 35 | "id": 1, 36 | "type": "LoadImage", 37 | "pos": [ 38 | 1001, 39 | 134 40 | ], 41 | "size": { 42 | "0": 315, 43 | "1": 314 44 | }, 45 | "flags": {}, 46 | "order": 0, 47 | "mode": 0, 48 | "outputs": [ 49 | { 50 | "name": "IMAGE", 51 | "type": "IMAGE", 52 | "links": [ 53 | 4 54 | ], 55 | "slot_index": 0, 56 | "shape": 3 57 | }, 58 | { 59 | "name": "MASK", 60 | "type": "MASK", 61 | "links": null, 62 | "shape": 3 63 | } 64 | ], 65 | "properties": { 66 | "Node name for S&R": "LoadImage" 67 | }, 68 | "widgets_values": [ 69 | "example.png", 70 | "image" 71 | ] 72 | }, 73 | { 74 | "id": 4, 75 | "type": "Fooocus Describe", 76 | "pos": [ 77 | 1415, 78 | 136 79 | ], 80 | "size": { 81 | "0": 315, 82 | "1": 58 83 | }, 84 | "flags": {}, 85 | "order": 1, 86 | "mode": 0, 87 | "inputs": [ 88 | { 89 | "name": "image", 90 | "type": "IMAGE", 91 | "link": 4 92 | } 93 | 
], 94 | "outputs": [ 95 | { 96 | "name": "STRING", 97 | "type": "STRING", 98 | "links": [ 99 | 3 100 | ], 101 | "shape": 3, 102 | "slot_index": 0 103 | } 104 | ], 105 | "properties": { 106 | "Node name for S&R": "Fooocus Describe" 107 | }, 108 | "widgets_values": [ 109 | "Photo" 110 | ] 111 | } 112 | ], 113 | "links": [ 114 | [ 115 | 3, 116 | 4, 117 | 0, 118 | 3, 119 | 0, 120 | "*" 121 | ], 122 | [ 123 | 4, 124 | 1, 125 | 0, 126 | 4, 127 | 0, 128 | "IMAGE" 129 | ] 130 | ], 131 | "groups": [], 132 | "config": {}, 133 | "extra": { 134 | "ds": { 135 | "scale": 0.8264462809917362, 136 | "offset": [ 137 | -840.484618298602, 138 | 250.69346054186292 139 | ] 140 | } 141 | }, 142 | "version": 0.4 143 | } -------------------------------------------------------------------------------- /workflow/fooocus_prompt_expansion.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 6, 3 | "last_link_id": 5, 4 | "nodes": [ 5 | { 6 | "id": 6, 7 | "type": "Display Any (rgthree)", 8 | "pos": [ 9 | 1777, 10 | -46 11 | ], 12 | "size": [ 13 | 292.4664932986011, 14 | 196.80439102063673 15 | ], 16 | "flags": {}, 17 | "order": 1, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "source", 22 | "type": "*", 23 | "link": 5, 24 | "dir": 3 25 | } 26 | ], 27 | "properties": { 28 | "Node name for S&R": "Display Any (rgthree)" 29 | }, 30 | "widgets_values": [ 31 | "" 32 | ] 33 | }, 34 | { 35 | "id": 5, 36 | "type": "Fooocus Expansion", 37 | "pos": [ 38 | 1271, 39 | -46 40 | ], 41 | "size": { 42 | "0": 400, 43 | "1": 200 44 | }, 45 | "flags": {}, 46 | "order": 0, 47 | "mode": 0, 48 | "outputs": [ 49 | { 50 | "name": "final_prompt", 51 | "type": "STRING", 52 | "links": [ 53 | 5 54 | ], 55 | "shape": 3, 56 | "slot_index": 0 57 | }, 58 | { 59 | "name": "seed", 60 | "type": "INT", 61 | "links": null, 62 | "shape": 3 63 | } 64 | ], 65 | "properties": { 66 | "Node name for S&R": "Fooocus Expansion" 67 | }, 68 | "widgets_values": [ 69 | "cat", 70 | 3314052962, 71 | "fixed", 72 | true 73 | ] 74 | } 75 | ], 76 | "links": [ 77 | [ 78 | 5, 79 | 5, 80 | 0, 81 | 6, 82 | 0, 83 | "*" 84 | ] 85 | ], 86 | "groups": [], 87 | "config": {}, 88 | "extra": { 89 | "ds": { 90 | "scale": 0.8264462809917362, 91 | "offset": [ 92 | -840.342821423602, 93 | 250.98650741686293 94 | ] 95 | } 96 | }, 97 | "version": 0.4 98 | } -------------------------------------------------------------------------------- /workflow/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/workflow/screenshot.png --------------------------------------------------------------------------------
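The wildcard files listed above are plain text lists with one candidate per line, and wildcards/color_flower.txt shows the `__name__` token syntax in which each token stands for a random line drawn from the file of the same name (tokens may expand into further tokens, as `__color_flower__` does). The replacement logic used by the nodes is not part of this listing, so the following is only an illustrative sketch of how such wildcard files are commonly consumed; the directory path, the nesting limit, the `expand_wildcards` helper, and the seeding are assumptions:

# Illustrative sketch only: expand __name__ tokens using files in the wildcards/ folder.
# Directory layout, nesting limit, and RNG handling are assumptions, not the nodes' actual code.
import os
import random
import re

WILDCARDS_DIR = os.path.join(os.path.dirname(__file__), "wildcards")


def expand_wildcards(text: str, rng: random.Random, max_depth: int = 10) -> str:
    for _ in range(max_depth):  # bounded so self-referencing files cannot loop forever
        tokens = re.findall(r"__([\w-]+?)__", text)
        if not tokens:
            break
        for name in tokens:
            path = os.path.join(WILDCARDS_DIR, f"{name}.txt")
            if not os.path.isfile(path):
                continue  # unknown tokens are left untouched
            with open(path, encoding="utf-8") as f:
                choices = [line.strip() for line in f if line.strip()]
            if choices:
                text = text.replace(f"__{name}__", rng.choice(choices), 1)
    return text


# Example: expand_wildcards("a __color__ __flower__", random.Random(42))
# might return something like "a teal Tulip", depending on the seed.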