├── .github └── workflows │ └── publish.yml ├── .gitignore ├── README.md ├── __init__.py ├── fooocus_expansion ├── config.json ├── merges.txt ├── positive.txt ├── special_tokens_map.json ├── tokenizer.json ├── tokenizer_config.json └── vocab.json ├── py ├── __init__.py ├── api.py ├── extras │ ├── BLIP │ │ ├── configs │ │ │ ├── bert_config.json │ │ │ ├── caption_coco.yaml │ │ │ ├── med_config.json │ │ │ ├── nlvr.yaml │ │ │ ├── nocaps.yaml │ │ │ ├── pretrain.yaml │ │ │ ├── retrieval_coco.yaml │ │ │ ├── retrieval_flickr.yaml │ │ │ ├── retrieval_msrvtt.yaml │ │ │ └── vqa.yaml │ │ └── models │ │ │ ├── bert_tokenizer │ │ │ ├── config.json │ │ │ ├── tokenizer.json │ │ │ ├── tokenizer_config.json │ │ │ └── vocab.txt │ │ │ ├── blip.py │ │ │ ├── blip_itm.py │ │ │ ├── blip_nlvr.py │ │ │ ├── blip_pretrain.py │ │ │ ├── blip_retrieval.py │ │ │ ├── blip_vqa.py │ │ │ ├── med.py │ │ │ ├── nlvr_encoder.py │ │ │ └── vit.py │ ├── expansion.py │ ├── face_crop.py │ ├── facexlib │ │ ├── detection │ │ │ ├── __init__.py │ │ │ ├── align_trans.py │ │ │ ├── matlab_cp2tform.py │ │ │ ├── retinaface.py │ │ │ ├── retinaface_net.py │ │ │ └── retinaface_utils.py │ │ ├── parsing │ │ │ ├── __init__.py │ │ │ ├── bisenet.py │ │ │ ├── parsenet.py │ │ │ └── resnet.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── face_restoration_helper.py │ │ │ ├── face_utils.py │ │ │ └── misc.py │ ├── interrogate.py │ ├── ip_adapter.py │ ├── preprocessors.py │ ├── resampler.py │ ├── vae_interpose.py │ └── wd14tagger.py ├── fooocusNodes.py ├── ldm_patched │ ├── contrib │ │ ├── external.py │ │ ├── external_canny.py │ │ ├── external_clip_sdxl.py │ │ ├── external_compositing.py │ │ ├── external_custom_sampler.py │ │ ├── external_freelunch.py │ │ ├── external_hypernetwork.py │ │ ├── external_hypertile.py │ │ ├── external_images.py │ │ ├── external_latent.py │ │ ├── external_mask.py │ │ ├── external_model_advanced.py │ │ ├── external_model_downscale.py │ │ ├── external_model_merging.py │ │ ├── external_perpneg.py │ │ ├── external_photomaker.py │ │ ├── external_post_processing.py │ │ ├── external_rebatch.py │ │ ├── external_sag.py │ │ ├── external_sdupscale.py │ │ ├── external_stable3d.py │ │ ├── external_tomesd.py │ │ ├── external_upscale_model.py │ │ └── external_video_model.py │ ├── controlnet │ │ └── cldm.py │ ├── k_diffusion │ │ ├── sampling.py │ │ └── utils.py │ ├── ldm │ │ ├── models │ │ │ └── autoencoder.py │ │ ├── modules │ │ │ ├── attention.py │ │ │ ├── diffusionmodules │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── openaimodel.py │ │ │ │ ├── upscaling.py │ │ │ │ └── util.py │ │ │ ├── distributions │ │ │ │ ├── __init__.py │ │ │ │ └── distributions.py │ │ │ ├── ema.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ └── noise_aug_modules.py │ │ │ ├── sub_quadratic_attention.py │ │ │ └── temporal_ae.py │ │ └── util.py │ ├── modules │ │ ├── args_parser.py │ │ ├── checkpoint_pickle.py │ │ ├── clip_config_bigg.json │ │ ├── clip_model.py │ │ ├── clip_vision.py │ │ ├── clip_vision_config_g.json │ │ ├── clip_vision_config_h.json │ │ ├── clip_vision_config_vitl.json │ │ ├── conds.py │ │ ├── controlnet.py │ │ ├── diffusers_convert.py │ │ ├── diffusers_load.py │ │ ├── gligen.py │ │ ├── latent_formats.py │ │ ├── lora.py │ │ ├── model_base.py │ │ ├── model_detection.py │ │ ├── model_patcher.py │ │ ├── model_sampling.py │ │ ├── ops.py │ │ ├── options.py │ │ ├── sample.py │ │ ├── samplers.py │ │ ├── sd.py │ │ ├── sd1_clip.py │ │ ├── sd1_clip_config.json │ │ ├── sd1_tokenizer │ │ │ ├── merges.txt │ │ │ ├── special_tokens_map.json │ │ │ ├── 
tokenizer_config.json │ │ │ └── vocab.json │ │ ├── sd2_clip.py │ │ ├── sd2_clip_config.json │ │ ├── sdxl_clip.py │ │ ├── supported_models.py │ │ ├── supported_models_base.py │ │ └── utils.py │ ├── pfn │ │ ├── __init__.py │ │ ├── architecture │ │ │ ├── DAT.py │ │ │ ├── HAT.py │ │ │ ├── LICENSE-DAT │ │ │ ├── LICENSE-ESRGAN │ │ │ ├── LICENSE-HAT │ │ │ ├── LICENSE-RealESRGAN │ │ │ ├── LICENSE-SCUNet │ │ │ ├── LICENSE-SPSR │ │ │ ├── LICENSE-SwiftSRGAN │ │ │ ├── LICENSE-Swin2SR │ │ │ ├── LICENSE-SwinIR │ │ │ ├── LICENSE-lama │ │ │ ├── LaMa.py │ │ │ ├── OmniSR │ │ │ │ ├── ChannelAttention.py │ │ │ │ ├── LICENSE │ │ │ │ ├── OSA.py │ │ │ │ ├── OSAG.py │ │ │ │ ├── OmniSR.py │ │ │ │ ├── esa.py │ │ │ │ ├── layernorm.py │ │ │ │ └── pixelshuffle.py │ │ │ ├── RRDB.py │ │ │ ├── SCUNet.py │ │ │ ├── SPSR.py │ │ │ ├── SRVGG.py │ │ │ ├── SwiftSRGAN.py │ │ │ ├── Swin2SR.py │ │ │ ├── SwinIR.py │ │ │ ├── __init__.py │ │ │ ├── block.py │ │ │ ├── face │ │ │ │ ├── LICENSE-GFPGAN │ │ │ │ ├── LICENSE-RestoreFormer │ │ │ │ ├── LICENSE-codeformer │ │ │ │ ├── arcface_arch.py │ │ │ │ ├── codeformer.py │ │ │ │ ├── fused_act.py │ │ │ │ ├── gfpgan_bilinear_arch.py │ │ │ │ ├── gfpganv1_arch.py │ │ │ │ ├── gfpganv1_clean_arch.py │ │ │ │ ├── restoreformer_arch.py │ │ │ │ ├── stylegan2_arch.py │ │ │ │ ├── stylegan2_bilinear_arch.py │ │ │ │ ├── stylegan2_clean_arch.py │ │ │ │ └── upfirdn2d.py │ │ │ └── timm │ │ │ │ ├── LICENSE │ │ │ │ ├── drop.py │ │ │ │ ├── helpers.py │ │ │ │ └── weight_init.py │ │ ├── model_loading.py │ │ └── types.py │ ├── t2ia │ │ └── adapter.py │ ├── taesd │ │ └── taesd.py │ ├── unipc │ │ └── uni_pc.py │ └── utils │ │ ├── latent_visualization.py │ │ └── path_utils.py ├── libs │ └── utils.py ├── log.py ├── modules │ ├── __init__.py │ ├── advanced_parameters.py │ ├── anisotropic.py │ ├── config.py │ ├── constants.py │ ├── core.py │ ├── default_pipeline.py │ ├── flags.py │ ├── inpaint_worker.py │ ├── lora.py │ ├── model_loader.py │ ├── ops.py │ ├── patch.py │ ├── patch_clip.py │ ├── patch_precision.py │ ├── sample_hijack.py │ ├── sdxl_styles.py │ ├── upscaler.py │ └── util.py └── prompt.py ├── pyproject.toml ├── requirements.txt ├── screnshot ├── Fooocus.png └── FooocusNodes.png ├── sdxl_styles ├── sdxl_styles_diva.json ├── sdxl_styles_fooocus.json ├── sdxl_styles_marc_k3nt3l.json ├── sdxl_styles_mre.json ├── sdxl_styles_sai.json └── sdxl_styles_twri.json ├── web ├── js │ ├── dynamic_widgets.js │ ├── interface.js │ └── style_selector.js └── lib │ └── fabric.js ├── wildcards ├── artist.txt ├── color.txt ├── color_flower.txt ├── extended-color.txt ├── flower.txt └── nationality.txt └── workflow ├── basic.json ├── controlnet.json ├── detailer_fix.json ├── fooocus_describe.json ├── fooocus_prompt_expansion.json ├── imagePrompt_faceSwap.json ├── inpaint_outpaint.json ├── ipadapter_plus_style_transfer.json ├── screenshot.png └── upscale.json /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "pyproject.toml" 9 | 10 | jobs: 11 | publish-node: 12 | name: Publish Custom Node to registry 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Check out code 16 | uses: actions/checkout@v4 17 | - name: Publish Custom Node 18 | uses: Comfy-Org/publish-node-action@main 19 | with: 20 | ## Add your own personal access token to your Github Repository secrets and reference it here. 
21 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | /venv 3 | .vscode 4 | *.ckpt 5 | *.safetensors 6 | *.pth 7 | types 8 | *.pyc 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI Fooocus Nodes 2 | 3 | # Installation 4 | 5 | 1. Clone the repository: 6 | `git clone https://github.com/Seedsa/Fooocus_Nodes.git` 7 | into your ComfyUI `custom_nodes` directory 8 | 9 | # Update 10 | 11 | 1. Navigate to the cloned repo, e.g. `custom_nodes/Fooocus_Nodes` 12 | 2. `git pull` 13 | 14 | # Comparisons 15 | 16 | Reproduce the same images generated by Fooocus, directly in ComfyUI. 17 | 18 | ![ComfyUIFooocusNodes](screnshot/FooocusNodes.png) 19 | 20 | ![Fooocus](screnshot/Fooocus.png) 21 | 22 | # Features 23 | 24 | - [x] Fooocus Txt2image&Img2img 25 | - [x] Fooocus Inpaint&Outpaint 26 | - [x] Fooocus Upscale 27 | - [x] Fooocus ImagePrompt&FaceSwap 28 | - [x] Fooocus Canny&CPDS 29 | - [x] Fooocus Styles&PromptExpansion 30 | - [x] Fooocus DetailerFix 31 | - [x] Fooocus Describe 32 | 33 | # Example Workflows 34 | 35 | [example workflows](./workflow/) 36 | 37 | ![basic](/workflow/screenshot.png) 38 | 39 | ## Credits 40 | 41 | - [Fooocus](https://github.com/lllyasviel/Fooocus) 42 | - [ComfyUI-Easy-Use](https://github.com/yolain/ComfyUI-Easy-Use) 43 | - [ComfyUI](https://github.com/comfyanonymous/ComfyUI) 44 | 45 | # Acknowledgments 46 | 47 | This project builds upon and extends the original work found at [ComfyUI_Fooocus](https://github.com/17Retoucher/ComfyUI_Fooocus). 48 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module initializes and sets up the Fooocus extension for ComfyUI. 3 | It handles folder creation, file downloads, and node mapping for the extension.
4 | """ 5 | 6 | import os 7 | import importlib 8 | import shutil 9 | import folder_paths 10 | import filecmp 11 | def add_folder_path_and_extensions(folder_name, full_folder_paths, extensions): 12 | for full_folder_path in full_folder_paths: 13 | folder_paths.add_model_folder_path(folder_name, full_folder_path) 14 | if folder_name in folder_paths.folder_names_and_paths: 15 | current_paths, current_extensions = folder_paths.folder_names_and_paths[folder_name] 16 | updated_extensions = current_extensions | extensions 17 | folder_paths.folder_names_and_paths[folder_name] = (current_paths, updated_extensions) 18 | else: 19 | folder_paths.folder_names_and_paths[folder_name] = (full_folder_paths, extensions) 20 | 21 | model_path = folder_paths.models_dir 22 | add_folder_path_and_extensions("ultralytics_bbox", [os.path.join(model_path, "ultralytics", "bbox")], folder_paths.supported_pt_extensions) 23 | add_folder_path_and_extensions("ultralytics_segm", [os.path.join(model_path, "ultralytics", "segm")], folder_paths.supported_pt_extensions) 24 | add_folder_path_and_extensions("ultralytics", [os.path.join(model_path, "ultralytics")], folder_paths.supported_pt_extensions) 25 | add_folder_path_and_extensions("sams", [os.path.join(model_path, "sams")], folder_paths.supported_pt_extensions) 26 | add_folder_path_and_extensions("ipadapter", [os.path.join(model_path, "ipadapter")], folder_paths.supported_pt_extensions) 27 | add_folder_path_and_extensions("inpaint", [os.path.join(model_path, "inpaint")], folder_paths.supported_pt_extensions) 28 | add_folder_path_and_extensions("fooocus_expansion", [os.path.join(model_path, "fooocus_expansion")], folder_paths.supported_pt_extensions) 29 | 30 | from .py.modules.model_loader import load_file_from_url 31 | from .py.modules.config import ( 32 | path_fooocus_expansion as fooocus_expansion_path, 33 | ) 34 | from .py import log 35 | 36 | 37 | node_list = [ 38 | "api", 39 | "fooocusNodes", 40 | "prompt" 41 | ] 42 | 43 | 44 | NODE_CLASS_MAPPINGS = {} 45 | NODE_DISPLAY_NAME_MAPPINGS = {} 46 | for module_name in node_list: 47 | imported_module = importlib.import_module( 48 | ".py.{}".format(module_name), __name__) 49 | NODE_CLASS_MAPPINGS = {**NODE_CLASS_MAPPINGS, 50 | **imported_module.NODE_CLASS_MAPPINGS} 51 | NODE_DISPLAY_NAME_MAPPINGS = { 52 | **NODE_DISPLAY_NAME_MAPPINGS, **imported_module.NODE_DISPLAY_NAME_MAPPINGS} 53 | 54 | 55 | WEB_DIRECTORY = "./web" 56 | 57 | 58 | def recursive_overwrite(src, dest, ignore=None): 59 | if os.path.isdir(src): 60 | if not os.path.isdir(dest): 61 | os.makedirs(dest) 62 | files = os.listdir(src) 63 | if ignore is not None: 64 | ignored = ignore(src, files) 65 | else: 66 | ignored = set() 67 | for f in files: 68 | if f not in ignored: 69 | recursive_overwrite(os.path.join(src, f), 70 | os.path.join(dest, f), 71 | ignore) 72 | else: 73 | if not os.path.exists(dest) or not filecmp.cmp(src, dest): 74 | shutil.copyfile(src, dest) 75 | log.log_node_info(f'Copying file from {src} to {dest}') 76 | 77 | def get_ext_dir(subpath=None, mkdir=False): 78 | dir = os.path.dirname(__file__) 79 | if subpath is not None: 80 | dir = os.path.join(dir, subpath) 81 | dir = os.path.abspath(dir) 82 | if mkdir and not os.path.exists(dir): 83 | os.makedirs(dir) 84 | return dir 85 | 86 | 87 | def install_expansion(): 88 | src_dir = get_ext_dir("fooocus_expansion") 89 | if not os.path.exists(src_dir): 90 | log.log_node_error( 91 | "prompt_expansion is not exists. 
Please reinstall the extension.") 92 | return 93 | if not os.path.exists(fooocus_expansion_path): 94 | os.makedirs(fooocus_expansion_path) 95 | recursive_overwrite(src_dir, fooocus_expansion_path) 96 | 97 | 98 | def download_models(): 99 | vae_approx_filenames = [ 100 | ('xlvaeapp.pth', 'https://huggingface.co/lllyasviel/misc/resolve/main/xlvaeapp.pth'), 101 | ('vaeapp_sd15.pth', 102 | 'https://huggingface.co/lllyasviel/misc/resolve/main/vaeapp_sd15.pt'), 103 | ('xl-to-v1_interposer-v3.1.safetensors', 104 | 'https://huggingface.co/lllyasviel/misc/resolve/main/xl-to-v1_interposer-v3.1.safetensors') 105 | ] 106 | 107 | for file_name, url in vae_approx_filenames: 108 | load_file_from_url( 109 | url=url, model_dir="vae_approx", file_name=file_name) 110 | 111 | install_expansion() 112 | load_file_from_url( 113 | url='https://huggingface.co/lllyasviel/misc/resolve/main/fooocus_expansion.bin', 114 | model_dir="fooocus_expansion", 115 | file_name='pytorch_model.bin' 116 | ) 117 | 118 | 119 | download_models() 120 | 121 | __all__ = ['NODE_CLASS_MAPPINGS', 122 | 'NODE_DISPLAY_NAME_MAPPINGS', "WEB_DIRECTORY"] 123 | print("\033[0m\033[95m ComfyUI Fooocus Nodes :\033[0m \033[32mloaded\033[0m") 124 | -------------------------------------------------------------------------------- /fooocus_expansion/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "gpt2", 3 | "activation_function": "gelu_new", 4 | "architectures": [ 5 | "GPT2LMHeadModel" 6 | ], 7 | "attn_pdrop": 0.1, 8 | "bos_token_id": 50256, 9 | "embd_pdrop": 0.1, 10 | "eos_token_id": 50256, 11 | "pad_token_id": 50256, 12 | "initializer_range": 0.02, 13 | "layer_norm_epsilon": 1e-05, 14 | "model_type": "gpt2", 15 | "n_ctx": 1024, 16 | "n_embd": 768, 17 | "n_head": 12, 18 | "n_inner": null, 19 | "n_layer": 12, 20 | "n_positions": 1024, 21 | "reorder_and_upcast_attn": false, 22 | "resid_pdrop": 0.1, 23 | "scale_attn_by_inverse_layer_idx": false, 24 | "scale_attn_weights": true, 25 | "summary_activation": null, 26 | "summary_first_dropout": 0.1, 27 | "summary_proj_to_labels": true, 28 | "summary_type": "cls_index", 29 | "summary_use_proj": true, 30 | "task_specific_params": { 31 | "text-generation": { 32 | "do_sample": true, 33 | "max_length": 50 34 | } 35 | }, 36 | "torch_dtype": "float32", 37 | "transformers_version": "4.23.0.dev0", 38 | "use_cache": true, 39 | "vocab_size": 50257 40 | } 41 | -------------------------------------------------------------------------------- /fooocus_expansion/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": "<|endoftext|>", 3 | "eos_token": "<|endoftext|>", 4 | "unk_token": "<|endoftext|>" 5 | } 6 | -------------------------------------------------------------------------------- /fooocus_expansion/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": "<|endoftext|>", 4 | "eos_token": "<|endoftext|>", 5 | "model_max_length": 1024, 6 | "name_or_path": "gpt2", 7 | "special_tokens_map_file": null, 8 | "tokenizer_class": "GPT2Tokenizer", 9 | "unk_token": "<|endoftext|>" 10 | } 11 | -------------------------------------------------------------------------------- /py/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/__init__.py 
-------------------------------------------------------------------------------- /py/api.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import sys 4 | 5 | 6 | modules_path = os.path.dirname(os.path.realpath(__file__)) 7 | sys.path.append(modules_path) 8 | from server import PromptServer 9 | from modules.sdxl_styles import legal_style_names 10 | 11 | try: 12 | import aiohttp 13 | from aiohttp import web 14 | except ImportError: 15 | print("Module 'aiohttp' not installed. Please install it via:") 16 | print("pip install aiohttp") 17 | sys.exit() 18 | 19 | 20 | @PromptServer.instance.routes.get("/fooocus/prompt/styles") 21 | async def getStylesList(request): 22 | if "name" in request.rel_url.query: 23 | name = request.rel_url.query["name"] 24 | return web.json_response(legal_style_names) 25 | 26 | 27 | NODE_CLASS_MAPPINGS = {} 28 | NODE_DISPLAY_NAME_MAPPINGS = {} 29 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/bert_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertModel" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 768, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 3072, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 12, 15 | "num_hidden_layers": 12, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 30522, 19 | "encoder_width": 768, 20 | "add_cross_attention": true 21 | } 22 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/caption_coco.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/coco/images/' 2 | ann_root: 'annotation' 3 | coco_gt_root: 'annotation/coco_gt' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth' 7 | 8 | # size of vit model; base or large 9 | vit: 'base' 10 | vit_grad_ckpt: False 11 | vit_ckpt_layer: 0 12 | batch_size: 32 13 | init_lr: 1e-5 14 | 15 | # vit: 'large' 16 | # vit_grad_ckpt: True 17 | # vit_ckpt_layer: 5 18 | # batch_size: 16 19 | # init_lr: 2e-6 20 | 21 | image_size: 384 22 | 23 | # generation configs 24 | max_length: 20 25 | min_length: 5 26 | num_beams: 3 27 | prompt: 'a picture of ' 28 | 29 | # optimizer 30 | weight_decay: 0.05 31 | min_lr: 0 32 | max_epoch: 5 33 | 34 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/med_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertModel" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "hidden_act": "gelu", 7 | "hidden_dropout_prob": 0.1, 8 | "hidden_size": 768, 9 | "initializer_range": 0.02, 10 | "intermediate_size": 3072, 11 | "layer_norm_eps": 1e-12, 12 | "max_position_embeddings": 512, 13 | "model_type": "bert", 14 | "num_attention_heads": 12, 15 | "num_hidden_layers": 12, 16 | "pad_token_id": 0, 17 | "type_vocab_size": 2, 18 | "vocab_size": 30524, 19 | "encoder_width": 768, 20 | "add_cross_attention": true 21 | } 22 | -------------------------------------------------------------------------------- 
/py/extras/BLIP/configs/nlvr.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/NLVR2/' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_nlvr.pth' 6 | 7 | #size of vit model; base or large 8 | vit: 'base' 9 | batch_size_train: 16 10 | batch_size_test: 64 11 | vit_grad_ckpt: False 12 | vit_ckpt_layer: 0 13 | max_epoch: 15 14 | 15 | image_size: 384 16 | 17 | # optimizer 18 | weight_decay: 0.05 19 | init_lr: 3e-5 20 | min_lr: 0 21 | 22 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/nocaps.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/nocaps/' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth' 6 | 7 | vit: 'base' 8 | batch_size: 32 9 | 10 | image_size: 384 11 | 12 | max_length: 20 13 | min_length: 5 14 | num_beams: 3 15 | prompt: 'a picture of ' -------------------------------------------------------------------------------- /py/extras/BLIP/configs/pretrain.yaml: -------------------------------------------------------------------------------- 1 | train_file: ['/export/share/junnan-li/VL_pretrain/annotation/coco_karpathy_train.json', 2 | '/export/share/junnan-li/VL_pretrain/annotation/vg_caption.json', 3 | ] 4 | laion_path: '' 5 | 6 | # size of vit model; base or large 7 | vit: 'base' 8 | vit_grad_ckpt: False 9 | vit_ckpt_layer: 0 10 | 11 | image_size: 224 12 | batch_size: 75 13 | 14 | queue_size: 57600 15 | alpha: 0.4 16 | 17 | # optimizer 18 | weight_decay: 0.05 19 | init_lr: 3e-4 20 | min_lr: 1e-6 21 | warmup_lr: 1e-6 22 | lr_decay_rate: 0.9 23 | max_epoch: 20 24 | warmup_steps: 3000 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/retrieval_coco.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/coco/images/' 2 | ann_root: 'annotation' 3 | dataset: 'coco' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth' 7 | 8 | # size of vit model; base or large 9 | 10 | vit: 'base' 11 | batch_size_train: 32 12 | batch_size_test: 64 13 | vit_grad_ckpt: True 14 | vit_ckpt_layer: 4 15 | init_lr: 1e-5 16 | 17 | # vit: 'large' 18 | # batch_size_train: 16 19 | # batch_size_test: 32 20 | # vit_grad_ckpt: True 21 | # vit_ckpt_layer: 12 22 | # init_lr: 5e-6 23 | 24 | image_size: 384 25 | queue_size: 57600 26 | alpha: 0.4 27 | k_test: 256 28 | negative_all_rank: True 29 | 30 | # optimizer 31 | weight_decay: 0.05 32 | min_lr: 0 33 | max_epoch: 6 34 | 35 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/retrieval_flickr.yaml: -------------------------------------------------------------------------------- 1 | image_root: '/export/share/datasets/vision/flickr30k/' 2 | ann_root: 'annotation' 3 | dataset: 'flickr' 4 | 5 | # set pretrained as a file path or an url 6 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_flickr.pth' 7 | 8 
| # size of vit model; base or large 9 | 10 | vit: 'base' 11 | batch_size_train: 32 12 | batch_size_test: 64 13 | vit_grad_ckpt: True 14 | vit_ckpt_layer: 4 15 | init_lr: 1e-5 16 | 17 | # vit: 'large' 18 | # batch_size_train: 16 19 | # batch_size_test: 32 20 | # vit_grad_ckpt: True 21 | # vit_ckpt_layer: 10 22 | # init_lr: 5e-6 23 | 24 | image_size: 384 25 | queue_size: 57600 26 | alpha: 0.4 27 | k_test: 128 28 | negative_all_rank: False 29 | 30 | # optimizer 31 | weight_decay: 0.05 32 | min_lr: 0 33 | max_epoch: 6 34 | 35 | -------------------------------------------------------------------------------- /py/extras/BLIP/configs/retrieval_msrvtt.yaml: -------------------------------------------------------------------------------- 1 | video_root: '/export/share/dongxuli/data/msrvtt_retrieval/videos' 2 | ann_root: 'annotation' 3 | 4 | # set pretrained as a file path or an url 5 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_retrieval_coco.pth' 6 | 7 | # size of vit model; base or large 8 | vit: 'base' 9 | batch_size: 64 10 | k_test: 128 11 | image_size: 384 12 | num_frm_test: 8 -------------------------------------------------------------------------------- /py/extras/BLIP/configs/vqa.yaml: -------------------------------------------------------------------------------- 1 | vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' #followed by train2014/ 2 | vg_root: '/export/share/datasets/vision/visual-genome/' #followed by image/ 3 | train_files: ['vqa_train','vqa_val','vg_qa'] 4 | ann_root: 'annotation' 5 | 6 | # set pretrained as a file path or an url 7 | pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth' 8 | 9 | # size of vit model; base or large 10 | vit: 'base' 11 | batch_size_train: 16 12 | batch_size_test: 32 13 | vit_grad_ckpt: False 14 | vit_ckpt_layer: 0 15 | init_lr: 2e-5 16 | 17 | image_size: 480 18 | 19 | k_test: 128 20 | inference: 'rank' 21 | 22 | # optimizer 23 | weight_decay: 0.05 24 | min_lr: 0 25 | max_epoch: 10 -------------------------------------------------------------------------------- /py/extras/BLIP/models/bert_tokenizer/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "BertForMaskedLM" 4 | ], 5 | "attention_probs_dropout_prob": 0.1, 6 | "gradient_checkpointing": false, 7 | "hidden_act": "gelu", 8 | "hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "layer_norm_eps": 1e-12, 13 | "max_position_embeddings": 512, 14 | "model_type": "bert", 15 | "num_attention_heads": 12, 16 | "num_hidden_layers": 12, 17 | "pad_token_id": 0, 18 | "position_embedding_type": "absolute", 19 | "transformers_version": "4.6.0.dev0", 20 | "type_vocab_size": 2, 21 | "use_cache": true, 22 | "vocab_size": 30522 23 | } 24 | -------------------------------------------------------------------------------- /py/extras/BLIP/models/bert_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "do_lower_case": true 3 | } 4 | -------------------------------------------------------------------------------- /py/extras/BLIP/models/blip_itm.py: -------------------------------------------------------------------------------- 1 | from extras.BLIP.models.med import BertConfig, BertModel 2 | from transformers import BertTokenizer 3 | 4 | import torch 5 | from torch import nn 6 | 
import torch.nn.functional as F 7 | 8 | from extras.BLIP.models.blip import create_vit, init_tokenizer, load_checkpoint 9 | 10 | class BLIP_ITM(nn.Module): 11 | def __init__(self, 12 | med_config = 'configs/med_config.json', 13 | image_size = 384, 14 | vit = 'base', 15 | vit_grad_ckpt = False, 16 | vit_ckpt_layer = 0, 17 | embed_dim = 256, 18 | ): 19 | """ 20 | Args: 21 | med_config (str): path for the mixture of encoder-decoder model's configuration file 22 | image_size (int): input image size 23 | vit (str): model size of vision transformer 24 | """ 25 | super().__init__() 26 | 27 | self.visual_encoder, vision_width = create_vit(vit,image_size, vit_grad_ckpt, vit_ckpt_layer) 28 | self.tokenizer = init_tokenizer() 29 | med_config = BertConfig.from_json_file(med_config) 30 | med_config.encoder_width = vision_width 31 | self.text_encoder = BertModel(config=med_config, add_pooling_layer=False) 32 | 33 | text_width = self.text_encoder.config.hidden_size 34 | 35 | self.vision_proj = nn.Linear(vision_width, embed_dim) 36 | self.text_proj = nn.Linear(text_width, embed_dim) 37 | 38 | self.itm_head = nn.Linear(text_width, 2) 39 | 40 | 41 | def forward(self, image, caption, match_head='itm'): 42 | 43 | image_embeds = self.visual_encoder(image) 44 | image_atts = torch.ones(image_embeds.size()[:-1],dtype=torch.long).to(image.device) 45 | 46 | text = self.tokenizer(caption, padding='max_length', truncation=True, max_length=35, 47 | return_tensors="pt").to(image.device) 48 | 49 | 50 | if match_head=='itm': 51 | output = self.text_encoder(text.input_ids, 52 | attention_mask = text.attention_mask, 53 | encoder_hidden_states = image_embeds, 54 | encoder_attention_mask = image_atts, 55 | return_dict = True, 56 | ) 57 | itm_output = self.itm_head(output.last_hidden_state[:,0,:]) 58 | return itm_output 59 | 60 | elif match_head=='itc': 61 | text_output = self.text_encoder(text.input_ids, attention_mask = text.attention_mask, 62 | return_dict = True, mode = 'text') 63 | image_feat = F.normalize(self.vision_proj(image_embeds[:,0,:]),dim=-1) 64 | text_feat = F.normalize(self.text_proj(text_output.last_hidden_state[:,0,:]),dim=-1) 65 | 66 | sim = image_feat @ text_feat.t() 67 | return sim 68 | 69 | 70 | def blip_itm(pretrained='',**kwargs): 71 | model = BLIP_ITM(**kwargs) 72 | if pretrained: 73 | model,msg = load_checkpoint(model,pretrained) 74 | assert(len(msg.missing_keys)==0) 75 | return model 76 | -------------------------------------------------------------------------------- /py/extras/BLIP/models/blip_nlvr.py: -------------------------------------------------------------------------------- 1 | from extras.BLIP.models.med import BertConfig 2 | from extras.BLIP.models.nlvr_encoder import BertModel 3 | from extras.BLIP.models.vit import interpolate_pos_embed 4 | from extras.BLIP.models.blip import create_vit, init_tokenizer, is_url 5 | 6 | from timm.models.hub import download_cached_file 7 | 8 | import torch 9 | from torch import nn 10 | import torch.nn.functional as F 11 | from transformers import BertTokenizer 12 | import numpy as np 13 | import os 14 | 15 | 16 | class BLIP_NLVR(nn.Module): 17 | def __init__(self, 18 | med_config = 'configs/med_config.json', 19 | image_size = 480, 20 | vit = 'base', 21 | vit_grad_ckpt = False, 22 | vit_ckpt_layer = 0, 23 | ): 24 | """ 25 | Args: 26 | med_config (str): path for the mixture of encoder-decoder model's configuration file 27 | image_size (int): input image size 28 | vit (str): model size of vision transformer 29 | """ 30 | super().__init__() 31 | 32 | 
self.visual_encoder, vision_width = create_vit(vit,image_size, vit_grad_ckpt, vit_ckpt_layer, drop_path_rate=0.1) 33 | self.tokenizer = init_tokenizer() 34 | med_config = BertConfig.from_json_file(med_config) 35 | med_config.encoder_width = vision_width 36 | self.text_encoder = BertModel(config=med_config, add_pooling_layer=False) 37 | 38 | self.cls_head = nn.Sequential( 39 | nn.Linear(self.text_encoder.config.hidden_size, self.text_encoder.config.hidden_size), 40 | nn.ReLU(), 41 | nn.Linear(self.text_encoder.config.hidden_size, 2) 42 | ) 43 | 44 | def forward(self, image, text, targets, train=True): 45 | 46 | image_embeds = self.visual_encoder(image) 47 | image_atts = torch.ones(image_embeds.size()[:-1],dtype=torch.long).to(image.device) 48 | image0_embeds, image1_embeds = torch.split(image_embeds,targets.size(0)) 49 | 50 | text = self.tokenizer(text, padding='longest', return_tensors="pt").to(image.device) 51 | text.input_ids[:,0] = self.tokenizer.enc_token_id 52 | 53 | output = self.text_encoder(text.input_ids, 54 | attention_mask = text.attention_mask, 55 | encoder_hidden_states = [image0_embeds,image1_embeds], 56 | encoder_attention_mask = [image_atts[:image0_embeds.size(0)], 57 | image_atts[image0_embeds.size(0):]], 58 | return_dict = True, 59 | ) 60 | hidden_state = output.last_hidden_state[:,0,:] 61 | prediction = self.cls_head(hidden_state) 62 | 63 | if train: 64 | loss = F.cross_entropy(prediction, targets) 65 | return loss 66 | else: 67 | return prediction 68 | 69 | def blip_nlvr(pretrained='',**kwargs): 70 | model = BLIP_NLVR(**kwargs) 71 | if pretrained: 72 | model,msg = load_checkpoint(model,pretrained) 73 | print("missing keys:") 74 | print(msg.missing_keys) 75 | return model 76 | 77 | 78 | def load_checkpoint(model,url_or_filename): 79 | if is_url(url_or_filename): 80 | cached_file = download_cached_file(url_or_filename, check_hash=False, progress=True) 81 | checkpoint = torch.load(cached_file, map_location='cpu', weights_only=True) 82 | elif os.path.isfile(url_or_filename): 83 | checkpoint = torch.load(url_or_filename, map_location='cpu', weights_only=True) 84 | else: 85 | raise RuntimeError('checkpoint url or path is invalid') 86 | state_dict = checkpoint['model'] 87 | 88 | state_dict['visual_encoder.pos_embed'] = interpolate_pos_embed(state_dict['visual_encoder.pos_embed'],model.visual_encoder) 89 | 90 | for key in list(state_dict.keys()): 91 | if 'crossattention.self.' in key: 92 | new_key0 = key.replace('self','self0') 93 | new_key1 = key.replace('self','self1') 94 | state_dict[new_key0] = state_dict[key] 95 | state_dict[new_key1] = state_dict[key] 96 | elif 'crossattention.output.dense.' in key: 97 | new_key0 = key.replace('dense','dense0') 98 | new_key1 = key.replace('dense','dense1') 99 | state_dict[new_key0] = state_dict[key] 100 | state_dict[new_key1] = state_dict[key] 101 | 102 | msg = model.load_state_dict(state_dict,strict=False) 103 | print('load checkpoint from %s'%url_or_filename) 104 | return model,msg 105 | -------------------------------------------------------------------------------- /py/extras/expansion.py: -------------------------------------------------------------------------------- 1 | # Fooocus GPT2 Expansion 2 | # Algorithm created by Lvmin Zhang at 2023, Stanford 3 | # If used inside Fooocus, any use is permitted. 4 | # If used outside Fooocus, only non-commercial use is permitted (CC-By NC 4.0). 5 | # This applies to the word list, vocab, model, and algorithm. 
6 | 7 | 8 | import os 9 | import torch 10 | import math 11 | import comfy.model_management as model_management 12 | 13 | from transformers.generation.logits_process import LogitsProcessorList 14 | from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed 15 | from modules.config import path_fooocus_expansion 16 | from ldm_patched.modules.model_patcher import FooocusModelPatcher 17 | 18 | 19 | # limitation of np.random.seed(), called from transformers.set_seed() 20 | SEED_LIMIT_NUMPY = 2**32 21 | neg_inf = - 8192.0 22 | 23 | 24 | def safe_str(x): 25 | x = str(x) 26 | for _ in range(16): 27 | x = x.replace(' ', ' ') 28 | return x.strip(",. \r\n") 29 | 30 | 31 | def remove_pattern(x, pattern): 32 | for p in pattern: 33 | x = x.replace(p, '') 34 | return x 35 | 36 | 37 | class FooocusExpansion: 38 | def __init__(self): 39 | self.tokenizer = AutoTokenizer.from_pretrained(path_fooocus_expansion) 40 | 41 | positive_words = open(os.path.join(path_fooocus_expansion, 'positive.txt'), 42 | encoding='utf-8').read().splitlines() 43 | positive_words = ['Ġ' + x.lower() for x in positive_words if x != ''] 44 | 45 | self.logits_bias = torch.zeros((1, len(self.tokenizer.vocab)), dtype=torch.float32) + neg_inf 46 | 47 | debug_list = [] 48 | for k, v in self.tokenizer.vocab.items(): 49 | if k in positive_words: 50 | self.logits_bias[0, v] = 0 51 | debug_list.append(k[1:]) 52 | 53 | print(f'Fooocus V2 Expansion: Vocab with {len(debug_list)} words.') 54 | 55 | # debug_list = '\n'.join(sorted(debug_list)) 56 | # print(debug_list) 57 | 58 | # t11 = self.tokenizer(',', return_tensors="np") 59 | # t198 = self.tokenizer('\n', return_tensors="np") 60 | # eos = self.tokenizer.eos_token_id 61 | 62 | self.model = AutoModelForCausalLM.from_pretrained(path_fooocus_expansion) 63 | self.model.eval() 64 | 65 | load_device = model_management.text_encoder_device() 66 | offload_device = model_management.text_encoder_offload_device() 67 | 68 | # MPS hack 69 | if model_management.is_device_mps(load_device): 70 | load_device = torch.device('cpu') 71 | offload_device = torch.device('cpu') 72 | 73 | use_fp16 = model_management.should_use_fp16(device=load_device) 74 | 75 | if use_fp16: 76 | self.model.half() 77 | 78 | self.patcher = FooocusModelPatcher(self.model, load_device=load_device, offload_device=offload_device) 79 | print(f'Fooocus Expansion engine loaded for {load_device}, use_fp16 = {use_fp16}.') 80 | 81 | @torch.no_grad() 82 | @torch.inference_mode() 83 | def logits_processor(self, input_ids, scores): 84 | assert scores.ndim == 2 and scores.shape[0] == 1 85 | self.logits_bias = self.logits_bias.to(scores) 86 | 87 | bias = self.logits_bias.clone() 88 | bias[0, input_ids[0].to(bias.device).long()] = neg_inf 89 | bias[0, 11] = 0 90 | 91 | return scores + bias 92 | 93 | @torch.no_grad() 94 | @torch.inference_mode() 95 | def __call__(self, prompt, seed): 96 | if prompt == '': 97 | return '' 98 | 99 | if self.patcher.current_loaded_device() != self.patcher.load_device: 100 | print('Fooocus Expansion loaded by itself.') 101 | model_management.load_model_gpu(self.patcher) 102 | 103 | seed = int(seed) % SEED_LIMIT_NUMPY 104 | set_seed(seed) 105 | prompt = safe_str(prompt) + ',' 106 | 107 | tokenized_kwargs = self.tokenizer(prompt, return_tensors="pt") 108 | tokenized_kwargs.data['input_ids'] = tokenized_kwargs.data['input_ids'].to(self.patcher.load_device) 109 | tokenized_kwargs.data['attention_mask'] = tokenized_kwargs.data['attention_mask'].to(self.patcher.load_device) 110 | 111 | current_token_length = 
int(tokenized_kwargs.data['input_ids'].shape[1]) 112 | max_token_length = 75 * int(math.ceil(float(current_token_length) / 75.0)) 113 | max_new_tokens = max_token_length - current_token_length 114 | 115 | # https://huggingface.co/blog/introducing-csearch 116 | # https://huggingface.co/docs/transformers/generation_strategies 117 | features = self.model.generate(**tokenized_kwargs, 118 | top_k=100, 119 | max_new_tokens=max_new_tokens, 120 | do_sample=True, 121 | logits_processor=LogitsProcessorList([self.logits_processor])) 122 | 123 | response = self.tokenizer.batch_decode(features, skip_special_tokens=True) 124 | result = safe_str(response[0]) 125 | 126 | return result 127 | -------------------------------------------------------------------------------- /py/extras/face_crop.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import modules.config 4 | 5 | 6 | faceRestoreHelper = None 7 | 8 | 9 | def align_warp_face(self, landmark, border_mode='constant'): 10 | affine_matrix = cv2.estimateAffinePartial2D(landmark, self.face_template, method=cv2.LMEDS)[0] 11 | self.affine_matrices.append(affine_matrix) 12 | if border_mode == 'constant': 13 | border_mode = cv2.BORDER_CONSTANT 14 | elif border_mode == 'reflect101': 15 | border_mode = cv2.BORDER_REFLECT101 16 | elif border_mode == 'reflect': 17 | border_mode = cv2.BORDER_REFLECT 18 | input_img = self.input_img 19 | cropped_face = cv2.warpAffine(input_img, affine_matrix, self.face_size, 20 | borderMode=border_mode, borderValue=(135, 133, 132)) 21 | return cropped_face 22 | 23 | 24 | def crop_image(img_rgb): 25 | global faceRestoreHelper 26 | 27 | if faceRestoreHelper is None: 28 | from extras.facexlib.utils.face_restoration_helper import FaceRestoreHelper 29 | faceRestoreHelper = FaceRestoreHelper( 30 | upscale_factor=1, 31 | model_rootpath=modules.config.path_controlnet, 32 | device='cpu' # use cpu is safer since we are out of memory management 33 | ) 34 | 35 | faceRestoreHelper.clean_all() 36 | faceRestoreHelper.read_image(np.ascontiguousarray(img_rgb[:, :, ::-1].copy())) 37 | faceRestoreHelper.get_face_landmarks_5() 38 | 39 | landmarks = faceRestoreHelper.all_landmarks_5 40 | # landmarks are already sorted with confidence. 
41 | 42 | if len(landmarks) == 0: 43 | print('No face detected') 44 | return img_rgb 45 | else: 46 | print(f'Detected {len(landmarks)} faces') 47 | 48 | result = align_warp_face(faceRestoreHelper, landmarks[0]) 49 | 50 | return np.ascontiguousarray(result[:, :, ::-1].copy()) 51 | -------------------------------------------------------------------------------- /py/extras/facexlib/detection/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from copy import deepcopy 3 | 4 | from extras.facexlib.utils import load_file_from_url 5 | from .retinaface import RetinaFace 6 | 7 | 8 | def init_detection_model(model_name, half=False, device='cuda', model_rootpath=None): 9 | if model_name == 'retinaface_resnet50': 10 | model = RetinaFace(network_name='resnet50', half=half, device=device) 11 | model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth' 12 | elif model_name == 'retinaface_mobile0.25': 13 | model = RetinaFace(network_name='mobile0.25', half=half, device=device) 14 | model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_mobilenet0.25_Final.pth' 15 | else: 16 | raise NotImplementedError(f'{model_name} is not implemented.') 17 | 18 | model_path = load_file_from_url( 19 | url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath) 20 | 21 | # TODO: clean pretrained model 22 | load_net = torch.load(model_path, map_location=lambda storage, loc: storage) 23 | # remove unnecessary 'module.' 24 | for k, v in deepcopy(load_net).items(): 25 | if k.startswith('module.'): 26 | load_net[k[7:]] = v 27 | load_net.pop(k) 28 | model.load_state_dict(load_net, strict=True) 29 | model.eval() 30 | model = model.to(device) 31 | return model 32 | -------------------------------------------------------------------------------- /py/extras/facexlib/parsing/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from extras.facexlib.utils import load_file_from_url 4 | from .bisenet import BiSeNet 5 | from .parsenet import ParseNet 6 | 7 | 8 | def init_parsing_model(model_name='bisenet', half=False, device='cuda', model_rootpath=None): 9 | if model_name == 'bisenet': 10 | model = BiSeNet(num_class=19) 11 | model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.2.0/parsing_bisenet.pth' 12 | elif model_name == 'parsenet': 13 | model = ParseNet(in_size=512, out_size=512, parsing_ch=19) 14 | model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth' 15 | else: 16 | raise NotImplementedError(f'{model_name} is not implemented.') 17 | 18 | model_path = load_file_from_url( 19 | url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath) 20 | load_net = torch.load(model_path, map_location=lambda storage, loc: storage) 21 | model.load_state_dict(load_net, strict=True) 22 | model.eval() 23 | model = model.to(device) 24 | return model 25 | -------------------------------------------------------------------------------- /py/extras/facexlib/parsing/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv3x3(in_planes, out_planes, stride=1): 6 | """3x3 convolution with padding""" 7 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 8 | 9 | 10 | 
class BasicBlock(nn.Module): 11 | 12 | def __init__(self, in_chan, out_chan, stride=1): 13 | super(BasicBlock, self).__init__() 14 | self.conv1 = conv3x3(in_chan, out_chan, stride) 15 | self.bn1 = nn.BatchNorm2d(out_chan) 16 | self.conv2 = conv3x3(out_chan, out_chan) 17 | self.bn2 = nn.BatchNorm2d(out_chan) 18 | self.relu = nn.ReLU(inplace=True) 19 | self.downsample = None 20 | if in_chan != out_chan or stride != 1: 21 | self.downsample = nn.Sequential( 22 | nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=stride, bias=False), 23 | nn.BatchNorm2d(out_chan), 24 | ) 25 | 26 | def forward(self, x): 27 | residual = self.conv1(x) 28 | residual = F.relu(self.bn1(residual)) 29 | residual = self.conv2(residual) 30 | residual = self.bn2(residual) 31 | 32 | shortcut = x 33 | if self.downsample is not None: 34 | shortcut = self.downsample(x) 35 | 36 | out = shortcut + residual 37 | out = self.relu(out) 38 | return out 39 | 40 | 41 | def create_layer_basic(in_chan, out_chan, bnum, stride=1): 42 | layers = [BasicBlock(in_chan, out_chan, stride=stride)] 43 | for i in range(bnum - 1): 44 | layers.append(BasicBlock(out_chan, out_chan, stride=1)) 45 | return nn.Sequential(*layers) 46 | 47 | 48 | class ResNet18(nn.Module): 49 | 50 | def __init__(self): 51 | super(ResNet18, self).__init__() 52 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 53 | self.bn1 = nn.BatchNorm2d(64) 54 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 55 | self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) 56 | self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) 57 | self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2) 58 | self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) 59 | 60 | def forward(self, x): 61 | x = self.conv1(x) 62 | x = F.relu(self.bn1(x)) 63 | x = self.maxpool(x) 64 | 65 | x = self.layer1(x) 66 | feat8 = self.layer2(x) # 1/8 67 | feat16 = self.layer3(feat8) # 1/16 68 | feat32 = self.layer4(feat16) # 1/32 69 | return feat8, feat16, feat32 70 | -------------------------------------------------------------------------------- /py/extras/facexlib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .face_utils import align_crop_face_landmarks, compute_increased_bbox, get_valid_bboxes, paste_face_back 2 | from .misc import img2tensor, load_file_from_url, scandir 3 | 4 | __all__ = [ 5 | 'align_crop_face_landmarks', 'compute_increased_bbox', 'get_valid_bboxes', 'load_file_from_url', 'paste_face_back', 6 | 'img2tensor', 'scandir' 7 | ] 8 | -------------------------------------------------------------------------------- /py/extras/facexlib/utils/misc.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import os.path as osp 4 | import torch 5 | from torch.hub import download_url_to_file, get_dir 6 | from urllib.parse import urlparse 7 | 8 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | 10 | 11 | def imwrite(img, file_path, params=None, auto_mkdir=True): 12 | """Write image to file. 13 | 14 | Args: 15 | img (ndarray): Image array to be written. 16 | file_path (str): Image file path. 17 | params (None or list): Same as opencv's :func:`imwrite` interface. 18 | auto_mkdir (bool): If the parent folder of `file_path` does not exist, 19 | whether to create it automatically. 20 | 21 | Returns: 22 | bool: Successful or not. 
23 | """ 24 | if auto_mkdir: 25 | dir_name = os.path.abspath(os.path.dirname(file_path)) 26 | os.makedirs(dir_name, exist_ok=True) 27 | return cv2.imwrite(file_path, img, params) 28 | 29 | 30 | def img2tensor(imgs, bgr2rgb=True, float32=True): 31 | """Numpy array to tensor. 32 | 33 | Args: 34 | imgs (list[ndarray] | ndarray): Input images. 35 | bgr2rgb (bool): Whether to change bgr to rgb. 36 | float32 (bool): Whether to change to float32. 37 | 38 | Returns: 39 | list[tensor] | tensor: Tensor images. If returned results only have 40 | one element, just return tensor. 41 | """ 42 | 43 | def _totensor(img, bgr2rgb, float32): 44 | if img.shape[2] == 3 and bgr2rgb: 45 | if img.dtype == 'float64': 46 | img = img.astype('float32') 47 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 48 | img = torch.from_numpy(img.transpose(2, 0, 1)) 49 | if float32: 50 | img = img.float() 51 | return img 52 | 53 | if isinstance(imgs, list): 54 | return [_totensor(img, bgr2rgb, float32) for img in imgs] 55 | else: 56 | return _totensor(imgs, bgr2rgb, float32) 57 | 58 | 59 | def load_file_from_url(url, model_dir=None, progress=True, file_name=None, save_dir=None): 60 | """Ref:https://github.com/1adrianb/face-alignment/blob/master/face_alignment/utils.py 61 | """ 62 | if model_dir is None: 63 | hub_dir = get_dir() 64 | model_dir = os.path.join(hub_dir, 'checkpoints') 65 | 66 | if save_dir is None: 67 | save_dir = os.path.join(ROOT_DIR, model_dir) 68 | os.makedirs(save_dir, exist_ok=True) 69 | 70 | parts = urlparse(url) 71 | filename = os.path.basename(parts.path) 72 | if file_name is not None: 73 | filename = file_name 74 | cached_file = os.path.abspath(os.path.join(save_dir, filename)) 75 | if not os.path.exists(cached_file): 76 | print(f'Downloading: "{url}" to {cached_file}\n') 77 | download_url_to_file(url, cached_file, hash_prefix=None, progress=progress) 78 | return cached_file 79 | 80 | 81 | def scandir(dir_path, suffix=None, recursive=False, full_path=False): 82 | """Scan a directory to find the interested files. 83 | Args: 84 | dir_path (str): Path of the directory. 85 | suffix (str | tuple(str), optional): File suffix that we are 86 | interested in. Default: None. 87 | recursive (bool, optional): If set to True, recursively scan the 88 | directory. Default: False. 89 | full_path (bool, optional): If set to True, include the dir_path. 90 | Default: False. 91 | Returns: 92 | A generator for all the interested files with relative paths. 
93 | """ 94 | 95 | if (suffix is not None) and not isinstance(suffix, (str, tuple)): 96 | raise TypeError('"suffix" must be a string or tuple of strings') 97 | 98 | root = dir_path 99 | 100 | def _scandir(dir_path, suffix, recursive): 101 | for entry in os.scandir(dir_path): 102 | if not entry.name.startswith('.') and entry.is_file(): 103 | if full_path: 104 | return_path = entry.path 105 | else: 106 | return_path = osp.relpath(entry.path, root) 107 | 108 | if suffix is None: 109 | yield return_path 110 | elif return_path.endswith(suffix): 111 | yield return_path 112 | else: 113 | if recursive: 114 | yield from _scandir(entry.path, suffix=suffix, recursive=recursive) 115 | else: 116 | continue 117 | 118 | return _scandir(dir_path, suffix=suffix, recursive=recursive) 119 | -------------------------------------------------------------------------------- /py/extras/interrogate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import comfy.model_management as model_management 4 | 5 | from torchvision import transforms 6 | from torchvision.transforms.functional import InterpolationMode 7 | from modules.model_loader import load_file_from_url 8 | from ldm_patched.modules.model_patcher import FooocusModelPatcher 9 | from extras.BLIP.models.blip import blip_decoder 10 | 11 | 12 | blip_image_eval_size = 384 13 | blip_repo_root = os.path.join(os.path.dirname(__file__), 'BLIP') 14 | 15 | 16 | class Interrogator: 17 | def __init__(self): 18 | self.blip_model = None 19 | self.load_device = torch.device('cpu') 20 | self.offload_device = torch.device('cpu') 21 | self.dtype = torch.float32 22 | 23 | @torch.no_grad() 24 | @torch.inference_mode() 25 | def interrogate(self, img_rgb): 26 | if self.blip_model is None: 27 | filename = load_file_from_url( 28 | url='https://huggingface.co/lllyasviel/misc/resolve/main/model_base_caption_capfilt_large.pth', 29 | model_dir="clip_vision", 30 | file_name='model_base_caption_capfilt_large.pth', 31 | ) 32 | 33 | model = blip_decoder(pretrained=filename, image_size=blip_image_eval_size, vit='base', 34 | med_config=os.path.join(blip_repo_root, "configs", "med_config.json")) 35 | model.eval() 36 | 37 | self.load_device = model_management.text_encoder_device() 38 | self.offload_device = model_management.text_encoder_offload_device() 39 | self.dtype = torch.float32 40 | 41 | model.to(self.offload_device) 42 | 43 | if model_management.should_use_fp16(device=self.load_device): 44 | model.half() 45 | self.dtype = torch.float16 46 | 47 | self.blip_model = FooocusModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device) 48 | 49 | model_management.load_model_gpu(self.blip_model) 50 | 51 | gpu_image = transforms.Compose([ 52 | transforms.ToTensor(), 53 | transforms.Resize((blip_image_eval_size, blip_image_eval_size), interpolation=InterpolationMode.BICUBIC), 54 | transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)) 55 | ])(img_rgb).unsqueeze(0).to(device=self.load_device, dtype=self.dtype) 56 | 57 | caption = self.blip_model.model.generate(gpu_image, sample=True, num_beams=1, max_length=75)[0] 58 | 59 | return caption 60 | 61 | 62 | default_interrogator = Interrogator().interrogate 63 | -------------------------------------------------------------------------------- /py/extras/preprocessors.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import 
modules.advanced_parameters as advanced_parameters 4 | 5 | 6 | def centered_canny(x: np.ndarray): 7 | assert isinstance(x, np.ndarray) 8 | assert x.ndim == 2 and x.dtype == np.uint8 9 | 10 | y = cv2.Canny(x, int(64), int(128)) 11 | y = y.astype(np.float32) / 255.0 12 | return y 13 | 14 | 15 | def centered_canny_color(x: np.ndarray): 16 | assert isinstance(x, np.ndarray) 17 | assert x.ndim == 3 and x.shape[2] == 3 18 | 19 | result = [centered_canny(x[..., i]) for i in range(3)] 20 | result = np.stack(result, axis=2) 21 | return result 22 | 23 | 24 | def pyramid_canny_color(x: np.ndarray): 25 | assert isinstance(x, np.ndarray) 26 | assert x.ndim == 3 and x.shape[2] == 3 27 | 28 | H, W, C = x.shape 29 | acc_edge = None 30 | 31 | for k in [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]: 32 | Hs, Ws = int(H * k), int(W * k) 33 | small = cv2.resize(x, (Ws, Hs), interpolation=cv2.INTER_AREA) 34 | edge = centered_canny_color(small) 35 | if acc_edge is None: 36 | acc_edge = edge 37 | else: 38 | acc_edge = cv2.resize(acc_edge, (edge.shape[1], edge.shape[0]), interpolation=cv2.INTER_LINEAR) 39 | acc_edge = acc_edge * 0.75 + edge * 0.25 40 | 41 | return acc_edge 42 | 43 | 44 | def norm255(x, low=4, high=96): 45 | assert isinstance(x, np.ndarray) 46 | assert x.ndim == 2 and x.dtype == np.float32 47 | 48 | v_min = np.percentile(x, low) 49 | v_max = np.percentile(x, high) 50 | 51 | x -= v_min 52 | x /= v_max - v_min 53 | 54 | return x * 255.0 55 | 56 | 57 | def canny_pyramid(x): 58 | # For some reasons, SAI's Control-lora Canny seems to be trained on canny maps with non-standard resolutions. 59 | # Then we use pyramid to use all resolutions to avoid missing any structure in specific resolutions. 60 | 61 | color_canny = pyramid_canny_color(x) 62 | result = np.sum(color_canny, axis=2) 63 | 64 | return norm255(result, low=1, high=99).clip(0, 255).astype(np.uint8) 65 | 66 | 67 | def cpds(x): 68 | # cv2.decolor is not "decolor", it is Cewu Lu's method 69 | # See http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html 70 | # See https://docs.opencv.org/3.0-beta/modules/photo/doc/decolor.html 71 | 72 | raw = cv2.GaussianBlur(x, (0, 0), 0.8) 73 | density, boost = cv2.decolor(raw) 74 | 75 | raw = raw.astype(np.float32) 76 | density = density.astype(np.float32) 77 | boost = boost.astype(np.float32) 78 | 79 | offset = np.sum((raw - boost) ** 2.0, axis=2) ** 0.5 80 | result = density + offset 81 | 82 | return norm255(result, low=4, high=96).clip(0, 255).astype(np.uint8) 83 | -------------------------------------------------------------------------------- /py/extras/resampler.py: -------------------------------------------------------------------------------- 1 | # modified from https://github.com/mlfoundations/open_flamingo/blob/main/open_flamingo/src/helpers.py 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | 8 | # FFN 9 | def FeedForward(dim, mult=4): 10 | inner_dim = int(dim * mult) 11 | return nn.Sequential( 12 | nn.LayerNorm(dim), 13 | nn.Linear(dim, inner_dim, bias=False), 14 | nn.GELU(), 15 | nn.Linear(inner_dim, dim, bias=False), 16 | ) 17 | 18 | 19 | def reshape_tensor(x, heads): 20 | bs, length, width = x.shape 21 | #(bs, length, width) --> (bs, length, n_heads, dim_per_head) 22 | x = x.view(bs, length, heads, -1) 23 | # (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head) 24 | x = x.transpose(1, 2) 25 | # (bs, n_heads, length, dim_per_head) --> (bs*n_heads, length, dim_per_head) 26 | x = x.reshape(bs, heads, length, -1) 27 | return x 28 | 29 | 30 | class 
PerceiverAttention(nn.Module): 31 | def __init__(self, *, dim, dim_head=64, heads=8): 32 | super().__init__() 33 | self.scale = dim_head**-0.5 34 | self.dim_head = dim_head 35 | self.heads = heads 36 | inner_dim = dim_head * heads 37 | 38 | self.norm1 = nn.LayerNorm(dim) 39 | self.norm2 = nn.LayerNorm(dim) 40 | 41 | self.to_q = nn.Linear(dim, inner_dim, bias=False) 42 | self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False) 43 | self.to_out = nn.Linear(inner_dim, dim, bias=False) 44 | 45 | 46 | def forward(self, x, latents): 47 | """ 48 | Args: 49 | x (torch.Tensor): image features 50 | shape (b, n1, D) 51 | latent (torch.Tensor): latent features 52 | shape (b, n2, D) 53 | """ 54 | x = self.norm1(x) 55 | latents = self.norm2(latents) 56 | 57 | b, l, _ = latents.shape 58 | 59 | q = self.to_q(latents) 60 | kv_input = torch.cat((x, latents), dim=-2) 61 | k, v = self.to_kv(kv_input).chunk(2, dim=-1) 62 | 63 | q = reshape_tensor(q, self.heads) 64 | k = reshape_tensor(k, self.heads) 65 | v = reshape_tensor(v, self.heads) 66 | 67 | # attention 68 | scale = 1 / math.sqrt(math.sqrt(self.dim_head)) 69 | weight = (q * scale) @ (k * scale).transpose(-2, -1) # More stable with f16 than dividing afterwards 70 | weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) 71 | out = weight @ v 72 | 73 | out = out.permute(0, 2, 1, 3).reshape(b, l, -1) 74 | 75 | return self.to_out(out) 76 | 77 | 78 | class Resampler(nn.Module): 79 | def __init__( 80 | self, 81 | dim=1024, 82 | depth=8, 83 | dim_head=64, 84 | heads=16, 85 | num_queries=8, 86 | embedding_dim=768, 87 | output_dim=1024, 88 | ff_mult=4, 89 | ): 90 | super().__init__() 91 | 92 | self.latents = nn.Parameter(torch.randn(1, num_queries, dim) / dim**0.5) 93 | 94 | self.proj_in = nn.Linear(embedding_dim, dim) 95 | 96 | self.proj_out = nn.Linear(dim, output_dim) 97 | self.norm_out = nn.LayerNorm(output_dim) 98 | 99 | self.layers = nn.ModuleList([]) 100 | for _ in range(depth): 101 | self.layers.append( 102 | nn.ModuleList( 103 | [ 104 | PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads), 105 | FeedForward(dim=dim, mult=ff_mult), 106 | ] 107 | ) 108 | ) 109 | 110 | def forward(self, x): 111 | latents = self.latents.repeat(x.size(0), 1, 1).to(x) 112 | 113 | x = self.proj_in(x) 114 | 115 | for attn, ff in self.layers: 116 | latents = attn(x, latents) + latents 117 | latents = ff(latents) + latents 118 | 119 | latents = self.proj_out(latents) 120 | return self.norm_out(latents) 121 | -------------------------------------------------------------------------------- /py/extras/vae_interpose.py: -------------------------------------------------------------------------------- 1 | # https://github.com/city96/SD-Latent-Interposer/blob/main/interposer.py 2 | 3 | import os 4 | import torch 5 | import safetensors.torch as sf 6 | import torch.nn as nn 7 | import comfy.model_management 8 | 9 | from ldm_patched.modules.model_patcher import FooocusModelPatcher 10 | import folder_paths 11 | 12 | class Block(nn.Module): 13 | def __init__(self, size): 14 | super().__init__() 15 | self.join = nn.ReLU() 16 | self.long = nn.Sequential( 17 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 18 | nn.LeakyReLU(0.1), 19 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 20 | nn.LeakyReLU(0.1), 21 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 22 | ) 23 | 24 | def forward(self, x): 25 | y = self.long(x) 26 | z = self.join(y + x) 27 | return z 28 | 29 | 30 | class Interposer(nn.Module): 31 | def __init__(self): 32 | super().__init__() 
33 | self.chan = 4 34 | self.hid = 128 35 | 36 | self.head_join = nn.ReLU() 37 | self.head_short = nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1) 38 | self.head_long = nn.Sequential( 39 | nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1), 40 | nn.LeakyReLU(0.1), 41 | nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), 42 | nn.LeakyReLU(0.1), 43 | nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), 44 | ) 45 | self.core = nn.Sequential( 46 | Block(self.hid), 47 | Block(self.hid), 48 | Block(self.hid), 49 | ) 50 | self.tail = nn.Sequential( 51 | nn.ReLU(), 52 | nn.Conv2d(self.hid, self.chan, kernel_size=3, stride=1, padding=1) 53 | ) 54 | 55 | def forward(self, x): 56 | y = self.head_join( 57 | self.head_long(x) + 58 | self.head_short(x) 59 | ) 60 | z = self.core(y) 61 | return self.tail(z) 62 | 63 | 64 | vae_approx_model = None 65 | vae_approx_filename = folder_paths.get_full_path("vae_approx", 'xl-to-v1_interposer-v3.1.safetensors') 66 | 67 | 68 | def parse(x): 69 | global vae_approx_model 70 | 71 | x_origin = x.clone() 72 | 73 | if vae_approx_model is None: 74 | model = Interposer() 75 | model.eval() 76 | sd = sf.load_file(vae_approx_filename) 77 | model.load_state_dict(sd) 78 | fp16 = comfy.model_management.should_use_fp16() 79 | if fp16: 80 | model = model.half() 81 | vae_approx_model = FooocusModelPatcher( 82 | model=model, 83 | load_device=comfy.model_management.get_torch_device(), 84 | offload_device=torch.device('cpu') 85 | ) 86 | vae_approx_model.dtype = torch.float16 if fp16 else torch.float32 87 | 88 | comfy.model_management.load_model_gpu(vae_approx_model) 89 | 90 | x = x_origin.to(device=vae_approx_model.load_device, dtype=vae_approx_model.dtype) 91 | x = vae_approx_model.model(x).to(x_origin) 92 | return x 93 | -------------------------------------------------------------------------------- /py/extras/wd14tagger.py: -------------------------------------------------------------------------------- 1 | # https://huggingface.co/spaces/SmilingWolf/wd-v1-4-tags 2 | # https://github.com/pythongosssss/ComfyUI-WD14-Tagger/blob/main/wd14tagger.py 3 | 4 | # { 5 | # "wd-v1-4-moat-tagger-v2": "https://huggingface.co/SmilingWolf/wd-v1-4-moat-tagger-v2", 6 | # "wd-v1-4-convnextv2-tagger-v2": "https://huggingface.co/SmilingWolf/wd-v1-4-convnextv2-tagger-v2", 7 | # "wd-v1-4-convnext-tagger-v2": "https://huggingface.co/SmilingWolf/wd-v1-4-convnext-tagger-v2", 8 | # "wd-v1-4-convnext-tagger": "https://huggingface.co/SmilingWolf/wd-v1-4-convnext-tagger", 9 | # "wd-v1-4-vit-tagger-v2": "https://huggingface.co/SmilingWolf/wd-v1-4-vit-tagger-v2" 10 | # } 11 | 12 | 13 | import numpy as np 14 | import csv 15 | import onnxruntime as ort 16 | 17 | from PIL import Image 18 | from onnxruntime import InferenceSession 19 | from modules.model_loader import load_file_from_url 20 | 21 | 22 | global_model = None 23 | global_csv = None 24 | 25 | 26 | def default_interrogator(image_rgb, threshold=0.35, character_threshold=0.85, exclude_tags=""): 27 | global global_model, global_csv 28 | 29 | model_name = "wd-v1-4-moat-tagger-v2" 30 | 31 | model_onnx_filename = load_file_from_url( 32 | url=f'https://huggingface.co/lllyasviel/misc/resolve/main/{model_name}.onnx', 33 | model_dir="clip_vision", 34 | file_name=f'{model_name}.onnx', 35 | ) 36 | 37 | model_csv_filename = load_file_from_url( 38 | url=f'https://huggingface.co/lllyasviel/misc/resolve/main/{model_name}.csv', 39 | model_dir="clip_vision", 40 | file_name=f'{model_name}.csv', 41 | ) 42 | 43 | 
if global_model is not None: 44 | model = global_model 45 | else: 46 | model = InferenceSession(model_onnx_filename, providers=ort.get_available_providers()) 47 | global_model = model 48 | 49 | input = model.get_inputs()[0] 50 | height = input.shape[1] 51 | 52 | image = Image.fromarray(image_rgb) # RGB 53 | ratio = float(height)/max(image.size) 54 | new_size = tuple([int(x*ratio) for x in image.size]) 55 | image = image.resize(new_size, Image.LANCZOS) 56 | square = Image.new("RGB", (height, height), (255, 255, 255)) 57 | square.paste(image, ((height-new_size[0])//2, (height-new_size[1])//2)) 58 | 59 | image = np.array(square).astype(np.float32) 60 | image = image[:, :, ::-1] # RGB -> BGR 61 | image = np.expand_dims(image, 0) 62 | 63 | if global_csv is not None: 64 | csv_lines = global_csv 65 | else: 66 | csv_lines = [] 67 | with open(model_csv_filename) as f: 68 | reader = csv.reader(f) 69 | next(reader) 70 | for row in reader: 71 | csv_lines.append(row) 72 | global_csv = csv_lines 73 | 74 | tags = [] 75 | general_index = None 76 | character_index = None 77 | for line_num, row in enumerate(csv_lines): 78 | if general_index is None and row[2] == "0": 79 | general_index = line_num 80 | elif character_index is None and row[2] == "4": 81 | character_index = line_num 82 | tags.append(row[1]) 83 | 84 | label_name = model.get_outputs()[0].name 85 | probs = model.run([label_name], {input.name: image})[0] 86 | 87 | result = list(zip(tags, probs[0])) 88 | 89 | general = [item for item in result[general_index:character_index] if item[1] > threshold] 90 | character = [item for item in result[character_index:] if item[1] > character_threshold] 91 | 92 | all = character + general 93 | remove = [s.strip() for s in exclude_tags.lower().split(",")] 94 | all = [tag for tag in all if tag[0] not in remove] 95 | 96 | res = ", ".join((item[0].replace("(", "\\(").replace(")", "\\)") for item in all)).replace('_', ' ') 97 | return res 98 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_clip_sdxl.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | from ldm_patched.contrib.external import MAX_RESOLUTION 5 | 6 | class CLIPTextEncodeSDXLRefiner: 7 | @classmethod 8 | def INPUT_TYPES(s): 9 | return {"required": { 10 | "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}), 11 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 12 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 13 | "text": ("STRING", {"multiline": True}), "clip": ("CLIP", ), 14 | }} 15 | RETURN_TYPES = ("CONDITIONING",) 16 | FUNCTION = "encode" 17 | 18 | CATEGORY = "advanced/conditioning" 19 | 20 | def encode(self, clip, ascore, width, height, text): 21 | tokens = clip.tokenize(text) 22 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 23 | return ([[cond, {"pooled_output": pooled, "aesthetic_score": ascore, "width": width,"height": height}]], ) 24 | 25 | class CLIPTextEncodeSDXL: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { 29 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 30 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 31 | "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 32 | "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 33 | "target_width": 
("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 34 | "target_height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 35 | "text_g": ("STRING", {"multiline": True, "default": "CLIP_G"}), "clip": ("CLIP", ), 36 | "text_l": ("STRING", {"multiline": True, "default": "CLIP_L"}), "clip": ("CLIP", ), 37 | }} 38 | RETURN_TYPES = ("CONDITIONING",) 39 | FUNCTION = "encode" 40 | 41 | CATEGORY = "advanced/conditioning" 42 | 43 | def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l): 44 | tokens = clip.tokenize(text_g) 45 | tokens["l"] = clip.tokenize(text_l)["l"] 46 | if len(tokens["l"]) != len(tokens["g"]): 47 | empty = clip.tokenize("") 48 | while len(tokens["l"]) < len(tokens["g"]): 49 | tokens["l"] += empty["l"] 50 | while len(tokens["l"]) > len(tokens["g"]): 51 | tokens["g"] += empty["g"] 52 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 53 | return ([[cond, {"pooled_output": pooled, "width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}]], ) 54 | 55 | NODE_CLASS_MAPPINGS = { 56 | "CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner, 57 | "CLIPTextEncodeSDXL": CLIPTextEncodeSDXL, 58 | } 59 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_freelunch.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | #code originally taken from: https://github.com/ChenyangSi/FreeU (under MIT License) 4 | 5 | import torch 6 | 7 | 8 | def Fourier_filter(x, threshold, scale): 9 | # FFT 10 | x_freq = torch.fft.fftn(x.float(), dim=(-2, -1)) 11 | x_freq = torch.fft.fftshift(x_freq, dim=(-2, -1)) 12 | 13 | B, C, H, W = x_freq.shape 14 | mask = torch.ones((B, C, H, W), device=x.device) 15 | 16 | crow, ccol = H // 2, W //2 17 | mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale 18 | x_freq = x_freq * mask 19 | 20 | # IFFT 21 | x_freq = torch.fft.ifftshift(x_freq, dim=(-2, -1)) 22 | x_filtered = torch.fft.ifftn(x_freq, dim=(-2, -1)).real 23 | 24 | return x_filtered.to(x.dtype) 25 | 26 | 27 | class FreeU: 28 | @classmethod 29 | def INPUT_TYPES(s): 30 | return {"required": { "model": ("MODEL",), 31 | "b1": ("FLOAT", {"default": 1.1, "min": 0.0, "max": 10.0, "step": 0.01}), 32 | "b2": ("FLOAT", {"default": 1.2, "min": 0.0, "max": 10.0, "step": 0.01}), 33 | "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), 34 | "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), 35 | }} 36 | RETURN_TYPES = ("MODEL",) 37 | FUNCTION = "patch" 38 | 39 | CATEGORY = "model_patches" 40 | 41 | def patch(self, model, b1, b2, s1, s2): 42 | model_channels = model.model.model_config.unet_config["model_channels"] 43 | scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} 44 | on_cpu_devices = {} 45 | 46 | def output_block_patch(h, hsp, transformer_options): 47 | scale = scale_dict.get(h.shape[1], None) 48 | if scale is not None: 49 | h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * scale[0] 50 | if hsp.device not in on_cpu_devices: 51 | try: 52 | hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) 53 | except: 54 | print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") 55 | on_cpu_devices[hsp.device] = True 56 | hsp = Fourier_filter(hsp.cpu(), 
threshold=1, scale=scale[1]).to(hsp.device) 57 | else: 58 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 59 | 60 | return h, hsp 61 | 62 | m = model.clone() 63 | m.set_model_output_block_patch(output_block_patch) 64 | return (m, ) 65 | 66 | class FreeU_V2: 67 | @classmethod 68 | def INPUT_TYPES(s): 69 | return {"required": { "model": ("MODEL",), 70 | "b1": ("FLOAT", {"default": 1.3, "min": 0.0, "max": 10.0, "step": 0.01}), 71 | "b2": ("FLOAT", {"default": 1.4, "min": 0.0, "max": 10.0, "step": 0.01}), 72 | "s1": ("FLOAT", {"default": 0.9, "min": 0.0, "max": 10.0, "step": 0.01}), 73 | "s2": ("FLOAT", {"default": 0.2, "min": 0.0, "max": 10.0, "step": 0.01}), 74 | }} 75 | RETURN_TYPES = ("MODEL",) 76 | FUNCTION = "patch" 77 | 78 | CATEGORY = "model_patches" 79 | 80 | def patch(self, model, b1, b2, s1, s2): 81 | model_channels = model.model.model_config.unet_config["model_channels"] 82 | scale_dict = {model_channels * 4: (b1, s1), model_channels * 2: (b2, s2)} 83 | on_cpu_devices = {} 84 | 85 | def output_block_patch(h, hsp, transformer_options): 86 | scale = scale_dict.get(h.shape[1], None) 87 | if scale is not None: 88 | hidden_mean = h.mean(1).unsqueeze(1) 89 | B = hidden_mean.shape[0] 90 | hidden_max, _ = torch.max(hidden_mean.view(B, -1), dim=-1, keepdim=True) 91 | hidden_min, _ = torch.min(hidden_mean.view(B, -1), dim=-1, keepdim=True) 92 | hidden_mean = (hidden_mean - hidden_min.unsqueeze(2).unsqueeze(3)) / (hidden_max - hidden_min).unsqueeze(2).unsqueeze(3) 93 | 94 | h[:,:h.shape[1] // 2] = h[:,:h.shape[1] // 2] * ((scale[0] - 1 ) * hidden_mean + 1) 95 | 96 | if hsp.device not in on_cpu_devices: 97 | try: 98 | hsp = Fourier_filter(hsp, threshold=1, scale=scale[1]) 99 | except: 100 | print("Device", hsp.device, "does not support the torch.fft functions used in the FreeU node, switching to CPU.") 101 | on_cpu_devices[hsp.device] = True 102 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 103 | else: 104 | hsp = Fourier_filter(hsp.cpu(), threshold=1, scale=scale[1]).to(hsp.device) 105 | 106 | return h, hsp 107 | 108 | m = model.clone() 109 | m.set_model_output_block_patch(output_block_patch) 110 | return (m, ) 111 | 112 | NODE_CLASS_MAPPINGS = { 113 | "FreeU": FreeU, 114 | "FreeU_V2": FreeU_V2, 115 | } 116 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_hypernetwork.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import ldm_patched.modules.utils 4 | import ldm_patched.utils.path_utils 5 | import torch 6 | 7 | def load_hypernetwork_patch(path, strength): 8 | sd = ldm_patched.modules.utils.load_torch_file(path, safe_load=True) 9 | activation_func = sd.get('activation_func', 'linear') 10 | is_layer_norm = sd.get('is_layer_norm', False) 11 | use_dropout = sd.get('use_dropout', False) 12 | activate_output = sd.get('activate_output', False) 13 | last_layer_dropout = sd.get('last_layer_dropout', False) 14 | 15 | valid_activation = { 16 | "linear": torch.nn.Identity, 17 | "relu": torch.nn.ReLU, 18 | "leakyrelu": torch.nn.LeakyReLU, 19 | "elu": torch.nn.ELU, 20 | "swish": torch.nn.Hardswish, 21 | "tanh": torch.nn.Tanh, 22 | "sigmoid": torch.nn.Sigmoid, 23 | "softsign": torch.nn.Softsign, 24 | "mish": torch.nn.Mish, 25 | } 26 | 27 | if activation_func not in valid_activation: 28 | print("Unsupported Hypernetwork format, if you report it I might implement it.", 
path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout) 29 | return None 30 | 31 | out = {} 32 | 33 | for d in sd: 34 | try: 35 | dim = int(d) 36 | except: 37 | continue 38 | 39 | output = [] 40 | for index in [0, 1]: 41 | attn_weights = sd[dim][index] 42 | keys = attn_weights.keys() 43 | 44 | linears = filter(lambda a: a.endswith(".weight"), keys) 45 | linears = list(map(lambda a: a[:-len(".weight")], linears)) 46 | layers = [] 47 | 48 | i = 0 49 | while i < len(linears): 50 | lin_name = linears[i] 51 | last_layer = (i == (len(linears) - 1)) 52 | penultimate_layer = (i == (len(linears) - 2)) 53 | 54 | lin_weight = attn_weights['{}.weight'.format(lin_name)] 55 | lin_bias = attn_weights['{}.bias'.format(lin_name)] 56 | layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0]) 57 | layer.load_state_dict({"weight": lin_weight, "bias": lin_bias}) 58 | layers.append(layer) 59 | if activation_func != "linear": 60 | if (not last_layer) or (activate_output): 61 | layers.append(valid_activation[activation_func]()) 62 | if is_layer_norm: 63 | i += 1 64 | ln_name = linears[i] 65 | ln_weight = attn_weights['{}.weight'.format(ln_name)] 66 | ln_bias = attn_weights['{}.bias'.format(ln_name)] 67 | ln = torch.nn.LayerNorm(ln_weight.shape[0]) 68 | ln.load_state_dict({"weight": ln_weight, "bias": ln_bias}) 69 | layers.append(ln) 70 | if use_dropout: 71 | if (not last_layer) and (not penultimate_layer or last_layer_dropout): 72 | layers.append(torch.nn.Dropout(p=0.3)) 73 | i += 1 74 | 75 | output.append(torch.nn.Sequential(*layers)) 76 | out[dim] = torch.nn.ModuleList(output) 77 | 78 | class hypernetwork_patch: 79 | def __init__(self, hypernet, strength): 80 | self.hypernet = hypernet 81 | self.strength = strength 82 | def __call__(self, q, k, v, extra_options): 83 | dim = k.shape[-1] 84 | if dim in self.hypernet: 85 | hn = self.hypernet[dim] 86 | k = k + hn[0](k) * self.strength 87 | v = v + hn[1](v) * self.strength 88 | 89 | return q, k, v 90 | 91 | def to(self, device): 92 | for d in self.hypernet.keys(): 93 | self.hypernet[d] = self.hypernet[d].to(device) 94 | return self 95 | 96 | return hypernetwork_patch(out, strength) 97 | 98 | class HypernetworkLoader: 99 | @classmethod 100 | def INPUT_TYPES(s): 101 | return {"required": { "model": ("MODEL",), 102 | "hypernetwork_name": (ldm_patched.utils.path_utils.get_filename_list("hypernetworks"), ), 103 | "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), 104 | }} 105 | RETURN_TYPES = ("MODEL",) 106 | FUNCTION = "load_hypernetwork" 107 | 108 | CATEGORY = "loaders" 109 | 110 | def load_hypernetwork(self, model, hypernetwork_name, strength): 111 | hypernetwork_path = ldm_patched.utils.path_utils.get_full_path("hypernetworks", hypernetwork_name) 112 | model_hypernetwork = model.clone() 113 | patch = load_hypernetwork_patch(hypernetwork_path, strength) 114 | if patch is not None: 115 | model_hypernetwork.set_model_attn1_patch(patch) 116 | model_hypernetwork.set_model_attn2_patch(patch) 117 | return (model_hypernetwork,) 118 | 119 | NODE_CLASS_MAPPINGS = { 120 | "HypernetworkLoader": HypernetworkLoader 121 | } 122 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_hypertile.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | #Taken from: https://github.com/tfernd/HyperTile/ 4 | 5 | import math 6 | from einops import 
rearrange 7 | # Use torch rng for consistency across generations 8 | from torch import randint 9 | 10 | def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int: 11 | min_value = min(min_value, value) 12 | 13 | # All big divisors of value (inclusive) 14 | divisors = [i for i in range(min_value, value + 1) if value % i == 0] 15 | 16 | ns = [value // i for i in divisors[:max_options]] # has at least 1 element 17 | 18 | if len(ns) - 1 > 0: 19 | idx = randint(low=0, high=len(ns) - 1, size=(1,)).item() 20 | else: 21 | idx = 0 22 | 23 | return ns[idx] 24 | 25 | class HyperTile: 26 | @classmethod 27 | def INPUT_TYPES(s): 28 | return {"required": { "model": ("MODEL",), 29 | "tile_size": ("INT", {"default": 256, "min": 1, "max": 2048}), 30 | "swap_size": ("INT", {"default": 2, "min": 1, "max": 128}), 31 | "max_depth": ("INT", {"default": 0, "min": 0, "max": 10}), 32 | "scale_depth": ("BOOLEAN", {"default": False}), 33 | }} 34 | RETURN_TYPES = ("MODEL",) 35 | FUNCTION = "patch" 36 | 37 | CATEGORY = "model_patches" 38 | 39 | def patch(self, model, tile_size, swap_size, max_depth, scale_depth): 40 | model_channels = model.model.model_config.unet_config["model_channels"] 41 | 42 | latent_tile_size = max(32, tile_size) // 8 43 | self.temp = None 44 | 45 | def hypertile_in(q, k, v, extra_options): 46 | model_chans = q.shape[-2] 47 | orig_shape = extra_options['original_shape'] 48 | apply_to = [] 49 | for i in range(max_depth + 1): 50 | apply_to.append((orig_shape[-2] / (2 ** i)) * (orig_shape[-1] / (2 ** i))) 51 | 52 | if model_chans in apply_to: 53 | shape = extra_options["original_shape"] 54 | aspect_ratio = shape[-1] / shape[-2] 55 | 56 | hw = q.size(1) 57 | h, w = round(math.sqrt(hw * aspect_ratio)), round(math.sqrt(hw / aspect_ratio)) 58 | 59 | factor = (2 ** apply_to.index(model_chans)) if scale_depth else 1 60 | nh = random_divisor(h, latent_tile_size * factor, swap_size) 61 | nw = random_divisor(w, latent_tile_size * factor, swap_size) 62 | 63 | if nh * nw > 1: 64 | q = rearrange(q, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw) 65 | self.temp = (nh, nw, h, w) 66 | return q, k, v 67 | 68 | return q, k, v 69 | def hypertile_out(out, extra_options): 70 | if self.temp is not None: 71 | nh, nw, h, w = self.temp 72 | self.temp = None 73 | out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw) 74 | out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw) 75 | return out 76 | 77 | 78 | m = model.clone() 79 | m.set_model_attn1_patch(hypertile_in) 80 | m.set_model_attn1_output_patch(hypertile_out) 81 | return (m, ) 82 | 83 | NODE_CLASS_MAPPINGS = { 84 | "HyperTile": HyperTile, 85 | } 86 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_model_downscale.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.modules.utils 5 | 6 | class PatchModelAddDownscale: 7 | upscale_methods = ["bicubic", "nearest-exact", "bilinear", "area", "bislerp"] 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | return {"required": { "model": ("MODEL",), 11 | "block_number": ("INT", {"default": 3, "min": 1, "max": 32, "step": 1}), 12 | "downscale_factor": ("FLOAT", {"default": 2.0, "min": 0.1, "max": 9.0, "step": 0.001}), 13 | "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), 14 | "end_percent": ("FLOAT", 
{"default": 0.35, "min": 0.0, "max": 1.0, "step": 0.001}), 15 | "downscale_after_skip": ("BOOLEAN", {"default": True}), 16 | "downscale_method": (s.upscale_methods,), 17 | "upscale_method": (s.upscale_methods,), 18 | }} 19 | RETURN_TYPES = ("MODEL",) 20 | FUNCTION = "patch" 21 | 22 | CATEGORY = "_for_testing" 23 | 24 | def patch(self, model, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method): 25 | sigma_start = model.model.model_sampling.percent_to_sigma(start_percent) 26 | sigma_end = model.model.model_sampling.percent_to_sigma(end_percent) 27 | 28 | def input_block_patch(h, transformer_options): 29 | if transformer_options["block"][1] == block_number: 30 | sigma = transformer_options["sigmas"][0].item() 31 | if sigma <= sigma_start and sigma >= sigma_end: 32 | h = ldm_patched.modules.utils.common_upscale(h, round(h.shape[-1] * (1.0 / downscale_factor)), round(h.shape[-2] * (1.0 / downscale_factor)), downscale_method, "disabled") 33 | return h 34 | 35 | def output_block_patch(h, hsp, transformer_options): 36 | if h.shape[2] != hsp.shape[2]: 37 | h = ldm_patched.modules.utils.common_upscale(h, hsp.shape[-1], hsp.shape[-2], upscale_method, "disabled") 38 | return h, hsp 39 | 40 | m = model.clone() 41 | if downscale_after_skip: 42 | m.set_model_input_block_patch_after_skip(input_block_patch) 43 | else: 44 | m.set_model_input_block_patch(input_block_patch) 45 | m.set_model_output_block_patch(output_block_patch) 46 | return (m, ) 47 | 48 | NODE_CLASS_MAPPINGS = { 49 | "PatchModelAddDownscale": PatchModelAddDownscale, 50 | } 51 | 52 | NODE_DISPLAY_NAME_MAPPINGS = { 53 | # Sampling 54 | "PatchModelAddDownscale": "PatchModelAddDownscale (Kohya Deep Shrink)", 55 | } 56 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_perpneg.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import comfy.model_management 5 | import ldm_patched.modules.sample 6 | import ldm_patched.modules.samplers 7 | import ldm_patched.modules.utils 8 | 9 | 10 | class PerpNeg: 11 | @classmethod 12 | def INPUT_TYPES(s): 13 | return {"required": {"model": ("MODEL", ), 14 | "empty_conditioning": ("CONDITIONING", ), 15 | "neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}), 16 | }} 17 | RETURN_TYPES = ("MODEL",) 18 | FUNCTION = "patch" 19 | 20 | CATEGORY = "_for_testing" 21 | 22 | def patch(self, model, empty_conditioning, neg_scale): 23 | m = model.clone() 24 | nocond = ldm_patched.modules.sample.convert_cond(empty_conditioning) 25 | 26 | def cfg_function(args): 27 | model = args["model"] 28 | noise_pred_pos = args["cond_denoised"] 29 | noise_pred_neg = args["uncond_denoised"] 30 | cond_scale = args["cond_scale"] 31 | x = args["input"] 32 | sigma = args["sigma"] 33 | model_options = args["model_options"] 34 | nocond_processed = ldm_patched.modules.samplers.encode_model_conds(model.extra_conds, nocond, x, x.device, "negative") 35 | 36 | (noise_pred_nocond, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, nocond_processed, None, x, sigma, model_options) 37 | 38 | pos = noise_pred_pos - noise_pred_nocond 39 | neg = noise_pred_neg - noise_pred_nocond 40 | perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg 41 | perp_neg = perp * neg_scale 42 | cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg) 43 | cfg_result = x - cfg_result 44 | 
return cfg_result 45 | 46 | m.set_model_sampler_cfg_function(cfg_function) 47 | 48 | return (m, ) 49 | 50 | 51 | NODE_CLASS_MAPPINGS = { 52 | "PerpNeg": PerpNeg, 53 | } 54 | 55 | NODE_DISPLAY_NAME_MAPPINGS = { 56 | "PerpNeg": "Perp-Neg", 57 | } 58 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_sdupscale.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import torch 4 | import ldm_patched.contrib.external 5 | import ldm_patched.modules.utils 6 | 7 | class SD_4XUpscale_Conditioning: 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | return {"required": { "images": ("IMAGE",), 11 | "positive": ("CONDITIONING",), 12 | "negative": ("CONDITIONING",), 13 | "scale_ratio": ("FLOAT", {"default": 4.0, "min": 0.0, "max": 10.0, "step": 0.01}), 14 | "noise_augmentation": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), 15 | }} 16 | RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT") 17 | RETURN_NAMES = ("positive", "negative", "latent") 18 | 19 | FUNCTION = "encode" 20 | 21 | CATEGORY = "conditioning/upscale_diffusion" 22 | 23 | def encode(self, images, positive, negative, scale_ratio, noise_augmentation): 24 | width = max(1, round(images.shape[-2] * scale_ratio)) 25 | height = max(1, round(images.shape[-3] * scale_ratio)) 26 | 27 | pixels = ldm_patched.modules.utils.common_upscale((images.movedim(-1,1) * 2.0) - 1.0, width // 4, height // 4, "bilinear", "center") 28 | 29 | out_cp = [] 30 | out_cn = [] 31 | 32 | for t in positive: 33 | n = [t[0], t[1].copy()] 34 | n[1]['concat_image'] = pixels 35 | n[1]['noise_augmentation'] = noise_augmentation 36 | out_cp.append(n) 37 | 38 | for t in negative: 39 | n = [t[0], t[1].copy()] 40 | n[1]['concat_image'] = pixels 41 | n[1]['noise_augmentation'] = noise_augmentation 42 | out_cn.append(n) 43 | 44 | latent = torch.zeros([images.shape[0], 4, height // 4, width // 4]) 45 | return (out_cp, out_cn, {"samples":latent}) 46 | 47 | NODE_CLASS_MAPPINGS = { 48 | "SD_4XUpscale_Conditioning": SD_4XUpscale_Conditioning, 49 | } 50 | -------------------------------------------------------------------------------- /py/ldm_patched/contrib/external_upscale_model.py: -------------------------------------------------------------------------------- 1 | # https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 2 | 3 | import os 4 | from ldm_patched.pfn import model_loading 5 | from comfy import model_management 6 | import torch 7 | import ldm_patched.modules.utils 8 | import ldm_patched.utils.path_utils 9 | 10 | class UpscaleModelLoader: 11 | @classmethod 12 | def INPUT_TYPES(s): 13 | return {"required": { "model_name": (ldm_patched.utils.path_utils.get_filename_list("upscale_models"), ), 14 | }} 15 | RETURN_TYPES = ("UPSCALE_MODEL",) 16 | FUNCTION = "load_model" 17 | 18 | CATEGORY = "loaders" 19 | 20 | def load_model(self, model_name): 21 | model_path = ldm_patched.utils.path_utils.get_full_path("upscale_models", model_name) 22 | sd = ldm_patched.modules.utils.load_torch_file(model_path, safe_load=True) 23 | if "module.layers.0.residual_group.blocks.0.norm1.weight" in sd: 24 | sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"module.":""}) 25 | out = model_loading.load_state_dict(sd).eval() 26 | return (out, ) 27 | 28 | 29 | class ImageUpscaleWithModel: 30 | @classmethod 31 | def INPUT_TYPES(s): 32 | return {"required": { "upscale_model": ("UPSCALE_MODEL",), 33 | 
"image": ("IMAGE",), 34 | }} 35 | RETURN_TYPES = ("IMAGE",) 36 | FUNCTION = "upscale" 37 | 38 | CATEGORY = "image/upscaling" 39 | 40 | def upscale(self, upscale_model, image): 41 | device = model_management.get_torch_device() 42 | upscale_model.to(device) 43 | in_img = image.movedim(-1,-3).to(device) 44 | free_memory = model_management.get_free_memory(device) 45 | 46 | tile = 512 47 | overlap = 32 48 | 49 | oom = True 50 | while oom: 51 | try: 52 | steps = in_img.shape[0] * ldm_patched.modules.utils.get_tiled_scale_steps(in_img.shape[3], in_img.shape[2], tile_x=tile, tile_y=tile, overlap=overlap) 53 | pbar = ldm_patched.modules.utils.ProgressBar(steps) 54 | s = ldm_patched.modules.utils.tiled_scale(in_img, lambda a: upscale_model(a), tile_x=tile, tile_y=tile, overlap=overlap, upscale_amount=upscale_model.scale, pbar=pbar) 55 | oom = False 56 | except model_management.OOM_EXCEPTION as e: 57 | tile //= 2 58 | if tile < 128: 59 | raise e 60 | 61 | upscale_model.cpu() 62 | s = torch.clamp(s.movedim(-3,-1), min=0, max=1.0) 63 | return (s,) 64 | 65 | NODE_CLASS_MAPPINGS = { 66 | "UpscaleModelLoader": UpscaleModelLoader, 67 | "ImageUpscaleWithModel": ImageUpscaleWithModel 68 | } 69 | -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/ldm_patched/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/diffusionmodules/upscaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from functools import partial 5 | 6 | from .util import extract_into_tensor, make_beta_schedule 7 | from ldm_patched.ldm.util import default 8 | 9 | 10 | class AbstractLowScaleModel(nn.Module): 11 | # for concatenating a downsampled image to the latent representation 12 | def __init__(self, noise_schedule_config=None): 13 | super(AbstractLowScaleModel, self).__init__() 14 | if noise_schedule_config is not None: 15 | self.register_schedule(**noise_schedule_config) 16 | 17 | def register_schedule(self, beta_schedule="linear", timesteps=1000, 18 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 19 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, 20 | cosine_s=cosine_s) 21 | alphas = 1. - betas 22 | alphas_cumprod = np.cumprod(alphas, axis=0) 23 | alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 24 | 25 | timesteps, = betas.shape 26 | self.num_timesteps = int(timesteps) 27 | self.linear_start = linear_start 28 | self.linear_end = linear_end 29 | assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' 30 | 31 | to_torch = partial(torch.tensor, dtype=torch.float32) 32 | 33 | self.register_buffer('betas', to_torch(betas)) 34 | self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) 35 | self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) 36 | 37 | # calculations for diffusion q(x_t | x_{t-1}) and others 38 | self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) 39 | self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. 
- alphas_cumprod))) 40 | self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) 41 | self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) 42 | self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1))) 43 | 44 | def q_sample(self, x_start, t, noise=None, seed=None): 45 | if noise is None: 46 | if seed is None: 47 | noise = torch.randn_like(x_start) 48 | else: 49 | noise = torch.randn(x_start.size(), dtype=x_start.dtype, layout=x_start.layout, generator=torch.manual_seed(seed)).to(x_start.device) 50 | return (extract_into_tensor(self.sqrt_alphas_cumprod.to(x_start.device), t, x_start.shape) * x_start + 51 | extract_into_tensor(self.sqrt_one_minus_alphas_cumprod.to(x_start.device), t, x_start.shape) * noise) 52 | 53 | def forward(self, x): 54 | return x, None 55 | 56 | def decode(self, x): 57 | return x 58 | 59 | 60 | class SimpleImageConcat(AbstractLowScaleModel): 61 | # no noise level conditioning 62 | def __init__(self): 63 | super(SimpleImageConcat, self).__init__(noise_schedule_config=None) 64 | self.max_noise_level = 0 65 | 66 | def forward(self, x): 67 | # fix to constant noise level 68 | return x, torch.zeros(x.shape[0], device=x.device).long() 69 | 70 | 71 | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): 72 | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cuda=False): 73 | super().__init__(noise_schedule_config=noise_schedule_config) 74 | self.max_noise_level = max_noise_level 75 | 76 | def forward(self, x, noise_level=None, seed=None): 77 | if noise_level is None: 78 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 79 | else: 80 | assert isinstance(noise_level, torch.Tensor) 81 | z = self.q_sample(x, noise_level, seed=seed) 82 | return z, noise_level 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/ldm_patched/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 
| def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not 
key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) 81 | -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/ldm_patched/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /py/ldm_patched/ldm/modules/encoders/noise_aug_modules.py: -------------------------------------------------------------------------------- 1 | from ..diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation 2 | from ..diffusionmodules.openaimodel import Timestep 3 | import torch 4 | 5 | class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation): 6 | def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | if clip_stats_path is None: 9 | clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) 10 | else: 11 | clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu") 12 | self.register_buffer("data_mean", clip_mean[None, :], persistent=False) 13 | self.register_buffer("data_std", clip_std[None, :], persistent=False) 14 | self.time_embed = Timestep(timestep_dim) 15 | 16 | def scale(self, x): 17 | # re-normalize to centered mean and unit variance 18 | x = (x - self.data_mean.to(x.device)) * 1. 
/ self.data_std.to(x.device) 19 | return x 20 | 21 | def unscale(self, x): 22 | # back to original data stats 23 | x = (x * self.data_std.to(x.device)) + self.data_mean.to(x.device) 24 | return x 25 | 26 | def forward(self, x, noise_level=None, seed=None): 27 | if noise_level is None: 28 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 29 | else: 30 | assert isinstance(noise_level, torch.Tensor) 31 | x = self.scale(x) 32 | z = self.q_sample(x, noise_level, seed=seed) 33 | z = self.unscale(z) 34 | noise_level = self.time_embed(noise_level) 35 | return z, noise_level 36 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/checkpoint_pickle.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | load = pickle.load 4 | 5 | class Empty: 6 | pass 7 | 8 | class Unpickler(pickle.Unpickler): 9 | def find_class(self, module, name): 10 | #TODO: safe unpickle 11 | if module.startswith("pytorch_lightning"): 12 | return Empty 13 | return super().find_class(module, name) 14 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/clip_config_bigg.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1280, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 5120, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 20, 18 | "num_hidden_layers": 32, 19 | "pad_token_id": 1, 20 | "projection_dim": 1280, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/clip_vision_config_g.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1664, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 8192, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 48, 15 | "patch_size": 14, 16 | "projection_dim": 1280, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/clip_vision_config_h.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1280, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 5120, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 32, 15 | "patch_size": 14, 16 | "projection_dim": 1024, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/clip_vision_config_vitl.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | 
"dropout": 0.0, 4 | "hidden_act": "quick_gelu", 5 | "hidden_size": 1024, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 4096, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 24, 15 | "patch_size": 14, 16 | "projection_dim": 768, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/conds.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import ldm_patched.modules.utils 4 | 5 | 6 | 7 | class CONDRegular: 8 | def __init__(self, cond): 9 | self.cond = cond 10 | 11 | def _copy_with(self, cond): 12 | return self.__class__(cond) 13 | 14 | def process_cond(self, batch_size, device, **kwargs): 15 | return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(self.cond, batch_size).to(device)) 16 | 17 | def can_concat(self, other): 18 | if self.cond.shape != other.cond.shape: 19 | return False 20 | return True 21 | 22 | def concat(self, others): 23 | conds = [self.cond] 24 | for x in others: 25 | conds.append(x.cond) 26 | return torch.cat(conds) 27 | 28 | class CONDNoiseShape(CONDRegular): 29 | def process_cond(self, batch_size, device, area, **kwargs): 30 | data = self.cond[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] 31 | return self._copy_with(ldm_patched.modules.utils.repeat_to_batch_size(data, batch_size).to(device)) 32 | 33 | 34 | class CONDCrossAttn(CONDRegular): 35 | def can_concat(self, other): 36 | s1 = self.cond.shape 37 | s2 = other.cond.shape 38 | if s1 != s2: 39 | if s1[0] != s2[0] or s1[2] != s2[2]: #these 2 cases should not happen 40 | return False 41 | 42 | mult_min = math.lcm(s1[1], s2[1]) 43 | diff = mult_min // min(s1[1], s2[1]) 44 | if diff > 4: #arbitrary limit on the padding because it's probably going to impact performance negatively if it's too much 45 | return False 46 | return True 47 | 48 | def concat(self, others): 49 | conds = [self.cond] 50 | crossattn_max_len = self.cond.shape[1] 51 | for x in others: 52 | c = x.cond 53 | crossattn_max_len = math.lcm(crossattn_max_len, c.shape[1]) 54 | conds.append(c) 55 | 56 | out = [] 57 | for c in conds: 58 | if c.shape[1] < crossattn_max_len: 59 | c = c.repeat(1, crossattn_max_len // c.shape[1], 1) #padding with repeat doesn't change result 60 | out.append(c) 61 | return torch.cat(out) 62 | 63 | class CONDConstant(CONDRegular): 64 | def __init__(self, cond): 65 | self.cond = cond 66 | 67 | def process_cond(self, batch_size, device, **kwargs): 68 | return self._copy_with(self.cond) 69 | 70 | def can_concat(self, other): 71 | if self.cond != other.cond: 72 | return False 73 | return True 74 | 75 | def concat(self, others): 76 | return self.cond 77 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/diffusers_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import ldm_patched.modules.sd 4 | 5 | def first_file(path, filenames): 6 | for f in filenames: 7 | p = os.path.join(path, f) 8 | if os.path.exists(p): 9 | return p 10 | return None 11 | 12 | def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None): 13 | diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", 
"diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"] 14 | unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names) 15 | vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names) 16 | 17 | text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"] 18 | text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names) 19 | text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names) 20 | 21 | text_encoder_paths = [text_encoder1_path] 22 | if text_encoder2_path is not None: 23 | text_encoder_paths.append(text_encoder2_path) 24 | 25 | unet = ldm_patched.modules.sd.load_unet(unet_path) 26 | 27 | clip = None 28 | if output_clip: 29 | clip = ldm_patched.modules.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory) 30 | 31 | vae = None 32 | if output_vae: 33 | sd = ldm_patched.modules.utils.load_torch_file(vae_path) 34 | vae = ldm_patched.modules.sd.VAE(sd=sd) 35 | 36 | return (unet, clip, vae) 37 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/latent_formats.py: -------------------------------------------------------------------------------- 1 | 2 | class LatentFormat: 3 | scale_factor = 1.0 4 | latent_channels = 4 5 | latent_dimensions = 2 6 | latent_rgb_factors = None 7 | latent_rgb_factors_bias = None 8 | taesd_decoder_name = None 9 | 10 | def process_in(self, latent): 11 | return latent * self.scale_factor 12 | 13 | def process_out(self, latent): 14 | return latent / self.scale_factor 15 | 16 | class SD15(LatentFormat): 17 | def __init__(self, scale_factor=0.18215): 18 | self.scale_factor = scale_factor 19 | self.latent_rgb_factors = [ 20 | # R G B 21 | [ 0.3512, 0.2297, 0.3227], 22 | [ 0.3250, 0.4974, 0.2350], 23 | [-0.2829, 0.1762, 0.2721], 24 | [-0.2120, -0.2616, -0.7177] 25 | ] 26 | self.taesd_decoder_name = "taesd_decoder" 27 | 28 | class SDXL(LatentFormat): 29 | scale_factor = 0.13025 30 | 31 | def __init__(self): 32 | self.latent_rgb_factors = [ 33 | # R G B 34 | [ 0.3651, 0.4232, 0.4341], 35 | [-0.2533, -0.0042, 0.1068], 36 | [ 0.1076, 0.1111, -0.0362], 37 | [-0.3165, -0.2492, -0.2188] 38 | ] 39 | self.latent_rgb_factors_bias = [ 0.1084, -0.0175, -0.0011] 40 | 41 | self.taesd_decoder_name = "taesdxl_decoder" 42 | 43 | class SD_X4(LatentFormat): 44 | def __init__(self): 45 | self.scale_factor = 0.08333 46 | self.latent_rgb_factors = [ 47 | [-0.2340, -0.3863, -0.3257], 48 | [ 0.0994, 0.0885, -0.0908], 49 | [-0.2833, -0.2349, -0.3741], 50 | [ 0.2523, -0.0055, -0.1651] 51 | ] 52 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import comfy.model_management 3 | 4 | def cast_bias_weight(s, input): 5 | bias = None 6 | non_blocking = comfy.model_management.device_supports_non_blocking(input.device) 7 | if s.bias is not None: 8 | bias = s.bias.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) 9 | weight = s.weight.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking) 10 | return weight, bias 11 | 12 | 13 | class disable_weight_init: 14 | class Linear(torch.nn.Linear): 15 | ldm_patched_cast_weights = False 16 | def reset_parameters(self): 17 | return None 18 | 19 | def forward_ldm_patched_cast_weights(self, input): 20 | 
weight, bias = cast_bias_weight(self, input) 21 | return torch.nn.functional.linear(input, weight, bias) 22 | 23 | def forward(self, *args, **kwargs): 24 | if self.ldm_patched_cast_weights: 25 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 26 | else: 27 | return super().forward(*args, **kwargs) 28 | 29 | class Conv2d(torch.nn.Conv2d): 30 | ldm_patched_cast_weights = False 31 | def reset_parameters(self): 32 | return None 33 | 34 | def forward_ldm_patched_cast_weights(self, input): 35 | weight, bias = cast_bias_weight(self, input) 36 | return self._conv_forward(input, weight, bias) 37 | 38 | def forward(self, *args, **kwargs): 39 | if self.ldm_patched_cast_weights: 40 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 41 | else: 42 | return super().forward(*args, **kwargs) 43 | 44 | class Conv3d(torch.nn.Conv3d): 45 | ldm_patched_cast_weights = False 46 | def reset_parameters(self): 47 | return None 48 | 49 | def forward_ldm_patched_cast_weights(self, input): 50 | weight, bias = cast_bias_weight(self, input) 51 | return self._conv_forward(input, weight, bias) 52 | 53 | def forward(self, *args, **kwargs): 54 | if self.ldm_patched_cast_weights: 55 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 56 | else: 57 | return super().forward(*args, **kwargs) 58 | 59 | class GroupNorm(torch.nn.GroupNorm): 60 | ldm_patched_cast_weights = False 61 | def reset_parameters(self): 62 | return None 63 | 64 | def forward_ldm_patched_cast_weights(self, input): 65 | weight, bias = cast_bias_weight(self, input) 66 | return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps) 67 | 68 | def forward(self, *args, **kwargs): 69 | if self.ldm_patched_cast_weights: 70 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 71 | else: 72 | return super().forward(*args, **kwargs) 73 | 74 | 75 | class LayerNorm(torch.nn.LayerNorm): 76 | ldm_patched_cast_weights = False 77 | def reset_parameters(self): 78 | return None 79 | 80 | def forward_ldm_patched_cast_weights(self, input): 81 | weight, bias = cast_bias_weight(self, input) 82 | return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps) 83 | 84 | def forward(self, *args, **kwargs): 85 | if self.ldm_patched_cast_weights: 86 | return self.forward_ldm_patched_cast_weights(*args, **kwargs) 87 | else: 88 | return super().forward(*args, **kwargs) 89 | 90 | @classmethod 91 | def conv_nd(s, dims, *args, **kwargs): 92 | if dims == 2: 93 | return s.Conv2d(*args, **kwargs) 94 | elif dims == 3: 95 | return s.Conv3d(*args, **kwargs) 96 | else: 97 | raise ValueError(f"unsupported dimensions: {dims}") 98 | 99 | 100 | class manual_cast(disable_weight_init): 101 | class Linear(disable_weight_init.Linear): 102 | ldm_patched_cast_weights = True 103 | 104 | class Conv2d(disable_weight_init.Conv2d): 105 | ldm_patched_cast_weights = True 106 | 107 | class Conv3d(disable_weight_init.Conv3d): 108 | ldm_patched_cast_weights = True 109 | 110 | class GroupNorm(disable_weight_init.GroupNorm): 111 | ldm_patched_cast_weights = True 112 | 113 | class LayerNorm(disable_weight_init.LayerNorm): 114 | ldm_patched_cast_weights = True 115 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/options.py: -------------------------------------------------------------------------------- 1 | 2 | args_parsing = False 3 | 4 | def enable_args_parsing(enable=True): 5 | global args_parsing 6 | args_parsing = enable 7 | 
-------------------------------------------------------------------------------- /py/ldm_patched/modules/sd1_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "openai/clip-vit-large-patch14", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "quick_gelu", 11 | "hidden_size": 768, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 3072, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 12, 19 | "num_hidden_layers": 12, 20 | "pad_token_id": 1, 21 | "projection_dim": 768, 22 | "torch_dtype": "float32", 23 | "transformers_version": "4.24.0", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/sd1_tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "<|startoftext|>", 4 | "lstrip": false, 5 | "normalized": true, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": true, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "<|endoftext|>", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": true, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/sd1_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": { 4 | "__type": "AddedToken", 5 | "content": "<|startoftext|>", 6 | "lstrip": false, 7 | "normalized": true, 8 | "rstrip": false, 9 | "single_word": false 10 | }, 11 | "do_lower_case": true, 12 | "eos_token": { 13 | "__type": "AddedToken", 14 | "content": "<|endoftext|>", 15 | "lstrip": false, 16 | "normalized": true, 17 | "rstrip": false, 18 | "single_word": false 19 | }, 20 | "errors": "replace", 21 | "model_max_length": 77, 22 | "name_or_path": "openai/clip-vit-large-patch14", 23 | "pad_token": "<|endoftext|>", 24 | "special_tokens_map_file": "./special_tokens_map.json", 25 | "tokenizer_class": "CLIPTokenizer", 26 | "unk_token": { 27 | "__type": "AddedToken", 28 | "content": "<|endoftext|>", 29 | "lstrip": false, 30 | "normalized": true, 31 | "rstrip": false, 32 | "single_word": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/sd2_clip.py: -------------------------------------------------------------------------------- 1 | from ldm_patched.modules import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SD2ClipHModel(sd1_clip.SDClipModel): 6 | def __init__(self, arch="ViT-H-14", device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd2_clip_config.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"start": 49406, "end": 
49407, "pad": 0}) 13 | 14 | class SD2ClipHTokenizer(sd1_clip.SDTokenizer): 15 | def __init__(self, tokenizer_path=None, embedding_directory=None): 16 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024) 17 | 18 | class SD2Tokenizer(sd1_clip.SD1Tokenizer): 19 | def __init__(self, embedding_directory=None): 20 | super().__init__(embedding_directory=embedding_directory, clip_name="h", tokenizer=SD2ClipHTokenizer) 21 | 22 | class SD2ClipModel(sd1_clip.SD1ClipModel): 23 | def __init__(self, device="cpu", dtype=None, **kwargs): 24 | super().__init__(device=device, dtype=dtype, clip_name="h", clip_model=SD2ClipHModel, **kwargs) 25 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/sd2_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1024, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 4096, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 16, 18 | "num_hidden_layers": 24, 19 | "pad_token_id": 1, 20 | "projection_dim": 1024, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/sdxl_clip.py: -------------------------------------------------------------------------------- 1 | from ldm_patched.modules import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SDXLClipG(sd1_clip.SDClipModel): 6 | def __init__(self, device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_config_bigg.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, 13 | special_tokens={"start": 49406, "end": 49407, "pad": 0}, layer_norm_hidden_state=False) 14 | 15 | def load_sd(self, sd): 16 | return super().load_sd(sd) 17 | 18 | class SDXLClipGTokenizer(sd1_clip.SDTokenizer): 19 | def __init__(self, tokenizer_path=None, embedding_directory=None): 20 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g') 21 | 22 | 23 | class SDXLTokenizer: 24 | def __init__(self, embedding_directory=None): 25 | self.clip_l = sd1_clip.SDTokenizer(embedding_directory=embedding_directory) 26 | self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory) 27 | 28 | def tokenize_with_weights(self, text:str, return_word_ids=False): 29 | out = {} 30 | out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) 31 | out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) 32 | return out 33 | 34 | def untokenize(self, token_weight_pair): 35 | return self.clip_g.untokenize(token_weight_pair) 36 | 37 | class SDXLClipModel(torch.nn.Module): 38 | def __init__(self, device="cpu", dtype=None): 39 | super().__init__() 40 | self.clip_l = sd1_clip.SDClipModel(layer="hidden", layer_idx=-2, device=device, dtype=dtype, layer_norm_hidden_state=False) 41 | 
self.clip_g = SDXLClipG(device=device, dtype=dtype) 42 | 43 | def clip_layer(self, layer_idx): 44 | self.clip_l.clip_layer(layer_idx) 45 | self.clip_g.clip_layer(layer_idx) 46 | 47 | def reset_clip_layer(self): 48 | self.clip_g.reset_clip_layer() 49 | self.clip_l.reset_clip_layer() 50 | 51 | def encode_token_weights(self, token_weight_pairs): 52 | token_weight_pairs_g = token_weight_pairs["g"] 53 | token_weight_pairs_l = token_weight_pairs["l"] 54 | g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs_g) 55 | l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l) 56 | return torch.cat([l_out, g_out], dim=-1), g_pooled 57 | 58 | def load_sd(self, sd): 59 | if "text_model.encoder.layers.30.mlp.fc1.weight" in sd: 60 | return self.clip_g.load_sd(sd) 61 | else: 62 | return self.clip_l.load_sd(sd) 63 | 64 | class SDXLRefinerClipModel(sd1_clip.SD1ClipModel): 65 | def __init__(self, device="cpu", dtype=None): 66 | super().__init__(device=device, dtype=dtype, clip_name="g", clip_model=SDXLClipG) 67 | -------------------------------------------------------------------------------- /py/ldm_patched/modules/supported_models_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . import model_base 3 | from . import utils 4 | from . import latent_formats 5 | 6 | class ClipTarget: 7 | def __init__(self, tokenizer, clip): 8 | self.clip = clip 9 | self.tokenizer = tokenizer 10 | self.params = {} 11 | 12 | class BASE: 13 | unet_config = {} 14 | unet_extra_config = { 15 | "num_heads": -1, 16 | "num_head_channels": 64, 17 | } 18 | 19 | clip_prefix = [] 20 | clip_vision_prefix = None 21 | noise_aug_config = None 22 | sampling_settings = {} 23 | latent_format = latent_formats.LatentFormat 24 | 25 | manual_cast_dtype = None 26 | 27 | @classmethod 28 | def matches(s, unet_config): 29 | for k in s.unet_config: 30 | if s.unet_config[k] != unet_config[k]: 31 | return False 32 | return True 33 | 34 | def model_type(self, state_dict, prefix=""): 35 | return model_base.ModelType.EPS 36 | 37 | def inpaint_model(self): 38 | return self.unet_config["in_channels"] > 4 39 | 40 | def __init__(self, unet_config): 41 | self.unet_config = unet_config 42 | self.latent_format = self.latent_format() 43 | for x in self.unet_extra_config: 44 | self.unet_config[x] = self.unet_extra_config[x] 45 | 46 | def get_model(self, state_dict, prefix="", device=None): 47 | if self.noise_aug_config is not None: 48 | out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=self.model_type(state_dict, prefix), device=device) 49 | else: 50 | out = model_base.BaseModel(self, model_type=self.model_type(state_dict, prefix), device=device) 51 | if self.inpaint_model(): 52 | out.set_inpaint() 53 | return out 54 | 55 | def process_clip_state_dict(self, state_dict): 56 | return state_dict 57 | 58 | def process_unet_state_dict(self, state_dict): 59 | return state_dict 60 | 61 | def process_vae_state_dict(self, state_dict): 62 | return state_dict 63 | 64 | def process_clip_state_dict_for_saving(self, state_dict): 65 | replace_prefix = {"": "cond_stage_model."} 66 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 67 | 68 | def process_clip_vision_state_dict_for_saving(self, state_dict): 69 | replace_prefix = {} 70 | if self.clip_vision_prefix is not None: 71 | replace_prefix[""] = self.clip_vision_prefix 72 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 73 | 74 | def process_unet_state_dict_for_saving(self, 
state_dict): 75 | replace_prefix = {"": "model.diffusion_model."} 76 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 77 | 78 | def process_vae_state_dict_for_saving(self, state_dict): 79 | replace_prefix = {"": "first_stage_model."} 80 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 81 | 82 | def set_manual_cast(self, manual_cast_dtype): 83 | self.manual_cast_dtype = manual_cast_dtype 84 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/ldm_patched/pfn/__init__.py -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/LICENSE-HAT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Xiangyu Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/LICENSE-RealESRGAN: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Xintao Wang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/OmniSR/ChannelAttention.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class CA_layer(nn.Module): 7 | def __init__(self, channel, reduction=16): 8 | super(CA_layer, self).__init__() 9 | # global average pooling 10 | self.gap = nn.AdaptiveAvgPool2d(1) 11 | self.fc = nn.Sequential( 12 | nn.Conv2d(channel, channel // reduction, kernel_size=(1, 1), bias=False), 13 | nn.GELU(), 14 | nn.Conv2d(channel // reduction, channel, kernel_size=(1, 1), bias=False), 15 | # nn.Sigmoid() 16 | ) 17 | 18 | def forward(self, x): 19 | y = self.fc(self.gap(x)) 20 | return x * y.expand_as(x) 21 | 22 | 23 | class Simple_CA_layer(nn.Module): 24 | def __init__(self, channel): 25 | super(Simple_CA_layer, self).__init__() 26 | self.gap = nn.AdaptiveAvgPool2d(1) 27 | self.fc = nn.Conv2d( 28 | in_channels=channel, 29 | out_channels=channel, 30 | kernel_size=1, 31 | padding=0, 32 | stride=1, 33 | groups=1, 34 | bias=True, 35 | ) 36 | 37 | def forward(self, x): 38 | return x * self.fc(self.gap(x)) 39 | 40 | 41 | class ECA_layer(nn.Module): 42 | """Constructs a ECA module. 43 | Args: 44 | channel: Number of channels of the input feature map 45 | k_size: Adaptive selection of kernel size 46 | """ 47 | 48 | def __init__(self, channel): 49 | super(ECA_layer, self).__init__() 50 | 51 | b = 1 52 | gamma = 2 53 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 54 | k_size = k_size if k_size % 2 else k_size + 1 55 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 56 | self.conv = nn.Conv1d( 57 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 58 | ) 59 | # self.sigmoid = nn.Sigmoid() 60 | 61 | def forward(self, x): 62 | # x: input features with shape [b, c, h, w] 63 | # b, c, h, w = x.size() 64 | 65 | # feature descriptor on the global spatial information 66 | y = self.avg_pool(x) 67 | 68 | # Two different branches of ECA module 69 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 70 | 71 | # Multi-scale information fusion 72 | # y = self.sigmoid(y) 73 | 74 | return x * y.expand_as(x) 75 | 76 | 77 | class ECA_MaxPool_layer(nn.Module): 78 | """Constructs a ECA module. 
79 | Args: 80 | channel: Number of channels of the input feature map 81 | k_size: Adaptive selection of kernel size 82 | """ 83 | 84 | def __init__(self, channel): 85 | super(ECA_MaxPool_layer, self).__init__() 86 | 87 | b = 1 88 | gamma = 2 89 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 90 | k_size = k_size if k_size % 2 else k_size + 1 91 | self.max_pool = nn.AdaptiveMaxPool2d(1) 92 | self.conv = nn.Conv1d( 93 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 94 | ) 95 | # self.sigmoid = nn.Sigmoid() 96 | 97 | def forward(self, x): 98 | # x: input features with shape [b, c, h, w] 99 | # b, c, h, w = x.size() 100 | 101 | # feature descriptor on the global spatial information 102 | y = self.max_pool(x) 103 | 104 | # Two different branches of ECA module 105 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 106 | 107 | # Multi-scale information fusion 108 | # y = self.sigmoid(y) 109 | 110 | return x * y.expand_as(x) 111 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/OmniSR/OSAG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OSAG.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:08:49 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | 14 | import torch.nn as nn 15 | 16 | from .esa import ESA 17 | from .OSA import OSA_Block 18 | 19 | 20 | class OSAG(nn.Module): 21 | def __init__( 22 | self, 23 | channel_num=64, 24 | bias=True, 25 | block_num=4, 26 | ffn_bias=False, 27 | window_size=0, 28 | pe=False, 29 | ): 30 | super(OSAG, self).__init__() 31 | 32 | # print("window_size: %d" % (window_size)) 33 | # print("with_pe", pe) 34 | # print("ffn_bias: %d" % (ffn_bias)) 35 | 36 | # block_script_name = kwargs.get("block_script_name", "OSA") 37 | # block_class_name = kwargs.get("block_class_name", "OSA_Block") 38 | 39 | # script_name = "." 
+ block_script_name 40 | # package = __import__(script_name, fromlist=True) 41 | block_class = OSA_Block # getattr(package, block_class_name) 42 | group_list = [] 43 | for _ in range(block_num): 44 | temp_res = block_class( 45 | channel_num, 46 | bias, 47 | ffn_bias=ffn_bias, 48 | window_size=window_size, 49 | with_pe=pe, 50 | ) 51 | group_list.append(temp_res) 52 | group_list.append(nn.Conv2d(channel_num, channel_num, 1, 1, 0, bias=bias)) 53 | self.residual_layer = nn.Sequential(*group_list) 54 | esa_channel = max(channel_num // 4, 16) 55 | self.esa = ESA(esa_channel, channel_num) 56 | 57 | def forward(self, x): 58 | out = self.residual_layer(x) 59 | out = out + x 60 | return self.esa(out) 61 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/OmniSR/OmniSR.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OmniSR.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:06:36 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import math 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | 19 | from .OSAG import OSAG 20 | from .pixelshuffle import pixelshuffle_block 21 | 22 | 23 | class OmniSR(nn.Module): 24 | def __init__( 25 | self, 26 | state_dict, 27 | **kwargs, 28 | ): 29 | super(OmniSR, self).__init__() 30 | self.state = state_dict 31 | 32 | bias = True # Fine to assume this for now 33 | block_num = 1 # Fine to assume this for now 34 | ffn_bias = True 35 | pe = True 36 | 37 | num_feat = state_dict["input.weight"].shape[0] or 64 38 | num_in_ch = state_dict["input.weight"].shape[1] or 3 39 | num_out_ch = num_in_ch # we can just assume this for now. 
pixelshuffle smh 40 | 41 | pixelshuffle_shape = state_dict["up.0.weight"].shape[0] 42 | up_scale = math.sqrt(pixelshuffle_shape / num_out_ch) 43 | if up_scale - int(up_scale) > 0: 44 | print( 45 | "out_nc is probably different than in_nc, scale calculation might be wrong" 46 | ) 47 | up_scale = int(up_scale) 48 | res_num = 0 49 | for key in state_dict.keys(): 50 | if "residual_layer" in key: 51 | temp_res_num = int(key.split(".")[1]) 52 | if temp_res_num > res_num: 53 | res_num = temp_res_num 54 | res_num = res_num + 1 # zero-indexed 55 | 56 | residual_layer = [] 57 | self.res_num = res_num 58 | 59 | if ( 60 | "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" 61 | in state_dict.keys() 62 | ): 63 | rel_pos_bias_weight = state_dict[ 64 | "residual_layer.0.residual_layer.0.layer.2.fn.rel_pos_bias.weight" 65 | ].shape[0] 66 | self.window_size = int((math.sqrt(rel_pos_bias_weight) + 1) / 2) 67 | else: 68 | self.window_size = 8 69 | 70 | self.up_scale = up_scale 71 | 72 | for _ in range(res_num): 73 | temp_res = OSAG( 74 | channel_num=num_feat, 75 | bias=bias, 76 | block_num=block_num, 77 | ffn_bias=ffn_bias, 78 | window_size=self.window_size, 79 | pe=pe, 80 | ) 81 | residual_layer.append(temp_res) 82 | self.residual_layer = nn.Sequential(*residual_layer) 83 | self.input = nn.Conv2d( 84 | in_channels=num_in_ch, 85 | out_channels=num_feat, 86 | kernel_size=3, 87 | stride=1, 88 | padding=1, 89 | bias=bias, 90 | ) 91 | self.output = nn.Conv2d( 92 | in_channels=num_feat, 93 | out_channels=num_feat, 94 | kernel_size=3, 95 | stride=1, 96 | padding=1, 97 | bias=bias, 98 | ) 99 | self.up = pixelshuffle_block(num_feat, num_out_ch, up_scale, bias=bias) 100 | 101 | # self.tail = pixelshuffle_block(num_feat,num_out_ch,up_scale,bias=bias) 102 | 103 | # for m in self.modules(): 104 | # if isinstance(m, nn.Conv2d): 105 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 106 | # m.weight.data.normal_(0, sqrt(2. 
/ n)) 107 | 108 | # chaiNNer specific stuff 109 | self.model_arch = "OmniSR" 110 | self.sub_type = "SR" 111 | self.in_nc = num_in_ch 112 | self.out_nc = num_out_ch 113 | self.num_feat = num_feat 114 | self.scale = up_scale 115 | 116 | self.supports_fp16 = True # TODO: Test this 117 | self.supports_bfp16 = True 118 | self.min_size_restriction = 16 119 | 120 | self.load_state_dict(state_dict, strict=False) 121 | 122 | def check_image_size(self, x): 123 | _, _, h, w = x.size() 124 | # import pdb; pdb.set_trace() 125 | mod_pad_h = (self.window_size - h % self.window_size) % self.window_size 126 | mod_pad_w = (self.window_size - w % self.window_size) % self.window_size 127 | # x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') 128 | x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "constant", 0) 129 | return x 130 | 131 | def forward(self, x): 132 | H, W = x.shape[2:] 133 | x = self.check_image_size(x) 134 | 135 | residual = self.input(x) 136 | out = self.residual_layer(residual) 137 | 138 | # origin 139 | out = torch.add(self.output(out), residual) 140 | out = self.up(out) 141 | 142 | out = out[:, :, : H * self.up_scale, : W * self.up_scale] 143 | return out 144 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/OmniSR/layernorm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: layernorm.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Thursday, 20th April 2023 9:28:20 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | 17 | class LayerNormFunction(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, weight, bias, eps): 20 | ctx.eps = eps 21 | N, C, H, W = x.size() 22 | mu = x.mean(1, keepdim=True) 23 | var = (x - mu).pow(2).mean(1, keepdim=True) 24 | y = (x - mu) / (var + eps).sqrt() 25 | ctx.save_for_backward(y, var, weight) 26 | y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) 27 | return y 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | eps = ctx.eps 32 | 33 | N, C, H, W = grad_output.size() 34 | y, var, weight = ctx.saved_variables 35 | g = grad_output * weight.view(1, C, 1, 1) 36 | mean_g = g.mean(dim=1, keepdim=True) 37 | 38 | mean_gy = (g * y).mean(dim=1, keepdim=True) 39 | gx = 1.0 / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) 40 | return ( 41 | gx, 42 | (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), 43 | grad_output.sum(dim=3).sum(dim=2).sum(dim=0), 44 | None, 45 | ) 46 | 47 | 48 | class LayerNorm2d(nn.Module): 49 | def __init__(self, channels, eps=1e-6): 50 | super(LayerNorm2d, self).__init__() 51 | self.register_parameter("weight", nn.Parameter(torch.ones(channels))) 52 | self.register_parameter("bias", nn.Parameter(torch.zeros(channels))) 53 | self.eps = eps 54 | 55 | def forward(self, x): 56 | return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) 57 | 58 | 59 | class GRN(nn.Module): 60 | """GRN (Global Response Normalization) layer""" 61 | 62 | def __init__(self, dim): 63 | super().__init__() 64 | self.gamma = nn.Parameter(torch.zeros(1, dim, 1, 1)) 65 | self.beta = nn.Parameter(torch.zeros(1, dim, 1, 1)) 66 | 67 | def forward(self, x): 
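        # Global Response Normalization (the GRN layer introduced with ConvNeXt V2), computed below as:
        #   Gx  = ||x||_2 over the spatial dims (H, W), one value per channel  -> shape (N, C, 1, 1)
        #   Nx  = Gx / (mean over channels of Gx + 1e-6)                       -> relative channel response
        #   out = gamma * (x * Nx) + beta + x                                  -> learned affine plus residual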
68 | Gx = torch.norm(x, p=2, dim=(2, 3), keepdim=True) 69 | Nx = Gx / (Gx.mean(dim=1, keepdim=True) + 1e-6) 70 | return self.gamma * (x * Nx) + self.beta + x 71 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/OmniSR/pixelshuffle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: pixelshuffle.py 5 | # Created Date: Friday July 1st 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Friday, 1st July 2022 10:18:39 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2022 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch.nn as nn 14 | 15 | 16 | def pixelshuffle_block( 17 | in_channels, out_channels, upscale_factor=2, kernel_size=3, bias=False 18 | ): 19 | """ 20 | Upsample features according to `upscale_factor`. 21 | """ 22 | padding = kernel_size // 2 23 | conv = nn.Conv2d( 24 | in_channels, 25 | out_channels * (upscale_factor**2), 26 | kernel_size, 27 | padding=1, 28 | bias=bias, 29 | ) 30 | pixel_shuffle = nn.PixelShuffle(upscale_factor) 31 | return nn.Sequential(*[conv, pixel_shuffle]) 32 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/SRVGG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import math 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class SRVGGNetCompact(nn.Module): 11 | """A compact VGG-style network structure for super-resolution. 12 | It is a compact network structure, which performs upsampling in the last layer and no convolution is 13 | conducted on the HR feature space. 14 | Args: 15 | num_in_ch (int): Channel number of inputs. Default: 3. 16 | num_out_ch (int): Channel number of outputs. Default: 3. 17 | num_feat (int): Channel number of intermediate features. Default: 64. 18 | num_conv (int): Number of convolution layers in the body network. Default: 16. 19 | upscale (int): Upsampling factor. Default: 4. 20 | act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu. 
21 | """ 22 | 23 | def __init__( 24 | self, 25 | state_dict, 26 | act_type: str = "prelu", 27 | ): 28 | super(SRVGGNetCompact, self).__init__() 29 | self.model_arch = "SRVGG (RealESRGAN)" 30 | self.sub_type = "SR" 31 | 32 | self.act_type = act_type 33 | 34 | self.state = state_dict 35 | 36 | if "params" in self.state: 37 | self.state = self.state["params"] 38 | 39 | self.key_arr = list(self.state.keys()) 40 | 41 | self.in_nc = self.get_in_nc() 42 | self.num_feat = self.get_num_feats() 43 | self.num_conv = self.get_num_conv() 44 | self.out_nc = self.in_nc # :( 45 | self.pixelshuffle_shape = None # Defined in get_scale() 46 | self.scale = self.get_scale() 47 | 48 | self.supports_fp16 = True 49 | self.supports_bfp16 = True 50 | self.min_size_restriction = None 51 | 52 | self.body = nn.ModuleList() 53 | # the first conv 54 | self.body.append(nn.Conv2d(self.in_nc, self.num_feat, 3, 1, 1)) 55 | # the first activation 56 | if act_type == "relu": 57 | activation = nn.ReLU(inplace=True) 58 | elif act_type == "prelu": 59 | activation = nn.PReLU(num_parameters=self.num_feat) 60 | elif act_type == "leakyrelu": 61 | activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) 62 | self.body.append(activation) # type: ignore 63 | 64 | # the body structure 65 | for _ in range(self.num_conv): 66 | self.body.append(nn.Conv2d(self.num_feat, self.num_feat, 3, 1, 1)) 67 | # activation 68 | if act_type == "relu": 69 | activation = nn.ReLU(inplace=True) 70 | elif act_type == "prelu": 71 | activation = nn.PReLU(num_parameters=self.num_feat) 72 | elif act_type == "leakyrelu": 73 | activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) 74 | self.body.append(activation) # type: ignore 75 | 76 | # the last conv 77 | self.body.append(nn.Conv2d(self.num_feat, self.pixelshuffle_shape, 3, 1, 1)) # type: ignore 78 | # upsample 79 | self.upsampler = nn.PixelShuffle(self.scale) 80 | 81 | self.load_state_dict(self.state, strict=False) 82 | 83 | def get_num_conv(self) -> int: 84 | return (int(self.key_arr[-1].split(".")[1]) - 2) // 2 85 | 86 | def get_num_feats(self) -> int: 87 | return self.state[self.key_arr[0]].shape[0] 88 | 89 | def get_in_nc(self) -> int: 90 | return self.state[self.key_arr[0]].shape[1] 91 | 92 | def get_scale(self) -> int: 93 | self.pixelshuffle_shape = self.state[self.key_arr[-1]].shape[0] 94 | # Assume out_nc is the same as in_nc 95 | # I cant think of a better way to do that 96 | self.out_nc = self.in_nc 97 | scale = math.sqrt(self.pixelshuffle_shape / self.out_nc) 98 | if scale - int(scale) > 0: 99 | print( 100 | "out_nc is probably different than in_nc, scale calculation might be wrong" 101 | ) 102 | scale = int(scale) 103 | return scale 104 | 105 | def forward(self, x): 106 | out = x 107 | for i in range(0, len(self.body)): 108 | out = self.body[i](out) 109 | 110 | out = self.upsampler(out) 111 | # add the nearest upsampled image, so that the network learns the residual 112 | base = F.interpolate(x, scale_factor=self.scale, mode="nearest") 113 | out += base 114 | return out 115 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/ldm_patched/pfn/architecture/__init__.py -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/face/LICENSE-codeformer: 
-------------------------------------------------------------------------------- 1 | S-Lab License 1.0 2 | 3 | Copyright 2022 S-Lab 4 | 5 | Redistribution and use for non-commercial purpose in source and 6 | binary forms, with or without modification, are permitted provided 7 | that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | In the event that redistribution and/or use for commercial purpose in 34 | source or binary forms, with or without modification is required, 35 | please contact the contributor(s) of the work. 
36 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/face/fused_act.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # type: ignore 3 | # modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 4 | 5 | import torch 6 | from torch import nn 7 | from torch.autograd import Function 8 | 9 | fused_act_ext = None 10 | 11 | 12 | class FusedLeakyReLUFunctionBackward(Function): 13 | @staticmethod 14 | def forward(ctx, grad_output, out, negative_slope, scale): 15 | ctx.save_for_backward(out) 16 | ctx.negative_slope = negative_slope 17 | ctx.scale = scale 18 | 19 | empty = grad_output.new_empty(0) 20 | 21 | grad_input = fused_act_ext.fused_bias_act( 22 | grad_output, empty, out, 3, 1, negative_slope, scale 23 | ) 24 | 25 | dim = [0] 26 | 27 | if grad_input.ndim > 2: 28 | dim += list(range(2, grad_input.ndim)) 29 | 30 | grad_bias = grad_input.sum(dim).detach() 31 | 32 | return grad_input, grad_bias 33 | 34 | @staticmethod 35 | def backward(ctx, gradgrad_input, gradgrad_bias): 36 | (out,) = ctx.saved_tensors 37 | gradgrad_out = fused_act_ext.fused_bias_act( 38 | gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale 39 | ) 40 | 41 | return gradgrad_out, None, None, None 42 | 43 | 44 | class FusedLeakyReLUFunction(Function): 45 | @staticmethod 46 | def forward(ctx, input, bias, negative_slope, scale): 47 | empty = input.new_empty(0) 48 | out = fused_act_ext.fused_bias_act( 49 | input, bias, empty, 3, 0, negative_slope, scale 50 | ) 51 | ctx.save_for_backward(out) 52 | ctx.negative_slope = negative_slope 53 | ctx.scale = scale 54 | 55 | return out 56 | 57 | @staticmethod 58 | def backward(ctx, grad_output): 59 | (out,) = ctx.saved_tensors 60 | 61 | grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply( 62 | grad_output, out, ctx.negative_slope, ctx.scale 63 | ) 64 | 65 | return grad_input, grad_bias, None, None 66 | 67 | 68 | class FusedLeakyReLU(nn.Module): 69 | def __init__(self, channel, negative_slope=0.2, scale=2**0.5): 70 | super().__init__() 71 | 72 | self.bias = nn.Parameter(torch.zeros(channel)) 73 | self.negative_slope = negative_slope 74 | self.scale = scale 75 | 76 | def forward(self, input): 77 | return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale) 78 | 79 | 80 | def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5): 81 | return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) 82 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/timm/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import collections.abc 5 | from itertools import repeat 6 | 7 | 8 | # From PyTorch internals 9 | def _ntuple(n): 10 | def parse(x): 11 | if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): 12 | return x 13 | return tuple(repeat(x, n)) 14 | 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=0.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # Make sure that round down does not go down by more 
than 10%. 29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/architecture/timm/weight_init.py: -------------------------------------------------------------------------------- 1 | import math 2 | import warnings 3 | 4 | import torch 5 | from torch.nn.init import _calculate_fan_in_and_fan_out 6 | 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn( 17 | "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 18 | "The distribution of values may be incorrect.", 19 | stacklevel=2, 20 | ) 21 | 22 | with torch.no_grad(): 23 | # Values are generated by using a truncated uniform distribution and 24 | # then using the inverse CDF for the normal distribution. 25 | # Get upper and lower cdf values 26 | l = norm_cdf((a - mean) / std) 27 | u = norm_cdf((b - mean) / std) 28 | 29 | # Uniformly fill tensor with values from [l, u], then translate to 30 | # [2l-1, 2u-1]. 31 | tensor.uniform_(2 * l - 1, 2 * u - 1) 32 | 33 | # Use inverse cdf transform for normal distribution to get truncated 34 | # standard normal 35 | tensor.erfinv_() 36 | 37 | # Transform to proper mean, std 38 | tensor.mul_(std * math.sqrt(2.0)) 39 | tensor.add_(mean) 40 | 41 | # Clamp to ensure it's in the proper range 42 | tensor.clamp_(min=a, max=b) 43 | return tensor 44 | 45 | 46 | def trunc_normal_( 47 | tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 48 | ) -> torch.Tensor: 49 | r"""Fills the input Tensor with values drawn from a truncated 50 | normal distribution. The values are effectively drawn from the 51 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 52 | with values outside :math:`[a, b]` redrawn until they are within 53 | the bounds. The method used for generating the random values works 54 | best when :math:`a \leq \text{mean} \leq b`. 55 | 56 | NOTE: this impl is similar to the PyTorch trunc_normal_, the bounds [a, b] are 57 | applied while sampling the normal with mean/std applied, therefore a, b args 58 | should be adjusted to match the range of mean, std args. 59 | 60 | Args: 61 | tensor: an n-dimensional `torch.Tensor` 62 | mean: the mean of the normal distribution 63 | std: the standard deviation of the normal distribution 64 | a: the minimum cutoff value 65 | b: the maximum cutoff value 66 | Examples: 67 | >>> w = torch.empty(3, 5) 68 | >>> nn.init.trunc_normal_(w) 69 | """ 70 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 71 | 72 | 73 | def trunc_normal_tf_( 74 | tensor: torch.Tensor, mean=0.0, std=1.0, a=-2.0, b=2.0 75 | ) -> torch.Tensor: 76 | r"""Fills the input Tensor with values drawn from a truncated 77 | normal distribution. The values are effectively drawn from the 78 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 79 | with values outside :math:`[a, b]` redrawn until they are within 80 | the bounds. The method used for generating the random values works 81 | best when :math:`a \leq \text{mean} \leq b`. 
82 | 83 | NOTE: this 'tf' variant behaves closer to Tensorflow / JAX impl where the 84 | bounds [a, b] are applied when sampling the normal distribution with mean=0, std=1.0 85 | and the result is subsquently scaled and shifted by the mean and std args. 86 | 87 | Args: 88 | tensor: an n-dimensional `torch.Tensor` 89 | mean: the mean of the normal distribution 90 | std: the standard deviation of the normal distribution 91 | a: the minimum cutoff value 92 | b: the maximum cutoff value 93 | Examples: 94 | >>> w = torch.empty(3, 5) 95 | >>> nn.init.trunc_normal_(w) 96 | """ 97 | _no_grad_trunc_normal_(tensor, 0, 1.0, a, b) 98 | with torch.no_grad(): 99 | tensor.mul_(std).add_(mean) 100 | return tensor 101 | 102 | 103 | def variance_scaling_(tensor, scale=1.0, mode="fan_in", distribution="normal"): 104 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 105 | if mode == "fan_in": 106 | denom = fan_in 107 | elif mode == "fan_out": 108 | denom = fan_out 109 | elif mode == "fan_avg": 110 | denom = (fan_in + fan_out) / 2 111 | 112 | variance = scale / denom # type: ignore 113 | 114 | if distribution == "truncated_normal": 115 | # constant is stddev of standard normal truncated to (-2, 2) 116 | trunc_normal_tf_(tensor, std=math.sqrt(variance) / 0.87962566103423978) 117 | elif distribution == "normal": 118 | tensor.normal_(std=math.sqrt(variance)) 119 | elif distribution == "uniform": 120 | bound = math.sqrt(3 * variance) 121 | # pylint: disable=invalid-unary-operand-type 122 | tensor.uniform_(-bound, bound) 123 | else: 124 | raise ValueError(f"invalid distribution {distribution}") 125 | 126 | 127 | def lecun_normal_(tensor): 128 | variance_scaling_(tensor, mode="fan_in", distribution="truncated_normal") 129 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/model_loading.py: -------------------------------------------------------------------------------- 1 | import logging as logger 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | from .types import PyTorchModel 18 | 19 | 20 | class UnsupportedModel(Exception): 21 | pass 22 | 23 | 24 | def load_state_dict(state_dict) -> PyTorchModel: 25 | logger.debug(f"Loading state dict into pytorch model arch") 26 | 27 | state_dict_keys = list(state_dict.keys()) 28 | 29 | if "params_ema" in state_dict_keys: 30 | state_dict = state_dict["params_ema"] 31 | elif "params-ema" in state_dict_keys: 32 | state_dict = state_dict["params-ema"] 33 | elif "params" in state_dict_keys: 34 | state_dict = state_dict["params"] 35 | 36 | state_dict_keys = list(state_dict.keys()) 37 | # SRVGGNet Real-ESRGAN (v2) 38 | if "body.0.weight" in state_dict_keys and "body.1.weight" in state_dict_keys: 39 | model = RealESRGANv2(state_dict) 40 | # SPSR (ESRGAN with lots of extra layers) 41 | elif "f_HR_conv1.0.weight" in state_dict: 42 | model = 
SPSR(state_dict) 43 | # Swift-SRGAN 44 | elif ( 45 | "model" in state_dict_keys 46 | and "initial.cnn.depthwise.weight" in state_dict["model"].keys() 47 | ): 48 | model = SwiftSRGAN(state_dict) 49 | # SwinIR, Swin2SR, HAT 50 | elif "layers.0.residual_group.blocks.0.norm1.weight" in state_dict_keys: 51 | if ( 52 | "layers.0.residual_group.blocks.0.conv_block.cab.0.weight" 53 | in state_dict_keys 54 | ): 55 | model = HAT(state_dict) 56 | elif "patch_embed.proj.weight" in state_dict_keys: 57 | model = Swin2SR(state_dict) 58 | else: 59 | model = SwinIR(state_dict) 60 | # GFPGAN 61 | elif ( 62 | "toRGB.0.weight" in state_dict_keys 63 | and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys 64 | ): 65 | model = GFPGANv1Clean(state_dict) 66 | # RestoreFormer 67 | elif ( 68 | "encoder.conv_in.weight" in state_dict_keys 69 | and "encoder.down.0.block.0.norm1.weight" in state_dict_keys 70 | ): 71 | model = RestoreFormer(state_dict) 72 | elif ( 73 | "encoder.blocks.0.weight" in state_dict_keys 74 | and "quantize.embedding.weight" in state_dict_keys 75 | ): 76 | model = CodeFormer(state_dict) 77 | # LaMa 78 | elif ( 79 | "model.model.1.bn_l.running_mean" in state_dict_keys 80 | or "generator.model.1.bn_l.running_mean" in state_dict_keys 81 | ): 82 | model = LaMa(state_dict) 83 | # Omni-SR 84 | elif "residual_layer.0.residual_layer.0.layer.0.fn.0.weight" in state_dict_keys: 85 | model = OmniSR(state_dict) 86 | # SCUNet 87 | elif "m_head.0.weight" in state_dict_keys and "m_tail.0.weight" in state_dict_keys: 88 | model = SCUNet(state_dict) 89 | # DAT 90 | elif "layers.0.blocks.2.attn.attn_mask_0" in state_dict_keys: 91 | model = DAT(state_dict) 92 | # Regular ESRGAN, "new-arch" ESRGAN, Real-ESRGAN v1 93 | else: 94 | try: 95 | model = ESRGAN(state_dict) 96 | except: 97 | # pylint: disable=raise-missing-from 98 | raise UnsupportedModel 99 | return model 100 | -------------------------------------------------------------------------------- /py/ldm_patched/pfn/types.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | 18 | PyTorchSRModels = ( 19 | RealESRGANv2, 20 | SPSR, 21 | SwiftSRGAN, 22 | ESRGAN, 23 | SwinIR, 24 | Swin2SR, 25 | HAT, 26 | OmniSR, 27 | SCUNet, 28 | DAT, 29 | ) 30 | PyTorchSRModel = Union[ 31 | RealESRGANv2, 32 | SPSR, 33 | SwiftSRGAN, 34 | ESRGAN, 35 | SwinIR, 36 | Swin2SR, 37 | HAT, 38 | OmniSR, 39 | SCUNet, 40 | DAT, 41 | ] 42 | 43 | 44 | def is_pytorch_sr_model(model: object): 45 | return isinstance(model, PyTorchSRModels) 46 | 47 | 48 | PyTorchFaceModels = (GFPGANv1Clean, RestoreFormer, CodeFormer) 49 | PyTorchFaceModel = Union[GFPGANv1Clean, RestoreFormer, CodeFormer] 50 | 51 | 52 | def is_pytorch_face_model(model: object): 53 | return isinstance(model, PyTorchFaceModels) 54 | 55 | 56 
| PyTorchInpaintModels = (LaMa,) 57 | PyTorchInpaintModel = Union[LaMa] 58 | 59 | 60 | def is_pytorch_inpaint_model(model: object): 61 | return isinstance(model, PyTorchInpaintModels) 62 | 63 | 64 | PyTorchModels = (*PyTorchSRModels, *PyTorchFaceModels, *PyTorchInpaintModels) 65 | PyTorchModel = Union[PyTorchSRModel, PyTorchFaceModel, PyTorchInpaintModel] 66 | 67 | 68 | def is_pytorch_model(model: object): 69 | return isinstance(model, PyTorchModels) 70 | -------------------------------------------------------------------------------- /py/ldm_patched/taesd/taesd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Tiny AutoEncoder for Stable Diffusion 4 | (DNN for encoding / decoding SD's latent space) 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | 9 | import ldm_patched.modules.utils 10 | import ldm_patched.modules.ops 11 | 12 | def conv(n_in, n_out, **kwargs): 13 | return ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 3, padding=1, **kwargs) 14 | 15 | class Clamp(nn.Module): 16 | def forward(self, x): 17 | return torch.tanh(x / 3) * 3 18 | 19 | class Block(nn.Module): 20 | def __init__(self, n_in, n_out): 21 | super().__init__() 22 | self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out)) 23 | self.skip = ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity() 24 | self.fuse = nn.ReLU() 25 | def forward(self, x): 26 | return self.fuse(self.conv(x) + self.skip(x)) 27 | 28 | def Encoder(): 29 | return nn.Sequential( 30 | conv(3, 64), Block(64, 64), 31 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 32 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 33 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 34 | conv(64, 4), 35 | ) 36 | 37 | def Decoder(): 38 | return nn.Sequential( 39 | Clamp(), conv(4, 64), nn.ReLU(), 40 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 41 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 42 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 43 | Block(64, 64), conv(64, 3), 44 | ) 45 | 46 | class TAESD(nn.Module): 47 | latent_magnitude = 3 48 | latent_shift = 0.5 49 | 50 | def __init__(self, encoder_path=None, decoder_path=None): 51 | """Initialize pretrained TAESD on the given device from the given checkpoints.""" 52 | super().__init__() 53 | self.taesd_encoder = Encoder() 54 | self.taesd_decoder = Decoder() 55 | self.vae_scale = torch.nn.Parameter(torch.tensor(1.0)) 56 | if encoder_path is not None: 57 | self.taesd_encoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(encoder_path, safe_load=True)) 58 | if decoder_path is not None: 59 | self.taesd_decoder.load_state_dict(ldm_patched.modules.utils.load_torch_file(decoder_path, safe_load=True)) 60 | 61 | @staticmethod 62 | def scale_latents(x): 63 | """raw latents -> [0, 1]""" 64 | return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) 65 | 66 | @staticmethod 67 | def unscale_latents(x): 68 | """[0, 1] -> raw latents""" 69 | return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) 70 | 71 | def decode(self, x): 72 | x_sample = self.taesd_decoder(x * self.vae_scale) 73 | x_sample = x_sample.sub(0.5).mul(2) 74 | 
return x_sample 75 | 76 | def encode(self, x): 77 | return self.taesd_encoder(x * 0.5 + 0.5) / self.vae_scale 78 | -------------------------------------------------------------------------------- /py/ldm_patched/utils/latent_visualization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | import struct 4 | import numpy as np 5 | from ldm_patched.modules.args_parser import args, LatentPreviewMethod 6 | from ldm_patched.taesd.taesd import TAESD 7 | import ldm_patched.utils.path_utils 8 | import ldm_patched.modules.utils 9 | 10 | MAX_PREVIEW_RESOLUTION = 512 11 | 12 | class LatentPreviewer: 13 | def decode_latent_to_preview(self, x0): 14 | pass 15 | 16 | def decode_latent_to_preview_image(self, preview_format, x0): 17 | preview_image = self.decode_latent_to_preview(x0) 18 | return ("JPEG", preview_image, MAX_PREVIEW_RESOLUTION) 19 | 20 | class TAESDPreviewerImpl(LatentPreviewer): 21 | def __init__(self, taesd): 22 | self.taesd = taesd 23 | 24 | def decode_latent_to_preview(self, x0): 25 | x_sample = self.taesd.decode(x0[:1])[0].detach() 26 | x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0) 27 | x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) 28 | x_sample = x_sample.astype(np.uint8) 29 | 30 | preview_image = Image.fromarray(x_sample) 31 | return preview_image 32 | 33 | 34 | class Latent2RGBPreviewer(LatentPreviewer): 35 | def __init__(self, latent_rgb_factors): 36 | self.latent_rgb_factors = torch.tensor(latent_rgb_factors, device="cpu") 37 | 38 | def decode_latent_to_preview(self, x0): 39 | latent_image = x0[0].permute(1, 2, 0).cpu() @ self.latent_rgb_factors 40 | 41 | latents_ubyte = (((latent_image + 1) / 2) 42 | .clamp(0, 1) # change scale from -1..1 to 0..1 43 | .mul(0xFF) # to 0..255 44 | .byte()).cpu() 45 | 46 | return Image.fromarray(latents_ubyte.numpy()) 47 | 48 | 49 | def get_previewer(device, latent_format): 50 | previewer = None 51 | method = args.preview_option 52 | if method != LatentPreviewMethod.NoPreviews: 53 | # TODO previewer methods 54 | taesd_decoder_path = None 55 | if latent_format.taesd_decoder_name is not None: 56 | taesd_decoder_path = next( 57 | (fn for fn in ldm_patched.utils.path_utils.get_filename_list("vae_approx") 58 | if fn.startswith(latent_format.taesd_decoder_name)), 59 | "" 60 | ) 61 | taesd_decoder_path = ldm_patched.utils.path_utils.get_full_path("vae_approx", taesd_decoder_path) 62 | 63 | if method == LatentPreviewMethod.Auto: 64 | method = LatentPreviewMethod.Latent2RGB 65 | if taesd_decoder_path: 66 | method = LatentPreviewMethod.TAESD 67 | 68 | if method == LatentPreviewMethod.TAESD: 69 | if taesd_decoder_path: 70 | taesd = TAESD(None, taesd_decoder_path).to(device) 71 | previewer = TAESDPreviewerImpl(taesd) 72 | else: 73 | print("Warning: TAESD previews enabled, but could not find models/vae_approx/{}".format(latent_format.taesd_decoder_name)) 74 | 75 | if previewer is None: 76 | if latent_format.latent_rgb_factors is not None: 77 | previewer = Latent2RGBPreviewer(latent_format.latent_rgb_factors) 78 | return previewer 79 | 80 | def prepare_callback(model, steps, x0_output_dict=None): 81 | preview_format = "JPEG" 82 | if preview_format not in ["JPEG", "PNG"]: 83 | preview_format = "JPEG" 84 | 85 | previewer = get_previewer(model.load_device, model.model.latent_format) 86 | 87 | pbar = ldm_patched.modules.utils.ProgressBar(steps) 88 | def callback(step, x0, x, total_steps): 89 | if x0_output_dict is not None: 90 | x0_output_dict["x0"] = x0 91 
| 92 | preview_bytes = None 93 | if previewer: 94 | preview_bytes = previewer.decode_latent_to_preview_image(preview_format, x0) 95 | pbar.update_absolute(step + 1, total_steps, preview_bytes) 96 | return callback 97 | 98 | -------------------------------------------------------------------------------- /py/libs/utils.py: -------------------------------------------------------------------------------- 1 | def easySave(images, filename_prefix, output_type, prompt=None, extra_pnginfo=None): 2 | """Save or Preview Image""" 3 | from nodes import PreviewImage, SaveImage 4 | if output_type == "Hide": 5 | return list() 6 | if output_type == "Preview": 7 | filename_prefix = 'easyPreview' 8 | results = PreviewImage().save_images(images, filename_prefix, prompt, extra_pnginfo) 9 | return results['ui']['images'] 10 | else: 11 | results = SaveImage().save_images(images, filename_prefix, prompt, extra_pnginfo) 12 | return results['ui']['images'] 13 | -------------------------------------------------------------------------------- /py/log.py: -------------------------------------------------------------------------------- 1 | COLORS_FG = { 2 | 'BLACK': '\33[30m', 3 | 'RED': '\33[31m', 4 | 'GREEN': '\33[32m', 5 | 'YELLOW': '\33[33m', 6 | 'BLUE': '\33[34m', 7 | 'MAGENTA': '\33[35m', 8 | 'CYAN': '\33[36m', 9 | 'WHITE': '\33[37m', 10 | 'GREY': '\33[90m', 11 | 'BRIGHT_RED': '\33[91m', 12 | 'BRIGHT_GREEN': '\33[92m', 13 | 'BRIGHT_YELLOW': '\33[93m', 14 | 'BRIGHT_BLUE': '\33[94m', 15 | 'BRIGHT_MAGENTA': '\33[95m', 16 | 'BRIGHT_CYAN': '\33[96m', 17 | 'BRIGHT_WHITE': '\33[97m', 18 | } 19 | COLORS_STYLE = { 20 | 'RESET': '\33[0m', 21 | 'BOLD': '\33[1m', 22 | 'NORMAL': '\33[22m', 23 | 'ITALIC': '\33[3m', 24 | 'UNDERLINE': '\33[4m', 25 | 'BLINK': '\33[5m', 26 | 'BLINK2': '\33[6m', 27 | 'SELECTED': '\33[7m', 28 | } 29 | COLORS_BG = { 30 | 'BLACK': '\33[40m', 31 | 'RED': '\33[41m', 32 | 'GREEN': '\33[42m', 33 | 'YELLOW': '\33[43m', 34 | 'BLUE': '\33[44m', 35 | 'MAGENTA': '\33[45m', 36 | 'CYAN': '\33[46m', 37 | 'WHITE': '\33[47m', 38 | 'GREY': '\33[100m', 39 | 'BRIGHT_RED': '\33[101m', 40 | 'BRIGHT_GREEN': '\33[102m', 41 | 'BRIGHT_YELLOW': '\33[103m', 42 | 'BRIGHT_BLUE': '\33[104m', 43 | 'BRIGHT_MAGENTA': '\33[105m', 44 | 'BRIGHT_CYAN': '\33[106m', 45 | 'BRIGHT_WHITE': '\33[107m', 46 | } 47 | 48 | 49 | def log_node_success(node_name, message=None): 50 | """Logs a success message.""" 51 | _log_node(COLORS_FG["GREEN"], node_name, message) 52 | 53 | 54 | def log_node_info(node_name, message=None): 55 | """Logs an info message.""" 56 | _log_node(COLORS_FG["CYAN"], node_name, message) 57 | 58 | 59 | def log_node_warn(node_name, message=None): 60 | """Logs an warn message.""" 61 | _log_node(COLORS_FG["YELLOW"], node_name, message) 62 | 63 | 64 | def log_node_error(node_name, message=None): 65 | """Logs an warn message.""" 66 | _log_node(COLORS_FG["RED"], node_name, message) 67 | 68 | 69 | def log_node(node_name, message=None): 70 | """Logs a message.""" 71 | _log_node(COLORS_FG["CYAN"], node_name, message) 72 | 73 | 74 | def _log_node(color, node_name, message=None, prefix=''): 75 | print(_get_log_msg(color, node_name, message, prefix=prefix)) 76 | 77 | 78 | def _get_log_msg(color, node_name, message=None, prefix=''): 79 | msg = f'{COLORS_STYLE["BOLD"]}{color}{prefix}[Fooocus] {node_name.replace(" (Fooocus)", "")}' 80 | msg += f':{COLORS_STYLE["RESET"]} {message}' if message is not None else f'{COLORS_STYLE["RESET"]}' 81 | return msg 82 | -------------------------------------------------------------------------------- 
/py/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/py/modules/__init__.py -------------------------------------------------------------------------------- /py/modules/advanced_parameters.py: -------------------------------------------------------------------------------- 1 | disable_preview, adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \ 2 | scheduler_name, generate_image_grid, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \ 3 | overwrite_vary_strength, overwrite_upscale_strength, \ 4 | mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ 5 | debugging_cn_preprocessor, skipping_cn_preprocessor, \ 6 | refiner_swap_method, \ 7 | freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2, \ 8 | debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field, \ 9 | inpaint_mask_upload_checkbox, invert_mask_checkbox, inpaint_erode_or_dilate = [None] * 32 10 | 11 | 12 | controlnet_softness = 0.25 13 | canny_low_threshold = 64 14 | canny_high_threshold = 128 15 | -------------------------------------------------------------------------------- /py/modules/constants.py: -------------------------------------------------------------------------------- 1 | # as in k-diffusion (sampling.py) 2 | MIN_SEED = 0 3 | MAX_SEED = 2**63 - 1 4 | 5 | AUTH_FILENAME = 'auth.json' 6 | -------------------------------------------------------------------------------- /py/modules/flags.py: -------------------------------------------------------------------------------- 1 | disabled = 'Disabled' 2 | enabled = 'Enabled' 3 | subtle_variation = 'Vary (Subtle)' 4 | strong_variation = 'Vary (Strong)' 5 | upscale_15 = 'Upscale (1.5x)' 6 | upscale_2 = 'Upscale (2x)' 7 | upscale_fast = 'Upscale (Fast 2x)' 8 | 9 | uov_list = [ 10 | disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast 11 | ] 12 | 13 | KSAMPLER_NAMES = ["euler", "euler_ancestral", "heun", "heunpp2", "dpm_2", "dpm_2_ancestral", 14 | "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", 15 | "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm"] 16 | 17 | SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", "lcm", "turbo"] 18 | SAMPLER_NAMES = KSAMPLER_NAMES + ["ddim", "uni_pc", "uni_pc_bh2"] 19 | 20 | sampler_list = SAMPLER_NAMES 21 | scheduler_list = SCHEDULER_NAMES 22 | 23 | cn_ip = "ImagePrompt" 24 | cn_ip_face = "FaceSwap" 25 | cn_canny = "PyraCanny" 26 | cn_cpds = "CPDS" 27 | 28 | ip_list = [cn_ip, cn_ip_face] 29 | cn_list = [cn_canny, cn_cpds] 30 | default_ip = cn_ip 31 | default_cn = cn_canny 32 | 33 | 34 | default_parameters = { 35 | cn_ip: (0.5, 0.6), cn_ip_face: (0.9, 0.75), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0) 36 | } # stop, weight 37 | 38 | inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6'] 39 | performance_selections = ['Speed', 'Quality', 'Extreme Speed'] 40 | 41 | inpaint_option_default = 'Inpaint or Outpaint (default)' 42 | inpaint_option_detail = 'Improve Detail (face, hand, eyes, etc.)' 43 | inpaint_option_modify = 'Modify Content (add objects, change background, etc.)' 44 | inpaint_options = [inpaint_option_default, inpaint_option_detail, inpaint_option_modify] 45 | 46 | desc_type_photo = 'Photograph' 47 | 
desc_type_anime = 'Art/Anime' 48 | -------------------------------------------------------------------------------- /py/modules/model_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib.parse import urlparse 3 | from typing import Optional 4 | import folder_paths 5 | 6 | def load_file_from_url( 7 | url: str, 8 | *, 9 | model_dir: str, 10 | progress: bool = True, 11 | file_name: Optional[str] = None, 12 | ) -> str: 13 | """Download a file from `url` into `model_dir`, reusing the existing file if it is already present. 14 | 15 | Returns the path to the downloaded file. 16 | """ 17 | if not file_name: 18 | parts = urlparse(url) 19 | file_name = os.path.basename(parts.path) 20 | # Search all registered model folders for an existing copy 21 | cached_file = folder_paths.get_full_path(model_dir, file_name) 22 | if cached_file is None: 23 | os.makedirs(folder_paths.get_folder_paths(model_dir)[0], exist_ok=True) 24 | cached_file = os.path.join(folder_paths.get_folder_paths(model_dir)[0],file_name) 25 | 26 | if not os.path.exists(cached_file): 27 | print(f'Downloading: "{url}" to {cached_file}\n') 28 | from torch.hub import download_url_to_file 29 | download_url_to_file(url, cached_file, progress=progress) 30 | return cached_file 31 | -------------------------------------------------------------------------------- /py/modules/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import contextlib 3 | 4 | 5 | @contextlib.contextmanager 6 | def use_patched_ops(operations): 7 | op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm'] 8 | backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names} 9 | 10 | try: 11 | for op_name in op_names: 12 | setattr(torch.nn, op_name, getattr(operations, op_name)) 13 | 14 | yield 15 | 16 | finally: 17 | for op_name in op_names: 18 | setattr(torch.nn, op_name, backups[op_name]) 19 | return 20 | -------------------------------------------------------------------------------- /py/modules/patch_precision.py: -------------------------------------------------------------------------------- 1 | # Consistent with Kohya to reduce differences between model training and inference. 2 | 3 | import torch 4 | import math 5 | import einops 6 | import numpy as np 7 | 8 | import ldm_patched.ldm.modules.diffusionmodules.openaimodel 9 | import ldm_patched.modules.model_sampling 10 | import ldm_patched.modules.sd1_clip 11 | 12 | from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule 13 | 14 | 15 | def patched_timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): 16 | # Consistent with Kohya to reduce differences between model training and inference. 17 | 18 | if not repeat_only: 19 | half = dim // 2 20 | freqs = torch.exp( 21 | -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half 22 | ).to(device=timesteps.device) 23 | args = timesteps[:, None].float() * freqs[None] 24 | embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) 25 | if dim % 2: 26 | embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) 27 | else: 28 | embedding = einops.repeat(timesteps, 'b -> b d', d=dim) 29 | return embedding 30 | 31 | 32 | def patched_register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, 33 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 34 | # Consistent with Kohya to reduce differences between model training and inference.
35 | 36 | if given_betas is not None: 37 | betas = given_betas 38 | else: 39 | betas = make_beta_schedule( 40 | beta_schedule, 41 | timesteps, 42 | linear_start=linear_start, 43 | linear_end=linear_end, 44 | cosine_s=cosine_s) 45 | 46 | alphas = 1. - betas 47 | alphas_cumprod = np.cumprod(alphas, axis=0) 48 | timesteps, = betas.shape 49 | self.num_timesteps = int(timesteps) 50 | self.linear_start = linear_start 51 | self.linear_end = linear_end 52 | sigmas = torch.tensor(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, dtype=torch.float32) 53 | self.set_sigmas(sigmas) 54 | return 55 | 56 | 57 | def patch_all_precision(): 58 | ldm_patched.ldm.modules.diffusionmodules.openaimodel.timestep_embedding = patched_timestep_embedding 59 | ldm_patched.modules.model_sampling.ModelSamplingDiscrete._register_schedule = patched_register_schedule 60 | return 61 | -------------------------------------------------------------------------------- /py/modules/sdxl_styles.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | import math 5 | 6 | from modules.util import get_files_from_folder 7 | from random import Random 8 | 9 | # cannot use modules.config - validators causing circular imports 10 | styles_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../sdxl_styles/')) 11 | 12 | 13 | def normalize_key(k): 14 | k = k.replace('-', ' ') 15 | words = k.split(' ') 16 | words = [w[:1].upper() + w[1:].lower() for w in words] 17 | k = ' '.join(words) 18 | k = k.replace('3d', '3D') 19 | k = k.replace('Sai', 'SAI') 20 | k = k.replace('Mre', 'MRE') 21 | k = k.replace('(s', '(S') 22 | return k 23 | 24 | 25 | styles = {} 26 | styles_files = get_files_from_folder(styles_path, ['.json']) 27 | 28 | for x in ['sdxl_styles_fooocus.json', 29 | 'sdxl_styles_sai.json', 30 | 'sdxl_styles_mre.json', 31 | 'sdxl_styles_twri.json', 32 | 'sdxl_styles_diva.json', 33 | 'sdxl_styles_marc_k3nt3l.json']: 34 | if x in styles_files: 35 | styles_files.remove(x) 36 | styles_files.append(x) 37 | 38 | for styles_file in styles_files: 39 | try: 40 | with open(os.path.join(styles_path, styles_file), encoding='utf-8') as f: 41 | for entry in json.load(f): 42 | name = normalize_key(entry['name']) 43 | prompt = entry['prompt'] if 'prompt' in entry else '' 44 | negative_prompt = entry['negative_prompt'] if 'negative_prompt' in entry else '' 45 | styles[name] = (prompt, negative_prompt) 46 | except Exception as e: 47 | print(str(e)) 48 | print(f'Failed to load style file {styles_file}') 49 | 50 | style_keys = list(styles.keys()) 51 | fooocus_expansion = 'Fooocus V2' 52 | random_style_name = 'Random Style' 53 | legal_style_names = [fooocus_expansion, random_style_name] + style_keys 54 | 55 | 56 | def get_random_style(rng: Random) -> str: 57 | return rng.choice(list(styles.items()))[0] 58 | 59 | 60 | def apply_style(style, positive): 61 | p, n = styles[style] 62 | return p.replace('{prompt}', positive).splitlines(), n.splitlines(), '{prompt}' in p 63 | 64 | 65 | def get_words(arrays, total_mult, index): 66 | if len(arrays) == 1: 67 | return [arrays[0].split(',')[index]] 68 | else: 69 | words = arrays[0].split(',') 70 | word = words[index % len(words)] 71 | index -= index % len(words) 72 | index /= len(words) 73 | index = math.floor(index) 74 | return [word] + get_words(arrays[1:], math.floor(total_mult / len(words)), index) 75 | 76 | 77 | def apply_arrays(text, index): 78 | arrays = re.findall(r'\[\[(.*?)\]\]', text) 79 | if len(arrays) == 0: 80 | return text 
81 | 82 | print(f'[Arrays] processing: {text}') 83 | mult = 1 84 | for arr in arrays: 85 | words = arr.split(',') 86 | mult *= len(words) 87 | 88 | index %= mult 89 | chosen_words = get_words(arrays, mult, index) 90 | 91 | i = 0 92 | for arr in arrays: 93 | text = text.replace(f'[[{arr}]]', chosen_words[i], 1) 94 | i = i+1 95 | 96 | return text 97 | 98 | -------------------------------------------------------------------------------- /py/modules/upscaler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import modules.core as core 4 | 5 | from ldm_patched.pfn.architecture.RRDB import RRDBNet as ESRGAN 6 | from ldm_patched.contrib.external_upscale_model import ImageUpscaleWithModel 7 | from collections import OrderedDict 8 | import folder_paths 9 | 10 | model_filename = folder_paths.get_full_path('upscale_models','fooocus_upscaler_s409985e5.bin') 11 | opImageUpscaleWithModel = ImageUpscaleWithModel() 12 | model = None 13 | 14 | 15 | def perform_upscale(img): 16 | global model 17 | 18 | print(f'Upscaling image with shape {str(img.shape)} ...') 19 | 20 | if model is None: 21 | sd = torch.load(model_filename) 22 | sdo = OrderedDict() 23 | for k, v in sd.items(): 24 | sdo[k.replace('residual_block_', 'RDB')] = v 25 | del sd 26 | model = ESRGAN(sdo) 27 | model.cpu() 28 | model.eval() 29 | 30 | img = core.numpy_to_pytorch(img) 31 | img = opImageUpscaleWithModel.upscale(model, img)[0] 32 | img = core.pytorch_to_numpy(img)[0] 33 | 34 | return img 35 | -------------------------------------------------------------------------------- /py/prompt.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | path_styles = os.path.abspath(os.path.join(os.path.dirname(__file__), '../sdxl_styles/')) 4 | 5 | # Style prompt selector node 6 | class FooocusStyles: 7 | @classmethod 8 | def INPUT_TYPES(s): 9 | styles = ["fooocus_styles"] 10 | styles_dir = path_styles 11 | for file_name in os.listdir(styles_dir): 12 | file = os.path.join(styles_dir, file_name) 13 | if ( 14 | os.path.isfile(file) 15 | and file_name.endswith(".json") 16 | and "styles" in file_name.split(".")[0] 17 | ): 18 | styles.append(file_name.split(".")[0]) 19 | return { 20 | "required": { 21 | "styles": (styles, {"default": "fooocus_styles"}), 22 | }, 23 | "hidden": { 24 | "prompt": "PROMPT", 25 | "extra_pnginfo": "EXTRA_PNGINFO", 26 | "my_unique_id": "UNIQUE_ID", 27 | }, 28 | } 29 | 30 | # 31 | RETURN_TYPES = ( 32 | "FOOOCUS_STYLES", 33 | ) 34 | RETURN_NAMES = ( 35 | "fooocus_styles", 36 | ) 37 | 38 | CATEGORY = "Fooocus/Prompt" 39 | FUNCTION = "run" 40 | OUTPUT_MODE = True 41 | 42 | def run( 43 | self, 44 | styles, 45 | prompt=None, 46 | extra_pnginfo=None, 47 | my_unique_id=None, 48 | ): 49 | values = [] 50 | if my_unique_id in prompt: 51 | if prompt[my_unique_id]["inputs"]["select_styles"]: 52 | values = prompt[my_unique_id]["inputs"]["select_styles"].split( 53 | ",") 54 | 55 | return (values,) 56 | 57 | 58 | # Positive prompt node 59 | class positivePrompt: 60 | def __init__(self): 61 | pass 62 | 63 | @classmethod 64 | def INPUT_TYPES(s): 65 | return { 66 | "required": {"positive": ("STRING", {"default": "", "multiline": True, "placeholder": "Positive"},), 67 | } 68 | } 69 | 70 | RETURN_TYPES = ("STRING",) 71 | RETURN_NAMES = ("positive",) 72 | FUNCTION = "main" 73 | 74 | CATEGORY = "Fooocus/Prompt" 75 | 76 | @staticmethod 77 | def main(positive): 78 | return (positive,) 79 | 80 | 81 | # Negative prompt node 82 | class negativePrompt: 83 | def __init__(self): 84 | pass 85
| 86 | @classmethod 87 | def INPUT_TYPES(s): 88 | return { 89 | "required": { 90 | "negative": ( 91 | "STRING", 92 | {"default": "", "multiline": True, "placeholder": "Negative"}, 93 | ), 94 | } 95 | } 96 | 97 | RETURN_TYPES = ("STRING",) 98 | RETURN_NAMES = ("negative",) 99 | FUNCTION = "main" 100 | 101 | CATEGORY = "Fooocus/Prompt" 102 | 103 | @staticmethod 104 | def main(negative): 105 | return (negative,) 106 | 107 | 108 | NODE_CLASS_MAPPINGS = { 109 | "Fooocus positive": positivePrompt, 110 | "Fooocus negative": negativePrompt, 111 | "Fooocus Styles": FooocusStyles, 112 | } 113 | 114 | NODE_DISPLAY_NAME_MAPPINGS = { 115 | "Fooocus positive": "Positive", 116 | "Fooocus negative": "Negative", 117 | "Fooocus stylesSelector": "stylesPromptSelector", 118 | "Fooocus Styles": "Fooocus Styles" 119 | } 120 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "fooocus_nodes" 3 | description = "This extension provides image generation features based on Fooocus." 4 | version = "1.0.2" 5 | license = "LICENSE" 6 | dependencies = ["accelerate==0.32.1", "pytorch_lightning==2.3.3", "pygit2==1.15.1", "opencv-contrib-python-headless==4.10.0.84", "httpx==0.27.0", "onnxruntime", "timm==1.0.7"] 7 | 8 | [project.urls] 9 | Repository = "https://github.com/Seedsa/Fooocus_Nodes" 10 | # Used by Comfy Registry https://comfyregistry.org 11 | 12 | [tool.comfy] 13 | PublisherId = "seed" 14 | DisplayName = "Fooocus_Nodes" 15 | Icon = "" 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.32.1 2 | pytorch_lightning==2.3.3 3 | pygit2==1.15.1 4 | opencv-contrib-python-headless==4.10.0.84 5 | httpx==0.27.0 6 | timm==1.0.7 7 | onnxruntime 8 | -------------------------------------------------------------------------------- /screnshot/Fooocus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/screnshot/Fooocus.png -------------------------------------------------------------------------------- /screnshot/FooocusNodes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/screnshot/FooocusNodes.png -------------------------------------------------------------------------------- /sdxl_styles/sdxl_styles_fooocus.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Fooocus Enhance", 4 | "negative_prompt": "(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, 
deformities:1.3)" 5 | }, 6 | { 7 | "name": "Fooocus Semi Realistic", 8 | "negative_prompt": "(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)" 9 | }, 10 | { 11 | "name": "Fooocus Sharp", 12 | "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo, sharp focus, high budget, cinemascope, moody, epic, gorgeous, film grain, grainy", 13 | "negative_prompt": "anime, cartoon, graphic, (blur, blurry, bokeh), text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 14 | }, 15 | { 16 | "name": "Fooocus Masterpiece", 17 | "prompt": "(masterpiece), (best quality), (ultra-detailed), {prompt}, illustration, disheveled hair, detailed eyes, perfect composition, moist skin, intricate details, earrings", 18 | "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair,extra digit, fewer digits, cropped, worst quality, low quality" 19 | }, 20 | { 21 | "name": "Fooocus Photograph", 22 | "prompt": "photograph {prompt}, 50mm . cinematic 4k epic detailed 4k epic detailed photograph shot on kodak detailed cinematic hbo dark moody, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage", 23 | "negative_prompt": "Brad Pitt, bokeh, depth of field, blurry, cropped, regular face, saturated, contrast, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck" 24 | }, 25 | { 26 | "name": "Fooocus Negative", 27 | "negative_prompt": "deformed, bad anatomy, disfigured, poorly drawn face, mutated, extra limb, ugly, poorly drawn hands, missing limb, floating limbs, disconnected limbs, disconnected head, malformed hands, long neck, mutated hands and fingers, bad hands, missing fingers, cropped, worst quality, low quality, mutation, poorly drawn, huge calf, bad hands, fused hand, missing hand, disappearing arms, disappearing thigh, disappearing calf, disappearing legs, missing fingers, fused fingers, abnormal eye proportion, Abnormal hands, abnormal legs, abnormal feet, abnormal fingers, drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly, anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch" 28 | }, 29 | { 30 | "name": "Fooocus Cinematic", 31 | "prompt": "cinematic still {prompt} . 
emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", 32 | "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 33 | }, 34 | { 35 | "name": "Fooocus Pony", 36 | "prompt": "score_9, score_8_up, score_7_up, {prompt}", 37 | "negative_prompt": "score_6, score_5, score_4" 38 | } 39 | ] 40 | -------------------------------------------------------------------------------- /sdxl_styles/sdxl_styles_sai.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "sai-3d-model", 4 | "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting", 5 | "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting" 6 | }, 7 | { 8 | "name": "sai-analog film", 9 | "prompt": "analog film photo {prompt} . faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage", 10 | "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured" 11 | }, 12 | { 13 | "name": "sai-anime", 14 | "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed", 15 | "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast" 16 | }, 17 | { 18 | "name": "sai-cinematic", 19 | "prompt": "cinematic film still {prompt} . shallow depth of field, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", 20 | "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 21 | }, 22 | { 23 | "name": "sai-comic book", 24 | "prompt": "comic {prompt} . graphic illustration, comic art, graphic novel art, vibrant, highly detailed", 25 | "negative_prompt": "photograph, deformed, glitch, noisy, realistic, stock photo" 26 | }, 27 | { 28 | "name": "sai-craft clay", 29 | "prompt": "play-doh style {prompt} . sculpture, clay art, centered composition, Claymation", 30 | "negative_prompt": "sloppy, messy, grainy, highly detailed, ultra textured, photo" 31 | }, 32 | { 33 | "name": "sai-digital art", 34 | "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed", 35 | "negative_prompt": "photo, photorealistic, realism, ugly" 36 | }, 37 | { 38 | "name": "sai-enhance", 39 | "prompt": "breathtaking {prompt} . award-winning, professional, highly detailed", 40 | "negative_prompt": "ugly, deformed, noisy, blurry, distorted, grainy" 41 | }, 42 | { 43 | "name": "sai-fantasy art", 44 | "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy", 45 | "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white" 46 | }, 47 | { 48 | "name": "sai-isometric", 49 | "prompt": "isometric style {prompt} . vibrant, beautiful, crisp, detailed, ultra detailed, intricate", 50 | "negative_prompt": "deformed, mutated, ugly, disfigured, blur, blurry, noise, noisy, realistic, photographic" 51 | }, 52 | { 53 | "name": "sai-line art", 54 | "prompt": "line art drawing {prompt} . 
professional, sleek, modern, minimalist, graphic, line art, vector graphics", 55 | "negative_prompt": "anime, photorealistic, 35mm film, deformed, glitch, blurry, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, mutated, realism, realistic, impressionism, expressionism, oil, acrylic" 56 | }, 57 | { 58 | "name": "sai-lowpoly", 59 | "prompt": "low-poly style {prompt} . low-poly game art, polygon mesh, jagged, blocky, wireframe edges, centered composition", 60 | "negative_prompt": "noisy, sloppy, messy, grainy, highly detailed, ultra textured, photo" 61 | }, 62 | { 63 | "name": "sai-neonpunk", 64 | "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional", 65 | "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured" 66 | }, 67 | { 68 | "name": "sai-origami", 69 | "prompt": "origami style {prompt} . paper art, pleated paper, folded, origami art, pleats, cut and fold, centered composition", 70 | "negative_prompt": "noisy, sloppy, messy, grainy, highly detailed, ultra textured, photo" 71 | }, 72 | { 73 | "name": "sai-photographic", 74 | "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed", 75 | "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly" 76 | }, 77 | { 78 | "name": "sai-pixel art", 79 | "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics", 80 | "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic" 81 | }, 82 | { 83 | "name": "sai-texture", 84 | "prompt": "texture {prompt} top down close-up", 85 | "negative_prompt": "ugly, deformed, noisy, blurry" 86 | } 87 | ] -------------------------------------------------------------------------------- /web/js/interface.js: -------------------------------------------------------------------------------- 1 | import { app } from "/scripts/app.js"; 2 | 3 | // 增加Slot颜色 4 | const customPipeLineLink = "#7737AA"; 5 | const customPipeLineSDXLLink = "#7737AA"; 6 | const customIntLink = "#29699C"; 7 | const customXYPlotLink = "#74DA5D"; 8 | const customXYLink = "#38291f"; 9 | const STRINGLink = "#00aa8c"; 10 | 11 | var customLinkColors = 12 | JSON.parse(localStorage.getItem("Comfy.Settings.ttN.customLinkColors")) || {}; 13 | if ( 14 | !customLinkColors["PIPE_LINE"] || 15 | !LGraphCanvas.link_type_colors["PIPE_LINE"] 16 | ) { 17 | customLinkColors["PIPE_LINE"] = customPipeLineLink; 18 | } 19 | if ( 20 | !customLinkColors["PIPE_LINE_SDXL"] || 21 | !LGraphCanvas.link_type_colors["PIPE_LINE_SDXL"] 22 | ) { 23 | customLinkColors["PIPE_LINE_SDXL"] = customPipeLineSDXLLink; 24 | } 25 | if (!customLinkColors["INT"] || !LGraphCanvas.link_type_colors["INT"]) { 26 | customLinkColors["INT"] = customIntLink; 27 | } 28 | if (!customLinkColors["XYPLOT"] || !LGraphCanvas.link_type_colors["XYPLOT"]) { 29 | customLinkColors["XYPLOT"] = customXYPlotLink; 30 | } 31 | if (!customLinkColors["X_Y"] || !LGraphCanvas.link_type_colors["X_Y"]) { 32 | customLinkColors["X_Y"] = customXYLink; 33 | } 34 | if (!customLinkColors["STRING"] || !LGraphCanvas.link_type_colors["STRING"]) { 35 | customLinkColors["STRING"] = STRINGLink; 36 | } 37 | 38 | localStorage.setItem( 39 | "Comfy.Settings.fooocus.customLinkColors", 40 | 
JSON.stringify(customLinkColors) 41 | ); 42 | 43 | // 节点颜色 44 | const COLOR_THEMES = LGraphCanvas.node_colors; 45 | const NODE_COLORS = { 46 | "Fooocus positive": "green", 47 | "Fooocus negative": "red", 48 | }; 49 | 50 | function setNodeColors(node, theme) { 51 | if (!theme) { 52 | return; 53 | } 54 | if (theme.color) node.color = theme.color; 55 | if (theme.bgcolor) node.bgcolor = theme.bgcolor; 56 | } 57 | 58 | app.registerExtension({ 59 | name: "comfy.fooocus.interface", 60 | setup() { 61 | Object.assign(app.canvas.default_connection_color_byType, customLinkColors); 62 | Object.assign(LGraphCanvas.link_type_colors, customLinkColors); 63 | }, 64 | 65 | nodeCreated(node) { 66 | if (NODE_COLORS.hasOwnProperty(node.comfyClass)) { 67 | const colorKey = NODE_COLORS[node.comfyClass]; 68 | const theme = COLOR_THEMES[colorKey]; 69 | setNodeColors(node, theme); 70 | } 71 | }, 72 | }); 73 | -------------------------------------------------------------------------------- /wildcards/color.txt: -------------------------------------------------------------------------------- 1 | aqua 2 | black 3 | blue 4 | fuchsia 5 | gray 6 | green 7 | lime 8 | maroon 9 | navy 10 | olive 11 | orange 12 | purple 13 | red 14 | silver 15 | teal 16 | white 17 | yellow 18 | -------------------------------------------------------------------------------- /wildcards/color_flower.txt: -------------------------------------------------------------------------------- 1 | __color__ __flower__ 2 | -------------------------------------------------------------------------------- /wildcards/extended-color.txt: -------------------------------------------------------------------------------- 1 | aliceblue 2 | antiquewhite 3 | aqua 4 | aquamarine 5 | azure 6 | beige 7 | bisque 8 | black 9 | blanchedalmond 10 | blue 11 | blueviolet 12 | brown 13 | burlywood 14 | cadetblue 15 | chartreuse 16 | chocolate 17 | coral 18 | cornflowerblue 19 | cornsilk 20 | crimson 21 | cyan 22 | darkblue 23 | darkcyan 24 | darkgoldenrod 25 | darkgray 26 | darkgreen 27 | darkgrey 28 | darkkhaki 29 | darkmagenta 30 | darkolivegreen 31 | darkorange 32 | darkorchid 33 | darkred 34 | darksalmon 35 | darkseagreen 36 | darkslateblue 37 | darkslategray 38 | darkslategrey 39 | darkturquoise 40 | darkviolet 41 | deeppink 42 | deepskyblue 43 | dimgray 44 | dimgrey 45 | dodgerblue 46 | firebrick 47 | floralwhite 48 | forestgreen 49 | fuchsia 50 | gainsboro 51 | ghostwhite 52 | gold 53 | goldenrod 54 | gray 55 | green 56 | greenyellow 57 | grey 58 | honeydew 59 | hotpink 60 | indianred 61 | indigo 62 | ivory 63 | khaki 64 | lavender 65 | lavenderblush 66 | lawngreen 67 | lemonchiffon 68 | lightblue 69 | lightcoral 70 | lightcyan 71 | lightgoldenrodyellow 72 | lightgray 73 | lightgreen 74 | lightgrey 75 | lightpink 76 | lightsalmon 77 | lightseagreen 78 | lightskyblue 79 | lightslategray 80 | lightslategrey 81 | lightsteelblue 82 | lightyellow 83 | lime 84 | limegreen 85 | linen 86 | magenta 87 | maroon 88 | mediumaquamarine 89 | mediumblue 90 | mediumorchid 91 | mediumpurple 92 | mediumseagreen 93 | mediumslateblue 94 | mediumspringgreen 95 | mediumturquoise 96 | mediumvioletred 97 | midnightblue 98 | mintcream 99 | mistyrose 100 | moccasin 101 | navajowhite 102 | navy 103 | oldlace 104 | olive 105 | olivedrab 106 | orange 107 | orangered 108 | orchid 109 | palegoldenrod 110 | palegreen 111 | paleturquoise 112 | palevioletred 113 | papayawhip 114 | peachpuff 115 | peru 116 | pink 117 | plum 118 | powderblue 119 | purple 120 | red 121 | rosybrown 122 | royalblue 123 | 
saddlebrown 124 | salmon 125 | sandybrown 126 | seagreen 127 | seashell 128 | sienna 129 | silver 130 | skyblue 131 | slateblue 132 | slategray 133 | slategrey 134 | snow 135 | springgreen 136 | steelblue 137 | tan 138 | teal 139 | thistle 140 | tomato 141 | turquoise 142 | violet 143 | wheat 144 | white 145 | whitesmoke 146 | yellow 147 | yellowgreen 148 | -------------------------------------------------------------------------------- /wildcards/flower.txt: -------------------------------------------------------------------------------- 1 | Acacia 2 | Achillea 3 | Adam's-needle 4 | African Boxwood 5 | African Lily 6 | Agapanthus 7 | Ageratum 8 | Ageratum houstonim 9 | Allium 10 | Alpina 11 | Alstroemeria 12 | Amaranthus hypochondriacus 13 | Amaryllis 14 | Ammi majus 15 | Anconitum 16 | Anemone 17 | Anigozanthus 18 | Annual Delphinium 19 | Anthurium 20 | Antirrhinum majus 21 | Artichoke thistle 22 | Asparagus 23 | Aster 24 | Astilbe 25 | Baby's Breath 26 | Bachelor's Button 27 | Banksia 28 | Bellflower 29 | Big Flax 30 | Bighead Knapweed 31 | Billy Buttons 32 | Bird of Paradise 33 | Blazing Star 34 | Blue Lace Flower 35 | Boronia 36 | Bouvardia 37 | Boxwood African 38 | Diosma 39 | Buckthorn Variegated 40 | Buddleia 41 | Bupleurum 42 | Butterfly Bush 43 | Butterfly Orchid 44 | Calla Lily 45 | Campanula 46 | Candytuft 47 | Canterbury Bells 48 | Carnation 49 | Carthamus 50 | Casa Blanca 51 | Caspia 52 | Cattleya 53 | Celosia 54 | Celosia argenta 55 | Centaurea cyanus 56 | Chamelaucium 57 | Chimney Bells 58 | Chrysanthemum 59 | Chrysanthemum x morifolium 60 | Clarkia 61 | Cockscomb Crested 62 | Coffee Bean Berry 63 | Common Myrtle 64 | Common Yarrow 65 | Cone Flower 66 | Consolida ambigua 67 | Convallaria 68 | Cordyline 69 | Cosmos 70 | Cornflower 71 | Craspedia 72 | Curly Willow 73 | Cymbidium 74 | Cymbidium Orchid 75 | Daffodil 76 | Dahlia 77 | Daisy Mums 78 | Delphinium Belladonna 79 | Delphinium Pacific Giant 80 | Dendrobium 81 | Dendrobium Orchid 82 | Dianthus barbatus 83 | Dianthus caryophyllus 84 | Dianthus caryophyllus nana 85 | Erica spp 86 | Eucalyptus seeded 87 | Eucalyptus silver dollar 88 | Eustoma grandiflorum 89 | False Bird of Paradise 90 | False Spirea 91 | Farewell-To-Spring 92 | Fernleaf Yarrow 93 | Feverfew 94 | Flamingo Flower 95 | Flax New Zealand 96 | Floss Flower 97 | Foxtail Fern 98 | Freesia 99 | Freesia x hybrida 100 | Fuji Mums 101 | Gardenia 102 | Gay Feather 103 | Genista 104 | Gerbera 105 | Gerbera Ruby Red 106 | Ginger 107 | Gladiolus 108 | Gladiolus hybrid nanus 109 | Goat's Beard 110 | Godetia 111 | Golden Rod 112 | Guersney Lily 113 | Gyp 114 | Gypsophila paniculata 115 | Hanging Helicona 116 | Heath 117 | Heather 118 | Helianthus annuus 119 | Heliconia spp. 
120 | Hippeastrum 121 | Hydrangea 122 | Iberis amara 123 | Inca Lily 124 | Iris 125 | Japhette Orchid 126 | Jonquil 127 | Knapweed 128 | Lace fern 129 | Larkspur 130 | Lathyrus odoratus 131 | Lavandula 132 | Lavender 133 | Liatris 134 | Lilac 135 | Lily 136 | Lilly-of-the-Valley 137 | Lily Casa Blanca 138 | Lily of the Field 139 | Lily of the Nile 140 | Lily Stargazer 141 | Limonium 142 | Lisianthus 143 | Marguerite daisy 144 | Mattholia incana 145 | Melaleuca 146 | Memosa 147 | Misty Blue Limonium 148 | Moluccella laevis 149 | Monkshood 150 | Montbretia 151 | Monte Cassino 152 | Moon orchid 153 | Musa 154 | Myrsine 155 | Myrtle 156 | Myrtus 157 | Nephrolepis 158 | Nerine 159 | Nerine Lily 160 | Nigella 161 | Ornithogalum 162 | Paeonia 163 | Painted Tongue 164 | Paper Reed 165 | Papyrus lion's head 166 | Peony 167 | Peruvian Lily 168 | Phalaenopsis 169 | Philodendron 170 | Phlox 171 | Pincushion Flower 172 | Pink Mink 173 | Pitt 174 | Pittosporum 175 | Pixie Carnation 176 | Polianthes tuberosa 177 | Pompon Chrysanthemum 178 | Poppy Anemone 179 | Porium 180 | Pussy Willow 181 | Queen Anne's Lace 182 | Ranunculus 183 | Red Ribbons 184 | Rice flower 185 | Rose 186 | Rose Bridal Pink 187 | Rose Bridal White 188 | Rose Champagne 189 | Rose Diadem 190 | Rose Emblem 191 | Rose Kardinal 192 | Rose Lady Liberty 193 | Rose Lavanda 194 | Rose Osiana 195 | Rose Royalty 196 | Safari Sunset 197 | Safflower 198 | Sage Perennial 199 | Salix 200 | Salmon Reagan 201 | Sansevieria 202 | Saponaria 203 | Satin Flowers 204 | Saxicola 205 | Scabiosa 206 | Schinus 207 | Sea lavender 208 | Shell Flowers 209 | Snake Plant 210 | Snapdragon 211 | Solidago 212 | Solidaster 213 | Speedwell 214 | Spider Lily 215 | Spider Mums 216 | Spray Carnation 217 | Sprengeri Fern 218 | Star of Bethlehem 219 | Statice 220 | Stenamezon 221 | Stephanotis 222 | Strawberry banksia 223 | Strawflower 224 | Summer poinsettia 225 | Summer's Darling 226 | Sunflower 227 | Sweet Pea 228 | Sweet William 229 | Sword Fern 230 | Syringa vulgaris 231 | Tailflowers 232 | Tassel flower 233 | Thouroughwax 234 | Throatwort 235 | Tracelium 236 | Tree Fern 237 | Trumpet Lily 238 | Tuberose 239 | Tulip 240 | Tulipa 241 | Veronica 242 | Wattle 243 | Waxflower 244 | Wild Plantain 245 | Willow curly 246 | Windflower 247 | Wolfsbane 248 | Zantedeschia 249 | Zinna 250 | Zinnia elegans 251 | -------------------------------------------------------------------------------- /wildcards/nationality.txt: -------------------------------------------------------------------------------- 1 | Afghan 2 | Albanian 3 | Algerian 4 | American 5 | Andorran 6 | Angolan 7 | Antiguans 8 | Argentine 9 | Armenian 10 | Australian 11 | Austrian 12 | Azerbaijani 13 | Bahamian 14 | Bahraini 15 | Bangladeshi 16 | Barbadian 17 | Barbudans 18 | Batswana 19 | Belarusian 20 | Belgian 21 | Belizean 22 | Beninese 23 | Bhutanese 24 | Bolivian 25 | Bosnian 26 | Brazilian 27 | British 28 | Bruneian 29 | Bulgarian 30 | Burkinabe 31 | Burmese 32 | Burundian 33 | Cambodian 34 | Cameroonian 35 | Canadian 36 | Cape Verdean 37 | Central African 38 | Chadian 39 | Chilean 40 | Chinese 41 | Colombian 42 | Comoran 43 | Congolese 44 | Costa Rican 45 | Croatian 46 | Cuban 47 | Cypriot 48 | Czech 49 | Danish 50 | Djibouti 51 | Dominican 52 | Dutch 53 | East Timorese 54 | Ecuadorean 55 | Egyptian 56 | Emirati 57 | Equatorial Guinean 58 | Eritrean 59 | Estonian 60 | Ethiopian 61 | Fijian 62 | Filipino 63 | Finnish 64 | French 65 | Gabonese 66 | Gambian 67 | Georgian 68 | German 69 | Ghanaian 70 | Greek 71 | 
Grenadian 72 | Guatemalan 73 | Guinea-Bissauan 74 | Guinean 75 | Guyanese 76 | Haitian 77 | Herzegovinian 78 | Honduran 79 | Hungarian 80 | Icelander 81 | Indian 82 | Indonesian 83 | Iranian 84 | Iraqi 85 | Irish 86 | Israeli 87 | Italian 88 | Ivorian 89 | Jamaican 90 | Japanese 91 | Jordanian 92 | Kazakhstani 93 | Kenyan 94 | Kittian and Nevisian 95 | Kuwaiti 96 | Kyrgyz 97 | Laotian 98 | Latvian 99 | Lebanese 100 | Liberian 101 | Libyan 102 | Liechtensteiner 103 | Lithuanian 104 | Luxembourger 105 | Macedonian 106 | Malagasy 107 | Malawian 108 | Malaysian 109 | Maldivan 110 | Malian 111 | Maltese 112 | Marshallese 113 | Mauritanian 114 | Mauritian 115 | Mexican 116 | Micronesian 117 | Moldovan 118 | Monacan 119 | Mongolian 120 | Montenegrin 121 | Moroccan 122 | Mosotho 123 | Motswana 124 | Mozambican 125 | Namibian 126 | Nauruan 127 | Nepalese 128 | New Zealander 129 | Nicaraguan 130 | Nigerian 131 | Nigerien 132 | North Korean 133 | Northern Irish 134 | Norwegian 135 | Omani 136 | Pakistani 137 | Palauan 138 | Palestinian 139 | Panamanian 140 | Papua New Guinean 141 | Paraguayan 142 | Peruvian 143 | Polish 144 | Portuguese 145 | Qatari 146 | Romanian 147 | Russian 148 | Rwandan 149 | Saint Lucian 150 | Salvadoran 151 | Samoan 152 | San Marinese 153 | Sao Tomean 154 | Saudi 155 | Scottish 156 | Senegalese 157 | Serbian 158 | Seychellois 159 | Sierra Leonean 160 | Singaporean 161 | Slovakian 162 | Slovenian 163 | Solomon Islander 164 | Somali 165 | South African 166 | South Korean 167 | Spanish 168 | Sri Lankan 169 | Sudanese 170 | Surinamer 171 | Swazi 172 | Swedish 173 | Swiss 174 | Syrian 175 | Taiwanese 176 | Tajik 177 | Tanzanian 178 | Thai 179 | Togolese 180 | Tongan 181 | Trinidadian or Tobagonian 182 | Tunisian 183 | Turkish 184 | Tuvaluan 185 | Ugandan 186 | Ukrainian 187 | Uruguayan 188 | Uzbekistani 189 | Vanuatuan 190 | Venezuelan 191 | Vietnamese 192 | Welsh 193 | Yemenite 194 | Zambian 195 | Zimbabwean 196 | -------------------------------------------------------------------------------- /workflow/fooocus_describe.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 4, 3 | "last_link_id": 4, 4 | "nodes": [ 5 | { 6 | "id": 3, 7 | "type": "Display Any (rgthree)", 8 | "pos": [ 9 | 1808, 10 | 127 11 | ], 12 | "size": { 13 | "0": 331.6216125488281, 14 | "1": 262.77801513671875 15 | }, 16 | "flags": {}, 17 | "order": 2, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "source", 22 | "type": "*", 23 | "link": 3, 24 | "dir": 3 25 | } 26 | ], 27 | "properties": { 28 | "Node name for S&R": "Display Any (rgthree)" 29 | }, 30 | "widgets_values": [ 31 | "" 32 | ] 33 | }, 34 | { 35 | "id": 1, 36 | "type": "LoadImage", 37 | "pos": [ 38 | 1001, 39 | 134 40 | ], 41 | "size": { 42 | "0": 315, 43 | "1": 314 44 | }, 45 | "flags": {}, 46 | "order": 0, 47 | "mode": 0, 48 | "outputs": [ 49 | { 50 | "name": "IMAGE", 51 | "type": "IMAGE", 52 | "links": [ 53 | 4 54 | ], 55 | "slot_index": 0, 56 | "shape": 3 57 | }, 58 | { 59 | "name": "MASK", 60 | "type": "MASK", 61 | "links": null, 62 | "shape": 3 63 | } 64 | ], 65 | "properties": { 66 | "Node name for S&R": "LoadImage" 67 | }, 68 | "widgets_values": [ 69 | "example.png", 70 | "image" 71 | ] 72 | }, 73 | { 74 | "id": 4, 75 | "type": "Fooocus Describe", 76 | "pos": [ 77 | 1415, 78 | 136 79 | ], 80 | "size": { 81 | "0": 315, 82 | "1": 58 83 | }, 84 | "flags": {}, 85 | "order": 1, 86 | "mode": 0, 87 | "inputs": [ 88 | { 89 | "name": "image", 90 | "type": "IMAGE", 91 | "link": 4 92 | } 93 | 
], 94 | "outputs": [ 95 | { 96 | "name": "STRING", 97 | "type": "STRING", 98 | "links": [ 99 | 3 100 | ], 101 | "shape": 3, 102 | "slot_index": 0 103 | } 104 | ], 105 | "properties": { 106 | "Node name for S&R": "Fooocus Describe" 107 | }, 108 | "widgets_values": [ 109 | "Photo" 110 | ] 111 | } 112 | ], 113 | "links": [ 114 | [ 115 | 3, 116 | 4, 117 | 0, 118 | 3, 119 | 0, 120 | "*" 121 | ], 122 | [ 123 | 4, 124 | 1, 125 | 0, 126 | 4, 127 | 0, 128 | "IMAGE" 129 | ] 130 | ], 131 | "groups": [], 132 | "config": {}, 133 | "extra": { 134 | "ds": { 135 | "scale": 0.8264462809917362, 136 | "offset": [ 137 | -840.484618298602, 138 | 250.69346054186292 139 | ] 140 | } 141 | }, 142 | "version": 0.4 143 | } -------------------------------------------------------------------------------- /workflow/fooocus_prompt_expansion.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 6, 3 | "last_link_id": 5, 4 | "nodes": [ 5 | { 6 | "id": 6, 7 | "type": "Display Any (rgthree)", 8 | "pos": [ 9 | 1777, 10 | -46 11 | ], 12 | "size": [ 13 | 292.4664932986011, 14 | 196.80439102063673 15 | ], 16 | "flags": {}, 17 | "order": 1, 18 | "mode": 0, 19 | "inputs": [ 20 | { 21 | "name": "source", 22 | "type": "*", 23 | "link": 5, 24 | "dir": 3 25 | } 26 | ], 27 | "properties": { 28 | "Node name for S&R": "Display Any (rgthree)" 29 | }, 30 | "widgets_values": [ 31 | "" 32 | ] 33 | }, 34 | { 35 | "id": 5, 36 | "type": "Fooocus Expansion", 37 | "pos": [ 38 | 1271, 39 | -46 40 | ], 41 | "size": { 42 | "0": 400, 43 | "1": 200 44 | }, 45 | "flags": {}, 46 | "order": 0, 47 | "mode": 0, 48 | "outputs": [ 49 | { 50 | "name": "final_prompt", 51 | "type": "STRING", 52 | "links": [ 53 | 5 54 | ], 55 | "shape": 3, 56 | "slot_index": 0 57 | }, 58 | { 59 | "name": "seed", 60 | "type": "INT", 61 | "links": null, 62 | "shape": 3 63 | } 64 | ], 65 | "properties": { 66 | "Node name for S&R": "Fooocus Expansion" 67 | }, 68 | "widgets_values": [ 69 | "cat", 70 | 3314052962, 71 | "fixed", 72 | true 73 | ] 74 | } 75 | ], 76 | "links": [ 77 | [ 78 | 5, 79 | 5, 80 | 0, 81 | 6, 82 | 0, 83 | "*" 84 | ] 85 | ], 86 | "groups": [], 87 | "config": {}, 88 | "extra": { 89 | "ds": { 90 | "scale": 0.8264462809917362, 91 | "offset": [ 92 | -840.342821423602, 93 | 250.98650741686293 94 | ] 95 | } 96 | }, 97 | "version": 0.4 98 | } -------------------------------------------------------------------------------- /workflow/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Seedsa/Fooocus_Nodes/eb59da9305a3f1b48d0e042aae44fdd2bf724d5d/workflow/screenshot.png --------------------------------------------------------------------------------
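The wildcard files listed above are plain text lists with one candidate per line, and wildcards/color_flower.txt shows the `__name__` token syntax in which each token stands for a random line drawn from the file of the same name (tokens may expand into further tokens, as `__color_flower__` does). The replacement logic used by the nodes is not part of this listing, so the following is only an illustrative sketch of how such wildcard files are commonly consumed; the directory path, the nesting limit, the `expand_wildcards` helper, and the seeding are assumptions:

# Illustrative sketch only: expand __name__ tokens using files in the wildcards/ folder.
# Directory layout, nesting limit, and RNG handling are assumptions, not the nodes' actual code.
import os
import random
import re

WILDCARDS_DIR = os.path.join(os.path.dirname(__file__), "wildcards")


def expand_wildcards(text: str, rng: random.Random, max_depth: int = 10) -> str:
    for _ in range(max_depth):  # bounded so self-referencing files cannot loop forever
        tokens = re.findall(r"__([\w-]+?)__", text)
        if not tokens:
            break
        for name in tokens:
            path = os.path.join(WILDCARDS_DIR, f"{name}.txt")
            if not os.path.isfile(path):
                continue  # unknown tokens are left untouched
            with open(path, encoding="utf-8") as f:
                choices = [line.strip() for line in f if line.strip()]
            if choices:
                text = text.replace(f"__{name}__", rng.choice(choices), 1)
    return text


# Example: expand_wildcards("a __color__ __flower__", random.Random(42))
# might return something like "a teal Tulip", depending on the seed.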