├── .github ├── CODEOWNERS └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── LICENSE ├── args_manager.py ├── asset ├── canny │ ├── 0.png │ ├── 1.png │ ├── canny.png │ └── snip.png ├── compare.png ├── depth │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── depth.png │ └── snip.png ├── discord-icon-svgrepo-com.svg ├── inpaint_outpaint │ └── mask_inpaint.png ├── ip │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── ancient_chinese_costume │ │ ├── 0.png │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ └── snip.png │ ├── ip.png │ └── snip.png ├── ip_depth │ ├── 0.png │ ├── ip_depth.png │ └── snip.png ├── pose_face │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── pose_face.png │ └── snip.png ├── recolor │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── recolor.png │ └── snip.png ├── run_bat.png └── sketch │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── sketch.png │ └── snip.png ├── backend ├── doc └── headless │ ├── LICENSE │ ├── fcbh │ ├── checkpoint_pickle.py │ ├── cldm │ │ └── cldm.py │ ├── cli_args.py │ ├── clip_config_bigg.json │ ├── clip_vision.py │ ├── clip_vision_config_g.json │ ├── clip_vision_config_h.json │ ├── clip_vision_config_vitl.json │ ├── controlnet.py │ ├── diffusers_convert.py │ ├── diffusers_load.py │ ├── extra_samplers │ │ └── uni_pc.py │ ├── gligen.py │ ├── k_diffusion │ │ ├── external.py │ │ ├── sampling.py │ │ └── utils.py │ ├── latent_formats.py │ ├── ldm │ │ ├── models │ │ │ ├── autoencoder.py │ │ │ └── diffusion │ │ │ │ ├── __init__.py │ │ │ │ ├── ddim.py │ │ │ │ ├── dpm_solver │ │ │ │ ├── __init__.py │ │ │ │ ├── dpm_solver.py │ │ │ │ └── sampler.py │ │ │ │ ├── plms.py │ │ │ │ └── sampling_util.py │ │ ├── modules │ │ │ ├── attention.py │ │ │ ├── diffusionmodules │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── openaimodel.py │ │ │ │ ├── upscaling.py │ │ │ │ └── util.py │ │ │ ├── distributions │ │ │ │ ├── __init__.py │ │ │ │ └── distributions.py │ │ │ ├── ema.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ └── noise_aug_modules.py │ │ │ └── sub_quadratic_attention.py │ │ └── util.py │ ├── lora.py │ ├── model_base.py │ ├── model_detection.py │ ├── model_management.py │ ├── model_patcher.py │ ├── ops.py │ ├── options.py │ ├── sample.py │ ├── samplers.py │ ├── sd.py │ ├── sd1_clip.py │ ├── sd1_clip_config.json │ ├── sd1_tokenizer │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ ├── sd2_clip.py │ ├── sd2_clip_config.json │ ├── sdxl_clip.py │ ├── supported_models.py │ ├── supported_models_base.py │ ├── t2i_adapter │ │ └── adapter.py │ ├── taesd │ │ └── taesd.py │ └── utils.py │ ├── fcbh_extras │ ├── chainner_models │ │ ├── __init__.py │ │ ├── architecture │ │ │ ├── DAT.py │ │ │ ├── HAT.py │ │ │ ├── LICENSE-DAT │ │ │ ├── LICENSE-ESRGAN │ │ │ ├── LICENSE-HAT │ │ │ ├── LICENSE-RealESRGAN │ │ │ ├── LICENSE-SCUNet │ │ │ ├── LICENSE-SPSR │ │ │ ├── LICENSE-SwiftSRGAN │ │ │ ├── LICENSE-Swin2SR │ │ │ ├── LICENSE-SwinIR │ │ │ ├── LICENSE-lama │ │ │ ├── LaMa.py │ │ │ ├── OmniSR │ │ │ │ ├── ChannelAttention.py │ │ │ │ ├── LICENSE │ │ │ │ ├── OSA.py │ │ │ │ ├── OSAG.py │ │ │ │ ├── OmniSR.py │ │ │ │ ├── esa.py │ │ │ │ ├── layernorm.py │ │ │ │ └── pixelshuffle.py │ │ │ ├── RRDB.py │ │ │ ├── SCUNet.py │ │ │ ├── SPSR.py │ │ │ ├── SRVGG.py │ │ │ ├── SwiftSRGAN.py │ │ │ ├── Swin2SR.py │ │ │ ├── SwinIR.py │ │ │ ├── __init__.py │ │ │ ├── block.py │ │ │ ├── face │ │ │ │ ├── LICENSE-GFPGAN │ │ │ │ ├── LICENSE-RestoreFormer │ │ │ │ ├── LICENSE-codeformer │ │ │ │ ├── 
arcface_arch.py │ │ │ │ ├── codeformer.py │ │ │ │ ├── fused_act.py │ │ │ │ ├── gfpgan_bilinear_arch.py │ │ │ │ ├── gfpganv1_arch.py │ │ │ │ ├── gfpganv1_clean_arch.py │ │ │ │ ├── restoreformer_arch.py │ │ │ │ ├── stylegan2_arch.py │ │ │ │ ├── stylegan2_bilinear_arch.py │ │ │ │ ├── stylegan2_clean_arch.py │ │ │ │ └── upfirdn2d.py │ │ │ └── timm │ │ │ │ ├── LICENSE │ │ │ │ ├── drop.py │ │ │ │ ├── helpers.py │ │ │ │ └── weight_init.py │ │ ├── model_loading.py │ │ └── types.py │ ├── nodes_canny.py │ ├── nodes_clip_sdxl.py │ ├── nodes_compositing.py │ ├── nodes_custom_sampler.py │ ├── nodes_freelunch.py │ ├── nodes_hypernetwork.py │ ├── nodes_latent.py │ ├── nodes_mask.py │ ├── nodes_model_merging.py │ ├── nodes_post_processing.py │ ├── nodes_rebatch.py │ ├── nodes_tomesd.py │ └── nodes_upscale_model.py │ ├── folder_paths.py │ ├── latent_preview.py │ └── nodes.py ├── build_launcher.py ├── colab_fix.txt ├── css └── style.css ├── entry_with_update.py ├── environment.yaml ├── fooocusControl_colab.ipynb ├── fooocus_colab.ipynb ├── fooocus_extras ├── controlnet_preprocess_model │ ├── CPDS │ │ └── __init__.py │ ├── OpenPose │ │ ├── LICENSE │ │ ├── __init__.py │ │ ├── body.py │ │ ├── face.py │ │ ├── hand.py │ │ ├── model.py │ │ └── util.py │ ├── PyramidCanny │ │ └── __init__.py │ ├── ReColor │ │ └── __init__.py │ ├── Revision │ │ └── __init__.py │ ├── Sketch │ │ └── __init__.py │ ├── TileBlur │ │ └── __init__.py │ ├── TileBlurAnime │ │ └── __init__.py │ ├── ZeoDepth │ │ ├── LICENSE │ │ ├── __init__.py │ │ └── zoedepth │ │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── data_mono.py │ │ │ ├── ddad.py │ │ │ ├── diml_indoor_test.py │ │ │ ├── diml_outdoor_test.py │ │ │ ├── diode.py │ │ │ ├── hypersim.py │ │ │ ├── ibims.py │ │ │ ├── preprocess.py │ │ │ ├── sun_rgbd_loader.py │ │ │ ├── transforms.py │ │ │ ├── vkitti.py │ │ │ └── vkitti2.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── base_models │ │ │ │ ├── __init__.py │ │ │ │ ├── midas.py │ │ │ │ └── midas_repo │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── README.md │ │ │ │ │ ├── environment.yaml │ │ │ │ │ ├── hubconf.py │ │ │ │ │ ├── midas │ │ │ │ │ ├── backbones │ │ │ │ │ │ ├── beit.py │ │ │ │ │ │ ├── levit.py │ │ │ │ │ │ ├── next_vit.py │ │ │ │ │ │ ├── swin.py │ │ │ │ │ │ ├── swin2.py │ │ │ │ │ │ ├── swin_common.py │ │ │ │ │ │ ├── utils.py │ │ │ │ │ │ └── vit.py │ │ │ │ │ ├── base_model.py │ │ │ │ │ ├── blocks.py │ │ │ │ │ ├── dpt_depth.py │ │ │ │ │ ├── midas_net.py │ │ │ │ │ ├── midas_net_custom.py │ │ │ │ │ ├── model_loader.py │ │ │ │ │ └── transforms.py │ │ │ │ │ ├── run.py │ │ │ │ │ └── utils.py │ │ │ ├── builder.py │ │ │ ├── depth_model.py │ │ │ ├── layers │ │ │ │ ├── attractor.py │ │ │ │ ├── dist_layers.py │ │ │ │ ├── localbins_layers.py │ │ │ │ └── patch_transformer.py │ │ │ ├── model_io.py │ │ │ ├── zoedepth │ │ │ │ ├── __init__.py │ │ │ │ ├── config_zoedepth.json │ │ │ │ ├── config_zoedepth_kitti.json │ │ │ │ └── zoedepth_v1.py │ │ │ └── zoedepth_nk │ │ │ │ ├── __init__.py │ │ │ │ ├── config_zoedepth_nk.json │ │ │ │ └── zoedepth_nk_v1.py │ │ │ ├── trainers │ │ │ ├── base_trainer.py │ │ │ ├── builder.py │ │ │ ├── loss.py │ │ │ ├── zoedepth_nk_trainer.py │ │ │ └── zoedepth_trainer.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── arg_utils.py │ │ │ ├── config.py │ │ │ ├── easydict │ │ │ └── __init__.py │ │ │ ├── geometry.py │ │ │ └── misc.py │ └── __init__.py ├── ip_adapter.py ├── preprocessors.py ├── resampler.py └── vae_interpose.py ├── fooocus_version.py ├── javascript ├── contextMenus.js └── script.js ├── launch.py ├── models 
├── checkpoints │ └── put_checkpoints_here ├── clip │ └── put_clip_or_text_encoder_models_here ├── clip_vision │ └── put_clip_vision_models_here ├── configs │ ├── anything_v3.yaml │ ├── v1-inference.yaml │ ├── v1-inference_clip_skip_2.yaml │ ├── v1-inference_clip_skip_2_fp16.yaml │ ├── v1-inference_fp16.yaml │ ├── v1-inpainting-inference.yaml │ ├── v2-inference-v.yaml │ ├── v2-inference-v_fp32.yaml │ ├── v2-inference.yaml │ ├── v2-inference_fp32.yaml │ └── v2-inpainting-inference.yaml ├── controlnet │ └── put_controlnets_and_t2i_here ├── diffusers │ └── put_diffusers_models_here ├── embeddings │ └── put_embeddings_or_textual_inversion_concepts_here ├── gligen │ └── put_gligen_models_here ├── hypernetworks │ └── put_hypernetworks_here ├── loras │ └── put_loras_here ├── prompt_expansion │ ├── fooocus_expansion │ │ ├── config.json │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ └── put_prompt_expansion_here ├── style_models │ └── put_t2i_style_model_here ├── unet │ └── put_unet_files_here ├── upscale_models │ └── put_esrgan_and_other_upscale_models_here ├── vae │ └── put_vae_here └── vae_approx │ └── put_taesd_encoder_pth_and_taesd_decoder_pth_here ├── modules ├── advanced_parameters.py ├── anisotropic.py ├── async_worker.py ├── core.py ├── default_pipeline.py ├── expansion.py ├── flags.py ├── gradio_hijack.py ├── html.py ├── inpaint_worker.py ├── launch_util.py ├── model_loader.py ├── patch.py ├── path.py ├── private_logger.py ├── sample_hijack.py ├── sdxl_styles.py ├── ui_gradio_extensions.py ├── upscaler.py └── util.py ├── notification-example.ogg ├── presets ├── anime.json ├── realistic.json └── sdxl.json ├── python_hijack.py ├── readme.md ├── requirements_versions.txt ├── sdxl_styles ├── sdxl_styles_diva.json ├── sdxl_styles_fooocus.json ├── sdxl_styles_mre.json ├── sdxl_styles_sai.json └── sdxl_styles_twri.json ├── shared.py ├── update_log.md ├── user_controlnet_config.py ├── webui.py └── wildcards ├── artist.txt └── color.txt /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @lllyasviel 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Describe a problem 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the problem** 11 | A clear and concise description of what the bug is. 12 | 13 | **Full Console Log** 14 | Paste **full** console log here. You will make our job easier if you give a **full** log. 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the idea you'd like** 14 | A clear and concise description of what you want to happen. 
15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.ckpt 3 | *.safetensors 4 | *.pth 5 | *.pt 6 | *.bin 7 | *.patch 8 | *.backup 9 | *.corrupted 10 | lena.png 11 | lena_result.png 12 | lena_test.py 13 | user_path_config.txt 14 | build_chb.py 15 | /modules/*.png 16 | /repositories 17 | /venv 18 | /tmp 19 | /ui-config.json 20 | /outputs 21 | /config.json 22 | /log 23 | /webui.settings.bat 24 | /embeddings 25 | /styles.csv 26 | /params.txt 27 | /styles.csv.bak 28 | /webui-user.bat 29 | /webui-user.sh 30 | /interrogate 31 | /user.css 32 | /.idea 33 | /notification.ogg 34 | /notification.mp3 35 | /SwinIR 36 | /textual_inversion 37 | .vscode 38 | /extensions 39 | /test/stdout.txt 40 | /test/stderr.txt 41 | /cache.json* 42 | /config_states/ 43 | /node_modules 44 | /package-lock.json 45 | /.coverage* 46 | -------------------------------------------------------------------------------- /args_manager.py: -------------------------------------------------------------------------------- 1 | from fcbh.options import enable_args_parsing 2 | enable_args_parsing(False) 3 | import fcbh.cli_args as fcbh_cli 4 | 5 | 6 | fcbh_cli.parser.add_argument("--share", action='store_true', help="Set whether to share on Gradio.") 7 | fcbh_cli.parser.add_argument("--preset", type=str, default=None, help="Apply specified UI preset.") 8 | 9 | fcbh_cli.args = fcbh_cli.parser.parse_args() 10 | fcbh_cli.args.disable_cuda_malloc = True 11 | fcbh_cli.args.auto_launch = True 12 | 13 | if getattr(fcbh_cli.args, 'port', 8188) == 8188: 14 | fcbh_cli.args.port = None 15 | 16 | args = fcbh_cli.args 17 | -------------------------------------------------------------------------------- /asset/canny/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/canny/0.png -------------------------------------------------------------------------------- /asset/canny/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/canny/1.png -------------------------------------------------------------------------------- /asset/canny/canny.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/canny/canny.png -------------------------------------------------------------------------------- /asset/canny/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/canny/snip.png -------------------------------------------------------------------------------- /asset/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/compare.png -------------------------------------------------------------------------------- /asset/depth/0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/0.png -------------------------------------------------------------------------------- /asset/depth/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/1.png -------------------------------------------------------------------------------- /asset/depth/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/2.png -------------------------------------------------------------------------------- /asset/depth/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/3.png -------------------------------------------------------------------------------- /asset/depth/depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/depth.png -------------------------------------------------------------------------------- /asset/depth/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/snip.png -------------------------------------------------------------------------------- /asset/discord-icon-svgrepo-com.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /asset/inpaint_outpaint/mask_inpaint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/inpaint_outpaint/mask_inpaint.png -------------------------------------------------------------------------------- /asset/ip/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/0.png -------------------------------------------------------------------------------- /asset/ip/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/1.png -------------------------------------------------------------------------------- /asset/ip/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/2.png -------------------------------------------------------------------------------- /asset/ip/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/3.png -------------------------------------------------------------------------------- 
/asset/ip/ancient_chinese_costume/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ancient_chinese_costume/0.png -------------------------------------------------------------------------------- /asset/ip/ancient_chinese_costume/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ancient_chinese_costume/1.png -------------------------------------------------------------------------------- /asset/ip/ancient_chinese_costume/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ancient_chinese_costume/2.png -------------------------------------------------------------------------------- /asset/ip/ancient_chinese_costume/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ancient_chinese_costume/3.png -------------------------------------------------------------------------------- /asset/ip/ancient_chinese_costume/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ancient_chinese_costume/snip.png -------------------------------------------------------------------------------- /asset/ip/ip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ip.png -------------------------------------------------------------------------------- /asset/ip/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/snip.png -------------------------------------------------------------------------------- /asset/ip_depth/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip_depth/0.png -------------------------------------------------------------------------------- /asset/ip_depth/ip_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip_depth/ip_depth.png -------------------------------------------------------------------------------- /asset/ip_depth/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip_depth/snip.png -------------------------------------------------------------------------------- /asset/pose_face/0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/0.png -------------------------------------------------------------------------------- /asset/pose_face/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/1.png -------------------------------------------------------------------------------- /asset/pose_face/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/2.png -------------------------------------------------------------------------------- /asset/pose_face/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/3.png -------------------------------------------------------------------------------- /asset/pose_face/pose_face.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/pose_face.png -------------------------------------------------------------------------------- /asset/pose_face/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/snip.png -------------------------------------------------------------------------------- /asset/recolor/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/0.png -------------------------------------------------------------------------------- /asset/recolor/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/1.png -------------------------------------------------------------------------------- /asset/recolor/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/2.png -------------------------------------------------------------------------------- /asset/recolor/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/3.png -------------------------------------------------------------------------------- /asset/recolor/recolor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/recolor.png -------------------------------------------------------------------------------- /asset/recolor/snip.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/snip.png -------------------------------------------------------------------------------- /asset/run_bat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/run_bat.png -------------------------------------------------------------------------------- /asset/sketch/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/0.png -------------------------------------------------------------------------------- /asset/sketch/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/1.png -------------------------------------------------------------------------------- /asset/sketch/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/2.png -------------------------------------------------------------------------------- /asset/sketch/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/3.png -------------------------------------------------------------------------------- /asset/sketch/sketch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/sketch.png -------------------------------------------------------------------------------- /asset/sketch/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/snip.png -------------------------------------------------------------------------------- /backend/doc: -------------------------------------------------------------------------------- 1 | # Fooocus' Comfy Backend Headless (FCBH) 2 | 3 | This is a Comfy Backend from StabilityAI. This pre-compiled backend makes it easier for people who have trouble using pygit2. 4 | 5 | FCBH is maintained by the Fooocus team through review of StabilityAI's changes.
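A minimal, hypothetical sketch (not part of backend/doc) of how this repository drives FCBH as an in-process library rather than a separate server. It reuses only calls that appear verbatim elsewhere in this dump: fcbh.options.enable_args_parsing from args_manager.py, and fcbh.utils.load_torch_file / fcbh.sd.VAE from backend/headless/fcbh/diffusers_load.py. The sys.path entry and the VAE filename are placeholders, not values taken from the source.

import sys
sys.path.append("backend/headless")  # assumption: the bundled fcbh package lives under backend/headless

from fcbh.options import enable_args_parsing
enable_args_parsing(False)  # mirrors args_manager.py: disable FCBH's own CLI argument parsing for library use

import fcbh.utils
import fcbh.sd

# Load a VAE state dict and wrap it, exactly as diffusers_load.py does; the path is a placeholder.
state_dict = fcbh.utils.load_torch_file("models/vae/placeholder_vae.safetensors")
vae = fcbh.sd.VAE(sd=state_dict)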
6 | -------------------------------------------------------------------------------- /backend/headless/fcbh/checkpoint_pickle.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | load = pickle.load 4 | 5 | class Empty: 6 | pass 7 | 8 | class Unpickler(pickle.Unpickler): 9 | def find_class(self, module, name): 10 | #TODO: safe unpickle 11 | if module.startswith("pytorch_lightning"): 12 | return Empty 13 | return super().find_class(module, name) 14 | -------------------------------------------------------------------------------- /backend/headless/fcbh/clip_config_bigg.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1280, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 5120, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 20, 18 | "num_hidden_layers": 32, 19 | "pad_token_id": 1, 20 | "projection_dim": 1280, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /backend/headless/fcbh/clip_vision_config_g.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1664, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 8192, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 48, 15 | "patch_size": 14, 16 | "projection_dim": 1280, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /backend/headless/fcbh/clip_vision_config_h.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1280, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 5120, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 32, 15 | "patch_size": 14, 16 | "projection_dim": 1024, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /backend/headless/fcbh/clip_vision_config_vitl.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "quick_gelu", 5 | "hidden_size": 1024, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 4096, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 24, 15 | "patch_size": 14, 16 | "projection_dim": 768, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /backend/headless/fcbh/diffusers_load.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import fcbh.sd 5 | 6 | def first_file(path, filenames): 7 | for f in filenames: 8 | p = os.path.join(path, f) 9 | if os.path.exists(p): 10 | return p 11 | return None 12 | 13 | def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None): 14 | diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"] 15 | unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names) 16 | vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names) 17 | 18 | text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"] 19 | text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names) 20 | text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names) 21 | 22 | text_encoder_paths = [text_encoder1_path] 23 | if text_encoder2_path is not None: 24 | text_encoder_paths.append(text_encoder2_path) 25 | 26 | unet = fcbh.sd.load_unet(unet_path) 27 | 28 | clip = None 29 | if output_clip: 30 | clip = fcbh.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory) 31 | 32 | vae = None 33 | if output_vae: 34 | sd = fcbh.utils.load_torch_file(vae_path) 35 | vae = fcbh.sd.VAE(sd=sd) 36 | 37 | return (unet, clip, vae) 38 | -------------------------------------------------------------------------------- /backend/headless/fcbh/latent_formats.py: -------------------------------------------------------------------------------- 1 | 2 | class LatentFormat: 3 | scale_factor = 1.0 4 | latent_rgb_factors = None 5 | taesd_decoder_name = None 6 | 7 | def process_in(self, latent): 8 | return latent * self.scale_factor 9 | 10 | def process_out(self, latent): 11 | return latent / self.scale_factor 12 | 13 | class SD15(LatentFormat): 14 | def __init__(self, scale_factor=0.18215): 15 | self.scale_factor = scale_factor 16 | self.latent_rgb_factors = [ 17 | # R G B 18 | [ 0.3512, 0.2297, 0.3227], 19 | [ 0.3250, 0.4974, 0.2350], 20 | [-0.2829, 0.1762, 0.2721], 21 | [-0.2120, -0.2616, -0.7177] 22 | ] 23 | self.taesd_decoder_name = "taesd_decoder" 24 | 25 | class SDXL(LatentFormat): 26 | def __init__(self): 27 | self.scale_factor = 0.13025 28 | self.latent_rgb_factors = [ 29 | # R G B 30 | [ 0.3920, 0.4054, 0.4549], 31 | [-0.2634, -0.0196, 0.0653], 32 | [ 0.0568, 0.1687, -0.0755], 33 | [-0.3112, -0.2359, -0.2076] 34 | ] 35 | self.taesd_decoder_name = "taesdxl_decoder" 36 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/models/diffusion/dpm_solver/sampler.py: 
-------------------------------------------------------------------------------- 1 | """SAMPLING ONLY.""" 2 | import torch 3 | 4 | from .dpm_solver import NoiseScheduleVP, model_wrapper, DPM_Solver 5 | 6 | MODEL_TYPES = { 7 | "eps": "noise", 8 | "v": "v" 9 | } 10 | 11 | 12 | class DPMSolverSampler(object): 13 | def __init__(self, model, device=torch.device("cuda"), **kwargs): 14 | super().__init__() 15 | self.model = model 16 | self.device = device 17 | to_torch = lambda x: x.clone().detach().to(torch.float32).to(model.device) 18 | self.register_buffer('alphas_cumprod', to_torch(model.alphas_cumprod)) 19 | 20 | def register_buffer(self, name, attr): 21 | if type(attr) == torch.Tensor: 22 | if attr.device != self.device: 23 | attr = attr.to(self.device) 24 | setattr(self, name, attr) 25 | 26 | @torch.no_grad() 27 | def sample(self, 28 | S, 29 | batch_size, 30 | shape, 31 | conditioning=None, 32 | callback=None, 33 | normals_sequence=None, 34 | img_callback=None, 35 | quantize_x0=False, 36 | eta=0., 37 | mask=None, 38 | x0=None, 39 | temperature=1., 40 | noise_dropout=0., 41 | score_corrector=None, 42 | corrector_kwargs=None, 43 | verbose=True, 44 | x_T=None, 45 | log_every_t=100, 46 | unconditional_guidance_scale=1., 47 | unconditional_conditioning=None, 48 | # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... 49 | **kwargs 50 | ): 51 | if conditioning is not None: 52 | if isinstance(conditioning, dict): 53 | ctmp = conditioning[list(conditioning.keys())[0]] 54 | while isinstance(ctmp, list): ctmp = ctmp[0] 55 | if isinstance(ctmp, torch.Tensor): 56 | cbs = ctmp.shape[0] 57 | if cbs != batch_size: 58 | print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") 59 | elif isinstance(conditioning, list): 60 | for ctmp in conditioning: 61 | if ctmp.shape[0] != batch_size: 62 | print(f"Warning: Got {ctmp.shape[0]} conditionings but batch-size is {batch_size}") 63 | else: 64 | if isinstance(conditioning, torch.Tensor): 65 | if conditioning.shape[0] != batch_size: 66 | print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") 67 | 68 | # sampling 69 | C, H, W = shape 70 | size = (batch_size, C, H, W) 71 | 72 | print(f'Data shape for DPM-Solver sampling is {size}, sampling steps {S}') 73 | 74 | device = self.model.betas.device 75 | if x_T is None: 76 | img = torch.randn(size, device=device) 77 | else: 78 | img = x_T 79 | 80 | ns = NoiseScheduleVP('discrete', alphas_cumprod=self.alphas_cumprod) 81 | 82 | model_fn = model_wrapper( 83 | lambda x, t, c: self.model.apply_model(x, t, c), 84 | ns, 85 | model_type=MODEL_TYPES[self.model.parameterization], 86 | guidance_type="classifier-free", 87 | condition=conditioning, 88 | unconditional_condition=unconditional_conditioning, 89 | guidance_scale=unconditional_guidance_scale, 90 | ) 91 | 92 | dpm_solver = DPM_Solver(model_fn, ns, predict_x0=True, thresholding=False) 93 | x = dpm_solver.sample(img, steps=S, skip_type="time_uniform", method="multistep", order=2, 94 | lower_order_final=True) 95 | 96 | return x.to(device), None 97 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 
7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') 11 | return x[(...,) + (None,) * dims_to_append] 12 | 13 | 14 | def norm_thresholding(x0, value): 15 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 16 | return x0 * (value / s) 17 | 18 | 19 | def spatial_norm_thresholding(x0, value): 20 | # b c h w 21 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 22 | return x0 * (value / s) -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/diffusionmodules/upscaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from functools import partial 5 | 6 | from .util import extract_into_tensor, make_beta_schedule 7 | from fcbh.ldm.util import default 8 | 9 | 10 | class AbstractLowScaleModel(nn.Module): 11 | # for concatenating a downsampled image to the latent representation 12 | def __init__(self, noise_schedule_config=None): 13 | super(AbstractLowScaleModel, self).__init__() 14 | if noise_schedule_config is not None: 15 | self.register_schedule(**noise_schedule_config) 16 | 17 | def register_schedule(self, beta_schedule="linear", timesteps=1000, 18 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 19 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, 20 | cosine_s=cosine_s) 21 | alphas = 1. - betas 22 | alphas_cumprod = np.cumprod(alphas, axis=0) 23 | alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 24 | 25 | timesteps, = betas.shape 26 | self.num_timesteps = int(timesteps) 27 | self.linear_start = linear_start 28 | self.linear_end = linear_end 29 | assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' 30 | 31 | to_torch = partial(torch.tensor, dtype=torch.float32) 32 | 33 | self.register_buffer('betas', to_torch(betas)) 34 | self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) 35 | self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) 36 | 37 | # calculations for diffusion q(x_t | x_{t-1}) and others 38 | self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) 39 | self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) 40 | self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) 41 | self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) 42 | self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. 
/ alphas_cumprod - 1))) 43 | 44 | def q_sample(self, x_start, t, noise=None): 45 | noise = default(noise, lambda: torch.randn_like(x_start)) 46 | return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + 47 | extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) 48 | 49 | def forward(self, x): 50 | return x, None 51 | 52 | def decode(self, x): 53 | return x 54 | 55 | 56 | class SimpleImageConcat(AbstractLowScaleModel): 57 | # no noise level conditioning 58 | def __init__(self): 59 | super(SimpleImageConcat, self).__init__(noise_schedule_config=None) 60 | self.max_noise_level = 0 61 | 62 | def forward(self, x): 63 | # fix to constant noise level 64 | return x, torch.zeros(x.shape[0], device=x.device).long() 65 | 66 | 67 | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): 68 | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cuda=False): 69 | super().__init__(noise_schedule_config=noise_schedule_config) 70 | self.max_noise_level = max_noise_level 71 | 72 | def forward(self, x, noise_level=None): 73 | if noise_level is None: 74 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 75 | else: 76 | assert isinstance(noise_level, torch.Tensor) 77 | z = self.q_sample(x, noise_level) 78 | return z, noise_level 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 | def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return 
torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 
62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) 81 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/encoders/noise_aug_modules.py: -------------------------------------------------------------------------------- 1 | from ..diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation 2 | from ..diffusionmodules.openaimodel import Timestep 3 | import torch 4 | 5 | class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation): 6 | def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | if clip_stats_path is None: 9 | clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) 10 | else: 11 | clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu") 12 | self.register_buffer("data_mean", clip_mean[None, :], persistent=False) 13 | self.register_buffer("data_std", clip_std[None, :], persistent=False) 14 | self.time_embed = Timestep(timestep_dim) 15 | 16 | def scale(self, x): 17 | # re-normalize to centered mean and unit variance 18 | x = (x - self.data_mean) * 1. 
/ self.data_std 19 | return x 20 | 21 | def unscale(self, x): 22 | # back to original data stats 23 | x = (x * self.data_std) + self.data_mean 24 | return x 25 | 26 | def forward(self, x, noise_level=None): 27 | if noise_level is None: 28 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 29 | else: 30 | assert isinstance(noise_level, torch.Tensor) 31 | x = self.scale(x) 32 | z = self.q_sample(x, noise_level) 33 | z = self.unscale(z) 34 | noise_level = self.time_embed(noise_level) 35 | return z, noise_level 36 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from contextlib import contextmanager 3 | 4 | class Linear(torch.nn.Module): 5 | def __init__(self, in_features: int, out_features: int, bias: bool = True, 6 | device=None, dtype=None) -> None: 7 | factory_kwargs = {'device': device, 'dtype': dtype} 8 | super().__init__() 9 | self.in_features = in_features 10 | self.out_features = out_features 11 | self.weight = torch.nn.Parameter(torch.empty((out_features, in_features), **factory_kwargs)) 12 | if bias: 13 | self.bias = torch.nn.Parameter(torch.empty(out_features, **factory_kwargs)) 14 | else: 15 | self.register_parameter('bias', None) 16 | 17 | def forward(self, input): 18 | return torch.nn.functional.linear(input, self.weight, self.bias) 19 | 20 | class Conv2d(torch.nn.Conv2d): 21 | def reset_parameters(self): 22 | return None 23 | 24 | def conv_nd(dims, *args, **kwargs): 25 | if dims == 2: 26 | return Conv2d(*args, **kwargs) 27 | else: 28 | raise ValueError(f"unsupported dimensions: {dims}") 29 | 30 | @contextmanager 31 | def use_fcbh_ops(device=None, dtype=None): # Kind of an ugly hack but I can't think of a better way 32 | old_torch_nn_linear = torch.nn.Linear 33 | force_device = device 34 | force_dtype = dtype 35 | def linear_with_dtype(in_features: int, out_features: int, bias: bool = True, device=None, dtype=None): 36 | if force_device is not None: 37 | device = force_device 38 | if force_dtype is not None: 39 | dtype = force_dtype 40 | return Linear(in_features, out_features, bias=bias, device=device, dtype=dtype) 41 | 42 | torch.nn.Linear = linear_with_dtype 43 | try: 44 | yield 45 | finally: 46 | torch.nn.Linear = old_torch_nn_linear 47 | -------------------------------------------------------------------------------- /backend/headless/fcbh/options.py: -------------------------------------------------------------------------------- 1 | 2 | args_parsing = False 3 | 4 | def enable_args_parsing(enable=True): 5 | global args_parsing 6 | args_parsing = enable 7 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sd1_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "openai/clip-vit-large-patch14", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "quick_gelu", 11 | "hidden_size": 768, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 3072, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 12, 19 | "num_hidden_layers": 12, 20 | "pad_token_id": 1, 21 | "projection_dim": 768, 22 | "torch_dtype": "float32", 23 | 
"transformers_version": "4.24.0", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sd1_tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "<|startoftext|>", 4 | "lstrip": false, 5 | "normalized": true, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": true, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "<|endoftext|>", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": true, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sd1_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": { 4 | "__type": "AddedToken", 5 | "content": "<|startoftext|>", 6 | "lstrip": false, 7 | "normalized": true, 8 | "rstrip": false, 9 | "single_word": false 10 | }, 11 | "do_lower_case": true, 12 | "eos_token": { 13 | "__type": "AddedToken", 14 | "content": "<|endoftext|>", 15 | "lstrip": false, 16 | "normalized": true, 17 | "rstrip": false, 18 | "single_word": false 19 | }, 20 | "errors": "replace", 21 | "model_max_length": 77, 22 | "name_or_path": "openai/clip-vit-large-patch14", 23 | "pad_token": "<|endoftext|>", 24 | "special_tokens_map_file": "./special_tokens_map.json", 25 | "tokenizer_class": "CLIPTokenizer", 26 | "unk_token": { 27 | "__type": "AddedToken", 28 | "content": "<|endoftext|>", 29 | "lstrip": false, 30 | "normalized": true, 31 | "rstrip": false, 32 | "single_word": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sd2_clip.py: -------------------------------------------------------------------------------- 1 | from fcbh import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SD2ClipModel(sd1_clip.SD1ClipModel): 6 | def __init__(self, arch="ViT-H-14", device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, textmodel_path=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=23 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd2_clip_config.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, textmodel_path=textmodel_path, dtype=dtype) 13 | self.empty_tokens = [[49406] + [49407] + [0] * 75] 14 | 15 | class SD2Tokenizer(sd1_clip.SD1Tokenizer): 16 | def __init__(self, tokenizer_path=None, embedding_directory=None): 17 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024) 18 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sd2_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1024, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 4096, 14 | 
"layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 16, 18 | "num_hidden_layers": 24, 19 | "pad_token_id": 1, 20 | "projection_dim": 1024, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sdxl_clip.py: -------------------------------------------------------------------------------- 1 | from fcbh import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SDXLClipG(sd1_clip.SD1ClipModel): 6 | def __init__(self, device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, textmodel_path=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_config_bigg.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, textmodel_path=textmodel_path, dtype=dtype) 13 | self.empty_tokens = [[49406] + [49407] + [0] * 75] 14 | self.layer_norm_hidden_state = False 15 | 16 | def load_sd(self, sd): 17 | return super().load_sd(sd) 18 | 19 | class SDXLClipGTokenizer(sd1_clip.SD1Tokenizer): 20 | def __init__(self, tokenizer_path=None, embedding_directory=None): 21 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g') 22 | 23 | 24 | class SDXLTokenizer(sd1_clip.SD1Tokenizer): 25 | def __init__(self, embedding_directory=None): 26 | self.clip_l = sd1_clip.SD1Tokenizer(embedding_directory=embedding_directory) 27 | self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory) 28 | 29 | def tokenize_with_weights(self, text:str, return_word_ids=False): 30 | out = {} 31 | out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) 32 | out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) 33 | return out 34 | 35 | def untokenize(self, token_weight_pair): 36 | return self.clip_g.untokenize(token_weight_pair) 37 | 38 | class SDXLClipModel(torch.nn.Module): 39 | def __init__(self, device="cpu", dtype=None): 40 | super().__init__() 41 | self.clip_l = sd1_clip.SD1ClipModel(layer="hidden", layer_idx=11, device=device, dtype=dtype) 42 | self.clip_l.layer_norm_hidden_state = False 43 | self.clip_g = SDXLClipG(device=device, dtype=dtype) 44 | 45 | def clip_layer(self, layer_idx): 46 | self.clip_l.clip_layer(layer_idx) 47 | self.clip_g.clip_layer(layer_idx) 48 | 49 | def reset_clip_layer(self): 50 | self.clip_g.reset_clip_layer() 51 | self.clip_l.reset_clip_layer() 52 | 53 | def encode_token_weights(self, token_weight_pairs): 54 | token_weight_pairs_g = token_weight_pairs["g"] 55 | token_weight_pairs_l = token_weight_pairs["l"] 56 | g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs_g) 57 | l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l) 58 | return torch.cat([l_out, g_out], dim=-1), g_pooled 59 | 60 | def load_sd(self, sd): 61 | if "text_model.encoder.layers.30.mlp.fc1.weight" in sd: 62 | return self.clip_g.load_sd(sd) 63 | else: 64 | return self.clip_l.load_sd(sd) 65 | 66 | class SDXLRefinerClipModel(torch.nn.Module): 67 | def __init__(self, device="cpu", dtype=None): 68 | super().__init__() 69 | self.clip_g = SDXLClipG(device=device, dtype=dtype) 70 | 71 | def clip_layer(self, layer_idx): 72 | self.clip_g.clip_layer(layer_idx) 73 | 74 | def 
reset_clip_layer(self): 75 | self.clip_g.reset_clip_layer() 76 | 77 | def encode_token_weights(self, token_weight_pairs): 78 | token_weight_pairs_g = token_weight_pairs["g"] 79 | g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs_g) 80 | return g_out, g_pooled 81 | 82 | def load_sd(self, sd): 83 | return self.clip_g.load_sd(sd) 84 | -------------------------------------------------------------------------------- /backend/headless/fcbh/supported_models_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . import model_base 3 | from . import utils 4 | from . import latent_formats 5 | 6 | class ClipTarget: 7 | def __init__(self, tokenizer, clip): 8 | self.clip = clip 9 | self.tokenizer = tokenizer 10 | self.params = {} 11 | 12 | class BASE: 13 | unet_config = {} 14 | unet_extra_config = { 15 | "num_heads": -1, 16 | "num_head_channels": 64, 17 | } 18 | 19 | clip_prefix = [] 20 | clip_vision_prefix = None 21 | noise_aug_config = None 22 | beta_schedule = "linear" 23 | latent_format = latent_formats.LatentFormat 24 | 25 | @classmethod 26 | def matches(s, unet_config): 27 | for k in s.unet_config: 28 | if s.unet_config[k] != unet_config[k]: 29 | return False 30 | return True 31 | 32 | def model_type(self, state_dict, prefix=""): 33 | return model_base.ModelType.EPS 34 | 35 | def inpaint_model(self): 36 | return self.unet_config["in_channels"] > 4 37 | 38 | def __init__(self, unet_config): 39 | self.unet_config = unet_config 40 | self.latent_format = self.latent_format() 41 | for x in self.unet_extra_config: 42 | self.unet_config[x] = self.unet_extra_config[x] 43 | 44 | def get_model(self, state_dict, prefix="", device=None): 45 | if self.noise_aug_config is not None: 46 | out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=self.model_type(state_dict, prefix), device=device) 47 | else: 48 | out = model_base.BaseModel(self, model_type=self.model_type(state_dict, prefix), device=device) 49 | if self.inpaint_model(): 50 | out.set_inpaint() 51 | return out 52 | 53 | def process_clip_state_dict(self, state_dict): 54 | return state_dict 55 | 56 | def process_clip_state_dict_for_saving(self, state_dict): 57 | replace_prefix = {"": "cond_stage_model."} 58 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 59 | 60 | def process_unet_state_dict_for_saving(self, state_dict): 61 | replace_prefix = {"": "model.diffusion_model."} 62 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 63 | 64 | def process_vae_state_dict_for_saving(self, state_dict): 65 | replace_prefix = {"": "first_stage_model."} 66 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 67 | 68 | -------------------------------------------------------------------------------- /backend/headless/fcbh/taesd/taesd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Tiny AutoEncoder for Stable Diffusion 4 | (DNN for encoding / decoding SD's latent space) 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | 9 | import fcbh.utils 10 | 11 | def conv(n_in, n_out, **kwargs): 12 | return nn.Conv2d(n_in, n_out, 3, padding=1, **kwargs) 13 | 14 | class Clamp(nn.Module): 15 | def forward(self, x): 16 | return torch.tanh(x / 3) * 3 17 | 18 | class Block(nn.Module): 19 | def __init__(self, n_in, n_out): 20 | super().__init__() 21 | self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out)) 22 | 
self.skip = nn.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity() 23 | self.fuse = nn.ReLU() 24 | def forward(self, x): 25 | return self.fuse(self.conv(x) + self.skip(x)) 26 | 27 | def Encoder(): 28 | return nn.Sequential( 29 | conv(3, 64), Block(64, 64), 30 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 31 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 32 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 33 | conv(64, 4), 34 | ) 35 | 36 | def Decoder(): 37 | return nn.Sequential( 38 | Clamp(), conv(4, 64), nn.ReLU(), 39 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 40 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 41 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 42 | Block(64, 64), conv(64, 3), 43 | ) 44 | 45 | class TAESD(nn.Module): 46 | latent_magnitude = 3 47 | latent_shift = 0.5 48 | 49 | def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder.pth"): 50 | """Initialize pretrained TAESD on the given device from the given checkpoints.""" 51 | super().__init__() 52 | self.encoder = Encoder() 53 | self.decoder = Decoder() 54 | if encoder_path is not None: 55 | self.encoder.load_state_dict(fcbh.utils.load_torch_file(encoder_path, safe_load=True)) 56 | if decoder_path is not None: 57 | self.decoder.load_state_dict(fcbh.utils.load_torch_file(decoder_path, safe_load=True)) 58 | 59 | @staticmethod 60 | def scale_latents(x): 61 | """raw latents -> [0, 1]""" 62 | return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) 63 | 64 | @staticmethod 65 | def unscale_latents(x): 66 | """[0, 1] -> raw latents""" 67 | return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) 68 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh_extras/chainner_models/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/LICENSE-HAT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Xiangyu Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/LICENSE-RealESRGAN: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Xintao Wang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/OmniSR/ChannelAttention.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class CA_layer(nn.Module): 7 | def __init__(self, channel, reduction=16): 8 | super(CA_layer, self).__init__() 9 | # global average pooling 10 | self.gap = nn.AdaptiveAvgPool2d(1) 11 | self.fc = nn.Sequential( 12 | nn.Conv2d(channel, channel // reduction, kernel_size=(1, 1), bias=False), 13 | nn.GELU(), 14 | nn.Conv2d(channel // reduction, channel, kernel_size=(1, 1), bias=False), 15 | # nn.Sigmoid() 16 | ) 17 | 18 | def forward(self, x): 19 | y = self.fc(self.gap(x)) 20 | return x * y.expand_as(x) 21 | 22 | 23 | class Simple_CA_layer(nn.Module): 24 | def __init__(self, channel): 25 | super(Simple_CA_layer, self).__init__() 26 | self.gap = nn.AdaptiveAvgPool2d(1) 27 | self.fc = nn.Conv2d( 28 | in_channels=channel, 29 | out_channels=channel, 30 | kernel_size=1, 31 | padding=0, 32 | stride=1, 33 | groups=1, 34 | bias=True, 35 | ) 36 | 37 | def forward(self, x): 38 | return x * self.fc(self.gap(x)) 39 | 40 | 41 | class ECA_layer(nn.Module): 42 | """Constructs a ECA module. 
43 | Args: 44 | channel: Number of channels of the input feature map 45 | k_size: Adaptive selection of kernel size 46 | """ 47 | 48 | def __init__(self, channel): 49 | super(ECA_layer, self).__init__() 50 | 51 | b = 1 52 | gamma = 2 53 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 54 | k_size = k_size if k_size % 2 else k_size + 1 55 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 56 | self.conv = nn.Conv1d( 57 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 58 | ) 59 | # self.sigmoid = nn.Sigmoid() 60 | 61 | def forward(self, x): 62 | # x: input features with shape [b, c, h, w] 63 | # b, c, h, w = x.size() 64 | 65 | # feature descriptor on the global spatial information 66 | y = self.avg_pool(x) 67 | 68 | # Two different branches of ECA module 69 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 70 | 71 | # Multi-scale information fusion 72 | # y = self.sigmoid(y) 73 | 74 | return x * y.expand_as(x) 75 | 76 | 77 | class ECA_MaxPool_layer(nn.Module): 78 | """Constructs a ECA module. 79 | Args: 80 | channel: Number of channels of the input feature map 81 | k_size: Adaptive selection of kernel size 82 | """ 83 | 84 | def __init__(self, channel): 85 | super(ECA_MaxPool_layer, self).__init__() 86 | 87 | b = 1 88 | gamma = 2 89 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 90 | k_size = k_size if k_size % 2 else k_size + 1 91 | self.max_pool = nn.AdaptiveMaxPool2d(1) 92 | self.conv = nn.Conv1d( 93 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 94 | ) 95 | # self.sigmoid = nn.Sigmoid() 96 | 97 | def forward(self, x): 98 | # x: input features with shape [b, c, h, w] 99 | # b, c, h, w = x.size() 100 | 101 | # feature descriptor on the global spatial information 102 | y = self.max_pool(x) 103 | 104 | # Two different branches of ECA module 105 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 106 | 107 | # Multi-scale information fusion 108 | # y = self.sigmoid(y) 109 | 110 | return x * y.expand_as(x) 111 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/OmniSR/OSAG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OSAG.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:08:49 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | 14 | import torch.nn as nn 15 | 16 | from .esa import ESA 17 | from .OSA import OSA_Block 18 | 19 | 20 | class OSAG(nn.Module): 21 | def __init__( 22 | self, 23 | channel_num=64, 24 | bias=True, 25 | block_num=4, 26 | ffn_bias=False, 27 | window_size=0, 28 | pe=False, 29 | ): 30 | super(OSAG, self).__init__() 31 | 32 | # print("window_size: %d" % (window_size)) 33 | # print("with_pe", pe) 34 | # print("ffn_bias: %d" % (ffn_bias)) 35 | 36 | # block_script_name = kwargs.get("block_script_name", "OSA") 37 | # block_class_name = kwargs.get("block_class_name", "OSA_Block") 38 | 39 | # script_name = "." 
+ block_script_name 40 | # package = __import__(script_name, fromlist=True) 41 | block_class = OSA_Block # getattr(package, block_class_name) 42 | group_list = [] 43 | for _ in range(block_num): 44 | temp_res = block_class( 45 | channel_num, 46 | bias, 47 | ffn_bias=ffn_bias, 48 | window_size=window_size, 49 | with_pe=pe, 50 | ) 51 | group_list.append(temp_res) 52 | group_list.append(nn.Conv2d(channel_num, channel_num, 1, 1, 0, bias=bias)) 53 | self.residual_layer = nn.Sequential(*group_list) 54 | esa_channel = max(channel_num // 4, 16) 55 | self.esa = ESA(esa_channel, channel_num) 56 | 57 | def forward(self, x): 58 | out = self.residual_layer(x) 59 | out = out + x 60 | return self.esa(out) 61 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/OmniSR/layernorm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: layernorm.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Thursday, 20th April 2023 9:28:20 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | 17 | class LayerNormFunction(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, weight, bias, eps): 20 | ctx.eps = eps 21 | N, C, H, W = x.size() 22 | mu = x.mean(1, keepdim=True) 23 | var = (x - mu).pow(2).mean(1, keepdim=True) 24 | y = (x - mu) / (var + eps).sqrt() 25 | ctx.save_for_backward(y, var, weight) 26 | y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) 27 | return y 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | eps = ctx.eps 32 | 33 | N, C, H, W = grad_output.size() 34 | y, var, weight = ctx.saved_variables 35 | g = grad_output * weight.view(1, C, 1, 1) 36 | mean_g = g.mean(dim=1, keepdim=True) 37 | 38 | mean_gy = (g * y).mean(dim=1, keepdim=True) 39 | gx = 1.0 / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) 40 | return ( 41 | gx, 42 | (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), 43 | grad_output.sum(dim=3).sum(dim=2).sum(dim=0), 44 | None, 45 | ) 46 | 47 | 48 | class LayerNorm2d(nn.Module): 49 | def __init__(self, channels, eps=1e-6): 50 | super(LayerNorm2d, self).__init__() 51 | self.register_parameter("weight", nn.Parameter(torch.ones(channels))) 52 | self.register_parameter("bias", nn.Parameter(torch.zeros(channels))) 53 | self.eps = eps 54 | 55 | def forward(self, x): 56 | return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) 57 | 58 | 59 | class GRN(nn.Module): 60 | """GRN (Global Response Normalization) layer""" 61 | 62 | def __init__(self, dim): 63 | super().__init__() 64 | self.gamma = nn.Parameter(torch.zeros(1, dim, 1, 1)) 65 | self.beta = nn.Parameter(torch.zeros(1, dim, 1, 1)) 66 | 67 | def forward(self, x): 68 | Gx = torch.norm(x, p=2, dim=(2, 3), keepdim=True) 69 | Nx = Gx / (Gx.mean(dim=1, keepdim=True) + 1e-6) 70 | return self.gamma * (x * Nx) + self.beta + x 71 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/OmniSR/pixelshuffle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- 
coding:utf-8 -*- 3 | ############################################################# 4 | # File: pixelshuffle.py 5 | # Created Date: Friday July 1st 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Friday, 1st July 2022 10:18:39 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2022 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch.nn as nn 14 | 15 | 16 | def pixelshuffle_block( 17 | in_channels, out_channels, upscale_factor=2, kernel_size=3, bias=False 18 | ): 19 | """ 20 | Upsample features according to `upscale_factor`. 21 | """ 22 | padding = kernel_size // 2 23 | conv = nn.Conv2d( 24 | in_channels, 25 | out_channels * (upscale_factor**2), 26 | kernel_size, 27 | padding=1, 28 | bias=bias, 29 | ) 30 | pixel_shuffle = nn.PixelShuffle(upscale_factor) 31 | return nn.Sequential(*[conv, pixel_shuffle]) 32 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh_extras/chainner_models/architecture/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/face/LICENSE-codeformer: -------------------------------------------------------------------------------- 1 | S-Lab License 1.0 2 | 3 | Copyright 2022 S-Lab 4 | 5 | Redistribution and use for non-commercial purpose in source and 6 | binary forms, with or without modification, are permitted provided 7 | that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | In the event that redistribution and/or use for commercial purpose in 34 | source or binary forms, with or without modification is required, 35 | please contact the contributor(s) of the work. 
36 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/face/fused_act.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # type: ignore 3 | # modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 4 | 5 | import torch 6 | from torch import nn 7 | from torch.autograd import Function 8 | 9 | fused_act_ext = None 10 | 11 | 12 | class FusedLeakyReLUFunctionBackward(Function): 13 | @staticmethod 14 | def forward(ctx, grad_output, out, negative_slope, scale): 15 | ctx.save_for_backward(out) 16 | ctx.negative_slope = negative_slope 17 | ctx.scale = scale 18 | 19 | empty = grad_output.new_empty(0) 20 | 21 | grad_input = fused_act_ext.fused_bias_act( 22 | grad_output, empty, out, 3, 1, negative_slope, scale 23 | ) 24 | 25 | dim = [0] 26 | 27 | if grad_input.ndim > 2: 28 | dim += list(range(2, grad_input.ndim)) 29 | 30 | grad_bias = grad_input.sum(dim).detach() 31 | 32 | return grad_input, grad_bias 33 | 34 | @staticmethod 35 | def backward(ctx, gradgrad_input, gradgrad_bias): 36 | (out,) = ctx.saved_tensors 37 | gradgrad_out = fused_act_ext.fused_bias_act( 38 | gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale 39 | ) 40 | 41 | return gradgrad_out, None, None, None 42 | 43 | 44 | class FusedLeakyReLUFunction(Function): 45 | @staticmethod 46 | def forward(ctx, input, bias, negative_slope, scale): 47 | empty = input.new_empty(0) 48 | out = fused_act_ext.fused_bias_act( 49 | input, bias, empty, 3, 0, negative_slope, scale 50 | ) 51 | ctx.save_for_backward(out) 52 | ctx.negative_slope = negative_slope 53 | ctx.scale = scale 54 | 55 | return out 56 | 57 | @staticmethod 58 | def backward(ctx, grad_output): 59 | (out,) = ctx.saved_tensors 60 | 61 | grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply( 62 | grad_output, out, ctx.negative_slope, ctx.scale 63 | ) 64 | 65 | return grad_input, grad_bias, None, None 66 | 67 | 68 | class FusedLeakyReLU(nn.Module): 69 | def __init__(self, channel, negative_slope=0.2, scale=2**0.5): 70 | super().__init__() 71 | 72 | self.bias = nn.Parameter(torch.zeros(channel)) 73 | self.negative_slope = negative_slope 74 | self.scale = scale 75 | 76 | def forward(self, input): 77 | return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale) 78 | 79 | 80 | def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5): 81 | return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) 82 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/timm/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import collections.abc 5 | from itertools import repeat 6 | 7 | 8 | # From PyTorch internals 9 | def _ntuple(n): 10 | def parse(x): 11 | if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): 12 | return x 13 | return tuple(repeat(x, n)) 14 | 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=0.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # 
Make sure that round down does not go down by more than 10%. 29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/model_loading.py: -------------------------------------------------------------------------------- 1 | import logging as logger 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | from .types import PyTorchModel 18 | 19 | 20 | class UnsupportedModel(Exception): 21 | pass 22 | 23 | 24 | def load_state_dict(state_dict) -> PyTorchModel: 25 | logger.debug(f"Loading state dict into pytorch model arch") 26 | 27 | state_dict_keys = list(state_dict.keys()) 28 | 29 | if "params_ema" in state_dict_keys: 30 | state_dict = state_dict["params_ema"] 31 | elif "params-ema" in state_dict_keys: 32 | state_dict = state_dict["params-ema"] 33 | elif "params" in state_dict_keys: 34 | state_dict = state_dict["params"] 35 | 36 | state_dict_keys = list(state_dict.keys()) 37 | # SRVGGNet Real-ESRGAN (v2) 38 | if "body.0.weight" in state_dict_keys and "body.1.weight" in state_dict_keys: 39 | model = RealESRGANv2(state_dict) 40 | # SPSR (ESRGAN with lots of extra layers) 41 | elif "f_HR_conv1.0.weight" in state_dict: 42 | model = SPSR(state_dict) 43 | # Swift-SRGAN 44 | elif ( 45 | "model" in state_dict_keys 46 | and "initial.cnn.depthwise.weight" in state_dict["model"].keys() 47 | ): 48 | model = SwiftSRGAN(state_dict) 49 | # SwinIR, Swin2SR, HAT 50 | elif "layers.0.residual_group.blocks.0.norm1.weight" in state_dict_keys: 51 | if ( 52 | "layers.0.residual_group.blocks.0.conv_block.cab.0.weight" 53 | in state_dict_keys 54 | ): 55 | model = HAT(state_dict) 56 | elif "patch_embed.proj.weight" in state_dict_keys: 57 | model = Swin2SR(state_dict) 58 | else: 59 | model = SwinIR(state_dict) 60 | # GFPGAN 61 | elif ( 62 | "toRGB.0.weight" in state_dict_keys 63 | and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys 64 | ): 65 | model = GFPGANv1Clean(state_dict) 66 | # RestoreFormer 67 | elif ( 68 | "encoder.conv_in.weight" in state_dict_keys 69 | and "encoder.down.0.block.0.norm1.weight" in state_dict_keys 70 | ): 71 | model = RestoreFormer(state_dict) 72 | elif ( 73 | "encoder.blocks.0.weight" in state_dict_keys 74 | and "quantize.embedding.weight" in state_dict_keys 75 | ): 76 | model = CodeFormer(state_dict) 77 | # LaMa 78 | elif ( 79 | "model.model.1.bn_l.running_mean" in state_dict_keys 80 | or "generator.model.1.bn_l.running_mean" in state_dict_keys 81 | ): 82 | model = LaMa(state_dict) 83 | # Omni-SR 84 | elif "residual_layer.0.residual_layer.0.layer.0.fn.0.weight" in state_dict_keys: 85 | model = OmniSR(state_dict) 86 | # SCUNet 87 | elif "m_head.0.weight" in state_dict_keys and "m_tail.0.weight" in state_dict_keys: 88 | model = SCUNet(state_dict) 89 | # DAT 90 | 
elif "layers.0.blocks.2.attn.attn_mask_0" in state_dict_keys: 91 | model = DAT(state_dict) 92 | # Regular ESRGAN, "new-arch" ESRGAN, Real-ESRGAN v1 93 | else: 94 | try: 95 | model = ESRGAN(state_dict) 96 | except: 97 | # pylint: disable=raise-missing-from 98 | raise UnsupportedModel 99 | return model 100 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/types.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | 18 | PyTorchSRModels = ( 19 | RealESRGANv2, 20 | SPSR, 21 | SwiftSRGAN, 22 | ESRGAN, 23 | SwinIR, 24 | Swin2SR, 25 | HAT, 26 | OmniSR, 27 | SCUNet, 28 | DAT, 29 | ) 30 | PyTorchSRModel = Union[ 31 | RealESRGANv2, 32 | SPSR, 33 | SwiftSRGAN, 34 | ESRGAN, 35 | SwinIR, 36 | Swin2SR, 37 | HAT, 38 | OmniSR, 39 | SCUNet, 40 | DAT, 41 | ] 42 | 43 | 44 | def is_pytorch_sr_model(model: object): 45 | return isinstance(model, PyTorchSRModels) 46 | 47 | 48 | PyTorchFaceModels = (GFPGANv1Clean, RestoreFormer, CodeFormer) 49 | PyTorchFaceModel = Union[GFPGANv1Clean, RestoreFormer, CodeFormer] 50 | 51 | 52 | def is_pytorch_face_model(model: object): 53 | return isinstance(model, PyTorchFaceModels) 54 | 55 | 56 | PyTorchInpaintModels = (LaMa,) 57 | PyTorchInpaintModel = Union[LaMa] 58 | 59 | 60 | def is_pytorch_inpaint_model(model: object): 61 | return isinstance(model, PyTorchInpaintModels) 62 | 63 | 64 | PyTorchModels = (*PyTorchSRModels, *PyTorchFaceModels, *PyTorchInpaintModels) 65 | PyTorchModel = Union[PyTorchSRModel, PyTorchFaceModel, PyTorchInpaintModel] 66 | 67 | 68 | def is_pytorch_model(model: object): 69 | return isinstance(model, PyTorchModels) 70 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/nodes_clip_sdxl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from nodes import MAX_RESOLUTION 3 | 4 | class CLIPTextEncodeSDXLRefiner: 5 | @classmethod 6 | def INPUT_TYPES(s): 7 | return {"required": { 8 | "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}), 9 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 10 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 11 | "text": ("STRING", {"multiline": True}), "clip": ("CLIP", ), 12 | }} 13 | RETURN_TYPES = ("CONDITIONING",) 14 | FUNCTION = "encode" 15 | 16 | CATEGORY = "advanced/conditioning" 17 | 18 | def encode(self, clip, ascore, width, height, text): 19 | tokens = clip.tokenize(text) 20 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 21 | return ([[cond, {"pooled_output": pooled, "aesthetic_score": ascore, "width": width,"height": height}]], ) 22 | 23 | 
class CLIPTextEncodeSDXL: 24 | @classmethod 25 | def INPUT_TYPES(s): 26 | return {"required": { 27 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 28 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 29 | "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 30 | "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 31 | "target_width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 32 | "target_height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 33 | "text_g": ("STRING", {"multiline": True, "default": "CLIP_G"}), "clip": ("CLIP", ), 34 | "text_l": ("STRING", {"multiline": True, "default": "CLIP_L"}), "clip": ("CLIP", ), 35 | }} 36 | RETURN_TYPES = ("CONDITIONING",) 37 | FUNCTION = "encode" 38 | 39 | CATEGORY = "advanced/conditioning" 40 | 41 | def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l): 42 | tokens = clip.tokenize(text_g) 43 | tokens["l"] = clip.tokenize(text_l)["l"] 44 | if len(tokens["l"]) != len(tokens["g"]): 45 | empty = clip.tokenize("") 46 | while len(tokens["l"]) < len(tokens["g"]): 47 | tokens["l"] += empty["l"] 48 | while len(tokens["l"]) > len(tokens["g"]): 49 | tokens["g"] += empty["g"] 50 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 51 | return ([[cond, {"pooled_output": pooled, "width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}]], ) 52 | 53 | NODE_CLASS_MAPPINGS = { 54 | "CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner, 55 | "CLIPTextEncodeSDXL": CLIPTextEncodeSDXL, 56 | } 57 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/nodes_latent.py: -------------------------------------------------------------------------------- 1 | import fcbh.utils 2 | 3 | def reshape_latent_to(target_shape, latent): 4 | if latent.shape[1:] != target_shape[1:]: 5 | latent.movedim(1, -1) 6 | latent = fcbh.utils.common_upscale(latent, target_shape[3], target_shape[2], "bilinear", "center") 7 | latent.movedim(-1, 1) 8 | return fcbh.utils.repeat_to_batch_size(latent, target_shape[0]) 9 | 10 | 11 | class LatentAdd: 12 | @classmethod 13 | def INPUT_TYPES(s): 14 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 15 | 16 | RETURN_TYPES = ("LATENT",) 17 | FUNCTION = "op" 18 | 19 | CATEGORY = "latent/advanced" 20 | 21 | def op(self, samples1, samples2): 22 | samples_out = samples1.copy() 23 | 24 | s1 = samples1["samples"] 25 | s2 = samples2["samples"] 26 | 27 | s2 = reshape_latent_to(s1.shape, s2) 28 | samples_out["samples"] = s1 + s2 29 | return (samples_out,) 30 | 31 | class LatentSubtract: 32 | @classmethod 33 | def INPUT_TYPES(s): 34 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 35 | 36 | RETURN_TYPES = ("LATENT",) 37 | FUNCTION = "op" 38 | 39 | CATEGORY = "latent/advanced" 40 | 41 | def op(self, samples1, samples2): 42 | samples_out = samples1.copy() 43 | 44 | s1 = samples1["samples"] 45 | s2 = samples2["samples"] 46 | 47 | s2 = reshape_latent_to(s1.shape, s2) 48 | samples_out["samples"] = s1 - s2 49 | return (samples_out,) 50 | 51 | class LatentMultiply: 52 | @classmethod 53 | def INPUT_TYPES(s): 54 | return {"required": { "samples": ("LATENT",), 55 | "multiplier": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), 56 | }} 57 | 58 | RETURN_TYPES = ("LATENT",) 59 | FUNCTION = "op" 60 | 61 | 
CATEGORY = "latent/advanced" 62 | 63 | def op(self, samples, multiplier): 64 | samples_out = samples.copy() 65 | 66 | s1 = samples["samples"] 67 | samples_out["samples"] = s1 * multiplier 68 | return (samples_out,) 69 | 70 | NODE_CLASS_MAPPINGS = { 71 | "LatentAdd": LatentAdd, 72 | "LatentSubtract": LatentSubtract, 73 | "LatentMultiply": LatentMultiply, 74 | } 75 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/nodes_upscale_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from fcbh_extras.chainner_models import model_loading 3 | from fcbh import model_management 4 | import torch 5 | import fcbh.utils 6 | import folder_paths 7 | 8 | class UpscaleModelLoader: 9 | @classmethod 10 | def INPUT_TYPES(s): 11 | return {"required": { "model_name": (folder_paths.get_filename_list("upscale_models"), ), 12 | }} 13 | RETURN_TYPES = ("UPSCALE_MODEL",) 14 | FUNCTION = "load_model" 15 | 16 | CATEGORY = "loaders" 17 | 18 | def load_model(self, model_name): 19 | model_path = folder_paths.get_full_path("upscale_models", model_name) 20 | sd = fcbh.utils.load_torch_file(model_path, safe_load=True) 21 | if "module.layers.0.residual_group.blocks.0.norm1.weight" in sd: 22 | sd = fcbh.utils.state_dict_prefix_replace(sd, {"module.":""}) 23 | out = model_loading.load_state_dict(sd).eval() 24 | return (out, ) 25 | 26 | 27 | class ImageUpscaleWithModel: 28 | @classmethod 29 | def INPUT_TYPES(s): 30 | return {"required": { "upscale_model": ("UPSCALE_MODEL",), 31 | "image": ("IMAGE",), 32 | }} 33 | RETURN_TYPES = ("IMAGE",) 34 | FUNCTION = "upscale" 35 | 36 | CATEGORY = "image/upscaling" 37 | 38 | def upscale(self, upscale_model, image): 39 | device = model_management.get_torch_device() 40 | upscale_model.to(device) 41 | in_img = image.movedim(-1,-3).to(device) 42 | free_memory = model_management.get_free_memory(device) 43 | 44 | tile = 512 45 | overlap = 32 46 | 47 | oom = True 48 | while oom: 49 | try: 50 | steps = in_img.shape[0] * fcbh.utils.get_tiled_scale_steps(in_img.shape[3], in_img.shape[2], tile_x=tile, tile_y=tile, overlap=overlap) 51 | pbar = fcbh.utils.ProgressBar(steps) 52 | s = fcbh.utils.tiled_scale(in_img, lambda a: upscale_model(a), tile_x=tile, tile_y=tile, overlap=overlap, upscale_amount=upscale_model.scale, pbar=pbar) 53 | oom = False 54 | except model_management.OOM_EXCEPTION as e: 55 | tile //= 2 56 | if tile < 128: 57 | raise e 58 | 59 | upscale_model.cpu() 60 | s = torch.clamp(s.movedim(-3,-1), min=0, max=1.0) 61 | return (s,) 62 | 63 | NODE_CLASS_MAPPINGS = { 64 | "UpscaleModelLoader": UpscaleModelLoader, 65 | "ImageUpscaleWithModel": ImageUpscaleWithModel 66 | } 67 | -------------------------------------------------------------------------------- /build_launcher.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | win32_root = os.path.dirname(os.path.dirname(__file__)) 4 | python_embeded_path = os.path.join(win32_root, 'python_embeded') 5 | 6 | is_win32_standalone_build = os.path.exists(python_embeded_path) and os.path.isdir(python_embeded_path) 7 | 8 | win32_cmd = ''' 9 | .\python_embeded\python.exe -s Fooocus\entry_with_update.py {cmds} %* 10 | pause 11 | ''' 12 | 13 | 14 | def build_launcher(): 15 | if not is_win32_standalone_build: 16 | return 17 | 18 | presets = [None, 'anime', 'realistic'] 19 | 20 | for preset in presets: 21 | win32_cmd_preset = win32_cmd.replace('{cmds}', '' if preset is None else f'--preset 
{preset}') 22 | bat_path = os.path.join(win32_root, 'run.bat' if preset is None else f'run_{preset}.bat') 23 | if not os.path.exists(bat_path): 24 | with open(bat_path, "w", encoding="utf-8") as f: 25 | f.write(win32_cmd_preset) 26 | return 27 | -------------------------------------------------------------------------------- /colab_fix.txt: -------------------------------------------------------------------------------- 1 | {"default_refiner": ""} -------------------------------------------------------------------------------- /css/style.css: -------------------------------------------------------------------------------- 1 | /* based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/style.css */ 2 | 3 | #context-menu{ 4 | z-index:9999; 5 | position:absolute; 6 | display:block; 7 | padding:0px 0; 8 | border:2px solid #a55000; 9 | border-radius:8px; 10 | box-shadow:1px 1px 2px #CE6400; 11 | width: 200px; 12 | } 13 | 14 | .context-menu-items{ 15 | list-style: none; 16 | margin: 0; 17 | padding: 0; 18 | } 19 | 20 | .context-menu-items a{ 21 | display:block; 22 | padding:5px; 23 | cursor:pointer; 24 | } 25 | 26 | .context-menu-items a:hover{ 27 | background: #a55000; 28 | } 29 | -------------------------------------------------------------------------------- /entry_with_update.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | root = os.path.dirname(os.path.abspath(__file__)) 6 | sys.path.append(root) 7 | os.chdir(root) 8 | always_update = True 9 | 10 | if(always_update): 11 | try: 12 | import pygit2 13 | pygit2.option(pygit2.GIT_OPT_SET_OWNER_VALIDATION, 0) 14 | 15 | repo = pygit2.Repository(os.path.abspath(os.path.dirname(__file__))) 16 | 17 | branch_name = repo.head.shorthand 18 | 19 | remote_name = 'origin' 20 | remote = repo.remotes[remote_name] 21 | 22 | remote.fetch() 23 | 24 | local_branch_ref = f'refs/heads/{branch_name}' 25 | local_branch = repo.lookup_reference(local_branch_ref) 26 | 27 | remote_reference = f'refs/remotes/{remote_name}/{branch_name}' 28 | remote_commit = repo.revparse_single(remote_reference) 29 | 30 | merge_result, _ = repo.merge_analysis(remote_commit.id) 31 | 32 | if merge_result & pygit2.GIT_MERGE_ANALYSIS_UP_TO_DATE: 33 | print("Already up-to-date") 34 | elif merge_result & pygit2.GIT_MERGE_ANALYSIS_FASTFORWARD: 35 | local_branch.set_target(remote_commit.id) 36 | repo.head.set_target(remote_commit.id) 37 | repo.checkout_tree(repo.get(remote_commit.id)) 38 | repo.reset(local_branch.target, pygit2.GIT_RESET_HARD) 39 | print("Fast-forward merge") 40 | elif merge_result & pygit2.GIT_MERGE_ANALYSIS_NORMAL: 41 | print("Update failed - Did you modified any file?") 42 | except Exception as e: 43 | print('Update failed.') 44 | print(str(e)) 45 | 46 | print('Update succeeded.') 47 | from launch import * 48 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: fooocusControl 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.10 6 | - pip=23.0 7 | - packaging 8 | -------------------------------------------------------------------------------- /fooocusControl_colab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "initial_id", 7 | "metadata": { 8 | "collapsed": true 9 | }, 10 | "outputs": [], 11 | 
"source": [ 12 | "!pip install pygit2==1.12.2\n", 13 | "%cd /content\n", 14 | "!git clone https://github.com/fenneishi/Fooocus-Control.git\n", 15 | "%cd /content/Fooocus\n", 16 | "!cp colab_fix.txt user_path_config.txt\n", 17 | "# for FooocusControl(follow Fooocus) Realistic Edition.\n", 18 | "!python entry_with_update.py --preset realistic --share \n", 19 | "# for FooocusControl(follow Fooocus) Anime Edition.\n", 20 | "# !python entry_with_update.py --preset anime --share " 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 2 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython2", 40 | "version": "2.7.6" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 5 45 | } 46 | -------------------------------------------------------------------------------- /fooocus_colab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "id": "VjYy0F2gZIPR" 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "!pip install pygit2==1.12.2\n", 12 | "%cd /content\n", 13 | "!git clone https://github.com/lllyasviel/Fooocus\n", 14 | "%cd /content/Fooocus\n", 15 | "!cp colab_fix.txt user_path_config.txt\n", 16 | "!python entry_with_update.py --share\n" 17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "accelerator": "GPU", 22 | "colab": { 23 | "gpuType": "T4", 24 | "provenance": [] 25 | }, 26 | "kernelspec": { 27 | "display_name": "Python 3", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "name": "python" 32 | } 33 | }, 34 | "nbformat": 4, 35 | "nbformat_minor": 0 36 | } 37 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/CPDS/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from einops import rearrange 4 | import numpy as np 5 | import cv2 6 | 7 | 8 | class CPDS: 9 | # remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/ZoeD_M12_N.pt" 10 | # modelpath = os.path.join(annotator_ckpts_path, "ZoeD_M12_N.pt") 11 | def __init__(self, model_path): 12 | self.model = None 13 | 14 | @classmethod 15 | def norm255(cls, GrayImage, low=4, high=96): 16 | assert isinstance(GrayImage, np.ndarray) 17 | assert GrayImage.ndim == 2 and GrayImage.dtype == np.float32 18 | 19 | v_min = np.percentile(GrayImage, low) 20 | v_max = np.percentile(GrayImage, high) 21 | 22 | if np.allclose(v_min, v_max): 23 | GrayImage = GrayImage * 0 # Avoid 0-division 24 | else: 25 | GrayImage = (GrayImage - v_min) / (v_max - v_min) 26 | 27 | GrayImage -= v_min 28 | GrayImage /= v_max - v_min 29 | return GrayImage * 255.0 30 | 31 | def __call__(self, RGB): 32 | assert RGB.ndim == 3 33 | with torch.no_grad(): 34 | # cv2.decolor is not "decolor", it is Cewu Lu's method 35 | # See http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html 36 | # See https://docs.opencv.org/3.0-beta/modules/photo/doc/decolor.html 37 | 38 | raw = cv2.GaussianBlur(RGB, (0, 0), 0.8) 39 | density, boost = cv2.decolor(raw) 40 | 41 | raw = raw.astype(np.float32) 42 | density = density.astype(np.float32) 43 | boost = boost.astype(np.float32) 44 | 45 | offset 
= np.sum((raw - boost) ** 2.0, axis=2) ** 0.5 46 | result = density + offset 47 | 48 | return self.norm255(result, low=4, high=96).clip(0, 255).astype(np.uint8) 49 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/OpenPose/hand.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import json 3 | import numpy as np 4 | import math 5 | import time 6 | from scipy.ndimage.filters import gaussian_filter 7 | import matplotlib.pyplot as plt 8 | import matplotlib 9 | import torch 10 | from skimage.measure import label 11 | 12 | from .model import handpose_model 13 | from . import util 14 | 15 | class Hand(object): 16 | def __init__(self, model_path): 17 | self.model = handpose_model() 18 | if torch.cuda.is_available(): 19 | self.model = self.model.cuda() 20 | print('cuda') 21 | model_dict = util.transfer(self.model, torch.load(model_path)) 22 | self.model.load_state_dict(model_dict) 23 | self.model.eval() 24 | 25 | def __call__(self, oriImgRaw): 26 | scale_search = [0.5, 1.0, 1.5, 2.0] 27 | # scale_search = [0.5] 28 | boxsize = 368 29 | stride = 8 30 | padValue = 128 31 | thre = 0.05 32 | multiplier = [x * boxsize for x in scale_search] 33 | 34 | wsize = 128 35 | heatmap_avg = np.zeros((wsize, wsize, 22)) 36 | 37 | Hr, Wr, Cr = oriImgRaw.shape 38 | 39 | oriImg = cv2.GaussianBlur(oriImgRaw, (0, 0), 0.8) 40 | 41 | for m in range(len(multiplier)): 42 | scale = multiplier[m] 43 | imageToTest = util.smart_resize(oriImg, (scale, scale)) 44 | 45 | imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue) 46 | im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5 47 | im = np.ascontiguousarray(im) 48 | 49 | data = torch.from_numpy(im).float() 50 | if torch.cuda.is_available(): 51 | data = data.cuda() 52 | 53 | with torch.no_grad(): 54 | output = self.model(data).cpu().numpy() 55 | 56 | # extract outputs, resize, and remove padding 57 | heatmap = np.transpose(np.squeeze(output), (1, 2, 0)) # output 1 is heatmaps 58 | heatmap = util.smart_resize_k(heatmap, fx=stride, fy=stride) 59 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] 60 | heatmap = util.smart_resize(heatmap, (wsize, wsize)) 61 | 62 | heatmap_avg += heatmap / len(multiplier) 63 | 64 | all_peaks = [] 65 | for part in range(21): 66 | map_ori = heatmap_avg[:, :, part] 67 | one_heatmap = gaussian_filter(map_ori, sigma=3) 68 | binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8) 69 | 70 | if np.sum(binary) == 0: 71 | all_peaks.append([0, 0]) 72 | continue 73 | label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim) 74 | max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1 75 | label_img[label_img != max_index] = 0 76 | map_ori[label_img == 0] = 0 77 | 78 | y, x = util.npmax(map_ori) 79 | y = int(float(y) * float(Hr) / float(wsize)) 80 | x = int(float(x) * float(Wr) / float(wsize)) 81 | all_peaks.append([x, y]) 82 | return np.array(all_peaks) 83 | 84 | if __name__ == "__main__": 85 | hand_estimation = Hand('../model/hand_pose_model.pth') 86 | 87 | # test_image = '../images/hand.jpg' 88 | test_image = '../images/hand.jpg' 89 | oriImg = cv2.imread(test_image) # B,G,R order 90 | peaks = hand_estimation(oriImg) 91 | canvas = util.draw_handpose(oriImg, peaks, True) 92 | cv2.imshow('', canvas) 93 | cv2.waitKey(0) 
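# Note on the return value of Hand.__call__ (a reading of the code above; variable names
# follow the __main__ example): it is a (21, 2) integer array of [x, y] keypoint
# coordinates in the original image, with [0, 0] left in place for joints whose smoothed,
# averaged heatmap never exceeds `thre`. A minimal filtering sketch:
#
#   peaks = hand_estimation(oriImg)
#   detected = [(i, (x, y)) for i, (x, y) in enumerate(peaks) if x != 0 or y != 0]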
-------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/PyramidCanny/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import modules.advanced_parameters as advanced_parameters 4 | from fooocus_extras.controlnet_preprocess_model.ZeoDepth import ZoeDetector 5 | 6 | 7 | class PyramidCanny: 8 | # remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/ZoeD_M12_N.pt" 9 | # modelpath = os.path.join(annotator_ckpts_path, "ZoeD_M12_N.pt") 10 | def __init__(self, model_path): 11 | self.model = None 12 | 13 | @staticmethod 14 | def centered_canny(x: np.ndarray): 15 | assert isinstance(x, np.ndarray) 16 | assert x.ndim == 2 and x.dtype == np.uint8 17 | 18 | y = cv2.Canny(x, int(advanced_parameters.canny_low_threshold), int(advanced_parameters.canny_high_threshold)) 19 | y = y.astype(np.float32) / 255.0 20 | return y 21 | 22 | @staticmethod 23 | def centered_canny_color(x: np.ndarray): 24 | assert isinstance(x, np.ndarray) 25 | assert x.ndim == 3 and x.shape[2] == 3 26 | 27 | result = [PyramidCanny.centered_canny(x[..., i]) for i in range(3)] 28 | result = np.stack(result, axis=2) 29 | return result 30 | 31 | @staticmethod 32 | def pyramid_canny_color(x: np.ndarray): 33 | assert isinstance(x, np.ndarray) 34 | assert x.ndim == 3 and x.shape[2] == 3 35 | 36 | H, W, C = x.shape 37 | acc_edge = None 38 | 39 | for k in [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]: 40 | Hs, Ws = int(H * k), int(W * k) 41 | small = cv2.resize(x, (Ws, Hs), interpolation=cv2.INTER_AREA) 42 | edge = PyramidCanny.centered_canny_color(small) 43 | if acc_edge is None: 44 | acc_edge = edge 45 | else: 46 | acc_edge = cv2.resize(acc_edge, (edge.shape[1], edge.shape[0]), interpolation=cv2.INTER_LINEAR) 47 | acc_edge = acc_edge * 0.75 + edge * 0.25 48 | 49 | return acc_edge 50 | 51 | @classmethod 52 | def norm255(cls, GrayImage, low=4, high=96): 53 | assert isinstance(GrayImage, np.ndarray) 54 | assert GrayImage.ndim == 2 and GrayImage.dtype == np.float32 55 | 56 | v_min = np.percentile(GrayImage, low) 57 | v_max = np.percentile(GrayImage, high) 58 | 59 | if np.allclose(v_min, v_max): 60 | GrayImage = GrayImage * 0 # Avoid 0-division 61 | else: 62 | GrayImage = (GrayImage - v_min) / (v_max - v_min) 63 | 64 | GrayImage -= v_min 65 | GrayImage /= v_max - v_min 66 | return GrayImage * 255.0 67 | 68 | def __call__(self, RGB): 69 | assert RGB.ndim == 3 70 | # For some reasons, SAI's Control-lora PyramidCanny seems to be trained on canny maps with non-standard resolutions. 71 | # Then we use pyramid to use all resolutions to avoid missing any structure in specific resolutions. 
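        # Concretely (a reading of pyramid_canny_color above): per-channel Canny edges are
        # computed at scales 0.2 .. 1.0 of the input and folded into a running blend
        # acc_edge = 0.75 * upscaled(acc_edge) + 0.25 * edge, so the final full-resolution
        # pass dominates while coarser scales still contribute weaker structure. The three
        # channel maps are then summed and percentile-normalized below.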
72 | 73 | color_canny = PyramidCanny.pyramid_canny_color(RGB) 74 | result = np.sum(color_canny, axis=2) 75 | 76 | return self.norm255(result, low=1, high=99).clip(0, 255).astype(np.uint8) 77 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ReColor/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class ReColor: 5 | def __init__(self,model_path): 6 | self.model = None 7 | 8 | def __call__(self,RGB): 9 | assert RGB.ndim == 3 10 | assert RGB.shape[2] == 3 11 | Gray = cv2.cvtColor(RGB, cv2.COLOR_RGB2GRAY) 12 | return Gray -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/Revision/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class Revision: 5 | def __init__(self, model_path): 6 | self.model = None 7 | 8 | def __call__(self, RGB): 9 | assert RGB.ndim == 3 10 | assert RGB.shape[2] == 3 11 | # Gray = cv2.cvtColor(RGB, cv2.COLOR_RGB2BGR) 12 | return RGB 13 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/Sketch/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class Sketch: 5 | def __init__(self, model_path): 6 | self.model = None 7 | 8 | def __call__(self, RGB): 9 | assert RGB.ndim == 3 10 | assert RGB.shape[2] == 3 11 | Gray = cv2.cvtColor(RGB, cv2.COLOR_RGB2GRAY) 12 | return Gray 13 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/TileBlur/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class TileBlur: 5 | def __init__(self,model_path): 6 | self.model = None 7 | 8 | def __call__(self,RGB): 9 | assert RGB.ndim == 3 10 | assert RGB.shape[2] == 3 11 | RGB = cv2.GaussianBlur(RGB, (3, 3), 3) 12 | return RGB -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/TileBlurAnime/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class TileBlurAnime: 5 | def __init__(self,model_path): 6 | self.model = None 7 | 8 | def __call__(self,RGB): 9 | assert RGB.ndim == 3 10 | assert RGB.shape[2] == 3 11 | RGB = cv2.GaussianBlur(RGB, (3, 3), 3) 12 | return RGB -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/__init__.py: -------------------------------------------------------------------------------- 1 | # ZoeDepth 2 | # https://github.com/isl-org/ZoeDepth 3 | 4 | import os 5 | import cv2 6 | import numpy as np 7 | import torch 8 | 9 | from einops import rearrange 10 | from .zoedepth.models.zoedepth.zoedepth_v1 import ZoeDepth 11 | from .zoedepth.utils.config import get_config 12 | 13 | 14 | class ZoeDetector: 15 | # remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/ZoeD_M12_N.pt" 16 | # modelpath = os.path.join(annotator_ckpts_path, "ZoeD_M12_N.pt") 17 | def __init__(self, model_path): 18 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 19 | if DEVICE == "cpu": 20 | print("WARNING: Running on CPU. This will be slow. Check your CUDA installation.") 21 | if not os.path.exists(model_path): 22 | raise FileNotFoundError(f"ZoeDepth model not found at {model_path}") 23 | model = ZoeDepth.build_from_config( 24 | get_config("zoedepth", "infer") 25 | ) 26 | model.load_state_dict(torch.load(model_path, map_location='cpu')['model']) 27 | model.to(DEVICE) 28 | model.eval() 29 | self.model = model 30 | 31 | @torch.no_grad() 32 | @torch.inference_mode() 33 | def __call__(self, RGB): 34 | assert RGB.ndim == 3 35 | assert RGB.shape[2] == 3 36 | with torch.no_grad(): 37 | # preprocess: scale to [0, 1], move to the model's device, and reshape to NCHW 38 | RGB = rearrange( 39 | torch.from_numpy(RGB).float().to(next(self.model.parameters()).device) / 255.0, 40 | 'h w c -> 1 c h w' 41 | ) 42 | # infer 43 | Depth = self.model.infer(RGB).detach().squeeze().cpu().numpy() 44 | # postprocess: percentile-normalize, invert, and quantize to uint8 45 | d_min = np.percentile(Depth, 2) 46 | d_max = np.percentile(Depth, 85) 47 | if np.allclose(d_min, d_max): 48 | Depth = Depth * 0 # Avoid division by zero 49 | else: 50 | Depth = (Depth - d_min) / (d_max - d_min) 51 | Depth = 1.0 - Depth 52 | Depth = (Depth * 255.0).clip(0, 255).astype(np.uint8) 53 | return Depth 54 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/data/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software.
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/data/ibims.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms as T 32 | 33 | 34 | class iBims(Dataset): 35 | def __init__(self, config): 36 | root_folder = config.ibims_root 37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f: 38 | imglist = f.read().split() 39 | 40 | samples = [] 41 | for basename in imglist: 42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png") 43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png") 44 | valid_mask_path = os.path.join( 45 | root_folder, 'mask_invalid', basename+".png") 46 | transp_mask_path = os.path.join( 47 | root_folder, 'mask_transp', basename+".png") 48 | 49 | samples.append( 50 | (img_path, depth_path, valid_mask_path, transp_mask_path)) 51 | 52 | self.samples = samples 53 | # self.normalize = T.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x : x 56 | 57 | def __getitem__(self, idx): 58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx] 59 | 60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0 61 | depth = np.asarray(Image.open(depth_path), 62 | dtype=np.uint16).astype('float')*50.0/65535 63 | 64 | mask_valid = np.asarray(Image.open(valid_mask_path)) 65 | mask_transp = np.asarray(Image.open(transp_mask_path)) 66 | 67 | # depth = depth * mask_valid * mask_transp 68 | depth = np.where(mask_valid * mask_transp, depth, -1) 69 | 70 | img = torch.from_numpy(img).permute(2, 0, 1) 71 | img = self.normalize(img) 72 | depth = torch.from_numpy(depth).unsqueeze(0) 73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims') 74 | 75 | def __len__(self): 76 | return len(self.samples) 77 | 78 | 79 | def get_ibims_loader(config, batch_size=1, **kwargs): 80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs) 81 | return dataloader 82 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | *.png 107 | *.pfm 108 | *.jpg 109 | *.jpeg 110 | *.pt -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/environment.yaml: -------------------------------------------------------------------------------- 1 | name: midas-py310 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - nvidia::cudatoolkit=11.7 7 | - python=3.10.8 8 | - pytorch::pytorch=1.13.0 9 | - torchvision=0.14.0 10 | - pip=22.3.1 11 | - numpy=1.23.4 12 | - pip: 13 | - opencv-python==4.6.0.66 14 | - imutils==0.5.4 15 | - timm==0.6.12 16 | - einops==0.6.0 -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/backbones/next_vit.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | import torch.nn as nn 4 | 5 | from pathlib import Path 6 | from .utils import activations, forward_default, get_activation 7 | 8 | from ..external.next_vit.classification.nextvit import * 9 | 10 | 11 | def forward_next_vit(pretrained, x): 12 | return forward_default(pretrained, x, "forward") 13 | 14 | 15 | def _make_next_vit_backbone( 16 | model, 17 | hooks=[2, 6, 36, 39], 18 | ): 19 | pretrained = nn.Module() 20 | 21 | pretrained.model = model 22 | pretrained.model.features[hooks[0]].register_forward_hook(get_activation("1")) 23 | pretrained.model.features[hooks[1]].register_forward_hook(get_activation("2")) 24 | pretrained.model.features[hooks[2]].register_forward_hook(get_activation("3")) 25 | pretrained.model.features[hooks[3]].register_forward_hook(get_activation("4")) 26 | 27 | pretrained.activations = activations 28 | 29 | return pretrained 30 | 31 | 32 | def _make_pretrained_next_vit_large_6m(hooks=None): 33 | model = timm.create_model("nextvit_large") 34 | 35 | hooks = [2, 6, 36, 39] if hooks == None else hooks 36 | return _make_next_vit_backbone( 37 | model, 38 | hooks=hooks, 39 | ) 40 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/backbones/swin.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | from .swin_common import _make_swin_backbone 4 | 5 | 6 | def _make_pretrained_swinl12_384(pretrained, hooks=None): 7 | model = timm.create_model("swin_large_patch4_window12_384", pretrained=pretrained) 8 | 9 | hooks = [1, 1, 17, 1] if hooks == None else hooks 10 | return _make_swin_backbone( 11 | model, 12 | hooks=hooks 13 | ) 14 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/backbones/swin2.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | from .swin_common import _make_swin_backbone 4 | 5 | 6 | def _make_pretrained_swin2l24_384(pretrained, hooks=None): 7 | model = timm.create_model("swinv2_large_window12to24_192to384_22kft1k", pretrained=pretrained) 8 | 9 | hooks = [1, 1, 17, 1] if hooks == None else hooks 10 | return _make_swin_backbone( 11 | model, 12 | hooks=hooks 13 | ) 14 | 15 | 16 | def _make_pretrained_swin2b24_384(pretrained, hooks=None): 17 | model = timm.create_model("swinv2_base_window12to24_192to384_22kft1k", pretrained=pretrained) 18 | 19 | hooks = [1, 1, 17, 1] if hooks == None else hooks 20 | 
return _make_swin_backbone( 21 | model, 22 | hooks=hooks 23 | ) 24 | 25 | 26 | def _make_pretrained_swin2t16_256(pretrained, hooks=None): 27 | model = timm.create_model("swinv2_tiny_window16_256", pretrained=pretrained) 28 | 29 | hooks = [1, 1, 5, 1] if hooks == None else hooks 30 | return _make_swin_backbone( 31 | model, 32 | hooks=hooks, 33 | patch_grid=[64, 64] 34 | ) 35 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/backbones/swin_common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import torch.nn as nn 4 | import numpy as np 5 | 6 | from .utils import activations, forward_default, get_activation, Transpose 7 | 8 | 9 | def forward_swin(pretrained, x): 10 | return forward_default(pretrained, x) 11 | 12 | 13 | def _make_swin_backbone( 14 | model, 15 | hooks=[1, 1, 17, 1], 16 | patch_grid=[96, 96] 17 | ): 18 | pretrained = nn.Module() 19 | 20 | pretrained.model = model 21 | pretrained.model.layers[0].blocks[hooks[0]].register_forward_hook(get_activation("1")) 22 | pretrained.model.layers[1].blocks[hooks[1]].register_forward_hook(get_activation("2")) 23 | pretrained.model.layers[2].blocks[hooks[2]].register_forward_hook(get_activation("3")) 24 | pretrained.model.layers[3].blocks[hooks[3]].register_forward_hook(get_activation("4")) 25 | 26 | pretrained.activations = activations 27 | 28 | if hasattr(model, "patch_grid"): 29 | used_patch_grid = model.patch_grid 30 | else: 31 | used_patch_grid = patch_grid 32 | 33 | patch_grid_size = np.array(used_patch_grid, dtype=int) 34 | 35 | pretrained.act_postprocess1 = nn.Sequential( 36 | Transpose(1, 2), 37 | nn.Unflatten(2, torch.Size(patch_grid_size.tolist())) 38 | ) 39 | pretrained.act_postprocess2 = nn.Sequential( 40 | Transpose(1, 2), 41 | nn.Unflatten(2, torch.Size((patch_grid_size // 2).tolist())) 42 | ) 43 | pretrained.act_postprocess3 = nn.Sequential( 44 | Transpose(1, 2), 45 | nn.Unflatten(2, torch.Size((patch_grid_size // 4).tolist())) 46 | ) 47 | pretrained.act_postprocess4 = nn.Sequential( 48 | Transpose(1, 2), 49 | nn.Unflatten(2, torch.Size((patch_grid_size // 8).tolist())) 50 | ) 51 | 52 | return pretrained 53 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/base_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BaseModel(torch.nn.Module): 5 | def load(self, path): 6 | """Load model from file. 7 | 8 | Args: 9 | path (str): file path 10 | """ 11 | parameters = torch.load(path, map_location=torch.device('cpu')) 12 | 13 | if "optimizer" in parameters: 14 | parameters = parameters["model"] 15 | 16 | self.load_state_dict(parameters) 17 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/midas_net.py: -------------------------------------------------------------------------------- 1 | """MidashNet: Network for monocular depth estimation trained by mixing several datasets. 
2 | This file contains code that is adapted from 3 | https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .base_model import BaseModel 9 | from .blocks import FeatureFusionBlock, Interpolate, _make_encoder 10 | 11 | 12 | class MidasNet(BaseModel): 13 | """Network for monocular depth estimation. 14 | """ 15 | 16 | def __init__(self, path=None, features=256, non_negative=True): 17 | """Init. 18 | 19 | Args: 20 | path (str, optional): Path to saved model. Defaults to None. 21 | features (int, optional): Number of features. Defaults to 256. 22 | backbone (str, optional): Backbone network for encoder. Defaults to resnet50 23 | """ 24 | print("Loading weights: ", path) 25 | 26 | super(MidasNet, self).__init__() 27 | 28 | use_pretrained = False if path is None else True 29 | 30 | self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) 31 | 32 | self.scratch.refinenet4 = FeatureFusionBlock(features) 33 | self.scratch.refinenet3 = FeatureFusionBlock(features) 34 | self.scratch.refinenet2 = FeatureFusionBlock(features) 35 | self.scratch.refinenet1 = FeatureFusionBlock(features) 36 | 37 | self.scratch.output_conv = nn.Sequential( 38 | nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), 39 | Interpolate(scale_factor=2, mode="bilinear"), 40 | nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), 41 | nn.ReLU(True), 42 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), 43 | nn.ReLU(True) if non_negative else nn.Identity(), 44 | ) 45 | 46 | if path: 47 | self.load(path) 48 | 49 | def forward(self, x): 50 | """Forward pass. 51 | 52 | Args: 53 | x (tensor): input data (image) 54 | 55 | Returns: 56 | tensor: depth 57 | """ 58 | 59 | layer_1 = self.pretrained.layer1(x) 60 | layer_2 = self.pretrained.layer2(layer_1) 61 | layer_3 = self.pretrained.layer3(layer_2) 62 | layer_4 = self.pretrained.layer4(layer_3) 63 | 64 | layer_1_rn = self.scratch.layer1_rn(layer_1) 65 | layer_2_rn = self.scratch.layer2_rn(layer_2) 66 | layer_3_rn = self.scratch.layer3_rn(layer_3) 67 | layer_4_rn = self.scratch.layer4_rn(layer_4) 68 | 69 | path_4 = self.scratch.refinenet4(layer_4_rn) 70 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn) 71 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn) 72 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn) 73 | 74 | out = self.scratch.output_conv(path_1) 75 | 76 | return torch.squeeze(out, dim=1) 77 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | from .depth_model import DepthModel 27 | 28 | def build_model(config) -> DepthModel: 29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. 30 | This function should be used to construct models for training and evaluation. 31 | 32 | Args: 33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 34 | 35 | Returns: 36 | torch.nn.Module: Model corresponding to name and version as specified in config 37 | """ 38 | module_name = f"zoedepth.models.{config.model}" 39 | try: 40 | module = import_module(module_name) 41 | except ModuleNotFoundError as e: 42 | # print the original error message 43 | print(e) 44 | raise ValueError( 45 | f"Model {config.model} not found. Refer above error for details.") from e 46 | try: 47 | get_version = getattr(module, "get_version") 48 | except AttributeError as e: 49 | raise ValueError( 50 | f"Model {config.model} has no get_version function.") from e 51 | return get_version(config.version_name).build_from_config(config) 52 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/model_io.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | 27 | def load_state_dict(model, state_dict): 28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. 
29 | 30 | DataParallel prefixes state_dict keys with 'module.' when saving. 31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed. 32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added. 33 | """ 34 | state_dict = state_dict.get('model', state_dict) 35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' 36 | 37 | do_prefix = isinstance( 38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) 39 | state = {} 40 | for k, v in state_dict.items(): 41 | if k.startswith('module.') and not do_prefix: 42 | k = k[7:] 43 | 44 | if not k.startswith('module.') and do_prefix: 45 | k = 'module.' + k 46 | 47 | state[k] = v 48 | 49 | model.load_state_dict(state) 50 | print("Loaded successfully") 51 | return model 52 | 53 | 54 | def load_wts(model, checkpoint_path): 55 | ckpt = torch.load(checkpoint_path, map_location='cpu') 56 | return load_state_dict(model, ckpt) 57 | 58 | 59 | def load_state_dict_from_url(model, url, **kwargs): 60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) 61 | return load_state_dict(model, state_dict) 62 | 63 | 64 | def load_state_from_resource(model, resource: str): 65 | """Loads weights to the model from a given resource. A resource can be of following types: 66 | 1. URL. Prefixed with "url::" 67 | e.g. url::http(s)://url.resource.com/ckpt.pt 68 | 69 | 2. Local path. Prefixed with "local::" 70 | e.g. local::/path/to/ckpt.pt 71 | 72 | 73 | Args: 74 | model (torch.nn.Module): Model 75 | resource (str): resource string 76 | 77 | Returns: 78 | torch.nn.Module: Model with loaded weights 79 | """ 80 | print(f"Using pretrained resource {resource}") 81 | 82 | if resource.startswith('url::'): 83 | url = resource.split('url::')[1] 84 | return load_state_dict_from_url(model, url, progress=True) 85 | 86 | elif resource.startswith('local::'): 87 | path = resource.split('local::')[1] 88 | return load_wts(model, path) 89 | 90 | else: 91 | raise ValueError("Invalid resource type, only url:: and local:: are supported") 92 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/zoedepth/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_v1 import ZoeDepth 26 | 27 | all_versions = { 28 | "v1": ZoeDepth, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/zoedepth/config_zoedepth.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepth", 4 | "version_name": "v1", 5 | "n_bins": 64, 6 | "bin_embedding_dim": 128, 7 | "bin_centers_type": "softplus", 8 | "n_attractors":[16, 8, 4, 1], 9 | "attractor_alpha": 1000, 10 | "attractor_gamma": 2, 11 | "attractor_kind" : "mean", 12 | "attractor_type" : "inv", 13 | "midas_model_type" : "DPT_BEiT_L_384", 14 | "min_temp": 0.0212, 15 | "max_temp": 50.0, 16 | "output_distribution": "logbinomial", 17 | "memory_efficient": true, 18 | "inverse_midas": false, 19 | "img_size": [384, 512] 20 | }, 21 | 22 | "train": { 23 | "train_midas": true, 24 | "use_pretrained_midas": true, 25 | "trainer": "zoedepth", 26 | "epochs": 5, 27 | "bs": 16, 28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, 29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 30 | "same_lr": false, 31 | "w_si": 1, 32 | "w_domain": 0.2, 33 | "w_reg": 0, 34 | "w_grad": 0, 35 | "avoid_boundary": false, 36 | "random_crop": false, 37 | "input_width": 640, 38 | "input_height": 480, 39 | "midas_lr_factor": 1, 40 | "encoder_lr_factor":10, 41 | "pos_enc_lr_factor":10, 42 | "freeze_midas_bn": true 43 | 44 | }, 45 | 46 | "infer":{ 47 | "train_midas": false, 48 | "use_pretrained_midas": false, 49 | "pretrained_resource" : null, 50 | "force_keep_ar": true 51 | }, 52 | 53 | "eval":{ 54 | "train_midas": false, 55 | "use_pretrained_midas": false, 56 | "pretrained_resource" : null 57 | } 58 | } -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/zoedepth/config_zoedepth_kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "bin_centers_type": "normed", 4 | "img_size": [384, 768] 5 | }, 6 | 7 | "train": { 8 | }, 9 | 10 | "infer":{ 11 | "train_midas": false, 12 | "use_pretrained_midas": false, 13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", 14 | "force_keep_ar": true 15 | }, 16 | 17 | "eval":{ 18 | "train_midas": false, 19 | "use_pretrained_midas": false, 20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" 21 | } 22 | } -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/zoedepth_nk/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the 
Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_nk_v1 import ZoeDepthNK 26 | 27 | all_versions = { 28 | "v1": ZoeDepthNK, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepthNK", 4 | "version_name": "v1", 5 | "bin_conf" : [ 6 | { 7 | "name": "nyu", 8 | "n_bins": 64, 9 | "min_depth": 1e-3, 10 | "max_depth": 10.0 11 | }, 12 | { 13 | "name": "kitti", 14 | "n_bins": 64, 15 | "min_depth": 1e-3, 16 | "max_depth": 80.0 17 | } 18 | ], 19 | "bin_embedding_dim": 128, 20 | "bin_centers_type": "softplus", 21 | "n_attractors":[16, 8, 4, 1], 22 | "attractor_alpha": 1000, 23 | "attractor_gamma": 2, 24 | "attractor_kind" : "mean", 25 | "attractor_type" : "inv", 26 | "min_temp": 0.0212, 27 | "max_temp": 50.0, 28 | "memory_efficient": true, 29 | "midas_model_type" : "DPT_BEiT_L_384", 30 | "img_size": [384, 512] 31 | }, 32 | 33 | "train": { 34 | "train_midas": true, 35 | "use_pretrained_midas": true, 36 | "trainer": "zoedepth_nk", 37 | "epochs": 5, 38 | "bs": 16, 39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, 40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 41 | "same_lr": false, 42 | "w_si": 1, 43 | "w_domain": 100, 44 | "avoid_boundary": false, 45 | "random_crop": false, 46 | "input_width": 640, 47 | "input_height": 480, 48 | "w_grad": 0, 49 | "w_reg": 0, 50 | "midas_lr_factor": 10, 51 | "encoder_lr_factor":10, 52 | "pos_enc_lr_factor":10 53 | }, 54 | 55 | "infer": { 56 | "train_midas": false, 57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 58 | "use_pretrained_midas": false, 59 | "force_keep_ar": true 60 | }, 61 | 62 | "eval": { 63 | "train_midas": false, 64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 65 | "use_pretrained_midas": false 66 | } 67 | } -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/trainers/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | 27 | 28 | def get_trainer(config): 29 | """Builds and returns a trainer based on the config. 30 | 31 | Args: 32 | config (dict): the config dict (typically constructed using utils.config.get_config) 33 | config.trainer (str): the name of the trainer to use. The module named "{config.trainer}_trainer" must exist in trainers root module 34 | 35 | Raises: 36 | ValueError: If the specified trainer does not exist under trainers/ folder 37 | 38 | Returns: 39 | Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object 40 | """ 41 | assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format( 42 | config) 43 | try: 44 | Trainer = getattr(import_module( 45 | f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer') 46 | except ModuleNotFoundError as e: 47 | raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e 48 | return Trainer 49 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def infer_type(x): # hacky way to infer type from string args 4 | if not isinstance(x, str): 5 | return x 6 | 7 | try: 8 | x = int(x) 9 | return x 10 | except ValueError: 11 | pass 12 | 13 | try: 14 | x = float(x) 15 | return x 16 | except ValueError: 17 | pass 18 | 19 | return x 20 | 21 | 22 | def parse_unknown(unknown_args): 23 | clean = [] 24 | for a in unknown_args: 25 | if "=" in a: 26 | k, v = a.split("=") 27 | clean.extend([k, v]) 28 | else: 29 | clean.append(a) 30 | 31 | keys = clean[::2] 32 | values = clean[1::2] 33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} 34 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/fooocus_extras/controlnet_preprocess_model/__init__.py -------------------------------------------------------------------------------- /fooocus_extras/preprocessors.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import modules.advanced_parameters as advanced_parameters 4 | from fooocus_extras.controlnet_preprocess_model.ZeoDepth import ZoeDetector 5 | 6 | 7 | def centered_canny(x: np.ndarray): 8 | assert isinstance(x, np.ndarray) 9 | assert x.ndim == 2 and x.dtype == np.uint8 10 | 11 | y = cv2.Canny(x, int(advanced_parameters.canny_low_threshold), int(advanced_parameters.canny_high_threshold)) 12 | y = y.astype(np.float32) / 255.0 13 | return y 14 | 15 | 16 | def centered_canny_color(x: np.ndarray): 17 | assert isinstance(x, np.ndarray) 18 | assert x.ndim == 3 and x.shape[2] == 3 19 | 20 | result = [centered_canny(x[..., i]) for i in range(3)] 21 | result = np.stack(result, axis=2) 22 | return result 23 | 24 | 25 | def pyramid_canny_color(x: np.ndarray): 26 | assert isinstance(x, np.ndarray) 27 | assert x.ndim == 3 and x.shape[2] == 3 28 | 29 | H, W, C = x.shape 30 | acc_edge = None 31 | 32 | for k in [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]: 33 | Hs, Ws = int(H * k), int(W * k) 34 | small = cv2.resize(x, (Ws, Hs), interpolation=cv2.INTER_AREA) 35 | edge = centered_canny_color(small) 36 | if acc_edge is None: 37 | acc_edge = edge 38 | else: 39 | acc_edge = cv2.resize(acc_edge, (edge.shape[1], edge.shape[0]), interpolation=cv2.INTER_LINEAR) 40 | acc_edge = acc_edge * 0.75 + edge * 0.25 41 | 42 | return acc_edge 43 | 44 | 45 | def norm255(x, low=4, high=96): 46 | assert isinstance(x, np.ndarray) 47 | assert x.ndim == 2 and x.dtype == np.float32 48 | 49 | v_min = np.percentile(x, low) 50 | v_max = np.percentile(x, high) 51 | 52 | # Stretch the [low, high] percentile range to [0, 1]; guard against a flat input, where v_max - v_min would be zero 53 | x = np.zeros_like(x) if np.allclose(v_min, v_max) else (x - v_min) / (v_max - v_min) 54 | 55 | return x * 255.0 56 | def canny_pyramid(x): 57 | # For some reason, SAI's Control-lora PyramidCanny seems to be trained on canny maps with non-standard resolutions. 58 | # So we run Canny over an image pyramid and merge all resolutions to avoid missing structure at any specific resolution.
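# A minimal usage sketch (hypothetical variable img: an HxWx3 uint8 RGB array): canny_pyramid(img) returns a uint8 HxW edge map driven by advanced_parameters.canny_low_threshold / canny_high_threshold, while depth(img, ZoeDetector(model_path)) expects a ZoeDepth checkpoint (e.g. ZoeD_M12_N.pt, per the comment above ZoeDetector) already present at model_path.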
59 | 60 | color_canny = pyramid_canny_color(x) 61 | result = np.sum(color_canny, axis=2) 62 | 63 | return norm255(result, low=1, high=99).clip(0, 255).astype(np.uint8) 64 | 65 | 66 | def cpds(x): 67 | # cv2.decolor is not "decolor", it is Cewu Lu's method 68 | # See http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html 69 | # See https://docs.opencv.org/3.0-beta/modules/photo/doc/decolor.html 70 | 71 | raw = cv2.GaussianBlur(x, (0, 0), 0.8) 72 | density, boost = cv2.decolor(raw) 73 | 74 | raw = raw.astype(np.float32) 75 | density = density.astype(np.float32) 76 | boost = boost.astype(np.float32) 77 | 78 | offset = np.sum((raw - boost) ** 2.0, axis=2) ** 0.5 79 | result = density + offset 80 | 81 | return norm255(result, low=4, high=96).clip(0, 255).astype(np.uint8) 82 | 83 | 84 | def depth(x, depth_model): 85 | result = depth_model(x) 86 | # return norm255(result, low=2, high=85).clip(0, 255).astype(np.uint8) 87 | return result 88 | 89 | 90 | def pose(x, pose_model): 91 | 92 | result = pose_model(x) 93 | return result 94 | -------------------------------------------------------------------------------- /fooocus_extras/vae_interpose.py: -------------------------------------------------------------------------------- 1 | # https://github.com/city96/SD-Latent-Interposer/blob/main/interposer.py 2 | 3 | import os 4 | import torch 5 | import safetensors.torch as sf 6 | import torch.nn as nn 7 | import fcbh.model_management 8 | 9 | from fcbh.model_patcher import ModelPatcher 10 | from modules.path import vae_approx_path 11 | 12 | 13 | class Block(nn.Module): 14 | def __init__(self, size): 15 | super().__init__() 16 | self.join = nn.ReLU() 17 | self.long = nn.Sequential( 18 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 19 | nn.LeakyReLU(0.1), 20 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 21 | nn.LeakyReLU(0.1), 22 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 23 | ) 24 | 25 | def forward(self, x): 26 | y = self.long(x) 27 | z = self.join(y + x) 28 | return z 29 | 30 | 31 | class Interposer(nn.Module): 32 | def __init__(self): 33 | super().__init__() 34 | self.chan = 4 35 | self.hid = 128 36 | 37 | self.head_join = nn.ReLU() 38 | self.head_short = nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1) 39 | self.head_long = nn.Sequential( 40 | nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1), 41 | nn.LeakyReLU(0.1), 42 | nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), 43 | nn.LeakyReLU(0.1), 44 | nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), 45 | ) 46 | self.core = nn.Sequential( 47 | Block(self.hid), 48 | Block(self.hid), 49 | Block(self.hid), 50 | ) 51 | self.tail = nn.Sequential( 52 | nn.ReLU(), 53 | nn.Conv2d(self.hid, self.chan, kernel_size=3, stride=1, padding=1) 54 | ) 55 | 56 | def forward(self, x): 57 | y = self.head_join( 58 | self.head_long(x) + 59 | self.head_short(x) 60 | ) 61 | z = self.core(y) 62 | return self.tail(z) 63 | 64 | 65 | vae_approx_model = None 66 | vae_approx_filename = os.path.join(vae_approx_path, 'xl-to-v1_interposer-v3.1.safetensors') 67 | 68 | 69 | def parse(x): 70 | global vae_approx_model 71 | 72 | x_origin = x['samples'].clone() 73 | 74 | if vae_approx_model is None: 75 | model = Interposer() 76 | model.eval() 77 | sd = sf.load_file(vae_approx_filename) 78 | model.load_state_dict(sd) 79 | fp16 = fcbh.model_management.should_use_fp16() 80 | if fp16: 81 | model = model.half() 82 | vae_approx_model = ModelPatcher( 83 | model=model, 84 | 
load_device=fcbh.model_management.get_torch_device(), 85 | offload_device=torch.device('cpu') 86 | ) 87 | vae_approx_model.dtype = torch.float16 if fp16 else torch.float32 88 | 89 | fcbh.model_management.load_model_gpu(vae_approx_model) 90 | 91 | x = x_origin.to(device=vae_approx_model.load_device, dtype=vae_approx_model.dtype) 92 | x = vae_approx_model.model(x) 93 | 94 | return {'samples': x.to(x_origin)} 95 | -------------------------------------------------------------------------------- /fooocus_version.py: -------------------------------------------------------------------------------- 1 | version = '2.1.701' 2 | -------------------------------------------------------------------------------- /javascript/script.js: -------------------------------------------------------------------------------- 1 | // based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/script.js 2 | 3 | function gradioApp() { 4 | const elems = document.getElementsByTagName('gradio-app'); 5 | const elem = elems.length == 0 ? document : elems[0]; 6 | 7 | if (elem !== document) { 8 | elem.getElementById = function(id) { 9 | return document.getElementById(id); 10 | }; 11 | } 12 | return elem.shadowRoot ? elem.shadowRoot : elem; 13 | } 14 | 15 | function playNotification() { 16 | gradioApp().querySelector('#audio_notification audio')?.play(); 17 | } 18 | 19 | document.addEventListener('keydown', function(e) { 20 | var handled = false; 21 | if (e.key !== undefined) { 22 | if ((e.key == "Enter" && (e.metaKey || e.ctrlKey || e.altKey))) handled = true; 23 | } else if (e.keyCode !== undefined) { 24 | if ((e.keyCode == 13 && (e.metaKey || e.ctrlKey || e.altKey))) handled = true; 25 | } 26 | if (handled) { 27 | var button = gradioApp().querySelector('button[id=generate_button]'); 28 | if (button) { 29 | button.click(); 30 | } 31 | e.preventDefault(); 32 | } 33 | }); 34 | -------------------------------------------------------------------------------- /launch.py: -------------------------------------------------------------------------------- 1 | from python_hijack import * 2 | 3 | import sys 4 | import platform 5 | import fooocus_version 6 | 7 | from build_launcher import build_launcher 8 | from modules.launch_util import is_installed, run, python, run_pip, requirements_met 9 | from modules.model_loader import load_file_from_url 10 | from modules.path import modelfile_path, lorafile_path, vae_approx_path, fooocus_expansion_path, \ 11 | checkpoint_downloads, embeddings_path, embeddings_downloads, lora_downloads 12 | 13 | 14 | REINSTALL_ALL = False 15 | TRY_INSTALL_XFORMERS = False 16 | 17 | 18 | def prepare_environment(): 19 | torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://download.pytorch.org/whl/cu121") 20 | torch_command = os.environ.get('TORCH_COMMAND', 21 | f"pip install torch==2.1.0 torchvision==0.16.0 --extra-index-url {torch_index_url}") 22 | requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt") 23 | 24 | print(f"Python {sys.version}") 25 | print(f"Fooocus version: {fooocus_version.version}") 26 | 27 | if REINSTALL_ALL or not is_installed("torch") or not is_installed("torchvision"): 28 | run(f'"{python}" -m {torch_command}', "Installing torch and torchvision", "Couldn't install torch", live=True) 29 | 30 | if TRY_INSTALL_XFORMERS: 31 | if REINSTALL_ALL or not is_installed("xformers"): 32 | xformers_package = os.environ.get('XFORMERS_PACKAGE', 'xformers==0.0.20') 33 | if platform.system() == "Windows": 34 | if platform.python_version().startswith("3.10"): 35 | 
run_pip(f"install -U -I --no-deps {xformers_package}", "xformers", live=True) 36 | else: 37 | print("Installation of xformers is not supported in this version of Python.") 38 | print( 39 | "You can also check this and build manually: https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers#building-xformers-on-windows-by-duckness") 40 | if not is_installed("xformers"): 41 | exit(0) 42 | elif platform.system() == "Linux": 43 | run_pip(f"install -U -I --no-deps {xformers_package}", "xformers") 44 | 45 | if REINSTALL_ALL or not requirements_met(requirements_file): 46 | run_pip(f"install -r \"{requirements_file}\"", "requirements") 47 | 48 | return 49 | 50 | 51 | vae_approx_filenames = [ 52 | ('xlvaeapp.pth', 'https://huggingface.co/lllyasviel/misc/resolve/main/xlvaeapp.pth'), 53 | ('vaeapp_sd15.pth', 'https://huggingface.co/lllyasviel/misc/resolve/main/vaeapp_sd15.pt'), 54 | ('xl-to-v1_interposer-v3.1.safetensors', 55 | 'https://huggingface.co/lllyasviel/misc/resolve/main/xl-to-v1_interposer-v3.1.safetensors') 56 | ] 57 | 58 | 59 | def download_models(): 60 | for file_name, url in checkpoint_downloads.items(): 61 | load_file_from_url(url=url, model_dir=modelfile_path, file_name=file_name) 62 | for file_name, url in embeddings_downloads.items(): 63 | load_file_from_url(url=url, model_dir=embeddings_path, file_name=file_name) 64 | for file_name, url in lora_downloads.items(): 65 | load_file_from_url(url=url, model_dir=lorafile_path, file_name=file_name) 66 | for file_name, url in vae_approx_filenames: 67 | load_file_from_url(url=url, model_dir=vae_approx_path, file_name=file_name) 68 | 69 | load_file_from_url( 70 | url='https://huggingface.co/lllyasviel/misc/resolve/main/fooocus_expansion.bin', 71 | model_dir=fooocus_expansion_path, 72 | file_name='pytorch_model.bin' 73 | ) 74 | 75 | return 76 | 77 | 78 | def ini_cbh_args(): 79 | from args_manager import args 80 | return args 81 | 82 | 83 | prepare_environment() 84 | build_launcher() 85 | ini_cbh_args() 86 | download_models() 87 | 88 | from webui import * 89 | -------------------------------------------------------------------------------- /models/checkpoints/put_checkpoints_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/checkpoints/put_checkpoints_here -------------------------------------------------------------------------------- /models/clip/put_clip_or_text_encoder_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/clip/put_clip_or_text_encoder_models_here -------------------------------------------------------------------------------- /models/clip_vision/put_clip_vision_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/clip_vision/put_clip_vision_models_here -------------------------------------------------------------------------------- /models/configs/anything_v3.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | 
timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. ] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | params: 72 | layer: "hidden" 73 | layer_idx: -2 74 | -------------------------------------------------------------------------------- /models/configs/v1-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | -------------------------------------------------------------------------------- /models/configs/v1-inference_clip_skip_2.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | params: 72 | layer: "hidden" 73 | layer_idx: -2 74 | -------------------------------------------------------------------------------- /models/configs/v1-inference_clip_skip_2_fp16.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | use_fp16: True 33 | image_size: 32 # unused 34 | in_channels: 4 35 | out_channels: 4 36 | model_channels: 320 37 | attention_resolutions: [ 4, 2, 1 ] 38 | num_res_blocks: 2 39 | channel_mult: [ 1, 2, 4, 4 ] 40 | num_heads: 8 41 | use_spatial_transformer: True 42 | transformer_depth: 1 43 | context_dim: 768 44 | use_checkpoint: True 45 | legacy: False 46 | 47 | first_stage_config: 48 | target: ldm.models.autoencoder.AutoencoderKL 49 | params: 50 | embed_dim: 4 51 | monitor: val/rec_loss 52 | ddconfig: 53 | double_z: true 54 | z_channels: 4 55 | resolution: 256 56 | in_channels: 3 57 | out_ch: 3 58 | ch: 128 59 | ch_mult: 60 | - 1 61 | - 2 62 | - 4 63 | - 4 64 | num_res_blocks: 2 65 | attn_resolutions: [] 66 | dropout: 0.0 67 | lossconfig: 68 | target: torch.nn.Identity 69 | 70 | cond_stage_config: 71 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 72 | params: 73 | layer: "hidden" 74 | layer_idx: -2 75 | -------------------------------------------------------------------------------- /models/configs/v1-inference_fp16.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | use_fp16: True 33 | image_size: 32 # unused 34 | in_channels: 4 35 | out_channels: 4 36 | model_channels: 320 37 | attention_resolutions: [ 4, 2, 1 ] 38 | num_res_blocks: 2 39 | channel_mult: [ 1, 2, 4, 4 ] 40 | num_heads: 8 41 | use_spatial_transformer: True 42 | transformer_depth: 1 43 | context_dim: 768 44 | use_checkpoint: True 45 | legacy: False 46 | 47 | first_stage_config: 48 | target: ldm.models.autoencoder.AutoencoderKL 49 | params: 50 | embed_dim: 4 51 | monitor: val/rec_loss 52 | ddconfig: 53 | double_z: true 54 | z_channels: 4 55 | resolution: 256 56 | in_channels: 3 57 | out_ch: 3 58 | ch: 128 59 | ch_mult: 60 | - 1 61 | - 2 62 | - 4 63 | - 4 64 | num_res_blocks: 2 65 | attn_resolutions: [] 66 | dropout: 0.0 67 | lossconfig: 68 | target: torch.nn.Identity 69 | 70 | cond_stage_config: 71 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 72 | -------------------------------------------------------------------------------- /models/configs/v1-inpainting-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 7.5e-05 3 | target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: hybrid # important 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | finetune_keys: null 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 9 # 4 data + 4 downscaled image + 1 mask 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | 72 | -------------------------------------------------------------------------------- /models/configs/v2-inference-v.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | parameterization: "v" 6 | linear_start: 0.00085 7 | linear_end: 0.0120 8 | num_timesteps_cond: 1 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 64 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn 17 | monitor: val/loss_simple_ema 18 | scale_factor: 0.18215 19 | use_ema: False # we set this to false because this is an inference only config 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | use_checkpoint: True 25 | use_fp16: True 26 | image_size: 32 # unused 27 | in_channels: 4 28 | out_channels: 4 29 | model_channels: 320 30 | attention_resolutions: [ 4, 2, 1 ] 31 | num_res_blocks: 2 32 | channel_mult: [ 1, 2, 4, 4 ] 33 | num_head_channels: 64 # need to fix for flash-attn 34 | use_spatial_transformer: True 35 | use_linear_in_transformer: True 36 | transformer_depth: 1 37 | context_dim: 1024 38 | legacy: False 39 | 40 | first_stage_config: 41 | target: ldm.models.autoencoder.AutoencoderKL 42 | params: 43 | embed_dim: 4 44 | monitor: val/rec_loss 45 | ddconfig: 46 | #attn_type: "vanilla-xformers" 47 | double_z: true 48 | z_channels: 4 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | - 4 58 | num_res_blocks: 2 59 | attn_resolutions: [] 60 | dropout: 0.0 61 | lossconfig: 62 | target: torch.nn.Identity 63 | 64 | cond_stage_config: 65 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 66 | params: 67 | freeze: True 68 | layer: "penultimate" 69 | -------------------------------------------------------------------------------- /models/configs/v2-inference-v_fp32.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | parameterization: "v" 6 | linear_start: 0.00085 7 | linear_end: 0.0120 8 | num_timesteps_cond: 1 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 64 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn 17 | monitor: 
val/loss_simple_ema 18 | scale_factor: 0.18215 19 | use_ema: False # we set this to false because this is an inference only config 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | use_checkpoint: True 25 | use_fp16: False 26 | image_size: 32 # unused 27 | in_channels: 4 28 | out_channels: 4 29 | model_channels: 320 30 | attention_resolutions: [ 4, 2, 1 ] 31 | num_res_blocks: 2 32 | channel_mult: [ 1, 2, 4, 4 ] 33 | num_head_channels: 64 # need to fix for flash-attn 34 | use_spatial_transformer: True 35 | use_linear_in_transformer: True 36 | transformer_depth: 1 37 | context_dim: 1024 38 | legacy: False 39 | 40 | first_stage_config: 41 | target: ldm.models.autoencoder.AutoencoderKL 42 | params: 43 | embed_dim: 4 44 | monitor: val/rec_loss 45 | ddconfig: 46 | #attn_type: "vanilla-xformers" 47 | double_z: true 48 | z_channels: 4 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | - 4 58 | num_res_blocks: 2 59 | attn_resolutions: [] 60 | dropout: 0.0 61 | lossconfig: 62 | target: torch.nn.Identity 63 | 64 | cond_stage_config: 65 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 66 | params: 67 | freeze: True 68 | layer: "penultimate" 69 | -------------------------------------------------------------------------------- /models/configs/v2-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False # we set this to false because this is an inference only config 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | use_checkpoint: True 24 | use_fp16: True 25 | image_size: 32 # unused 26 | in_channels: 4 27 | out_channels: 4 28 | model_channels: 320 29 | attention_resolutions: [ 4, 2, 1 ] 30 | num_res_blocks: 2 31 | channel_mult: [ 1, 2, 4, 4 ] 32 | num_head_channels: 64 # need to fix for flash-attn 33 | use_spatial_transformer: True 34 | use_linear_in_transformer: True 35 | transformer_depth: 1 36 | context_dim: 1024 37 | legacy: False 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | ddconfig: 45 | #attn_type: "vanilla-xformers" 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 65 | params: 66 | freeze: True 67 | layer: "penultimate" 68 | -------------------------------------------------------------------------------- /models/configs/v2-inference_fp32.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | 
log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False # we set this to false because this is an inference only config 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | use_checkpoint: True 24 | use_fp16: False 25 | image_size: 32 # unused 26 | in_channels: 4 27 | out_channels: 4 28 | model_channels: 320 29 | attention_resolutions: [ 4, 2, 1 ] 30 | num_res_blocks: 2 31 | channel_mult: [ 1, 2, 4, 4 ] 32 | num_head_channels: 64 # need to fix for flash-attn 33 | use_spatial_transformer: True 34 | use_linear_in_transformer: True 35 | transformer_depth: 1 36 | context_dim: 1024 37 | legacy: False 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | ddconfig: 45 | #attn_type: "vanilla-xformers" 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 65 | params: 66 | freeze: True 67 | layer: "penultimate" 68 | -------------------------------------------------------------------------------- /models/controlnet/put_controlnets_and_t2i_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/controlnet/put_controlnets_and_t2i_here -------------------------------------------------------------------------------- /models/diffusers/put_diffusers_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/diffusers/put_diffusers_models_here -------------------------------------------------------------------------------- /models/embeddings/put_embeddings_or_textual_inversion_concepts_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/embeddings/put_embeddings_or_textual_inversion_concepts_here -------------------------------------------------------------------------------- /models/gligen/put_gligen_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/gligen/put_gligen_models_here -------------------------------------------------------------------------------- /models/hypernetworks/put_hypernetworks_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/hypernetworks/put_hypernetworks_here -------------------------------------------------------------------------------- /models/loras/put_loras_here: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/loras/put_loras_here -------------------------------------------------------------------------------- /models/prompt_expansion/fooocus_expansion/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "gpt2", 3 | "activation_function": "gelu_new", 4 | "architectures": [ 5 | "GPT2LMHeadModel" 6 | ], 7 | "attn_pdrop": 0.1, 8 | "bos_token_id": 50256, 9 | "embd_pdrop": 0.1, 10 | "eos_token_id": 50256, 11 | "pad_token_id": 50256, 12 | "initializer_range": 0.02, 13 | "layer_norm_epsilon": 1e-05, 14 | "model_type": "gpt2", 15 | "n_ctx": 1024, 16 | "n_embd": 768, 17 | "n_head": 12, 18 | "n_inner": null, 19 | "n_layer": 12, 20 | "n_positions": 1024, 21 | "reorder_and_upcast_attn": false, 22 | "resid_pdrop": 0.1, 23 | "scale_attn_by_inverse_layer_idx": false, 24 | "scale_attn_weights": true, 25 | "summary_activation": null, 26 | "summary_first_dropout": 0.1, 27 | "summary_proj_to_labels": true, 28 | "summary_type": "cls_index", 29 | "summary_use_proj": true, 30 | "task_specific_params": { 31 | "text-generation": { 32 | "do_sample": true, 33 | "max_length": 50 34 | } 35 | }, 36 | "torch_dtype": "float32", 37 | "transformers_version": "4.23.0.dev0", 38 | "use_cache": true, 39 | "vocab_size": 50257 40 | } 41 | -------------------------------------------------------------------------------- /models/prompt_expansion/fooocus_expansion/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": "<|endoftext|>", 3 | "eos_token": "<|endoftext|>", 4 | "unk_token": "<|endoftext|>" 5 | } 6 | -------------------------------------------------------------------------------- /models/prompt_expansion/fooocus_expansion/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": "<|endoftext|>", 4 | "eos_token": "<|endoftext|>", 5 | "model_max_length": 1024, 6 | "name_or_path": "gpt2", 7 | "special_tokens_map_file": null, 8 | "tokenizer_class": "GPT2Tokenizer", 9 | "unk_token": "<|endoftext|>" 10 | } 11 | -------------------------------------------------------------------------------- /models/prompt_expansion/put_prompt_expansion_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/prompt_expansion/put_prompt_expansion_here -------------------------------------------------------------------------------- /models/style_models/put_t2i_style_model_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/style_models/put_t2i_style_model_here -------------------------------------------------------------------------------- /models/unet/put_unet_files_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/unet/put_unet_files_here -------------------------------------------------------------------------------- /models/upscale_models/put_esrgan_and_other_upscale_models_here: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/upscale_models/put_esrgan_and_other_upscale_models_here -------------------------------------------------------------------------------- /models/vae/put_vae_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/vae/put_vae_here -------------------------------------------------------------------------------- /models/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here -------------------------------------------------------------------------------- /modules/advanced_parameters.py: -------------------------------------------------------------------------------- 1 | adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \ 2 | scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \ 3 | overwrite_vary_strength, overwrite_upscale_strength, \ 4 | mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ 5 | debugging_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, inpaint_engine, \ 6 | refiner_swap_method, \ 7 | freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 = [None] * 25 8 | 9 | 10 | def set_all_advanced_parameters(*args): 11 | global adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \ 12 | scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \ 13 | overwrite_vary_strength, overwrite_upscale_strength, \ 14 | mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ 15 | debugging_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, inpaint_engine, \ 16 | refiner_swap_method, \ 17 | freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 18 | 19 | adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \ 20 | scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \ 21 | overwrite_vary_strength, overwrite_upscale_strength, \ 22 | mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ 23 | debugging_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, inpaint_engine, \ 24 | refiner_swap_method, \ 25 | freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 = args 26 | 27 | return 28 | -------------------------------------------------------------------------------- /modules/expansion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import fcbh.model_management as model_management 4 | 5 | from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed 6 | from modules.path import fooocus_expansion_path 7 | from fcbh.model_patcher import ModelPatcher 8 | 9 | 10 | fooocus_magic_split = [ 11 | ', extremely', 12 | ', intricate,', 13 | ] 14 | dangrous_patterns = '[]【】()()|::' 15 | 16 | 17 | def safe_str(x): 18 | x = str(x) 19 | for _ in range(16): 20 | x = x.replace(' ', ' ') 21 | return x.strip(",. 
\r\n") 22 | 23 | 24 | def remove_pattern(x, pattern): 25 | for p in pattern: 26 | x = x.replace(p, '') 27 | return x 28 | 29 | 30 | class FooocusExpansion: 31 | def __init__(self): 32 | self.tokenizer = AutoTokenizer.from_pretrained(fooocus_expansion_path) 33 | self.model = AutoModelForCausalLM.from_pretrained(fooocus_expansion_path) 34 | self.model.eval() 35 | 36 | load_device = model_management.text_encoder_device() 37 | offload_device = model_management.text_encoder_offload_device() 38 | 39 | # MPS hack 40 | if model_management.is_device_mps(load_device): 41 | load_device = torch.device('cpu') 42 | offload_device = torch.device('cpu') 43 | 44 | use_fp16 = model_management.should_use_fp16(device=load_device) 45 | 46 | if use_fp16: 47 | self.model.half() 48 | 49 | self.patcher = ModelPatcher(self.model, load_device=load_device, offload_device=offload_device) 50 | print(f'Fooocus Expansion engine loaded for {load_device}, use_fp16 = {use_fp16}.') 51 | 52 | def __call__(self, prompt, seed): 53 | if self.patcher.current_device != self.patcher.load_device: 54 | print('Fooocus Expansion loaded by itself.') 55 | model_management.load_model_gpu(self.patcher) 56 | 57 | seed = int(seed) 58 | set_seed(seed) 59 | origin = safe_str(prompt) 60 | prompt = origin + fooocus_magic_split[seed % len(fooocus_magic_split)] 61 | 62 | tokenized_kwargs = self.tokenizer(prompt, return_tensors="pt") 63 | tokenized_kwargs.data['input_ids'] = tokenized_kwargs.data['input_ids'].to(self.patcher.load_device) 64 | tokenized_kwargs.data['attention_mask'] = tokenized_kwargs.data['attention_mask'].to(self.patcher.load_device) 65 | 66 | # https://huggingface.co/blog/introducing-csearch 67 | # https://huggingface.co/docs/transformers/generation_strategies 68 | features = self.model.generate(**tokenized_kwargs, 69 | num_beams=1, 70 | max_new_tokens=256, 71 | do_sample=True) 72 | 73 | response = self.tokenizer.batch_decode(features, skip_special_tokens=True) 74 | result = response[0][len(origin):] 75 | result = safe_str(result) 76 | result = remove_pattern(result, dangrous_patterns) 77 | return result 78 | -------------------------------------------------------------------------------- /modules/flags.py: -------------------------------------------------------------------------------- 1 | disabled = 'Disabled' 2 | enabled = 'Enabled' 3 | subtle_variation = 'Vary (Subtle)' 4 | strong_variation = 'Vary (Strong)' 5 | upscale_15 = 'Upscale (1.5x)' 6 | upscale_2 = 'Upscale (2x)' 7 | upscale_fast = 'Upscale (Fast 2x)' 8 | 9 | uov_list = [ 10 | disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast 11 | ] 12 | 13 | KSAMPLER_NAMES = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", 14 | "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", 15 | "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm"] 16 | 17 | SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"] 18 | SAMPLER_NAMES = KSAMPLER_NAMES + ["ddim", "uni_pc", "uni_pc_bh2"] 19 | 20 | sampler_list = SAMPLER_NAMES 21 | scheduler_list = SCHEDULER_NAMES 22 | cn_ip = "Image Prompt" 23 | cn_depth = "Depth" 24 | cn_canny = "PyraCanny" 25 | cn_cpds = "CPDS" 26 | cn_pose = 'Pose' 27 | cn_reColor = 'ReColor' 28 | cn_Sketch = 'sketch' 29 | # cn_revision = 'revision' 30 | # cn_tileBlur = 'TileBlur' 31 | # cn_tileBlurAnime = 'TileBlurAnime' 32 | 33 | ip_list = [cn_ip, cn_canny, cn_cpds, cn_depth, cn_pose, cn_reColor, cn_Sketch, 34 | # 
cn_revision, cn_tileBlur,cn_tileBlurAnime 35 | ] 36 | default_ip = cn_ip 37 | 38 | default_parameters = { 39 | cn_ip: (0.5, 0.6), 40 | cn_canny: (0.5, 1.0), 41 | cn_cpds: (0.5, 1.0), 42 | cn_depth: (0.5, 1.0), 43 | cn_pose: (0.5, 1.0), 44 | cn_reColor: (0.5, 1.0), 45 | cn_Sketch: (0.5, 1.0), 46 | # cn_revision: (0.5, 1.0), 47 | # cn_tileBlur: (0.5, 1.0), 48 | # cn_tileBlurAnime: (0.5, 1.0), 49 | } # stop, weight 50 | -------------------------------------------------------------------------------- /modules/html.py: -------------------------------------------------------------------------------- 1 | css = ''' 2 | .loader-container { 3 | display: flex; /* Use flex to align items horizontally */ 4 | align-items: center; /* Center items vertically within the container */ 5 | white-space: nowrap; /* Prevent line breaks within the container */ 6 | } 7 | 8 | .loader { 9 | border: 8px solid #f3f3f3; /* Light grey */ 10 | border-top: 8px solid #3498db; /* Blue */ 11 | border-radius: 50%; 12 | width: 30px; 13 | height: 30px; 14 | animation: spin 2s linear infinite; 15 | } 16 | 17 | @keyframes spin { 18 | 0% { transform: rotate(0deg); } 19 | 100% { transform: rotate(360deg); } 20 | } 21 | 22 | /* Style the progress bar */ 23 | progress { 24 | appearance: none; /* Remove default styling */ 25 | height: 20px; /* Set the height of the progress bar */ 26 | border-radius: 5px; /* Round the corners of the progress bar */ 27 | background-color: #f3f3f3; /* Light grey background */ 28 | width: 100%; 29 | } 30 | 31 | /* Style the progress bar container */ 32 | .progress-container { 33 | margin-left: 20px; 34 | margin-right: 20px; 35 | flex-grow: 1; /* Allow the progress container to take up remaining space */ 36 | } 37 | 38 | /* Set the color of the progress bar fill */ 39 | progress::-webkit-progress-value { 40 | background-color: #3498db; /* Blue color for the fill */ 41 | } 42 | 43 | progress::-moz-progress-bar { 44 | background-color: #3498db; /* Blue color for the fill in Firefox */ 45 | } 46 | 47 | /* Style the text on the progress bar */ 48 | progress::after { 49 | content: attr(value '%'); /* Display the progress value followed by '%' */ 50 | position: absolute; 51 | top: 50%; 52 | left: 50%; 53 | transform: translate(-50%, -50%); 54 | color: white; /* Set text color */ 55 | font-size: 14px; /* Set font size */ 56 | } 57 | 58 | /* Style other texts */ 59 | .loader-container > span { 60 | margin-left: 5px; /* Add spacing between the progress bar and the text */ 61 | } 62 | 63 | .progress-bar > .generating { 64 | display: none !important; 65 | } 66 | 67 | .progress-bar{ 68 | height: 30px !important; 69 | } 70 | 71 | .type_row{ 72 | height: 80px !important; 73 | } 74 | 75 | .type_row_half{ 76 | height: 32px !important; 77 | } 78 | 79 | .scroll-hide{ 80 | resize: none !important; 81 | } 82 | 83 | .refresh_button{ 84 | border: none !important; 85 | background: none !important; 86 | font-size: none !important; 87 | box-shadow: none !important; 88 | } 89 | 90 | .advanced_check_row{ 91 | width: 250px !important; 92 | } 93 | 94 | .min_check{ 95 | min-width: min(1px, 100%) !important; 96 | } 97 | 98 | .resizable_area { 99 | resize: vertical; 100 | overflow: auto !important; 101 | } 102 | 103 | ''' 104 | progress_html = ''' 105 |
[The HTML markup embedded in the remainder of modules/html.py (the progress_html template) and in the log-writing module that follows it was lost during extraction; only the surviving fragments of that module's inner lines 24-40 are shown below, with each stripped span marked as [...].]
Fooocus Log {date_string} (private) [...]
\n") 24 | f.write(f"All images do not contain any hidden data. [...]
") 25 | 26 | with open(html_name, 'a+', encoding='utf-8') as f: 27 | div_name = only_name.replace('.', '_') 28 | f.write(f'{only_name} [...]
\n") 30 | i = 0 31 | for k, v in dic: 32 | if i < single_line_number: 33 | f.write(f"{k}: {v} [...]
\n") 34 | else: 35 | if (i - single_line_number) % 2 == 0: 36 | f.write(f"{k}: {v}, ") 37 | else: 38 | f.write(f"{k}: {v} [...]
\n") 39 | i += 1 40 | f.write(f" [...]