├── .github ├── CODEOWNERS └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── LICENSE ├── args_manager.py ├── asset ├── canny │ ├── 0.png │ ├── 1.png │ ├── canny.png │ └── snip.png ├── compare.png ├── depth │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── depth.png │ └── snip.png ├── discord-icon-svgrepo-com.svg ├── inpaint_outpaint │ └── mask_inpaint.png ├── ip │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── ancient_chinese_costume │ │ ├── 0.png │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ └── snip.png │ ├── ip.png │ └── snip.png ├── ip_depth │ ├── 0.png │ ├── ip_depth.png │ └── snip.png ├── pose_face │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── pose_face.png │ └── snip.png ├── recolor │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── recolor.png │ └── snip.png ├── run_bat.png └── sketch │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── sketch.png │ └── snip.png ├── backend ├── doc └── headless │ ├── LICENSE │ ├── fcbh │ ├── checkpoint_pickle.py │ ├── cldm │ │ └── cldm.py │ ├── cli_args.py │ ├── clip_config_bigg.json │ ├── clip_vision.py │ ├── clip_vision_config_g.json │ ├── clip_vision_config_h.json │ ├── clip_vision_config_vitl.json │ ├── controlnet.py │ ├── diffusers_convert.py │ ├── diffusers_load.py │ ├── extra_samplers │ │ └── uni_pc.py │ ├── gligen.py │ ├── k_diffusion │ │ ├── external.py │ │ ├── sampling.py │ │ └── utils.py │ ├── latent_formats.py │ ├── ldm │ │ ├── models │ │ │ ├── autoencoder.py │ │ │ └── diffusion │ │ │ │ ├── __init__.py │ │ │ │ ├── ddim.py │ │ │ │ ├── dpm_solver │ │ │ │ ├── __init__.py │ │ │ │ ├── dpm_solver.py │ │ │ │ └── sampler.py │ │ │ │ ├── plms.py │ │ │ │ └── sampling_util.py │ │ ├── modules │ │ │ ├── attention.py │ │ │ ├── diffusionmodules │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── openaimodel.py │ │ │ │ ├── upscaling.py │ │ │ │ └── util.py │ │ │ ├── distributions │ │ │ │ ├── __init__.py │ │ │ │ └── distributions.py │ │ │ ├── ema.py │ │ │ ├── encoders │ │ │ │ ├── __init__.py │ │ │ │ └── noise_aug_modules.py │ │ │ └── sub_quadratic_attention.py │ │ └── util.py │ ├── lora.py │ ├── model_base.py │ ├── model_detection.py │ ├── model_management.py │ ├── model_patcher.py │ ├── ops.py │ ├── options.py │ ├── sample.py │ ├── samplers.py │ ├── sd.py │ ├── sd1_clip.py │ ├── sd1_clip_config.json │ ├── sd1_tokenizer │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ ├── sd2_clip.py │ ├── sd2_clip_config.json │ ├── sdxl_clip.py │ ├── supported_models.py │ ├── supported_models_base.py │ ├── t2i_adapter │ │ └── adapter.py │ ├── taesd │ │ └── taesd.py │ └── utils.py │ ├── fcbh_extras │ ├── chainner_models │ │ ├── __init__.py │ │ ├── architecture │ │ │ ├── DAT.py │ │ │ ├── HAT.py │ │ │ ├── LICENSE-DAT │ │ │ ├── LICENSE-ESRGAN │ │ │ ├── LICENSE-HAT │ │ │ ├── LICENSE-RealESRGAN │ │ │ ├── LICENSE-SCUNet │ │ │ ├── LICENSE-SPSR │ │ │ ├── LICENSE-SwiftSRGAN │ │ │ ├── LICENSE-Swin2SR │ │ │ ├── LICENSE-SwinIR │ │ │ ├── LICENSE-lama │ │ │ ├── LaMa.py │ │ │ ├── OmniSR │ │ │ │ ├── ChannelAttention.py │ │ │ │ ├── LICENSE │ │ │ │ ├── OSA.py │ │ │ │ ├── OSAG.py │ │ │ │ ├── OmniSR.py │ │ │ │ ├── esa.py │ │ │ │ ├── layernorm.py │ │ │ │ └── pixelshuffle.py │ │ │ ├── RRDB.py │ │ │ ├── SCUNet.py │ │ │ ├── SPSR.py │ │ │ ├── SRVGG.py │ │ │ ├── SwiftSRGAN.py │ │ │ ├── Swin2SR.py │ │ │ ├── SwinIR.py │ │ │ ├── __init__.py │ │ │ ├── block.py │ │ │ ├── face │ │ │ │ ├── LICENSE-GFPGAN │ │ │ │ ├── LICENSE-RestoreFormer │ │ │ │ ├── LICENSE-codeformer │ │ │ │ ├── 
arcface_arch.py │ │ │ │ ├── codeformer.py │ │ │ │ ├── fused_act.py │ │ │ │ ├── gfpgan_bilinear_arch.py │ │ │ │ ├── gfpganv1_arch.py │ │ │ │ ├── gfpganv1_clean_arch.py │ │ │ │ ├── restoreformer_arch.py │ │ │ │ ├── stylegan2_arch.py │ │ │ │ ├── stylegan2_bilinear_arch.py │ │ │ │ ├── stylegan2_clean_arch.py │ │ │ │ └── upfirdn2d.py │ │ │ └── timm │ │ │ │ ├── LICENSE │ │ │ │ ├── drop.py │ │ │ │ ├── helpers.py │ │ │ │ └── weight_init.py │ │ ├── model_loading.py │ │ └── types.py │ ├── nodes_canny.py │ ├── nodes_clip_sdxl.py │ ├── nodes_compositing.py │ ├── nodes_custom_sampler.py │ ├── nodes_freelunch.py │ ├── nodes_hypernetwork.py │ ├── nodes_latent.py │ ├── nodes_mask.py │ ├── nodes_model_merging.py │ ├── nodes_post_processing.py │ ├── nodes_rebatch.py │ ├── nodes_tomesd.py │ └── nodes_upscale_model.py │ ├── folder_paths.py │ ├── latent_preview.py │ └── nodes.py ├── build_launcher.py ├── colab_fix.txt ├── css └── style.css ├── entry_with_update.py ├── environment.yaml ├── fooocusControl_colab.ipynb ├── fooocus_colab.ipynb ├── fooocus_extras ├── controlnet_preprocess_model │ ├── CPDS │ │ └── __init__.py │ ├── OpenPose │ │ ├── LICENSE │ │ ├── __init__.py │ │ ├── body.py │ │ ├── face.py │ │ ├── hand.py │ │ ├── model.py │ │ └── util.py │ ├── PyramidCanny │ │ └── __init__.py │ ├── ReColor │ │ └── __init__.py │ ├── Revision │ │ └── __init__.py │ ├── Sketch │ │ └── __init__.py │ ├── TileBlur │ │ └── __init__.py │ ├── TileBlurAnime │ │ └── __init__.py │ ├── ZeoDepth │ │ ├── LICENSE │ │ ├── __init__.py │ │ └── zoedepth │ │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── data_mono.py │ │ │ ├── ddad.py │ │ │ ├── diml_indoor_test.py │ │ │ ├── diml_outdoor_test.py │ │ │ ├── diode.py │ │ │ ├── hypersim.py │ │ │ ├── ibims.py │ │ │ ├── preprocess.py │ │ │ ├── sun_rgbd_loader.py │ │ │ ├── transforms.py │ │ │ ├── vkitti.py │ │ │ └── vkitti2.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── base_models │ │ │ │ ├── __init__.py │ │ │ │ ├── midas.py │ │ │ │ └── midas_repo │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── README.md │ │ │ │ │ ├── environment.yaml │ │ │ │ │ ├── hubconf.py │ │ │ │ │ ├── midas │ │ │ │ │ ├── backbones │ │ │ │ │ │ ├── beit.py │ │ │ │ │ │ ├── levit.py │ │ │ │ │ │ ├── next_vit.py │ │ │ │ │ │ ├── swin.py │ │ │ │ │ │ ├── swin2.py │ │ │ │ │ │ ├── swin_common.py │ │ │ │ │ │ ├── utils.py │ │ │ │ │ │ └── vit.py │ │ │ │ │ ├── base_model.py │ │ │ │ │ ├── blocks.py │ │ │ │ │ ├── dpt_depth.py │ │ │ │ │ ├── midas_net.py │ │ │ │ │ ├── midas_net_custom.py │ │ │ │ │ ├── model_loader.py │ │ │ │ │ └── transforms.py │ │ │ │ │ ├── run.py │ │ │ │ │ └── utils.py │ │ │ ├── builder.py │ │ │ ├── depth_model.py │ │ │ ├── layers │ │ │ │ ├── attractor.py │ │ │ │ ├── dist_layers.py │ │ │ │ ├── localbins_layers.py │ │ │ │ └── patch_transformer.py │ │ │ ├── model_io.py │ │ │ ├── zoedepth │ │ │ │ ├── __init__.py │ │ │ │ ├── config_zoedepth.json │ │ │ │ ├── config_zoedepth_kitti.json │ │ │ │ └── zoedepth_v1.py │ │ │ └── zoedepth_nk │ │ │ │ ├── __init__.py │ │ │ │ ├── config_zoedepth_nk.json │ │ │ │ └── zoedepth_nk_v1.py │ │ │ ├── trainers │ │ │ ├── base_trainer.py │ │ │ ├── builder.py │ │ │ ├── loss.py │ │ │ ├── zoedepth_nk_trainer.py │ │ │ └── zoedepth_trainer.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── arg_utils.py │ │ │ ├── config.py │ │ │ ├── easydict │ │ │ └── __init__.py │ │ │ ├── geometry.py │ │ │ └── misc.py │ └── __init__.py ├── ip_adapter.py ├── preprocessors.py ├── resampler.py └── vae_interpose.py ├── fooocus_version.py ├── javascript ├── contextMenus.js └── script.js ├── launch.py ├── models 
├── checkpoints │ └── put_checkpoints_here ├── clip │ └── put_clip_or_text_encoder_models_here ├── clip_vision │ └── put_clip_vision_models_here ├── configs │ ├── anything_v3.yaml │ ├── v1-inference.yaml │ ├── v1-inference_clip_skip_2.yaml │ ├── v1-inference_clip_skip_2_fp16.yaml │ ├── v1-inference_fp16.yaml │ ├── v1-inpainting-inference.yaml │ ├── v2-inference-v.yaml │ ├── v2-inference-v_fp32.yaml │ ├── v2-inference.yaml │ ├── v2-inference_fp32.yaml │ └── v2-inpainting-inference.yaml ├── controlnet │ └── put_controlnets_and_t2i_here ├── diffusers │ └── put_diffusers_models_here ├── embeddings │ └── put_embeddings_or_textual_inversion_concepts_here ├── gligen │ └── put_gligen_models_here ├── hypernetworks │ └── put_hypernetworks_here ├── loras │ └── put_loras_here ├── prompt_expansion │ ├── fooocus_expansion │ │ ├── config.json │ │ ├── merges.txt │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.json │ └── put_prompt_expansion_here ├── style_models │ └── put_t2i_style_model_here ├── unet │ └── put_unet_files_here ├── upscale_models │ └── put_esrgan_and_other_upscale_models_here ├── vae │ └── put_vae_here └── vae_approx │ └── put_taesd_encoder_pth_and_taesd_decoder_pth_here ├── modules ├── advanced_parameters.py ├── anisotropic.py ├── async_worker.py ├── core.py ├── default_pipeline.py ├── expansion.py ├── flags.py ├── gradio_hijack.py ├── html.py ├── inpaint_worker.py ├── launch_util.py ├── model_loader.py ├── patch.py ├── path.py ├── private_logger.py ├── sample_hijack.py ├── sdxl_styles.py ├── ui_gradio_extensions.py ├── upscaler.py └── util.py ├── notification-example.ogg ├── presets ├── anime.json ├── realistic.json └── sdxl.json ├── python_hijack.py ├── readme.md ├── requirements_versions.txt ├── sdxl_styles ├── sdxl_styles_diva.json ├── sdxl_styles_fooocus.json ├── sdxl_styles_mre.json ├── sdxl_styles_sai.json └── sdxl_styles_twri.json ├── shared.py ├── update_log.md ├── user_controlnet_config.py ├── webui.py └── wildcards ├── artist.txt └── color.txt /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @lllyasviel 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Describe a problem 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the problem** 11 | A clear and concise description of what the bug is. 12 | 13 | **Full Console Log** 14 | Paste **full** console log here. You will make our job easier if you give a **full** log. 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the idea you'd like** 14 | A clear and concise description of what you want to happen. 
15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.ckpt 3 | *.safetensors 4 | *.pth 5 | *.pt 6 | *.bin 7 | *.patch 8 | *.backup 9 | *.corrupted 10 | lena.png 11 | lena_result.png 12 | lena_test.py 13 | user_path_config.txt 14 | build_chb.py 15 | /modules/*.png 16 | /repositories 17 | /venv 18 | /tmp 19 | /ui-config.json 20 | /outputs 21 | /config.json 22 | /log 23 | /webui.settings.bat 24 | /embeddings 25 | /styles.csv 26 | /params.txt 27 | /styles.csv.bak 28 | /webui-user.bat 29 | /webui-user.sh 30 | /interrogate 31 | /user.css 32 | /.idea 33 | /notification.ogg 34 | /notification.mp3 35 | /SwinIR 36 | /textual_inversion 37 | .vscode 38 | /extensions 39 | /test/stdout.txt 40 | /test/stderr.txt 41 | /cache.json* 42 | /config_states/ 43 | /node_modules 44 | /package-lock.json 45 | /.coverage* 46 | -------------------------------------------------------------------------------- /args_manager.py: -------------------------------------------------------------------------------- 1 | from fcbh.options import enable_args_parsing 2 | enable_args_parsing(False) 3 | import fcbh.cli_args as fcbh_cli 4 | 5 | 6 | fcbh_cli.parser.add_argument("--share", action='store_true', help="Set whether to share on Gradio.") 7 | fcbh_cli.parser.add_argument("--preset", type=str, default=None, help="Apply specified UI preset.") 8 | 9 | fcbh_cli.args = fcbh_cli.parser.parse_args() 10 | fcbh_cli.args.disable_cuda_malloc = True 11 | fcbh_cli.args.auto_launch = True 12 | 13 | if getattr(fcbh_cli.args, 'port', 8188) == 8188: 14 | fcbh_cli.args.port = None 15 | 16 | args = fcbh_cli.args 17 | -------------------------------------------------------------------------------- /asset/canny/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/canny/0.png -------------------------------------------------------------------------------- /asset/canny/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/canny/1.png -------------------------------------------------------------------------------- /asset/canny/canny.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/canny/canny.png -------------------------------------------------------------------------------- /asset/canny/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/canny/snip.png -------------------------------------------------------------------------------- /asset/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/compare.png -------------------------------------------------------------------------------- /asset/depth/0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/0.png -------------------------------------------------------------------------------- /asset/depth/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/1.png -------------------------------------------------------------------------------- /asset/depth/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/2.png -------------------------------------------------------------------------------- /asset/depth/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/3.png -------------------------------------------------------------------------------- /asset/depth/depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/depth.png -------------------------------------------------------------------------------- /asset/depth/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/depth/snip.png -------------------------------------------------------------------------------- /asset/discord-icon-svgrepo-com.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /asset/inpaint_outpaint/mask_inpaint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/inpaint_outpaint/mask_inpaint.png -------------------------------------------------------------------------------- /asset/ip/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/0.png -------------------------------------------------------------------------------- /asset/ip/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/1.png -------------------------------------------------------------------------------- /asset/ip/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/2.png -------------------------------------------------------------------------------- /asset/ip/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/3.png 
-------------------------------------------------------------------------------- /asset/ip/ancient_chinese_costume/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ancient_chinese_costume/0.png -------------------------------------------------------------------------------- /asset/ip/ancient_chinese_costume/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ancient_chinese_costume/1.png -------------------------------------------------------------------------------- /asset/ip/ancient_chinese_costume/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ancient_chinese_costume/2.png -------------------------------------------------------------------------------- /asset/ip/ancient_chinese_costume/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ancient_chinese_costume/3.png -------------------------------------------------------------------------------- /asset/ip/ancient_chinese_costume/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ancient_chinese_costume/snip.png -------------------------------------------------------------------------------- /asset/ip/ip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/ip.png -------------------------------------------------------------------------------- /asset/ip/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip/snip.png -------------------------------------------------------------------------------- /asset/ip_depth/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip_depth/0.png -------------------------------------------------------------------------------- /asset/ip_depth/ip_depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip_depth/ip_depth.png -------------------------------------------------------------------------------- /asset/ip_depth/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/ip_depth/snip.png -------------------------------------------------------------------------------- /asset/pose_face/0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/0.png -------------------------------------------------------------------------------- /asset/pose_face/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/1.png -------------------------------------------------------------------------------- /asset/pose_face/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/2.png -------------------------------------------------------------------------------- /asset/pose_face/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/3.png -------------------------------------------------------------------------------- /asset/pose_face/pose_face.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/pose_face.png -------------------------------------------------------------------------------- /asset/pose_face/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/pose_face/snip.png -------------------------------------------------------------------------------- /asset/recolor/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/0.png -------------------------------------------------------------------------------- /asset/recolor/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/1.png -------------------------------------------------------------------------------- /asset/recolor/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/2.png -------------------------------------------------------------------------------- /asset/recolor/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/3.png -------------------------------------------------------------------------------- /asset/recolor/recolor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/recolor.png -------------------------------------------------------------------------------- /asset/recolor/snip.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/recolor/snip.png -------------------------------------------------------------------------------- /asset/run_bat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/run_bat.png -------------------------------------------------------------------------------- /asset/sketch/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/0.png -------------------------------------------------------------------------------- /asset/sketch/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/1.png -------------------------------------------------------------------------------- /asset/sketch/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/2.png -------------------------------------------------------------------------------- /asset/sketch/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/3.png -------------------------------------------------------------------------------- /asset/sketch/sketch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/sketch.png -------------------------------------------------------------------------------- /asset/sketch/snip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/asset/sketch/snip.png -------------------------------------------------------------------------------- /backend/doc: -------------------------------------------------------------------------------- 1 | # Fooocus' Comfy Backend Headless (FCBH) 2 | 3 | This is a Comfy Backend from StabilityAI. This pre-compiled backend makes it easier for people who have trouble using pygit2. 4 | 5 | FCBH is maintained by Fooocus through review of StabilityAI's changes.
6 | -------------------------------------------------------------------------------- /backend/headless/fcbh/checkpoint_pickle.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | load = pickle.load 4 | 5 | class Empty: 6 | pass 7 | 8 | class Unpickler(pickle.Unpickler): 9 | def find_class(self, module, name): 10 | #TODO: safe unpickle 11 | if module.startswith("pytorch_lightning"): 12 | return Empty 13 | return super().find_class(module, name) 14 | -------------------------------------------------------------------------------- /backend/headless/fcbh/clip_config_bigg.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1280, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 5120, 14 | "layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 20, 18 | "num_hidden_layers": 32, 19 | "pad_token_id": 1, 20 | "projection_dim": 1280, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /backend/headless/fcbh/clip_vision_config_g.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1664, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 8192, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 48, 15 | "patch_size": 14, 16 | "projection_dim": 1280, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /backend/headless/fcbh/clip_vision_config_h.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "gelu", 5 | "hidden_size": 1280, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 5120, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 32, 15 | "patch_size": 14, 16 | "projection_dim": 1024, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /backend/headless/fcbh/clip_vision_config_vitl.json: -------------------------------------------------------------------------------- 1 | { 2 | "attention_dropout": 0.0, 3 | "dropout": 0.0, 4 | "hidden_act": "quick_gelu", 5 | "hidden_size": 1024, 6 | "image_size": 224, 7 | "initializer_factor": 1.0, 8 | "initializer_range": 0.02, 9 | "intermediate_size": 4096, 10 | "layer_norm_eps": 1e-05, 11 | "model_type": "clip_vision_model", 12 | "num_attention_heads": 16, 13 | "num_channels": 3, 14 | "num_hidden_layers": 24, 15 | "patch_size": 14, 16 | "projection_dim": 768, 17 | "torch_dtype": "float32" 18 | } 19 | -------------------------------------------------------------------------------- /backend/headless/fcbh/diffusers_load.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import fcbh.sd 5 | 6 | def first_file(path, filenames): 7 | for f in filenames: 8 | p = os.path.join(path, f) 9 | if os.path.exists(p): 10 | return p 11 | return None 12 | 13 | def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None): 14 | diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"] 15 | unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names) 16 | vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names) 17 | 18 | text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"] 19 | text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names) 20 | text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names) 21 | 22 | text_encoder_paths = [text_encoder1_path] 23 | if text_encoder2_path is not None: 24 | text_encoder_paths.append(text_encoder2_path) 25 | 26 | unet = fcbh.sd.load_unet(unet_path) 27 | 28 | clip = None 29 | if output_clip: 30 | clip = fcbh.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory) 31 | 32 | vae = None 33 | if output_vae: 34 | sd = fcbh.utils.load_torch_file(vae_path) 35 | vae = fcbh.sd.VAE(sd=sd) 36 | 37 | return (unet, clip, vae) 38 | -------------------------------------------------------------------------------- /backend/headless/fcbh/latent_formats.py: -------------------------------------------------------------------------------- 1 | 2 | class LatentFormat: 3 | scale_factor = 1.0 4 | latent_rgb_factors = None 5 | taesd_decoder_name = None 6 | 7 | def process_in(self, latent): 8 | return latent * self.scale_factor 9 | 10 | def process_out(self, latent): 11 | return latent / self.scale_factor 12 | 13 | class SD15(LatentFormat): 14 | def __init__(self, scale_factor=0.18215): 15 | self.scale_factor = scale_factor 16 | self.latent_rgb_factors = [ 17 | # R G B 18 | [ 0.3512, 0.2297, 0.3227], 19 | [ 0.3250, 0.4974, 0.2350], 20 | [-0.2829, 0.1762, 0.2721], 21 | [-0.2120, -0.2616, -0.7177] 22 | ] 23 | self.taesd_decoder_name = "taesd_decoder" 24 | 25 | class SDXL(LatentFormat): 26 | def __init__(self): 27 | self.scale_factor = 0.13025 28 | self.latent_rgb_factors = [ 29 | # R G B 30 | [ 0.3920, 0.4054, 0.4549], 31 | [-0.2634, -0.0196, 0.0653], 32 | [ 0.0568, 0.1687, -0.0755], 33 | [-0.3112, -0.2359, -0.2076] 34 | ] 35 | self.taesd_decoder_name = "taesdxl_decoder" 36 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/models/diffusion/dpm_solver/sampler.py: 
-------------------------------------------------------------------------------- 1 | """SAMPLING ONLY.""" 2 | import torch 3 | 4 | from .dpm_solver import NoiseScheduleVP, model_wrapper, DPM_Solver 5 | 6 | MODEL_TYPES = { 7 | "eps": "noise", 8 | "v": "v" 9 | } 10 | 11 | 12 | class DPMSolverSampler(object): 13 | def __init__(self, model, device=torch.device("cuda"), **kwargs): 14 | super().__init__() 15 | self.model = model 16 | self.device = device 17 | to_torch = lambda x: x.clone().detach().to(torch.float32).to(model.device) 18 | self.register_buffer('alphas_cumprod', to_torch(model.alphas_cumprod)) 19 | 20 | def register_buffer(self, name, attr): 21 | if type(attr) == torch.Tensor: 22 | if attr.device != self.device: 23 | attr = attr.to(self.device) 24 | setattr(self, name, attr) 25 | 26 | @torch.no_grad() 27 | def sample(self, 28 | S, 29 | batch_size, 30 | shape, 31 | conditioning=None, 32 | callback=None, 33 | normals_sequence=None, 34 | img_callback=None, 35 | quantize_x0=False, 36 | eta=0., 37 | mask=None, 38 | x0=None, 39 | temperature=1., 40 | noise_dropout=0., 41 | score_corrector=None, 42 | corrector_kwargs=None, 43 | verbose=True, 44 | x_T=None, 45 | log_every_t=100, 46 | unconditional_guidance_scale=1., 47 | unconditional_conditioning=None, 48 | # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... 49 | **kwargs 50 | ): 51 | if conditioning is not None: 52 | if isinstance(conditioning, dict): 53 | ctmp = conditioning[list(conditioning.keys())[0]] 54 | while isinstance(ctmp, list): ctmp = ctmp[0] 55 | if isinstance(ctmp, torch.Tensor): 56 | cbs = ctmp.shape[0] 57 | if cbs != batch_size: 58 | print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") 59 | elif isinstance(conditioning, list): 60 | for ctmp in conditioning: 61 | if ctmp.shape[0] != batch_size: 62 | print(f"Warning: Got {ctmp.shape[0]} conditionings but batch-size is {batch_size}") 63 | else: 64 | if isinstance(conditioning, torch.Tensor): 65 | if conditioning.shape[0] != batch_size: 66 | print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") 67 | 68 | # sampling 69 | C, H, W = shape 70 | size = (batch_size, C, H, W) 71 | 72 | print(f'Data shape for DPM-Solver sampling is {size}, sampling steps {S}') 73 | 74 | device = self.model.betas.device 75 | if x_T is None: 76 | img = torch.randn(size, device=device) 77 | else: 78 | img = x_T 79 | 80 | ns = NoiseScheduleVP('discrete', alphas_cumprod=self.alphas_cumprod) 81 | 82 | model_fn = model_wrapper( 83 | lambda x, t, c: self.model.apply_model(x, t, c), 84 | ns, 85 | model_type=MODEL_TYPES[self.model.parameterization], 86 | guidance_type="classifier-free", 87 | condition=conditioning, 88 | unconditional_condition=unconditional_conditioning, 89 | guidance_scale=unconditional_guidance_scale, 90 | ) 91 | 92 | dpm_solver = DPM_Solver(model_fn, ns, predict_x0=True, thresholding=False) 93 | x = dpm_solver.sample(img, steps=S, skip_type="time_uniform", method="multistep", order=2, 94 | lower_order_final=True) 95 | 96 | return x.to(device), None 97 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 
7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') 11 | return x[(...,) + (None,) * dims_to_append] 12 | 13 | 14 | def norm_thresholding(x0, value): 15 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 16 | return x0 * (value / s) 17 | 18 | 19 | def spatial_norm_thresholding(x0, value): 20 | # b c h w 21 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 22 | return x0 * (value / s) -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/diffusionmodules/upscaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from functools import partial 5 | 6 | from .util import extract_into_tensor, make_beta_schedule 7 | from fcbh.ldm.util import default 8 | 9 | 10 | class AbstractLowScaleModel(nn.Module): 11 | # for concatenating a downsampled image to the latent representation 12 | def __init__(self, noise_schedule_config=None): 13 | super(AbstractLowScaleModel, self).__init__() 14 | if noise_schedule_config is not None: 15 | self.register_schedule(**noise_schedule_config) 16 | 17 | def register_schedule(self, beta_schedule="linear", timesteps=1000, 18 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 19 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, 20 | cosine_s=cosine_s) 21 | alphas = 1. - betas 22 | alphas_cumprod = np.cumprod(alphas, axis=0) 23 | alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 24 | 25 | timesteps, = betas.shape 26 | self.num_timesteps = int(timesteps) 27 | self.linear_start = linear_start 28 | self.linear_end = linear_end 29 | assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' 30 | 31 | to_torch = partial(torch.tensor, dtype=torch.float32) 32 | 33 | self.register_buffer('betas', to_torch(betas)) 34 | self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) 35 | self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) 36 | 37 | # calculations for diffusion q(x_t | x_{t-1}) and others 38 | self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) 39 | self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) 40 | self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) 41 | self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) 42 | self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. 
/ alphas_cumprod - 1))) 43 | 44 | def q_sample(self, x_start, t, noise=None): 45 | noise = default(noise, lambda: torch.randn_like(x_start)) 46 | return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + 47 | extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) 48 | 49 | def forward(self, x): 50 | return x, None 51 | 52 | def decode(self, x): 53 | return x 54 | 55 | 56 | class SimpleImageConcat(AbstractLowScaleModel): 57 | # no noise level conditioning 58 | def __init__(self): 59 | super(SimpleImageConcat, self).__init__(noise_schedule_config=None) 60 | self.max_noise_level = 0 61 | 62 | def forward(self, x): 63 | # fix to constant noise level 64 | return x, torch.zeros(x.shape[0], device=x.device).long() 65 | 66 | 67 | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): 68 | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cuda=False): 69 | super().__init__(noise_schedule_config=noise_schedule_config) 70 | self.max_noise_level = max_noise_level 71 | 72 | def forward(self, x, noise_level=None): 73 | if noise_level is None: 74 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 75 | else: 76 | assert isinstance(noise_level, torch.Tensor) 77 | z = self.q_sample(x, noise_level) 78 | return z, noise_level 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 | def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return 
torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 
62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) 81 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh/ldm/modules/encoders/noise_aug_modules.py: -------------------------------------------------------------------------------- 1 | from ..diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation 2 | from ..diffusionmodules.openaimodel import Timestep 3 | import torch 4 | 5 | class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation): 6 | def __init__(self, *args, clip_stats_path=None, timestep_dim=256, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | if clip_stats_path is None: 9 | clip_mean, clip_std = torch.zeros(timestep_dim), torch.ones(timestep_dim) 10 | else: 11 | clip_mean, clip_std = torch.load(clip_stats_path, map_location="cpu") 12 | self.register_buffer("data_mean", clip_mean[None, :], persistent=False) 13 | self.register_buffer("data_std", clip_std[None, :], persistent=False) 14 | self.time_embed = Timestep(timestep_dim) 15 | 16 | def scale(self, x): 17 | # re-normalize to centered mean and unit variance 18 | x = (x - self.data_mean) * 1. 
/ self.data_std 19 | return x 20 | 21 | def unscale(self, x): 22 | # back to original data stats 23 | x = (x * self.data_std) + self.data_mean 24 | return x 25 | 26 | def forward(self, x, noise_level=None): 27 | if noise_level is None: 28 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 29 | else: 30 | assert isinstance(noise_level, torch.Tensor) 31 | x = self.scale(x) 32 | z = self.q_sample(x, noise_level) 33 | z = self.unscale(z) 34 | noise_level = self.time_embed(noise_level) 35 | return z, noise_level 36 | -------------------------------------------------------------------------------- /backend/headless/fcbh/ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from contextlib import contextmanager 3 | 4 | class Linear(torch.nn.Module): 5 | def __init__(self, in_features: int, out_features: int, bias: bool = True, 6 | device=None, dtype=None) -> None: 7 | factory_kwargs = {'device': device, 'dtype': dtype} 8 | super().__init__() 9 | self.in_features = in_features 10 | self.out_features = out_features 11 | self.weight = torch.nn.Parameter(torch.empty((out_features, in_features), **factory_kwargs)) 12 | if bias: 13 | self.bias = torch.nn.Parameter(torch.empty(out_features, **factory_kwargs)) 14 | else: 15 | self.register_parameter('bias', None) 16 | 17 | def forward(self, input): 18 | return torch.nn.functional.linear(input, self.weight, self.bias) 19 | 20 | class Conv2d(torch.nn.Conv2d): 21 | def reset_parameters(self): 22 | return None 23 | 24 | def conv_nd(dims, *args, **kwargs): 25 | if dims == 2: 26 | return Conv2d(*args, **kwargs) 27 | else: 28 | raise ValueError(f"unsupported dimensions: {dims}") 29 | 30 | @contextmanager 31 | def use_fcbh_ops(device=None, dtype=None): # Kind of an ugly hack but I can't think of a better way 32 | old_torch_nn_linear = torch.nn.Linear 33 | force_device = device 34 | force_dtype = dtype 35 | def linear_with_dtype(in_features: int, out_features: int, bias: bool = True, device=None, dtype=None): 36 | if force_device is not None: 37 | device = force_device 38 | if force_dtype is not None: 39 | dtype = force_dtype 40 | return Linear(in_features, out_features, bias=bias, device=device, dtype=dtype) 41 | 42 | torch.nn.Linear = linear_with_dtype 43 | try: 44 | yield 45 | finally: 46 | torch.nn.Linear = old_torch_nn_linear 47 | -------------------------------------------------------------------------------- /backend/headless/fcbh/options.py: -------------------------------------------------------------------------------- 1 | 2 | args_parsing = False 3 | 4 | def enable_args_parsing(enable=True): 5 | global args_parsing 6 | args_parsing = enable 7 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sd1_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "openai/clip-vit-large-patch14", 3 | "architectures": [ 4 | "CLIPTextModel" 5 | ], 6 | "attention_dropout": 0.0, 7 | "bos_token_id": 0, 8 | "dropout": 0.0, 9 | "eos_token_id": 2, 10 | "hidden_act": "quick_gelu", 11 | "hidden_size": 768, 12 | "initializer_factor": 1.0, 13 | "initializer_range": 0.02, 14 | "intermediate_size": 3072, 15 | "layer_norm_eps": 1e-05, 16 | "max_position_embeddings": 77, 17 | "model_type": "clip_text_model", 18 | "num_attention_heads": 12, 19 | "num_hidden_layers": 12, 20 | "pad_token_id": 1, 21 | "projection_dim": 768, 22 | "torch_dtype": "float32", 23 | 
"transformers_version": "4.24.0", 24 | "vocab_size": 49408 25 | } 26 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sd1_tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "<|startoftext|>", 4 | "lstrip": false, 5 | "normalized": true, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "eos_token": { 10 | "content": "<|endoftext|>", 11 | "lstrip": false, 12 | "normalized": true, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "pad_token": "<|endoftext|>", 17 | "unk_token": { 18 | "content": "<|endoftext|>", 19 | "lstrip": false, 20 | "normalized": true, 21 | "rstrip": false, 22 | "single_word": false 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sd1_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": { 4 | "__type": "AddedToken", 5 | "content": "<|startoftext|>", 6 | "lstrip": false, 7 | "normalized": true, 8 | "rstrip": false, 9 | "single_word": false 10 | }, 11 | "do_lower_case": true, 12 | "eos_token": { 13 | "__type": "AddedToken", 14 | "content": "<|endoftext|>", 15 | "lstrip": false, 16 | "normalized": true, 17 | "rstrip": false, 18 | "single_word": false 19 | }, 20 | "errors": "replace", 21 | "model_max_length": 77, 22 | "name_or_path": "openai/clip-vit-large-patch14", 23 | "pad_token": "<|endoftext|>", 24 | "special_tokens_map_file": "./special_tokens_map.json", 25 | "tokenizer_class": "CLIPTokenizer", 26 | "unk_token": { 27 | "__type": "AddedToken", 28 | "content": "<|endoftext|>", 29 | "lstrip": false, 30 | "normalized": true, 31 | "rstrip": false, 32 | "single_word": false 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sd2_clip.py: -------------------------------------------------------------------------------- 1 | from fcbh import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SD2ClipModel(sd1_clip.SD1ClipModel): 6 | def __init__(self, arch="ViT-H-14", device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, textmodel_path=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=23 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd2_clip_config.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, textmodel_path=textmodel_path, dtype=dtype) 13 | self.empty_tokens = [[49406] + [49407] + [0] * 75] 14 | 15 | class SD2Tokenizer(sd1_clip.SD1Tokenizer): 16 | def __init__(self, tokenizer_path=None, embedding_directory=None): 17 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024) 18 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sd2_clip_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "architectures": [ 3 | "CLIPTextModel" 4 | ], 5 | "attention_dropout": 0.0, 6 | "bos_token_id": 0, 7 | "dropout": 0.0, 8 | "eos_token_id": 2, 9 | "hidden_act": "gelu", 10 | "hidden_size": 1024, 11 | "initializer_factor": 1.0, 12 | "initializer_range": 0.02, 13 | "intermediate_size": 4096, 14 | 
"layer_norm_eps": 1e-05, 15 | "max_position_embeddings": 77, 16 | "model_type": "clip_text_model", 17 | "num_attention_heads": 16, 18 | "num_hidden_layers": 24, 19 | "pad_token_id": 1, 20 | "projection_dim": 1024, 21 | "torch_dtype": "float32", 22 | "vocab_size": 49408 23 | } 24 | -------------------------------------------------------------------------------- /backend/headless/fcbh/sdxl_clip.py: -------------------------------------------------------------------------------- 1 | from fcbh import sd1_clip 2 | import torch 3 | import os 4 | 5 | class SDXLClipG(sd1_clip.SD1ClipModel): 6 | def __init__(self, device="cpu", max_length=77, freeze=True, layer="penultimate", layer_idx=None, textmodel_path=None, dtype=None): 7 | if layer == "penultimate": 8 | layer="hidden" 9 | layer_idx=-2 10 | 11 | textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_config_bigg.json") 12 | super().__init__(device=device, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, textmodel_path=textmodel_path, dtype=dtype) 13 | self.empty_tokens = [[49406] + [49407] + [0] * 75] 14 | self.layer_norm_hidden_state = False 15 | 16 | def load_sd(self, sd): 17 | return super().load_sd(sd) 18 | 19 | class SDXLClipGTokenizer(sd1_clip.SD1Tokenizer): 20 | def __init__(self, tokenizer_path=None, embedding_directory=None): 21 | super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1280, embedding_key='clip_g') 22 | 23 | 24 | class SDXLTokenizer(sd1_clip.SD1Tokenizer): 25 | def __init__(self, embedding_directory=None): 26 | self.clip_l = sd1_clip.SD1Tokenizer(embedding_directory=embedding_directory) 27 | self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory) 28 | 29 | def tokenize_with_weights(self, text:str, return_word_ids=False): 30 | out = {} 31 | out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) 32 | out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) 33 | return out 34 | 35 | def untokenize(self, token_weight_pair): 36 | return self.clip_g.untokenize(token_weight_pair) 37 | 38 | class SDXLClipModel(torch.nn.Module): 39 | def __init__(self, device="cpu", dtype=None): 40 | super().__init__() 41 | self.clip_l = sd1_clip.SD1ClipModel(layer="hidden", layer_idx=11, device=device, dtype=dtype) 42 | self.clip_l.layer_norm_hidden_state = False 43 | self.clip_g = SDXLClipG(device=device, dtype=dtype) 44 | 45 | def clip_layer(self, layer_idx): 46 | self.clip_l.clip_layer(layer_idx) 47 | self.clip_g.clip_layer(layer_idx) 48 | 49 | def reset_clip_layer(self): 50 | self.clip_g.reset_clip_layer() 51 | self.clip_l.reset_clip_layer() 52 | 53 | def encode_token_weights(self, token_weight_pairs): 54 | token_weight_pairs_g = token_weight_pairs["g"] 55 | token_weight_pairs_l = token_weight_pairs["l"] 56 | g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs_g) 57 | l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l) 58 | return torch.cat([l_out, g_out], dim=-1), g_pooled 59 | 60 | def load_sd(self, sd): 61 | if "text_model.encoder.layers.30.mlp.fc1.weight" in sd: 62 | return self.clip_g.load_sd(sd) 63 | else: 64 | return self.clip_l.load_sd(sd) 65 | 66 | class SDXLRefinerClipModel(torch.nn.Module): 67 | def __init__(self, device="cpu", dtype=None): 68 | super().__init__() 69 | self.clip_g = SDXLClipG(device=device, dtype=dtype) 70 | 71 | def clip_layer(self, layer_idx): 72 | self.clip_g.clip_layer(layer_idx) 73 | 74 | def 
reset_clip_layer(self): 75 | self.clip_g.reset_clip_layer() 76 | 77 | def encode_token_weights(self, token_weight_pairs): 78 | token_weight_pairs_g = token_weight_pairs["g"] 79 | g_out, g_pooled = self.clip_g.encode_token_weights(token_weight_pairs_g) 80 | return g_out, g_pooled 81 | 82 | def load_sd(self, sd): 83 | return self.clip_g.load_sd(sd) 84 | -------------------------------------------------------------------------------- /backend/headless/fcbh/supported_models_base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . import model_base 3 | from . import utils 4 | from . import latent_formats 5 | 6 | class ClipTarget: 7 | def __init__(self, tokenizer, clip): 8 | self.clip = clip 9 | self.tokenizer = tokenizer 10 | self.params = {} 11 | 12 | class BASE: 13 | unet_config = {} 14 | unet_extra_config = { 15 | "num_heads": -1, 16 | "num_head_channels": 64, 17 | } 18 | 19 | clip_prefix = [] 20 | clip_vision_prefix = None 21 | noise_aug_config = None 22 | beta_schedule = "linear" 23 | latent_format = latent_formats.LatentFormat 24 | 25 | @classmethod 26 | def matches(s, unet_config): 27 | for k in s.unet_config: 28 | if s.unet_config[k] != unet_config[k]: 29 | return False 30 | return True 31 | 32 | def model_type(self, state_dict, prefix=""): 33 | return model_base.ModelType.EPS 34 | 35 | def inpaint_model(self): 36 | return self.unet_config["in_channels"] > 4 37 | 38 | def __init__(self, unet_config): 39 | self.unet_config = unet_config 40 | self.latent_format = self.latent_format() 41 | for x in self.unet_extra_config: 42 | self.unet_config[x] = self.unet_extra_config[x] 43 | 44 | def get_model(self, state_dict, prefix="", device=None): 45 | if self.noise_aug_config is not None: 46 | out = model_base.SD21UNCLIP(self, self.noise_aug_config, model_type=self.model_type(state_dict, prefix), device=device) 47 | else: 48 | out = model_base.BaseModel(self, model_type=self.model_type(state_dict, prefix), device=device) 49 | if self.inpaint_model(): 50 | out.set_inpaint() 51 | return out 52 | 53 | def process_clip_state_dict(self, state_dict): 54 | return state_dict 55 | 56 | def process_clip_state_dict_for_saving(self, state_dict): 57 | replace_prefix = {"": "cond_stage_model."} 58 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 59 | 60 | def process_unet_state_dict_for_saving(self, state_dict): 61 | replace_prefix = {"": "model.diffusion_model."} 62 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 63 | 64 | def process_vae_state_dict_for_saving(self, state_dict): 65 | replace_prefix = {"": "first_stage_model."} 66 | return utils.state_dict_prefix_replace(state_dict, replace_prefix) 67 | 68 | -------------------------------------------------------------------------------- /backend/headless/fcbh/taesd/taesd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Tiny AutoEncoder for Stable Diffusion 4 | (DNN for encoding / decoding SD's latent space) 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | 9 | import fcbh.utils 10 | 11 | def conv(n_in, n_out, **kwargs): 12 | return nn.Conv2d(n_in, n_out, 3, padding=1, **kwargs) 13 | 14 | class Clamp(nn.Module): 15 | def forward(self, x): 16 | return torch.tanh(x / 3) * 3 17 | 18 | class Block(nn.Module): 19 | def __init__(self, n_in, n_out): 20 | super().__init__() 21 | self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out)) 22 | 
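# Residual block: the three 3x3 convs above form the main path; the skip path below
# is a 1x1 conv when the channel count changes (identity otherwise), and forward()
# sums the two paths and applies a final ReLU.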
self.skip = nn.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity() 23 | self.fuse = nn.ReLU() 24 | def forward(self, x): 25 | return self.fuse(self.conv(x) + self.skip(x)) 26 | 27 | def Encoder(): 28 | return nn.Sequential( 29 | conv(3, 64), Block(64, 64), 30 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 31 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 32 | conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), 33 | conv(64, 4), 34 | ) 35 | 36 | def Decoder(): 37 | return nn.Sequential( 38 | Clamp(), conv(4, 64), nn.ReLU(), 39 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 40 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 41 | Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), 42 | Block(64, 64), conv(64, 3), 43 | ) 44 | 45 | class TAESD(nn.Module): 46 | latent_magnitude = 3 47 | latent_shift = 0.5 48 | 49 | def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder.pth"): 50 | """Initialize pretrained TAESD on the given device from the given checkpoints.""" 51 | super().__init__() 52 | self.encoder = Encoder() 53 | self.decoder = Decoder() 54 | if encoder_path is not None: 55 | self.encoder.load_state_dict(fcbh.utils.load_torch_file(encoder_path, safe_load=True)) 56 | if decoder_path is not None: 57 | self.decoder.load_state_dict(fcbh.utils.load_torch_file(decoder_path, safe_load=True)) 58 | 59 | @staticmethod 60 | def scale_latents(x): 61 | """raw latents -> [0, 1]""" 62 | return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) 63 | 64 | @staticmethod 65 | def unscale_latents(x): 66 | """[0, 1] -> raw latents""" 67 | return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) 68 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh_extras/chainner_models/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/LICENSE-HAT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Xiangyu Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/LICENSE-RealESRGAN: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Xintao Wang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/OmniSR/ChannelAttention.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class CA_layer(nn.Module): 7 | def __init__(self, channel, reduction=16): 8 | super(CA_layer, self).__init__() 9 | # global average pooling 10 | self.gap = nn.AdaptiveAvgPool2d(1) 11 | self.fc = nn.Sequential( 12 | nn.Conv2d(channel, channel // reduction, kernel_size=(1, 1), bias=False), 13 | nn.GELU(), 14 | nn.Conv2d(channel // reduction, channel, kernel_size=(1, 1), bias=False), 15 | # nn.Sigmoid() 16 | ) 17 | 18 | def forward(self, x): 19 | y = self.fc(self.gap(x)) 20 | return x * y.expand_as(x) 21 | 22 | 23 | class Simple_CA_layer(nn.Module): 24 | def __init__(self, channel): 25 | super(Simple_CA_layer, self).__init__() 26 | self.gap = nn.AdaptiveAvgPool2d(1) 27 | self.fc = nn.Conv2d( 28 | in_channels=channel, 29 | out_channels=channel, 30 | kernel_size=1, 31 | padding=0, 32 | stride=1, 33 | groups=1, 34 | bias=True, 35 | ) 36 | 37 | def forward(self, x): 38 | return x * self.fc(self.gap(x)) 39 | 40 | 41 | class ECA_layer(nn.Module): 42 | """Constructs a ECA module. 
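The 1-D convolution kernel size is chosen adaptively from the channel count
(for example, channel=64 yields k_size=3 and channel=512 yields k_size=5).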
43 | Args: 44 | channel: Number of channels of the input feature map 45 | k_size: Adaptive selection of kernel size 46 | """ 47 | 48 | def __init__(self, channel): 49 | super(ECA_layer, self).__init__() 50 | 51 | b = 1 52 | gamma = 2 53 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 54 | k_size = k_size if k_size % 2 else k_size + 1 55 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 56 | self.conv = nn.Conv1d( 57 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 58 | ) 59 | # self.sigmoid = nn.Sigmoid() 60 | 61 | def forward(self, x): 62 | # x: input features with shape [b, c, h, w] 63 | # b, c, h, w = x.size() 64 | 65 | # feature descriptor on the global spatial information 66 | y = self.avg_pool(x) 67 | 68 | # Two different branches of ECA module 69 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 70 | 71 | # Multi-scale information fusion 72 | # y = self.sigmoid(y) 73 | 74 | return x * y.expand_as(x) 75 | 76 | 77 | class ECA_MaxPool_layer(nn.Module): 78 | """Constructs a ECA module. 79 | Args: 80 | channel: Number of channels of the input feature map 81 | k_size: Adaptive selection of kernel size 82 | """ 83 | 84 | def __init__(self, channel): 85 | super(ECA_MaxPool_layer, self).__init__() 86 | 87 | b = 1 88 | gamma = 2 89 | k_size = int(abs(math.log(channel, 2) + b) / gamma) 90 | k_size = k_size if k_size % 2 else k_size + 1 91 | self.max_pool = nn.AdaptiveMaxPool2d(1) 92 | self.conv = nn.Conv1d( 93 | 1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False 94 | ) 95 | # self.sigmoid = nn.Sigmoid() 96 | 97 | def forward(self, x): 98 | # x: input features with shape [b, c, h, w] 99 | # b, c, h, w = x.size() 100 | 101 | # feature descriptor on the global spatial information 102 | y = self.max_pool(x) 103 | 104 | # Two different branches of ECA module 105 | y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1) 106 | 107 | # Multi-scale information fusion 108 | # y = self.sigmoid(y) 109 | 110 | return x * y.expand_as(x) 111 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/OmniSR/OSAG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: OSAG.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Sunday, 23rd April 2023 3:08:49 pm 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | 14 | import torch.nn as nn 15 | 16 | from .esa import ESA 17 | from .OSA import OSA_Block 18 | 19 | 20 | class OSAG(nn.Module): 21 | def __init__( 22 | self, 23 | channel_num=64, 24 | bias=True, 25 | block_num=4, 26 | ffn_bias=False, 27 | window_size=0, 28 | pe=False, 29 | ): 30 | super(OSAG, self).__init__() 31 | 32 | # print("window_size: %d" % (window_size)) 33 | # print("with_pe", pe) 34 | # print("ffn_bias: %d" % (ffn_bias)) 35 | 36 | # block_script_name = kwargs.get("block_script_name", "OSA") 37 | # block_class_name = kwargs.get("block_class_name", "OSA_Block") 38 | 39 | # script_name = "." 
+ block_script_name 40 | # package = __import__(script_name, fromlist=True) 41 | block_class = OSA_Block # getattr(package, block_class_name) 42 | group_list = [] 43 | for _ in range(block_num): 44 | temp_res = block_class( 45 | channel_num, 46 | bias, 47 | ffn_bias=ffn_bias, 48 | window_size=window_size, 49 | with_pe=pe, 50 | ) 51 | group_list.append(temp_res) 52 | group_list.append(nn.Conv2d(channel_num, channel_num, 1, 1, 0, bias=bias)) 53 | self.residual_layer = nn.Sequential(*group_list) 54 | esa_channel = max(channel_num // 4, 16) 55 | self.esa = ESA(esa_channel, channel_num) 56 | 57 | def forward(self, x): 58 | out = self.residual_layer(x) 59 | out = out + x 60 | return self.esa(out) 61 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/OmniSR/layernorm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | ############################################################# 4 | # File: layernorm.py 5 | # Created Date: Tuesday April 28th 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Thursday, 20th April 2023 9:28:20 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2020 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | 17 | class LayerNormFunction(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, weight, bias, eps): 20 | ctx.eps = eps 21 | N, C, H, W = x.size() 22 | mu = x.mean(1, keepdim=True) 23 | var = (x - mu).pow(2).mean(1, keepdim=True) 24 | y = (x - mu) / (var + eps).sqrt() 25 | ctx.save_for_backward(y, var, weight) 26 | y = weight.view(1, C, 1, 1) * y + bias.view(1, C, 1, 1) 27 | return y 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | eps = ctx.eps 32 | 33 | N, C, H, W = grad_output.size() 34 | y, var, weight = ctx.saved_variables 35 | g = grad_output * weight.view(1, C, 1, 1) 36 | mean_g = g.mean(dim=1, keepdim=True) 37 | 38 | mean_gy = (g * y).mean(dim=1, keepdim=True) 39 | gx = 1.0 / torch.sqrt(var + eps) * (g - y * mean_gy - mean_g) 40 | return ( 41 | gx, 42 | (grad_output * y).sum(dim=3).sum(dim=2).sum(dim=0), 43 | grad_output.sum(dim=3).sum(dim=2).sum(dim=0), 44 | None, 45 | ) 46 | 47 | 48 | class LayerNorm2d(nn.Module): 49 | def __init__(self, channels, eps=1e-6): 50 | super(LayerNorm2d, self).__init__() 51 | self.register_parameter("weight", nn.Parameter(torch.ones(channels))) 52 | self.register_parameter("bias", nn.Parameter(torch.zeros(channels))) 53 | self.eps = eps 54 | 55 | def forward(self, x): 56 | return LayerNormFunction.apply(x, self.weight, self.bias, self.eps) 57 | 58 | 59 | class GRN(nn.Module): 60 | """GRN (Global Response Normalization) layer""" 61 | 62 | def __init__(self, dim): 63 | super().__init__() 64 | self.gamma = nn.Parameter(torch.zeros(1, dim, 1, 1)) 65 | self.beta = nn.Parameter(torch.zeros(1, dim, 1, 1)) 66 | 67 | def forward(self, x): 68 | Gx = torch.norm(x, p=2, dim=(2, 3), keepdim=True) 69 | Nx = Gx / (Gx.mean(dim=1, keepdim=True) + 1e-6) 70 | return self.gamma * (x * Nx) + self.beta + x 71 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/OmniSR/pixelshuffle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- 
coding:utf-8 -*- 3 | ############################################################# 4 | # File: pixelshuffle.py 5 | # Created Date: Friday July 1st 2022 6 | # Author: Chen Xuanhong 7 | # Email: chenxuanhongzju@outlook.com 8 | # Last Modified: Friday, 1st July 2022 10:18:39 am 9 | # Modified By: Chen Xuanhong 10 | # Copyright (c) 2022 Shanghai Jiao Tong University 11 | ############################################################# 12 | 13 | import torch.nn as nn 14 | 15 | 16 | def pixelshuffle_block( 17 | in_channels, out_channels, upscale_factor=2, kernel_size=3, bias=False 18 | ): 19 | """ 20 | Upsample features according to `upscale_factor`. 21 | """ 22 | padding = kernel_size // 2 23 | conv = nn.Conv2d( 24 | in_channels, 25 | out_channels * (upscale_factor**2), 26 | kernel_size, 27 | padding=1, 28 | bias=bias, 29 | ) 30 | pixel_shuffle = nn.PixelShuffle(upscale_factor) 31 | return nn.Sequential(*[conv, pixel_shuffle]) 32 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/backend/headless/fcbh_extras/chainner_models/architecture/__init__.py -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/face/LICENSE-codeformer: -------------------------------------------------------------------------------- 1 | S-Lab License 1.0 2 | 3 | Copyright 2022 S-Lab 4 | 5 | Redistribution and use for non-commercial purpose in source and 6 | binary forms, with or without modification, are permitted provided 7 | that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in 14 | the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived 19 | from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | In the event that redistribution and/or use for commercial purpose in 34 | source or binary forms, with or without modification is required, 35 | please contact the contributor(s) of the work. 
36 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/face/fused_act.py: -------------------------------------------------------------------------------- 1 | # pylint: skip-file 2 | # type: ignore 3 | # modify from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 4 | 5 | import torch 6 | from torch import nn 7 | from torch.autograd import Function 8 | 9 | fused_act_ext = None 10 | 11 | 12 | class FusedLeakyReLUFunctionBackward(Function): 13 | @staticmethod 14 | def forward(ctx, grad_output, out, negative_slope, scale): 15 | ctx.save_for_backward(out) 16 | ctx.negative_slope = negative_slope 17 | ctx.scale = scale 18 | 19 | empty = grad_output.new_empty(0) 20 | 21 | grad_input = fused_act_ext.fused_bias_act( 22 | grad_output, empty, out, 3, 1, negative_slope, scale 23 | ) 24 | 25 | dim = [0] 26 | 27 | if grad_input.ndim > 2: 28 | dim += list(range(2, grad_input.ndim)) 29 | 30 | grad_bias = grad_input.sum(dim).detach() 31 | 32 | return grad_input, grad_bias 33 | 34 | @staticmethod 35 | def backward(ctx, gradgrad_input, gradgrad_bias): 36 | (out,) = ctx.saved_tensors 37 | gradgrad_out = fused_act_ext.fused_bias_act( 38 | gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale 39 | ) 40 | 41 | return gradgrad_out, None, None, None 42 | 43 | 44 | class FusedLeakyReLUFunction(Function): 45 | @staticmethod 46 | def forward(ctx, input, bias, negative_slope, scale): 47 | empty = input.new_empty(0) 48 | out = fused_act_ext.fused_bias_act( 49 | input, bias, empty, 3, 0, negative_slope, scale 50 | ) 51 | ctx.save_for_backward(out) 52 | ctx.negative_slope = negative_slope 53 | ctx.scale = scale 54 | 55 | return out 56 | 57 | @staticmethod 58 | def backward(ctx, grad_output): 59 | (out,) = ctx.saved_tensors 60 | 61 | grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply( 62 | grad_output, out, ctx.negative_slope, ctx.scale 63 | ) 64 | 65 | return grad_input, grad_bias, None, None 66 | 67 | 68 | class FusedLeakyReLU(nn.Module): 69 | def __init__(self, channel, negative_slope=0.2, scale=2**0.5): 70 | super().__init__() 71 | 72 | self.bias = nn.Parameter(torch.zeros(channel)) 73 | self.negative_slope = negative_slope 74 | self.scale = scale 75 | 76 | def forward(self, input): 77 | return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale) 78 | 79 | 80 | def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2**0.5): 81 | return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale) 82 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/architecture/timm/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import collections.abc 5 | from itertools import repeat 6 | 7 | 8 | # From PyTorch internals 9 | def _ntuple(n): 10 | def parse(x): 11 | if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): 12 | return x 13 | return tuple(repeat(x, n)) 14 | 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=0.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # 
Make sure that round down does not go down by more than 10%. 29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/model_loading.py: -------------------------------------------------------------------------------- 1 | import logging as logger 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | from .types import PyTorchModel 18 | 19 | 20 | class UnsupportedModel(Exception): 21 | pass 22 | 23 | 24 | def load_state_dict(state_dict) -> PyTorchModel: 25 | logger.debug(f"Loading state dict into pytorch model arch") 26 | 27 | state_dict_keys = list(state_dict.keys()) 28 | 29 | if "params_ema" in state_dict_keys: 30 | state_dict = state_dict["params_ema"] 31 | elif "params-ema" in state_dict_keys: 32 | state_dict = state_dict["params-ema"] 33 | elif "params" in state_dict_keys: 34 | state_dict = state_dict["params"] 35 | 36 | state_dict_keys = list(state_dict.keys()) 37 | # SRVGGNet Real-ESRGAN (v2) 38 | if "body.0.weight" in state_dict_keys and "body.1.weight" in state_dict_keys: 39 | model = RealESRGANv2(state_dict) 40 | # SPSR (ESRGAN with lots of extra layers) 41 | elif "f_HR_conv1.0.weight" in state_dict: 42 | model = SPSR(state_dict) 43 | # Swift-SRGAN 44 | elif ( 45 | "model" in state_dict_keys 46 | and "initial.cnn.depthwise.weight" in state_dict["model"].keys() 47 | ): 48 | model = SwiftSRGAN(state_dict) 49 | # SwinIR, Swin2SR, HAT 50 | elif "layers.0.residual_group.blocks.0.norm1.weight" in state_dict_keys: 51 | if ( 52 | "layers.0.residual_group.blocks.0.conv_block.cab.0.weight" 53 | in state_dict_keys 54 | ): 55 | model = HAT(state_dict) 56 | elif "patch_embed.proj.weight" in state_dict_keys: 57 | model = Swin2SR(state_dict) 58 | else: 59 | model = SwinIR(state_dict) 60 | # GFPGAN 61 | elif ( 62 | "toRGB.0.weight" in state_dict_keys 63 | and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys 64 | ): 65 | model = GFPGANv1Clean(state_dict) 66 | # RestoreFormer 67 | elif ( 68 | "encoder.conv_in.weight" in state_dict_keys 69 | and "encoder.down.0.block.0.norm1.weight" in state_dict_keys 70 | ): 71 | model = RestoreFormer(state_dict) 72 | elif ( 73 | "encoder.blocks.0.weight" in state_dict_keys 74 | and "quantize.embedding.weight" in state_dict_keys 75 | ): 76 | model = CodeFormer(state_dict) 77 | # LaMa 78 | elif ( 79 | "model.model.1.bn_l.running_mean" in state_dict_keys 80 | or "generator.model.1.bn_l.running_mean" in state_dict_keys 81 | ): 82 | model = LaMa(state_dict) 83 | # Omni-SR 84 | elif "residual_layer.0.residual_layer.0.layer.0.fn.0.weight" in state_dict_keys: 85 | model = OmniSR(state_dict) 86 | # SCUNet 87 | elif "m_head.0.weight" in state_dict_keys and "m_tail.0.weight" in state_dict_keys: 88 | model = SCUNet(state_dict) 89 | # DAT 90 | 
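# (identified by the attention-mask buffers that only DAT checkpoints carry; any
# state dict that no branch matches falls through to the ESRGAN loader below,
# which raises UnsupportedModel if the keys do not fit either)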
elif "layers.0.blocks.2.attn.attn_mask_0" in state_dict_keys: 91 | model = DAT(state_dict) 92 | # Regular ESRGAN, "new-arch" ESRGAN, Real-ESRGAN v1 93 | else: 94 | try: 95 | model = ESRGAN(state_dict) 96 | except: 97 | # pylint: disable=raise-missing-from 98 | raise UnsupportedModel 99 | return model 100 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/chainner_models/types.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from .architecture.DAT import DAT 4 | from .architecture.face.codeformer import CodeFormer 5 | from .architecture.face.gfpganv1_clean_arch import GFPGANv1Clean 6 | from .architecture.face.restoreformer_arch import RestoreFormer 7 | from .architecture.HAT import HAT 8 | from .architecture.LaMa import LaMa 9 | from .architecture.OmniSR.OmniSR import OmniSR 10 | from .architecture.RRDB import RRDBNet as ESRGAN 11 | from .architecture.SCUNet import SCUNet 12 | from .architecture.SPSR import SPSRNet as SPSR 13 | from .architecture.SRVGG import SRVGGNetCompact as RealESRGANv2 14 | from .architecture.SwiftSRGAN import Generator as SwiftSRGAN 15 | from .architecture.Swin2SR import Swin2SR 16 | from .architecture.SwinIR import SwinIR 17 | 18 | PyTorchSRModels = ( 19 | RealESRGANv2, 20 | SPSR, 21 | SwiftSRGAN, 22 | ESRGAN, 23 | SwinIR, 24 | Swin2SR, 25 | HAT, 26 | OmniSR, 27 | SCUNet, 28 | DAT, 29 | ) 30 | PyTorchSRModel = Union[ 31 | RealESRGANv2, 32 | SPSR, 33 | SwiftSRGAN, 34 | ESRGAN, 35 | SwinIR, 36 | Swin2SR, 37 | HAT, 38 | OmniSR, 39 | SCUNet, 40 | DAT, 41 | ] 42 | 43 | 44 | def is_pytorch_sr_model(model: object): 45 | return isinstance(model, PyTorchSRModels) 46 | 47 | 48 | PyTorchFaceModels = (GFPGANv1Clean, RestoreFormer, CodeFormer) 49 | PyTorchFaceModel = Union[GFPGANv1Clean, RestoreFormer, CodeFormer] 50 | 51 | 52 | def is_pytorch_face_model(model: object): 53 | return isinstance(model, PyTorchFaceModels) 54 | 55 | 56 | PyTorchInpaintModels = (LaMa,) 57 | PyTorchInpaintModel = Union[LaMa] 58 | 59 | 60 | def is_pytorch_inpaint_model(model: object): 61 | return isinstance(model, PyTorchInpaintModels) 62 | 63 | 64 | PyTorchModels = (*PyTorchSRModels, *PyTorchFaceModels, *PyTorchInpaintModels) 65 | PyTorchModel = Union[PyTorchSRModel, PyTorchFaceModel, PyTorchInpaintModel] 66 | 67 | 68 | def is_pytorch_model(model: object): 69 | return isinstance(model, PyTorchModels) 70 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/nodes_clip_sdxl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from nodes import MAX_RESOLUTION 3 | 4 | class CLIPTextEncodeSDXLRefiner: 5 | @classmethod 6 | def INPUT_TYPES(s): 7 | return {"required": { 8 | "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}), 9 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 10 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 11 | "text": ("STRING", {"multiline": True}), "clip": ("CLIP", ), 12 | }} 13 | RETURN_TYPES = ("CONDITIONING",) 14 | FUNCTION = "encode" 15 | 16 | CATEGORY = "advanced/conditioning" 17 | 18 | def encode(self, clip, ascore, width, height, text): 19 | tokens = clip.tokenize(text) 20 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 21 | return ([[cond, {"pooled_output": pooled, "aesthetic_score": ascore, "width": width,"height": height}]], ) 22 | 23 | 
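# Both encoders return CONDITIONING as a list of [cond_tensor, options_dict] pairs:
# the refiner variant above attaches an aesthetic_score, while the full SDXL encoder
# below carries the crop and target-size hints instead.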
class CLIPTextEncodeSDXL: 24 | @classmethod 25 | def INPUT_TYPES(s): 26 | return {"required": { 27 | "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 28 | "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 29 | "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 30 | "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}), 31 | "target_width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 32 | "target_height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}), 33 | "text_g": ("STRING", {"multiline": True, "default": "CLIP_G"}), "clip": ("CLIP", ), 34 | "text_l": ("STRING", {"multiline": True, "default": "CLIP_L"}), "clip": ("CLIP", ), 35 | }} 36 | RETURN_TYPES = ("CONDITIONING",) 37 | FUNCTION = "encode" 38 | 39 | CATEGORY = "advanced/conditioning" 40 | 41 | def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l): 42 | tokens = clip.tokenize(text_g) 43 | tokens["l"] = clip.tokenize(text_l)["l"] 44 | if len(tokens["l"]) != len(tokens["g"]): 45 | empty = clip.tokenize("") 46 | while len(tokens["l"]) < len(tokens["g"]): 47 | tokens["l"] += empty["l"] 48 | while len(tokens["l"]) > len(tokens["g"]): 49 | tokens["g"] += empty["g"] 50 | cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True) 51 | return ([[cond, {"pooled_output": pooled, "width": width, "height": height, "crop_w": crop_w, "crop_h": crop_h, "target_width": target_width, "target_height": target_height}]], ) 52 | 53 | NODE_CLASS_MAPPINGS = { 54 | "CLIPTextEncodeSDXLRefiner": CLIPTextEncodeSDXLRefiner, 55 | "CLIPTextEncodeSDXL": CLIPTextEncodeSDXL, 56 | } 57 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/nodes_latent.py: -------------------------------------------------------------------------------- 1 | import fcbh.utils 2 | 3 | def reshape_latent_to(target_shape, latent): 4 | if latent.shape[1:] != target_shape[1:]: 5 | latent.movedim(1, -1) 6 | latent = fcbh.utils.common_upscale(latent, target_shape[3], target_shape[2], "bilinear", "center") 7 | latent.movedim(-1, 1) 8 | return fcbh.utils.repeat_to_batch_size(latent, target_shape[0]) 9 | 10 | 11 | class LatentAdd: 12 | @classmethod 13 | def INPUT_TYPES(s): 14 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 15 | 16 | RETURN_TYPES = ("LATENT",) 17 | FUNCTION = "op" 18 | 19 | CATEGORY = "latent/advanced" 20 | 21 | def op(self, samples1, samples2): 22 | samples_out = samples1.copy() 23 | 24 | s1 = samples1["samples"] 25 | s2 = samples2["samples"] 26 | 27 | s2 = reshape_latent_to(s1.shape, s2) 28 | samples_out["samples"] = s1 + s2 29 | return (samples_out,) 30 | 31 | class LatentSubtract: 32 | @classmethod 33 | def INPUT_TYPES(s): 34 | return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} 35 | 36 | RETURN_TYPES = ("LATENT",) 37 | FUNCTION = "op" 38 | 39 | CATEGORY = "latent/advanced" 40 | 41 | def op(self, samples1, samples2): 42 | samples_out = samples1.copy() 43 | 44 | s1 = samples1["samples"] 45 | s2 = samples2["samples"] 46 | 47 | s2 = reshape_latent_to(s1.shape, s2) 48 | samples_out["samples"] = s1 - s2 49 | return (samples_out,) 50 | 51 | class LatentMultiply: 52 | @classmethod 53 | def INPUT_TYPES(s): 54 | return {"required": { "samples": ("LATENT",), 55 | "multiplier": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), 56 | }} 57 | 58 | RETURN_TYPES = ("LATENT",) 59 | FUNCTION = "op" 60 | 61 | 
CATEGORY = "latent/advanced" 62 | 63 | def op(self, samples, multiplier): 64 | samples_out = samples.copy() 65 | 66 | s1 = samples["samples"] 67 | samples_out["samples"] = s1 * multiplier 68 | return (samples_out,) 69 | 70 | NODE_CLASS_MAPPINGS = { 71 | "LatentAdd": LatentAdd, 72 | "LatentSubtract": LatentSubtract, 73 | "LatentMultiply": LatentMultiply, 74 | } 75 | -------------------------------------------------------------------------------- /backend/headless/fcbh_extras/nodes_upscale_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | from fcbh_extras.chainner_models import model_loading 3 | from fcbh import model_management 4 | import torch 5 | import fcbh.utils 6 | import folder_paths 7 | 8 | class UpscaleModelLoader: 9 | @classmethod 10 | def INPUT_TYPES(s): 11 | return {"required": { "model_name": (folder_paths.get_filename_list("upscale_models"), ), 12 | }} 13 | RETURN_TYPES = ("UPSCALE_MODEL",) 14 | FUNCTION = "load_model" 15 | 16 | CATEGORY = "loaders" 17 | 18 | def load_model(self, model_name): 19 | model_path = folder_paths.get_full_path("upscale_models", model_name) 20 | sd = fcbh.utils.load_torch_file(model_path, safe_load=True) 21 | if "module.layers.0.residual_group.blocks.0.norm1.weight" in sd: 22 | sd = fcbh.utils.state_dict_prefix_replace(sd, {"module.":""}) 23 | out = model_loading.load_state_dict(sd).eval() 24 | return (out, ) 25 | 26 | 27 | class ImageUpscaleWithModel: 28 | @classmethod 29 | def INPUT_TYPES(s): 30 | return {"required": { "upscale_model": ("UPSCALE_MODEL",), 31 | "image": ("IMAGE",), 32 | }} 33 | RETURN_TYPES = ("IMAGE",) 34 | FUNCTION = "upscale" 35 | 36 | CATEGORY = "image/upscaling" 37 | 38 | def upscale(self, upscale_model, image): 39 | device = model_management.get_torch_device() 40 | upscale_model.to(device) 41 | in_img = image.movedim(-1,-3).to(device) 42 | free_memory = model_management.get_free_memory(device) 43 | 44 | tile = 512 45 | overlap = 32 46 | 47 | oom = True 48 | while oom: 49 | try: 50 | steps = in_img.shape[0] * fcbh.utils.get_tiled_scale_steps(in_img.shape[3], in_img.shape[2], tile_x=tile, tile_y=tile, overlap=overlap) 51 | pbar = fcbh.utils.ProgressBar(steps) 52 | s = fcbh.utils.tiled_scale(in_img, lambda a: upscale_model(a), tile_x=tile, tile_y=tile, overlap=overlap, upscale_amount=upscale_model.scale, pbar=pbar) 53 | oom = False 54 | except model_management.OOM_EXCEPTION as e: 55 | tile //= 2 56 | if tile < 128: 57 | raise e 58 | 59 | upscale_model.cpu() 60 | s = torch.clamp(s.movedim(-3,-1), min=0, max=1.0) 61 | return (s,) 62 | 63 | NODE_CLASS_MAPPINGS = { 64 | "UpscaleModelLoader": UpscaleModelLoader, 65 | "ImageUpscaleWithModel": ImageUpscaleWithModel 66 | } 67 | -------------------------------------------------------------------------------- /build_launcher.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | win32_root = os.path.dirname(os.path.dirname(__file__)) 4 | python_embeded_path = os.path.join(win32_root, 'python_embeded') 5 | 6 | is_win32_standalone_build = os.path.exists(python_embeded_path) and os.path.isdir(python_embeded_path) 7 | 8 | win32_cmd = ''' 9 | .\python_embeded\python.exe -s Fooocus\entry_with_update.py {cmds} %* 10 | pause 11 | ''' 12 | 13 | 14 | def build_launcher(): 15 | if not is_win32_standalone_build: 16 | return 17 | 18 | presets = [None, 'anime', 'realistic'] 19 | 20 | for preset in presets: 21 | win32_cmd_preset = win32_cmd.replace('{cmds}', '' if preset is None else f'--preset 
{preset}') 22 | bat_path = os.path.join(win32_root, 'run.bat' if preset is None else f'run_{preset}.bat') 23 | if not os.path.exists(bat_path): 24 | with open(bat_path, "w", encoding="utf-8") as f: 25 | f.write(win32_cmd_preset) 26 | return 27 | -------------------------------------------------------------------------------- /colab_fix.txt: -------------------------------------------------------------------------------- 1 | {"default_refiner": ""} -------------------------------------------------------------------------------- /css/style.css: -------------------------------------------------------------------------------- 1 | /* based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/style.css */ 2 | 3 | #context-menu{ 4 | z-index:9999; 5 | position:absolute; 6 | display:block; 7 | padding:0px 0; 8 | border:2px solid #a55000; 9 | border-radius:8px; 10 | box-shadow:1px 1px 2px #CE6400; 11 | width: 200px; 12 | } 13 | 14 | .context-menu-items{ 15 | list-style: none; 16 | margin: 0; 17 | padding: 0; 18 | } 19 | 20 | .context-menu-items a{ 21 | display:block; 22 | padding:5px; 23 | cursor:pointer; 24 | } 25 | 26 | .context-menu-items a:hover{ 27 | background: #a55000; 28 | } 29 | -------------------------------------------------------------------------------- /entry_with_update.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | root = os.path.dirname(os.path.abspath(__file__)) 6 | sys.path.append(root) 7 | os.chdir(root) 8 | always_update = True 9 | 10 | if(always_update): 11 | try: 12 | import pygit2 13 | pygit2.option(pygit2.GIT_OPT_SET_OWNER_VALIDATION, 0) 14 | 15 | repo = pygit2.Repository(os.path.abspath(os.path.dirname(__file__))) 16 | 17 | branch_name = repo.head.shorthand 18 | 19 | remote_name = 'origin' 20 | remote = repo.remotes[remote_name] 21 | 22 | remote.fetch() 23 | 24 | local_branch_ref = f'refs/heads/{branch_name}' 25 | local_branch = repo.lookup_reference(local_branch_ref) 26 | 27 | remote_reference = f'refs/remotes/{remote_name}/{branch_name}' 28 | remote_commit = repo.revparse_single(remote_reference) 29 | 30 | merge_result, _ = repo.merge_analysis(remote_commit.id) 31 | 32 | if merge_result & pygit2.GIT_MERGE_ANALYSIS_UP_TO_DATE: 33 | print("Already up-to-date") 34 | elif merge_result & pygit2.GIT_MERGE_ANALYSIS_FASTFORWARD: 35 | local_branch.set_target(remote_commit.id) 36 | repo.head.set_target(remote_commit.id) 37 | repo.checkout_tree(repo.get(remote_commit.id)) 38 | repo.reset(local_branch.target, pygit2.GIT_RESET_HARD) 39 | print("Fast-forward merge") 40 | elif merge_result & pygit2.GIT_MERGE_ANALYSIS_NORMAL: 41 | print("Update failed - Did you modified any file?") 42 | except Exception as e: 43 | print('Update failed.') 44 | print(str(e)) 45 | 46 | print('Update succeeded.') 47 | from launch import * 48 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: fooocusControl 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.10 6 | - pip=23.0 7 | - packaging 8 | -------------------------------------------------------------------------------- /fooocusControl_colab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "initial_id", 7 | "metadata": { 8 | "collapsed": true 9 | }, 10 | "outputs": [], 11 | 
"source": [ 12 | "!pip install pygit2==1.12.2\n", 13 | "%cd /content\n", 14 | "!git clone https://github.com/fenneishi/Fooocus-Control.git\n", 15 | "%cd /content/Fooocus\n", 16 | "!cp colab_fix.txt user_path_config.txt\n", 17 | "# for FooocusControl(follow Fooocus) Realistic Edition.\n", 18 | "!python entry_with_update.py --preset realistic --share \n", 19 | "# for FooocusControl(follow Fooocus) Anime Edition.\n", 20 | "# !python entry_with_update.py --preset anime --share " 21 | ] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 2 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython2", 40 | "version": "2.7.6" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 5 45 | } 46 | -------------------------------------------------------------------------------- /fooocus_colab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "id": "VjYy0F2gZIPR" 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "!pip install pygit2==1.12.2\n", 12 | "%cd /content\n", 13 | "!git clone https://github.com/lllyasviel/Fooocus\n", 14 | "%cd /content/Fooocus\n", 15 | "!cp colab_fix.txt user_path_config.txt\n", 16 | "!python entry_with_update.py --share\n" 17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "accelerator": "GPU", 22 | "colab": { 23 | "gpuType": "T4", 24 | "provenance": [] 25 | }, 26 | "kernelspec": { 27 | "display_name": "Python 3", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "name": "python" 32 | } 33 | }, 34 | "nbformat": 4, 35 | "nbformat_minor": 0 36 | } 37 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/CPDS/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from einops import rearrange 4 | import numpy as np 5 | import cv2 6 | 7 | 8 | class CPDS: 9 | # remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/ZoeD_M12_N.pt" 10 | # modelpath = os.path.join(annotator_ckpts_path, "ZoeD_M12_N.pt") 11 | def __init__(self, model_path): 12 | self.model = None 13 | 14 | @classmethod 15 | def norm255(cls, GrayImage, low=4, high=96): 16 | assert isinstance(GrayImage, np.ndarray) 17 | assert GrayImage.ndim == 2 and GrayImage.dtype == np.float32 18 | 19 | v_min = np.percentile(GrayImage, low) 20 | v_max = np.percentile(GrayImage, high) 21 | 22 | if np.allclose(v_min, v_max): 23 | GrayImage = GrayImage * 0 # Avoid 0-division 24 | else: 25 | GrayImage = (GrayImage - v_min) / (v_max - v_min) 26 | 27 | GrayImage -= v_min 28 | GrayImage /= v_max - v_min 29 | return GrayImage * 255.0 30 | 31 | def __call__(self, RGB): 32 | assert RGB.ndim == 3 33 | with torch.no_grad(): 34 | # cv2.decolor is not "decolor", it is Cewu Lu's method 35 | # See http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html 36 | # See https://docs.opencv.org/3.0-beta/modules/photo/doc/decolor.html 37 | 38 | raw = cv2.GaussianBlur(RGB, (0, 0), 0.8) 39 | density, boost = cv2.decolor(raw) 40 | 41 | raw = raw.astype(np.float32) 42 | density = density.astype(np.float32) 43 | boost = boost.astype(np.float32) 44 | 45 | offset 
= np.sum((raw - boost) ** 2.0, axis=2) ** 0.5 46 | result = density + offset 47 | 48 | return self.norm255(result, low=4, high=96).clip(0, 255).astype(np.uint8) 49 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/OpenPose/hand.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import json 3 | import numpy as np 4 | import math 5 | import time 6 | from scipy.ndimage.filters import gaussian_filter 7 | import matplotlib.pyplot as plt 8 | import matplotlib 9 | import torch 10 | from skimage.measure import label 11 | 12 | from .model import handpose_model 13 | from . import util 14 | 15 | class Hand(object): 16 | def __init__(self, model_path): 17 | self.model = handpose_model() 18 | if torch.cuda.is_available(): 19 | self.model = self.model.cuda() 20 | print('cuda') 21 | model_dict = util.transfer(self.model, torch.load(model_path)) 22 | self.model.load_state_dict(model_dict) 23 | self.model.eval() 24 | 25 | def __call__(self, oriImgRaw): 26 | scale_search = [0.5, 1.0, 1.5, 2.0] 27 | # scale_search = [0.5] 28 | boxsize = 368 29 | stride = 8 30 | padValue = 128 31 | thre = 0.05 32 | multiplier = [x * boxsize for x in scale_search] 33 | 34 | wsize = 128 35 | heatmap_avg = np.zeros((wsize, wsize, 22)) 36 | 37 | Hr, Wr, Cr = oriImgRaw.shape 38 | 39 | oriImg = cv2.GaussianBlur(oriImgRaw, (0, 0), 0.8) 40 | 41 | for m in range(len(multiplier)): 42 | scale = multiplier[m] 43 | imageToTest = util.smart_resize(oriImg, (scale, scale)) 44 | 45 | imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue) 46 | im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5 47 | im = np.ascontiguousarray(im) 48 | 49 | data = torch.from_numpy(im).float() 50 | if torch.cuda.is_available(): 51 | data = data.cuda() 52 | 53 | with torch.no_grad(): 54 | output = self.model(data).cpu().numpy() 55 | 56 | # extract outputs, resize, and remove padding 57 | heatmap = np.transpose(np.squeeze(output), (1, 2, 0)) # output 1 is heatmaps 58 | heatmap = util.smart_resize_k(heatmap, fx=stride, fy=stride) 59 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] 60 | heatmap = util.smart_resize(heatmap, (wsize, wsize)) 61 | 62 | heatmap_avg += heatmap / len(multiplier) 63 | 64 | all_peaks = [] 65 | for part in range(21): 66 | map_ori = heatmap_avg[:, :, part] 67 | one_heatmap = gaussian_filter(map_ori, sigma=3) 68 | binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8) 69 | 70 | if np.sum(binary) == 0: 71 | all_peaks.append([0, 0]) 72 | continue 73 | label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim) 74 | max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1 75 | label_img[label_img != max_index] = 0 76 | map_ori[label_img == 0] = 0 77 | 78 | y, x = util.npmax(map_ori) 79 | y = int(float(y) * float(Hr) / float(wsize)) 80 | x = int(float(x) * float(Wr) / float(wsize)) 81 | all_peaks.append([x, y]) 82 | return np.array(all_peaks) 83 | 84 | if __name__ == "__main__": 85 | hand_estimation = Hand('../model/hand_pose_model.pth') 86 | 87 | # test_image = '../images/hand.jpg' 88 | test_image = '../images/hand.jpg' 89 | oriImg = cv2.imread(test_image) # B,G,R order 90 | peaks = hand_estimation(oriImg) 91 | canvas = util.draw_handpose(oriImg, peaks, True) 92 | cv2.imshow('', canvas) 93 | cv2.waitKey(0) 
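# `peaks` is a (21, 2) array of [x, y] hand keypoints mapped back to the original
# image's coordinates; joints whose heatmap never clears the threshold are returned as [0, 0].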
-------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/PyramidCanny/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import modules.advanced_parameters as advanced_parameters 4 | from fooocus_extras.controlnet_preprocess_model.ZeoDepth import ZoeDetector 5 | 6 | 7 | class PyramidCanny: 8 | # remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/ZoeD_M12_N.pt" 9 | # modelpath = os.path.join(annotator_ckpts_path, "ZoeD_M12_N.pt") 10 | def __init__(self, model_path): 11 | self.model = None 12 | 13 | @staticmethod 14 | def centered_canny(x: np.ndarray): 15 | assert isinstance(x, np.ndarray) 16 | assert x.ndim == 2 and x.dtype == np.uint8 17 | 18 | y = cv2.Canny(x, int(advanced_parameters.canny_low_threshold), int(advanced_parameters.canny_high_threshold)) 19 | y = y.astype(np.float32) / 255.0 20 | return y 21 | 22 | @staticmethod 23 | def centered_canny_color(x: np.ndarray): 24 | assert isinstance(x, np.ndarray) 25 | assert x.ndim == 3 and x.shape[2] == 3 26 | 27 | result = [PyramidCanny.centered_canny(x[..., i]) for i in range(3)] 28 | result = np.stack(result, axis=2) 29 | return result 30 | 31 | @staticmethod 32 | def pyramid_canny_color(x: np.ndarray): 33 | assert isinstance(x, np.ndarray) 34 | assert x.ndim == 3 and x.shape[2] == 3 35 | 36 | H, W, C = x.shape 37 | acc_edge = None 38 | 39 | for k in [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]: 40 | Hs, Ws = int(H * k), int(W * k) 41 | small = cv2.resize(x, (Ws, Hs), interpolation=cv2.INTER_AREA) 42 | edge = PyramidCanny.centered_canny_color(small) 43 | if acc_edge is None: 44 | acc_edge = edge 45 | else: 46 | acc_edge = cv2.resize(acc_edge, (edge.shape[1], edge.shape[0]), interpolation=cv2.INTER_LINEAR) 47 | acc_edge = acc_edge * 0.75 + edge * 0.25 48 | 49 | return acc_edge 50 | 51 | @classmethod 52 | def norm255(cls, GrayImage, low=4, high=96): 53 | assert isinstance(GrayImage, np.ndarray) 54 | assert GrayImage.ndim == 2 and GrayImage.dtype == np.float32 55 | 56 | v_min = np.percentile(GrayImage, low) 57 | v_max = np.percentile(GrayImage, high) 58 | 59 | if np.allclose(v_min, v_max): 60 | GrayImage = GrayImage * 0 # Avoid 0-division 61 | else: 62 | GrayImage = (GrayImage - v_min) / (v_max - v_min) 63 | 64 | GrayImage -= v_min 65 | GrayImage /= v_max - v_min 66 | return GrayImage * 255.0 67 | 68 | def __call__(self, RGB): 69 | assert RGB.ndim == 3 70 | # For some reasons, SAI's Control-lora PyramidCanny seems to be trained on canny maps with non-standard resolutions. 71 | # Then we use pyramid to use all resolutions to avoid missing any structure in specific resolutions. 
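# pyramid_canny_color() resizes the input to scales 0.2-1.0, runs per-channel Canny
# with the configured low/high thresholds at each scale, and blends the running edge
# map as 0.75 * previous + 0.25 * current; the channel sum is then percentile-normalised
# to 0-255 below.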
72 | 73 | color_canny = PyramidCanny.pyramid_canny_color(RGB) 74 | result = np.sum(color_canny, axis=2) 75 | 76 | return self.norm255(result, low=1, high=99).clip(0, 255).astype(np.uint8) 77 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ReColor/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class ReColor: 5 | def __init__(self,model_path): 6 | self.model = None 7 | 8 | def __call__(self,RGB): 9 | assert RGB.ndim == 3 10 | assert RGB.shape[2] == 3 11 | Gray = cv2.cvtColor(RGB, cv2.COLOR_RGB2GRAY) 12 | return Gray -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/Revision/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class Revision: 5 | def __init__(self, model_path): 6 | self.model = None 7 | 8 | def __call__(self, RGB): 9 | assert RGB.ndim == 3 10 | assert RGB.shape[2] == 3 11 | # Gray = cv2.cvtColor(RGB, cv2.COLOR_RGB2BGR) 12 | return RGB 13 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/Sketch/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class Sketch: 5 | def __init__(self, model_path): 6 | self.model = None 7 | 8 | def __call__(self, RGB): 9 | assert RGB.ndim == 3 10 | assert RGB.shape[2] == 3 11 | Gray = cv2.cvtColor(RGB, cv2.COLOR_RGB2GRAY) 12 | return Gray 13 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/TileBlur/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class TileBlur: 5 | def __init__(self,model_path): 6 | self.model = None 7 | 8 | def __call__(self,RGB): 9 | assert RGB.ndim == 3 10 | assert RGB.shape[2] == 3 11 | RGB = cv2.GaussianBlur(RGB, (3, 3), 3) 12 | return RGB -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/TileBlurAnime/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class TileBlurAnime: 5 | def __init__(self,model_path): 6 | self.model = None 7 | 8 | def __call__(self,RGB): 9 | assert RGB.ndim == 3 10 | assert RGB.shape[2] == 3 11 | RGB = cv2.GaussianBlur(RGB, (3, 3), 3) 12 | return RGB -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/__init__.py: -------------------------------------------------------------------------------- 1 | # ZoeDepth 2 | # https://github.com/isl-org/ZoeDepth 3 | 4 | import os 5 | import cv2 6 | import numpy as np 7 | import torch 8 | 9 | from einops import rearrange 10 | from .zoedepth.models.zoedepth.zoedepth_v1 import ZoeDepth 11 | from .zoedepth.utils.config import get_config 12 | 13 | 14 | class ZoeDetector: 15 | # remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/ZoeD_M12_N.pt" 16 | # modelpath = os.path.join(annotator_ckpts_path, "ZoeD_M12_N.pt") 17 | def __init__(self, model_path): 18 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 19 | if DEVICE == "cpu": 20 | print("WARNING: Running on CPU. This will be slow. Check your CUDA installation.") 21 | if not os.path.exists(model_path): 22 | raise Exception(f"ZoeDepth model not found at {model_path}") 23 | model = ZoeDepth.build_from_config( 24 | get_config("zoedepth", "infer") 25 | ) 26 | model.load_state_dict(torch.load(model_path)['model']) 27 | model.to(DEVICE) 28 | model.eval() 29 | self.model = model 30 | 31 | @torch.no_grad() 32 | @torch.inference_mode() 33 | def __call__(self, RGB): 34 | assert RGB.ndim == 3 35 | assert RGB.shape[2] == 3 36 | with torch.no_grad(): 37 | # preprocess (move the image to the same device the model was loaded on) 38 | RGB = rearrange( 39 | torch.from_numpy(RGB).float().to(next(self.model.parameters()).device) / 255.0, 40 | 'h w c -> 1 c h w' 41 | ) 42 | # infer 43 | Depth = self.model.infer(RGB).detach().squeeze().cpu().numpy() 44 | # postprocess 45 | d_min = np.percentile(Depth, 2) 46 | d_max = np.percentile(Depth, 85) 47 | if np.allclose(d_min, d_max): 48 | Depth = Depth * 0 # Avoid 0-division 49 | else: 50 | Depth = (Depth - d_min) / (d_max - d_min) 51 | Depth = 1.0 - Depth 52 | Depth = (Depth * 255.0).clip(0, 255).astype(np.uint8) 53 | return Depth 54 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/data/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software.
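The ZoeDetector above turns an RGB image into an inverted, percentile-normalized uint8 depth map for the depth ControlNet: near surfaces come out bright, far surfaces dark. A usage sketch, assuming a local copy of the ZoeD_M12_N.pt checkpoint referenced in the commented remote_model_path; the paths below are illustrative:

import cv2

from fooocus_extras.controlnet_preprocess_model.ZeoDepth import ZoeDetector

# The checkpoint location is an assumption; point it at wherever ZoeD_M12_N.pt was downloaded.
detector = ZoeDetector(model_path='models/controlnet/ZoeD_M12_N.pt')

rgb = cv2.cvtColor(cv2.imread('example.png'), cv2.COLOR_BGR2RGB)
depth_hint = detector(rgb)  # uint8 H x W depth map, ready to use as a depth ControlNet hint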
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/data/ibims.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms as T 32 | 33 | 34 | class iBims(Dataset): 35 | def __init__(self, config): 36 | root_folder = config.ibims_root 37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f: 38 | imglist = f.read().split() 39 | 40 | samples = [] 41 | for basename in imglist: 42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png") 43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png") 44 | valid_mask_path = os.path.join( 45 | root_folder, 'mask_invalid', basename+".png") 46 | transp_mask_path = os.path.join( 47 | root_folder, 'mask_transp', basename+".png") 48 | 49 | samples.append( 50 | (img_path, depth_path, valid_mask_path, transp_mask_path)) 51 | 52 | self.samples = samples 53 | # self.normalize = T.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x : x 56 | 57 | def __getitem__(self, idx): 58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx] 59 | 60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0 61 | depth = np.asarray(Image.open(depth_path), 62 | dtype=np.uint16).astype('float')*50.0/65535 63 | 64 | mask_valid = np.asarray(Image.open(valid_mask_path)) 65 | mask_transp = np.asarray(Image.open(transp_mask_path)) 66 | 67 | # depth = depth * mask_valid * mask_transp 68 | depth = np.where(mask_valid * mask_transp, depth, -1) 69 | 70 | img = torch.from_numpy(img).permute(2, 0, 1) 71 | img = self.normalize(img) 72 | depth = torch.from_numpy(depth).unsqueeze(0) 73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims') 74 | 75 | def __len__(self): 76 | return len(self.samples) 77 | 78 | 79 | def get_ibims_loader(config, batch_size=1, **kwargs): 80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs) 81 | return dataloader 82 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
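The iBims loader above belongs to the bundled ZoeDepth training/evaluation utilities rather than the Fooocus image pipeline; it only needs a config object exposing ibims_root and derives every image, depth and mask path from the standard iBims-1 folder layout. A hedged sketch, assuming the repository root is on sys.path and the dataset sits at a hypothetical local path:

from types import SimpleNamespace

from fooocus_extras.controlnet_preprocess_model.ZeoDepth.zoedepth.data.ibims import get_ibims_loader

# 'ibims_root' is the only field the loader reads; the path is hypothetical.
config = SimpleNamespace(ibims_root='/data/ibims1')
loader = get_ibims_loader(config, batch_size=1, shuffle=False)

sample = next(iter(loader))
image = sample['image']  # (1, 3, H, W) float tensor in [0, 1]
depth = sample['depth']  # (1, 1, H, W) metric depth; invalid/transparent pixels are set to -1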
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | *.png 107 | *.pfm 108 | *.jpg 109 | *.jpeg 110 | *.pt -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/environment.yaml: -------------------------------------------------------------------------------- 1 | name: midas-py310 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - nvidia::cudatoolkit=11.7 7 | - python=3.10.8 8 | - pytorch::pytorch=1.13.0 9 | - torchvision=0.14.0 10 | - pip=22.3.1 11 | - numpy=1.23.4 12 | - pip: 13 | - opencv-python==4.6.0.66 14 | - imutils==0.5.4 15 | - timm==0.6.12 16 | - einops==0.6.0 -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/backbones/next_vit.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | import torch.nn as nn 4 | 5 | from pathlib import Path 6 | from .utils import activations, forward_default, get_activation 7 | 8 | from ..external.next_vit.classification.nextvit import * 9 | 10 | 11 | def forward_next_vit(pretrained, x): 12 | return forward_default(pretrained, x, "forward") 13 | 14 | 15 | def _make_next_vit_backbone( 16 | model, 17 | hooks=[2, 6, 36, 39], 18 | ): 19 | pretrained = nn.Module() 20 | 21 | pretrained.model = model 22 | pretrained.model.features[hooks[0]].register_forward_hook(get_activation("1")) 23 | pretrained.model.features[hooks[1]].register_forward_hook(get_activation("2")) 24 | pretrained.model.features[hooks[2]].register_forward_hook(get_activation("3")) 25 | pretrained.model.features[hooks[3]].register_forward_hook(get_activation("4")) 26 | 27 | pretrained.activations = activations 28 | 29 | return pretrained 30 | 31 | 32 | def _make_pretrained_next_vit_large_6m(hooks=None): 33 | model = timm.create_model("nextvit_large") 34 | 35 | hooks = [2, 6, 36, 39] if hooks == None else hooks 36 | return _make_next_vit_backbone( 37 | model, 38 | hooks=hooks, 39 | ) 40 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/backbones/swin.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | from .swin_common import _make_swin_backbone 4 | 5 | 6 | def _make_pretrained_swinl12_384(pretrained, hooks=None): 7 | model = timm.create_model("swin_large_patch4_window12_384", pretrained=pretrained) 8 | 9 | hooks = [1, 1, 17, 1] if hooks == None else hooks 10 | return _make_swin_backbone( 11 | model, 12 | hooks=hooks 13 | ) 14 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/backbones/swin2.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | from .swin_common import _make_swin_backbone 4 | 5 | 6 | def _make_pretrained_swin2l24_384(pretrained, hooks=None): 7 | model = timm.create_model("swinv2_large_window12to24_192to384_22kft1k", pretrained=pretrained) 8 | 9 | hooks = [1, 1, 17, 1] if hooks == None else hooks 10 | return _make_swin_backbone( 11 | model, 12 | hooks=hooks 13 | ) 14 | 15 | 16 | def _make_pretrained_swin2b24_384(pretrained, hooks=None): 17 | model = timm.create_model("swinv2_base_window12to24_192to384_22kft1k", pretrained=pretrained) 18 | 19 | hooks = [1, 1, 17, 1] if hooks == None else hooks 20 | 
return _make_swin_backbone( 21 | model, 22 | hooks=hooks 23 | ) 24 | 25 | 26 | def _make_pretrained_swin2t16_256(pretrained, hooks=None): 27 | model = timm.create_model("swinv2_tiny_window16_256", pretrained=pretrained) 28 | 29 | hooks = [1, 1, 5, 1] if hooks == None else hooks 30 | return _make_swin_backbone( 31 | model, 32 | hooks=hooks, 33 | patch_grid=[64, 64] 34 | ) 35 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/backbones/swin_common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import torch.nn as nn 4 | import numpy as np 5 | 6 | from .utils import activations, forward_default, get_activation, Transpose 7 | 8 | 9 | def forward_swin(pretrained, x): 10 | return forward_default(pretrained, x) 11 | 12 | 13 | def _make_swin_backbone( 14 | model, 15 | hooks=[1, 1, 17, 1], 16 | patch_grid=[96, 96] 17 | ): 18 | pretrained = nn.Module() 19 | 20 | pretrained.model = model 21 | pretrained.model.layers[0].blocks[hooks[0]].register_forward_hook(get_activation("1")) 22 | pretrained.model.layers[1].blocks[hooks[1]].register_forward_hook(get_activation("2")) 23 | pretrained.model.layers[2].blocks[hooks[2]].register_forward_hook(get_activation("3")) 24 | pretrained.model.layers[3].blocks[hooks[3]].register_forward_hook(get_activation("4")) 25 | 26 | pretrained.activations = activations 27 | 28 | if hasattr(model, "patch_grid"): 29 | used_patch_grid = model.patch_grid 30 | else: 31 | used_patch_grid = patch_grid 32 | 33 | patch_grid_size = np.array(used_patch_grid, dtype=int) 34 | 35 | pretrained.act_postprocess1 = nn.Sequential( 36 | Transpose(1, 2), 37 | nn.Unflatten(2, torch.Size(patch_grid_size.tolist())) 38 | ) 39 | pretrained.act_postprocess2 = nn.Sequential( 40 | Transpose(1, 2), 41 | nn.Unflatten(2, torch.Size((patch_grid_size // 2).tolist())) 42 | ) 43 | pretrained.act_postprocess3 = nn.Sequential( 44 | Transpose(1, 2), 45 | nn.Unflatten(2, torch.Size((patch_grid_size // 4).tolist())) 46 | ) 47 | pretrained.act_postprocess4 = nn.Sequential( 48 | Transpose(1, 2), 49 | nn.Unflatten(2, torch.Size((patch_grid_size // 8).tolist())) 50 | ) 51 | 52 | return pretrained 53 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/base_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BaseModel(torch.nn.Module): 5 | def load(self, path): 6 | """Load model from file. 7 | 8 | Args: 9 | path (str): file path 10 | """ 11 | parameters = torch.load(path, map_location=torch.device('cpu')) 12 | 13 | if "optimizer" in parameters: 14 | parameters = parameters["model"] 15 | 16 | self.load_state_dict(parameters) 17 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/base_models/midas_repo/midas/midas_net.py: -------------------------------------------------------------------------------- 1 | """MidashNet: Network for monocular depth estimation trained by mixing several datasets. 
2 | This file contains code that is adapted from 3 | https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .base_model import BaseModel 9 | from .blocks import FeatureFusionBlock, Interpolate, _make_encoder 10 | 11 | 12 | class MidasNet(BaseModel): 13 | """Network for monocular depth estimation. 14 | """ 15 | 16 | def __init__(self, path=None, features=256, non_negative=True): 17 | """Init. 18 | 19 | Args: 20 | path (str, optional): Path to saved model. Defaults to None. 21 | features (int, optional): Number of features. Defaults to 256. 22 | backbone (str, optional): Backbone network for encoder. Defaults to resnet50 23 | """ 24 | print("Loading weights: ", path) 25 | 26 | super(MidasNet, self).__init__() 27 | 28 | use_pretrained = False if path is None else True 29 | 30 | self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) 31 | 32 | self.scratch.refinenet4 = FeatureFusionBlock(features) 33 | self.scratch.refinenet3 = FeatureFusionBlock(features) 34 | self.scratch.refinenet2 = FeatureFusionBlock(features) 35 | self.scratch.refinenet1 = FeatureFusionBlock(features) 36 | 37 | self.scratch.output_conv = nn.Sequential( 38 | nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), 39 | Interpolate(scale_factor=2, mode="bilinear"), 40 | nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), 41 | nn.ReLU(True), 42 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), 43 | nn.ReLU(True) if non_negative else nn.Identity(), 44 | ) 45 | 46 | if path: 47 | self.load(path) 48 | 49 | def forward(self, x): 50 | """Forward pass. 51 | 52 | Args: 53 | x (tensor): input data (image) 54 | 55 | Returns: 56 | tensor: depth 57 | """ 58 | 59 | layer_1 = self.pretrained.layer1(x) 60 | layer_2 = self.pretrained.layer2(layer_1) 61 | layer_3 = self.pretrained.layer3(layer_2) 62 | layer_4 = self.pretrained.layer4(layer_3) 63 | 64 | layer_1_rn = self.scratch.layer1_rn(layer_1) 65 | layer_2_rn = self.scratch.layer2_rn(layer_2) 66 | layer_3_rn = self.scratch.layer3_rn(layer_3) 67 | layer_4_rn = self.scratch.layer4_rn(layer_4) 68 | 69 | path_4 = self.scratch.refinenet4(layer_4_rn) 70 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn) 71 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn) 72 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn) 73 | 74 | out = self.scratch.output_conv(path_1) 75 | 76 | return torch.squeeze(out, dim=1) 77 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 
14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | from .depth_model import DepthModel 27 | 28 | def build_model(config) -> DepthModel: 29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. 30 | This function should be used to construct models for training and evaluation. 31 | 32 | Args: 33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 34 | 35 | Returns: 36 | torch.nn.Module: Model corresponding to name and version as specified in config 37 | """ 38 | module_name = f"zoedepth.models.{config.model}" 39 | try: 40 | module = import_module(module_name) 41 | except ModuleNotFoundError as e: 42 | # print the original error message 43 | print(e) 44 | raise ValueError( 45 | f"Model {config.model} not found. Refer above error for details.") from e 46 | try: 47 | get_version = getattr(module, "get_version") 48 | except AttributeError as e: 49 | raise ValueError( 50 | f"Model {config.model} has no get_version function.") from e 51 | return get_version(config.version_name).build_from_config(config) 52 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/model_io.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | 27 | def load_state_dict(model, state_dict): 28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. 
29 | 30 | DataParallel prefixes state_dict keys with 'module.' when saving. 31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed. 32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added. 33 | """ 34 | state_dict = state_dict.get('model', state_dict) 35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' 36 | 37 | do_prefix = isinstance( 38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) 39 | state = {} 40 | for k, v in state_dict.items(): 41 | if k.startswith('module.') and not do_prefix: 42 | k = k[7:] 43 | 44 | if not k.startswith('module.') and do_prefix: 45 | k = 'module.' + k 46 | 47 | state[k] = v 48 | 49 | model.load_state_dict(state) 50 | print("Loaded successfully") 51 | return model 52 | 53 | 54 | def load_wts(model, checkpoint_path): 55 | ckpt = torch.load(checkpoint_path, map_location='cpu') 56 | return load_state_dict(model, ckpt) 57 | 58 | 59 | def load_state_dict_from_url(model, url, **kwargs): 60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) 61 | return load_state_dict(model, state_dict) 62 | 63 | 64 | def load_state_from_resource(model, resource: str): 65 | """Loads weights to the model from a given resource. A resource can be of following types: 66 | 1. URL. Prefixed with "url::" 67 | e.g. url::http(s)://url.resource.com/ckpt.pt 68 | 69 | 2. Local path. Prefixed with "local::" 70 | e.g. local::/path/to/ckpt.pt 71 | 72 | 73 | Args: 74 | model (torch.nn.Module): Model 75 | resource (str): resource string 76 | 77 | Returns: 78 | torch.nn.Module: Model with loaded weights 79 | """ 80 | print(f"Using pretrained resource {resource}") 81 | 82 | if resource.startswith('url::'): 83 | url = resource.split('url::')[1] 84 | return load_state_dict_from_url(model, url, progress=True) 85 | 86 | elif resource.startswith('local::'): 87 | path = resource.split('local::')[1] 88 | return load_wts(model, path) 89 | 90 | else: 91 | raise ValueError("Invalid resource type, only url:: and local:: are supported") 92 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/zoedepth/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_v1 import ZoeDepth 26 | 27 | all_versions = { 28 | "v1": ZoeDepth, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/zoedepth/config_zoedepth.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepth", 4 | "version_name": "v1", 5 | "n_bins": 64, 6 | "bin_embedding_dim": 128, 7 | "bin_centers_type": "softplus", 8 | "n_attractors":[16, 8, 4, 1], 9 | "attractor_alpha": 1000, 10 | "attractor_gamma": 2, 11 | "attractor_kind" : "mean", 12 | "attractor_type" : "inv", 13 | "midas_model_type" : "DPT_BEiT_L_384", 14 | "min_temp": 0.0212, 15 | "max_temp": 50.0, 16 | "output_distribution": "logbinomial", 17 | "memory_efficient": true, 18 | "inverse_midas": false, 19 | "img_size": [384, 512] 20 | }, 21 | 22 | "train": { 23 | "train_midas": true, 24 | "use_pretrained_midas": true, 25 | "trainer": "zoedepth", 26 | "epochs": 5, 27 | "bs": 16, 28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, 29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 30 | "same_lr": false, 31 | "w_si": 1, 32 | "w_domain": 0.2, 33 | "w_reg": 0, 34 | "w_grad": 0, 35 | "avoid_boundary": false, 36 | "random_crop": false, 37 | "input_width": 640, 38 | "input_height": 480, 39 | "midas_lr_factor": 1, 40 | "encoder_lr_factor":10, 41 | "pos_enc_lr_factor":10, 42 | "freeze_midas_bn": true 43 | 44 | }, 45 | 46 | "infer":{ 47 | "train_midas": false, 48 | "use_pretrained_midas": false, 49 | "pretrained_resource" : null, 50 | "force_keep_ar": true 51 | }, 52 | 53 | "eval":{ 54 | "train_midas": false, 55 | "use_pretrained_midas": false, 56 | "pretrained_resource" : null 57 | } 58 | } -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/zoedepth/config_zoedepth_kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "bin_centers_type": "normed", 4 | "img_size": [384, 768] 5 | }, 6 | 7 | "train": { 8 | }, 9 | 10 | "infer":{ 11 | "train_midas": false, 12 | "use_pretrained_midas": false, 13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", 14 | "force_keep_ar": true 15 | }, 16 | 17 | "eval":{ 18 | "train_midas": false, 19 | "use_pretrained_midas": false, 20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" 21 | } 22 | } -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/zoedepth_nk/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the 
Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_nk_v1 import ZoeDepthNK 26 | 27 | all_versions = { 28 | "v1": ZoeDepthNK, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepthNK", 4 | "version_name": "v1", 5 | "bin_conf" : [ 6 | { 7 | "name": "nyu", 8 | "n_bins": 64, 9 | "min_depth": 1e-3, 10 | "max_depth": 10.0 11 | }, 12 | { 13 | "name": "kitti", 14 | "n_bins": 64, 15 | "min_depth": 1e-3, 16 | "max_depth": 80.0 17 | } 18 | ], 19 | "bin_embedding_dim": 128, 20 | "bin_centers_type": "softplus", 21 | "n_attractors":[16, 8, 4, 1], 22 | "attractor_alpha": 1000, 23 | "attractor_gamma": 2, 24 | "attractor_kind" : "mean", 25 | "attractor_type" : "inv", 26 | "min_temp": 0.0212, 27 | "max_temp": 50.0, 28 | "memory_efficient": true, 29 | "midas_model_type" : "DPT_BEiT_L_384", 30 | "img_size": [384, 512] 31 | }, 32 | 33 | "train": { 34 | "train_midas": true, 35 | "use_pretrained_midas": true, 36 | "trainer": "zoedepth_nk", 37 | "epochs": 5, 38 | "bs": 16, 39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, 40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 41 | "same_lr": false, 42 | "w_si": 1, 43 | "w_domain": 100, 44 | "avoid_boundary": false, 45 | "random_crop": false, 46 | "input_width": 640, 47 | "input_height": 480, 48 | "w_grad": 0, 49 | "w_reg": 0, 50 | "midas_lr_factor": 10, 51 | "encoder_lr_factor":10, 52 | "pos_enc_lr_factor":10 53 | }, 54 | 55 | "infer": { 56 | "train_midas": false, 57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 58 | "use_pretrained_midas": false, 59 | "force_keep_ar": true 60 | }, 61 | 62 | "eval": { 63 | "train_midas": false, 64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 65 | "use_pretrained_midas": false 66 | } 67 | } -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/trainers/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | 27 | 28 | def get_trainer(config): 29 | """Builds and returns a trainer based on the config. 30 | 31 | Args: 32 | config (dict): the config dict (typically constructed using utils.config.get_config) 33 | config.trainer (str): the name of the trainer to use. The module named "{config.trainer}_trainer" must exist in trainers root module 34 | 35 | Raises: 36 | ValueError: If the specified trainer does not exist under trainers/ folder 37 | 38 | Returns: 39 | Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object 40 | """ 41 | assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format( 42 | config) 43 | try: 44 | Trainer = getattr(import_module( 45 | f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer') 46 | except ModuleNotFoundError as e: 47 | raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e 48 | return Trainer 49 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
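get_trainer above resolves the trainer dynamically: it imports zoedepth.trainers.{config.trainer}_trainer and returns that module's Trainer class, not an instance. A small sketch of the lookup, assuming the bundled ZeoDepth directory is on sys.path so that 'zoedepth' is importable, and using the bundled EasyDict so the '"trainer" in config' check in the assert works:

from zoedepth.trainers.builder import get_trainer
from zoedepth.utils.easydict import EasyDict

# 'zoedepth' matches the "trainer" field in config_zoedepth.json shown earlier.
config = EasyDict(trainer='zoedepth')
TrainerCls = get_trainer(config)  # Trainer class from zoedepth.trainers.zoedepth_trainer; instantiate it to train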
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/ZeoDepth/zoedepth/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def infer_type(x): # hacky way to infer type from string args 4 | if not isinstance(x, str): 5 | return x 6 | 7 | try: 8 | x = int(x) 9 | return x 10 | except ValueError: 11 | pass 12 | 13 | try: 14 | x = float(x) 15 | return x 16 | except ValueError: 17 | pass 18 | 19 | return x 20 | 21 | 22 | def parse_unknown(unknown_args): 23 | clean = [] 24 | for a in unknown_args: 25 | if "=" in a: 26 | k, v = a.split("=") 27 | clean.extend([k, v]) 28 | else: 29 | clean.append(a) 30 | 31 | keys = clean[::2] 32 | values = clean[1::2] 33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} 34 | -------------------------------------------------------------------------------- /fooocus_extras/controlnet_preprocess_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/fooocus_extras/controlnet_preprocess_model/__init__.py -------------------------------------------------------------------------------- /fooocus_extras/preprocessors.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import modules.advanced_parameters as advanced_parameters 4 | from fooocus_extras.controlnet_preprocess_model.ZeoDepth import ZoeDetector 5 | 6 | 7 | def centered_canny(x: np.ndarray): 8 | assert isinstance(x, np.ndarray) 9 | assert x.ndim == 2 and x.dtype == np.uint8 10 | 11 | y = cv2.Canny(x, int(advanced_parameters.canny_low_threshold), int(advanced_parameters.canny_high_threshold)) 12 | y = y.astype(np.float32) / 255.0 13 | return y 14 | 15 | 16 | def centered_canny_color(x: np.ndarray): 17 | assert isinstance(x, np.ndarray) 18 | assert x.ndim == 3 and x.shape[2] == 3 19 | 20 | result = [centered_canny(x[..., i]) for i in range(3)] 21 | result = np.stack(result, axis=2) 22 | return result 23 | 24 | 25 | def pyramid_canny_color(x: np.ndarray): 26 | assert isinstance(x, np.ndarray) 27 | assert x.ndim == 3 and x.shape[2] == 3 28 | 29 | H, W, C = x.shape 30 | acc_edge = None 31 | 32 | for k in [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]: 33 | Hs, Ws = int(H * k), int(W * k) 34 | small = cv2.resize(x, (Ws, Hs), interpolation=cv2.INTER_AREA) 35 | edge = centered_canny_color(small) 36 | if acc_edge is None: 37 | acc_edge = edge 38 | else: 39 | acc_edge = cv2.resize(acc_edge, (edge.shape[1], edge.shape[0]), interpolation=cv2.INTER_LINEAR) 40 | acc_edge = acc_edge * 0.75 + edge * 0.25 41 | 42 | return acc_edge 43 | 44 | 45 | def norm255(x, low=4, high=96): 46 | assert isinstance(x, np.ndarray) 47 | assert x.ndim == 2 and x.dtype == np.float32 48 | 49 | v_min = np.percentile(x, low) 50 | v_max = np.percentile(x, high) 51 | 52 | x -= v_min 53 | x /= v_max - v_min 54 | 55 | return x * 255.0 56 | def canny_pyramid(x): 57 | # For some reasons, SAI's Control-lora PyramidCanny seems to be trained on canny maps with non-standard resolutions. 58 | # Then we use pyramid to use all resolutions to avoid missing any structure in specific resolutions. 
59 | 60 | color_canny = pyramid_canny_color(x) 61 | result = np.sum(color_canny, axis=2) 62 | 63 | return norm255(result, low=1, high=99).clip(0, 255).astype(np.uint8) 64 | 65 | 66 | def cpds(x): 67 | # cv2.decolor is not "decolor", it is Cewu Lu's method 68 | # See http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html 69 | # See https://docs.opencv.org/3.0-beta/modules/photo/doc/decolor.html 70 | 71 | raw = cv2.GaussianBlur(x, (0, 0), 0.8) 72 | density, boost = cv2.decolor(raw) 73 | 74 | raw = raw.astype(np.float32) 75 | density = density.astype(np.float32) 76 | boost = boost.astype(np.float32) 77 | 78 | offset = np.sum((raw - boost) ** 2.0, axis=2) ** 0.5 79 | result = density + offset 80 | 81 | return norm255(result, low=4, high=96).clip(0, 255).astype(np.uint8) 82 | 83 | 84 | def depth(x, depth_model): 85 | result = depth_model(x) 86 | # return norm255(result, low=2, high=85).clip(0, 255).astype(np.uint8) 87 | return result 88 | 89 | 90 | def pose(x, pose_model): 91 | 92 | result = pose_model(x) 93 | return result 94 | -------------------------------------------------------------------------------- /fooocus_extras/vae_interpose.py: -------------------------------------------------------------------------------- 1 | # https://github.com/city96/SD-Latent-Interposer/blob/main/interposer.py 2 | 3 | import os 4 | import torch 5 | import safetensors.torch as sf 6 | import torch.nn as nn 7 | import fcbh.model_management 8 | 9 | from fcbh.model_patcher import ModelPatcher 10 | from modules.path import vae_approx_path 11 | 12 | 13 | class Block(nn.Module): 14 | def __init__(self, size): 15 | super().__init__() 16 | self.join = nn.ReLU() 17 | self.long = nn.Sequential( 18 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 19 | nn.LeakyReLU(0.1), 20 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 21 | nn.LeakyReLU(0.1), 22 | nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), 23 | ) 24 | 25 | def forward(self, x): 26 | y = self.long(x) 27 | z = self.join(y + x) 28 | return z 29 | 30 | 31 | class Interposer(nn.Module): 32 | def __init__(self): 33 | super().__init__() 34 | self.chan = 4 35 | self.hid = 128 36 | 37 | self.head_join = nn.ReLU() 38 | self.head_short = nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1) 39 | self.head_long = nn.Sequential( 40 | nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1), 41 | nn.LeakyReLU(0.1), 42 | nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), 43 | nn.LeakyReLU(0.1), 44 | nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), 45 | ) 46 | self.core = nn.Sequential( 47 | Block(self.hid), 48 | Block(self.hid), 49 | Block(self.hid), 50 | ) 51 | self.tail = nn.Sequential( 52 | nn.ReLU(), 53 | nn.Conv2d(self.hid, self.chan, kernel_size=3, stride=1, padding=1) 54 | ) 55 | 56 | def forward(self, x): 57 | y = self.head_join( 58 | self.head_long(x) + 59 | self.head_short(x) 60 | ) 61 | z = self.core(y) 62 | return self.tail(z) 63 | 64 | 65 | vae_approx_model = None 66 | vae_approx_filename = os.path.join(vae_approx_path, 'xl-to-v1_interposer-v3.1.safetensors') 67 | 68 | 69 | def parse(x): 70 | global vae_approx_model 71 | 72 | x_origin = x['samples'].clone() 73 | 74 | if vae_approx_model is None: 75 | model = Interposer() 76 | model.eval() 77 | sd = sf.load_file(vae_approx_filename) 78 | model.load_state_dict(sd) 79 | fp16 = fcbh.model_management.should_use_fp16() 80 | if fp16: 81 | model = model.half() 82 | vae_approx_model = ModelPatcher( 83 | model=model, 84 | 
load_device=fcbh.model_management.get_torch_device(), 85 | offload_device=torch.device('cpu') 86 | ) 87 | vae_approx_model.dtype = torch.float16 if fp16 else torch.float32 88 | 89 | fcbh.model_management.load_model_gpu(vae_approx_model) 90 | 91 | x = x_origin.to(device=vae_approx_model.load_device, dtype=vae_approx_model.dtype) 92 | x = vae_approx_model.model(x) 93 | 94 | return {'samples': x.to(x_origin)} 95 | -------------------------------------------------------------------------------- /fooocus_version.py: -------------------------------------------------------------------------------- 1 | version = '2.1.701' 2 | -------------------------------------------------------------------------------- /javascript/script.js: -------------------------------------------------------------------------------- 1 | // based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/script.js 2 | 3 | function gradioApp() { 4 | const elems = document.getElementsByTagName('gradio-app'); 5 | const elem = elems.length == 0 ? document : elems[0]; 6 | 7 | if (elem !== document) { 8 | elem.getElementById = function(id) { 9 | return document.getElementById(id); 10 | }; 11 | } 12 | return elem.shadowRoot ? elem.shadowRoot : elem; 13 | } 14 | 15 | function playNotification() { 16 | gradioApp().querySelector('#audio_notification audio')?.play(); 17 | } 18 | 19 | document.addEventListener('keydown', function(e) { 20 | var handled = false; 21 | if (e.key !== undefined) { 22 | if ((e.key == "Enter" && (e.metaKey || e.ctrlKey || e.altKey))) handled = true; 23 | } else if (e.keyCode !== undefined) { 24 | if ((e.keyCode == 13 && (e.metaKey || e.ctrlKey || e.altKey))) handled = true; 25 | } 26 | if (handled) { 27 | var button = gradioApp().querySelector('button[id=generate_button]'); 28 | if (button) { 29 | button.click(); 30 | } 31 | e.preventDefault(); 32 | } 33 | }); 34 | -------------------------------------------------------------------------------- /launch.py: -------------------------------------------------------------------------------- 1 | from python_hijack import * 2 | 3 | import sys 4 | import platform 5 | import fooocus_version 6 | 7 | from build_launcher import build_launcher 8 | from modules.launch_util import is_installed, run, python, run_pip, requirements_met 9 | from modules.model_loader import load_file_from_url 10 | from modules.path import modelfile_path, lorafile_path, vae_approx_path, fooocus_expansion_path, \ 11 | checkpoint_downloads, embeddings_path, embeddings_downloads, lora_downloads 12 | 13 | 14 | REINSTALL_ALL = False 15 | TRY_INSTALL_XFORMERS = False 16 | 17 | 18 | def prepare_environment(): 19 | torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://download.pytorch.org/whl/cu121") 20 | torch_command = os.environ.get('TORCH_COMMAND', 21 | f"pip install torch==2.1.0 torchvision==0.16.0 --extra-index-url {torch_index_url}") 22 | requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt") 23 | 24 | print(f"Python {sys.version}") 25 | print(f"Fooocus version: {fooocus_version.version}") 26 | 27 | if REINSTALL_ALL or not is_installed("torch") or not is_installed("torchvision"): 28 | run(f'"{python}" -m {torch_command}', "Installing torch and torchvision", "Couldn't install torch", live=True) 29 | 30 | if TRY_INSTALL_XFORMERS: 31 | if REINSTALL_ALL or not is_installed("xformers"): 32 | xformers_package = os.environ.get('XFORMERS_PACKAGE', 'xformers==0.0.20') 33 | if platform.system() == "Windows": 34 | if platform.python_version().startswith("3.10"): 35 | 
run_pip(f"install -U -I --no-deps {xformers_package}", "xformers", live=True) 36 | else: 37 | print("Installation of xformers is not supported in this version of Python.") 38 | print( 39 | "You can also check this and build manually: https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers#building-xformers-on-windows-by-duckness") 40 | if not is_installed("xformers"): 41 | exit(0) 42 | elif platform.system() == "Linux": 43 | run_pip(f"install -U -I --no-deps {xformers_package}", "xformers") 44 | 45 | if REINSTALL_ALL or not requirements_met(requirements_file): 46 | run_pip(f"install -r \"{requirements_file}\"", "requirements") 47 | 48 | return 49 | 50 | 51 | vae_approx_filenames = [ 52 | ('xlvaeapp.pth', 'https://huggingface.co/lllyasviel/misc/resolve/main/xlvaeapp.pth'), 53 | ('vaeapp_sd15.pth', 'https://huggingface.co/lllyasviel/misc/resolve/main/vaeapp_sd15.pt'), 54 | ('xl-to-v1_interposer-v3.1.safetensors', 55 | 'https://huggingface.co/lllyasviel/misc/resolve/main/xl-to-v1_interposer-v3.1.safetensors') 56 | ] 57 | 58 | 59 | def download_models(): 60 | for file_name, url in checkpoint_downloads.items(): 61 | load_file_from_url(url=url, model_dir=modelfile_path, file_name=file_name) 62 | for file_name, url in embeddings_downloads.items(): 63 | load_file_from_url(url=url, model_dir=embeddings_path, file_name=file_name) 64 | for file_name, url in lora_downloads.items(): 65 | load_file_from_url(url=url, model_dir=lorafile_path, file_name=file_name) 66 | for file_name, url in vae_approx_filenames: 67 | load_file_from_url(url=url, model_dir=vae_approx_path, file_name=file_name) 68 | 69 | load_file_from_url( 70 | url='https://huggingface.co/lllyasviel/misc/resolve/main/fooocus_expansion.bin', 71 | model_dir=fooocus_expansion_path, 72 | file_name='pytorch_model.bin' 73 | ) 74 | 75 | return 76 | 77 | 78 | def ini_cbh_args(): 79 | from args_manager import args 80 | return args 81 | 82 | 83 | prepare_environment() 84 | build_launcher() 85 | ini_cbh_args() 86 | download_models() 87 | 88 | from webui import * 89 | -------------------------------------------------------------------------------- /models/checkpoints/put_checkpoints_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/checkpoints/put_checkpoints_here -------------------------------------------------------------------------------- /models/clip/put_clip_or_text_encoder_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/clip/put_clip_or_text_encoder_models_here -------------------------------------------------------------------------------- /models/clip_vision/put_clip_vision_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/clip_vision/put_clip_vision_models_here -------------------------------------------------------------------------------- /models/configs/anything_v3.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | 
timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. ] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | params: 72 | layer: "hidden" 73 | layer_idx: -2 74 | -------------------------------------------------------------------------------- /models/configs/v1-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | -------------------------------------------------------------------------------- /models/configs/v1-inference_clip_skip_2.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 4 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | params: 72 | layer: "hidden" 73 | layer_idx: -2 74 | -------------------------------------------------------------------------------- /models/configs/v1-inference_clip_skip_2_fp16.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | use_fp16: True 33 | image_size: 32 # unused 34 | in_channels: 4 35 | out_channels: 4 36 | model_channels: 320 37 | attention_resolutions: [ 4, 2, 1 ] 38 | num_res_blocks: 2 39 | channel_mult: [ 1, 2, 4, 4 ] 40 | num_heads: 8 41 | use_spatial_transformer: True 42 | transformer_depth: 1 43 | context_dim: 768 44 | use_checkpoint: True 45 | legacy: False 46 | 47 | first_stage_config: 48 | target: ldm.models.autoencoder.AutoencoderKL 49 | params: 50 | embed_dim: 4 51 | monitor: val/rec_loss 52 | ddconfig: 53 | double_z: true 54 | z_channels: 4 55 | resolution: 256 56 | in_channels: 3 57 | out_ch: 3 58 | ch: 128 59 | ch_mult: 60 | - 1 61 | - 2 62 | - 4 63 | - 4 64 | num_res_blocks: 2 65 | attn_resolutions: [] 66 | dropout: 0.0 67 | lossconfig: 68 | target: torch.nn.Identity 69 | 70 | cond_stage_config: 71 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 72 | params: 73 | layer: "hidden" 74 | layer_idx: -2 75 | -------------------------------------------------------------------------------- /models/configs/v1-inference_fp16.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 10000 ] 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | use_fp16: True 33 | image_size: 32 # unused 34 | in_channels: 4 35 | out_channels: 4 36 | model_channels: 320 37 | attention_resolutions: [ 4, 2, 1 ] 38 | num_res_blocks: 2 39 | channel_mult: [ 1, 2, 4, 4 ] 40 | num_heads: 8 41 | use_spatial_transformer: True 42 | transformer_depth: 1 43 | context_dim: 768 44 | use_checkpoint: True 45 | legacy: False 46 | 47 | first_stage_config: 48 | target: ldm.models.autoencoder.AutoencoderKL 49 | params: 50 | embed_dim: 4 51 | monitor: val/rec_loss 52 | ddconfig: 53 | double_z: true 54 | z_channels: 4 55 | resolution: 256 56 | in_channels: 3 57 | out_ch: 3 58 | ch: 128 59 | ch_mult: 60 | - 1 61 | - 2 62 | - 4 63 | - 4 64 | num_res_blocks: 2 65 | attn_resolutions: [] 66 | dropout: 0.0 67 | lossconfig: 68 | target: torch.nn.Identity 69 | 70 | cond_stage_config: 71 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 72 | -------------------------------------------------------------------------------- /models/configs/v1-inpainting-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 7.5e-05 3 | target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: hybrid # important 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | finetune_keys: null 19 | 20 | scheduler_config: # 10000 warmup steps 21 | target: ldm.lr_scheduler.LambdaLinearScheduler 22 | params: 23 | warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch 24 | cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases 25 | f_start: [ 1.e-6 ] 26 | f_max: [ 1. ] 27 | f_min: [ 1. 
] 28 | 29 | unet_config: 30 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 31 | params: 32 | image_size: 32 # unused 33 | in_channels: 9 # 4 data + 4 downscaled image + 1 mask 34 | out_channels: 4 35 | model_channels: 320 36 | attention_resolutions: [ 4, 2, 1 ] 37 | num_res_blocks: 2 38 | channel_mult: [ 1, 2, 4, 4 ] 39 | num_heads: 8 40 | use_spatial_transformer: True 41 | transformer_depth: 1 42 | context_dim: 768 43 | use_checkpoint: True 44 | legacy: False 45 | 46 | first_stage_config: 47 | target: ldm.models.autoencoder.AutoencoderKL 48 | params: 49 | embed_dim: 4 50 | monitor: val/rec_loss 51 | ddconfig: 52 | double_z: true 53 | z_channels: 4 54 | resolution: 256 55 | in_channels: 3 56 | out_ch: 3 57 | ch: 128 58 | ch_mult: 59 | - 1 60 | - 2 61 | - 4 62 | - 4 63 | num_res_blocks: 2 64 | attn_resolutions: [] 65 | dropout: 0.0 66 | lossconfig: 67 | target: torch.nn.Identity 68 | 69 | cond_stage_config: 70 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder 71 | 72 | -------------------------------------------------------------------------------- /models/configs/v2-inference-v.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | parameterization: "v" 6 | linear_start: 0.00085 7 | linear_end: 0.0120 8 | num_timesteps_cond: 1 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 64 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn 17 | monitor: val/loss_simple_ema 18 | scale_factor: 0.18215 19 | use_ema: False # we set this to false because this is an inference only config 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | use_checkpoint: True 25 | use_fp16: True 26 | image_size: 32 # unused 27 | in_channels: 4 28 | out_channels: 4 29 | model_channels: 320 30 | attention_resolutions: [ 4, 2, 1 ] 31 | num_res_blocks: 2 32 | channel_mult: [ 1, 2, 4, 4 ] 33 | num_head_channels: 64 # need to fix for flash-attn 34 | use_spatial_transformer: True 35 | use_linear_in_transformer: True 36 | transformer_depth: 1 37 | context_dim: 1024 38 | legacy: False 39 | 40 | first_stage_config: 41 | target: ldm.models.autoencoder.AutoencoderKL 42 | params: 43 | embed_dim: 4 44 | monitor: val/rec_loss 45 | ddconfig: 46 | #attn_type: "vanilla-xformers" 47 | double_z: true 48 | z_channels: 4 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | - 4 58 | num_res_blocks: 2 59 | attn_resolutions: [] 60 | dropout: 0.0 61 | lossconfig: 62 | target: torch.nn.Identity 63 | 64 | cond_stage_config: 65 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 66 | params: 67 | freeze: True 68 | layer: "penultimate" 69 | -------------------------------------------------------------------------------- /models/configs/v2-inference-v_fp32.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | parameterization: "v" 6 | linear_start: 0.00085 7 | linear_end: 0.0120 8 | num_timesteps_cond: 1 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 64 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn 17 | monitor: 
val/loss_simple_ema 18 | scale_factor: 0.18215 19 | use_ema: False # we set this to false because this is an inference only config 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | use_checkpoint: True 25 | use_fp16: False 26 | image_size: 32 # unused 27 | in_channels: 4 28 | out_channels: 4 29 | model_channels: 320 30 | attention_resolutions: [ 4, 2, 1 ] 31 | num_res_blocks: 2 32 | channel_mult: [ 1, 2, 4, 4 ] 33 | num_head_channels: 64 # need to fix for flash-attn 34 | use_spatial_transformer: True 35 | use_linear_in_transformer: True 36 | transformer_depth: 1 37 | context_dim: 1024 38 | legacy: False 39 | 40 | first_stage_config: 41 | target: ldm.models.autoencoder.AutoencoderKL 42 | params: 43 | embed_dim: 4 44 | monitor: val/rec_loss 45 | ddconfig: 46 | #attn_type: "vanilla-xformers" 47 | double_z: true 48 | z_channels: 4 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | - 4 58 | num_res_blocks: 2 59 | attn_resolutions: [] 60 | dropout: 0.0 61 | lossconfig: 62 | target: torch.nn.Identity 63 | 64 | cond_stage_config: 65 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 66 | params: 67 | freeze: True 68 | layer: "penultimate" 69 | -------------------------------------------------------------------------------- /models/configs/v2-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False # we set this to false because this is an inference only config 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | use_checkpoint: True 24 | use_fp16: True 25 | image_size: 32 # unused 26 | in_channels: 4 27 | out_channels: 4 28 | model_channels: 320 29 | attention_resolutions: [ 4, 2, 1 ] 30 | num_res_blocks: 2 31 | channel_mult: [ 1, 2, 4, 4 ] 32 | num_head_channels: 64 # need to fix for flash-attn 33 | use_spatial_transformer: True 34 | use_linear_in_transformer: True 35 | transformer_depth: 1 36 | context_dim: 1024 37 | legacy: False 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | ddconfig: 45 | #attn_type: "vanilla-xformers" 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 65 | params: 66 | freeze: True 67 | layer: "penultimate" 68 | -------------------------------------------------------------------------------- /models/configs/v2-inference_fp32.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | 
log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False # we set this to false because this is an inference only config 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | use_checkpoint: True 24 | use_fp16: False 25 | image_size: 32 # unused 26 | in_channels: 4 27 | out_channels: 4 28 | model_channels: 320 29 | attention_resolutions: [ 4, 2, 1 ] 30 | num_res_blocks: 2 31 | channel_mult: [ 1, 2, 4, 4 ] 32 | num_head_channels: 64 # need to fix for flash-attn 33 | use_spatial_transformer: True 34 | use_linear_in_transformer: True 35 | transformer_depth: 1 36 | context_dim: 1024 37 | legacy: False 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | ddconfig: 45 | #attn_type: "vanilla-xformers" 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 65 | params: 66 | freeze: True 67 | layer: "penultimate" 68 | -------------------------------------------------------------------------------- /models/controlnet/put_controlnets_and_t2i_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/controlnet/put_controlnets_and_t2i_here -------------------------------------------------------------------------------- /models/diffusers/put_diffusers_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/diffusers/put_diffusers_models_here -------------------------------------------------------------------------------- /models/embeddings/put_embeddings_or_textual_inversion_concepts_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/embeddings/put_embeddings_or_textual_inversion_concepts_here -------------------------------------------------------------------------------- /models/gligen/put_gligen_models_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/gligen/put_gligen_models_here -------------------------------------------------------------------------------- /models/hypernetworks/put_hypernetworks_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/hypernetworks/put_hypernetworks_here -------------------------------------------------------------------------------- /models/loras/put_loras_here: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/loras/put_loras_here -------------------------------------------------------------------------------- /models/prompt_expansion/fooocus_expansion/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "gpt2", 3 | "activation_function": "gelu_new", 4 | "architectures": [ 5 | "GPT2LMHeadModel" 6 | ], 7 | "attn_pdrop": 0.1, 8 | "bos_token_id": 50256, 9 | "embd_pdrop": 0.1, 10 | "eos_token_id": 50256, 11 | "pad_token_id": 50256, 12 | "initializer_range": 0.02, 13 | "layer_norm_epsilon": 1e-05, 14 | "model_type": "gpt2", 15 | "n_ctx": 1024, 16 | "n_embd": 768, 17 | "n_head": 12, 18 | "n_inner": null, 19 | "n_layer": 12, 20 | "n_positions": 1024, 21 | "reorder_and_upcast_attn": false, 22 | "resid_pdrop": 0.1, 23 | "scale_attn_by_inverse_layer_idx": false, 24 | "scale_attn_weights": true, 25 | "summary_activation": null, 26 | "summary_first_dropout": 0.1, 27 | "summary_proj_to_labels": true, 28 | "summary_type": "cls_index", 29 | "summary_use_proj": true, 30 | "task_specific_params": { 31 | "text-generation": { 32 | "do_sample": true, 33 | "max_length": 50 34 | } 35 | }, 36 | "torch_dtype": "float32", 37 | "transformers_version": "4.23.0.dev0", 38 | "use_cache": true, 39 | "vocab_size": 50257 40 | } 41 | -------------------------------------------------------------------------------- /models/prompt_expansion/fooocus_expansion/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": "<|endoftext|>", 3 | "eos_token": "<|endoftext|>", 4 | "unk_token": "<|endoftext|>" 5 | } 6 | -------------------------------------------------------------------------------- /models/prompt_expansion/fooocus_expansion/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "bos_token": "<|endoftext|>", 4 | "eos_token": "<|endoftext|>", 5 | "model_max_length": 1024, 6 | "name_or_path": "gpt2", 7 | "special_tokens_map_file": null, 8 | "tokenizer_class": "GPT2Tokenizer", 9 | "unk_token": "<|endoftext|>" 10 | } 11 | -------------------------------------------------------------------------------- /models/prompt_expansion/put_prompt_expansion_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/prompt_expansion/put_prompt_expansion_here -------------------------------------------------------------------------------- /models/style_models/put_t2i_style_model_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/style_models/put_t2i_style_model_here -------------------------------------------------------------------------------- /models/unet/put_unet_files_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/unet/put_unet_files_here -------------------------------------------------------------------------------- /models/upscale_models/put_esrgan_and_other_upscale_models_here: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/upscale_models/put_esrgan_and_other_upscale_models_here -------------------------------------------------------------------------------- /models/vae/put_vae_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/vae/put_vae_here -------------------------------------------------------------------------------- /models/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/models/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here -------------------------------------------------------------------------------- /modules/advanced_parameters.py: -------------------------------------------------------------------------------- 1 | adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \ 2 | scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \ 3 | overwrite_vary_strength, overwrite_upscale_strength, \ 4 | mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ 5 | debugging_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, inpaint_engine, \ 6 | refiner_swap_method, \ 7 | freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 = [None] * 25 8 | 9 | 10 | def set_all_advanced_parameters(*args): 11 | global adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \ 12 | scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \ 13 | overwrite_vary_strength, overwrite_upscale_strength, \ 14 | mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ 15 | debugging_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, inpaint_engine, \ 16 | refiner_swap_method, \ 17 | freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 18 | 19 | adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \ 20 | scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \ 21 | overwrite_vary_strength, overwrite_upscale_strength, \ 22 | mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ 23 | debugging_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, inpaint_engine, \ 24 | refiner_swap_method, \ 25 | freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 = args 26 | 27 | return 28 | -------------------------------------------------------------------------------- /modules/expansion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import fcbh.model_management as model_management 4 | 5 | from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed 6 | from modules.path import fooocus_expansion_path 7 | from fcbh.model_patcher import ModelPatcher 8 | 9 | 10 | fooocus_magic_split = [ 11 | ', extremely', 12 | ', intricate,', 13 | ] 14 | dangrous_patterns = '[]【】()()|::' 15 | 16 | 17 | def safe_str(x): 18 | x = str(x) 19 | for _ in range(16): 20 | x = x.replace(' ', ' ') 21 | return x.strip(",. 
\r\n") 22 | 23 | 24 | def remove_pattern(x, pattern): 25 | for p in pattern: 26 | x = x.replace(p, '') 27 | return x 28 | 29 | 30 | class FooocusExpansion: 31 | def __init__(self): 32 | self.tokenizer = AutoTokenizer.from_pretrained(fooocus_expansion_path) 33 | self.model = AutoModelForCausalLM.from_pretrained(fooocus_expansion_path) 34 | self.model.eval() 35 | 36 | load_device = model_management.text_encoder_device() 37 | offload_device = model_management.text_encoder_offload_device() 38 | 39 | # MPS hack 40 | if model_management.is_device_mps(load_device): 41 | load_device = torch.device('cpu') 42 | offload_device = torch.device('cpu') 43 | 44 | use_fp16 = model_management.should_use_fp16(device=load_device) 45 | 46 | if use_fp16: 47 | self.model.half() 48 | 49 | self.patcher = ModelPatcher(self.model, load_device=load_device, offload_device=offload_device) 50 | print(f'Fooocus Expansion engine loaded for {load_device}, use_fp16 = {use_fp16}.') 51 | 52 | def __call__(self, prompt, seed): 53 | if self.patcher.current_device != self.patcher.load_device: 54 | print('Fooocus Expansion loaded by itself.') 55 | model_management.load_model_gpu(self.patcher) 56 | 57 | seed = int(seed) 58 | set_seed(seed) 59 | origin = safe_str(prompt) 60 | prompt = origin + fooocus_magic_split[seed % len(fooocus_magic_split)] 61 | 62 | tokenized_kwargs = self.tokenizer(prompt, return_tensors="pt") 63 | tokenized_kwargs.data['input_ids'] = tokenized_kwargs.data['input_ids'].to(self.patcher.load_device) 64 | tokenized_kwargs.data['attention_mask'] = tokenized_kwargs.data['attention_mask'].to(self.patcher.load_device) 65 | 66 | # https://huggingface.co/blog/introducing-csearch 67 | # https://huggingface.co/docs/transformers/generation_strategies 68 | features = self.model.generate(**tokenized_kwargs, 69 | num_beams=1, 70 | max_new_tokens=256, 71 | do_sample=True) 72 | 73 | response = self.tokenizer.batch_decode(features, skip_special_tokens=True) 74 | result = response[0][len(origin):] 75 | result = safe_str(result) 76 | result = remove_pattern(result, dangrous_patterns) 77 | return result 78 | -------------------------------------------------------------------------------- /modules/flags.py: -------------------------------------------------------------------------------- 1 | disabled = 'Disabled' 2 | enabled = 'Enabled' 3 | subtle_variation = 'Vary (Subtle)' 4 | strong_variation = 'Vary (Strong)' 5 | upscale_15 = 'Upscale (1.5x)' 6 | upscale_2 = 'Upscale (2x)' 7 | upscale_fast = 'Upscale (Fast 2x)' 8 | 9 | uov_list = [ 10 | disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast 11 | ] 12 | 13 | KSAMPLER_NAMES = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", 14 | "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", 15 | "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm"] 16 | 17 | SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"] 18 | SAMPLER_NAMES = KSAMPLER_NAMES + ["ddim", "uni_pc", "uni_pc_bh2"] 19 | 20 | sampler_list = SAMPLER_NAMES 21 | scheduler_list = SCHEDULER_NAMES 22 | cn_ip = "Image Prompt" 23 | cn_depth = "Depth" 24 | cn_canny = "PyraCanny" 25 | cn_cpds = "CPDS" 26 | cn_pose = 'Pose' 27 | cn_reColor = 'ReColor' 28 | cn_Sketch = 'sketch' 29 | # cn_revision = 'revision' 30 | # cn_tileBlur = 'TileBlur' 31 | # cn_tileBlurAnime = 'TileBlurAnime' 32 | 33 | ip_list = [cn_ip, cn_canny, cn_cpds, cn_depth, cn_pose, cn_reColor, cn_Sketch, 34 | # 
cn_revision, cn_tileBlur,cn_tileBlurAnime 35 | ] 36 | default_ip = cn_ip 37 | 38 | default_parameters = { 39 | cn_ip: (0.5, 0.6), 40 | cn_canny: (0.5, 1.0), 41 | cn_cpds: (0.5, 1.0), 42 | cn_depth: (0.5, 1.0), 43 | cn_pose: (0.5, 1.0), 44 | cn_reColor: (0.5, 1.0), 45 | cn_Sketch: (0.5, 1.0), 46 | # cn_revision: (0.5, 1.0), 47 | # cn_tileBlur: (0.5, 1.0), 48 | # cn_tileBlurAnime: (0.5, 1.0), 49 | } # stop, weight 50 | -------------------------------------------------------------------------------- /modules/html.py: -------------------------------------------------------------------------------- 1 | css = ''' 2 | .loader-container { 3 | display: flex; /* Use flex to align items horizontally */ 4 | align-items: center; /* Center items vertically within the container */ 5 | white-space: nowrap; /* Prevent line breaks within the container */ 6 | } 7 | 8 | .loader { 9 | border: 8px solid #f3f3f3; /* Light grey */ 10 | border-top: 8px solid #3498db; /* Blue */ 11 | border-radius: 50%; 12 | width: 30px; 13 | height: 30px; 14 | animation: spin 2s linear infinite; 15 | } 16 | 17 | @keyframes spin { 18 | 0% { transform: rotate(0deg); } 19 | 100% { transform: rotate(360deg); } 20 | } 21 | 22 | /* Style the progress bar */ 23 | progress { 24 | appearance: none; /* Remove default styling */ 25 | height: 20px; /* Set the height of the progress bar */ 26 | border-radius: 5px; /* Round the corners of the progress bar */ 27 | background-color: #f3f3f3; /* Light grey background */ 28 | width: 100%; 29 | } 30 | 31 | /* Style the progress bar container */ 32 | .progress-container { 33 | margin-left: 20px; 34 | margin-right: 20px; 35 | flex-grow: 1; /* Allow the progress container to take up remaining space */ 36 | } 37 | 38 | /* Set the color of the progress bar fill */ 39 | progress::-webkit-progress-value { 40 | background-color: #3498db; /* Blue color for the fill */ 41 | } 42 | 43 | progress::-moz-progress-bar { 44 | background-color: #3498db; /* Blue color for the fill in Firefox */ 45 | } 46 | 47 | /* Style the text on the progress bar */ 48 | progress::after { 49 | content: attr(value '%'); /* Display the progress value followed by '%' */ 50 | position: absolute; 51 | top: 50%; 52 | left: 50%; 53 | transform: translate(-50%, -50%); 54 | color: white; /* Set text color */ 55 | font-size: 14px; /* Set font size */ 56 | } 57 | 58 | /* Style other texts */ 59 | .loader-container > span { 60 | margin-left: 5px; /* Add spacing between the progress bar and the text */ 61 | } 62 | 63 | .progress-bar > .generating { 64 | display: none !important; 65 | } 66 | 67 | .progress-bar{ 68 | height: 30px !important; 69 | } 70 | 71 | .type_row{ 72 | height: 80px !important; 73 | } 74 | 75 | .type_row_half{ 76 | height: 32px !important; 77 | } 78 | 79 | .scroll-hide{ 80 | resize: none !important; 81 | } 82 | 83 | .refresh_button{ 84 | border: none !important; 85 | background: none !important; 86 | font-size: none !important; 87 | box-shadow: none !important; 88 | } 89 | 90 | .advanced_check_row{ 91 | width: 250px !important; 92 | } 93 | 94 | .min_check{ 95 | min-width: min(1px, 100%) !important; 96 | } 97 | 98 | .resizable_area { 99 | resize: vertical; 100 | overflow: auto !important; 101 | } 102 | 103 | ''' 104 | progress_html = ''' 105 |
<div class="loader-container"> 106 |   <div class="loader"></div> 107 |   <div class="progress-container"> 108 |     <progress value="*number*" max="100"></progress> 109 |   </div> 110 |   <span>*text*</span> 111 | </div>
112 | ''' 113 | 114 | 115 | def make_progress_html(number, text): 116 | return progress_html.replace('*number*', str(number)).replace('*text*', text) 117 | -------------------------------------------------------------------------------- /modules/launch_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import importlib 3 | import importlib.util 4 | import shutil 5 | import subprocess 6 | import sys 7 | import re 8 | import logging 9 | import pygit2 10 | 11 | 12 | pygit2.option(pygit2.GIT_OPT_SET_OWNER_VALIDATION, 0) 13 | 14 | logging.getLogger("torch.distributed.nn").setLevel(logging.ERROR) # sshh... 15 | logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage()) 16 | 17 | re_requirement = re.compile(r"\s*([-_a-zA-Z0-9]+)\s*(?:==\s*([-+_.a-zA-Z0-9]+))?\s*") 18 | 19 | python = sys.executable 20 | default_command_live = (os.environ.get('LAUNCH_LIVE_OUTPUT') == "1") 21 | index_url = os.environ.get('INDEX_URL', "") 22 | 23 | modules_path = os.path.dirname(os.path.realpath(__file__)) 24 | script_path = os.path.dirname(modules_path) 25 | 26 | 27 | def is_installed(package): 28 | try: 29 | spec = importlib.util.find_spec(package) 30 | except ModuleNotFoundError: 31 | return False 32 | 33 | return spec is not None 34 | 35 | 36 | def run(command, desc=None, errdesc=None, custom_env=None, live: bool = default_command_live) -> str: 37 | if desc is not None: 38 | print(desc) 39 | 40 | run_kwargs = { 41 | "args": command, 42 | "shell": True, 43 | "env": os.environ if custom_env is None else custom_env, 44 | "encoding": 'utf8', 45 | "errors": 'ignore', 46 | } 47 | 48 | if not live: 49 | run_kwargs["stdout"] = run_kwargs["stderr"] = subprocess.PIPE 50 | 51 | result = subprocess.run(**run_kwargs) 52 | 53 | if result.returncode != 0: 54 | error_bits = [ 55 | f"{errdesc or 'Error running command'}.", 56 | f"Command: {command}", 57 | f"Error code: {result.returncode}", 58 | ] 59 | if result.stdout: 60 | error_bits.append(f"stdout: {result.stdout}") 61 | if result.stderr: 62 | error_bits.append(f"stderr: {result.stderr}") 63 | raise RuntimeError("\n".join(error_bits)) 64 | 65 | return (result.stdout or "") 66 | 67 | 68 | def run_pip(command, desc=None, live=default_command_live): 69 | try: 70 | index_url_line = f' --index-url {index_url}' if index_url != '' else '' 71 | return run(f'"{python}" -m pip {command} --prefer-binary{index_url_line}', desc=f"Installing {desc}", 72 | errdesc=f"Couldn't install {desc}", live=live) 73 | except Exception as e: 74 | print(e) 75 | print(f'CMD Failed {desc}: {command}') 76 | return None 77 | 78 | 79 | def requirements_met(requirements_file): 80 | """ 81 | Does a simple parse of a requirements.txt file to determine if all rerqirements in it 82 | are already installed. Returns True if so, False if not installed or parsing fails. 
83 | """ 84 | 85 | import importlib.metadata 86 | import packaging.version 87 | 88 | with open(requirements_file, "r", encoding="utf8") as file: 89 | for line in file: 90 | if line.strip() == "": 91 | continue 92 | 93 | m = re.match(re_requirement, line) 94 | if m is None: 95 | return False 96 | 97 | package = m.group(1).strip() 98 | version_required = (m.group(2) or "").strip() 99 | 100 | if version_required == "": 101 | continue 102 | 103 | try: 104 | version_installed = importlib.metadata.version(package) 105 | except Exception: 106 | return False 107 | 108 | if packaging.version.parse(version_required) != packaging.version.parse(version_installed): 109 | return False 110 | 111 | return True 112 | -------------------------------------------------------------------------------- /modules/model_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib.parse import urlparse 3 | from typing import Optional 4 | 5 | 6 | def load_file_from_url( 7 | url: str, 8 | *, 9 | model_dir: str, 10 | progress: bool = True, 11 | file_name: Optional[str] = None, 12 | ) -> str: 13 | """Download a file from `url` into `model_dir`, using the file present if possible. 14 | 15 | Returns the path to the downloaded file. 16 | """ 17 | os.makedirs(model_dir, exist_ok=True) 18 | if not file_name: 19 | parts = urlparse(url) 20 | file_name = os.path.basename(parts.path) 21 | cached_file = os.path.abspath(os.path.join(model_dir, file_name)) 22 | if not os.path.exists(cached_file): 23 | print(f'Downloading: "{url}" to {cached_file}\n') 24 | from torch.hub import download_url_to_file 25 | download_url_to_file(url, cached_file, progress=progress) 26 | return cached_file 27 | -------------------------------------------------------------------------------- /modules/private_logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import modules.path 3 | 4 | from PIL import Image 5 | from modules.util import generate_temp_filename 6 | 7 | 8 | def get_current_html_path(): 9 | date_string, local_temp_filename, only_name = generate_temp_filename(folder=modules.path.temp_outputs_path, 10 | extension='png') 11 | html_name = os.path.join(os.path.dirname(local_temp_filename), 'log.html') 12 | return html_name 13 | 14 | 15 | def log(img, dic, single_line_number=3): 16 | date_string, local_temp_filename, only_name = generate_temp_filename(folder=modules.path.temp_outputs_path, extension='png') 17 | os.makedirs(os.path.dirname(local_temp_filename), exist_ok=True) 18 | Image.fromarray(img).save(local_temp_filename) 19 | html_name = os.path.join(os.path.dirname(local_temp_filename), 'log.html') 20 | 21 | if not os.path.exists(html_name): 22 | with open(html_name, 'a+', encoding='utf-8') as f: 23 | f.write(f"
<p>Fooocus Log {date_string} (private)</p>\n") 24 | f.write(f"<p>All images do not contain any hidden data.</p>") 25 | 26 | with open(html_name, 'a+', encoding='utf-8') as f: 27 | div_name = only_name.replace('.', '_') 28 | f.write(f'
<div id="{div_name}">\n') 29 | f.write(f"<p><img src='{only_name}' width=512><br>{only_name}</p>\n") 30 | i = 0 31 | for k, v in dic: 32 | if i < single_line_number: 33 | f.write(f"<p>{k}: {v}</p>\n") 34 | else: 35 | if (i - single_line_number) % 2 == 0: 36 | f.write(f"<p>{k}: {v}, ") 37 | else: 38 | f.write(f"{k}: {v}</p>\n") 39 | i += 1 40 | f.write(f"</div>
\n") 41 | 42 | print(f'Image generated with private log at: {html_name}') 43 | 44 | return 45 | -------------------------------------------------------------------------------- /modules/sdxl_styles.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import random 4 | import json 5 | 6 | from modules.util import get_files_from_folder 7 | 8 | 9 | # cannot use modules.path - validators causing circular imports 10 | styles_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../sdxl_styles/')) 11 | wildcards_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../wildcards/')) 12 | 13 | 14 | def normalize_key(k): 15 | k = k.replace('-', ' ') 16 | words = k.split(' ') 17 | words = [w[:1].upper() + w[1:].lower() for w in words] 18 | k = ' '.join(words) 19 | k = k.replace('3d', '3D') 20 | k = k.replace('Sai', 'SAI') 21 | k = k.replace('Mre', 'MRE') 22 | k = k.replace('(s', '(S') 23 | return k 24 | 25 | 26 | styles = {} 27 | 28 | styles_files = get_files_from_folder(styles_path, ['.json']) 29 | 30 | for x in ['sdxl_styles_fooocus.json', 31 | 'sdxl_styles_sai.json', 32 | 'sdxl_styles_mre.json', 33 | 'sdxl_styles_twri.json', 34 | 'sdxl_styles_diva.json']: 35 | if x in styles_files: 36 | styles_files.remove(x) 37 | styles_files.append(x) 38 | 39 | for styles_file in styles_files: 40 | try: 41 | with open(os.path.join(styles_path, styles_file), encoding='utf-8') as f: 42 | for entry in json.load(f): 43 | name, prompt, negative_prompt = normalize_key(entry['name']), entry['prompt'], entry['negative_prompt'] 44 | styles[name] = (prompt, negative_prompt) 45 | except Exception as e: 46 | print(str(e)) 47 | print(f'Failed to load style file {styles_file}') 48 | 49 | style_keys = list(styles.keys()) 50 | fooocus_expansion = "Fooocus V2" 51 | legal_style_names = [fooocus_expansion] + style_keys 52 | 53 | 54 | SD_XL_BASE_RATIOS = { 55 | "0.5": (704, 1408), 56 | "0.52": (704, 1344), 57 | "0.57": (768, 1344), 58 | "0.6": (768, 1280), 59 | "0.68": (832, 1216), 60 | "0.72": (832, 1152), 61 | "0.78": (896, 1152), 62 | "0.82": (896, 1088), 63 | "0.88": (960, 1088), 64 | "0.94": (960, 1024), 65 | "1.0": (1024, 1024), 66 | "1.07": (1024, 960), 67 | "1.13": (1088, 960), 68 | "1.21": (1088, 896), 69 | "1.29": (1152, 896), 70 | "1.38": (1152, 832), 71 | "1.46": (1216, 832), 72 | "1.67": (1280, 768), 73 | "1.75": (1344, 768), 74 | "1.91": (1344, 704), 75 | "2.0": (1408, 704), 76 | "2.09": (1472, 704), 77 | "2.4": (1536, 640), 78 | "2.5": (1600, 640), 79 | "2.89": (1664, 576), 80 | "3.0": (1728, 576), 81 | } 82 | 83 | aspect_ratios = {} 84 | 85 | # import math 86 | 87 | for k, (w, h) in SD_XL_BASE_RATIOS.items(): 88 | txt = f'{w}×{h}' 89 | 90 | # gcd = math.gcd(w, h) 91 | # txt += f' {w//gcd}:{h//gcd}' 92 | 93 | aspect_ratios[txt] = (w, h) 94 | 95 | 96 | def apply_style(style, positive): 97 | p, n = styles[style] 98 | return p.replace('{prompt}', positive), n 99 | 100 | 101 | def apply_wildcards(wildcard_text, seed=None, directory=wildcards_path): 102 | placeholders = re.findall(r'__(\w+)__', wildcard_text) 103 | if len(placeholders) == 0: 104 | return wildcard_text 105 | 106 | rng = random.Random(seed) 107 | for placeholder in placeholders: 108 | try: 109 | words = open(os.path.join(directory, f'{placeholder}.txt'), encoding='utf-8').read().splitlines() 110 | words = [x for x in words if x != ''] 111 | wildcard_text = wildcard_text.replace(f'__{placeholder}__', rng.choice(words), 1) 112 | except IOError: 113 | print(f'Error: could not 
open wildcard file {placeholder}.txt, using as normal word.') 114 | wildcard_text = wildcard_text.replace(f'__{placeholder}__', placeholder) 115 | 116 | return wildcard_text 117 | -------------------------------------------------------------------------------- /modules/ui_gradio_extensions.py: -------------------------------------------------------------------------------- 1 | # based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/modules/ui_gradio_extensions.py 2 | 3 | import os 4 | import gradio as gr 5 | 6 | GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse 7 | 8 | modules_path = os.path.dirname(os.path.realpath(__file__)) 9 | script_path = os.path.dirname(modules_path) 10 | 11 | 12 | def webpath(fn): 13 | if fn.startswith(script_path): 14 | web_path = os.path.relpath(fn, script_path).replace('\\', '/') 15 | else: 16 | web_path = os.path.abspath(fn) 17 | 18 | return f'file={web_path}?{os.path.getmtime(fn)}' 19 | 20 | 21 | def javascript_html(): 22 | script_js_path = webpath('javascript/script.js') 23 | context_menus_js_path = webpath('javascript/contextMenus.js') 24 | head = f'<script type="text/javascript" src="{script_js_path}"></script>\n' 25 | head += f'<script type="text/javascript" src="{context_menus_js_path}"></script>\n' 26 | return head 27 | 28 | 29 | def css_html(): 30 | style_css_path = webpath('css/style.css') 31 | head = f'<link rel="stylesheet" property="stylesheet" href="{style_css_path}">' 32 | return head 33 | 34 | 35 | def reload_javascript(): 36 | js = javascript_html() 37 | css = css_html() 38 | 39 | def template_response(*args, **kwargs): 40 | res = GradioTemplateResponseOriginal(*args, **kwargs) 41 | res.body = res.body.replace(b'</head>', f'{js}</head>'.encode("utf8")) 42 | res.body = res.body.replace(b'</body>', f'{css}</body>'.encode("utf8")) 43 | res.init_headers() 44 | return res 45 | 46 | gr.routes.templates.TemplateResponse = template_response 47 | -------------------------------------------------------------------------------- /modules/upscaler.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | from fcbh_extras.chainner_models.architecture.RRDB import RRDBNet as ESRGAN 5 | from fcbh_extras.nodes_upscale_model import ImageUpscaleWithModel 6 | from collections import OrderedDict 7 | from modules.path import upscale_models_path 8 | 9 | model_filename = os.path.join(upscale_models_path, 'fooocus_upscaler_s409985e5.bin') 10 | opImageUpscaleWithModel = ImageUpscaleWithModel() 11 | model = None 12 | 13 | 14 | def perform_upscale(img): 15 | global model 16 | if model is None: 17 | sd = torch.load(model_filename) 18 | sdo = OrderedDict() 19 | for k, v in sd.items(): 20 | sdo[k.replace('residual_block_', 'RDB')] = v 21 | del sd 22 | model = ESRGAN(sdo) 23 | model.cpu() 24 | model.eval() 25 | return opImageUpscaleWithModel.upscale(model, img)[0] 26 | -------------------------------------------------------------------------------- /notification-example.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenneishi/Fooocus-ControlNet-SDXL/9a7fde2ddd8259b3b799d52cf826f603d44b2259/notification-example.ogg -------------------------------------------------------------------------------- /presets/anime.json: -------------------------------------------------------------------------------- 1 | { 2 | "default_model": "bluePencilXL_v050.safetensors", 3 | "default_refiner": "DreamShaper_8_pruned.safetensors", 4 | "default_lora": "sd_xl_offset_example-lora_1.0.safetensors", 5 | "default_lora_weight": 0.5, 6 | "default_cfg_scale": 7.0, 7 | "default_sampler": "dpmpp_2m_sde_gpu", 8 | "default_scheduler": "karras", 9 |
"default_styles": [ 10 | "Fooocus V2", 11 | "Fooocus Masterpiece", 12 | "SAI Anime", 13 | "SAI Digital Art", 14 | "SAI Enhance", 15 | "SAI Fantasy Art" 16 | ], 17 | "default_negative_prompt": "(embedding:unaestheticXLv31:0.8), low quality, watermark", 18 | "default_positive_prompt": "1girl, ", 19 | "checkpoint_downloads": { 20 | "bluePencilXL_v050.safetensors": "https://huggingface.co/lllyasviel/fav_models/resolve/main/fav/bluePencilXL_v050.safetensors", 21 | "DreamShaper_8_pruned.safetensors": "https://huggingface.co/lllyasviel/fav_models/resolve/main/fav/DreamShaper_8_pruned.safetensors" 22 | }, 23 | "embeddings_downloads": { 24 | "unaestheticXLv31.safetensors": "https://huggingface.co/lllyasviel/fav_models/resolve/main/fav/unaestheticXLv31.safetensors" 25 | }, 26 | "default_aspect_ratio": "896*1152", 27 | "lora_downloads": { 28 | "sd_xl_offset_example-lora_1.0.safetensors": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_offset_example-lora_1.0.safetensors" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /presets/realistic.json: -------------------------------------------------------------------------------- 1 | { 2 | "default_model": "realisticStockPhoto_v10.safetensors", 3 | "default_refiner": "", 4 | "default_lora": "SDXL_FILM_PHOTOGRAPHY_STYLE_BetaV0.4.safetensors", 5 | "default_lora_weight": 0.25, 6 | "default_cfg_scale": 3.0, 7 | "default_sampler": "dpmpp_2m_sde_gpu", 8 | "default_scheduler": "karras", 9 | "default_styles": [ 10 | "Fooocus V2", 11 | "Fooocus Photograph", 12 | "Fooocus Negative" 13 | ], 14 | "default_negative_prompt": "unrealistic, saturated, high contrast, big nose, painting, drawing, sketch, cartoon, anime, manga, render, CG, 3d, watermark, signature, label", 15 | "default_positive_prompt": "", 16 | "checkpoint_downloads": { 17 | "realisticStockPhoto_v10.safetensors": "https://huggingface.co/lllyasviel/fav_models/resolve/main/fav/realisticStockPhoto_v10.safetensors" 18 | }, 19 | "embeddings_downloads": {}, 20 | "default_aspect_ratio": "896*1152", 21 | "lora_downloads": { 22 | "SDXL_FILM_PHOTOGRAPHY_STYLE_BetaV0.4.safetensors": "https://huggingface.co/lllyasviel/fav_models/resolve/main/fav/SDXL_FILM_PHOTOGRAPHY_STYLE_BetaV0.4.safetensors" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /presets/sdxl.json: -------------------------------------------------------------------------------- 1 | { 2 | "default_model": "sd_xl_base_1.0_0.9vae.safetensors", 3 | "default_refiner": "sd_xl_refiner_1.0_0.9vae.safetensors", 4 | "default_lora": "sd_xl_offset_example-lora_1.0.safetensors", 5 | "default_lora_weight": 0.5, 6 | "default_cfg_scale": 7.0, 7 | "default_sampler": "dpmpp_2m_sde_gpu", 8 | "default_scheduler": "karras", 9 | "default_styles": [ 10 | "Fooocus V2", 11 | "Default (Slightly Cinematic)" 12 | ], 13 | "default_negative_prompt": "low quality, bad hands, bad eyes, cropped, missing fingers, extra digit", 14 | "default_positive_prompt": "", 15 | "checkpoint_downloads": { 16 | "sd_xl_base_1.0_0.9vae.safetensors": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0_0.9vae.safetensors", 17 | "sd_xl_refiner_1.0_0.9vae.safetensors": "https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0/resolve/main/sd_xl_refiner_1.0_0.9vae.safetensors" 18 | }, 19 | "embeddings_downloads": {}, 20 | "default_aspect_ratio": "1152*896", 21 | "lora_downloads": { 22 | 
"sd_xl_offset_example-lora_1.0.safetensors": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_offset_example-lora_1.0.safetensors" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /python_hijack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | root = os.path.dirname(os.path.abspath(__file__)) 6 | sys.path.append(root) 7 | os.chdir(root) 8 | backend_path = os.path.join(root, 'backend', 'headless') 9 | if backend_path not in sys.path: 10 | sys.path.append(backend_path) 11 | os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" 12 | -------------------------------------------------------------------------------- /requirements_versions.txt: -------------------------------------------------------------------------------- 1 | torchsde==0.2.5 2 | einops==0.4.1 3 | transformers==4.30.2 4 | safetensors==0.3.1 5 | accelerate==0.21.0 6 | pyyaml==6.0 7 | Pillow==9.2.0 8 | scipy==1.9.3 9 | tqdm==4.64.1 10 | psutil==5.9.5 11 | numpy==1.23.5 12 | pytorch_lightning==1.9.4 13 | omegaconf==2.2.3 14 | gradio==3.39.0 15 | pygit2==1.12.2 16 | opencv-contrib-python==4.8.0.74 17 | timm==0.6.12 18 | scikit-image==0.22.0 -------------------------------------------------------------------------------- /sdxl_styles/sdxl_styles_fooocus.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Default (Slightly Cinematic)", 4 | "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", 5 | "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" 6 | }, 7 | { 8 | "name": "Fooocus Masterpiece", 9 | "prompt": "(masterpiece), (best quality), (ultra-detailed), {prompt}, illustration, disheveled hair, detailed eyes, perfect composition, moist skin, intricate details, earrings, by wlop", 10 | "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair,extra digit, fewer digits, cropped, worst quality, low quality" 11 | }, 12 | { 13 | "name": "Fooocus Photograph", 14 | "prompt": "photograph {prompt}, 50mm . 
cinematic 4k epic detailed 4k epic detailed photograph shot on kodak detailed cinematic hbo dark moody, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage", 15 | "negative_prompt": "Brad Pitt, bokeh, depth of field, blurry, cropped, regular face, saturated, contrast, deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, text, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck" 16 | }, 17 | { 18 | "name": "Fooocus Negative", 19 | "prompt": "", 20 | "negative_prompt": "deformed, bad anatomy, disfigured, poorly drawn face, mutated, extra limb, ugly, poorly drawn hands, missing limb, floating limbs, disconnected limbs, disconnected head, malformed hands, long neck, mutated hands and fingers, bad hands, missing fingers, cropped, worst quality, low quality, mutation, poorly drawn, huge calf, bad hands, fused hand, missing hand, disappearing arms, disappearing thigh, disappearing calf, disappearing legs, missing fingers, fused fingers, abnormal eye proportion, Abnormal hands, abnormal legs, abnormal feet, abnormal fingers, drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly, anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch" 21 | } 22 | ] 23 | -------------------------------------------------------------------------------- /shared.py: -------------------------------------------------------------------------------- 1 | gradio_root = None 2 | last_stop = None 3 | -------------------------------------------------------------------------------- /user_controlnet_config.py: -------------------------------------------------------------------------------- 1 | models = { 2 | "model": { 3 | 'dir': controlnet_models_path, 4 | 'name': 'thibaud_xl_openpose_256lora.safetensors', 5 | 'loader': 'ControlNet' 6 | }, 7 | 'preprocess_body': { 8 | 'url': 'https://huggingface.co/lllyasviel/Annotators/resolve/main/body_pose_model.pth', 9 | 'dir': controlnet_models_path, 10 | 'name': 'body_pose_model.pth', 11 | 'loader': 'PoseBody' 12 | }, 13 | 'preprocess_hand': { 14 | 'url': 'https://huggingface.co/lllyasviel/Annotators/resolve/main/hand_pose_model.pth', 15 | 'dir': controlnet_models_path, 16 | 'name': 'hand_pose_model.pth', 17 | 'loader': 'PoseHand' 18 | }, 19 | 'preprocess_face': { 20 | 'url': 'https://huggingface.co/lllyasviel/Annotators/resolve/main/facenet.pth', 21 | 'dir': controlnet_models_path, 22 | 'name': 'facenet.pth', 23 | 'loader': 'PoseFace' 24 | } 25 | } -------------------------------------------------------------------------------- /wildcards/color.txt: -------------------------------------------------------------------------------- 1 | aqua 2 | black 3 | blue 4 | fuchsia 5 | gray 6 | green 7 | lime 8 | maroon 9 | navy 10 | olive 11 | orange 12 | purple 13 | red 14 | silver 15 | teal 16 | white 17 | yellow 18 | --------------------------------------------------------------------------------