├── LICENSE ├── README.md ├── diffusers ├── .github │ ├── ISSUE_TEMPLATE │ │ ├── bug-report.yml │ │ ├── config.yml │ │ ├── feature_request.md │ │ ├── feedback.md │ │ └── new-model-addition.yml │ ├── actions │ │ └── setup-miniconda │ │ │ └── action.yml │ └── workflows │ │ ├── build_docker_images.yml │ │ ├── build_documentation.yml │ │ ├── build_pr_documentation.yml │ │ ├── delete_doc_comment.yml │ │ ├── nightly_tests.yml │ │ ├── pr_quality.yml │ │ ├── pr_tests.yml │ │ ├── push_tests.yml │ │ ├── push_tests_fast.yml │ │ ├── stale.yml │ │ └── typos.yml ├── .gitignore ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── _typos.toml ├── docker │ ├── diffusers-flax-cpu │ │ └── Dockerfile │ ├── diffusers-flax-tpu │ │ └── Dockerfile │ ├── diffusers-onnxruntime-cpu │ │ └── Dockerfile │ ├── diffusers-onnxruntime-cuda │ │ └── Dockerfile │ ├── diffusers-pytorch-cpu │ │ └── Dockerfile │ └── diffusers-pytorch-cuda │ │ └── Dockerfile ├── docs │ ├── README.md │ ├── TRANSLATING.md │ └── source │ │ ├── en │ │ ├── _toctree.yml │ │ ├── api │ │ │ ├── configuration.mdx │ │ │ ├── diffusion_pipeline.mdx │ │ │ ├── experimental │ │ │ │ └── rl.mdx │ │ │ ├── loaders.mdx │ │ │ ├── logging.mdx │ │ │ ├── models.mdx │ │ │ ├── outputs.mdx │ │ │ ├── pipelines │ │ │ │ ├── alt_diffusion.mdx │ │ │ │ ├── audio_diffusion.mdx │ │ │ │ ├── cycle_diffusion.mdx │ │ │ │ ├── dance_diffusion.mdx │ │ │ │ ├── ddim.mdx │ │ │ │ ├── ddpm.mdx │ │ │ │ ├── dit.mdx │ │ │ │ ├── latent_diffusion.mdx │ │ │ │ ├── latent_diffusion_uncond.mdx │ │ │ │ ├── overview.mdx │ │ │ │ ├── paint_by_example.mdx │ │ │ │ ├── pndm.mdx │ │ │ │ ├── repaint.mdx │ │ │ │ ├── score_sde_ve.mdx │ │ │ │ ├── semantic_stable_diffusion.mdx │ │ │ │ ├── stable_diffusion │ │ │ │ │ ├── attend_and_excite.mdx │ │ │ │ │ ├── controlnet.mdx │ │ │ │ │ ├── depth2img.mdx │ │ │ │ │ ├── image_variation.mdx │ │ │ │ │ ├── img2img.mdx │ │ │ │ │ ├── inpaint.mdx │ │ │ │ │ ├── latent_upscale.mdx │ │ │ │ │ ├── overview.mdx │ │ │ │ │ ├── panorama.mdx │ │ │ │ │ ├── pix2pix.mdx │ │ │ │ │ ├── pix2pix_zero.mdx │ │ │ │ │ ├── self_attention_guidance.mdx │ │ │ │ │ ├── text2img.mdx │ │ │ │ │ └── upscale.mdx │ │ │ │ ├── stable_diffusion_2.mdx │ │ │ │ ├── stable_diffusion_safe.mdx │ │ │ │ ├── stable_unclip.mdx │ │ │ │ ├── stochastic_karras_ve.mdx │ │ │ │ ├── unclip.mdx │ │ │ │ ├── versatile_diffusion.mdx │ │ │ │ └── vq_diffusion.mdx │ │ │ └── schedulers │ │ │ │ ├── ddim.mdx │ │ │ │ ├── ddim_inverse.mdx │ │ │ │ ├── ddpm.mdx │ │ │ │ ├── deis.mdx │ │ │ │ ├── dpm_discrete.mdx │ │ │ │ ├── dpm_discrete_ancestral.mdx │ │ │ │ ├── euler.mdx │ │ │ │ ├── euler_ancestral.mdx │ │ │ │ ├── heun.mdx │ │ │ │ ├── ipndm.mdx │ │ │ │ ├── lms_discrete.mdx │ │ │ │ ├── multistep_dpm_solver.mdx │ │ │ │ ├── overview.mdx │ │ │ │ ├── pndm.mdx │ │ │ │ ├── repaint.mdx │ │ │ │ ├── score_sde_ve.mdx │ │ │ │ ├── score_sde_vp.mdx │ │ │ │ ├── singlestep_dpm_solver.mdx │ │ │ │ ├── stochastic_karras_ve.mdx │ │ │ │ ├── unipc.mdx │ │ │ │ └── vq_diffusion.mdx │ │ ├── conceptual │ │ │ ├── contribution.mdx │ │ │ ├── ethical_guidelines.mdx │ │ │ └── philosophy.mdx │ │ ├── imgs │ │ │ ├── access_request.png │ │ │ └── diffusers_library.jpg │ │ ├── index.mdx │ │ ├── installation.mdx │ │ ├── optimization │ │ │ ├── fp16.mdx │ │ │ ├── habana.mdx │ │ │ ├── mps.mdx │ │ │ ├── onnx.mdx │ │ │ ├── open_vino.mdx │ │ │ ├── torch2.0.mdx │ │ │ └── xformers.mdx │ │ ├── quicktour.mdx │ │ ├── stable_diffusion.mdx │ │ ├── training │ │ │ ├── dreambooth.mdx │ │ │ ├── lora.mdx │ │ │ ├── overview.mdx │ │ │ ├── 
text2image.mdx │ │ │ ├── text_inversion.mdx │ │ │ └── unconditional_training.mdx │ │ ├── tutorials │ │ │ └── basic_training.mdx │ │ └── using-diffusers │ │ │ ├── audio.mdx │ │ │ ├── conditional_image_generation.mdx │ │ │ ├── configuration.mdx │ │ │ ├── contribute_pipeline.mdx │ │ │ ├── controlling_generation.mdx │ │ │ ├── custom_pipeline_examples.mdx │ │ │ ├── custom_pipeline_overview.mdx │ │ │ ├── depth2img.mdx │ │ │ ├── img2img.mdx │ │ │ ├── inpaint.mdx │ │ │ ├── kerascv.mdx │ │ │ ├── loading.mdx │ │ │ ├── other-modalities.mdx │ │ │ ├── reproducibility.mdx │ │ │ ├── reusing_seeds.mdx │ │ │ ├── rl.mdx │ │ │ ├── schedulers.mdx │ │ │ ├── unconditional_image_generation.mdx │ │ │ ├── using_safetensors │ │ │ └── using_safetensors.mdx │ │ └── ko │ │ ├── _toctree.yml │ │ ├── in_translation.mdx │ │ ├── index.mdx │ │ ├── installation.mdx │ │ └── quicktour.mdx ├── examples │ ├── README.md │ ├── community │ │ ├── README.md │ │ ├── bit_diffusion.py │ │ ├── checkpoint_merger.py │ │ ├── clip_guided_stable_diffusion.py │ │ ├── composable_stable_diffusion.py │ │ ├── imagic_stable_diffusion.py │ │ ├── img2img_inpainting.py │ │ ├── interpolate_stable_diffusion.py │ │ ├── lpw_stable_diffusion.py │ │ ├── lpw_stable_diffusion_onnx.py │ │ ├── magic_mix.py │ │ ├── multilingual_stable_diffusion.py │ │ ├── one_step_unet.py │ │ ├── sd_text2img_k_diffusion.py │ │ ├── seed_resize_stable_diffusion.py │ │ ├── speech_to_image_diffusion.py │ │ ├── stable_diffusion_comparison.py │ │ ├── stable_diffusion_controlnet_inpaint.py │ │ ├── stable_diffusion_controlnet_inpaint_img2img.py │ │ ├── stable_diffusion_mega.py │ │ ├── stable_unclip.py │ │ ├── text_inpainting.py │ │ ├── tiled_upscaling.py │ │ ├── unclip_image_interpolation.py │ │ ├── unclip_text_interpolation.py │ │ └── wildcard_stable_diffusion.py │ ├── conftest.py │ ├── dreambooth │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── requirements_flax.txt │ │ ├── train_dreambooth.py │ │ ├── train_dreambooth_flax.py │ │ └── train_dreambooth_lora.py │ ├── inference │ │ ├── README.md │ │ ├── image_to_image.py │ │ └── inpainting.py │ ├── research_projects │ │ ├── README.md │ │ ├── colossalai │ │ │ ├── README.md │ │ │ ├── inference.py │ │ │ ├── requirement.txt │ │ │ └── train_dreambooth_colossalai.py │ │ ├── dreambooth_inpaint │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── train_dreambooth_inpaint.py │ │ │ └── train_dreambooth_inpaint_lora.py │ │ ├── intel_opts │ │ │ ├── README.md │ │ │ ├── inference_bf16.py │ │ │ └── textual_inversion │ │ │ │ ├── README.md │ │ │ │ ├── requirements.txt │ │ │ │ └── textual_inversion_bf16.py │ │ ├── multi_subject_dreambooth │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── train_multi_subject_dreambooth.py │ │ └── onnxruntime │ │ │ ├── README.md │ │ │ ├── text_to_image │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── train_text_to_image.py │ │ │ ├── textual_inversion │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── textual_inversion.py │ │ │ └── unconditional_image_generation │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── train_unconditional.py │ ├── rl │ │ ├── README.md │ │ └── run_diffuser_locomotion.py │ ├── test_examples.py │ ├── text_to_image │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── requirements_flax.txt │ │ ├── train_text_to_image.py │ │ ├── train_text_to_image_flax.py │ │ └── train_text_to_image_lora.py │ ├── textual_inversion │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── requirements_flax.txt │ │ ├── textual_inversion.py │ │ └── textual_inversion_flax.py │ └── 
unconditional_image_generation │ │ ├── README.md │ │ ├── requirements.txt │ │ └── train_unconditional.py ├── pyproject.toml ├── scripts │ ├── __init__.py │ ├── change_naming_configs_and_checkpoints.py │ ├── conversion_ldm_uncond.py │ ├── convert_dance_diffusion_to_diffusers.py │ ├── convert_ddpm_original_checkpoint_to_diffusers.py │ ├── convert_diffusers_to_original_stable_diffusion.py │ ├── convert_dit_to_diffusers.py │ ├── convert_k_upscaler_to_diffusers.py │ ├── convert_kakao_brain_unclip_to_diffusers.py │ ├── convert_ldm_original_checkpoint_to_diffusers.py │ ├── convert_lora_safetensor_to_diffusers.py │ ├── convert_models_diffuser_to_diffusers.py │ ├── convert_ncsnpp_original_checkpoint_to_diffusers.py │ ├── convert_original_stable_diffusion_to_diffusers.py │ ├── convert_stable_diffusion_checkpoint_to_onnx.py │ ├── convert_unclip_txt2img_to_image_variation.py │ ├── convert_vae_diff_to_onnx.py │ ├── convert_vae_pt_to_diffusers.py │ ├── convert_versatile_diffusion_to_diffusers.py │ ├── convert_vq_diffusion_to_diffusers.py │ └── generate_logits.py ├── setup.cfg ├── setup.py ├── src │ └── diffusers │ │ ├── __init__.py │ │ ├── commands │ │ ├── __init__.py │ │ ├── diffusers_cli.py │ │ └── env.py │ │ ├── configuration_utils.py │ │ ├── dependency_versions_check.py │ │ ├── dependency_versions_table.py │ │ ├── experimental │ │ ├── README.md │ │ ├── __init__.py │ │ └── rl │ │ │ ├── __init__.py │ │ │ └── value_guided_sampling.py │ │ ├── loaders.py │ │ ├── models │ │ ├── README.md │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── attention_flax.py │ │ ├── autoencoder_kl.py │ │ ├── controlnet.py │ │ ├── cross_attention.py │ │ ├── dual_transformer_2d.py │ │ ├── embeddings.py │ │ ├── embeddings_flax.py │ │ ├── modeling_flax_pytorch_utils.py │ │ ├── modeling_flax_utils.py │ │ ├── modeling_pytorch_flax_utils.py │ │ ├── modeling_utils.py │ │ ├── prior_transformer.py │ │ ├── promptnet.py │ │ ├── resnet.py │ │ ├── resnet_flax.py │ │ ├── transformer_2d.py │ │ ├── unet_1d.py │ │ ├── unet_1d_blocks.py │ │ ├── unet_2d.py │ │ ├── unet_2d_blocks.py │ │ ├── unet_2d_blocks_flax.py │ │ ├── unet_2d_condition.py │ │ ├── unet_2d_condition_flax.py │ │ ├── vae.py │ │ ├── vae_flax.py │ │ └── vq_model.py │ │ ├── optimization.py │ │ ├── pipeline_utils.py │ │ ├── pipelines │ │ ├── README.md │ │ ├── __init__.py │ │ ├── alt_diffusion │ │ │ ├── __init__.py │ │ │ ├── modeling_roberta_series.py │ │ │ ├── pipeline_alt_diffusion.py │ │ │ └── pipeline_alt_diffusion_img2img.py │ │ ├── audio_diffusion │ │ │ ├── __init__.py │ │ │ ├── mel.py │ │ │ └── pipeline_audio_diffusion.py │ │ ├── dance_diffusion │ │ │ ├── __init__.py │ │ │ └── pipeline_dance_diffusion.py │ │ ├── ddim │ │ │ ├── __init__.py │ │ │ └── pipeline_ddim.py │ │ ├── ddpm │ │ │ ├── __init__.py │ │ │ └── pipeline_ddpm.py │ │ ├── dit │ │ │ ├── __init__.py │ │ │ └── pipeline_dit.py │ │ ├── latent_diffusion │ │ │ ├── __init__.py │ │ │ ├── pipeline_latent_diffusion.py │ │ │ └── pipeline_latent_diffusion_superresolution.py │ │ ├── latent_diffusion_uncond │ │ │ ├── __init__.py │ │ │ └── pipeline_latent_diffusion_uncond.py │ │ ├── onnx_utils.py │ │ ├── paint_by_example │ │ │ ├── __init__.py │ │ │ ├── image_encoder.py │ │ │ └── pipeline_paint_by_example.py │ │ ├── pipeline_flax_utils.py │ │ ├── pipeline_utils.py │ │ ├── pndm │ │ │ ├── __init__.py │ │ │ └── pipeline_pndm.py │ │ ├── repaint │ │ │ ├── __init__.py │ │ │ └── pipeline_repaint.py │ │ ├── score_sde_ve │ │ │ ├── __init__.py │ │ │ └── pipeline_score_sde_ve.py │ │ ├── semantic_stable_diffusion │ │ │ ├── __init__.py │ │ │ └── 
pipeline_semantic_stable_diffusion.py │ │ ├── stable_diffusion │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── convert_from_ckpt.py │ │ │ ├── pipeline_cycle_diffusion.py │ │ │ ├── pipeline_flax_stable_diffusion.py │ │ │ ├── pipeline_flax_stable_diffusion_img2img.py │ │ │ ├── pipeline_flax_stable_diffusion_inpaint.py │ │ │ ├── pipeline_onnx_stable_diffusion.py │ │ │ ├── pipeline_onnx_stable_diffusion_img2img.py │ │ │ ├── pipeline_onnx_stable_diffusion_inpaint.py │ │ │ ├── pipeline_onnx_stable_diffusion_inpaint_legacy.py │ │ │ ├── pipeline_onnx_stable_diffusion_upscale.py │ │ │ ├── pipeline_stable_diffusion.py │ │ │ ├── pipeline_stable_diffusion_attend_and_excite.py │ │ │ ├── pipeline_stable_diffusion_controlnet.py │ │ │ ├── pipeline_stable_diffusion_depth2img.py │ │ │ ├── pipeline_stable_diffusion_image_variation.py │ │ │ ├── pipeline_stable_diffusion_img2img.py │ │ │ ├── pipeline_stable_diffusion_inpaint.py │ │ │ ├── pipeline_stable_diffusion_inpaint_legacy.py │ │ │ ├── pipeline_stable_diffusion_instruct_pix2pix.py │ │ │ ├── pipeline_stable_diffusion_k_diffusion.py │ │ │ ├── pipeline_stable_diffusion_latent_upscale.py │ │ │ ├── pipeline_stable_diffusion_panorama.py │ │ │ ├── pipeline_stable_diffusion_pix2pix_zero.py │ │ │ ├── pipeline_stable_diffusion_promptnet.py │ │ │ ├── pipeline_stable_diffusion_sag.py │ │ │ ├── pipeline_stable_diffusion_upscale.py │ │ │ ├── pipeline_stable_unclip.py │ │ │ ├── pipeline_stable_unclip_img2img.py │ │ │ ├── safety_checker.py │ │ │ ├── safety_checker_flax.py │ │ │ └── stable_unclip_image_normalizer.py │ │ ├── stable_diffusion_safe │ │ │ ├── __init__.py │ │ │ ├── pipeline_stable_diffusion_safe.py │ │ │ └── safety_checker.py │ │ ├── stochastic_karras_ve │ │ │ ├── __init__.py │ │ │ └── pipeline_stochastic_karras_ve.py │ │ ├── unclip │ │ │ ├── __init__.py │ │ │ ├── pipeline_unclip.py │ │ │ ├── pipeline_unclip_image_variation.py │ │ │ └── text_proj.py │ │ ├── versatile_diffusion │ │ │ ├── __init__.py │ │ │ ├── modeling_text_unet.py │ │ │ ├── pipeline_versatile_diffusion.py │ │ │ ├── pipeline_versatile_diffusion_dual_guided.py │ │ │ ├── pipeline_versatile_diffusion_image_variation.py │ │ │ └── pipeline_versatile_diffusion_text_to_image.py │ │ └── vq_diffusion │ │ │ ├── __init__.py │ │ │ └── pipeline_vq_diffusion.py │ │ ├── schedulers │ │ ├── README.md │ │ ├── __init__.py │ │ ├── scheduling_ddim.py │ │ ├── scheduling_ddim_flax.py │ │ ├── scheduling_ddim_inverse.py │ │ ├── scheduling_ddpm.py │ │ ├── scheduling_ddpm_flax.py │ │ ├── scheduling_deis_multistep.py │ │ ├── scheduling_dpmsolver_multistep.py │ │ ├── scheduling_dpmsolver_multistep_flax.py │ │ ├── scheduling_dpmsolver_singlestep.py │ │ ├── scheduling_euler_ancestral_discrete.py │ │ ├── scheduling_euler_discrete.py │ │ ├── scheduling_heun_discrete.py │ │ ├── scheduling_ipndm.py │ │ ├── scheduling_k_dpm_2_ancestral_discrete.py │ │ ├── scheduling_k_dpm_2_discrete.py │ │ ├── scheduling_karras_ve.py │ │ ├── scheduling_karras_ve_flax.py │ │ ├── scheduling_lms_discrete.py │ │ ├── scheduling_lms_discrete_flax.py │ │ ├── scheduling_pndm.py │ │ ├── scheduling_pndm_flax.py │ │ ├── scheduling_repaint.py │ │ ├── scheduling_sde_ve.py │ │ ├── scheduling_sde_ve_flax.py │ │ ├── scheduling_sde_vp.py │ │ ├── scheduling_unclip.py │ │ ├── scheduling_unipc_multistep.py │ │ ├── scheduling_utils.py │ │ ├── scheduling_utils_flax.py │ │ └── scheduling_vq_diffusion.py │ │ ├── training_utils.py │ │ └── utils │ │ ├── __init__.py │ │ ├── accelerate_utils.py │ │ ├── constants.py │ │ ├── deprecation_utils.py │ │ ├── doc_utils.py │ │ 
├── dummy_flax_and_transformers_objects.py │ │ ├── dummy_flax_objects.py │ │ ├── dummy_onnx_objects.py │ │ ├── dummy_pt_objects.py │ │ ├── dummy_torch_and_librosa_objects.py │ │ ├── dummy_torch_and_scipy_objects.py │ │ ├── dummy_torch_and_transformers_and_k_diffusion_objects.py │ │ ├── dummy_torch_and_transformers_and_onnx_objects.py │ │ ├── dummy_torch_and_transformers_objects.py │ │ ├── dynamic_modules_utils.py │ │ ├── hub_utils.py │ │ ├── import_utils.py │ │ ├── logging.py │ │ ├── model_card_template.md │ │ ├── outputs.py │ │ ├── pil_utils.py │ │ ├── testing_utils.py │ │ └── torch_utils.py ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── fixtures │ │ └── custom_pipeline │ │ │ ├── pipeline.py │ │ │ └── what_ever.py │ ├── models │ │ ├── __init__.py │ │ ├── test_models_unet_1d.py │ │ ├── test_models_unet_2d.py │ │ ├── test_models_unet_2d_condition.py │ │ ├── test_models_unet_2d_flax.py │ │ ├── test_models_vae.py │ │ ├── test_models_vae_flax.py │ │ └── test_models_vq.py │ ├── pipeline_params.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── altdiffusion │ │ │ ├── __init__.py │ │ │ ├── test_alt_diffusion.py │ │ │ └── test_alt_diffusion_img2img.py │ │ ├── audio_diffusion │ │ │ ├── __init__.py │ │ │ └── test_audio_diffusion.py │ │ ├── dance_diffusion │ │ │ ├── __init__.py │ │ │ └── test_dance_diffusion.py │ │ ├── ddim │ │ │ ├── __init__.py │ │ │ └── test_ddim.py │ │ ├── ddpm │ │ │ ├── __init__.py │ │ │ └── test_ddpm.py │ │ ├── dit │ │ │ ├── __init__.py │ │ │ └── test_dit.py │ │ ├── karras_ve │ │ │ ├── __init__.py │ │ │ └── test_karras_ve.py │ │ ├── latent_diffusion │ │ │ ├── __init__.py │ │ │ ├── test_latent_diffusion.py │ │ │ ├── test_latent_diffusion_superresolution.py │ │ │ └── test_latent_diffusion_uncond.py │ │ ├── paint_by_example │ │ │ ├── __init__.py │ │ │ └── test_paint_by_example.py │ │ ├── pndm │ │ │ ├── __init__.py │ │ │ └── test_pndm.py │ │ ├── repaint │ │ │ ├── __init__.py │ │ │ └── test_repaint.py │ │ ├── score_sde_ve │ │ │ ├── __init__.py │ │ │ └── test_score_sde_ve.py │ │ ├── semantic_stable_diffusion │ │ │ ├── __init__.py │ │ │ └── test_semantic_diffusion.py │ │ ├── stable_diffusion │ │ │ ├── __init__.py │ │ │ ├── test_cycle_diffusion.py │ │ │ ├── test_onnx_stable_diffusion.py │ │ │ ├── test_onnx_stable_diffusion_img2img.py │ │ │ ├── test_onnx_stable_diffusion_inpaint.py │ │ │ ├── test_onnx_stable_diffusion_inpaint_legacy.py │ │ │ ├── test_onnx_stable_diffusion_upscale.py │ │ │ ├── test_stable_diffusion.py │ │ │ ├── test_stable_diffusion_controlnet.py │ │ │ ├── test_stable_diffusion_image_variation.py │ │ │ ├── test_stable_diffusion_img2img.py │ │ │ ├── test_stable_diffusion_inpaint.py │ │ │ ├── test_stable_diffusion_inpaint_legacy.py │ │ │ ├── test_stable_diffusion_instruction_pix2pix.py │ │ │ ├── test_stable_diffusion_k_diffusion.py │ │ │ ├── test_stable_diffusion_panorama.py │ │ │ ├── test_stable_diffusion_pix2pix_zero.py │ │ │ └── test_stable_diffusion_sag.py │ │ ├── stable_diffusion_2 │ │ │ ├── __init__.py │ │ │ ├── test_stable_diffusion.py │ │ │ ├── test_stable_diffusion_attend_and_excite.py │ │ │ ├── test_stable_diffusion_depth.py │ │ │ ├── test_stable_diffusion_flax.py │ │ │ ├── test_stable_diffusion_flax_inpaint.py │ │ │ ├── test_stable_diffusion_inpaint.py │ │ │ ├── test_stable_diffusion_latent_upscale.py │ │ │ ├── test_stable_diffusion_upscale.py │ │ │ └── test_stable_diffusion_v_pred.py │ │ ├── stable_diffusion_safe │ │ │ ├── __init__.py │ │ │ └── test_safe_diffusion.py │ │ ├── stable_unclip │ │ │ ├── __init__.py │ │ │ ├── test_stable_unclip.py │ │ │ └── 
test_stable_unclip_img2img.py │ │ ├── test_pipeline_utils.py │ │ ├── unclip │ │ │ ├── __init__.py │ │ │ ├── test_unclip.py │ │ │ └── test_unclip_image_variation.py │ │ ├── versatile_diffusion │ │ │ ├── __init__.py │ │ │ ├── test_versatile_diffusion_dual_guided.py │ │ │ ├── test_versatile_diffusion_image_variation.py │ │ │ ├── test_versatile_diffusion_mega.py │ │ │ └── test_versatile_diffusion_text_to_image.py │ │ └── vq_diffusion │ │ │ ├── __init__.py │ │ │ └── test_vq_diffusion.py │ ├── repo_utils │ │ ├── test_check_copies.py │ │ └── test_check_dummies.py │ ├── test_config.py │ ├── test_hub_utils.py │ ├── test_layers_utils.py │ ├── test_modeling_common.py │ ├── test_modeling_common_flax.py │ ├── test_outputs.py │ ├── test_pipelines.py │ ├── test_pipelines_common.py │ ├── test_pipelines_flax.py │ ├── test_pipelines_onnx_common.py │ ├── test_scheduler.py │ ├── test_scheduler_flax.py │ ├── test_training.py │ ├── test_unet_2d_blocks.py │ ├── test_unet_blocks_common.py │ └── test_utils.py └── utils │ ├── check_config_docstrings.py │ ├── check_copies.py │ ├── check_doc_toc.py │ ├── check_dummies.py │ ├── check_inits.py │ ├── check_repo.py │ ├── check_table.py │ ├── custom_init_isort.py │ ├── get_modified_files.py │ ├── overwrite_expected_slice.py │ ├── print_env.py │ ├── release.py │ └── stale.py ├── imgs ├── ProFusion_example.jpg ├── daniel.jpg ├── examples.png ├── framework.jpg ├── main_results_cafe.jpg └── object_results_cafe.jpg ├── process_img.py ├── test.ipynb ├── test_imgs ├── 00041.jpg ├── bengio.jpg └── danielwu.jpg └── train.py /diffusers/.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: Report a bug on diffusers 3 | labels: [ "bug" ] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Thanks a lot for taking the time to file this issue 🤗. 9 | Issues not only help to improve the library, but also publicly document common problems, questions, and workflows for the whole community! 10 | Thus, issues are of the same importance as pull requests when contributing to this library ❤️. 11 | In order to make your issue as **useful for the community as possible**, let's try to stick to some simple guidelines: 12 | - 1. Please try to be as precise and concise as possible. 13 | *Give your issue a fitting title. Assume that someone with very limited knowledge of diffusers can understand your issue. Add links to the source code, documentation, other issues, pull requests, etc.* 14 | - 2. If your issue is about something not working, **always** provide a reproducible code snippet. The reader should be able to reproduce your issue by **only copy-pasting your code snippet into a Python shell**. 15 | *The community cannot solve your issue if it cannot reproduce it. If your bug is related to training, add your training script and make everything needed to train public. Otherwise, just add a simple Python code snippet.* 16 | - 3. Add the **minimum amount of code / context that is needed to understand and reproduce your issue**. 17 | *Make the life of maintainers easy. `diffusers` receives many issues every day. Make sure your issue is about one bug and one bug only. Make sure you add only the context and code needed to understand your issue - nothing more.
Generally, every issue is a way of documenting this library, so try to make it a good documentation entry.* 18 | - type: markdown 19 | attributes: 20 | value: | 21 | For more detailed information on how to write good issues, have a look [here](https://huggingface.co/course/chapter8/5?fw=pt) 22 | - type: textarea 23 | id: bug-description 24 | attributes: 25 | label: Describe the bug 26 | description: A clear and concise description of what the bug is. If you intend to submit a pull request for this issue, tell us in the description. Thanks! 27 | placeholder: Bug description 28 | validations: 29 | required: true 30 | - type: textarea 31 | id: reproduction 32 | attributes: 33 | label: Reproduction 34 | description: Please provide a minimal reproducible code snippet which we can copy/paste to reproduce the issue. 35 | placeholder: Reproduction 36 | validations: 37 | required: true 38 | - type: textarea 39 | id: logs 40 | attributes: 41 | label: Logs 42 | description: "Please include the Python logs if you can." 43 | render: shell 44 | - type: textarea 45 | id: system-info 46 | attributes: 47 | label: System Info 48 | description: Please share your system info with us. You can run the command `diffusers-cli env` and copy-paste its output below. 49 | placeholder: diffusers version, platform, python version, ... 50 | validations: 51 | required: true 52 | -------------------------------------------------------------------------------- /diffusers/.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | contact_links: 2 | - name: Blank issue 3 | url: https://github.com/huggingface/diffusers/issues/new 4 | about: Other 5 | - name: Forum 6 | url: https://discuss.huggingface.co/ 7 | about: General usage questions and community discussions -------------------------------------------------------------------------------- /diffusers/.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature request" 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /diffusers/.github/ISSUE_TEMPLATE/feedback.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "💬 Feedback about API Design" 3 | about: Give feedback about the current API design 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What API design would you like to have changed or added to the library? Why?** 11 | 12 | **What use case would this enable or better enable?
Can you give us a code example?** 13 | -------------------------------------------------------------------------------- /diffusers/.github/ISSUE_TEMPLATE/new-model-addition.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F31F New model/pipeline/scheduler addition" 2 | description: Submit a proposal/request to implement a new diffusion model / pipeline / scheduler 3 | labels: [ "New model/pipeline/scheduler" ] 4 | 5 | body: 6 | - type: textarea 7 | id: description-request 8 | validations: 9 | required: true 10 | attributes: 11 | label: Model/Pipeline/Scheduler description 12 | description: | 13 | Put any and all important information relative to the model/pipeline/scheduler 14 | 15 | - type: checkboxes 16 | id: information-tasks 17 | attributes: 18 | label: Open source status 19 | description: | 20 | Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `diffusers`. 21 | options: 22 | - label: "The model implementation is available" 23 | - label: "The model weights are available (Only relevant if addition is not a scheduler)." 24 | 25 | - type: textarea 26 | id: additional-info 27 | attributes: 28 | label: Provide useful links for the implementation 29 | description: | 30 | Please provide information regarding the implementation, the weights, and the authors. 31 | Please mention the authors by @gh-username if you're aware of their usernames. 32 | -------------------------------------------------------------------------------- /diffusers/.github/workflows/build_docker_images.yml: -------------------------------------------------------------------------------- 1 | name: Build Docker images (nightly) 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: "0 0 * * *" # every day at midnight 7 | 8 | concurrency: 9 | group: docker-image-builds 10 | cancel-in-progress: false 11 | 12 | env: 13 | REGISTRY: diffusers 14 | 15 | jobs: 16 | build-docker-images: 17 | runs-on: ubuntu-latest 18 | 19 | permissions: 20 | contents: read 21 | packages: write 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | image-name: 27 | - diffusers-pytorch-cpu 28 | - diffusers-pytorch-cuda 29 | - diffusers-flax-cpu 30 | - diffusers-flax-tpu 31 | - diffusers-onnxruntime-cpu 32 | - diffusers-onnxruntime-cuda 33 | 34 | steps: 35 | - name: Checkout repository 36 | uses: actions/checkout@v3 37 | 38 | - name: Login to Docker Hub 39 | uses: docker/login-action@v2 40 | with: 41 | username: ${{ env.REGISTRY }} 42 | password: ${{ secrets.DOCKERHUB_TOKEN }} 43 | 44 | - name: Build and push 45 | uses: docker/build-push-action@v3 46 | with: 47 | no-cache: true 48 | context: ./docker/${{ matrix.image-name }} 49 | push: true 50 | tags: ${{ env.REGISTRY }}/${{ matrix.image-name }}:latest 51 | -------------------------------------------------------------------------------- /diffusers/.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - doc-builder* 8 | - v*-release 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.sha }} 15 | package: diffusers 16 | languages: en ko 17 | secrets: 18 | token: ${{ secrets.HUGGINGFACE_PUSH }} 19 | -------------------------------------------------------------------------------- 
/diffusers/.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | 6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.event.pull_request.head.sha }} 15 | pr_number: ${{ github.event.number }} 16 | package: diffusers 17 | languages: en ko 18 | -------------------------------------------------------------------------------- /diffusers/.github/workflows/delete_doc_comment.yml: -------------------------------------------------------------------------------- 1 | name: Delete dev documentation 2 | 3 | on: 4 | pull_request: 5 | types: [ closed ] 6 | 7 | 8 | jobs: 9 | delete: 10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main 11 | with: 12 | pr_number: ${{ github.event.number }} 13 | package: diffusers 14 | -------------------------------------------------------------------------------- /diffusers/.github/workflows/pr_quality.yml: -------------------------------------------------------------------------------- 1 | name: Run code quality checks 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | push: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | check_code_quality: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Set up Python 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: "3.7" 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install .[quality] 28 | - name: Check quality 29 | run: | 30 | black --check examples tests src utils scripts 31 | ruff examples tests src utils scripts 32 | doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source 33 | 34 | check_repository_consistency: 35 | runs-on: ubuntu-latest 36 | steps: 37 | - uses: actions/checkout@v3 38 | - name: Set up Python 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: "3.7" 42 | - name: Install dependencies 43 | run: | 44 | python -m pip install --upgrade pip 45 | pip install .[quality] 46 | - name: Check quality 47 | run: | 48 | python utils/check_copies.py 49 | python utils/check_dummies.py 50 | -------------------------------------------------------------------------------- /diffusers/.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Stale Bot 2 | 3 | on: 4 | schedule: 5 | - cron: "0 15 * * *" 6 | 7 | jobs: 8 | close_stale_issues: 9 | name: Close Stale Issues 10 | if: github.repository == 'huggingface/diffusers' 11 | runs-on: ubuntu-latest 12 | env: 13 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - name: Setup Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: 3.7 21 | 22 | - name: Install requirements 23 | run: | 24 | pip install PyGithub 25 | - name: Close stale issues 26 | run: | 27 | python utils/stale.py 28 | -------------------------------------------------------------------------------- /diffusers/.github/workflows/typos.yml: -------------------------------------------------------------------------------- 1 | name: 
Check typos 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: typos-action 14 | uses: crate-ci/typos@v1.12.4 15 | -------------------------------------------------------------------------------- /diffusers/.gitignore: -------------------------------------------------------------------------------- 1 | # Initially taken from Github's Python gitignore file 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # tests and logs 12 | tests/fixtures/cached_*_text.txt 13 | logs/ 14 | lightning_logs/ 15 | lang_code_data/ 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # Environments 98 | .env 99 | .venv 100 | env/ 101 | venv/ 102 | ENV/ 103 | env.bak/ 104 | venv.bak/ 105 | 106 | # Spyder project settings 107 | .spyderproject 108 | .spyproject 109 | 110 | # Rope project settings 111 | .ropeproject 112 | 113 | # mkdocs documentation 114 | /site 115 | 116 | # mypy 117 | .mypy_cache/ 118 | .dmypy.json 119 | dmypy.json 120 | 121 | # Pyre type checker 122 | .pyre/ 123 | 124 | # vscode 125 | .vs 126 | .vscode 127 | 128 | # Pycharm 129 | .idea 130 | 131 | # TF code 132 | tensorflow_code 133 | 134 | # Models 135 | proc_data 136 | 137 | # examples 138 | runs 139 | /runs_old 140 | /wandb 141 | /examples/runs 142 | /examples/**/*.args 143 | /examples/rag/sweep 144 | 145 | # data 146 | /data 147 | serialization_dir 148 | 149 | # emacs 150 | *.*~ 151 | debug.env 152 | 153 | # vim 154 | .*.swp 155 | 156 | #ctags 157 | tags 158 | 159 | # pre-commit 160 | .pre-commit* 161 | 162 | # .lock 163 | *.lock 164 | 165 | # DS_Store (MacOS) 166 | .DS_Store 167 | # RL pipelines may produce mp4 outputs 168 | *.mp4 169 | 170 | # dependencies 171 | /transformers 172 | 173 | # ruff 174 | .ruff_cache 175 | -------------------------------------------------------------------------------- /diffusers/CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: 'Diffusers: State-of-the-art diffusion models' 3 | message: >- 4 | If you use this software, please cite it using the 5 | metadata from this file. 
6 | type: software 7 | authors: 8 | - given-names: Patrick 9 | family-names: von Platen 10 | - given-names: Suraj 11 | family-names: Patil 12 | - given-names: Anton 13 | family-names: Lozhkov 14 | - given-names: Pedro 15 | family-names: Cuenca 16 | - given-names: Nathan 17 | family-names: Lambert 18 | - given-names: Kashif 19 | family-names: Rasul 20 | - given-names: Mishig 21 | family-names: Davaadorj 22 | - given-names: Thomas 23 | family-names: Wolf 24 | repository-code: 'https://github.com/huggingface/diffusers' 25 | abstract: >- 26 | Diffusers provides pretrained diffusion models across 27 | multiple modalities, such as vision and audio, and serves 28 | as a modular toolbox for inference and training of 29 | diffusion models. 30 | keywords: 31 | - deep-learning 32 | - pytorch 33 | - image-generation 34 | - diffusion 35 | - text2image 36 | - image2image 37 | - score-based-generative-modeling 38 | - stable-diffusion 39 | license: Apache-2.0 40 | version: 0.12.1 41 | -------------------------------------------------------------------------------- /diffusers/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include src/diffusers/utils/model_card_template.md 3 | -------------------------------------------------------------------------------- /diffusers/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples 2 | 3 | # make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!) 4 | export PYTHONPATH = src 5 | 6 | check_dirs := examples scripts src tests utils 7 | 8 | modified_only_fixup: 9 | $(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs))) 10 | @if test -n "$(modified_py_files)"; then \ 11 | echo "Checking/fixing $(modified_py_files)"; \ 12 | black $(modified_py_files); \ 13 | ruff $(modified_py_files); \ 14 | else \ 15 | echo "No library .py files were modified"; \ 16 | fi 17 | 18 | # Update src/diffusers/dependency_versions_table.py 19 | 20 | deps_table_update: 21 | @python setup.py deps_table_update 22 | 23 | deps_table_check_updated: 24 | @md5sum src/diffusers/dependency_versions_table.py > md5sum.saved 25 | @python setup.py deps_table_update 26 | @md5sum -c --quiet md5sum.saved || (printf "\nError: the version dependency table is outdated.\nPlease run 'make fixup' or 'make style' and commit the changes.\n\n" && exit 1) 27 | @rm md5sum.saved 28 | 29 | # autogenerating code 30 | 31 | autogenerate_code: deps_table_update 32 | 33 | # Check that the repo is in a good state 34 | 35 | repo-consistency: 36 | python utils/check_dummies.py 37 | python utils/check_repo.py 38 | python utils/check_inits.py 39 | 40 | # this target runs checks on all files 41 | 42 | quality: 43 | black --check $(check_dirs) 44 | ruff $(check_dirs) 45 | doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source 46 | python utils/check_doc_toc.py 47 | 48 | # Format source code automatically and check is there are any problems left that need manual fixing 49 | 50 | extra_style_checks: 51 | python utils/custom_init_isort.py 52 | doc-builder style src/diffusers docs/source --max_len 119 --path_to_docs docs/source 53 | python utils/check_doc_toc.py --fix_and_overwrite 54 | 55 | # this target runs checks on all files and potentially modifies some of them 56 | 57 | style: 58 | black 
$(check_dirs) 59 | ruff $(check_dirs) --fix 60 | ${MAKE} autogenerate_code 61 | ${MAKE} extra_style_checks 62 | 63 | # Super fast fix and check target that only works on relevant modified files since the branch was made 64 | 65 | fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency 66 | 67 | # Make marked copies of snippets of codes conform to the original 68 | 69 | fix-copies: 70 | python utils/check_copies.py --fix_and_overwrite 71 | python utils/check_dummies.py --fix_and_overwrite 72 | 73 | # Run tests for the library 74 | 75 | test: 76 | python -m pytest -n auto --dist=loadfile -s -v ./tests/ 77 | 78 | # Run tests for examples 79 | 80 | test-examples: 81 | python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/ 82 | 83 | 84 | # Release stuff 85 | 86 | pre-release: 87 | python utils/release.py 88 | 89 | pre-patch: 90 | python utils/release.py --patch 91 | 92 | post-release: 93 | python utils/release.py --post_release 94 | 95 | post-patch: 96 | python utils/release.py --post_release --patch 97 | -------------------------------------------------------------------------------- /diffusers/_typos.toml: -------------------------------------------------------------------------------- 1 | # Files for typos 2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started 3 | 4 | [default.extend-identifiers] 5 | 6 | [default.extend-words] 7 | NIN="NIN" # NIN is used in scripts/convert_ncsnpp_original_checkpoint_to_diffusers.py 8 | nd="np" # nd may be np (numpy) 9 | parms="parms" # parms is used in scripts/convert_original_stable_diffusion_to_diffusers.py 10 | 11 | 12 | [files] 13 | extend-exclude = ["_typos.toml"] 14 | -------------------------------------------------------------------------------- /diffusers/docker/diffusers-flax-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="diffusers" 4 | 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | RUN apt update && \ 8 | apt install -y bash \ 9 | build-essential \ 10 | git \ 11 | git-lfs \ 12 | curl \ 13 | ca-certificates \ 14 | libsndfile1-dev \ 15 | python3.8 \ 16 | python3-pip \ 17 | python3.8-venv && \ 18 | rm -rf /var/lib/apt/lists 19 | 20 | # make sure to use venv 21 | RUN python3 -m venv /opt/venv 22 | ENV PATH="/opt/venv/bin:$PATH" 23 | 24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) 25 | # follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container 26 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 27 | python3 -m pip install --upgrade --no-cache-dir \ 28 | clu \ 29 | "jax[cpu]>=0.2.16,!=0.3.2" \ 30 | "flax>=0.4.1" \ 31 | "jaxlib>=0.1.65" && \ 32 | python3 -m pip install --no-cache-dir \ 33 | accelerate \ 34 | datasets \ 35 | hf-doc-builder \ 36 | huggingface-hub \ 37 | Jinja2 \ 38 | librosa \ 39 | numpy \ 40 | scipy \ 41 | tensorboard \ 42 | transformers 43 | 44 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /diffusers/docker/diffusers-flax-tpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="diffusers" 4 | 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | RUN apt update && \ 8 | apt install -y bash \ 9 | build-essential \ 10 | git \ 11 | git-lfs \ 12 | curl \ 
13 | ca-certificates \ 14 | libsndfile1-dev \ 15 | python3.8 \ 16 | python3-pip \ 17 | python3.8-venv && \ 18 | rm -rf /var/lib/apt/lists 19 | 20 | # make sure to use venv 21 | RUN python3 -m venv /opt/venv 22 | ENV PATH="/opt/venv/bin:$PATH" 23 | 24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) 25 | # follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container 26 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 27 | python3 -m pip install --no-cache-dir \ 28 | "jax[tpu]>=0.2.16,!=0.3.2" \ 29 | -f https://storage.googleapis.com/jax-releases/libtpu_releases.html && \ 30 | python3 -m pip install --upgrade --no-cache-dir \ 31 | clu \ 32 | "flax>=0.4.1" \ 33 | "jaxlib>=0.1.65" && \ 34 | python3 -m pip install --no-cache-dir \ 35 | accelerate \ 36 | datasets \ 37 | hf-doc-builder \ 38 | huggingface-hub \ 39 | Jinja2 \ 40 | librosa \ 41 | numpy \ 42 | scipy \ 43 | tensorboard \ 44 | transformers 45 | 46 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /diffusers/docker/diffusers-onnxruntime-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="diffusers" 4 | 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | RUN apt update && \ 8 | apt install -y bash \ 9 | build-essential \ 10 | git \ 11 | git-lfs \ 12 | curl \ 13 | ca-certificates \ 14 | libsndfile1-dev \ 15 | python3.8 \ 16 | python3-pip \ 17 | python3.8-venv && \ 18 | rm -rf /var/lib/apt/lists 19 | 20 | # make sure to use venv 21 | RUN python3 -m venv /opt/venv 22 | ENV PATH="/opt/venv/bin:$PATH" 23 | 24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) 25 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 26 | python3 -m pip install --no-cache-dir \ 27 | torch \ 28 | torchvision \ 29 | torchaudio \ 30 | onnxruntime \ 31 | --extra-index-url https://download.pytorch.org/whl/cpu && \ 32 | python3 -m pip install --no-cache-dir \ 33 | accelerate \ 34 | datasets \ 35 | hf-doc-builder \ 36 | huggingface-hub \ 37 | Jinja2 \ 38 | librosa \ 39 | numpy \ 40 | scipy \ 41 | tensorboard \ 42 | transformers 43 | 44 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /diffusers/docker/diffusers-onnxruntime-cuda/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="diffusers" 4 | 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | RUN apt update && \ 8 | apt install -y bash \ 9 | build-essential \ 10 | git \ 11 | git-lfs \ 12 | curl \ 13 | ca-certificates \ 14 | libsndfile1-dev \ 15 | python3.8 \ 16 | python3-pip \ 17 | python3.8-venv && \ 18 | rm -rf /var/lib/apt/lists 19 | 20 | # make sure to use venv 21 | RUN python3 -m venv /opt/venv 22 | ENV PATH="/opt/venv/bin:$PATH" 23 | 24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) 25 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 26 | python3 -m pip install --no-cache-dir \ 27 | torch \ 28 | torchvision \ 29 | torchaudio \ 30 | "onnxruntime-gpu>=1.13.1" \ 31 | --extra-index-url https://download.pytorch.org/whl/cu117 && \ 32 | python3 -m pip install --no-cache-dir \ 33 | accelerate \ 34 | datasets \ 35 | 
hf-doc-builder \ 36 | huggingface-hub \ 37 | Jinja2 \ 38 | librosa \ 39 | numpy \ 40 | scipy \ 41 | tensorboard \ 42 | transformers 43 | 44 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /diffusers/docker/diffusers-pytorch-cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="diffusers" 4 | 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | RUN apt update && \ 8 | apt install -y bash \ 9 | build-essential \ 10 | git \ 11 | git-lfs \ 12 | curl \ 13 | ca-certificates \ 14 | libsndfile1-dev \ 15 | python3.8 \ 16 | python3-pip \ 17 | python3.8-venv && \ 18 | rm -rf /var/lib/apt/lists 19 | 20 | # make sure to use venv 21 | RUN python3 -m venv /opt/venv 22 | ENV PATH="/opt/venv/bin:$PATH" 23 | 24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) 25 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 26 | python3 -m pip install --no-cache-dir \ 27 | torch \ 28 | torchvision \ 29 | torchaudio \ 30 | --extra-index-url https://download.pytorch.org/whl/cpu && \ 31 | python3 -m pip install --no-cache-dir \ 32 | accelerate \ 33 | datasets \ 34 | hf-doc-builder \ 35 | huggingface-hub \ 36 | Jinja2 \ 37 | librosa \ 38 | numpy \ 39 | scipy \ 40 | tensorboard \ 41 | transformers 42 | 43 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /diffusers/docker/diffusers-pytorch-cuda/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04 2 | LABEL maintainer="Hugging Face" 3 | LABEL repository="diffusers" 4 | 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | RUN apt update && \ 8 | apt install -y bash \ 9 | build-essential \ 10 | git \ 11 | git-lfs \ 12 | curl \ 13 | ca-certificates \ 14 | libsndfile1-dev \ 15 | python3.8 \ 16 | python3-pip \ 17 | python3.8-venv && \ 18 | rm -rf /var/lib/apt/lists 19 | 20 | # make sure to use venv 21 | RUN python3 -m venv /opt/venv 22 | ENV PATH="/opt/venv/bin:$PATH" 23 | 24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py) 25 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \ 26 | python3 -m pip install --no-cache-dir \ 27 | torch \ 28 | torchvision \ 29 | torchaudio \ 30 | --extra-index-url https://download.pytorch.org/whl/cu117 && \ 31 | python3 -m pip install --no-cache-dir \ 32 | accelerate \ 33 | datasets \ 34 | hf-doc-builder \ 35 | huggingface-hub \ 36 | Jinja2 \ 37 | librosa \ 38 | numpy \ 39 | scipy \ 40 | tensorboard \ 41 | transformers 42 | 43 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration 14 | 15 | In Diffusers, schedulers of type [`schedulers.scheduling_utils.SchedulerMixin`] and models of type [`ModelMixin`] inherit from [`ConfigMixin`], which conveniently stores all parameters that are 16 | passed to the respective `__init__` methods in a JSON configuration file.
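A minimal sketch of the round trip this enables (the local directory name is arbitrary): the parameters passed to a scheduler's `__init__` are recorded on its `.config` attribute, and [`~ConfigMixin.save_config`], [`~ConfigMixin.load_config`] and [`~ConfigMixin.from_config`] move them through a `scheduler_config.json` file:

```python
from diffusers import DDIMScheduler

# parameters passed to __init__ are recorded on the frozen `.config` attribute
scheduler = DDIMScheduler(num_train_timesteps=1000, beta_schedule="linear")
print(scheduler.config.beta_schedule)  # "linear"

# save_config writes scheduler_config.json; load_config/from_config restore it
scheduler.save_config("./my_scheduler")
config = DDIMScheduler.load_config("./my_scheduler")
restored = DDIMScheduler.from_config(config)
assert restored.config.num_train_timesteps == 1000
```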
17 | 18 | ## ConfigMixin 19 | 20 | [[autodoc]] ConfigMixin 21 | - load_config 22 | - from_config 23 | - save_config 24 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/diffusion_pipeline.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Pipelines 14 | 15 | The [`DiffusionPipeline`] is the easiest way to load any pretrained diffusion pipeline from the [Hub](https://huggingface.co/models?library=diffusers) and to use it in inference. 16 | 17 | 18 | 19 | One should not use the [`DiffusionPipeline`] class for training or fine-tuning a diffusion model. Individual 20 | components of diffusion pipelines are usually trained individually, so we suggest working directly 21 | with [`UNet2DModel`] and [`UNet2DConditionModel`]. 22 | 23 | 24 | 25 | Any diffusion pipeline that is loaded with [`~DiffusionPipeline.from_pretrained`] will automatically 26 | detect the pipeline type, *e.g.* [`StableDiffusionPipeline`], and consequently load each component of the 27 | pipeline and pass them into the `__init__` function of the pipeline, *e.g.* [`~StableDiffusionPipeline.__init__`]. 28 | 29 | Any pipeline object can be saved locally with [`~DiffusionPipeline.save_pretrained`]. 30 | 31 | ## DiffusionPipeline 32 | [[autodoc]] DiffusionPipeline 33 | - all 34 | - __call__ 35 | - device 36 | - to 37 | - components 38 | 39 | ## ImagePipelineOutput 40 | By default, diffusion pipelines return an object of the following class: 41 | 42 | [[autodoc]] pipelines.ImagePipelineOutput 43 | 44 | ## AudioPipelineOutput 45 | By default, diffusion pipelines return an object of the following class: 46 | 47 | [[autodoc]] pipelines.AudioPipelineOutput 48 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/experimental/rl.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # TODO 14 | 15 | Coming soon! -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/loaders.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Loaders 14 | 15 | There are many ways to train adapter neural networks for diffusion models, such as 16 | - [Textual Inversion](./training/text_inversion.mdx) 17 | - [LoRA](https://github.com/cloneofsimo/lora) 18 | - [Hypernetworks](https://arxiv.org/abs/1609.09106) 19 | 20 | Such adapter neural networks often contain only a fraction of the weights of 21 | the pretrained model, which makes them very portable. The Diffusers library offers an easy-to-use 22 | API to load such adapter neural networks via the [`loaders.py` module](https://github.com/huggingface/diffusers/blob/main/src/diffusers/loaders.py). 23 | 24 | **Note**: This module is still highly experimental and prone to future changes. 25 | 26 | ## LoaderMixins 27 | 28 | ### UNet2DConditionLoadersMixin 29 | 30 | [[autodoc]] loaders.UNet2DConditionLoadersMixin 31 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/models.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Models 14 | 15 | Diffusers contains pretrained models for popular algorithms and modules for creating the next set of diffusion models. 16 | The primary function of these models is to denoise an input sample by modeling the distribution $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$.
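To make this concrete, here is a minimal sketch of a single reverse-diffusion step; the `google/ddpm-cat-256` checkpoint is only an example, and any unconditional DDPM checkpoint should behave the same way:

```python
import torch

from diffusers import DDPMScheduler, UNet2DModel

model = UNet2DModel.from_pretrained("google/ddpm-cat-256")
scheduler = DDPMScheduler.from_pretrained("google/ddpm-cat-256")
scheduler.set_timesteps(50)

size = model.config.sample_size
x_t = torch.randn(1, model.config.in_channels, size, size)  # a fully noisy sample

t = scheduler.timesteps[0]
with torch.no_grad():
    noise_pred = model(x_t, t).sample  # the model predicts the noise residual
# the scheduler turns the prediction into a sample from p_theta(x_{t-1} | x_t)
x_prev = scheduler.step(noise_pred, t, x_t).prev_sample
```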
17 | The models are built on the base class [`ModelMixin`], which is a `torch.nn.Module` with basic functionality for saving and loading models both locally and from the Hugging Face Hub. 18 | 19 | ## ModelMixin 20 | [[autodoc]] ModelMixin 21 | 22 | ## UNet2DOutput 23 | [[autodoc]] models.unet_2d.UNet2DOutput 24 | 25 | ## UNet2DModel 26 | [[autodoc]] UNet2DModel 27 | 28 | ## UNet1DOutput 29 | [[autodoc]] models.unet_1d.UNet1DOutput 30 | 31 | ## UNet1DModel 32 | [[autodoc]] UNet1DModel 33 | 34 | ## UNet2DConditionOutput 35 | [[autodoc]] models.unet_2d_condition.UNet2DConditionOutput 36 | 37 | ## UNet2DConditionModel 38 | [[autodoc]] UNet2DConditionModel 39 | 40 | ## DecoderOutput 41 | [[autodoc]] models.vae.DecoderOutput 42 | 43 | ## VQEncoderOutput 44 | [[autodoc]] models.vq_model.VQEncoderOutput 45 | 46 | ## VQModel 47 | [[autodoc]] VQModel 48 | 49 | ## AutoencoderKLOutput 50 | [[autodoc]] models.autoencoder_kl.AutoencoderKLOutput 51 | 52 | ## AutoencoderKL 53 | [[autodoc]] AutoencoderKL 54 | 55 | ## Transformer2DModel 56 | [[autodoc]] Transformer2DModel 57 | 58 | ## Transformer2DModelOutput 59 | [[autodoc]] models.transformer_2d.Transformer2DModelOutput 60 | 61 | ## PriorTransformer 62 | [[autodoc]] models.prior_transformer.PriorTransformer 63 | 64 | ## PriorTransformerOutput 65 | [[autodoc]] models.prior_transformer.PriorTransformerOutput 66 | 67 | ## ControlNetOutput 68 | [[autodoc]] models.controlnet.ControlNetOutput 69 | 70 | ## ControlNetModel 71 | [[autodoc]] ControlNetModel 72 | 73 | ## FlaxModelMixin 74 | [[autodoc]] FlaxModelMixin 75 | 76 | ## FlaxUNet2DConditionOutput 77 | [[autodoc]] models.unet_2d_condition_flax.FlaxUNet2DConditionOutput 78 | 79 | ## FlaxUNet2DConditionModel 80 | [[autodoc]] FlaxUNet2DConditionModel 81 | 82 | ## FlaxDecoderOutput 83 | [[autodoc]] models.vae_flax.FlaxDecoderOutput 84 | 85 | ## FlaxAutoencoderKLOutput 86 | [[autodoc]] models.vae_flax.FlaxAutoencoderKLOutput 87 | 88 | ## FlaxAutoencoderKL 89 | [[autodoc]] FlaxAutoencoderKL 90 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/outputs.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # BaseOutputs 14 | 15 | All models have outputs that are instances of subclasses of [`~utils.BaseOutput`]. Those are 16 | data structures containing all the information returned by the model, but they can also be used as tuples or 17 | dictionaries. 18 | 19 | Let's see how this looks in an example: 20 | 21 | ```python 22 | from diffusers import DDIMPipeline 23 | 24 | pipeline = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32") 25 | outputs = pipeline() 26 | ``` 27 | 28 | The `outputs` object is a [`~pipelines.ImagePipelineOutput`]; as we can see in the 29 | documentation of that class below, it has an `images` attribute. 30 | 31 | You can access each attribute as you usually would, and if an attribute has not been returned by the model, you will get `None`: 32 | 33 | ```python 34 | outputs.images 35 | ``` 36 | 37 | or via dictionary-style key lookup: 38 | 39 | ```python 40 | outputs["images"] 41 | ``` 42 | 43 | When treating our `outputs` object as a tuple, it only contains the attributes that don't have `None` values. 44 | Here, for instance, we could retrieve the images via indexing: 45 | 46 | ```python 47 | outputs[:1] 48 | ``` 49 | 50 | which returns the one-element tuple `(outputs.images,)`.
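The same mechanics apply to any [`~utils.BaseOutput`] subclass. A small self-contained sketch with a hypothetical two-field output makes the behavior around `None` values explicit:

```python
from dataclasses import dataclass

import numpy as np

from diffusers.utils import BaseOutput


@dataclass
class MyOutput(BaseOutput):
    images: np.ndarray = None
    extra: np.ndarray = None


out = MyOutput(images=np.zeros((1, 8, 8, 3)))
out.images        # attribute access
out["images"]     # dictionary-style access
out.to_tuple()    # returns (out.images,) since `extra` is None and gets dropped
out[:1]           # tuple-style indexing, equivalent to out.to_tuple()[:1]
```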
51 | 52 | ## BaseOutput 53 | 54 | [[autodoc]] utils.BaseOutput 55 | - to_tuple 56 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/dance_diffusion.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Dance Diffusion 14 | 15 | ## Overview 16 | 17 | [Dance Diffusion](https://github.com/Harmonai-org/sample-generator) by Zach Evans. 18 | 19 | Dance Diffusion is the first in a suite of generative audio tools for producers and musicians to be released by Harmonai. 20 | For more info or to get involved in the development of these tools, please visit https://harmonai.org and fill out the form on the front page. 21 | 22 | The original codebase of this implementation can be found [here](https://github.com/Harmonai-org/sample-generator). 23 | 24 | ## Available Pipelines: 25 | 26 | | Pipeline | Tasks | Colab 27 | |---|---|:---:| 28 | | [pipeline_dance_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py) | *Unconditional Audio Generation* | - | 29 | 30 | 31 | ## DanceDiffusionPipeline 32 | [[autodoc]] DanceDiffusionPipeline 33 | - all 34 | - __call__ 35 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/ddim.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # DDIM 14 | 15 | ## Overview 16 | 17 | [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) (DDIM) by Jiaming Song, Chenlin Meng and Stefano Ermon. 18 | 19 | The abstract of the paper is the following: 20 | 21 | Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a Markovian diffusion process. We construct a class of non-Markovian diffusion processes that lead to the same training objective, but whose reverse process can be much faster to sample from. We empirically demonstrate that DDIMs can produce high quality samples 10× to 50× faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, and can perform semantically meaningful image interpolation directly in the latent space. 22 | 23 | The original codebase of this paper can be found here: [ermongroup/ddim](https://github.com/ermongroup/ddim). 24 | For questions, feel free to contact the author on [tsong.me](https://tsong.me/). 
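As a minimal usage sketch (the checkpoint name is only an example; any compatible unconditional checkpoint works the same way):

```python
from diffusers import DDIMPipeline

pipe = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
# eta=0.0 corresponds to the deterministic DDIM sampler described in the paper
image = pipe(num_inference_steps=50, eta=0.0).images[0]
image.save("ddim_sample.png")
```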
25 | 26 | ## Available Pipelines: 27 | 28 | | Pipeline | Tasks | Colab 29 | |---|---|:---:| 30 | | [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddim/pipeline_ddim.py) | *Unconditional Image Generation* | - | 31 | 32 | 33 | ## DDIMPipeline 34 | [[autodoc]] DDIMPipeline 35 | - all 36 | - __call__ 37 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/ddpm.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # DDPM 14 | 15 | ## Overview 16 | 17 | [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) 18 | (DDPM) by Jonathan Ho, Ajay Jain and Pieter Abbeel proposes the diffusion-based model of the same name, but in the context of the 🤗 Diffusers library, DDPM refers to the discrete denoising scheduler from the paper as well as the pipeline. 19 | 20 | The abstract of the paper is the following: 21 | 22 | We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN. 23 | 24 | The original codebase of this paper can be found [here](https://github.com/hojonathanho/diffusion). 25 | 26 | 27 | ## Available Pipelines: 28 | 29 | | Pipeline | Tasks | Colab 30 | |---|---|:---:| 31 | | [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddpm/pipeline_ddpm.py) | *Unconditional Image Generation* | - | 32 | 33 | 34 | ## DDPMPipeline 35 | [[autodoc]] DDPMPipeline 36 | - all 37 | - __call__ 38 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/dit.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Scalable Diffusion Models with Transformers (DiT) 14 | 15 | ## Overview 16 | 17 | [Scalable Diffusion Models with Transformers](https://arxiv.org/abs/2212.09748) (DiT) by William Peebles and Saining Xie. 18 | 19 | The abstract of the paper is the following: 20 | 21 | *We explore a new class of diffusion models based on the transformer architecture. We train latent diffusion models of images, replacing the commonly-used U-Net backbone with a transformer that operates on latent patches. We analyze the scalability of our Diffusion Transformers (DiTs) through the lens of forward pass complexity as measured by Gflops. We find that DiTs with higher Gflops -- through increased transformer depth/width or increased number of input tokens -- consistently have lower FID.
In addition to possessing good scalability properties, our largest DiT-XL/2 models outperform all prior diffusion models on the class-conditional ImageNet 512x512 and 256x256 benchmarks, achieving a state-of-the-art FID of 2.27 on the latter.* 22 | 23 | The original codebase of this paper can be found here: [facebookresearch/dit](https://github.com/facebookresearch/dit). 24 | 25 | ## Available Pipelines: 26 | 27 | | Pipeline | Tasks | Colab 28 | |---|---|:---:| 29 | | [pipeline_dit.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/dit/pipeline_dit.py) | *Conditional Image Generation* | - | 30 | 31 | 32 | ## Usage example 33 | 34 | ```python 35 | from diffusers import DiTPipeline, DPMSolverMultistepScheduler 36 | import torch 37 | 38 | pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-256", torch_dtype=torch.float16) 39 | pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) 40 | pipe = pipe.to("cuda") 41 | 42 | # pick words from ImageNet class labels 43 | pipe.labels # inspect all available class labels 44 | 45 | # pick words that exist in ImageNet 46 | words = ["white shark", "umbrella"] 47 | 48 | class_ids = pipe.get_label_ids(words) 49 | 50 | generator = torch.manual_seed(33) 51 | output = pipe(class_labels=class_ids, num_inference_steps=25, generator=generator) 52 | 53 | image = output.images[0] # label 'white shark' 54 | ``` 55 | 56 | ## DiTPipeline 57 | [[autodoc]] DiTPipeline 58 | - all 59 | - __call__ 60 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/latent_diffusion_uncond.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Unconditional Latent Diffusion 14 | 15 | ## Overview 16 | 17 | Unconditional Latent Diffusion was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer. 18 | 19 | The abstract of the paper is the following: 20 | 21 | *By decomposing the image formation process into a sequential application of denoising autoencoders, diffusion models (DMs) achieve state-of-the-art synthesis results on image data and beyond. Additionally, their formulation allows for a guiding mechanism to control the image generation process without retraining. However, since these models typically operate directly in pixel space, optimization of powerful DMs often consumes hundreds of GPU days and inference is expensive due to sequential evaluations. To enable DM training on limited computational resources while retaining their quality and flexibility, we apply them in the latent space of powerful pretrained autoencoders. In contrast to previous work, training diffusion models on such a representation allows for the first time to reach a near-optimal point between complexity reduction and detail preservation, greatly boosting visual fidelity. By introducing cross-attention layers into the model architecture, we turn diffusion models into powerful and flexible generators for general conditioning inputs such as text or bounding boxes and high-resolution synthesis becomes possible in a convolutional manner.
Our latent diffusion models (LDMs) achieve a new state of the art for image inpainting and highly competitive performance on various tasks, including unconditional image generation, semantic scene synthesis, and super-resolution, while significantly reducing computational requirements compared to pixel-based DMs.* 22 | 23 | The original codebase can be found [here](https://github.com/CompVis/latent-diffusion). 24 | 31 | ## Available Pipelines: 32 | 33 | | Pipeline | Tasks | Colab 34 | |---|---|:---:| 35 | | [pipeline_latent_diffusion_uncond.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py) | *Unconditional Image Generation* | - | 36 | 39 | ## LDMPipeline 40 | [[autodoc]] LDMPipeline 41 | - all 42 | - __call__ 43 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/pndm.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # PNDM 14 | 15 | ## Overview 16 | 17 | [Pseudo Numerical methods for Diffusion Models on manifolds](https://arxiv.org/abs/2202.09778) (PNDM) by Luping Liu, Yi Ren, Zhijie Lin and Zhou Zhao. 18 | 19 | The abstract of the paper is the following: 20 | 21 | Denoising Diffusion Probabilistic Models (DDPMs) can generate high-quality samples such as image and audio samples. However, DDPMs require hundreds to thousands of iterations to produce final samples. Several prior works have successfully accelerated DDPMs through adjusting the variance schedule (e.g., Improved Denoising Diffusion Probabilistic Models) or the denoising equation (e.g., Denoising Diffusion Implicit Models (DDIMs)). However, these acceleration methods cannot maintain the quality of samples and even introduce new noise at a high speedup rate, which limit their practicability. To accelerate the inference process while keeping the sample quality, we provide a fresh perspective that DDPMs should be treated as solving differential equations on manifolds. Under such a perspective, we propose pseudo numerical methods for diffusion models (PNDMs). Specifically, we figure out how to solve differential equations on manifolds and show that DDIMs are simple cases of pseudo numerical methods. We change several classical numerical methods to corresponding pseudo numerical methods and find that the pseudo linear multi-step method is the best in most situations. According to our experiments, by directly using pre-trained models on Cifar10, CelebA and LSUN, PNDMs can generate higher quality synthetic images with only 50 steps compared with 1000-step DDIMs (20x speedup), significantly outperform DDIMs with 250 steps (by around 0.4 in FID) and have good generalization on different variance schedules. 22 | 23 | The original codebase can be found [here](https://github.com/luping-liu/PNDM).
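For orientation, here is a minimal usage sketch (not part of the original page); the checkpoint is an illustrative choice, as PNDM can sample from any unconditional DDPM-trained model:

```python
from diffusers import PNDMPipeline

# PNDM runs on DDPM-trained weights; only the numerical solver changes
pipe = PNDMPipeline.from_pretrained("google/ddpm-cifar10-32")

# the pseudo linear multi-step method reaches good quality in ~50 steps
image = pipe(num_inference_steps=50).images[0]
image.save("pndm_sample.png")
```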
24 | 25 | ## Available Pipelines: 26 | 27 | | Pipeline | Tasks | Colab 28 | |---|---|:---:| 29 | | [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pndm/pipeline_pndm.py) | *Unconditional Image Generation* | - | 30 | 31 | 32 | ## PNDMPipeline 33 | [[autodoc]] PNDMPipeline 34 | - all 35 | - __call__ 36 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/stable_diffusion/depth2img.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Depth-to-Image Generation 14 | 15 | ## StableDiffusionDepth2ImgPipeline 16 | 17 | The depth-guided Stable Diffusion model was created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/), and [LAION](https://laion.ai/), as part of Stable Diffusion 2.0. It uses [MiDaS](https://github.com/isl-org/MiDaS) to infer depth from an image. 18 | 19 | [`StableDiffusionDepth2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images as well as a `depth_map` to preserve the images’ structure. 20 | 21 | The original codebase can be found here: 22 | - *Stable Diffusion v2*: [Stability-AI/stablediffusion](https://github.com/Stability-AI/stablediffusion#depth-conditional-stable-diffusion) 23 | 24 | Available Checkpoints are: 25 | - *stable-diffusion-2-depth*: [stabilityai/stable-diffusion-2-depth](https://huggingface.co/stabilityai/stable-diffusion-2-depth) 26 | 27 | [[autodoc]] StableDiffusionDepth2ImgPipeline 28 | - all 29 | - __call__ 30 | - enable_attention_slicing 31 | - disable_attention_slicing 32 | - enable_xformers_memory_efficient_attention 33 | - disable_xformers_memory_efficient_attention -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/stable_diffusion/image_variation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Image Variation 14 | 15 | ## StableDiffusionImageVariationPipeline 16 | 17 | [`StableDiffusionImageVariationPipeline`] lets you generate variations from an input image using Stable Diffusion.
It uses a fine-tuned version of the Stable Diffusion model, trained by [Justin Pinkney](https://www.justinpinkney.com/) (@Buntworthy) at [Lambda](https://lambdalabs.com/). 18 | 19 | The original codebase can be found here: 20 | [Stable Diffusion Image Variations](https://github.com/LambdaLabsML/lambda-diffusers#stable-diffusion-image-variations) 21 | 22 | Available Checkpoints are: 23 | - *sd-image-variations-diffusers*: [lambdalabs/sd-image-variations-diffusers](https://huggingface.co/lambdalabs/sd-image-variations-diffusers) 24 | 25 | [[autodoc]] StableDiffusionImageVariationPipeline 26 | - all 27 | - __call__ 28 | - enable_attention_slicing 29 | - disable_attention_slicing 30 | - enable_xformers_memory_efficient_attention 31 | - disable_xformers_memory_efficient_attention -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/stable_diffusion/img2img.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Image-to-Image Generation 14 | 15 | ## StableDiffusionImg2ImgPipeline 16 | 17 | The Stable Diffusion model was created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/), [runway](https://github.com/runwayml), and [LAION](https://laion.ai/). The [`StableDiffusionImg2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images using Stable Diffusion. 18 | 19 | The original codebase can be found here: [CompVis/stable-diffusion](https://github.com/CompVis/stable-diffusion/blob/main/scripts/img2img.py) 20 | 21 | [`StableDiffusionImg2ImgPipeline`] is compatible with all Stable Diffusion checkpoints for [Text-to-Image](./text2img). 22 | 23 | The pipeline uses the diffusion-denoising mechanism proposed in SDEdit ([SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations](https://arxiv.org/abs/2108.01073) 24 | by Chenlin Meng, Yutong He, Yang Song, Jiaming Song, Jiajun Wu, Jun-Yan Zhu, and Stefano Ermon). 25 | 26 | [[autodoc]] StableDiffusionImg2ImgPipeline 27 | - all 28 | - __call__ 29 | - enable_attention_slicing 30 | - disable_attention_slicing 31 | - enable_xformers_memory_efficient_attention 32 | - disable_xformers_memory_efficient_attention -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/stable_diffusion/inpaint.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Text-Guided Image Inpainting 14 | 15 | ## StableDiffusionInpaintPipeline 16 | 17 | The Stable Diffusion model was created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/), [runway](https://github.com/runwayml), and [LAION](https://laion.ai/). The [`StableDiffusionInpaintPipeline`] lets you edit specific parts of an image by providing a mask and a text prompt using Stable Diffusion.
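For orientation, a short usage sketch follows (the prompt and the image/mask URLs are illustrative placeholders; white mask pixels are repainted, black pixels are kept):

```python
import torch
import requests
from io import BytesIO
from PIL import Image

from diffusers import StableDiffusionInpaintPipeline

pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16
).to("cuda")


def download_image(url):
    response = requests.get(url)
    return Image.open(BytesIO(response.content)).convert("RGB")


img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"

init_image = download_image(img_url).resize((512, 512))
mask_image = download_image(mask_url).resize((512, 512))

prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
image = pipe(prompt=prompt, image=init_image, mask_image=mask_image).images[0]
```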
18 | 19 | The original codebase can be found here: 20 | - *Stable Diffusion V1*: [runwayml/stable-diffusion](https://github.com/runwayml/stable-diffusion#inpainting-with-stable-diffusion) 21 | - *Stable Diffusion V2*: [Stability-AI/stablediffusion](https://github.com/Stability-AI/stablediffusion#image-inpainting-with-stable-diffusion) 22 | 23 | Available checkpoints are: 24 | - *stable-diffusion-inpainting (512x512 resolution)*: [runwayml/stable-diffusion-inpainting](https://huggingface.co/runwayml/stable-diffusion-inpainting) 25 | - *stable-diffusion-2-inpainting (512x512 resolution)*: [stabilityai/stable-diffusion-2-inpainting](https://huggingface.co/stabilityai/stable-diffusion-2-inpainting) 26 | 27 | [[autodoc]] StableDiffusionInpaintPipeline 28 | - all 29 | - __call__ 30 | - enable_attention_slicing 31 | - disable_attention_slicing 32 | - enable_xformers_memory_efficient_attention 33 | - disable_xformers_memory_efficient_attention -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/stable_diffusion/latent_upscale.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Stable Diffusion Latent Upscaler 14 | 15 | ## StableDiffusionLatentUpscalePipeline 16 | 17 | The Stable Diffusion Latent Upscaler model was created by [Katherine Crowson](https://github.com/crowsonkb/k-diffusion) in collaboration with [Stability AI](https://stability.ai/). It can be used on top of any [`StableDiffusionPipeline`] checkpoint to enhance its output image resolution by a factor of 2. 18 | 19 | A notebook that demonstrates the original implementation can be found here: 20 | - [Stable Diffusion Upscaler Demo](https://colab.research.google.com/drive/1o1qYJcFeywzCIdkfKJy7cTpgZTCM2EI4) 21 | 22 | Available Checkpoints are: 23 | - *stabilityai/latent-upscaler*: [stabilityai/sd-x2-latent-upscaler](https://huggingface.co/stabilityai/sd-x2-latent-upscaler) 24 | 25 | 26 | [[autodoc]] StableDiffusionLatentUpscalePipeline 27 | - all 28 | - __call__ 29 | - enable_sequential_cpu_offload 30 | - enable_attention_slicing 31 | - disable_attention_slicing 32 | - enable_xformers_memory_efficient_attention 33 | - disable_xformers_memory_efficient_attention -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Text-to-Image Generation 14 | 15 | ## StableDiffusionPipeline 16 | 17 | The Stable Diffusion model was created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/), [runway](https://github.com/runwayml), and [LAION](https://laion.ai/). The [`StableDiffusionPipeline`] is capable of generating photo-realistic images given any text input using Stable Diffusion.
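For orientation, a short usage sketch (the prompt and output filename are illustrative); any of the checkpoints listed below can be substituted for `runwayml/stable-diffusion-v1-5`:

```python
import torch

from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(prompt).images[0]
image.save("astronaut_rides_horse.png")
```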
18 | 19 | The original codebase can be found here: 20 | - *Stable Diffusion V1*: [CompVis/stable-diffusion](https://github.com/CompVis/stable-diffusion) 21 | - *Stable Diffusion v2*: [Stability-AI/stablediffusion](https://github.com/Stability-AI/stablediffusion) 22 | 23 | Available Checkpoints are: 24 | - *stable-diffusion-v1-4 (512x512 resolution)*: [CompVis/stable-diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4) 25 | - *stable-diffusion-v1-5 (512x512 resolution)*: [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5) 26 | - *stable-diffusion-2-base (512x512 resolution)*: [stabilityai/stable-diffusion-2-base](https://huggingface.co/stabilityai/stable-diffusion-2-base) 27 | - *stable-diffusion-2 (768x768 resolution)*: [stabilityai/stable-diffusion-2](https://huggingface.co/stabilityai/stable-diffusion-2) 28 | - *stable-diffusion-2-1-base (512x512 resolution)*: [stabilityai/stable-diffusion-2-1-base](https://huggingface.co/stabilityai/stable-diffusion-2-1-base) 29 | - *stable-diffusion-2-1 (768x768 resolution)*: [stabilityai/stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1) 30 | 31 | [[autodoc]] StableDiffusionPipeline 32 | - all 33 | - __call__ 34 | - enable_attention_slicing 35 | - disable_attention_slicing 36 | - enable_vae_slicing 37 | - disable_vae_slicing 38 | - enable_xformers_memory_efficient_attention 39 | - disable_xformers_memory_efficient_attention 40 | - enable_vae_tiling 41 | - disable_vae_tiling 42 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/stable_diffusion/upscale.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Super-Resolution 14 | 15 | ## StableDiffusionUpscalePipeline 16 | 17 | The upscaler diffusion model was created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/), and [LAION](https://laion.ai/), as part of Stable Diffusion 2.0. [`StableDiffusionUpscalePipeline`] can be used to enhance the resolution of input images by a factor of 4. 18 | 19 | The original codebase can be found here: 20 | - *Stable Diffusion v2*: [Stability-AI/stablediffusion](https://github.com/Stability-AI/stablediffusion#image-upscaling-with-stable-diffusion) 21 | 22 | Available Checkpoints are: 23 | - *stabilityai/stable-diffusion-x4-upscaler (x4 resolution)*: [stable-diffusion-x4-upscaler](https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler) 24 | 25 | 26 | [[autodoc]] StableDiffusionUpscalePipeline 27 | - all 28 | - __call__ 29 | - enable_attention_slicing 30 | - disable_attention_slicing 31 | - enable_xformers_memory_efficient_attention 32 | - disable_xformers_memory_efficient_attention -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/stochastic_karras_ve.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Stochastic Karras VE 14 | 15 | ## Overview 16 | 17 | [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) by Tero Karras, Miika Aittala, Timo Aila and Samuli Laine.
18 | 19 | The abstract of the paper is the following: 20 | 21 | We argue that the theory and practice of diffusion-based generative models are currently unnecessarily convoluted and seek to remedy the situation by presenting a design space that clearly separates the concrete design choices. This lets us identify several changes to both the sampling and training processes, as well as preconditioning of the score networks. Together, our improvements yield new state-of-the-art FID of 1.79 for CIFAR-10 in a class-conditional setting and 1.97 in an unconditional setting, with much faster sampling (35 network evaluations per image) than prior designs. To further demonstrate their modular nature, we show that our design changes dramatically improve both the efficiency and quality obtainable with pre-trained score networks from previous work, including improving the FID of an existing ImageNet-64 model from 2.07 to near-SOTA 1.55. 22 | 23 | This pipeline implements the stochastic sampling tailored to variance-exploding (VE) models. 24 | 25 | 26 | ## Available Pipelines: 27 | 28 | | Pipeline | Tasks | Colab 29 | |---|---|:---:| 30 | | [pipeline_stochastic_karras_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py) | *Unconditional Image Generation* | - | 31 | 32 | 33 | ## KarrasVePipeline 34 | [[autodoc]] KarrasVePipeline 35 | - all 36 | - __call__ 37 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/unclip.mdx: -------------------------------------------------------------------------------- 1 | 9 | 10 | # unCLIP 11 | 12 | ## Overview 13 | 14 | [Hierarchical Text-Conditional Image Generation with CLIP Latents](https://arxiv.org/abs/2204.06125) by Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, Mark Chen. 15 | 16 | The abstract of the paper is the following: 17 | 18 | Contrastive models like CLIP have been shown to learn robust representations of images that capture both semantics and style. To leverage these representations for image generation, we propose a two-stage model: a prior that generates a CLIP image embedding given a text caption, and a decoder that generates an image conditioned on the image embedding. We show that explicitly generating image representations improves image diversity with minimal loss in photorealism and caption similarity. Our decoders conditioned on image representations can also produce variations of an image that preserve both its semantics and style, while varying the non-essential details absent from the image representation. Moreover, the joint embedding space of CLIP enables language-guided image manipulations in a zero-shot fashion. We use diffusion models for the decoder and experiment with both autoregressive and diffusion models for the prior, finding that the latter are computationally more efficient and produce higher-quality samples. 19 | 20 | The unCLIP model in 🤗 Diffusers comes from kakaobrain's Karlo, and the original codebase can be found [here](https://github.com/kakaobrain/karlo). Additionally, lucidrains has a DALL-E 2 recreation [here](https://github.com/lucidrains/DALLE2-pytorch).
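As a quick, unofficial usage sketch, the two-stage prior/decoder model can be run end to end from a text prompt (the prompt and output filename are illustrative placeholders):

```python
import torch

from diffusers import UnCLIPPipeline

pipe = UnCLIPPipeline.from_pretrained("kakaobrain/karlo-v1-alpha", torch_dtype=torch.float16).to("cuda")

prompt = "a high-resolution photograph of a big red frog on a green leaf"
image = pipe(prompt).images[0]
image.save("frog.png")
```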
21 | 22 | ## Available Pipelines: 23 | 24 | | Pipeline | Tasks | Colab 25 | |---|---|:---:| 26 | | [pipeline_unclip.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/unclip/pipeline_unclip.py) | *Text-to-Image Generation* | - | 27 | | [pipeline_unclip_image_variation.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/unclip/pipeline_unclip_image_variation.py) | *Image-Guided Image Generation* | - | 28 | 29 | 30 | ## UnCLIPPipeline 31 | [[autodoc]] UnCLIPPipeline 32 | - all 33 | - __call__ 34 | 35 | [[autodoc]] UnCLIPImageVariationPipeline 36 | - all 37 | - __call__ 38 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/pipelines/vq_diffusion.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # VQDiffusion 14 | 15 | ## Overview 16 | 17 | [Vector Quantized Diffusion Model for Text-to-Image Synthesis](https://arxiv.org/abs/2111.14822) by Shuyang Gu, Dong Chen, Jianmin Bao, Fang Wen, Bo Zhang, Dongdong Chen, Lu Yuan, Baining Guo 18 | 19 | The abstract of the paper is the following: 20 | 21 | We present the vector quantized diffusion (VQ-Diffusion) model for text-to-image generation. This method is based on a vector quantized variational autoencoder (VQ-VAE) whose latent space is modeled by a conditional variant of the recently developed Denoising Diffusion Probabilistic Model (DDPM). We find that this latent-space method is well-suited for text-to-image generation tasks because it not only eliminates the unidirectional bias with existing methods but also allows us to incorporate a mask-and-replace diffusion strategy to avoid the accumulation of errors, which is a serious problem with existing methods. Our experiments show that the VQ-Diffusion produces significantly better text-to-image generation results when compared with conventional autoregressive (AR) models with similar numbers of parameters. Compared with previous GAN-based text-to-image methods, our VQ-Diffusion can handle more complex scenes and improve the synthesized image quality by a large margin. Finally, we show that the image generation computation in our method can be made highly efficient by reparameterization. With traditional AR methods, the text-to-image generation time increases linearly with the output image resolution and hence is quite time consuming even for normal size images. The VQ-Diffusion allows us to achieve a better trade-off between quality and speed. Our experiments indicate that the VQ-Diffusion model with the reparameterization is fifteen times faster than traditional AR methods while achieving a better image quality. 22 | 23 | The original codebase can be found [here](https://github.com/microsoft/VQ-Diffusion). 
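For orientation, a minimal usage sketch (the prompt and output filename are illustrative placeholders):

```python
from diffusers import VQDiffusionPipeline

pipe = VQDiffusionPipeline.from_pretrained("microsoft/vq-diffusion-ithq").to("cuda")

# the mask-and-replace diffusion strategy runs over discrete VQ-VAE latents
image = pipe(prompt="teddy bear playing in the pool").images[0]
image.save("teddy_bear.png")
```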
24 | 25 | ## Available Pipelines: 26 | 27 | | Pipeline | Tasks | Colab 28 | |---|---|:---:| 29 | | [pipeline_vq_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/vq_diffusion/pipeline_vq_diffusion.py) | *Text-to-Image Generation* | - | 30 | 31 | 32 | ## VQDiffusionPipeline 33 | [[autodoc]] VQDiffusionPipeline 34 | - all 35 | - __call__ 36 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/ddim.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Denoising diffusion implicit models (DDIM) 14 | 15 | ## Overview 16 | 17 | [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) (DDIM) by Jiaming Song, Chenlin Meng and Stefano Ermon. 18 | 19 | The abstract of the paper is the following: 20 | 21 | Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a Markovian diffusion process. We construct a class of non-Markovian diffusion processes that lead to the same training objective, but whose reverse process can be much faster to sample from. We empirically demonstrate that DDIMs can produce high quality samples 10× to 50× faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, and can perform semantically meaningful image interpolation directly in the latent space. 22 | 23 | The original codebase of this paper can be found here: [ermongroup/ddim](https://github.com/ermongroup/ddim). 24 | For questions, feel free to contact the author on [tsong.me](https://tsong.me/). 25 | 26 | ## DDIMScheduler 27 | [[autodoc]] DDIMScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/ddim_inverse.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Inverse Denoising Diffusion Implicit Models (DDIMInverse) 14 | 15 | ## Overview 16 | 17 | This scheduler is the inverted scheduler of [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) (DDIM) by Jiaming Song, Chenlin Meng and Stefano Ermon. 18 | The implementation is mostly based on the DDIM inversion definition of [Null-text Inversion for Editing Real Images using Guided Diffusion Models](https://arxiv.org/pdf/2211.09794.pdf) 19 | 20 | ## DDIMInverseScheduler 21 | [[autodoc]] DDIMInverseScheduler 22 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/ddpm.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Denoising diffusion probabilistic models (DDPM) 14 | 15 | ## Overview 16 | 17 | [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) 18 | (DDPM) by Jonathan Ho, Ajay Jain and Pieter Abbeel proposes the diffusion based model of the same name, but in the context of the 🤗 Diffusers library, DDPM refers to the discrete denoising scheduler from the paper as well as the pipeline. 
19 | 20 | The abstract of the paper is the following: 21 | 22 | We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN. 23 | 24 | The original paper can be found [here](https://arxiv.org/abs/2006.11239). 25 | 26 | ## DDPMScheduler 27 | [[autodoc]] DDPMScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/deis.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # DEIS 14 | 15 | Fast Sampling of Diffusion Models with Exponential Integrator. 16 | 17 | ## Overview 18 | 19 | Original paper can be found [here](https://arxiv.org/abs/2204.13902). The original implementation can be found [here](https://github.com/qsh-zh/deis). 20 | 21 | ## DEISMultistepScheduler 22 | [[autodoc]] DEISMultistepScheduler 23 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/dpm_discrete.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # DPM Discrete Scheduler inspired by Karras et al. paper 14 | 15 | ## Overview 16 | 17 | Inspired by [Karras et al.](https://arxiv.org/abs/2206.00364). Scheduler ported from @crowsonkb's https://github.com/crowsonkb/k-diffusion library. 18 | 19 | All credit for making this scheduler work goes to [Katherine Crowson](https://github.com/crowsonkb/). 20 | 21 | ## KDPM2DiscreteScheduler 22 | [[autodoc]] KDPM2DiscreteScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/dpm_discrete_ancestral.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # DPM Discrete Scheduler with ancestral sampling inspired by Karras et al. paper 14 | 15 | ## Overview 16 | 17 | Inspired by [Karras et al.](https://arxiv.org/abs/2206.00364). Scheduler ported from @crowsonkb's https://github.com/crowsonkb/k-diffusion library. 18 | 19 | All credit for making this scheduler work goes to [Katherine Crowson](https://github.com/crowsonkb/). 20 | 21 | ## KDPM2AncestralDiscreteScheduler 22 | [[autodoc]] KDPM2AncestralDiscreteScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/euler.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Euler scheduler 14 | 15 | ## Overview 16 | 17 | Euler scheduler (Algorithm 2) from the paper [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) by Karras et al. (2022). Based on the original [k-diffusion](https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L51) implementation by Katherine Crowson.
18 | A fast scheduler which often generates good outputs in 20-30 steps. 19 | 20 | ## EulerDiscreteScheduler 21 | [[autodoc]] EulerDiscreteScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/euler_ancestral.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Euler Ancestral scheduler 14 | 15 | ## Overview 16 | 17 | Ancestral sampling with Euler method steps. Based on the original [k-diffusion](https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L72) implementation by Katherine Crowson. 18 | A fast scheduler which often generates good outputs in 20-30 steps. 19 | 20 | ## EulerAncestralDiscreteScheduler 21 | [[autodoc]] EulerAncestralDiscreteScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/heun.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Heun scheduler inspired by Karras et al. paper 14 | 15 | ## Overview 16 | 17 | Algorithm 1 of [Karras et al.](https://arxiv.org/abs/2206.00364). 18 | Scheduler ported from @crowsonkb's https://github.com/crowsonkb/k-diffusion library. 19 | 20 | All credit for making this scheduler work goes to [Katherine Crowson](https://github.com/crowsonkb/). 21 | 22 | ## HeunDiscreteScheduler 23 | [[autodoc]] HeunDiscreteScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/ipndm.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Improved pseudo numerical methods for diffusion models (iPNDM) 14 | 15 | ## Overview 16 | 17 | Original implementation can be found [here](https://github.com/crowsonkb/v-diffusion-pytorch/blob/987f8985e38208345c1959b0ea767a625831cc9b/diffusion/sampling.py#L296). 18 | 19 | ## IPNDMScheduler 20 | [[autodoc]] IPNDMScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/lms_discrete.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Linear multistep scheduler for discrete beta schedules 14 | 15 | ## Overview 16 | 17 | Original paper can be found [here](https://arxiv.org/abs/2206.00364). 18 | 19 | ## LMSDiscreteScheduler 20 | [[autodoc]] LMSDiscreteScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/multistep_dpm_solver.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Multistep DPM-Solver 14 | 15 | ## Overview 16 | 17 | Original paper can be found [here](https://arxiv.org/abs/2206.00927) and the [improved version](https://arxiv.org/abs/2211.01095). The original implementation can be found [here](https://github.com/LuChengTHU/dpm-solver).
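Since this scheduler is a drop-in replacement, a common pattern is to swap it into an existing pipeline via `from_config`; a minimal sketch (the checkpoint and prompt are illustrative):

```python
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler

pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
# reuse the pipeline's scheduler config so the beta schedule etc. stay consistent
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")

# DPM-Solver typically reaches good quality in ~20 steps
image = pipe("an astronaut riding a horse on mars", num_inference_steps=20).images[0]
```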
18 | 19 | ## DPMSolverMultistepScheduler 20 | [[autodoc]] DPMSolverMultistepScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/pndm.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Pseudo numerical methods for diffusion models (PNDM) 14 | 15 | ## Overview 16 | 17 | Original implementation can be found [here](https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L181). 18 | 19 | ## PNDMScheduler 20 | [[autodoc]] PNDMScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/repaint.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # RePaint scheduler 14 | 15 | ## Overview 16 | 17 | DDPM-based inpainting scheduler for unsupervised inpainting with extreme masks. 18 | Intended for use with [`RePaintPipeline`]. 19 | Based on the paper [RePaint: Inpainting using Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2201.09865) 20 | and the original implementation by Andreas Lugmayr et al.: https://github.com/andreas128/RePaint 21 | 22 | ## RePaintScheduler 23 | [[autodoc]] RePaintScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/score_sde_ve.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Variance exploding stochastic differential equation (VE-SDE) scheduler 14 | 15 | ## Overview 16 | 17 | Original paper can be found [here](https://arxiv.org/abs/2011.13456). 18 | 19 | ## ScoreSdeVeScheduler 20 | [[autodoc]] ScoreSdeVeScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/score_sde_vp.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Variance preserving stochastic differential equation (VP-SDE) scheduler 14 | 15 | ## Overview 16 | 17 | Original paper can be found [here](https://arxiv.org/abs/2011.13456). 18 | 19 | 20 | 21 | Score SDE-VP is under construction. 22 | 23 | 24 | 25 | ## ScoreSdeVpScheduler 26 | [[autodoc]] schedulers.scheduling_sde_vp.ScoreSdeVpScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/singlestep_dpm_solver.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Singlestep DPM-Solver 14 | 15 | ## Overview 16 | 17 | Original paper can be found [here](https://arxiv.org/abs/2206.00927) and the [improved version](https://arxiv.org/abs/2211.01095). The original implementation can be found [here](https://github.com/LuChengTHU/dpm-solver). 18 | 19 | ## DPMSolverSinglestepScheduler 20 | [[autodoc]] DPMSolverSinglestepScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/stochastic_karras_ve.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Variance exploding, stochastic sampling from Karras et al. 14 | 15 | ## Overview 16 | 17 | Original paper can be found [here](https://arxiv.org/abs/2206.00364).
18 | 19 | ## KarrasVeScheduler 20 | [[autodoc]] KarrasVeScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/unipc.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # UniPC 14 | 15 | ## Overview 16 | 17 | UniPC is a training-free framework designed for the fast sampling of diffusion models, which consists of a corrector (UniC) and a predictor (UniP) that share a unified analytical form and support arbitrary orders. 18 | 19 | For more details about the method, please refer to the [[paper]](https://arxiv.org/abs/2302.04867) and the [[code]](https://github.com/wl-zhao/UniPC). 20 | 23 | ## UniPCMultistepScheduler 24 | [[autodoc]] UniPCMultistepScheduler 25 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/api/schedulers/vq_diffusion.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # VQDiffusionScheduler 14 | 15 | ## Overview 16 | 17 | Original paper can be found [here](https://arxiv.org/abs/2111.14822). 18 | 19 | ## VQDiffusionScheduler 20 | [[autodoc]] VQDiffusionScheduler -------------------------------------------------------------------------------- /diffusers/docs/source/en/imgs/access_request.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/docs/source/en/imgs/access_request.png -------------------------------------------------------------------------------- /diffusers/docs/source/en/imgs/diffusers_library.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/docs/source/en/imgs/diffusers_library.jpg -------------------------------------------------------------------------------- /diffusers/docs/source/en/optimization/onnx.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | # How to use the ONNX Runtime for inference 15 | 16 | 🤗 Diffusers provides a Stable Diffusion pipeline compatible with the ONNX Runtime. This allows you to run Stable Diffusion on any hardware that supports ONNX, including CPUs and hardware where an accelerated version of PyTorch is not available. 17 | 18 | ## Installation 19 | 20 | - TODO 21 | 22 | ## Stable Diffusion Inference 23 | 24 | The snippet below demonstrates how to use the ONNX runtime. You need to use `OnnxStableDiffusionPipeline` instead of `StableDiffusionPipeline`. You also need to download the weights from the `onnx` branch of the repository, and indicate the runtime provider you want to use. 25 | 26 | ```python 27 | # make sure you're logged in with `huggingface-cli login` 28 | from diffusers import OnnxStableDiffusionPipeline 29 | 30 | pipe = OnnxStableDiffusionPipeline.from_pretrained( 31 | "runwayml/stable-diffusion-v1-5", 32 | revision="onnx", 33 | provider="CUDAExecutionProvider", 34 | ) 35 | 36 | prompt = "a photo of an astronaut riding a horse on mars" 37 | image = pipe(prompt).images[0] 38 | ``` 39 | 40 | The snippet below demonstrates how to use the ONNX runtime with the Stable Diffusion upscaling pipeline.
41 | 42 | ```python 43 | import torch 44 | from diffusers import OnnxStableDiffusionPipeline, OnnxStableDiffusionUpscalePipeline 45 | prompt = "a photo of an astronaut riding a horse on mars" 46 | steps = 50 47 | 48 | txt2img = OnnxStableDiffusionPipeline.from_pretrained( 49 | "runwayml/stable-diffusion-v1-5", 50 | revision="onnx", 51 | provider="CUDAExecutionProvider", 52 | ) 53 | small_image = txt2img( 54 | prompt, 55 | num_inference_steps=steps, 56 | ).images[0] 57 | 58 | generator = torch.manual_seed(0) 59 | upscale = OnnxStableDiffusionUpscalePipeline.from_pretrained( 60 | "ssube/stable-diffusion-x4-upscaler-onnx", 61 | provider="CUDAExecutionProvider", 62 | ) 63 | large_image = upscale( 64 | prompt, 65 | small_image, 66 | generator=generator, 67 | num_inference_steps=steps, 68 | ).images[0] 69 | ``` 70 | 71 | ## Known Issues 72 | 73 | - Generating multiple prompts in a batch seems to take too much memory. While we look into it, you may need to iterate instead of batching. 74 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/optimization/open_vino.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # OpenVINO 14 | 15 | Under construction 🚧 16 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/optimization/xformers.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Installing xFormers 14 | 15 | We recommend the use of [xFormers](https://github.com/facebookresearch/xformers) for both inference and training. In our tests, the optimizations performed in the attention blocks allow for both faster speed and reduced memory consumption. 16 | 17 | Starting from version `0.0.16` of xFormers, released in January 2023, installation can be easily performed using pre-built pip wheels: 18 | 19 | ```bash 20 | pip install xformers 21 | ``` 22 | 23 | 24 | 25 | The xFormers pip package requires the latest version of PyTorch (1.13.1 as of xFormers 0.0.16). If you need to use a previous version of PyTorch, then we recommend you install xFormers from source using [the project instructions](https://github.com/facebookresearch/xformers#installing-xformers). 26 | 27 | 28 | 29 | After xFormers is installed, you can use `enable_xformers_memory_efficient_attention()` for faster inference and reduced memory consumption, as discussed [here](fp16#memory-efficient-attention). 30 | 31 | 32 | 33 | According to [this issue](https://github.com/huggingface/diffusers/issues/2234#issuecomment-1416931212), xFormers `v0.0.16` cannot be used for training (fine-tune or Dreambooth) in some GPUs. If you observe that problem, please install a development version as indicated in that comment. 34 | 35 | 36 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/using-diffusers/audio.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Using Diffusers for audio 14 | 15 | [`DanceDiffusionPipeline`] and [`AudioDiffusionPipeline`] can be used to generate 16 | audio rapidly! More coming soon!
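In the meantime, here is a minimal sketch of unconditional audio generation with [`DanceDiffusionPipeline`] (it assumes the `harmonai/maestro-150k` checkpoint; the clip length and output filename are illustrative):

```python
from scipy.io import wavfile

from diffusers import DanceDiffusionPipeline

pipe = DanceDiffusionPipeline.from_pretrained("harmonai/maestro-150k").to("cuda")

output = pipe(audio_length_in_s=4.0)
audio = output.audios[0]  # numpy array of shape (channels, samples)

# the model's sample rate is stored on the UNet config
wavfile.write("sample.wav", pipe.unet.config.sample_rate, audio.transpose())
```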
-------------------------------------------------------------------------------- /diffusers/docs/source/en/using-diffusers/conditional_image_generation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Conditional Image Generation 14 | 15 | The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference. 16 | 17 | Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download. 18 | You can use the [`DiffusionPipeline`] for any [Diffusers' checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads). 19 | In this guide though, you'll use [`DiffusionPipeline`] for text-to-image generation with [Latent Diffusion](https://huggingface.co/CompVis/ldm-text2im-large-256): 20 | 21 | ```python 22 | >>> from diffusers import DiffusionPipeline 23 | 24 | >>> generator = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256") 25 | ``` 26 | The [`DiffusionPipeline`] downloads and caches all modeling, tokenization, and scheduling components. 27 | Because the model consists of roughly 1.4 billion parameters, we strongly recommend running it on GPU. 28 | You can move the generator object to GPU, just like you would in PyTorch. 29 | 30 | ```python 31 | >>> generator.to("cuda") 32 | ``` 33 | 34 | Now you can use the `generator` on your text prompt: 35 | 36 | ```python 37 | >>> image = generator("An image of a squirrel in Picasso style").images[0] 38 | ``` 39 | 40 | The output is by default wrapped into a [PIL Image object](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class). 41 | 42 | You can save the image by simply calling: 43 | 44 | ```python 45 | >>> image.save("image_of_squirrel_painting.png") 46 | ``` 47 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/using-diffusers/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | 15 | # Configuration 16 | 17 | Configurations in Diffusers are handled with the `ConfigMixin` class. 18 | 19 | [[autodoc]] ConfigMixin 20 | 21 | Under further construction 🚧, open a [PR](https://github.com/huggingface/diffusers/compare) if you want to contribute! 22 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/using-diffusers/depth2img.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Text-Guided Image-to-Image Generation 14 | 15 | The [`StableDiffusionDepth2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images as well as a `depth_map` to preserve the images' structure. If no `depth_map` is provided, the pipeline will automatically predict the depth via an integrated depth-estimation model.
16 | 17 | ```python 18 | import torch 19 | import requests 20 | from PIL import Image 21 | 22 | from diffusers import StableDiffusionDepth2ImgPipeline 23 | 24 | pipe = StableDiffusionDepth2ImgPipeline.from_pretrained( 25 | "stabilityai/stable-diffusion-2-depth", 26 | torch_dtype=torch.float16, 27 | ).to("cuda") 28 | 29 | 30 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 31 | init_image = Image.open(requests.get(url, stream=True).raw) 32 | prompt = "two tigers" 33 | n_prompt = "bad, deformed, ugly, bad anatomy" 34 | image = pipe(prompt=prompt, image=init_image, negative_prompt=n_prompt, strength=0.7).images[0] 35 | ``` 36 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/using-diffusers/img2img.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Text-Guided Image-to-Image Generation 14 | 15 | The [`StableDiffusionImg2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images. 16 | 17 | ```python 18 | import torch 19 | import requests 20 | from PIL import Image 21 | from io import BytesIO 22 | 23 | from diffusers import StableDiffusionImg2ImgPipeline 24 | 25 | # load the pipeline 26 | device = "cuda" 27 | pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to( 28 | device 29 | ) 30 | 31 | # let's download an initial image 32 | url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg" 33 | 34 | response = requests.get(url) 35 | init_image = Image.open(BytesIO(response.content)).convert("RGB") 36 | init_image.thumbnail((768, 768)) 37 | 38 | prompt = "A fantasy landscape, trending on artstation" 39 | 40 | images = pipe(prompt=prompt, image=init_image, strength=0.75, guidance_scale=7.5).images 41 | 42 | images[0].save("fantasy_landscape.png") 43 | ``` 44 | You can also run this example on colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) 45 | 46 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/using-diffusers/other-modalities.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Using Diffusers with other modalities 14 | 15 | Diffusers is in the process of expanding to modalities other than images. 16 | 17 | Example type | Colab | Pipeline | 18 | :-------------------------:|:-------------------------:|:-------------------------:| 19 | [Molecule conformation](https://www.nature.com/subjects/molecular-conformation#:~:text=Definition,to%20changes%20in%20their%20environment.) generation | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/geodiff_molecule_conformation.ipynb) | ❌ 20 | 21 | More coming soon! -------------------------------------------------------------------------------- /diffusers/docs/source/en/using-diffusers/rl.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Using Diffusers for reinforcement learning 14 | 15 | Support for one RL model and related pipelines is included in the `experimental` source of diffusers. 16 | More models and examples coming soon! 
17 | 18 | # Diffuser Value-guided Planning 19 | 20 | You can run the model from [*Planning with Diffusion for Flexible Behavior Synthesis*](https://arxiv.org/abs/2205.09991) with Diffusers. 21 | The script is located in the [RL Examples](https://github.com/huggingface/diffusers/tree/main/examples/rl) folder. 22 | 23 | Or, run this example in Colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/reinforcement_learning_with_diffusers.ipynb) 24 | 25 | [[autodoc]] diffusers.experimental.ValueGuidedRLPipeline -------------------------------------------------------------------------------- /diffusers/docs/source/en/using-diffusers/unconditional_image_generation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | 15 | # Unconditional Image Generation 16 | 17 | The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference. 18 | 19 | Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download. 20 | You can use the [`DiffusionPipeline`] for any [Diffusers' checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads). 21 | In this guide though, you'll use [`DiffusionPipeline`] for unconditional image generation with [DDPM](https://arxiv.org/abs/2006.11239): 22 | 23 | ```python 24 | >>> from diffusers import DiffusionPipeline 25 | 26 | >>> generator = DiffusionPipeline.from_pretrained("google/ddpm-celebahq-256") 27 | ``` 28 | The [`DiffusionPipeline`] downloads and caches all modeling and scheduling components. 29 | Because the denoising loop runs the model many times to generate an image, we strongly recommend running it on GPU. 30 | You can move the generator object to GPU, just like you would in PyTorch. 31 | 32 | ```python 33 | >>> generator.to("cuda") 34 | ``` 35 | 36 | Now you can use the `generator` to generate an image: 37 | 38 | ```python 39 | >>> image = generator().images[0] 40 | ``` 41 | 42 | The output is by default wrapped into a [PIL Image object](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class). 43 | 44 | You can save the image by simply calling: 45 | 46 | ```python 47 | >>> image.save("generated_image.png") 48 | ``` 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /diffusers/docs/source/en/using-diffusers/using_safetensors: -------------------------------------------------------------------------------- 1 | # What is safetensors? 2 | 3 | [safetensors](https://github.com/huggingface/safetensors) is a different format 4 | from the classic `.bin` format used by PyTorch, which relies on pickle. 5 | 6 | Pickle is notoriously unsafe: it allows a malicious file to execute arbitrary code. 7 | The Hub itself tries to prevent such issues, but it's not a silver bullet. 8 | 9 | The first and foremost goal of `safetensors` is to make loading machine learning models *safe*, 10 | in the sense that no takeover of your computer can occur. 11 | 12 | # Why use safetensors? 13 | 14 | **Safety** is one reason: if you're using a model that is not well known and 15 | you're not sure about the source of the file. 16 | 17 | A secondary reason is **loading speed**: safetensors can load models much faster 18 | than regular pickle files. If you spend a lot of time switching models, this can be 19 | a huge time saver.
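To see what this buys you in practice, here is a minimal sketch of the core `safetensors` API (the tensors and filename are illustrative); loading is a plain dictionary read, with no arbitrary code execution:

```python
import torch
from safetensors.torch import load_file, save_file

# round-trip a state dict through the safetensors format
tensors = {"embedding": torch.zeros((1024, 768)), "attention": torch.zeros((768, 768))}
save_file(tensors, "model.safetensors")

loaded = load_file("model.safetensors")  # returns a plain dict of tensors
```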
20 | -------------------------------------------------------------------------------- /diffusers/docs/source/ko/in_translation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # 번역중 14 | 15 | 열심히 번역을 진행중입니다. 조금만 기다려주세요. 16 | 감사합니다! -------------------------------------------------------------------------------- /diffusers/examples/community/one_step_unet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import torch 3 | 4 | from diffusers import DiffusionPipeline 5 | 6 | 7 | class UnetSchedulerOneForwardPipeline(DiffusionPipeline): 8 | def __init__(self, unet, scheduler): 9 | super().__init__() 10 | 11 | self.register_modules(unet=unet, scheduler=scheduler) 12 | 13 | def __call__(self): 14 | image = torch.randn( 15 | (1, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 16 | ) 17 | timestep = 1 18 | 19 | model_output = self.unet(image, timestep).sample 20 | scheduler_output = self.scheduler.step(model_output, timestep, image).prev_sample 21 | 22 | result = scheduler_output - scheduler_output + torch.ones_like(scheduler_output) 23 | 24 | return result 25 | -------------------------------------------------------------------------------- /diffusers/examples/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import sys 19 | import warnings 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(dirname(dirname(__file__))), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | 29 | # silence FutureWarning warnings in tests since often we can't act on them until 30 | # they become normal warnings - i.e. 
the tests still need to test the current functionality 31 | warnings.simplefilter(action="ignore", category=FutureWarning) 32 | 33 | 34 | def pytest_addoption(parser): 35 | from diffusers.utils.testing_utils import pytest_addoption_shared 36 | 37 | pytest_addoption_shared(parser) 38 | 39 | 40 | def pytest_terminal_summary(terminalreporter): 41 | from diffusers.utils.testing_utils import pytest_terminal_summary_main 42 | 43 | make_reports = terminalreporter.config.getoption("--make-reports") 44 | if make_reports: 45 | pytest_terminal_summary_main(terminalreporter, id=make_reports) 46 | -------------------------------------------------------------------------------- /diffusers/examples/dreambooth/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | -------------------------------------------------------------------------------- /diffusers/examples/dreambooth/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | flax 3 | optax 4 | torch 5 | torchvision 6 | ftfy 7 | tensorboard 8 | Jinja2 9 | -------------------------------------------------------------------------------- /diffusers/examples/inference/README.md: -------------------------------------------------------------------------------- 1 | # Inference Examples 2 | 3 | **The inference examples folder is deprecated and will be removed in a future version**. 4 | **Officially supported inference examples can be found in the [Pipelines folder](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines)**. 5 | 6 | - For `Image-to-Image text-guided generation with Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 7 | - For `In-painting using Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 8 | - For `Tweak prompts reusing seeds and latents`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 9 | -------------------------------------------------------------------------------- /diffusers/examples/inference/image_to_image.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionImg2ImgPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `image_to_image.py` script is outdated. Please use directly `from diffusers import" 8 | " StableDiffusionImg2ImgPipeline` instead." 9 | ) 10 | -------------------------------------------------------------------------------- /diffusers/examples/inference/inpainting.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionInpaintPipeline as StableDiffusionInpaintPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `inpainting.py` script is outdated. Please use directly `from diffusers import" 8 | " StableDiffusionInpaintPipeline` instead." 
9 | ) 10 | -------------------------------------------------------------------------------- /diffusers/examples/research_projects/README.md: -------------------------------------------------------------------------------- 1 | # Research projects 2 | 3 | This folder contains various research projects using 🧨 Diffusers. 4 | They are not really maintained by the core maintainers of this library and often require a specific version of Diffusers that is indicated in the requirements file of each folder. 5 | Updating them to the most recent version of the library will require some work. 6 | 7 | To use any of them, just run the command 8 | 9 | ``` 10 | pip install -r requirements.txt 11 | ``` 12 | inside the folder of your choice. 13 | 14 | If you need help with any of those, please open an issue where you directly ping the author(s), as indicated at the top of the README of each folder. 15 | -------------------------------------------------------------------------------- /diffusers/examples/research_projects/colossalai/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from diffusers import StableDiffusionPipeline 4 | 5 | 6 | model_id = "path-to-your-trained-model" 7 | pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") 8 | 9 | prompt = "A photo of sks dog in a bucket" 10 | image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0] 11 | 12 | image.save("dog-bucket.png") 13 | -------------------------------------------------------------------------------- /diffusers/examples/research_projects/colossalai/requirement.txt: -------------------------------------------------------------------------------- 1 | diffusers 2 | torch 3 | torchvision 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | transformers -------------------------------------------------------------------------------- /diffusers/examples/research_projects/dreambooth_inpaint/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.9.0 2 | accelerate 3 | torchvision 4 | transformers>=4.21.0 5 | ftfy 6 | tensorboard 7 | Jinja2 8 | -------------------------------------------------------------------------------- /diffusers/examples/research_projects/intel_opts/README.md: -------------------------------------------------------------------------------- 1 | ## Diffusers examples with Intel optimizations 2 | 3 | **This research project is not actively maintained by the diffusers team. For any questions or comments, please make sure to tag @hshen14.** 4 | 5 | This aims to provide diffusers examples with Intel optimizations such as Bfloat16 for training/fine-tuning acceleration and 8-bit integer (INT8) for inference acceleration on Intel platforms. 6 | 7 | ## Accelerating the fine-tuning for textual inversion 8 | 9 | We accelerate the fine-tuning for textual inversion with Intel Extension for PyTorch. The [examples](textual_inversion) enable both single node and multi-node distributed training with Bfloat16 support on Intel Xeon Scalable Processors. 10 | 11 | ## Accelerating the inference for Stable Diffusion using Bfloat16 12 | 13 | We start the inference acceleration with Bfloat16 using Intel Extension for PyTorch. The [script](inference_bf16.py) is generally designed to support standard Stable Diffusion models with Bfloat16. 14 | 15 | ## Accelerating the inference for Stable Diffusion using INT8 16 | 17 | Coming soon ...
18 | -------------------------------------------------------------------------------- /diffusers/examples/research_projects/intel_opts/inference_bf16.py: -------------------------------------------------------------------------------- 1 | import intel_extension_for_pytorch as ipex 2 | import torch 3 | from PIL import Image 4 | 5 | from diffusers import StableDiffusionPipeline 6 | 7 | 8 | def image_grid(imgs, rows, cols): 9 | assert len(imgs) == rows * cols 10 | 11 | w, h = imgs[0].size 12 | grid = Image.new("RGB", size=(cols * w, rows * h)) 13 | grid_w, grid_h = grid.size 14 | 15 | for i, img in enumerate(imgs): 16 | grid.paste(img, box=(i % cols * w, i // cols * h)) 17 | return grid 18 | 19 | 20 | prompt = ["a lovely in red dress and hat, in the snowly and brightly night, with many brighly buildings"] 21 | batch_size = 8 22 | prompt = prompt * batch_size 23 | 24 | device = "cpu" 25 | model_id = "path-to-your-trained-model" 26 | model = StableDiffusionPipeline.from_pretrained(model_id) 27 | model = model.to(device) 28 | 29 | # to channels last 30 | model.unet = model.unet.to(memory_format=torch.channels_last) 31 | model.vae = model.vae.to(memory_format=torch.channels_last) 32 | model.text_encoder = model.text_encoder.to(memory_format=torch.channels_last) 33 | model.safety_checker = model.safety_checker.to(memory_format=torch.channels_last) 34 | 35 | # optimize with ipex 36 | model.unet = ipex.optimize(model.unet.eval(), dtype=torch.bfloat16, inplace=True) 37 | model.vae = ipex.optimize(model.vae.eval(), dtype=torch.bfloat16, inplace=True) 38 | model.text_encoder = ipex.optimize(model.text_encoder.eval(), dtype=torch.bfloat16, inplace=True) 39 | model.safety_checker = ipex.optimize(model.safety_checker.eval(), dtype=torch.bfloat16, inplace=True) 40 | 41 | # compute 42 | seed = 666 43 | generator = torch.Generator(device).manual_seed(seed) 44 | with torch.cpu.amp.autocast(enabled=True, dtype=torch.bfloat16): 45 | images = model(prompt, guidance_scale=7.5, num_inference_steps=50, generator=generator).images 46 | 47 | # save image 48 | grid = image_grid(images, rows=2, cols=4) 49 | grid.save(model_id + ".png") 50 | -------------------------------------------------------------------------------- /diffusers/examples/research_projects/intel_opts/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.21.0 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | intel_extension_for_pytorch>=1.13 8 | -------------------------------------------------------------------------------- /diffusers/examples/research_projects/multi_subject_dreambooth/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 -------------------------------------------------------------------------------- /diffusers/examples/research_projects/onnxruntime/README.md: -------------------------------------------------------------------------------- 1 | ## Diffusers examples with ONNXRuntime optimizations 2 | 3 | **This research project is not actively maintained by the diffusers team. 
For any questions or comments, please contact Prathik Rao (prathikr), Sunghoon Choi (hanbitmyths), Ashwini Khade (askhade), or Peng Wang (pengwa) on GitHub.** 4 | 5 | This aims to provide diffusers examples with ONNXRuntime optimizations for training/fine-tuning unconditional image generation, text to image, and textual inversion. Please see individual directories for more details on how to run each task using ONNXRuntime. -------------------------------------------------------------------------------- /diffusers/examples/research_projects/onnxruntime/text_to_image/README.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion text-to-image fine-tuning 2 | 3 | The `train_text_to_image.py` script shows how to fine-tune a Stable Diffusion model on your own dataset. 4 | 5 | ___Note___: 6 | 7 | ___This script is experimental. The script fine-tunes the whole model, and oftentimes the model overfits and runs into issues like catastrophic forgetting. It's recommended to try different hyperparameters to get the best result on your dataset.___ 8 | 9 | 10 | ## Running locally with PyTorch 11 | ### Installing the dependencies 12 | 13 | Before running the scripts, make sure to install the library's training dependencies: 14 | 15 | **Important** 16 | 17 | To make sure you can successfully run the latest versions of the example scripts, we highly recommend **installing from source** and keeping the install up to date as we update the example scripts frequently and install some example-specific requirements. To do this, execute the following steps in a new virtual environment: 18 | ```bash 19 | git clone https://github.com/huggingface/diffusers 20 | cd diffusers 21 | pip install . 22 | ``` 23 | 24 | Then cd into the example folder and run 25 | ```bash 26 | pip install -r requirements.txt 27 | ``` 28 | 29 | And initialize an [🤗Accelerate](https://github.com/huggingface/accelerate/) environment with: 30 | 31 | ```bash 32 | accelerate config 33 | ``` 34 | 35 | ### Pokemon example 36 | 37 | You need to accept the model license before downloading or using the weights. In this example we'll use model version `v1-4`, so you'll need to visit [its card](https://huggingface.co/CompVis/stable-diffusion-v1-4), read the license and tick the checkbox if you agree. 38 | 39 | You have to be a registered user on the 🤗 Hugging Face Hub, and you'll also need to use an access token for the code to work. For more information on access tokens, please refer to [this section of the documentation](https://huggingface.co/docs/hub/security-tokens). 40 | 41 | Run the following command to authenticate your token: 42 | 43 | ```bash 44 | huggingface-cli login 45 | ``` 46 | 47 | If you have already cloned the repo, then you won't need to go through these steps. 48 | 49 |
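If you are working from a notebook rather than a shell, a sketch of the equivalent programmatic login via `huggingface_hub` (a package `diffusers` already depends on; the call below assumes a reasonably recent version) is:

```python
from huggingface_hub import notebook_login

# prompts for your access token and stores it for later use
notebook_login()
```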
50 | 51 | ## Use ONNXRuntime to accelerate training 52 | In order to leverage onnxruntime to accelerate training, please use `train_text_to_image.py`. 53 | 54 | The command to train a DDPM UNetCondition model on the Pokemon dataset with onnxruntime: 55 | 56 | ```bash 57 | export MODEL_NAME="CompVis/stable-diffusion-v1-4" 58 | export dataset_name="lambdalabs/pokemon-blip-captions" 59 | accelerate launch --mixed_precision="fp16" train_text_to_image.py \ 60 | --pretrained_model_name_or_path=$MODEL_NAME \ 61 | --dataset_name=$dataset_name \ 62 | --use_ema \ 63 | --resolution=512 --center_crop --random_flip \ 64 | --train_batch_size=1 \ 65 | --gradient_accumulation_steps=4 \ 66 | --gradient_checkpointing \ 67 | --max_train_steps=15000 \ 68 | --learning_rate=1e-05 \ 69 | --max_grad_norm=1 \ 70 | --lr_scheduler="constant" --lr_warmup_steps=0 \ 71 | --output_dir="sd-pokemon-model" 72 | ``` 73 | 74 | Please contact Prathik Rao (prathikr), Sunghoon Choi (hanbitmyths), Ashwini Khade (askhade), or Peng Wang (pengwa) on GitHub with any questions. -------------------------------------------------------------------------------- /diffusers/examples/research_projects/onnxruntime/text_to_image/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | datasets 5 | ftfy 6 | tensorboard 7 | modelcards 8 | -------------------------------------------------------------------------------- /diffusers/examples/research_projects/onnxruntime/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | modelcards 7 | -------------------------------------------------------------------------------- /diffusers/examples/research_projects/onnxruntime/unconditional_image_generation/README.md: -------------------------------------------------------------------------------- 1 | ## Training examples 2 | 3 | Creating a training image set is [described in a different document](https://huggingface.co/docs/datasets/image_process#image-datasets). 4 | 5 | ### Installing the dependencies 6 | 7 | Before running the scripts, make sure to install the library's training dependencies: 8 | 9 | **Important** 10 | 11 | To make sure you can successfully run the latest versions of the example scripts, we highly recommend **installing from source** and keeping the install up to date as we update the example scripts frequently and install some example-specific requirements. To do this, execute the following steps in a new virtual environment: 12 | ```bash 13 | git clone https://github.com/huggingface/diffusers 14 | cd diffusers 15 | pip install .
16 | ``` 17 | 18 | Then cd into the example folder and run 19 | ```bash 20 | pip install -r requirements.txt 21 | ``` 22 | 23 | 24 | And initialize an [🤗Accelerate](https://github.com/huggingface/accelerate/) environment with: 25 | 26 | ```bash 27 | accelerate config 28 | ``` 29 | 30 | #### Use ONNXRuntime to accelerate training 31 | 32 | In order to leverage onnxruntime to accelerate training, please use `train_unconditional_ort.py`. 33 | 34 | The command to train a DDPM UNet model on the Oxford Flowers dataset with onnxruntime: 35 | 36 | ```bash 37 | accelerate launch train_unconditional_ort.py \ 38 | --dataset_name="huggan/flowers-102-categories" \ 39 | --resolution=64 --center_crop --random_flip \ 40 | --output_dir="ddpm-ema-flowers-64" \ 41 | --use_ema \ 42 | --train_batch_size=16 \ 43 | --num_epochs=1 \ 44 | --gradient_accumulation_steps=1 \ 45 | --learning_rate=1e-4 \ 46 | --lr_warmup_steps=500 \ 47 | --mixed_precision=fp16 48 | ``` 49 | 50 | Please contact Prathik Rao (prathikr), Sunghoon Choi (hanbitmyths), Ashwini Khade (askhade), or Peng Wang (pengwa) on GitHub with any questions. 51 | -------------------------------------------------------------------------------- /diffusers/examples/research_projects/onnxruntime/unconditional_image_generation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | datasets 4 | -------------------------------------------------------------------------------- /diffusers/examples/rl/README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | These examples show how to run [Diffuser](https://arxiv.org/abs/2205.09991) in Diffusers. 4 | There are two ways to use the script, `run_diffuser_locomotion.py`. 5 | 6 | The key option is a change of the variable `n_guide_steps`. 7 | When `n_guide_steps=0`, the trajectories are sampled from the diffusion model, but not fine-tuned to maximize reward in the environment. 8 | By default, `n_guide_steps=2` to match the original implementation.
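For instance, disabling guidance is a one-line change to the config dict at the top of the script (this fragment mirrors `run_diffuser_locomotion.py`, reproduced in full below):

```python
config = dict(
    n_samples=64,
    horizon=32,
    num_inference_steps=20,
    n_guide_steps=0,  # 0 skips the value network entirely for faster, unguided sampling
    scale_grad_by_std=True,
    scale=0.1,
    eta=0.0,
    t_grad_cutoff=2,
    device="cpu",
)
```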
9 | 10 | 11 | You will need some RL specific requirements to run the examples: 12 | 13 | ``` 14 | pip install -f https://download.pytorch.org/whl/torch_stable.html \ 15 | free-mujoco-py \ 16 | einops \ 17 | gym==0.24.1 \ 18 | protobuf==3.20.1 \ 19 | git+https://github.com/rail-berkeley/d4rl.git \ 20 | mediapy \ 21 | Pillow==9.0.0 22 | ``` 23 | -------------------------------------------------------------------------------- /diffusers/examples/rl/run_diffuser_locomotion.py: -------------------------------------------------------------------------------- 1 | import d4rl # noqa 2 | import gym 3 | import tqdm 4 | from diffusers.experimental import ValueGuidedRLPipeline 5 | 6 | 7 | config = dict( 8 | n_samples=64, 9 | horizon=32, 10 | num_inference_steps=20, 11 | n_guide_steps=2, # can set to 0 for faster sampling, does not use value network 12 | scale_grad_by_std=True, 13 | scale=0.1, 14 | eta=0.0, 15 | t_grad_cutoff=2, 16 | device="cpu", 17 | ) 18 | 19 | 20 | if __name__ == "__main__": 21 | env_name = "hopper-medium-v2" 22 | env = gym.make(env_name) 23 | 24 | pipeline = ValueGuidedRLPipeline.from_pretrained( 25 | "bglick13/hopper-medium-v2-value-function-hor32", 26 | env=env, 27 | ) 28 | 29 | env.seed(0) 30 | obs = env.reset() 31 | total_reward = 0 32 | total_score = 0 33 | T = 1000 34 | rollout = [obs.copy()] 35 | try: 36 | for t in tqdm.tqdm(range(T)): 37 | # call the policy 38 | denorm_actions = pipeline(obs, planning_horizon=32) 39 | 40 | # execute action in environment 41 | next_observation, reward, terminal, _ = env.step(denorm_actions) 42 | score = env.get_normalized_score(total_reward) 43 | 44 | # update return 45 | total_reward += reward 46 | total_score += score 47 | print( 48 | f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:" 49 | f" {total_score}" 50 | ) 51 | 52 | # save observations for rendering 53 | rollout.append(next_observation.copy()) 54 | 55 | obs = next_observation 56 | except KeyboardInterrupt: 57 | pass 58 | 59 | print(f"Total reward: {total_reward}") 60 | -------------------------------------------------------------------------------- /diffusers/examples/text_to_image/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | datasets 5 | ftfy 6 | tensorboard 7 | Jinja2 8 | -------------------------------------------------------------------------------- /diffusers/examples/text_to_image/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | datasets 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | Jinja2 10 | -------------------------------------------------------------------------------- /diffusers/examples/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.25.1 4 | ftfy 5 | tensorboard 6 | Jinja2 7 | -------------------------------------------------------------------------------- /diffusers/examples/textual_inversion/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | transformers>=4.25.1 2 | flax 3 | optax 4 | torch 5 | torchvision 6 | ftfy 7 | tensorboard 8 | Jinja2 9 | -------------------------------------------------------------------------------- /diffusers/examples/unconditional_image_generation/requirements.txt: 
-------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | datasets 4 | -------------------------------------------------------------------------------- /diffusers/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py37'] 4 | 5 | [tool.ruff] 6 | # Never enforce `E501` (line length violations). 7 | ignore = ["E501", "E741", "W605"] 8 | select = ["E", "F", "I", "W"] 9 | line-length = 119 10 | 11 | # Ignore import violations in all `__init__.py` files. 12 | [tool.ruff.per-file-ignores] 13 | "__init__.py" = ["E402", "F401", "F403", "F811"] 14 | "src/diffusers/utils/dummy_*.py" = ["F401"] 15 | 16 | [tool.ruff.isort] 17 | lines-after-imports = 2 18 | known-first-party = ["diffusers"] 19 | -------------------------------------------------------------------------------- /diffusers/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/scripts/__init__.py -------------------------------------------------------------------------------- /diffusers/scripts/conversion_ldm_uncond.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from omegaconf import OmegaConf 4 | import torch 5 | 6 | from diffusers import DDIMScheduler, LDMPipeline, UNetLDMModel, VQModel 7 | 8 | 9 | def convert_ldm_original(checkpoint_path, config_path, output_path): 10 | config = OmegaConf.load(config_path) 11 | state_dict = torch.load(checkpoint_path, map_location="cpu")["model"] 12 | keys = list(state_dict.keys()) 13 | 14 | # extract state_dict for VQVAE 15 | first_stage_dict = {} 16 | first_stage_key = "first_stage_model." 17 | for key in keys: 18 | if key.startswith(first_stage_key): 19 | first_stage_dict[key.replace(first_stage_key, "")] = state_dict[key] 20 | 21 | # extract state_dict for UNetLDM 22 | unet_state_dict = {} 23 | unet_key = "model.diffusion_model."
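# the loop below strips the "model.diffusion_model." prefix so the remaining keys match UNetLDMModel's state_dict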
24 | for key in keys: 25 | if key.startswith(unet_key): 26 | unet_state_dict[key.replace(unet_key, "")] = state_dict[key] 27 | 28 | vqvae_init_args = config.model.params.first_stage_config.params 29 | unet_init_args = config.model.params.unet_config.params 30 | 31 | vqvae = VQModel(**vqvae_init_args).eval() 32 | vqvae.load_state_dict(first_stage_dict) 33 | 34 | unet = UNetLDMModel(**unet_init_args).eval() 35 | unet.load_state_dict(unet_state_dict) 36 | 37 | noise_scheduler = DDIMScheduler( 38 | timesteps=config.model.params.timesteps, 39 | beta_schedule="scaled_linear", 40 | beta_start=config.model.params.linear_start, 41 | beta_end=config.model.params.linear_end, 42 | clip_sample=False, 43 | ) 44 | 45 | pipeline = LDMPipeline(vqvae, unet, noise_scheduler) 46 | pipeline.save_pretrained(output_path) 47 | 48 | 49 | if __name__ == "__main__": 50 | parser = argparse.ArgumentParser() 51 | parser.add_argument("--checkpoint_path", type=str, required=True) 52 | parser.add_argument("--config_path", type=str, required=True) 53 | parser.add_argument("--output_path", type=str, required=True) 54 | args = parser.parse_args() 55 | 56 | convert_ldm_original(args.checkpoint_path, args.config_path, args.output_path) 57 | -------------------------------------------------------------------------------- /diffusers/scripts/convert_unclip_txt2img_to_image_variation.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection 4 | 5 | from diffusers import UnCLIPImageVariationPipeline, UnCLIPPipeline 6 | 7 | 8 | if __name__ == "__main__": 9 | parser = argparse.ArgumentParser() 10 | 11 | parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output model.") 12 | 13 | parser.add_argument( 14 | "--txt2img_unclip", 15 | default="kakaobrain/karlo-v1-alpha", 16 | type=str, 17 | required=False, 18 | help="The pretrained txt2img unclip.", 19 | ) 20 | 21 | args = parser.parse_args() 22 | 23 | txt2img = UnCLIPPipeline.from_pretrained(args.txt2img_unclip) 24 | 25 | feature_extractor = CLIPImageProcessor() 26 | image_encoder = CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-large-patch14") 27 | 28 | img2img = UnCLIPImageVariationPipeline( 29 | decoder=txt2img.decoder, 30 | text_encoder=txt2img.text_encoder, 31 | tokenizer=txt2img.tokenizer, 32 | text_proj=txt2img.text_proj, 33 | feature_extractor=feature_extractor, 34 | image_encoder=image_encoder, 35 | super_res_first=txt2img.super_res_first, 36 | super_res_last=txt2img.super_res_last, 37 | decoder_scheduler=txt2img.decoder_scheduler, 38 | super_res_scheduler=txt2img.super_res_scheduler, 39 | ) 40 | 41 | img2img.save_pretrained(args.dump_path) 42 | -------------------------------------------------------------------------------- /diffusers/setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = accelerate 7 | known_third_party = 8 | numpy 9 | torch 10 | torch_xla 11 | 12 | line_length = 119 13 | lines_after_imports = 2 14 | multi_line_output = 3 15 | use_parentheses = True 16 | 17 | [flake8] 18 | ignore = E203, E722, E501, E741, W503, W605 19 | max-line-length = 119 20 | per-file-ignores = __init__.py:F401 21 | -------------------------------------------------------------------------------- 
/diffusers/src/diffusers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseDiffusersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/commands/diffusers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from argparse import ArgumentParser 17 | 18 | from .env import EnvironmentCommand 19 | 20 | 21 | def main(): 22 | parser = ArgumentParser("Diffusers CLI tool", usage="diffusers-cli <command> [<args>]") 23 | commands_parser = parser.add_subparsers(help="diffusers-cli command helpers") 24 | 25 | # Register commands 26 | EnvironmentCommand.register_subcommand(commands_parser) 27 | 28 | # Let's go 29 | args = parser.parse_args() 30 | 31 | if not hasattr(args, "func"): 32 | parser.print_help() 33 | exit(1) 34 | 35 | # Run 36 | service = args.func(args) 37 | service.run() 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/dependency_versions_check.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | import sys 15 | 16 | from .dependency_versions_table import deps 17 | from .utils.versions import require_version, require_version_core 18 | 19 | 20 | # define which module versions we always want to check at run time 21 | # (usually the ones defined in `install_requires` in setup.py) 22 | # 23 | # order specific notes: 24 | # - tqdm must be checked before tokenizers 25 | 26 | pkgs_to_check_at_runtime = "python tqdm regex requests packaging filelock numpy tokenizers".split() 27 | if sys.version_info < (3, 7): 28 | pkgs_to_check_at_runtime.append("dataclasses") 29 | if sys.version_info < (3, 8): 30 | pkgs_to_check_at_runtime.append("importlib_metadata") 31 | 32 | for pkg in pkgs_to_check_at_runtime: 33 | if pkg in deps: 34 | if pkg == "tokenizers": 35 | # must be loaded here, or else tqdm check may fail 36 | from .utils import is_tokenizers_available 37 | 38 | if not is_tokenizers_available(): 39 | continue # not required, check version only if installed 40 | 41 | require_version_core(deps[pkg]) 42 | else: 43 | raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") 44 | 45 | 46 | def dep_version_check(pkg, hint=None): 47 | require_version(deps[pkg], hint) 48 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/dependency_versions_table.py: -------------------------------------------------------------------------------- 1 | # THIS FILE HAS BEEN AUTOGENERATED. To update: 2 | # 1. modify the `_deps` dict in setup.py 3 | # 2. run `make deps_table_update`` 4 | deps = { 5 | "Pillow": "Pillow", 6 | "accelerate": "accelerate>=0.11.0", 7 | "black": "black~=23.1", 8 | "datasets": "datasets", 9 | "filelock": "filelock", 10 | "flax": "flax>=0.4.1", 11 | "hf-doc-builder": "hf-doc-builder>=0.3.0", 12 | "huggingface-hub": "huggingface-hub>=0.10.0", 13 | "importlib_metadata": "importlib_metadata", 14 | "isort": "isort>=5.5.4", 15 | "jax": "jax>=0.2.8,!=0.3.2", 16 | "jaxlib": "jaxlib>=0.1.65", 17 | "Jinja2": "Jinja2", 18 | "k-diffusion": "k-diffusion>=0.0.12", 19 | "librosa": "librosa", 20 | "numpy": "numpy", 21 | "parameterized": "parameterized", 22 | "pytest": "pytest", 23 | "pytest-timeout": "pytest-timeout", 24 | "pytest-xdist": "pytest-xdist", 25 | "ruff": "ruff>=0.0.241", 26 | "safetensors": "safetensors", 27 | "sentencepiece": "sentencepiece>=0.1.91,!=0.1.92", 28 | "scipy": "scipy", 29 | "regex": "regex!=2019.12.17", 30 | "requests": "requests", 31 | "tensorboard": "tensorboard", 32 | "torch": "torch>=1.4", 33 | "torchvision": "torchvision", 34 | "transformers": "transformers>=4.25.1", 35 | } 36 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/experimental/README.md: -------------------------------------------------------------------------------- 1 | # 🧨 Diffusers Experimental 2 | 3 | We are adding experimental code to support novel applications and usages of the Diffusers library. 4 | Currently, the following experiments are supported: 5 | * Reinforcement learning via an implementation of the [Diffuser](https://arxiv.org/abs/2205.09991) model. 
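A minimal usage sketch of the RL experiment (assumptions: `gym` and `d4rl` are installed, and the `bglick13/hopper-medium-v2-value-function-hor32` checkpoint is used, exactly as in the `examples/rl` script elsewhere in this repository):

```python
import d4rl  # noqa: F401  (registers the offline-RL environments with gym)
import gym

from diffusers.experimental import ValueGuidedRLPipeline

env = gym.make("hopper-medium-v2")
pipeline = ValueGuidedRLPipeline.from_pretrained(
    "bglick13/hopper-medium-v2-value-function-hor32",
    env=env,
)

obs = env.reset()
denorm_actions = pipeline(obs, planning_horizon=32)  # value-guided plan; returns the next action
```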
-------------------------------------------------------------------------------- /diffusers/src/diffusers/experimental/__init__.py: -------------------------------------------------------------------------------- 1 | from .rl import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/experimental/rl/__init__.py: -------------------------------------------------------------------------------- 1 | from .value_guided_sampling import ValueGuidedRLPipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | For more detail on the models, please refer to the [docs](https://huggingface.co/docs/diffusers/api/models). -------------------------------------------------------------------------------- /diffusers/src/diffusers/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ..utils import is_flax_available, is_torch_available 16 | 17 | 18 | if is_torch_available(): 19 | from .autoencoder_kl import AutoencoderKL 20 | from .controlnet import ControlNetModel 21 | from .dual_transformer_2d import DualTransformer2DModel 22 | from .modeling_utils import ModelMixin 23 | from .prior_transformer import PriorTransformer 24 | from .transformer_2d import Transformer2DModel 25 | from .unet_1d import UNet1DModel 26 | from .unet_2d import UNet2DModel 27 | from .unet_2d_condition import UNet2DConditionModel, UNet2DConditionModelEmb 28 | from .promptnet import PromptNetModel 29 | from .vq_model import VQModel 30 | 31 | if is_flax_available(): 32 | from .unet_2d_condition_flax import FlaxUNet2DConditionModel 33 | from .vae_flax import FlaxAutoencoderKL 34 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipeline_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | # limitations under the License. 15 | 16 | # NOTE: This file is deprecated and will be removed in a future version. 
17 | # It only exists so that temporarily `from diffusers.pipelines import DiffusionPipeline` works 18 | 19 | from .pipelines import DiffusionPipeline, ImagePipelineOutput  # noqa: F401 20 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/alt_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import PIL 6 | from PIL import Image 7 | 8 | from ...utils import BaseOutput, is_torch_available, is_transformers_available 9 | 10 | 11 | @dataclass 12 | # Copied from diffusers.pipelines.stable_diffusion.__init__.StableDiffusionPipelineOutput with Stable->Alt 13 | class AltDiffusionPipelineOutput(BaseOutput): 14 | """ 15 | Output class for Alt Diffusion pipelines. 16 | 17 | Args: 18 | images (`List[PIL.Image.Image]` or `np.ndarray`) 19 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, 20 | num_channels)`. PIL images or numpy array represent the denoised images of the diffusion pipeline. 21 | nsfw_content_detected (`List[bool]`) 22 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work" 23 | (nsfw) content, or `None` if safety checking could not be performed. 24 | """ 25 | 26 | images: Union[List[PIL.Image.Image], np.ndarray] 27 | nsfw_content_detected: Optional[List[bool]] 28 | 29 | 30 | if is_transformers_available() and is_torch_available(): 31 | from .modeling_roberta_series import RobertaSeriesModelWithTransformation 32 | from .pipeline_alt_diffusion import AltDiffusionPipeline 33 | from .pipeline_alt_diffusion_img2img import AltDiffusionImg2ImgPipeline 34 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/audio_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .mel import Mel 2 | from .pipeline_audio_diffusion import AudioDiffusionPipeline 3 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/dance_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dance_diffusion import DanceDiffusionPipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddim import DDIMPipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_ddpm import DDPMPipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/dit/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_dit import DiTPipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_transformers_available 2 | from .pipeline_latent_diffusion_superresolution import
LDMSuperResolutionPipeline 3 | 4 | 5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_latent_diffusion_uncond import LDMPipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/paint_by_example/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional, Union 3 | 4 | import numpy as np 5 | import PIL 6 | from PIL import Image 7 | 8 | from ...utils import is_torch_available, is_transformers_available 9 | 10 | 11 | if is_transformers_available() and is_torch_available(): 12 | from .image_encoder import PaintByExampleImageEncoder 13 | from .pipeline_paint_by_example import PaintByExamplePipeline 14 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/paint_by_example/image_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import torch 15 | from torch import nn 16 | from transformers import CLIPPreTrainedModel, CLIPVisionModel 17 | 18 | from ...models.attention import BasicTransformerBlock 19 | from ...utils import logging 20 | 21 | 22 | logger = logging.get_logger(__name__) # pylint: disable=invalid-name 23 | 24 | 25 | class PaintByExampleImageEncoder(CLIPPreTrainedModel): 26 | def __init__(self, config, proj_size=768): 27 | super().__init__(config) 28 | self.proj_size = proj_size 29 | 30 | self.model = CLIPVisionModel(config) 31 | self.mapper = PaintByExampleMapper(config) 32 | self.final_layer_norm = nn.LayerNorm(config.hidden_size) 33 | self.proj_out = nn.Linear(config.hidden_size, self.proj_size) 34 | 35 | # uncondition for scaling 36 | self.uncond_vector = nn.Parameter(torch.randn((1, 1, self.proj_size))) 37 | 38 | def forward(self, pixel_values, return_uncond_vector=False): 39 | clip_output = self.model(pixel_values=pixel_values) 40 | latent_states = clip_output.pooler_output 41 | latent_states = self.mapper(latent_states[:, None]) 42 | latent_states = self.final_layer_norm(latent_states) 43 | latent_states = self.proj_out(latent_states) 44 | if return_uncond_vector: 45 | return latent_states, self.uncond_vector 46 | 47 | return latent_states 48 | 49 | 50 | class PaintByExampleMapper(nn.Module): 51 | def __init__(self, config): 52 | super().__init__() 53 | num_layers = (config.num_hidden_layers + 1) // 5 54 | hid_size = config.hidden_size 55 | num_heads = 1 56 | self.blocks = nn.ModuleList( 57 | [ 58 | BasicTransformerBlock(hid_size, num_heads, hid_size, activation_fn="gelu", attention_bias=True) 59 | for _ in range(num_layers) 60 | ] 61 | ) 62 | 63 | def forward(self, hidden_states): 64 | for block in self.blocks: 65 | hidden_states = block(hidden_states) 66 | 67 | return hidden_states 68 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_pndm import PNDMPipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/repaint/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_repaint import RePaintPipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/semantic_stable_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from typing import List, Optional, Union 4 | 5 | import numpy as np 6 | import PIL 7 | from PIL import Image 8 | 9 | from ...utils import BaseOutput, is_torch_available, is_transformers_available 10 | 11 | 12 | @dataclass 13 | class SemanticStableDiffusionPipelineOutput(BaseOutput): 14 | """ 15 | Output class for Stable Diffusion pipelines. 16 | 17 | Args: 18 | images (`List[PIL.Image.Image]` or `np.ndarray`) 19 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, 20 | num_channels)`. 
PIL images or numpy array represent the denoised images of the diffusion pipeline. 21 | nsfw_content_detected (`List[bool]`) 22 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work" 23 | (nsfw) content, or `None` if safety checking could not be performed. 24 | """ 25 | 26 | images: Union[List[PIL.Image.Image], np.ndarray] 27 | nsfw_content_detected: Optional[List[bool]] 28 | 29 | 30 | if is_transformers_available() and is_torch_available(): 31 | from .pipeline_semantic_stable_diffusion import SemanticStableDiffusionPipeline 32 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import torch 16 | from torch import nn 17 | 18 | from ...configuration_utils import ConfigMixin, register_to_config 19 | from ...models.modeling_utils import ModelMixin 20 | 21 | 22 | class StableUnCLIPImageNormalizer(ModelMixin, ConfigMixin): 23 | """ 24 | This class is used to hold the mean and standard deviation of the CLIP embedder used in stable unCLIP. 25 | 26 | It is used to normalize the image embeddings before the noise is applied and un-normalize the noised image 27 | embeddings.
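Concretely, `scale` computes `(embeds - mean) / std` and `unscale` computes `embeds * std + mean`, so `unscale(scale(embeds))` recovers the original embeddings (mirroring the two methods defined below).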
28 | """ 29 | 30 | @register_to_config 31 | def __init__( 32 | self, 33 | embedding_dim: int = 768, 34 | ): 35 | super().__init__() 36 | 37 | self.mean = nn.Parameter(torch.zeros(1, embedding_dim)) 38 | self.std = nn.Parameter(torch.ones(1, embedding_dim)) 39 | 40 | def scale(self, embeds): 41 | embeds = (embeds - self.mean) * 1.0 / self.std 42 | return embeds 43 | 44 | def unscale(self, embeds): 45 | embeds = (embeds * self.std) + self.mean 46 | return embeds 47 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/stable_diffusion_safe/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from typing import List, Optional, Union 4 | 5 | import numpy as np 6 | import PIL 7 | from PIL import Image 8 | 9 | from ...utils import BaseOutput, is_torch_available, is_transformers_available 10 | 11 | 12 | @dataclass 13 | class SafetyConfig(object): 14 | WEAK = { 15 | "sld_warmup_steps": 15, 16 | "sld_guidance_scale": 20, 17 | "sld_threshold": 0.0, 18 | "sld_momentum_scale": 0.0, 19 | "sld_mom_beta": 0.0, 20 | } 21 | MEDIUM = { 22 | "sld_warmup_steps": 10, 23 | "sld_guidance_scale": 1000, 24 | "sld_threshold": 0.01, 25 | "sld_momentum_scale": 0.3, 26 | "sld_mom_beta": 0.4, 27 | } 28 | STRONG = { 29 | "sld_warmup_steps": 7, 30 | "sld_guidance_scale": 2000, 31 | "sld_threshold": 0.025, 32 | "sld_momentum_scale": 0.5, 33 | "sld_mom_beta": 0.7, 34 | } 35 | MAX = { 36 | "sld_warmup_steps": 0, 37 | "sld_guidance_scale": 5000, 38 | "sld_threshold": 1.0, 39 | "sld_momentum_scale": 0.5, 40 | "sld_mom_beta": 0.7, 41 | } 42 | 43 | 44 | @dataclass 45 | class StableDiffusionSafePipelineOutput(BaseOutput): 46 | """ 47 | Output class for Safe Stable Diffusion pipelines. 48 | 49 | Args: 50 | images (`List[PIL.Image.Image]` or `np.ndarray`) 51 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, 52 | num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline. 53 | nsfw_content_detected (`List[bool]`) 54 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work" 55 | (nsfw) content, or `None` if safety checking could not be performed. 56 | images (`List[PIL.Image.Image]` or `np.ndarray`) 57 | List of denoised PIL images that were flagged by the safety checker any may contain "not-safe-for-work" 58 | (nsfw) content, or `None` if no safety check was performed or no images were flagged. 
59 | applied_safety_concept (`str`) 60 | The safety concept that was applied for safety guidance, or `None` if safety guidance was disabled 61 | """ 62 | 63 | images: Union[List[PIL.Image.Image], np.ndarray] 64 | nsfw_content_detected: Optional[List[bool]] 65 | unsafe_images: Optional[Union[List[PIL.Image.Image], np.ndarray]] 66 | applied_safety_concept: Optional[str] 67 | 68 | 69 | if is_transformers_available() and is_torch_available(): 70 | from .pipeline_stable_diffusion_safe import StableDiffusionPipelineSafe 71 | from .safety_checker import SafeStableDiffusionSafetyChecker 72 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 2 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/unclip/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline 14 | else: 15 | from .pipeline_unclip import UnCLIPPipeline 16 | from .pipeline_unclip_image_variation import UnCLIPImageVariationPipeline 17 | from .text_proj import UnCLIPTextProjModel 18 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/versatile_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import ( 2 | OptionalDependencyNotAvailable, 3 | is_torch_available, 4 | is_transformers_available, 5 | is_transformers_version, 6 | ) 7 | 8 | 9 | try: 10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")): 11 | raise OptionalDependencyNotAvailable() 12 | except OptionalDependencyNotAvailable: 13 | from ...utils.dummy_torch_and_transformers_objects import ( 14 | VersatileDiffusionDualGuidedPipeline, 15 | VersatileDiffusionImageVariationPipeline, 16 | VersatileDiffusionPipeline, 17 | VersatileDiffusionTextToImagePipeline, 18 | ) 19 | else: 20 | from .modeling_text_unet import UNetFlatConditionModel 21 | from .pipeline_versatile_diffusion import VersatileDiffusionPipeline 22 | from .pipeline_versatile_diffusion_dual_guided import VersatileDiffusionDualGuidedPipeline 23 | from .pipeline_versatile_diffusion_image_variation import VersatileDiffusionImageVariationPipeline 24 | from .pipeline_versatile_diffusion_text_to_image import VersatileDiffusionTextToImagePipeline 25 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/pipelines/vq_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from ...utils import is_torch_available, is_transformers_available 2 | 3 | 4 | if is_transformers_available() and is_torch_available(): 5 | from .pipeline_vq_diffusion import LearnedClassifierFreeSamplingEmbeddings, VQDiffusionPipeline 6 | 
-------------------------------------------------------------------------------- /diffusers/src/diffusers/schedulers/README.md: -------------------------------------------------------------------------------- 1 | # Schedulers 2 | 3 | For more information on the schedulers, please refer to the [docs](https://huggingface.co/docs/diffusers/api/schedulers/overview). -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/accelerate_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Accelerate utilities: Utilities related to accelerate 16 | """ 17 | 18 | from packaging import version 19 | 20 | from .import_utils import is_accelerate_available 21 | 22 | 23 | if is_accelerate_available(): 24 | import accelerate 25 | 26 | 27 | def apply_forward_hook(method): 28 | """ 29 | Decorator that applies a registered CpuOffload hook to an arbitrary function rather than `forward`. This is useful 30 | for cases where a PyTorch module provides functions other than `forward` that should trigger a move to the 31 | appropriate acceleration device. This is the case for `encode` and `decode` in [`AutoencoderKL`]. 32 | 33 | This decorator looks inside the internal `_hf_hook` property to find a registered offload hook. 34 | 35 | :param method: The method to decorate. This method should be a method of a PyTorch module. 36 | """ 37 | if not is_accelerate_available(): 38 | return method 39 | accelerate_version = version.parse(accelerate.__version__).base_version 40 | if version.parse(accelerate_version) < version.parse("0.17.0"): 41 | return method 42 | 43 | def wrapper(self, *args, **kwargs): 44 | if hasattr(self, "_hf_hook") and hasattr(self._hf_hook, "pre_forward"): 45 | self._hf_hook.pre_forward(self) 46 | return method(self, *args, **kwargs) 47 | 48 | return wrapper 49 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
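# [Editor's note] A hedged usage sketch for `apply_forward_hook` from
# `accelerate_utils.py` above; `TinyAutoencoder` is an invented module, not part
# of the library. The decorator fires any registered accelerate offload hook
# (`self._hf_hook.pre_forward`) before the decorated method runs, which is how
# `AutoencoderKL.encode`/`decode` get moved to the execution device on demand.
import torch.nn as nn

from diffusers.utils.accelerate_utils import apply_forward_hook


class TinyAutoencoder(nn.Module):
    @apply_forward_hook
    def encode(self, x):
        # With accelerate CPU offload enabled, the hook relocates this module
        # to the execution device first; without a hook it is effectively a no-op.
        return 0.5 * x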
14 | import os 15 | 16 | from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE, hf_cache_home 17 | 18 | 19 | default_cache_path = HUGGINGFACE_HUB_CACHE 20 | 21 | 22 | CONFIG_NAME = "config.json" 23 | WEIGHTS_NAME = "diffusion_pytorch_model.bin" 24 | FLAX_WEIGHTS_NAME = "diffusion_flax_model.msgpack" 25 | ONNX_WEIGHTS_NAME = "model.onnx" 26 | SAFETENSORS_WEIGHTS_NAME = "diffusion_pytorch_model.safetensors" 27 | ONNX_EXTERNAL_WEIGHTS_NAME = "weights.pb" 28 | HUGGINGFACE_CO_RESOLVE_ENDPOINT = "https://huggingface.co" 29 | DIFFUSERS_CACHE = default_cache_path 30 | DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" 31 | HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) 32 | DEPRECATED_REVISION_ARGS = ["fp16", "non-ema"] 33 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/deprecation_utils.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import warnings 3 | from typing import Any, Dict, Optional, Union 4 | 5 | from packaging import version 6 | 7 | 8 | def deprecate(*args, take_from: Optional[Union[Dict, Any]] = None, standard_warn=True): 9 | from .. import __version__ 10 | 11 | deprecated_kwargs = take_from 12 | values = () 13 | if not isinstance(args[0], tuple): 14 | args = (args,) 15 | 16 | for attribute, version_name, message in args: 17 | if version.parse(version.parse(__version__).base_version) >= version.parse(version_name): 18 | raise ValueError( 19 | f"The deprecation tuple {(attribute, version_name, message)} should be removed since diffusers'" 20 | f" version {__version__} is >= {version_name}" 21 | ) 22 | 23 | warning = None 24 | if isinstance(deprecated_kwargs, dict) and attribute in deprecated_kwargs: 25 | values += (deprecated_kwargs.pop(attribute),) 26 | warning = f"The `{attribute}` argument is deprecated and will be removed in version {version_name}." 27 | elif hasattr(deprecated_kwargs, attribute): 28 | values += (getattr(deprecated_kwargs, attribute),) 29 | warning = f"The `{attribute}` attribute is deprecated and will be removed in version {version_name}." 30 | elif deprecated_kwargs is None: 31 | warning = f"`{attribute}` is deprecated and will be removed in version {version_name}." 32 | 33 | if warning is not None: 34 | warning = warning + " " if standard_warn else "" 35 | warnings.warn(warning + message, FutureWarning, stacklevel=2) 36 | 37 | if isinstance(deprecated_kwargs, dict) and len(deprecated_kwargs) > 0: 38 | call_frame = inspect.getouterframes(inspect.currentframe())[1] 39 | filename = call_frame.filename 40 | line_number = call_frame.lineno 41 | function = call_frame.function 42 | key, value = next(iter(deprecated_kwargs.items())) 43 | raise TypeError(f"{function} in {filename} line {line_number-1} got an unexpected keyword argument `{key}`") 44 | 45 | if len(values) == 0: 46 | return 47 | elif len(values) == 1: 48 | return values[0] 49 | return values 50 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/doc_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Doc utilities: Utilities related to documentation 16 | """ 17 | import re 18 | 19 | 20 | def replace_example_docstring(example_docstring): 21 | def docstring_decorator(fn): 22 | func_doc = fn.__doc__ 23 | lines = func_doc.split("\n") 24 | i = 0 25 | while i < len(lines) and re.search(r"^\s*Examples?:\s*$", lines[i]) is None: 26 | i += 1 27 | if i < len(lines): 28 | lines[i] = example_docstring 29 | func_doc = "\n".join(lines) 30 | else: 31 | raise ValueError( 32 | f"The function {fn} should have an empty 'Examples:' in its docstring as placeholder, " 33 | f"current docstring is:\n{func_doc}" 34 | ) 35 | fn.__doc__ = func_doc 36 | return fn 37 | 38 | return docstring_decorator 39 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/dummy_flax_and_transformers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class FlaxStableDiffusionImg2ImgPipeline(metaclass=DummyObject): 6 | _backends = ["flax", "transformers"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["flax", "transformers"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["flax", "transformers"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["flax", "transformers"]) 18 | 19 | 20 | class FlaxStableDiffusionInpaintPipeline(metaclass=DummyObject): 21 | _backends = ["flax", "transformers"] 22 | 23 | def __init__(self, *args, **kwargs): 24 | requires_backends(self, ["flax", "transformers"]) 25 | 26 | @classmethod 27 | def from_config(cls, *args, **kwargs): 28 | requires_backends(cls, ["flax", "transformers"]) 29 | 30 | @classmethod 31 | def from_pretrained(cls, *args, **kwargs): 32 | requires_backends(cls, ["flax", "transformers"]) 33 | 34 | 35 | class FlaxStableDiffusionPipeline(metaclass=DummyObject): 36 | _backends = ["flax", "transformers"] 37 | 38 | def __init__(self, *args, **kwargs): 39 | requires_backends(self, ["flax", "transformers"]) 40 | 41 | @classmethod 42 | def from_config(cls, *args, **kwargs): 43 | requires_backends(cls, ["flax", "transformers"]) 44 | 45 | @classmethod 46 | def from_pretrained(cls, *args, **kwargs): 47 | requires_backends(cls, ["flax", "transformers"]) 48 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/dummy_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
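# [Editor's note] What these autogenerated dummy modules accomplish, condensed
# into a self-contained sketch (`NeedsScipy` is an invented stand-in class):
# imports always succeed even when an optional backend is missing, and a
# helpful ImportError is raised only when the object is actually used.
from diffusers.utils import DummyObject, requires_backends


class NeedsScipy(metaclass=DummyObject):
    _backends = ["scipy"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["scipy"])


try:
    NeedsScipy()  # raises ImportError with install instructions if scipy is missing
except ImportError as err:
    print(err)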
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class OnnxRuntimeModel(metaclass=DummyObject): 6 | _backends = ["onnx"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["onnx"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["onnx"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["onnx"]) 18 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/dummy_torch_and_librosa_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class AudioDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["torch", "librosa"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "librosa"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "librosa"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "librosa"]) 18 | 19 | 20 | class Mel(metaclass=DummyObject): 21 | _backends = ["torch", "librosa"] 22 | 23 | def __init__(self, *args, **kwargs): 24 | requires_backends(self, ["torch", "librosa"]) 25 | 26 | @classmethod 27 | def from_config(cls, *args, **kwargs): 28 | requires_backends(cls, ["torch", "librosa"]) 29 | 30 | @classmethod 31 | def from_pretrained(cls, *args, **kwargs): 32 | requires_backends(cls, ["torch", "librosa"]) 33 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/dummy_torch_and_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class LMSDiscreteScheduler(metaclass=DummyObject): 6 | _backends = ["torch", "scipy"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "scipy"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "scipy"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "scipy"]) 18 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/dummy_torch_and_transformers_and_k_diffusion_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
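# [Editor's note] A self-contained sketch of `replace_example_docstring` from
# `doc_utils.py` earlier in this dump; the function and example text here are
# invented. The decorated callable keeps a bare `Examples:` line as a
# placeholder, and the decorator swaps that line for the full example block.
from diffusers.utils.doc_utils import replace_example_docstring

EXAMPLE_DOC_STRING = """
    Examples:
        ```py
        >>> result = run("a prompt")  # hypothetical call
        ```
"""


@replace_example_docstring(EXAMPLE_DOC_STRING)
def run(prompt: str) -> str:
    """
    Toy entry point used only for this sketch.

    Examples:
    """
    return prompt


# run.__doc__ now contains the full example block where the bare
# "Examples:" placeholder line stood.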
2 | from ..utils import DummyObject, requires_backends 3 | 4 | 5 | class StableDiffusionKDiffusionPipeline(metaclass=DummyObject): 6 | _backends = ["torch", "transformers", "k_diffusion"] 7 | 8 | def __init__(self, *args, **kwargs): 9 | requires_backends(self, ["torch", "transformers", "k_diffusion"]) 10 | 11 | @classmethod 12 | def from_config(cls, *args, **kwargs): 13 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 14 | 15 | @classmethod 16 | def from_pretrained(cls, *args, **kwargs): 17 | requires_backends(cls, ["torch", "transformers", "k_diffusion"]) 18 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/model_card_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | {{ card_data }} 3 | --- 4 | 5 | <!-- This model card has been generated automatically according to the information the training script had access to. You 6 | should probably proofread and complete it, then remove this comment. --> 7 | 8 | # {{ model_name | default("Diffusion Model") }} 9 | 10 | ## Model description 11 | 12 | This diffusion model is trained with the [🤗 Diffusers](https://github.com/huggingface/diffusers) library 13 | on the `{{ dataset_name }}` dataset. 14 | 15 | ## Intended uses & limitations 16 | 17 | #### How to use 18 | 19 | ```python 20 | # TODO: add an example code snippet for running this diffusion pipeline 21 | ``` 22 | 23 | #### Limitations and bias 24 | 25 | [TODO: provide examples of latent issues and potential remediations] 26 | 27 | ## Training data 28 | 29 | [TODO: describe the data used to train the model] 30 | 31 | ### Training hyperparameters 32 | 33 | The following hyperparameters were used during training: 34 | - learning_rate: {{ learning_rate }} 35 | - train_batch_size: {{ train_batch_size }} 36 | - eval_batch_size: {{ eval_batch_size }} 37 | - gradient_accumulation_steps: {{ gradient_accumulation_steps }} 38 | - optimizer: AdamW with betas=({{ adam_beta1 }}, {{ adam_beta2 }}), weight_decay={{ adam_weight_decay }} and epsilon={{ adam_epsilon }} 39 | - lr_scheduler: {{ lr_scheduler }} 40 | - lr_warmup_steps: {{ lr_warmup_steps }} 41 | - ema_inv_gamma: {{ ema_inv_gamma }} 42 | - ema_power: {{ ema_power }} 43 | - ema_max_decay: {{ ema_max_decay }} 44 | - mixed_precision: {{ mixed_precision }} 45 | 46 | ### Training results 47 | 48 | 📈 [TensorBoard logs](https://huggingface.co/{{ repo_name }}/tensorboard?#scalars) 49 | 50 | 51 | -------------------------------------------------------------------------------- /diffusers/src/diffusers/utils/pil_utils.py: -------------------------------------------------------------------------------- 1 | import PIL.Image 2 | import PIL.ImageOps 3 | from packaging import version 4 | 5 | 6 | if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"): 7 | PIL_INTERPOLATION = { 8 | "linear": PIL.Image.Resampling.BILINEAR, 9 | "bilinear": PIL.Image.Resampling.BILINEAR, 10 | "bicubic": PIL.Image.Resampling.BICUBIC, 11 | "lanczos": PIL.Image.Resampling.LANCZOS, 12 | "nearest": PIL.Image.Resampling.NEAREST, 13 | } 14 | else: 15 | PIL_INTERPOLATION = { 16 | "linear": PIL.Image.LINEAR, 17 | "bilinear": PIL.Image.BILINEAR, 18 | "bicubic": PIL.Image.BICUBIC, 19 | "lanczos": PIL.Image.LANCZOS, 20 | "nearest": PIL.Image.NEAREST, 21 | } 22 | -------------------------------------------------------------------------------- /diffusers/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/__init__.py
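The `PIL_INTERPOLATION` table in `pil_utils.py` above papers over Pillow 9.1.0 moving its resampling filters into the `PIL.Image.Resampling` enum: callers index one dict by name and get the right constant for whichever Pillow version is installed. A small usage sketch, where the input file name is an assumption:

```python
import PIL.Image

from diffusers.utils.pil_utils import PIL_INTERPOLATION

image = PIL.Image.open("input.png")  # hypothetical input file
# The same call works on Pillow < 9.1.0 and >= 9.1.0.
image = image.resize((512, 512), resample=PIL_INTERPOLATION["lanczos"])
```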
-------------------------------------------------------------------------------- /diffusers/tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import sys 19 | import warnings 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(dirname(__file__)), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | # silence FutureWarning warnings in tests since often we can't act on them until 29 | # they become normal warnings - i.e. the tests still need to test the current functionality 30 | warnings.simplefilter(action="ignore", category=FutureWarning) 31 | 32 | 33 | def pytest_addoption(parser): 34 | from diffusers.utils.testing_utils import pytest_addoption_shared 35 | 36 | pytest_addoption_shared(parser) 37 | 38 | 39 | def pytest_terminal_summary(terminalreporter): 40 | from diffusers.utils.testing_utils import pytest_terminal_summary_main 41 | 42 | make_reports = terminalreporter.config.getoption("--make-reports") 43 | if make_reports: 44 | pytest_terminal_summary_main(terminalreporter, id=make_reports) 45 | -------------------------------------------------------------------------------- /diffusers/tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/models/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/models/test_models_vae_flax.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from diffusers import FlaxAutoencoderKL 4 | from diffusers.utils import is_flax_available 5 | from diffusers.utils.testing_utils import require_flax 6 | 7 | from ..test_modeling_common_flax import FlaxModelTesterMixin 8 | 9 | 10 | if is_flax_available(): 11 | import jax 12 | 13 | 14 | @require_flax 15 | class FlaxAutoencoderKLTests(FlaxModelTesterMixin, unittest.TestCase): 16 | model_class = FlaxAutoencoderKL 17 | 18 | @property 19 | def dummy_input(self): 20 | batch_size = 4 21 | num_channels = 3 22 | sizes = (32, 32) 23 | 24 | prng_key = jax.random.PRNGKey(0) 25 | image = jax.random.uniform(prng_key, ((batch_size, num_channels) + sizes)) 26 | 27 | return {"sample": image, "prng_key": prng_key} 28 | 29 | def prepare_init_args_and_inputs_for_common(self): 30 | init_dict = { 31 | "block_out_channels": [32, 64], 32 | "in_channels": 3, 33 | "out_channels": 3, 34 | "down_block_types": 
["DownEncoderBlock2D", "DownEncoderBlock2D"], 35 | "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], 36 | "latent_channels": 4, 37 | } 38 | inputs_dict = self.dummy_input 39 | return init_dict, inputs_dict 40 | -------------------------------------------------------------------------------- /diffusers/tests/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/altdiffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/altdiffusion/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/audio_diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/audio_diffusion/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/dance_diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/dance_diffusion/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/ddim/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/ddpm/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/dit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/dit/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/karras_ve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/karras_ve/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/latent_diffusion/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/paint_by_example/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/paint_by_example/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/pndm/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/repaint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/repaint/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/score_sde_ve/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/semantic_stable_diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/semantic_stable_diffusion/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/stable_diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/stable_diffusion/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/stable_diffusion/test_stable_diffusion_k_diffusion.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import gc 17 | import unittest 18 | 19 | import numpy as np 20 | import torch 21 | 22 | from diffusers import StableDiffusionKDiffusionPipeline 23 | from diffusers.utils import slow, torch_device 24 | from diffusers.utils.testing_utils import require_torch_gpu 25 | 26 | 27 | torch.backends.cuda.matmul.allow_tf32 = False 28 | 29 | 30 | @slow 31 | @require_torch_gpu 32 | class StableDiffusionPipelineIntegrationTests(unittest.TestCase): 33 | def tearDown(self): 34 | # clean up the VRAM after each test 35 | super().tearDown() 36 | gc.collect() 37 | torch.cuda.empty_cache() 38 | 39 | def test_stable_diffusion_1(self): 40 | sd_pipe = StableDiffusionKDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4") 41 | sd_pipe = sd_pipe.to(torch_device) 42 | sd_pipe.set_progress_bar_config(disable=None) 43 | 44 | sd_pipe.set_scheduler("sample_euler") 45 | 46 | prompt = "A painting of a squirrel eating a burger" 47 | generator = torch.manual_seed(0) 48 | output = sd_pipe([prompt], generator=generator, guidance_scale=9.0, num_inference_steps=20, output_type="np") 49 | 50 | image = output.images 51 | 52 | image_slice = image[0, -3:, -3:, -1] 53 | 54 | assert image.shape == (1, 512, 512, 3) 55 | expected_slice = np.array([0.0447, 0.0492, 0.0468, 0.0408, 0.0383, 0.0408, 0.0354, 0.0380, 0.0339]) 56 | 57 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 58 | 59 | def test_stable_diffusion_2(self): 60 | sd_pipe = StableDiffusionKDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base") 61 | sd_pipe = sd_pipe.to(torch_device) 62 | sd_pipe.set_progress_bar_config(disable=None) 63 | 64 | sd_pipe.set_scheduler("sample_euler") 65 | 66 | prompt = "A painting of a squirrel eating a burger" 67 | generator = torch.manual_seed(0) 68 | output = sd_pipe([prompt], generator=generator, guidance_scale=9.0, num_inference_steps=20, output_type="np") 69 | 70 | image = output.images 71 | 72 | image_slice = image[0, -3:, -3:, -1] 73 | 74 | assert image.shape == (1, 512, 512, 3) 75 | expected_slice = np.array([0.1237, 0.1320, 0.1438, 0.1359, 0.1390, 0.1132, 0.1277, 0.1175, 0.1112]) 76 | 77 | assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-1 78 | -------------------------------------------------------------------------------- /diffusers/tests/pipelines/stable_diffusion_2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/stable_diffusion_2/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/stable_diffusion_safe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/stable_diffusion_safe/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/stable_unclip/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/stable_unclip/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/unclip/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/unclip/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/versatile_diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/versatile_diffusion/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/pipelines/versatile_diffusion/test_versatile_diffusion_image_variation.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | 18 | import numpy as np 19 | import torch 20 | 21 | from diffusers import VersatileDiffusionImageVariationPipeline 22 | from diffusers.utils.testing_utils import load_image, require_torch_gpu, slow, torch_device 23 | 24 | 25 | torch.backends.cuda.matmul.allow_tf32 = False 26 | 27 | 28 | class VersatileDiffusionImageVariationPipelineFastTests(unittest.TestCase): 29 | pass 30 | 31 | 32 | @slow 33 | @require_torch_gpu 34 | class VersatileDiffusionImageVariationPipelineIntegrationTests(unittest.TestCase): 35 | def test_inference_image_variations(self): 36 | pipe = VersatileDiffusionImageVariationPipeline.from_pretrained("shi-labs/versatile-diffusion") 37 | pipe.to(torch_device) 38 | pipe.set_progress_bar_config(disable=None) 39 | 40 | image_prompt = load_image( 41 | "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/versatile_diffusion/benz.jpg" 42 | ) 43 | generator = torch.manual_seed(0) 44 | image = pipe( 45 | image=image_prompt, 46 | generator=generator, 47 | guidance_scale=7.5, 48 | num_inference_steps=50, 49 | output_type="numpy", 50 | ).images 51 | 52 | image_slice = image[0, 253:256, 253:256, -1] 53 | 54 | assert image.shape == (1, 512, 512, 3) 55 | expected_slice = np.array([0.0441, 0.0469, 0.0507, 0.0575, 0.0632, 0.0650, 0.0865, 0.0909, 0.0945]) 56 | 57 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 58 | -------------------------------------------------------------------------------- /diffusers/tests/pipelines/vq_diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/diffusers/tests/pipelines/vq_diffusion/__init__.py -------------------------------------------------------------------------------- /diffusers/tests/test_hub_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 HuggingFace Inc. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import unittest 16 | from pathlib import Path 17 | from tempfile import TemporaryDirectory 18 | from unittest.mock import Mock, patch 19 | 20 | import diffusers.utils.hub_utils 21 | 22 | 23 | class CreateModelCardTest(unittest.TestCase): 24 | @patch("diffusers.utils.hub_utils.get_full_repo_name") 25 | def test_create_model_card(self, repo_name_mock: Mock) -> None: 26 | repo_name_mock.return_value = "full_repo_name" 27 | with TemporaryDirectory() as tmpdir: 28 | # Dummy args values 29 | args = Mock() 30 | args.output_dir = tmpdir 31 | args.local_rank = 0 32 | args.hub_token = "hub_token" 33 | args.dataset_name = "dataset_name" 34 | args.learning_rate = 0.01 35 | args.train_batch_size = 100000 36 | args.eval_batch_size = 10000 37 | args.gradient_accumulation_steps = 0.01 38 | args.adam_beta1 = 0.02 39 | args.adam_beta2 = 0.03 40 | args.adam_weight_decay = 0.0005 41 | args.adam_epsilon = 0.000001 42 | args.lr_scheduler = 1 43 | args.lr_warmup_steps = 10 44 | args.ema_inv_gamma = 0.001 45 | args.ema_power = 0.1 46 | args.ema_max_decay = 0.2 47 | args.mixed_precision = True 48 | 49 | # Model card must be rendered and saved 50 | diffusers.utils.hub_utils.create_model_card(args, model_name="model_name") 51 | self.assertTrue((Path(tmpdir) / "README.md").is_file()) 52 | -------------------------------------------------------------------------------- /diffusers/tests/test_modeling_common_flax.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | from diffusers.utils import is_flax_available 4 | from diffusers.utils.testing_utils import require_flax 5 | 6 | 7 | if is_flax_available(): 8 | import jax 9 | 10 | 11 | @require_flax 12 | class FlaxModelTesterMixin: 13 | def test_output(self): 14 | init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() 15 | 16 | model = self.model_class(**init_dict) 17 | variables = model.init(inputs_dict["prng_key"], inputs_dict["sample"]) 18 | jax.lax.stop_gradient(variables) 19 | 20 | output = model.apply(variables, inputs_dict["sample"]) 21 | 22 | if isinstance(output, dict): 23 | output = output.sample 24 | 25 | self.assertIsNotNone(output) 26 | expected_shape = inputs_dict["sample"].shape 27 | self.assertEqual(output.shape, expected_shape, "Input and output shapes do not match") 28 | 29 | def test_forward_with_norm_groups(self): 30 | init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() 31 | 32 | init_dict["norm_num_groups"] = 16 33 | init_dict["block_out_channels"] = (16, 32) 34 | 35 | model = self.model_class(**init_dict) 36 | variables = model.init(inputs_dict["prng_key"], inputs_dict["sample"]) 37 | jax.lax.stop_gradient(variables) 38 | 39 | output = model.apply(variables, inputs_dict["sample"]) 40 | 41 | if isinstance(output, dict): 42 | output = output.sample 43 | 44 | self.assertIsNotNone(output) 45 | expected_shape = inputs_dict["sample"].shape 46 | self.assertEqual(output.shape,
expected_shape, "Input and output shapes do not match") 47 | 48 | def test_deprecated_kwargs(self): 49 | has_kwarg_in_model_class = "kwargs" in inspect.signature(self.model_class.__init__).parameters 50 | has_deprecated_kwarg = len(self.model_class._deprecated_kwargs) > 0 51 | 52 | if has_kwarg_in_model_class and not has_deprecated_kwarg: 53 | raise ValueError( 54 | f"{self.model_class} has `**kwargs` in its __init__ method but has not defined any deprecated kwargs" 55 | " under the `_deprecated_kwargs` class attribute. Make sure to either remove `**kwargs` if there are" 56 | " no deprecated arguments or add the deprecated argument with `_deprecated_kwargs =" 57 | " [<deprecated_argument>]`" 58 | ) 59 | 60 | if not has_kwarg_in_model_class and has_deprecated_kwarg: 61 | raise ValueError( 62 | f"{self.model_class} doesn't have `**kwargs` in its __init__ method but has defined deprecated kwargs" 63 | " under the `_deprecated_kwargs` class attribute. Make sure to either add the `**kwargs` argument to" 64 | f" {self.model_class}.__init__ if there are deprecated arguments or remove the deprecated argument" 65 | " from `_deprecated_kwargs = [<deprecated_argument>]`" 66 | ) 67 | -------------------------------------------------------------------------------- /diffusers/tests/test_outputs.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from dataclasses import dataclass 3 | from typing import List, Union 4 | 5 | import numpy as np 6 | import PIL.Image 7 | 8 | from diffusers.utils.outputs import BaseOutput 9 | 10 | 11 | @dataclass 12 | class CustomOutput(BaseOutput): 13 | images: Union[List[PIL.Image.Image], np.ndarray] 14 | 15 | 16 | class ConfigTester(unittest.TestCase): 17 | def test_outputs_single_attribute(self): 18 | outputs = CustomOutput(images=np.random.rand(1, 3, 4, 4)) 19 | 20 | # check every way of getting the attribute 21 | assert isinstance(outputs.images, np.ndarray) 22 | assert outputs.images.shape == (1, 3, 4, 4) 23 | assert isinstance(outputs["images"], np.ndarray) 24 | assert outputs["images"].shape == (1, 3, 4, 4) 25 | assert isinstance(outputs[0], np.ndarray) 26 | assert outputs[0].shape == (1, 3, 4, 4) 27 | 28 | # test with a non-tensor attribute 29 | outputs = CustomOutput(images=[PIL.Image.new("RGB", (4, 4))]) 30 | 31 | # check every way of getting the attribute 32 | assert isinstance(outputs.images, list) 33 | assert isinstance(outputs.images[0], PIL.Image.Image) 34 | assert isinstance(outputs["images"], list) 35 | assert isinstance(outputs["images"][0], PIL.Image.Image) 36 | assert isinstance(outputs[0], list) 37 | assert isinstance(outputs[0][0], PIL.Image.Image) 38 | 39 | def test_outputs_dict_init(self): 40 | # test output reinitialization with a `dict` for compatibility with `accelerate` 41 | outputs = CustomOutput({"images": np.random.rand(1, 3, 4, 4)}) 42 | 43 | # check every way of getting the attribute 44 | assert isinstance(outputs.images, np.ndarray) 45 | assert outputs.images.shape == (1, 3, 4, 4) 46 | assert isinstance(outputs["images"], np.ndarray) 47 | assert outputs["images"].shape == (1, 3, 4, 4) 48 | assert isinstance(outputs[0], np.ndarray) 49 | assert outputs[0].shape == (1, 3, 4, 4) 50 | 51 | # test with a non-tensor attribute 52 | outputs = CustomOutput({"images": [PIL.Image.new("RGB", (4, 4))]}) 53 | 54 | # check every way of getting the attribute 55 | assert isinstance(outputs.images, list) 56 | assert isinstance(outputs.images[0], PIL.Image.Image) 57 | assert isinstance(outputs["images"], list) 58 | assert
isinstance(outputs["images"][0], PIL.Image.Image) 59 | assert isinstance(outputs[0], list) 60 | assert isinstance(outputs[0][0], PIL.Image.Image) 61 | -------------------------------------------------------------------------------- /diffusers/tests/test_pipelines_onnx_common.py: -------------------------------------------------------------------------------- 1 | from diffusers.utils.testing_utils import require_onnxruntime 2 | 3 | 4 | @require_onnxruntime 5 | class OnnxPipelineTesterMixin: 6 | """ 7 | This mixin is designed to be used with unittest.TestCase classes. 8 | It provides a set of common tests for each ONNXRuntime pipeline, e.g. saving and loading the pipeline, 9 | equivalence of dict and tuple outputs, etc. 10 | """ 11 | 12 | pass 13 | -------------------------------------------------------------------------------- /diffusers/utils/get_modified_files.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # this script reports modified .py files under the desired list of top-level sub-dirs passed as a list of arguments, e.g.: 17 | # python ./utils/get_modified_files.py utils src tests examples 18 | # 19 | # it uses git to find the forking point and which files were modified - i.e. files not under git won't be considered 20 | # since the output of this script is fed into Makefile commands it doesn't print a newline after the results 21 | 22 | import re 23 | import subprocess 24 | import sys 25 | 26 | 27 | fork_point_sha = subprocess.check_output("git merge-base main HEAD".split()).decode("utf-8") 28 | modified_files = subprocess.check_output(f"git diff --name-only {fork_point_sha}".split()).decode("utf-8").split() 29 | 30 | joined_dirs = "|".join(sys.argv[1:]) 31 | regex = re.compile(rf"^({joined_dirs}).*?\.py$") 32 | 33 | relevant_modified_files = [x for x in modified_files if regex.match(x)] 34 | print(" ".join(relevant_modified_files), end="") 35 | -------------------------------------------------------------------------------- /diffusers/utils/print_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # coding=utf-8 4 | # Copyright 2023 The HuggingFace Inc. team. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | # this script dumps information about the environment 19 | 20 | import os 21 | import platform 22 | import sys 23 | 24 | 25 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 26 | 27 | print("Python version:", sys.version) 28 | 29 | print("OS platform:", platform.platform()) 30 | print("OS architecture:", platform.machine()) 31 | 32 | try: 33 | import torch 34 | 35 | print("Torch version:", torch.__version__) 36 | print("Cuda available:", torch.cuda.is_available()) 37 | print("Cuda version:", torch.version.cuda) 38 | print("CuDNN version:", torch.backends.cudnn.version()) 39 | print("Number of GPUs available:", torch.cuda.device_count()) 40 | except ImportError: 41 | print("Torch version:", None) 42 | 43 | try: 44 | import transformers 45 | 46 | print("transformers version:", transformers.__version__) 47 | except ImportError: 48 | print("transformers version:", None) 49 | -------------------------------------------------------------------------------- /imgs/ProFusion_example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/imgs/ProFusion_example.jpg -------------------------------------------------------------------------------- /imgs/daniel.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/imgs/daniel.jpg -------------------------------------------------------------------------------- /imgs/examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/imgs/examples.png -------------------------------------------------------------------------------- /imgs/framework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/imgs/framework.jpg -------------------------------------------------------------------------------- /imgs/main_results_cafe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/imgs/main_results_cafe.jpg -------------------------------------------------------------------------------- /imgs/object_results_cafe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/imgs/object_results_cafe.jpg -------------------------------------------------------------------------------- /process_img.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import argparse 4 | import PIL.Image 5 | import numpy as np 6 | from tqdm import tqdm 7 | import joblib as jlb # parallelizing 8 | 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("--src", type=str) 12 | parser.add_argument("--dest", type=str) 13 | parser.add_argument("--num", type=int) 14 | parser.add_argument("--size", type=int) 15 | 16 | 17 | opt = parser.parse_args() 18 | path = opt.src 19 | size = opt.size 20 | dest_path = opt.dest 21 | if dest_path is None: 22 | dest_path = path 23 | 24 | if opt.num is None: 25 | num = 20 26 | else: 27 | num = opt.num 28 | 29 | 30 | print(path, dest_path) 31 | 32 | def 
file_ext(fname): 33 | return os.path.splitext(fname)[1].lower() 34 | 35 | PIL.Image.init() 36 | 37 | all_fnames = {os.path.relpath(os.path.join(root, fname), start=path) for root, _dirs, files in os.walk(path) for fname in files} # relative paths to --src 38 | image_fnames = sorted(fname for fname in all_fnames if file_ext(fname) in PIL.Image.EXTENSION) 39 | 40 | print(len(image_fnames)) 41 | 42 | def resize_img(fname): 43 | try: 44 | with open(os.path.join(path, fname), 'rb') as f: 45 | image = PIL.Image.open(f).convert('RGB') 46 | w, h = image.size 47 | crop = min(w, h) 48 | image = image.crop(((w-crop)//2, (h-crop)//2, (w+crop)//2, (h+crop)//2)) # center-crop to a square 49 | image = image.resize((size, size), PIL.Image.LANCZOS) 50 | os.makedirs(os.path.dirname(os.path.join(dest_path, fname)), exist_ok=True) 51 | image.save(os.path.join(dest_path, fname)) 52 | except Exception: # skip unreadable or corrupt images instead of aborting the run 53 | pass 54 | 55 | jlb.Parallel(n_jobs=num)(jlb.delayed(resize_img)(name) for name in tqdm(image_fnames)) 56 | -------------------------------------------------------------------------------- /test_imgs/00041.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/test_imgs/00041.jpg -------------------------------------------------------------------------------- /test_imgs/bengio.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/test_imgs/bengio.jpg -------------------------------------------------------------------------------- /test_imgs/danielwu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drboog/ProFusion/6752070ab8e4e1080a9b467259484e224c2bafe4/test_imgs/danielwu.jpg --------------------------------------------------------------------------------
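For reference, `process_img.py` above is meant to be run as a one-shot preprocessing step, e.g. `python process_img.py --src ./raw_faces --dest ./faces_512 --size 512 --num 20` (paths here are illustrative): it center-crops every image found under `--src` to a square, resizes it to `--size` x `--size` with Lanczos resampling, and writes the result under `--dest` using 20 parallel joblib workers.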