├── .gitignore
├── LICENSE
├── README.md
├── anp_config.py
├── anp_defense.py
├── anp_model.py
├── anp_util.py
├── baddiffusion.py
├── dataset.py
├── diffusers
├── .github
│ ├── ISSUE_TEMPLATE
│ │ ├── bug-report.yml
│ │ ├── config.yml
│ │ ├── feature_request.md
│ │ ├── feedback.md
│ │ └── new-model-addition.yml
│ ├── actions
│ │ └── setup-miniconda
│ │ │ └── action.yml
│ └── workflows
│ │ ├── build_docker_images.yml
│ │ ├── build_documentation.yml
│ │ ├── build_pr_documentation.yml
│ │ ├── delete_doc_comment.yml
│ │ ├── nightly_tests.yml
│ │ ├── pr_quality.yml
│ │ ├── pr_tests.yml
│ │ ├── push_tests.yml
│ │ ├── push_tests_fast.yml
│ │ ├── stale.yml
│ │ └── typos.yml
├── .gitignore
├── CITATION.cff
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── PHILOSOPHY.md
├── README.md
├── _typos.toml
├── docker
│ ├── diffusers-flax-cpu
│ │ └── Dockerfile
│ ├── diffusers-flax-tpu
│ │ └── Dockerfile
│ ├── diffusers-onnxruntime-cpu
│ │ └── Dockerfile
│ ├── diffusers-onnxruntime-cuda
│ │ └── Dockerfile
│ ├── diffusers-pytorch-cpu
│ │ └── Dockerfile
│ └── diffusers-pytorch-cuda
│ │ └── Dockerfile
├── docs
│ ├── README.md
│ ├── TRANSLATING.md
│ └── source
│ │ ├── _config.py
│ │ ├── en
│ │ ├── _toctree.yml
│ │ ├── api
│ │ │ ├── configuration.mdx
│ │ │ ├── diffusion_pipeline.mdx
│ │ │ ├── experimental
│ │ │ │ └── rl.mdx
│ │ │ ├── loaders.mdx
│ │ │ ├── logging.mdx
│ │ │ ├── models.mdx
│ │ │ ├── outputs.mdx
│ │ │ ├── pipelines
│ │ │ │ ├── alt_diffusion.mdx
│ │ │ │ ├── audio_diffusion.mdx
│ │ │ │ ├── audioldm.mdx
│ │ │ │ ├── cycle_diffusion.mdx
│ │ │ │ ├── dance_diffusion.mdx
│ │ │ │ ├── ddim.mdx
│ │ │ │ ├── ddpm.mdx
│ │ │ │ ├── dit.mdx
│ │ │ │ ├── latent_diffusion.mdx
│ │ │ │ ├── latent_diffusion_uncond.mdx
│ │ │ │ ├── overview.mdx
│ │ │ │ ├── paint_by_example.mdx
│ │ │ │ ├── pndm.mdx
│ │ │ │ ├── repaint.mdx
│ │ │ │ ├── score_sde_ve.mdx
│ │ │ │ ├── semantic_stable_diffusion.mdx
│ │ │ │ ├── spectrogram_diffusion.mdx
│ │ │ │ ├── stable_diffusion
│ │ │ │ │ ├── attend_and_excite.mdx
│ │ │ │ │ ├── controlnet.mdx
│ │ │ │ │ ├── depth2img.mdx
│ │ │ │ │ ├── image_variation.mdx
│ │ │ │ │ ├── img2img.mdx
│ │ │ │ │ ├── inpaint.mdx
│ │ │ │ │ ├── latent_upscale.mdx
│ │ │ │ │ ├── model_editing.mdx
│ │ │ │ │ ├── overview.mdx
│ │ │ │ │ ├── panorama.mdx
│ │ │ │ │ ├── pix2pix.mdx
│ │ │ │ │ ├── pix2pix_zero.mdx
│ │ │ │ │ ├── self_attention_guidance.mdx
│ │ │ │ │ ├── text2img.mdx
│ │ │ │ │ └── upscale.mdx
│ │ │ │ ├── stable_diffusion_2.mdx
│ │ │ │ ├── stable_diffusion_safe.mdx
│ │ │ │ ├── stable_unclip.mdx
│ │ │ │ ├── stochastic_karras_ve.mdx
│ │ │ │ ├── text_to_video.mdx
│ │ │ │ ├── text_to_video_zero.mdx
│ │ │ │ ├── unclip.mdx
│ │ │ │ ├── versatile_diffusion.mdx
│ │ │ │ └── vq_diffusion.mdx
│ │ │ └── schedulers
│ │ │ │ ├── ddim.mdx
│ │ │ │ ├── ddim_inverse.mdx
│ │ │ │ ├── ddpm.mdx
│ │ │ │ ├── deis.mdx
│ │ │ │ ├── dpm_discrete.mdx
│ │ │ │ ├── dpm_discrete_ancestral.mdx
│ │ │ │ ├── euler.mdx
│ │ │ │ ├── euler_ancestral.mdx
│ │ │ │ ├── heun.mdx
│ │ │ │ ├── ipndm.mdx
│ │ │ │ ├── lms_discrete.mdx
│ │ │ │ ├── multistep_dpm_solver.mdx
│ │ │ │ ├── overview.mdx
│ │ │ │ ├── pndm.mdx
│ │ │ │ ├── repaint.mdx
│ │ │ │ ├── score_sde_ve.mdx
│ │ │ │ ├── score_sde_vp.mdx
│ │ │ │ ├── singlestep_dpm_solver.mdx
│ │ │ │ ├── stochastic_karras_ve.mdx
│ │ │ │ ├── unipc.mdx
│ │ │ │ └── vq_diffusion.mdx
│ │ ├── conceptual
│ │ │ ├── contribution.mdx
│ │ │ ├── ethical_guidelines.mdx
│ │ │ ├── evaluation.mdx
│ │ │ └── philosophy.mdx
│ │ ├── index.mdx
│ │ ├── installation.mdx
│ │ ├── optimization
│ │ │ ├── coreml.mdx
│ │ │ ├── fp16.mdx
│ │ │ ├── habana.mdx
│ │ │ ├── mps.mdx
│ │ │ ├── onnx.mdx
│ │ │ ├── open_vino.mdx
│ │ │ ├── opt_overview.mdx
│ │ │ ├── torch2.0.mdx
│ │ │ └── xformers.mdx
│ │ ├── quicktour.mdx
│ │ ├── stable_diffusion.mdx
│ │ ├── training
│ │ │ ├── controlnet.mdx
│ │ │ ├── dreambooth.mdx
│ │ │ ├── instructpix2pix.mdx
│ │ │ ├── lora.mdx
│ │ │ ├── overview.mdx
│ │ │ ├── text2image.mdx
│ │ │ ├── text_inversion.mdx
│ │ │ └── unconditional_training.mdx
│ │ ├── tutorials
│ │ │ ├── basic_training.mdx
│ │ │ └── tutorial_overview.mdx
│ │ └── using-diffusers
│ │ │ ├── audio.mdx
│ │ │ ├── conditional_image_generation.mdx
│ │ │ ├── contribute_pipeline.mdx
│ │ │ ├── controlling_generation.mdx
│ │ │ ├── custom_pipeline_examples.mdx
│ │ │ ├── custom_pipeline_overview.mdx
│ │ │ ├── depth2img.mdx
│ │ │ ├── img2img.mdx
│ │ │ ├── inpaint.mdx
│ │ │ ├── kerascv.mdx
│ │ │ ├── loading.mdx
│ │ │ ├── loading_overview.mdx
│ │ │ ├── other-modalities.mdx
│ │ │ ├── pipeline_overview.mdx
│ │ │ ├── reproducibility.mdx
│ │ │ ├── reusing_seeds.mdx
│ │ │ ├── rl.mdx
│ │ │ ├── schedulers.mdx
│ │ │ ├── stable_diffusion_jax_how_to.mdx
│ │ │ ├── unconditional_image_generation.mdx
│ │ │ ├── using_safetensors
│ │ │ ├── using_safetensors.mdx
│ │ │ ├── weighted_prompts.mdx
│ │ │ └── write_own_pipeline.mdx
│ │ ├── ko
│ │ ├── _toctree.yml
│ │ ├── in_translation.mdx
│ │ ├── index.mdx
│ │ ├── installation.mdx
│ │ └── quicktour.mdx
│ │ └── zh
│ │ ├── _toctree.yml
│ │ ├── index.mdx
│ │ ├── installation.mdx
│ │ └── quicktour.mdx
├── examples
│ ├── README.md
│ ├── community
│ │ ├── README.md
│ │ ├── bit_diffusion.py
│ │ ├── checkpoint_merger.py
│ │ ├── clip_guided_stable_diffusion.py
│ │ ├── clip_guided_stable_diffusion_img2img.py
│ │ ├── composable_stable_diffusion.py
│ │ ├── ddim_noise_comparative_analysis.py
│ │ ├── imagic_stable_diffusion.py
│ │ ├── img2img_inpainting.py
│ │ ├── interpolate_stable_diffusion.py
│ │ ├── lpw_stable_diffusion.py
│ │ ├── lpw_stable_diffusion_onnx.py
│ │ ├── magic_mix.py
│ │ ├── multilingual_stable_diffusion.py
│ │ ├── one_step_unet.py
│ │ ├── sd_text2img_k_diffusion.py
│ │ ├── seed_resize_stable_diffusion.py
│ │ ├── speech_to_image_diffusion.py
│ │ ├── stable_diffusion_comparison.py
│ │ ├── stable_diffusion_controlnet_img2img.py
│ │ ├── stable_diffusion_controlnet_inpaint.py
│ │ ├── stable_diffusion_controlnet_inpaint_img2img.py
│ │ ├── stable_diffusion_mega.py
│ │ ├── stable_unclip.py
│ │ ├── text_inpainting.py
│ │ ├── tiled_upscaling.py
│ │ ├── unclip_image_interpolation.py
│ │ ├── unclip_text_interpolation.py
│ │ └── wildcard_stable_diffusion.py
│ ├── conftest.py
│ ├── controlnet
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ ├── requirements_flax.txt
│ │ ├── train_controlnet.py
│ │ └── train_controlnet_flax.py
│ ├── dreambooth
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ ├── requirements_flax.txt
│ │ ├── train_dreambooth.py
│ │ ├── train_dreambooth_flax.py
│ │ └── train_dreambooth_lora.py
│ ├── inference
│ │ ├── README.md
│ │ ├── image_to_image.py
│ │ └── inpainting.py
│ ├── instruct_pix2pix
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── train_instruct_pix2pix.py
│ ├── rl
│ │ ├── README.md
│ │ └── run_diffuser_locomotion.py
│ ├── test_examples.py
│ ├── text_to_image
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ ├── requirements_flax.txt
│ │ ├── train_text_to_image.py
│ │ ├── train_text_to_image_flax.py
│ │ └── train_text_to_image_lora.py
│ ├── textual_inversion
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ ├── requirements_flax.txt
│ │ ├── textual_inversion.py
│ │ └── textual_inversion_flax.py
│ └── unconditional_image_generation
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── train_unconditional.py
├── pyproject.toml
├── scripts
│ ├── __init__.py
│ ├── change_naming_configs_and_checkpoints.py
│ ├── conversion_ldm_uncond.py
│ ├── convert_dance_diffusion_to_diffusers.py
│ ├── convert_ddpm_original_checkpoint_to_diffusers.py
│ ├── convert_diffusers_to_original_stable_diffusion.py
│ ├── convert_dit_to_diffusers.py
│ ├── convert_k_upscaler_to_diffusers.py
│ ├── convert_kakao_brain_unclip_to_diffusers.py
│ ├── convert_ldm_original_checkpoint_to_diffusers.py
│ ├── convert_lora_safetensor_to_diffusers.py
│ ├── convert_models_diffuser_to_diffusers.py
│ ├── convert_ms_text_to_video_to_diffusers.py
│ ├── convert_music_spectrogram_to_diffusers.py
│ ├── convert_ncsnpp_original_checkpoint_to_diffusers.py
│ ├── convert_original_audioldm_to_diffusers.py
│ ├── convert_original_controlnet_to_diffusers.py
│ ├── convert_original_stable_diffusion_to_diffusers.py
│ ├── convert_stable_diffusion_checkpoint_to_onnx.py
│ ├── convert_unclip_txt2img_to_image_variation.py
│ ├── convert_vae_diff_to_onnx.py
│ ├── convert_vae_pt_to_diffusers.py
│ ├── convert_versatile_diffusion_to_diffusers.py
│ ├── convert_vq_diffusion_to_diffusers.py
│ └── generate_logits.py
├── setup.cfg
├── setup.py
├── src
│ └── diffusers
│ │ ├── __init__.py
│ │ ├── commands
│ │ ├── __init__.py
│ │ ├── diffusers_cli.py
│ │ └── env.py
│ │ ├── configuration_utils.py
│ │ ├── dependency_versions_check.py
│ │ ├── dependency_versions_table.py
│ │ ├── experimental
│ │ ├── README.md
│ │ ├── __init__.py
│ │ └── rl
│ │ │ ├── __init__.py
│ │ │ └── value_guided_sampling.py
│ │ ├── image_processor.py
│ │ ├── loaders.py
│ │ ├── models
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── attention_flax.py
│ │ ├── attention_processor.py
│ │ ├── autoencoder_kl.py
│ │ ├── controlnet.py
│ │ ├── controlnet_flax.py
│ │ ├── cross_attention.py
│ │ ├── dual_transformer_2d.py
│ │ ├── embeddings.py
│ │ ├── embeddings_flax.py
│ │ ├── modeling_flax_pytorch_utils.py
│ │ ├── modeling_flax_utils.py
│ │ ├── modeling_pytorch_flax_utils.py
│ │ ├── modeling_utils.py
│ │ ├── prior_transformer.py
│ │ ├── resnet.py
│ │ ├── resnet_flax.py
│ │ ├── t5_film_transformer.py
│ │ ├── transformer_2d.py
│ │ ├── transformer_temporal.py
│ │ ├── unet_1d.py
│ │ ├── unet_1d_blocks.py
│ │ ├── unet_2d.py
│ │ ├── unet_2d_blocks.py
│ │ ├── unet_2d_blocks_flax.py
│ │ ├── unet_2d_condition.py
│ │ ├── unet_2d_condition_flax.py
│ │ ├── unet_3d_blocks.py
│ │ ├── unet_3d_condition.py
│ │ ├── vae.py
│ │ ├── vae_flax.py
│ │ └── vq_model.py
│ │ ├── optimization.py
│ │ ├── pipeline_utils.py
│ │ ├── pipelines
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── alt_diffusion
│ │ │ ├── __init__.py
│ │ │ ├── modeling_roberta_series.py
│ │ │ ├── pipeline_alt_diffusion.py
│ │ │ └── pipeline_alt_diffusion_img2img.py
│ │ ├── audio_diffusion
│ │ │ ├── __init__.py
│ │ │ ├── mel.py
│ │ │ └── pipeline_audio_diffusion.py
│ │ ├── audioldm
│ │ │ ├── __init__.py
│ │ │ └── pipeline_audioldm.py
│ │ ├── dance_diffusion
│ │ │ ├── __init__.py
│ │ │ └── pipeline_dance_diffusion.py
│ │ ├── ddim
│ │ │ ├── __init__.py
│ │ │ └── pipeline_ddim.py
│ │ ├── ddpm
│ │ │ ├── __init__.py
│ │ │ └── pipeline_ddpm.py
│ │ ├── dit
│ │ │ ├── __init__.py
│ │ │ └── pipeline_dit.py
│ │ ├── latent_diffusion
│ │ │ ├── __init__.py
│ │ │ ├── pipeline_latent_diffusion.py
│ │ │ └── pipeline_latent_diffusion_superresolution.py
│ │ ├── latent_diffusion_uncond
│ │ │ ├── __init__.py
│ │ │ └── pipeline_latent_diffusion_uncond.py
│ │ ├── onnx_utils.py
│ │ ├── paint_by_example
│ │ │ ├── __init__.py
│ │ │ ├── image_encoder.py
│ │ │ └── pipeline_paint_by_example.py
│ │ ├── pipeline_flax_utils.py
│ │ ├── pipeline_utils.py
│ │ ├── pndm
│ │ │ ├── __init__.py
│ │ │ └── pipeline_pndm.py
│ │ ├── repaint
│ │ │ ├── __init__.py
│ │ │ └── pipeline_repaint.py
│ │ ├── score_sde_ve
│ │ │ ├── __init__.py
│ │ │ └── pipeline_score_sde_ve.py
│ │ ├── semantic_stable_diffusion
│ │ │ ├── __init__.py
│ │ │ └── pipeline_semantic_stable_diffusion.py
│ │ ├── spectrogram_diffusion
│ │ │ ├── __init__.py
│ │ │ ├── continous_encoder.py
│ │ │ ├── midi_utils.py
│ │ │ ├── notes_encoder.py
│ │ │ └── pipeline_spectrogram_diffusion.py
│ │ ├── stable_diffusion
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── convert_from_ckpt.py
│ │ │ ├── pipeline_cycle_diffusion.py
│ │ │ ├── pipeline_flax_stable_diffusion.py
│ │ │ ├── pipeline_flax_stable_diffusion_controlnet.py
│ │ │ ├── pipeline_flax_stable_diffusion_img2img.py
│ │ │ ├── pipeline_flax_stable_diffusion_inpaint.py
│ │ │ ├── pipeline_onnx_stable_diffusion.py
│ │ │ ├── pipeline_onnx_stable_diffusion_img2img.py
│ │ │ ├── pipeline_onnx_stable_diffusion_inpaint.py
│ │ │ ├── pipeline_onnx_stable_diffusion_inpaint_legacy.py
│ │ │ ├── pipeline_onnx_stable_diffusion_upscale.py
│ │ │ ├── pipeline_stable_diffusion.py
│ │ │ ├── pipeline_stable_diffusion_attend_and_excite.py
│ │ │ ├── pipeline_stable_diffusion_controlnet.py
│ │ │ ├── pipeline_stable_diffusion_depth2img.py
│ │ │ ├── pipeline_stable_diffusion_image_variation.py
│ │ │ ├── pipeline_stable_diffusion_img2img.py
│ │ │ ├── pipeline_stable_diffusion_inpaint.py
│ │ │ ├── pipeline_stable_diffusion_inpaint_legacy.py
│ │ │ ├── pipeline_stable_diffusion_instruct_pix2pix.py
│ │ │ ├── pipeline_stable_diffusion_k_diffusion.py
│ │ │ ├── pipeline_stable_diffusion_latent_upscale.py
│ │ │ ├── pipeline_stable_diffusion_model_editing.py
│ │ │ ├── pipeline_stable_diffusion_panorama.py
│ │ │ ├── pipeline_stable_diffusion_pix2pix_zero.py
│ │ │ ├── pipeline_stable_diffusion_sag.py
│ │ │ ├── pipeline_stable_diffusion_upscale.py
│ │ │ ├── pipeline_stable_unclip.py
│ │ │ ├── pipeline_stable_unclip_img2img.py
│ │ │ ├── safety_checker.py
│ │ │ ├── safety_checker_flax.py
│ │ │ └── stable_unclip_image_normalizer.py
│ │ ├── stable_diffusion_safe
│ │ │ ├── __init__.py
│ │ │ ├── pipeline_stable_diffusion_safe.py
│ │ │ └── safety_checker.py
│ │ ├── stochastic_karras_ve
│ │ │ ├── __init__.py
│ │ │ └── pipeline_stochastic_karras_ve.py
│ │ ├── text_to_video_synthesis
│ │ │ ├── __init__.py
│ │ │ ├── pipeline_text_to_video_synth.py
│ │ │ └── pipeline_text_to_video_zero.py
│ │ ├── unclip
│ │ │ ├── __init__.py
│ │ │ ├── pipeline_unclip.py
│ │ │ ├── pipeline_unclip_image_variation.py
│ │ │ └── text_proj.py
│ │ ├── versatile_diffusion
│ │ │ ├── __init__.py
│ │ │ ├── modeling_text_unet.py
│ │ │ ├── pipeline_versatile_diffusion.py
│ │ │ ├── pipeline_versatile_diffusion_dual_guided.py
│ │ │ ├── pipeline_versatile_diffusion_image_variation.py
│ │ │ └── pipeline_versatile_diffusion_text_to_image.py
│ │ └── vq_diffusion
│ │ │ ├── __init__.py
│ │ │ └── pipeline_vq_diffusion.py
│ │ ├── schedulers
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── scheduling_ddim.py
│ │ ├── scheduling_ddim_flax.py
│ │ ├── scheduling_ddim_inverse.py
│ │ ├── scheduling_ddpm.py
│ │ ├── scheduling_ddpm_flax.py
│ │ ├── scheduling_deis_multistep.py
│ │ ├── scheduling_dpmsolver_multistep.py
│ │ ├── scheduling_dpmsolver_multistep_flax.py
│ │ ├── scheduling_dpmsolver_singlestep.py
│ │ ├── scheduling_euler_ancestral_discrete.py
│ │ ├── scheduling_euler_discrete.py
│ │ ├── scheduling_heun_discrete.py
│ │ ├── scheduling_ipndm.py
│ │ ├── scheduling_k_dpm_2_ancestral_discrete.py
│ │ ├── scheduling_k_dpm_2_discrete.py
│ │ ├── scheduling_karras_ve.py
│ │ ├── scheduling_karras_ve_flax.py
│ │ ├── scheduling_lms_discrete.py
│ │ ├── scheduling_lms_discrete_flax.py
│ │ ├── scheduling_pndm.py
│ │ ├── scheduling_pndm_flax.py
│ │ ├── scheduling_repaint.py
│ │ ├── scheduling_sde_ve.py
│ │ ├── scheduling_sde_ve_flax.py
│ │ ├── scheduling_sde_vp.py
│ │ ├── scheduling_unclip.py
│ │ ├── scheduling_unipc_multistep.py
│ │ ├── scheduling_utils.py
│ │ ├── scheduling_utils_flax.py
│ │ └── scheduling_vq_diffusion.py
│ │ ├── training_utils.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── accelerate_utils.py
│ │ ├── constants.py
│ │ ├── deprecation_utils.py
│ │ ├── doc_utils.py
│ │ ├── dummy_flax_and_transformers_objects.py
│ │ ├── dummy_flax_objects.py
│ │ ├── dummy_note_seq_objects.py
│ │ ├── dummy_onnx_objects.py
│ │ ├── dummy_pt_objects.py
│ │ ├── dummy_torch_and_librosa_objects.py
│ │ ├── dummy_torch_and_scipy_objects.py
│ │ ├── dummy_torch_and_transformers_and_k_diffusion_objects.py
│ │ ├── dummy_torch_and_transformers_and_onnx_objects.py
│ │ ├── dummy_torch_and_transformers_objects.py
│ │ ├── dummy_transformers_and_torch_and_note_seq_objects.py
│ │ ├── dynamic_modules_utils.py
│ │ ├── hub_utils.py
│ │ ├── import_utils.py
│ │ ├── logging.py
│ │ ├── model_card_template.md
│ │ ├── outputs.py
│ │ ├── pil_utils.py
│ │ ├── testing_utils.py
│ │ └── torch_utils.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── fixtures
│ │ ├── custom_pipeline
│ │ │ ├── pipeline.py
│ │ │ └── what_ever.py
│ │ └── elise_format0.mid
│ ├── models
│ │ ├── __init__.py
│ │ ├── test_attention_processor.py
│ │ ├── test_layers_utils.py
│ │ ├── test_lora_layers.py
│ │ ├── test_modeling_common.py
│ │ ├── test_modeling_common_flax.py
│ │ ├── test_models_unet_1d.py
│ │ ├── test_models_unet_2d.py
│ │ ├── test_models_unet_2d_condition.py
│ │ ├── test_models_unet_2d_flax.py
│ │ ├── test_models_unet_3d_condition.py
│ │ ├── test_models_vae.py
│ │ ├── test_models_vae_flax.py
│ │ ├── test_models_vq.py
│ │ ├── test_unet_2d_blocks.py
│ │ └── test_unet_blocks_common.py
│ ├── others
│ │ ├── test_check_copies.py
│ │ ├── test_check_dummies.py
│ │ ├── test_config.py
│ │ ├── test_ema.py
│ │ ├── test_hub_utils.py
│ │ ├── test_image_processor.py
│ │ ├── test_outputs.py
│ │ ├── test_training.py
│ │ └── test_utils.py
│ ├── pipelines
│ │ ├── __init__.py
│ │ ├── altdiffusion
│ │ │ ├── __init__.py
│ │ │ ├── test_alt_diffusion.py
│ │ │ └── test_alt_diffusion_img2img.py
│ │ ├── audio_diffusion
│ │ │ ├── __init__.py
│ │ │ └── test_audio_diffusion.py
│ │ ├── audioldm
│ │ │ ├── __init__.py
│ │ │ └── test_audioldm.py
│ │ ├── dance_diffusion
│ │ │ ├── __init__.py
│ │ │ └── test_dance_diffusion.py
│ │ ├── ddim
│ │ │ ├── __init__.py
│ │ │ └── test_ddim.py
│ │ ├── ddpm
│ │ │ ├── __init__.py
│ │ │ └── test_ddpm.py
│ │ ├── dit
│ │ │ ├── __init__.py
│ │ │ └── test_dit.py
│ │ ├── karras_ve
│ │ │ ├── __init__.py
│ │ │ └── test_karras_ve.py
│ │ ├── latent_diffusion
│ │ │ ├── __init__.py
│ │ │ ├── test_latent_diffusion.py
│ │ │ ├── test_latent_diffusion_superresolution.py
│ │ │ └── test_latent_diffusion_uncond.py
│ │ ├── paint_by_example
│ │ │ ├── __init__.py
│ │ │ └── test_paint_by_example.py
│ │ ├── pipeline_params.py
│ │ ├── pndm
│ │ │ ├── __init__.py
│ │ │ └── test_pndm.py
│ │ ├── repaint
│ │ │ ├── __init__.py
│ │ │ └── test_repaint.py
│ │ ├── score_sde_ve
│ │ │ ├── __init__.py
│ │ │ └── test_score_sde_ve.py
│ │ ├── semantic_stable_diffusion
│ │ │ ├── __init__.py
│ │ │ └── test_semantic_diffusion.py
│ │ ├── spectrogram_diffusion
│ │ │ ├── __init__.py
│ │ │ └── test_spectrogram_diffusion.py
│ │ ├── stable_diffusion
│ │ │ ├── __init__.py
│ │ │ ├── test_cycle_diffusion.py
│ │ │ ├── test_onnx_stable_diffusion.py
│ │ │ ├── test_onnx_stable_diffusion_img2img.py
│ │ │ ├── test_onnx_stable_diffusion_inpaint.py
│ │ │ ├── test_onnx_stable_diffusion_inpaint_legacy.py
│ │ │ ├── test_onnx_stable_diffusion_upscale.py
│ │ │ ├── test_stable_diffusion.py
│ │ │ ├── test_stable_diffusion_controlnet.py
│ │ │ ├── test_stable_diffusion_flax_controlnet.py
│ │ │ ├── test_stable_diffusion_image_variation.py
│ │ │ ├── test_stable_diffusion_img2img.py
│ │ │ ├── test_stable_diffusion_inpaint.py
│ │ │ ├── test_stable_diffusion_inpaint_legacy.py
│ │ │ ├── test_stable_diffusion_instruction_pix2pix.py
│ │ │ ├── test_stable_diffusion_k_diffusion.py
│ │ │ ├── test_stable_diffusion_model_editing.py
│ │ │ ├── test_stable_diffusion_panorama.py
│ │ │ ├── test_stable_diffusion_pix2pix_zero.py
│ │ │ └── test_stable_diffusion_sag.py
│ │ ├── stable_diffusion_2
│ │ │ ├── __init__.py
│ │ │ ├── test_stable_diffusion.py
│ │ │ ├── test_stable_diffusion_attend_and_excite.py
│ │ │ ├── test_stable_diffusion_depth.py
│ │ │ ├── test_stable_diffusion_flax.py
│ │ │ ├── test_stable_diffusion_flax_inpaint.py
│ │ │ ├── test_stable_diffusion_inpaint.py
│ │ │ ├── test_stable_diffusion_latent_upscale.py
│ │ │ ├── test_stable_diffusion_upscale.py
│ │ │ └── test_stable_diffusion_v_pred.py
│ │ ├── stable_diffusion_safe
│ │ │ ├── __init__.py
│ │ │ └── test_safe_diffusion.py
│ │ ├── stable_unclip
│ │ │ ├── __init__.py
│ │ │ ├── test_stable_unclip.py
│ │ │ └── test_stable_unclip_img2img.py
│ │ ├── test_pipeline_utils.py
│ │ ├── test_pipelines.py
│ │ ├── test_pipelines_common.py
│ │ ├── test_pipelines_flax.py
│ │ ├── test_pipelines_onnx_common.py
│ │ ├── text_to_video
│ │ │ ├── __init__.py
│ │ │ ├── test_text_to_video.py
│ │ │ └── test_text_to_video_zero.py
│ │ ├── unclip
│ │ │ ├── __init__.py
│ │ │ ├── test_unclip.py
│ │ │ └── test_unclip_image_variation.py
│ │ ├── versatile_diffusion
│ │ │ ├── __init__.py
│ │ │ ├── test_versatile_diffusion_dual_guided.py
│ │ │ ├── test_versatile_diffusion_image_variation.py
│ │ │ ├── test_versatile_diffusion_mega.py
│ │ │ └── test_versatile_diffusion_text_to_image.py
│ │ └── vq_diffusion
│ │ │ ├── __init__.py
│ │ │ └── test_vq_diffusion.py
│ └── schedulers
│ │ ├── __init__.py
│ │ ├── test_scheduler_ddim.py
│ │ ├── test_scheduler_ddpm.py
│ │ ├── test_scheduler_deis.py
│ │ ├── test_scheduler_dpm_multi.py
│ │ ├── test_scheduler_dpm_single.py
│ │ ├── test_scheduler_euler.py
│ │ ├── test_scheduler_euler_ancestral.py
│ │ ├── test_scheduler_flax.py
│ │ ├── test_scheduler_heun.py
│ │ ├── test_scheduler_ipndm.py
│ │ ├── test_scheduler_kdpm2_ancestral.py
│ │ ├── test_scheduler_kdpm2_discrete.py
│ │ ├── test_scheduler_lms.py
│ │ ├── test_scheduler_pndm.py
│ │ ├── test_scheduler_score_sde_ve.py
│ │ ├── test_scheduler_unclip.py
│ │ ├── test_scheduler_unipc.py
│ │ ├── test_scheduler_vq_diffusion.py
│ │ └── test_schedulers.py
└── utils
│ ├── check_config_docstrings.py
│ ├── check_copies.py
│ ├── check_doc_toc.py
│ ├── check_dummies.py
│ ├── check_inits.py
│ ├── check_repo.py
│ ├── check_table.py
│ ├── custom_init_isort.py
│ ├── get_modified_files.py
│ ├── overwrite_expected_slice.py
│ ├── print_env.py
│ ├── release.py
│ └── stale.py
├── fid_score.py
├── install.sh
├── loss.py
├── model.py
├── requirements.txt
├── static
├── cat_wo_bg.png
├── fedora-hat.png
├── glasses.png
├── hat.png
├── stop_sign_bg_blk.jpg
├── stop_sign_bg_w.jpg
└── stop_sign_wo_bg.png
└── util.py
/.gitignore:
--------------------------------------------------------------------------------
1 | ~/
2 | *.gif
3 | *.png
4 | *.jpg
5 |
6 | *.zip
7 | *.tar*
8 | *.pth
9 | *.pkl
10 |
11 | *.json
12 | *.pyc
13 | *.log
14 |
15 | core.*
16 | res*
17 | tmp*
18 | *.out
19 |
20 | datasets/
21 | data/
22 | test/
23 | __pycache__/
24 | .ipynb_checkpoints/
25 | .vscode/
26 | diffusion/
27 | wandb/
28 | ANP_backdoor/
29 | Default/
30 | diffusers_old/
31 |
32 | !static/*
33 | NCSNPP_CIFAR10_scratch/
34 | NCSN_CIFAR10_my/
--------------------------------------------------------------------------------
/diffusers/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | contact_links:
2 | - name: Blank issue
3 | url: https://github.com/huggingface/diffusers/issues/new
4 | about: Other
5 | - name: Forum
6 | url: https://discuss.huggingface.co/
7 | about: General usage questions and community discussions
--------------------------------------------------------------------------------
/diffusers/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "\U0001F680 Feature request"
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/diffusers/.github/ISSUE_TEMPLATE/feedback.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "💬 Feedback about API Design"
3 | about: Give feedback about the current API design
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **What API design would you like to have changed or added to the library? Why?**
11 |
12 | **What use case would this enable or better enable? Can you give us a code example?**
13 |
--------------------------------------------------------------------------------
/diffusers/.github/ISSUE_TEMPLATE/new-model-addition.yml:
--------------------------------------------------------------------------------
1 | name: "\U0001F31F New model/pipeline/scheduler addition"
2 | description: Submit a proposal/request to implement a new diffusion model / pipeline / scheduler
3 | labels: [ "New model/pipeline/scheduler" ]
4 |
5 | body:
6 | - type: textarea
7 | id: description-request
8 | validations:
9 | required: true
10 | attributes:
11 | label: Model/Pipeline/Scheduler description
12 | description: |
13 | Put any and all important information related to the model/pipeline/scheduler
14 |
15 | - type: checkboxes
16 | id: information-tasks
17 | attributes:
18 | label: Open source status
19 | description: |
20 | Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `diffusers`.
21 | options:
22 | - label: "The model implementation is available"
23 | - label: "The model weights are available (Only relevant if addition is not a scheduler)."
24 |
25 | - type: textarea
26 | id: additional-info
27 | attributes:
28 | label: Provide useful links for the implementation
29 | description: |
30 | Please provide information regarding the implementation, the weights, and the authors.
31 | Please mention the authors by @gh-username if you're aware of their usernames.
32 |
--------------------------------------------------------------------------------
/diffusers/.github/workflows/build_docker_images.yml:
--------------------------------------------------------------------------------
1 | name: Build Docker images (nightly)
2 |
3 | on:
4 | workflow_dispatch:
5 | schedule:
6 | - cron: "0 0 * * *" # every day at midnight (UTC)
7 |
8 | concurrency:
9 | group: docker-image-builds
10 | cancel-in-progress: false
11 |
12 | env:
13 | REGISTRY: diffusers
14 |
15 | jobs:
16 | build-docker-images:
17 | runs-on: ubuntu-latest
18 |
19 | permissions:
20 | contents: read
21 | packages: write
22 |
23 | strategy:
24 | fail-fast: false
25 | matrix:
26 | image-name:
27 | - diffusers-pytorch-cpu
28 | - diffusers-pytorch-cuda
29 | - diffusers-flax-cpu
30 | - diffusers-flax-tpu
31 | - diffusers-onnxruntime-cpu
32 | - diffusers-onnxruntime-cuda
33 |
34 | steps:
35 | - name: Checkout repository
36 | uses: actions/checkout@v3
37 |
38 | - name: Login to Docker Hub
39 | uses: docker/login-action@v2
40 | with:
41 | username: ${{ env.REGISTRY }}
42 | password: ${{ secrets.DOCKERHUB_TOKEN }}
43 |
44 | - name: Build and push
45 | uses: docker/build-push-action@v3
46 | with:
47 | no-cache: true
48 | context: ./docker/${{ matrix.image-name }}
49 | push: true
50 | tags: ${{ env.REGISTRY }}/${{ matrix.image-name }}:latest
51 |
--------------------------------------------------------------------------------
/diffusers/.github/workflows/build_documentation.yml:
--------------------------------------------------------------------------------
1 | name: Build documentation
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | - doc-builder*
8 | - v*-release
9 |
10 | jobs:
11 | build:
12 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
13 | with:
14 | commit_sha: ${{ github.sha }}
15 | package: diffusers
16 | notebook_folder: diffusers_doc
17 | languages: en ko
18 | secrets:
19 | token: ${{ secrets.HUGGINGFACE_PUSH }}
20 |
--------------------------------------------------------------------------------
/diffusers/.github/workflows/build_pr_documentation.yml:
--------------------------------------------------------------------------------
1 | name: Build PR Documentation
2 |
3 | on:
4 | pull_request:
5 |
6 | concurrency:
7 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
8 | cancel-in-progress: true
9 |
10 | jobs:
11 | build:
12 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
13 | with:
14 | commit_sha: ${{ github.event.pull_request.head.sha }}
15 | pr_number: ${{ github.event.number }}
16 | package: diffusers
17 | languages: en ko
18 |
--------------------------------------------------------------------------------
/diffusers/.github/workflows/delete_doc_comment.yml:
--------------------------------------------------------------------------------
1 | name: Delete dev documentation
2 |
3 | on:
4 | pull_request:
5 | types: [ closed ]
6 |
7 |
8 | jobs:
9 | delete:
10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main
11 | with:
12 | pr_number: ${{ github.event.number }}
13 | package: diffusers
14 |
--------------------------------------------------------------------------------
/diffusers/.github/workflows/pr_quality.yml:
--------------------------------------------------------------------------------
1 | name: Run code quality checks
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - main
7 | push:
8 | branches:
9 | - main
10 |
11 | concurrency:
12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
13 | cancel-in-progress: true
14 |
15 | jobs:
16 | check_code_quality:
17 | runs-on: ubuntu-latest
18 | steps:
19 | - uses: actions/checkout@v3
20 | - name: Set up Python
21 | uses: actions/setup-python@v4
22 | with:
23 | python-version: "3.7"
24 | - name: Install dependencies
25 | run: |
26 | python -m pip install --upgrade pip
27 | pip install .[quality]
28 | - name: Check quality
29 | run: |
30 | black --check examples tests src utils scripts
31 | ruff examples tests src utils scripts
32 | doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
33 |
34 | check_repository_consistency:
35 | runs-on: ubuntu-latest
36 | steps:
37 | - uses: actions/checkout@v3
38 | - name: Set up Python
39 | uses: actions/setup-python@v4
40 | with:
41 | python-version: "3.7"
42 | - name: Install dependencies
43 | run: |
44 | python -m pip install --upgrade pip
45 | pip install .[quality]
46 | - name: Check quality
47 | run: |
48 | python utils/check_copies.py
49 | python utils/check_dummies.py
50 | make deps_table_check_updated
51 |
--------------------------------------------------------------------------------
/diffusers/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | name: Stale Bot
2 |
3 | on:
4 | schedule:
5 | - cron: "0 15 * * *"
6 |
7 | jobs:
8 | close_stale_issues:
9 | name: Close Stale Issues
10 | if: github.repository == 'huggingface/diffusers'
11 | runs-on: ubuntu-latest
12 | env:
13 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
14 | steps:
15 | - uses: actions/checkout@v2
16 |
17 | - name: Setup Python
18 | uses: actions/setup-python@v1
19 | with:
20 | python-version: 3.7
21 |
22 | - name: Install requirements
23 | run: |
24 | pip install PyGithub
25 | - name: Close stale issues
26 | run: |
27 | python utils/stale.py
28 |
--------------------------------------------------------------------------------
/diffusers/.github/workflows/typos.yml:
--------------------------------------------------------------------------------
1 | name: Check typos
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | build:
8 | runs-on: ubuntu-latest
9 |
10 | steps:
11 | - uses: actions/checkout@v3
12 |
13 | - name: typos-action
14 | uses: crate-ci/typos@v1.12.4
15 |
--------------------------------------------------------------------------------
/diffusers/.gitignore:
--------------------------------------------------------------------------------
1 | # Initially taken from GitHub's Python gitignore file
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # tests and logs
12 | tests/fixtures/cached_*_text.txt
13 | logs/
14 | lightning_logs/
15 | lang_code_data/
16 |
17 | # Distribution / packaging
18 | .Python
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .nox/
50 | .coverage
51 | .coverage.*
52 | .cache
53 | nosetests.xml
54 | coverage.xml
55 | *.cover
56 | .hypothesis/
57 | .pytest_cache/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 | db.sqlite3
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | .python-version
90 |
91 | # celery beat schedule file
92 | celerybeat-schedule
93 |
94 | # SageMath parsed files
95 | *.sage.py
96 |
97 | # Environments
98 | .env
99 | .venv
100 | env/
101 | venv/
102 | ENV/
103 | env.bak/
104 | venv.bak/
105 |
106 | # Spyder project settings
107 | .spyderproject
108 | .spyproject
109 |
110 | # Rope project settings
111 | .ropeproject
112 |
113 | # mkdocs documentation
114 | /site
115 |
116 | # mypy
117 | .mypy_cache/
118 | .dmypy.json
119 | dmypy.json
120 |
121 | # Pyre type checker
122 | .pyre/
123 |
124 | # vscode
125 | .vs
126 | .vscode
127 |
128 | # Pycharm
129 | .idea
130 |
131 | # TF code
132 | tensorflow_code
133 |
134 | # Models
135 | proc_data
136 |
137 | # examples
138 | runs
139 | /runs_old
140 | /wandb
141 | /examples/runs
142 | /examples/**/*.args
143 | /examples/rag/sweep
144 |
145 | # data
146 | /data
147 | serialization_dir
148 |
149 | # emacs
150 | *.*~
151 | debug.env
152 |
153 | # vim
154 | .*.swp
155 |
156 | #ctags
157 | tags
158 |
159 | # pre-commit
160 | .pre-commit*
161 |
162 | # .lock
163 | *.lock
164 |
165 | # DS_Store (MacOS)
166 | .DS_Store
167 | # RL pipelines may produce mp4 outputs
168 | *.mp4
169 |
170 | # dependencies
171 | /transformers
172 |
173 | # ruff
174 | .ruff_cache
175 |
176 | wandb
--------------------------------------------------------------------------------
/diffusers/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | title: 'Diffusers: State-of-the-art diffusion models'
3 | message: >-
4 | If you use this software, please cite it using the
5 | metadata from this file.
6 | type: software
7 | authors:
8 | - given-names: Patrick
9 | family-names: von Platen
10 | - given-names: Suraj
11 | family-names: Patil
12 | - given-names: Anton
13 | family-names: Lozhkov
14 | - given-names: Pedro
15 | family-names: Cuenca
16 | - given-names: Nathan
17 | family-names: Lambert
18 | - given-names: Kashif
19 | family-names: Rasul
20 | - given-names: Mishig
21 | family-names: Davaadorj
22 | - given-names: Thomas
23 | family-names: Wolf
24 | repository-code: 'https://github.com/huggingface/diffusers'
25 | abstract: >-
26 | Diffusers provides pretrained diffusion models across
27 | multiple modalities, such as vision and audio, and serves
28 | as a modular toolbox for inference and training of
29 | diffusion models.
30 | keywords:
31 | - deep-learning
32 | - pytorch
33 | - image-generation
34 | - diffusion
35 | - text2image
36 | - image2image
37 | - score-based-generative-modeling
38 | - stable-diffusion
39 | license: Apache-2.0
40 | version: 0.12.1
41 |
--------------------------------------------------------------------------------
/diffusers/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include src/diffusers/utils/model_card_template.md
3 |
--------------------------------------------------------------------------------
/diffusers/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples
2 |
3 | # make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
4 | export PYTHONPATH = src
5 |
6 | check_dirs := examples scripts src tests utils
7 |
8 | modified_only_fixup:
9 | $(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
10 | @if test -n "$(modified_py_files)"; then \
11 | echo "Checking/fixing $(modified_py_files)"; \
12 | black $(modified_py_files); \
13 | ruff $(modified_py_files); \
14 | else \
15 | echo "No library .py files were modified"; \
16 | fi
17 |
18 | # Update src/diffusers/dependency_versions_table.py
19 |
20 | deps_table_update:
21 | @python setup.py deps_table_update
22 |
23 | deps_table_check_updated:
24 | @md5sum src/diffusers/dependency_versions_table.py > md5sum.saved
25 | @python setup.py deps_table_update
26 | @md5sum -c --quiet md5sum.saved || (printf "\nError: the version dependency table is outdated.\nPlease run 'make fixup' or 'make style' and commit the changes.\n\n" && exit 1)
27 | @rm md5sum.saved
28 |
29 | # autogenerating code
30 |
31 | autogenerate_code: deps_table_update
32 |
33 | # Check that the repo is in a good state
34 |
35 | repo-consistency:
36 | python utils/check_dummies.py
37 | python utils/check_repo.py
38 | python utils/check_inits.py
39 |
40 | # this target runs checks on all files
41 |
42 | quality:
43 | black --check $(check_dirs)
44 | ruff $(check_dirs)
45 | doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
46 | python utils/check_doc_toc.py
47 |
48 | # Format source code automatically and check if there are any problems left that need manual fixing
49 |
50 | extra_style_checks:
51 | python utils/custom_init_isort.py
52 | doc-builder style src/diffusers docs/source --max_len 119 --path_to_docs docs/source
53 | python utils/check_doc_toc.py --fix_and_overwrite
54 |
55 | # this target runs checks on all files and potentially modifies some of them
56 |
57 | style:
58 | black $(check_dirs)
59 | ruff $(check_dirs) --fix
60 | ${MAKE} autogenerate_code
61 | ${MAKE} extra_style_checks
62 |
63 | # Super fast fix and check target that only works on relevant modified files since the branch was made
64 |
65 | fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
66 |
67 | # Make marked copies of snippets of codes conform to the original
68 |
69 | fix-copies:
70 | python utils/check_copies.py --fix_and_overwrite
71 | python utils/check_dummies.py --fix_and_overwrite
72 |
73 | # Run tests for the library
74 |
75 | test:
76 | python -m pytest -n auto --dist=loadfile -s -v ./tests/
77 |
78 | # Run tests for examples
79 |
80 | test-examples:
81 | python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/
82 |
83 |
84 | # Release stuff
85 |
86 | pre-release:
87 | python utils/release.py
88 |
89 | pre-patch:
90 | python utils/release.py --patch
91 |
92 | post-release:
93 | python utils/release.py --post_release
94 |
95 | post-patch:
96 | python utils/release.py --post_release --patch
97 |
--------------------------------------------------------------------------------
/diffusers/_typos.toml:
--------------------------------------------------------------------------------
1 | # Files for typos
2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started
3 |
4 | [default.extend-identifiers]
5 |
6 | [default.extend-words]
7 | NIN="NIN" # NIN is used in scripts/convert_ncsnpp_original_checkpoint_to_diffusers.py
8 | nd="np" # nd may be np (numpy)
9 | parms="parms" # parms is used in scripts/convert_original_stable_diffusion_to_diffusers.py
10 |
11 |
12 | [files]
13 | extend-exclude = ["_typos.toml"]
14 |
--------------------------------------------------------------------------------
/diffusers/docker/diffusers-flax-cpu/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="diffusers"
4 |
5 | ENV DEBIAN_FRONTEND=noninteractive
6 |
7 | RUN apt update && \
8 | apt install -y bash \
9 | build-essential \
10 | git \
11 | git-lfs \
12 | curl \
13 | ca-certificates \
14 | libsndfile1-dev \
15 | python3.8 \
16 | python3-pip \
17 | python3.8-venv && \
18 | rm -rf /var/lib/apt/lists
19 |
20 | # make sure to use venv
21 | RUN python3 -m venv /opt/venv
22 | ENV PATH="/opt/venv/bin:$PATH"
23 |
24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
25 | # follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
26 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
27 | python3 -m pip install --upgrade --no-cache-dir \
28 | clu \
29 | "jax[cpu]>=0.2.16,!=0.3.2" \
30 | "flax>=0.4.1" \
31 | "jaxlib>=0.1.65" && \
32 | python3 -m pip install --no-cache-dir \
33 | accelerate \
34 | datasets \
35 | hf-doc-builder \
36 | huggingface-hub \
37 | Jinja2 \
38 | librosa \
39 | numpy \
40 | scipy \
41 | tensorboard \
42 | transformers
43 |
44 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/diffusers/docker/diffusers-flax-tpu/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="diffusers"
4 |
5 | ENV DEBIAN_FRONTEND=noninteractive
6 |
7 | RUN apt update && \
8 | apt install -y bash \
9 | build-essential \
10 | git \
11 | git-lfs \
12 | curl \
13 | ca-certificates \
14 | libsndfile1-dev \
15 | python3.8 \
16 | python3-pip \
17 | python3.8-venv && \
18 | rm -rf /var/lib/apt/lists
19 |
20 | # make sure to use venv
21 | RUN python3 -m venv /opt/venv
22 | ENV PATH="/opt/venv/bin:$PATH"
23 |
24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
25 | # follow the instructions here: https://cloud.google.com/tpu/docs/run-in-container#train_a_jax_model_in_a_docker_container
26 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
27 | python3 -m pip install --no-cache-dir \
28 | "jax[tpu]>=0.2.16,!=0.3.2" \
29 | -f https://storage.googleapis.com/jax-releases/libtpu_releases.html && \
30 | python3 -m pip install --upgrade --no-cache-dir \
31 | clu \
32 | "flax>=0.4.1" \
33 | "jaxlib>=0.1.65" && \
34 | python3 -m pip install --no-cache-dir \
35 | accelerate \
36 | datasets \
37 | hf-doc-builder \
38 | huggingface-hub \
39 | Jinja2 \
40 | librosa \
41 | numpy \
42 | scipy \
43 | tensorboard \
44 | transformers
45 |
46 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/diffusers/docker/diffusers-onnxruntime-cpu/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="diffusers"
4 |
5 | ENV DEBIAN_FRONTEND=noninteractive
6 |
7 | RUN apt update && \
8 | apt install -y bash \
9 | build-essential \
10 | git \
11 | git-lfs \
12 | curl \
13 | ca-certificates \
14 | libsndfile1-dev \
15 | python3.8 \
16 | python3-pip \
17 | python3.8-venv && \
18 | rm -rf /var/lib/apt/lists
19 |
20 | # make sure to use venv
21 | RUN python3 -m venv /opt/venv
22 | ENV PATH="/opt/venv/bin:$PATH"
23 |
24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
25 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
26 | python3 -m pip install --no-cache-dir \
27 | torch \
28 | torchvision \
29 | torchaudio \
30 | onnxruntime \
31 | --extra-index-url https://download.pytorch.org/whl/cpu && \
32 | python3 -m pip install --no-cache-dir \
33 | accelerate \
34 | datasets \
35 | hf-doc-builder \
36 | huggingface-hub \
37 | Jinja2 \
38 | librosa \
39 | numpy \
40 | scipy \
41 | tensorboard \
42 | transformers
43 |
44 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/diffusers/docker/diffusers-onnxruntime-cuda/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="diffusers"
4 |
5 | ENV DEBIAN_FRONTEND=noninteractive
6 |
7 | RUN apt update && \
8 | apt install -y bash \
9 | build-essential \
10 | git \
11 | git-lfs \
12 | curl \
13 | ca-certificates \
14 | libsndfile1-dev \
15 | python3.8 \
16 | python3-pip \
17 | python3.8-venv && \
18 | rm -rf /var/lib/apt/lists
19 |
20 | # make sure to use venv
21 | RUN python3 -m venv /opt/venv
22 | ENV PATH="/opt/venv/bin:$PATH"
23 |
24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
25 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
26 | python3 -m pip install --no-cache-dir \
27 | torch \
28 | torchvision \
29 | torchaudio \
30 | "onnxruntime-gpu>=1.13.1" \
31 | --extra-index-url https://download.pytorch.org/whl/cu117 && \
32 | python3 -m pip install --no-cache-dir \
33 | accelerate \
34 | datasets \
35 | hf-doc-builder \
36 | huggingface-hub \
37 | Jinja2 \
38 | librosa \
39 | numpy \
40 | scipy \
41 | tensorboard \
42 | transformers
43 |
44 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/diffusers/docker/diffusers-pytorch-cpu/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="diffusers"
4 |
5 | ENV DEBIAN_FRONTEND=noninteractive
6 |
7 | RUN apt update && \
8 | apt install -y bash \
9 | build-essential \
10 | git \
11 | git-lfs \
12 | curl \
13 | ca-certificates \
14 | libsndfile1-dev \
15 | python3.8 \
16 | python3-pip \
17 | python3.8-venv && \
18 | rm -rf /var/lib/apt/lists
19 |
20 | # make sure to use venv
21 | RUN python3 -m venv /opt/venv
22 | ENV PATH="/opt/venv/bin:$PATH"
23 |
24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
25 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
26 | python3 -m pip install --no-cache-dir \
27 | torch \
28 | torchvision \
29 | torchaudio \
30 | --extra-index-url https://download.pytorch.org/whl/cpu && \
31 | python3 -m pip install --no-cache-dir \
32 | accelerate \
33 | datasets \
34 | hf-doc-builder \
35 | huggingface-hub \
36 | Jinja2 \
37 | librosa \
38 | numpy \
39 | scipy \
40 | tensorboard \
41 | transformers
42 |
43 | CMD ["/bin/bash"]
--------------------------------------------------------------------------------
/diffusers/docker/diffusers-pytorch-cuda/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:11.7.1-cudnn8-runtime-ubuntu20.04
2 | LABEL maintainer="Hugging Face"
3 | LABEL repository="diffusers"
4 |
5 | ENV DEBIAN_FRONTEND=noninteractive
6 |
7 | RUN apt update && \
8 | apt install -y bash \
9 | build-essential \
10 | git \
11 | git-lfs \
12 | curl \
13 | ca-certificates \
14 | libsndfile1-dev \
15 | python3.8 \
16 | python3-pip \
17 | python3.8-venv && \
18 | rm -rf /var/lib/apt/lists
19 |
20 | # make sure to use venv
21 | RUN python3 -m venv /opt/venv
22 | ENV PATH="/opt/venv/bin:$PATH"
23 |
24 | # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py); every pip invocation is chained with && so a failure aborts the build
25 | RUN python3 -m pip install --no-cache-dir --upgrade pip && \
26 | python3 -m pip install --no-cache-dir \
27 | torch \
28 | torchvision \
29 | torchaudio && \
30 | python3 -m pip install --no-cache-dir \
31 | accelerate \
32 | datasets \
33 | hf-doc-builder \
34 | huggingface-hub \
35 | Jinja2 \
36 | librosa \
37 | numpy \
38 | scipy \
39 | tensorboard \
40 | transformers
41 |
42 | CMD ["/bin/bash"]
43 |
--------------------------------------------------------------------------------
/diffusers/docs/source/_config.py:
--------------------------------------------------------------------------------
1 | # docstyle-ignore
2 | INSTALL_CONTENT = """
3 | # Diffusers installation
4 | ! pip install diffusers transformers datasets accelerate
5 | # To install from source instead of the last release, comment the command above and uncomment the following one.
6 | # ! pip install git+https://github.com/huggingface/diffusers.git
7 | """
8 |
9 | notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}]
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/configuration.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Configuration
14 |
15 | Schedulers from [`~schedulers.scheduling_utils.SchedulerMixin`] and models from [`ModelMixin`] inherit from [`ConfigMixin`] which conveniently takes care of storing all the parameters that are
16 | passed to their respective `__init__` methods in a JSON-configuration file.
17 |
18 | ## ConfigMixin
19 |
20 | [[autodoc]] ConfigMixin
21 | - load_config
22 | - from_config
23 | - save_config
24 | - to_json_file
25 | - to_json_string
26 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/diffusion_pipeline.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Pipelines
14 |
15 | The [`DiffusionPipeline`] is the easiest way to load any pretrained diffusion pipeline from the [Hub](https://huggingface.co/models?library=diffusers) and to use it in inference.
16 |
17 |
18 |
19 | One should not use the Diffusion Pipeline class for training or fine-tuning a diffusion model. Individual
20 | components of diffusion pipelines are usually trained individually, so we suggest working directly
21 | with [`UNet2DModel`] and [`UNet2DConditionModel`].
22 |
23 |
24 |
25 | Any diffusion pipeline that is loaded with [`~DiffusionPipeline.from_pretrained`] will automatically
26 | detect the pipeline type, *e.g.* [`StableDiffusionPipeline`] and consequently load each component of the
27 | pipeline and pass them into the `__init__` function of the pipeline, *e.g.* [`~StableDiffusionPipeline.__init__`].
28 |
29 | Any pipeline object can be saved locally with [`~DiffusionPipeline.save_pretrained`].
30 |
31 | ## DiffusionPipeline
32 | [[autodoc]] DiffusionPipeline
33 | - all
34 | - __call__
35 | - device
36 | - to
37 | - components
38 |
39 | ## ImagePipelineOutput
40 | By default, diffusion pipelines that generate images return an object of class
41 |
42 | [[autodoc]] pipelines.ImagePipelineOutput
43 |
44 | ## AudioPipelineOutput
45 | By default, diffusion pipelines that generate audio return an object of class
46 |
47 | [[autodoc]] pipelines.AudioPipelineOutput
48 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/experimental/rl.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # TODO
14 |
15 | Coming soon!
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/loaders.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Loaders
14 |
15 | There are many ways to train adapter neural networks for diffusion models, such as
16 | - [Textual Inversion](./training/text_inversion.mdx)
17 | - [LoRA](https://github.com/cloneofsimo/lora)
18 | - [Hypernetworks](https://arxiv.org/abs/1609.09106)
19 |
20 | Such adapter neural networks often only consist of a fraction of the number of weights compared
21 | to the pretrained model and as such are very portable. The Diffusers library offers an easy-to-use
22 | API to load such adapter neural networks via the [`loaders.py` module](https://github.com/huggingface/diffusers/blob/main/src/diffusers/loaders.py).
23 |
24 | **Note**: This module is still highly experimental and prone to future changes.
25 |
26 | ## LoaderMixins
27 |
28 | ### UNet2DConditionLoadersMixin
29 |
30 | [[autodoc]] loaders.UNet2DConditionLoadersMixin
31 |
32 | ### TextualInversionLoaderMixin
33 |
34 | [[autodoc]] loaders.TextualInversionLoaderMixin
35 |
36 | ### LoraLoaderMixin
37 |
38 | [[autodoc]] loaders.LoraLoaderMixin
39 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/outputs.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # BaseOutputs
14 |
15 | All models have outputs that are instances of subclasses of [`~utils.BaseOutput`]. Those are
16 | data structures containing all the information returned by the model, but that can also be used as tuples or
17 | dictionaries.
18 |
19 | Let's see how this looks in an example:
20 |
21 | ```python
22 | from diffusers import DDIMPipeline
23 |
24 | pipeline = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
25 | outputs = pipeline()
26 | ```
27 |
28 | The `outputs` object is a [`~pipelines.ImagePipelineOutput`]; as we can see in the
29 | documentation of that class below, this means it has an `images` attribute.
30 |
31 | You can access each attribute as you would usually do, and if that attribute has not been returned by the model, you will get `None`:
32 |
33 | ```python
34 | outputs.images
35 | ```
36 |
37 | or via keyword lookup
38 |
39 | ```python
40 | outputs["images"]
41 | ```
42 |
43 | When considering our `outputs` object as a tuple, it only considers the attributes that don't have `None` values.
44 | Here for instance, we could retrieve images via indexing:
45 |
46 | ```python
47 | outputs[:1]
48 | ```
49 |
50 | which will return the tuple `(outputs.images,)` for instance.
51 |
52 | ## BaseOutput
53 |
54 | [[autodoc]] utils.BaseOutput
55 | - to_tuple
56 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/dance_diffusion.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Dance Diffusion
14 |
15 | ## Overview
16 |
17 | [Dance Diffusion](https://github.com/Harmonai-org/sample-generator) by Zach Evans.
18 |
19 | Dance Diffusion is the first in a suite of generative audio tools for producers and musicians to be released by Harmonai.
20 | For more info or to get involved in the development of these tools, please visit https://harmonai.org and fill out the form on the front page.
21 |
22 | The original codebase of this implementation can be found [here](https://github.com/Harmonai-org/sample-generator).
23 |
24 | ## Available Pipelines:
25 |
26 | | Pipeline | Tasks | Colab
27 | |---|---|:---:|
28 | | [pipeline_dance_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py) | *Unconditional Audio Generation* | - |
29 |
30 |
31 | ## DanceDiffusionPipeline
32 | [[autodoc]] DanceDiffusionPipeline
33 | - all
34 | - __call__
35 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/ddim.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # DDIM
14 |
15 | ## Overview
16 |
17 | [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) (DDIM) by Jiaming Song, Chenlin Meng and Stefano Ermon.
18 |
19 | The abstract of the paper is the following:
20 |
21 | Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a Markovian diffusion process. We construct a class of non-Markovian diffusion processes that lead to the same training objective, but whose reverse process can be much faster to sample from. We empirically demonstrate that DDIMs can produce high quality samples 10× to 50× faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, and can perform semantically meaningful image interpolation directly in the latent space.
22 |
23 | The original codebase of this paper can be found here: [ermongroup/ddim](https://github.com/ermongroup/ddim).
24 | For questions, feel free to contact the author on [tsong.me](https://tsong.me/).
25 |
26 | ## Available Pipelines:
27 |
28 | | Pipeline | Tasks | Colab
29 | |---|---|:---:|
30 | | [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddim/pipeline_ddim.py) | *Unconditional Image Generation* | - |
31 |
32 |
33 | ## DDIMPipeline
34 | [[autodoc]] DDIMPipeline
35 | - all
36 | - __call__
37 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/ddpm.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # DDPM
14 |
15 | ## Overview
16 |
17 | [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239)
18 | (DDPM) by Jonathan Ho, Ajay Jain and Pieter Abbeel proposes the diffusion based model of the same name, but in the context of the 🤗 Diffusers library, DDPM refers to the discrete denoising scheduler from the paper as well as the pipeline.
19 |
20 | The abstract of the paper is the following:
21 |
22 | We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN.
23 |
24 | The original codebase of this paper can be found [here](https://github.com/hojonathanho/diffusion).
25 |
26 |
27 | ## Available Pipelines:
28 |
29 | | Pipeline | Tasks | Colab
30 | |---|---|:---:|
31 | | [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddpm/pipeline_ddpm.py) | *Unconditional Image Generation* | - |
32 |
33 |
34 | ## DDPMPipeline
35 | [[autodoc]] DDPMPipeline
36 | - all
37 | - __call__
38 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/dit.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Scalable Diffusion Models with Transformers (DiT)
14 |
15 | ## Overview
16 |
17 | [Scalable Diffusion Models with Transformers](https://arxiv.org/abs/2212.09748) (DiT) by William Peebles and Saining Xie.
18 |
19 | The abstract of the paper is the following:
20 |
21 | *We explore a new class of diffusion models based on the transformer architecture. We train latent diffusion models of images, replacing the commonly-used U-Net backbone with a transformer that operates on latent patches. We analyze the scalability of our Diffusion Transformers (DiTs) through the lens of forward pass complexity as measured by Gflops. We find that DiTs with higher Gflops -- through increased transformer depth/width or increased number of input tokens -- consistently have lower FID. In addition to possessing good scalability properties, our largest DiT-XL/2 models outperform all prior diffusion models on the class-conditional ImageNet 512x512 and 256x256 benchmarks, achieving a state-of-the-art FID of 2.27 on the latter.*
22 |
23 | The original codebase of this paper can be found here: [facebookresearch/dit](https://github.com/facebookresearch/dit).
24 |
25 | ## Available Pipelines:
26 |
27 | | Pipeline | Tasks | Colab
28 | |---|---|:---:|
29 | | [pipeline_dit.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/dit/pipeline_dit.py) | *Conditional Image Generation* | - |
30 |
31 |
32 | ## Usage example
33 |
34 | ```python
35 | from diffusers import DiTPipeline, DPMSolverMultistepScheduler
36 | import torch
37 |
38 | pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-256", torch_dtype=torch.float16)
39 | pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
40 | pipe = pipe.to("cuda")
41 |
42 | # pick words from Imagenet class labels
43 | pipe.labels # to print all available words
44 |
45 | # pick words that exist in ImageNet
46 | words = ["white shark", "umbrella"]
47 |
48 | class_ids = pipe.get_label_ids(words)
49 |
50 | generator = torch.manual_seed(33)
51 | output = pipe(class_labels=class_ids, num_inference_steps=25, generator=generator)
52 |
53 | image = output.images[0] # label 'white shark'
54 | ```
55 |
56 | ## DiTPipeline
57 | [[autodoc]] DiTPipeline
58 | - all
59 | - __call__
60 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/latent_diffusion_uncond.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Unconditional Latent Diffusion
14 |
15 | ## Overview
16 |
17 | Unconditional Latent Diffusion was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer.
18 |
19 | The abstract of the paper is the following:
20 |
21 | *By decomposing the image formation process into a sequential application of denoising autoencoders, diffusion models (DMs) achieve state-of-the-art synthesis results on image data and beyond. Additionally, their formulation allows for a guiding mechanism to control the image generation process without retraining. However, since these models typically operate directly in pixel space, optimization of powerful DMs often consumes hundreds of GPU days and inference is expensive due to sequential evaluations. To enable DM training on limited computational resources while retaining their quality and flexibility, we apply them in the latent space of powerful pretrained autoencoders. In contrast to previous work, training diffusion models on such a representation allows for the first time to reach a near-optimal point between complexity reduction and detail preservation, greatly boosting visual fidelity. By introducing cross-attention layers into the model architecture, we turn diffusion models into powerful and flexible generators for general conditioning inputs such as text or bounding boxes and high-resolution synthesis becomes possible in a convolutional manner. Our latent diffusion models (LDMs) achieve a new state of the art for image inpainting and highly competitive performance on various tasks, including unconditional image generation, semantic scene synthesis, and super-resolution, while significantly reducing computational requirements compared to pixel-based DMs.*
22 |
23 | The original codebase can be found [here](https://github.com/CompVis/latent-diffusion).
24 |
25 | ## Tips:
26 |
27 | -
28 | -
29 | -
30 |
31 | ## Available Pipelines:
32 |
33 | | Pipeline | Tasks | Colab
34 | |---|---|:---:|
35 | | [pipeline_latent_diffusion_uncond.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py) | *Unconditional Image Generation* | - |
36 |
37 | ## Examples:
38 |
39 | ## LDMPipeline
40 | [[autodoc]] LDMPipeline
41 | - all
42 | - __call__
43 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/pndm.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # PNDM
14 |
15 | ## Overview
16 |
17 | [Pseudo Numerical methods for Diffusion Models on manifolds](https://arxiv.org/abs/2202.09778) (PNDM) by Luping Liu, Yi Ren, Zhijie Lin and Zhou Zhao.
18 |
19 | The abstract of the paper is the following:
20 |
21 | Denoising Diffusion Probabilistic Models (DDPMs) can generate high-quality samples such as image and audio samples. However, DDPMs require hundreds to thousands of iterations to produce final samples. Several prior works have successfully accelerated DDPMs through adjusting the variance schedule (e.g., Improved Denoising Diffusion Probabilistic Models) or the denoising equation (e.g., Denoising Diffusion Implicit Models (DDIMs)). However, these acceleration methods cannot maintain the quality of samples and even introduce new noise at a high speedup rate, which limit their practicability. To accelerate the inference process while keeping the sample quality, we provide a fresh perspective that DDPMs should be treated as solving differential equations on manifolds. Under such a perspective, we propose pseudo numerical methods for diffusion models (PNDMs). Specifically, we figure out how to solve differential equations on manifolds and show that DDIMs are simple cases of pseudo numerical methods. We change several classical numerical methods to corresponding pseudo numerical methods and find that the pseudo linear multi-step method is the best in most situations. According to our experiments, by directly using pre-trained models on Cifar10, CelebA and LSUN, PNDMs can generate higher quality synthetic images with only 50 steps compared with 1000-step DDIMs (20x speedup), significantly outperform DDIMs with 250 steps (by around 0.4 in FID) and have good generalization on different variance schedules.
22 |
23 | The original codebase can be found [here](https://github.com/luping-liu/PNDM).
24 |
25 | ## Available Pipelines:
26 |
27 | | Pipeline | Tasks | Colab
28 | |---|---|:---:|
29 | | [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pndm/pipeline_pndm.py) | *Unconditional Image Generation* | - |
30 |
31 |
32 | ## PNDMPipeline
33 | [[autodoc]] PNDMPipeline
34 | - all
35 | - __call__
36 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/stable_diffusion/depth2img.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Depth-to-Image Generation
14 |
15 | ## StableDiffusionDepth2ImgPipeline
16 |
17 | The depth-guided stable diffusion model was created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/), and [LAION](https://laion.ai/), as part of Stable Diffusion 2.0. It uses [MiDas](https://github.com/isl-org/MiDaS) to infer depth based on an image.
18 |
19 | [`StableDiffusionDepth2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images as well as a `depth_map` to preserve the images’ structure.
20 |
21 | The original codebase can be found here:
22 | - *Stable Diffusion v2*: [Stability-AI/stablediffusion](https://github.com/Stability-AI/stablediffusion#depth-conditional-stable-diffusion)
23 |
24 | Available Checkpoints are:
25 | - *stable-diffusion-2-depth*: [stabilityai/stable-diffusion-2-depth](https://huggingface.co/stabilityai/stable-diffusion-2-depth)
26 |
27 | [[autodoc]] StableDiffusionDepth2ImgPipeline
28 | - all
29 | - __call__
30 | - enable_attention_slicing
31 | - disable_attention_slicing
32 | - enable_xformers_memory_efficient_attention
33 | - disable_xformers_memory_efficient_attention
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/stable_diffusion/image_variation.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Image Variation
14 |
15 | ## StableDiffusionImageVariationPipeline
16 |
17 | [`StableDiffusionImageVariationPipeline`] lets you generate variations from an input image using Stable Diffusion. It uses a fine-tuned version of the Stable Diffusion model, trained by [Justin Pinkney](https://www.justinpinkney.com/) (@Buntworthy) at [Lambda](https://lambdalabs.com/).
18 |
19 | The original codebase can be found here:
20 | [Stable Diffusion Image Variations](https://github.com/LambdaLabsML/lambda-diffusers#stable-diffusion-image-variations)
21 |
22 | Available Checkpoints are:
23 | - *sd-image-variations-diffusers*: [lambdalabs/sd-image-variations-diffusers](https://huggingface.co/lambdalabs/sd-image-variations-diffusers)
24 |
25 | [[autodoc]] StableDiffusionImageVariationPipeline
26 | - all
27 | - __call__
28 | - enable_attention_slicing
29 | - disable_attention_slicing
30 | - enable_xformers_memory_efficient_attention
31 | - disable_xformers_memory_efficient_attention
32 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/stable_diffusion/img2img.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Image-to-Image Generation
14 |
15 | ## StableDiffusionImg2ImgPipeline
16 |
17 | The Stable Diffusion model was created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/), [runway](https://github.com/runwayml), and [LAION](https://laion.ai/). The [`StableDiffusionImg2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images using Stable Diffusion.
18 |
19 | The original codebase can be found here: [CompVis/stable-diffusion](https://github.com/CompVis/stable-diffusion/blob/main/scripts/img2img.py)
20 |
21 | [`StableDiffusionImg2ImgPipeline`] is compatible with all Stable Diffusion checkpoints for [Text-to-Image](./text2img)
22 |
23 | The pipeline uses the diffusion-denoising mechanism proposed in [SDEdit: Guided Image Synthesis and Editing with Stochastic Differential Equations](https://arxiv.org/abs/2108.01073)
24 | by Chenlin Meng, Yutong He, Yang Song, Jiaming Song, Jiajun Wu, Jun-Yan Zhu, Stefano Ermon.
25 |
26 | [[autodoc]] StableDiffusionImg2ImgPipeline
27 | - all
28 | - __call__
29 | - enable_attention_slicing
30 | - disable_attention_slicing
31 | - enable_xformers_memory_efficient_attention
32 | - disable_xformers_memory_efficient_attention
33 |
34 | [[autodoc]] FlaxStableDiffusionImg2ImgPipeline
35 | - all
36 | - __call__
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/stable_diffusion/inpaint.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Text-Guided Image Inpainting
14 |
15 | ## StableDiffusionInpaintPipeline
16 |
17 | The Stable Diffusion model was created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/), [runway](https://github.com/runwayml), and [LAION](https://laion.ai/). The [`StableDiffusionInpaintPipeline`] lets you edit specific parts of an image by providing a mask and a text prompt using Stable Diffusion.
18 |
19 | The original codebase can be found here:
20 | - *Stable Diffusion V1*: [runwayml/stable-diffusion](https://github.com/runwayml/stable-diffusion#inpainting-with-stable-diffusion)
21 | - *Stable Diffusion V2*: [Stability-AI/stablediffusion](https://github.com/Stability-AI/stablediffusion#image-inpainting-with-stable-diffusion)
22 |
23 | Available checkpoints are:
24 | - *stable-diffusion-inpainting (512x512 resolution)*: [runwayml/stable-diffusion-inpainting](https://huggingface.co/runwayml/stable-diffusion-inpainting)
25 | - *stable-diffusion-2-inpainting (512x512 resolution)*: [stabilityai/stable-diffusion-2-inpainting](https://huggingface.co/stabilityai/stable-diffusion-2-inpainting)
26 |
27 | [[autodoc]] StableDiffusionInpaintPipeline
28 | - all
29 | - __call__
30 | - enable_attention_slicing
31 | - disable_attention_slicing
32 | - enable_xformers_memory_efficient_attention
33 | - disable_xformers_memory_efficient_attention
34 |
35 | [[autodoc]] FlaxStableDiffusionInpaintPipeline
36 | - all
37 | - __call__
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/stable_diffusion/latent_upscale.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Stable Diffusion Latent Upscaler
14 |
15 | ## StableDiffusionLatentUpscalePipeline
16 |
17 | The Stable Diffusion Latent Upscaler model was created by [Katherine Crowson](https://github.com/crowsonkb/k-diffusion) in collaboration with [Stability AI](https://stability.ai/). It can be used on top of any [`StableDiffusionUpscalePipeline`] checkpoint to enhance its output image resolution by a factor of 2.
18 |
19 | A notebook that demonstrates the original implementation can be found here:
20 | - [Stable Diffusion Upscaler Demo](https://colab.research.google.com/drive/1o1qYJcFeywzCIdkfKJy7cTpgZTCM2EI4)
21 |
22 | Available Checkpoints are:
23 | - *stabilityai/latent-upscaler*: [stabilityai/sd-x2-latent-upscaler](https://huggingface.co/stabilityai/sd-x2-latent-upscaler)
24 |
25 |
26 | [[autodoc]] StableDiffusionLatentUpscalePipeline
27 | - all
28 | - __call__
29 | - enable_sequential_cpu_offload
30 | - enable_attention_slicing
31 | - disable_attention_slicing
32 | - enable_xformers_memory_efficient_attention
33 | - disable_xformers_memory_efficient_attention
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/stable_diffusion/text2img.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Text-to-Image Generation
14 |
15 | ## StableDiffusionPipeline
16 |
17 | The Stable Diffusion model was created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/), [runway](https://github.com/runwayml), and [LAION](https://laion.ai/). The [`StableDiffusionPipeline`] is capable of generating photo-realistic images given any text input using Stable Diffusion.
18 |
19 | The original codebase can be found here:
20 | - *Stable Diffusion V1*: [CompVis/stable-diffusion](https://github.com/CompVis/stable-diffusion)
21 | - *Stable Diffusion v2*: [Stability-AI/stablediffusion](https://github.com/Stability-AI/stablediffusion)
22 |
23 | Available Checkpoints are:
24 | - *stable-diffusion-v1-4 (512x512 resolution)*: [CompVis/stable-diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4)
25 | - *stable-diffusion-v1-5 (512x512 resolution)*: [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)
26 | - *stable-diffusion-2-base (512x512 resolution)*: [stabilityai/stable-diffusion-2-base](https://huggingface.co/stabilityai/stable-diffusion-2-base)
27 | - *stable-diffusion-2 (768x768 resolution)*: [stabilityai/stable-diffusion-2](https://huggingface.co/stabilityai/stable-diffusion-2)
28 | - *stable-diffusion-2-1-base (512x512 resolution)*: [stabilityai/stable-diffusion-2-1-base](https://huggingface.co/stabilityai/stable-diffusion-2-1-base)
29 | - *stable-diffusion-2-1 (768x768 resolution)*: [stabilityai/stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1)
30 |
31 | [[autodoc]] StableDiffusionPipeline
32 | - all
33 | - __call__
34 | - enable_attention_slicing
35 | - disable_attention_slicing
36 | - enable_vae_slicing
37 | - disable_vae_slicing
38 | - enable_xformers_memory_efficient_attention
39 | - disable_xformers_memory_efficient_attention
40 | - enable_vae_tiling
41 | - disable_vae_tiling
42 |
43 | [[autodoc]] FlaxStableDiffusionPipeline
44 | - all
45 | - __call__
46 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/stable_diffusion/upscale.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Super-Resolution
14 |
15 | ## StableDiffusionUpscalePipeline
16 |
17 | The upscaler diffusion model was created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/), and [LAION](https://laion.ai/), as part of Stable Diffusion 2.0. [`StableDiffusionUpscalePipeline`] can be used to enhance the resolution of input images by a factor of 4.
18 |
19 | The original codebase can be found here:
20 | - *Stable Diffusion v2*: [Stability-AI/stablediffusion](https://github.com/Stability-AI/stablediffusion#image-upscaling-with-stable-diffusion)
21 |
22 | Available Checkpoints are:
23 | - *stable-diffusion-x4-upscaler (x4 resolution)*: [stabilityai/stable-diffusion-x4-upscaler](https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler)
24 |
25 |
26 | [[autodoc]] StableDiffusionUpscalePipeline
27 | - all
28 | - __call__
29 | - enable_attention_slicing
30 | - disable_attention_slicing
31 | - enable_xformers_memory_efficient_attention
32 | - disable_xformers_memory_efficient_attention
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/stochastic_karras_ve.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Stochastic Karras VE
14 |
15 | ## Overview
16 |
17 | [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) by Tero Karras, Miika Aittala, Timo Aila and Samuli Laine.
18 |
19 | The abstract of the paper is the following:
20 |
21 | We argue that the theory and practice of diffusion-based generative models are currently unnecessarily convoluted and seek to remedy the situation by presenting a design space that clearly separates the concrete design choices. This lets us identify several changes to both the sampling and training processes, as well as preconditioning of the score networks. Together, our improvements yield new state-of-the-art FID of 1.79 for CIFAR-10 in a class-conditional setting and 1.97 in an unconditional setting, with much faster sampling (35 network evaluations per image) than prior designs. To further demonstrate their modular nature, we show that our design changes dramatically improve both the efficiency and quality obtainable with pre-trained score networks from previous work, including improving the FID of an existing ImageNet-64 model from 2.07 to near-SOTA 1.55.
22 |
23 | This pipeline implements the stochastic sampling tailored to Variance-Exploding (VE) models.
24 |
25 |
26 | ## Available Pipelines:
27 |
28 | | Pipeline | Tasks | Colab
29 | |---|---|:---:|
30 | | [pipeline_stochastic_karras_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py) | *Unconditional Image Generation* | - |
31 |
32 |
33 | ## KarrasVePipeline
34 | [[autodoc]] KarrasVePipeline
35 | - all
36 | - __call__
37 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/unclip.mdx:
--------------------------------------------------------------------------------
1 |
9 |
10 | # unCLIP
11 |
12 | ## Overview
13 |
14 | [Hierarchical Text-Conditional Image Generation with CLIP Latents](https://arxiv.org/abs/2204.06125) by Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, Mark Chen
15 |
16 | The abstract of the paper is the following:
17 |
18 | Contrastive models like CLIP have been shown to learn robust representations of images that capture both semantics and style. To leverage these representations for image generation, we propose a two-stage model: a prior that generates a CLIP image embedding given a text caption, and a decoder that generates an image conditioned on the image embedding. We show that explicitly generating image representations improves image diversity with minimal loss in photorealism and caption similarity. Our decoders conditioned on image representations can also produce variations of an image that preserve both its semantics and style, while varying the non-essential details absent from the image representation. Moreover, the joint embedding space of CLIP enables language-guided image manipulations in a zero-shot fashion. We use diffusion models for the decoder and experiment with both autoregressive and diffusion models for the prior, finding that the latter are computationally more efficient and produce higher-quality samples.
19 |
20 | The unCLIP model in diffusers comes from kakaobrain's karlo and the original codebase can be found [here](https://github.com/kakaobrain/karlo). Additionally, lucidrains has a DALL-E 2 recreation [here](https://github.com/lucidrains/DALLE2-pytorch).
21 |
22 | ## Available Pipelines:
23 |
24 | | Pipeline | Tasks | Colab
25 | |---|---|:---:|
26 | | [pipeline_unclip.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/unclip/pipeline_unclip.py) | *Text-to-Image Generation* | - |
27 | | [pipeline_unclip_image_variation.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/unclip/pipeline_unclip_image_variation.py) | *Image-Guided Image Generation* | - |
28 |
29 |
30 | ## UnCLIPPipeline
31 | [[autodoc]] UnCLIPPipeline
32 | - all
33 | - __call__
34 |
35 | [[autodoc]] UnCLIPImageVariationPipeline
36 | - all
37 | - __call__
38 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/pipelines/vq_diffusion.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # VQDiffusion
14 |
15 | ## Overview
16 |
17 | [Vector Quantized Diffusion Model for Text-to-Image Synthesis](https://arxiv.org/abs/2111.14822) by Shuyang Gu, Dong Chen, Jianmin Bao, Fang Wen, Bo Zhang, Dongdong Chen, Lu Yuan, Baining Guo
18 |
19 | The abstract of the paper is the following:
20 |
21 | We present the vector quantized diffusion (VQ-Diffusion) model for text-to-image generation. This method is based on a vector quantized variational autoencoder (VQ-VAE) whose latent space is modeled by a conditional variant of the recently developed Denoising Diffusion Probabilistic Model (DDPM). We find that this latent-space method is well-suited for text-to-image generation tasks because it not only eliminates the unidirectional bias with existing methods but also allows us to incorporate a mask-and-replace diffusion strategy to avoid the accumulation of errors, which is a serious problem with existing methods. Our experiments show that the VQ-Diffusion produces significantly better text-to-image generation results when compared with conventional autoregressive (AR) models with similar numbers of parameters. Compared with previous GAN-based text-to-image methods, our VQ-Diffusion can handle more complex scenes and improve the synthesized image quality by a large margin. Finally, we show that the image generation computation in our method can be made highly efficient by reparameterization. With traditional AR methods, the text-to-image generation time increases linearly with the output image resolution and hence is quite time consuming even for normal size images. The VQ-Diffusion allows us to achieve a better trade-off between quality and speed. Our experiments indicate that the VQ-Diffusion model with the reparameterization is fifteen times faster than traditional AR methods while achieving a better image quality.
22 |
23 | The original codebase can be found [here](https://github.com/microsoft/VQ-Diffusion).
24 |
25 | ## Available Pipelines:
26 |
27 | | Pipeline | Tasks | Colab
28 | |---|---|:---:|
29 | | [pipeline_vq_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/vq_diffusion/pipeline_vq_diffusion.py) | *Text-to-Image Generation* | - |
30 |
31 |
32 | ## VQDiffusionPipeline
33 | [[autodoc]] VQDiffusionPipeline
34 | - all
35 | - __call__
36 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/ddim.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Denoising Diffusion Implicit Models (DDIM)
14 |
15 | ## Overview
16 |
17 | [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) (DDIM) by Jiaming Song, Chenlin Meng and Stefano Ermon.
18 |
19 | The abstract of the paper is the following:
20 |
21 | Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a Markovian diffusion process. We construct a class of non-Markovian diffusion processes that lead to the same training objective, but whose reverse process can be much faster to sample from. We empirically demonstrate that DDIMs can produce high quality samples 10× to 50× faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, and can perform semantically meaningful image interpolation directly in the latent space.
22 |
23 | The original codebase of this paper can be found here: [ermongroup/ddim](https://github.com/ermongroup/ddim).
24 | For questions, feel free to contact the author on [tsong.me](https://tsong.me/).
25 |
26 | ## DDIMScheduler
27 | [[autodoc]] DDIMScheduler
28 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/ddim_inverse.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Inverse Denoising Diffusion Implicit Models (DDIMInverse)
14 |
15 | ## Overview
16 |
17 | This scheduler is the inverted scheduler of [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) (DDIM) by Jiaming Song, Chenlin Meng and Stefano Ermon.
18 | The implementation is mostly based on the DDIM inversion definition of [Null-text Inversion for Editing Real Images using Guided Diffusion Models](https://arxiv.org/pdf/2211.09794.pdf)
19 |
20 | ## DDIMInverseScheduler
21 | [[autodoc]] DDIMInverseScheduler
22 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/ddpm.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Denoising Diffusion Probabilistic Models (DDPM)
14 |
15 | ## Overview
16 |
17 | [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239)
18 | (DDPM) by Jonathan Ho, Ajay Jain and Pieter Abbeel proposes the diffusion based model of the same name, but in the context of the 🤗 Diffusers library, DDPM refers to the discrete denoising scheduler from the paper as well as the pipeline.
19 |
20 | The abstract of the paper is the following:
21 |
22 | We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN.
23 |
24 | The original paper can be found [here](https://arxiv.org/abs/2006.11239).
25 |
26 | ## DDPMScheduler
27 | [[autodoc]] DDPMScheduler
28 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/deis.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # DEIS
14 |
15 | Fast Sampling of Diffusion Models with Exponential Integrator.
16 |
17 | ## Overview
18 |
19 | Original paper can be found [here](https://arxiv.org/abs/2204.13902). The original implementation can be found [here](https://github.com/qsh-zh/deis).
20 |
21 | ## DEISMultistepScheduler
22 | [[autodoc]] DEISMultistepScheduler
23 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/dpm_discrete.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # DPM Discrete Scheduler inspired by Karras et al. paper
14 |
15 | ## Overview
16 |
17 | Inspired by [Karras et al.](https://arxiv.org/abs/2206.00364). Scheduler ported from @crowsonkb's https://github.com/crowsonkb/k-diffusion library:
18 |
19 | All credit for making this scheduler work goes to [Katherine Crowson](https://github.com/crowsonkb/)
20 |
21 | ## KDPM2DiscreteScheduler
22 | [[autodoc]] KDPM2DiscreteScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/dpm_discrete_ancestral.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # DPM Discrete Scheduler with ancestral sampling inspired by Karras et al. paper
14 |
15 | ## Overview
16 |
17 | Inspired by [Karras et al.](https://arxiv.org/abs/2206.00364). Scheduler ported from @crowsonkb's https://github.com/crowsonkb/k-diffusion library:
18 |
19 | All credit for making this scheduler work goes to [Katherine Crowson](https://github.com/crowsonkb/)
20 |
21 | ## KDPM2AncestralDiscreteScheduler
22 | [[autodoc]] KDPM2AncestralDiscreteScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/euler.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Euler scheduler
14 |
15 | ## Overview
16 |
17 | Euler scheduler (Algorithm 2) from the paper [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) by Karras et al. (2022). Based on the original [k-diffusion](https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L51) implementation by Katherine Crowson.
18 | A fast scheduler that often generates good outputs with 20-30 steps.
19 |
20 | ## EulerDiscreteScheduler
21 | [[autodoc]] EulerDiscreteScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/euler_ancestral.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Euler Ancestral scheduler
14 |
15 | ## Overview
16 |
17 | Ancestral sampling with Euler method steps. Based on the original [k-diffusion](https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L72) implementation by Katherine Crowson.
18 | A fast scheduler that often generates good outputs with 20-30 steps.
19 |
20 | ## EulerAncestralDiscreteScheduler
21 | [[autodoc]] EulerAncestralDiscreteScheduler
22 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/heun.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Heun scheduler inspired by Karras et al. paper
14 |
15 | ## Overview
16 |
17 | Algorithm 1 of [Karras et al.](https://arxiv.org/abs/2206.00364).
18 | Scheduler ported from @crowsonkb's https://github.com/crowsonkb/k-diffusion library:
19 |
20 | All credit for making this scheduler work goes to [Katherine Crowson](https://github.com/crowsonkb/)
21 |
22 | ## HeunDiscreteScheduler
23 | [[autodoc]] HeunDiscreteScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/ipndm.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Improved pseudo numerical methods for diffusion models (iPNDM)
14 |
15 | ## Overview
16 |
17 | Original implementation can be found [here](https://github.com/crowsonkb/v-diffusion-pytorch/blob/987f8985e38208345c1959b0ea767a625831cc9b/diffusion/sampling.py#L296).
18 |
19 | ## IPNDMScheduler
20 | [[autodoc]] IPNDMScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/lms_discrete.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Linear multistep scheduler for discrete beta schedules
14 |
15 | ## Overview
16 |
17 | Original paper can be found [here](https://arxiv.org/abs/2206.00364).
18 |
19 | ## LMSDiscreteScheduler
20 | [[autodoc]] LMSDiscreteScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/multistep_dpm_solver.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Multistep DPM-Solver
14 |
15 | ## Overview
16 |
17 | Original paper can be found [here](https://arxiv.org/abs/2206.00927) and the [improved version](https://arxiv.org/abs/2211.01095). The original implementation can be found [here](https://github.com/LuChengTHU/dpm-solver).
18 |
19 | ## DPMSolverMultistepScheduler
20 | [[autodoc]] DPMSolverMultistepScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/pndm.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Pseudo numerical methods for diffusion models (PNDM)
14 |
15 | ## Overview
16 |
17 | Original implementation can be found [here](https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L181).
18 |
19 | ## PNDMScheduler
20 | [[autodoc]] PNDMScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/repaint.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # RePaint scheduler
14 |
15 | ## Overview
16 |
17 | DDPM-based inpainting scheduler for unsupervised inpainting with extreme masks.
18 | Intended for use with [`RePaintPipeline`].
19 | Based on the paper [RePaint: Inpainting using Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2201.09865)
20 | and the original implementation by Andreas Lugmayr et al.: https://github.com/andreas128/RePaint
21 |
22 | ## RePaintScheduler
23 | [[autodoc]] RePaintScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/score_sde_ve.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Variance Exploding Stochastic Differential Equation (VE-SDE) scheduler
14 |
15 | ## Overview
16 |
17 | Original paper can be found [here](https://arxiv.org/abs/2011.13456).
18 |
19 | ## ScoreSdeVeScheduler
20 | [[autodoc]] ScoreSdeVeScheduler
21 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/score_sde_vp.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Variance Preserving Stochastic Differential Equation (VP-SDE) scheduler
14 |
15 | ## Overview
16 |
17 | Original paper can be found [here](https://arxiv.org/abs/2011.13456).
18 |
19 |
20 |
21 | Score SDE-VP is under construction.
22 |
23 |
24 |
25 | ## ScoreSdeVpScheduler
26 | [[autodoc]] schedulers.scheduling_sde_vp.ScoreSdeVpScheduler
27 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/singlestep_dpm_solver.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Singlestep DPM-Solver
14 |
15 | ## Overview
16 |
17 | Original paper can be found [here](https://arxiv.org/abs/2206.00927) and the [improved version](https://arxiv.org/abs/2211.01095). The original implementation can be found [here](https://github.com/LuChengTHU/dpm-solver).
18 |
19 | ## DPMSolverSinglestepScheduler
20 | [[autodoc]] DPMSolverSinglestepScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/stochastic_karras_ve.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Variance exploding, stochastic sampling from Karras et al.
14 |
15 | ## Overview
16 |
17 | Original paper can be found [here](https://arxiv.org/abs/2206.00364).
18 |
19 | ## KarrasVeScheduler
20 | [[autodoc]] KarrasVeScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/unipc.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # UniPC
14 |
15 | ## Overview
16 |
17 | UniPC is a training-free framework designed for the fast sampling of diffusion models, which consists of a corrector (UniC) and a predictor (UniP) that share a unified analytical form and support arbitrary orders.
18 |
19 | For more details about the method, please refer to the [paper](https://arxiv.org/abs/2302.04867) and the [code](https://github.com/wl-zhao/UniPC).
20 |
21 | Fast Sampling of Diffusion Models with Exponential Integrator.
22 |
23 | ## UniPCMultistepScheduler
24 | [[autodoc]] UniPCMultistepScheduler
25 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/api/schedulers/vq_diffusion.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # VQDiffusionScheduler
14 |
15 | ## Overview
16 |
17 | Original paper can be found [here](https://arxiv.org/abs/2111.14822)
18 |
19 | ## VQDiffusionScheduler
20 | [[autodoc]] VQDiffusionScheduler
--------------------------------------------------------------------------------
/diffusers/docs/source/en/optimization/onnx.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 |
14 | # How to use the ONNX Runtime for inference
15 |
16 | 🤗 [Optimum](https://github.com/huggingface/optimum) provides a Stable Diffusion pipeline compatible with ONNX Runtime.
17 |
18 | ## Installation
19 |
20 | Install 🤗 Optimum with the following command for ONNX Runtime support:
21 |
22 | ```
23 | pip install optimum["onnxruntime"]
24 | ```
25 |
26 | ## Stable Diffusion Inference
27 |
28 | To load an ONNX model and run inference with the ONNX Runtime, you need to replace [`StableDiffusionPipeline`] with `ORTStableDiffusionPipeline`. In case you want to load
29 | a PyTorch model and convert it to the ONNX format on-the-fly, you can set `export=True`.
30 |
31 | ```python
32 | from optimum.onnxruntime import ORTStableDiffusionPipeline
33 |
34 | model_id = "runwayml/stable-diffusion-v1-5"
35 | pipe = ORTStableDiffusionPipeline.from_pretrained(model_id, export=True)
36 | prompt = "a photo of an astronaut riding a horse on mars"
37 | images = pipe(prompt).images[0]
38 | pipe.save_pretrained("./onnx-stable-diffusion-v1-5")
39 | ```
40 |
41 | If you want to export the pipeline in the ONNX format offline and later use it for inference,
42 | you can use the [`optimum-cli export`](https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#exporting-a-model-to-onnx-using-the-cli) command:
43 |
44 | ```bash
45 | optimum-cli export onnx --model runwayml/stable-diffusion-v1-5 sd_v15_onnx/
46 | ```
47 |
48 | Then perform inference:
49 |
50 | ```python
51 | from optimum.onnxruntime import ORTStableDiffusionPipeline
52 |
53 | model_id = "sd_v15_onnx"
54 | pipe = ORTStableDiffusionPipeline.from_pretrained(model_id)
55 | prompt = "a photo of an astronaut riding a horse on mars"
56 | images = pipe(prompt).images[0]
57 | ```
58 |
59 | Notice that we didn't have to specify `export=True` above.
60 |
61 | You can find more examples in [optimum documentation](https://huggingface.co/docs/optimum/).
62 |
63 | ## Known Issues
64 |
65 | - Generating multiple prompts in a batch seems to take too much memory. While we look into it, you may need to iterate instead of batching.
66 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/optimization/open_vino.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 |
14 | # How to use OpenVINO for inference
15 |
16 | 🤗 [Optimum](https://github.com/huggingface/optimum-intel) provides a Stable Diffusion pipeline compatible with OpenVINO. You can now easily perform inference with OpenVINO Runtime on a variety of Intel processors ([see](https://docs.openvino.ai/latest/openvino_docs_OV_UG_supported_plugins_Supported_Devices.html) the full list of supported devices).
17 |
18 | ## Installation
19 |
20 | Install 🤗 Optimum Intel with the following command:
21 |
22 | ```
23 | pip install optimum["openvino"]
24 | ```
25 |
26 | ## Stable Diffusion Inference
27 |
28 | To load an OpenVINO model and run inference with OpenVINO Runtime, you need to replace `StableDiffusionPipeline` with `OVStableDiffusionPipeline`. In case you want to load a PyTorch model and convert it to the OpenVINO format on-the-fly, you can set `export=True`.
29 |
30 | ```python
31 | from optimum.intel.openvino import OVStableDiffusionPipeline
32 |
33 | model_id = "runwayml/stable-diffusion-v1-5"
34 | pipe = OVStableDiffusionPipeline.from_pretrained(model_id, export=True)
35 | prompt = "a photo of an astronaut riding a horse on mars"
36 | images = pipe(prompt).images[0]
37 | ```
38 |
39 | You can find more examples (such as static reshaping and model compilation) in [optimum documentation](https://huggingface.co/docs/optimum/intel/inference#export-and-inference-of-stable-diffusion-models).
40 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/optimization/opt_overview.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Overview
14 |
15 | Generating high-quality outputs is computationally intensive, especially during each iterative step where you go from a noisy output to a less noisy output. One of 🧨 Diffusers' goals is to make this technology widely accessible to everyone, which includes enabling fast inference on consumer and specialized hardware.
16 |
17 | This section will cover tips and tricks - like half-precision weights and sliced attention - for optimizing inference speed and reducing memory-consumption. You can also learn how to speed up your PyTorch code with [`torch.compile`](https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html) or [ONNX Runtime](https://onnxruntime.ai/docs/), and enable memory-efficient attention with [xFormers](https://facebookresearch.github.io/xformers/). There are also guides for running inference on specific hardware like Apple Silicon, and Intel or Habana processors.
--------------------------------------------------------------------------------
/diffusers/docs/source/en/optimization/xformers.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Installing xFormers
14 |
15 | We recommend the use of [xFormers](https://github.com/facebookresearch/xformers) for both inference and training. In our tests, the optimizations performed in the attention blocks allow for both faster speed and reduced memory consumption.
16 |
17 | Starting from version `0.0.16` of xFormers, released in January 2023, installation can be easily performed using pre-built pip wheels:
18 |
19 | ```bash
20 | pip install xformers
21 | ```
22 |
23 |
24 |
25 | The xFormers PIP package requires the latest version of PyTorch (1.13.1 as of xFormers 0.0.16). If you need to use a previous version of PyTorch, then we recommend you install xFormers from source using [the project instructions](https://github.com/facebookresearch/xformers#installing-xformers).
26 |
27 |
28 |
29 | After xFormers is installed, you can use `enable_xformers_memory_efficient_attention()` for faster inference and reduced memory consumption, as discussed [here](fp16#memory-efficient-attention).
30 |
31 |
32 |
33 | According to [this issue](https://github.com/huggingface/diffusers/issues/2234#issuecomment-1416931212), xFormers `v0.0.16` cannot be used for training (fine-tune or Dreambooth) on some GPUs. If you observe that problem, please install a development version as indicated in that comment.
34 |
35 |
36 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/tutorials/tutorial_overview.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Overview
14 |
15 | Welcome to 🧨 Diffusers! If you're new to diffusion models and generative AI, and want to learn more, then you've come to the right place. These beginner-friendly tutorials are designed to provide a gentle introduction to diffusion models and help you understand the library fundamentals - the core components and how 🧨 Diffusers is meant to be used.
16 |
17 | You'll learn how to use a pipeline for inference to rapidly generate things, and then deconstruct that pipeline to really understand how to use the library as a modular toolbox for building your own diffusion systems. In the next lesson, you'll learn how to train your own diffusion model to generate what you want.
18 |
19 | After completing the tutorials, you'll have gained the necessary skills to start exploring the library on your own and see how to use it for your own projects and applications.
20 |
21 | Feel free to join our community on [Discord](https://discord.com/invite/JfAtkvEtRb) or the [forums](https://discuss.huggingface.co/c/discussion-related-to-httpsgithubcomhuggingfacediffusers/63) to connect and collaborate with other users and developers!
22 |
23 | Let's start diffusing! 🧨
--------------------------------------------------------------------------------
/diffusers/docs/source/en/using-diffusers/audio.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Using Diffusers for audio
14 |
15 | [`DanceDiffusionPipeline`] and [`AudioDiffusionPipeline`] can be used to generate
16 | audio rapidly! More coming soon!
--------------------------------------------------------------------------------
/diffusers/docs/source/en/using-diffusers/conditional_image_generation.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Conditional image generation
14 |
15 | [[open-in-colab]]
16 |
17 | Conditional image generation allows you to generate images from a text prompt. The text is converted into embeddings which are used to condition the model to generate an image from noise.
18 |
19 | The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference.
20 |
21 | Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline [checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads) you would like to download.
22 |
23 | In this guide, you'll use [`DiffusionPipeline`] for text-to-image generation with [Latent Diffusion](https://huggingface.co/CompVis/ldm-text2im-large-256):
24 |
25 | ```python
26 | >>> from diffusers import DiffusionPipeline
27 |
28 | >>> generator = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")
29 | ```
30 |
31 | The [`DiffusionPipeline`] downloads and caches all modeling, tokenization, and scheduling components.
32 | Because the model consists of roughly 1.4 billion parameters, we strongly recommend running it on a GPU.
33 | You can move the generator object to a GPU, just like you would in PyTorch:
34 |
35 | ```python
36 | >>> generator.to("cuda")
37 | ```
38 |
39 | Now you can use the `generator` on your text prompt:
40 |
41 | ```python
42 | >>> image = generator("An image of a squirrel in Picasso style").images[0]
43 | ```
44 |
45 | The output is by default wrapped into a [`PIL.Image`](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class) object.
46 |
47 | You can save the image by calling:
48 |
49 | ```python
50 | >>> image.save("image_of_squirrel_painting.png")
51 | ```
52 |
53 | Try out the Spaces below, and feel free to play around with the guidance scale parameter to see how it affects the image quality!
54 |
55 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/using-diffusers/depth2img.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Text-guided depth-to-image generation
14 |
15 | [[open-in-colab]]
16 |
17 | The [`StableDiffusionDepth2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images. In addition, you can also pass a `depth_map` to preserve the image structure. If no `depth_map` is provided, the pipeline automatically predicts the depth via an integrated [depth-estimation model](https://github.com/isl-org/MiDaS).
18 |
19 | Start by creating an instance of the [`StableDiffusionDepth2ImgPipeline`]:
20 |
21 | ```python
22 | import torch
23 | import requests
24 | from PIL import Image
25 |
26 | from diffusers import StableDiffusionDepth2ImgPipeline
27 |
28 | pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
29 | "stabilityai/stable-diffusion-2-depth",
30 | torch_dtype=torch.float16,
31 | ).to("cuda")
32 | ```
33 |
34 | Now pass your prompt to the pipeline. You can also pass a `negative_prompt` to prevent certain words from guiding how an image is generated:
35 |
36 | ```python
37 | url = "http://images.cocodataset.org/val2017/000000039769.jpg"
38 | init_image = Image.open(requests.get(url, stream=True).raw)
39 | prompt = "two tigers"
40 | n_prompt = "bad, deformed, ugly, bad anatomy"
41 | image = pipe(prompt=prompt, image=init_image, negative_prompt=n_prompt, strength=0.7).images[0]
42 | image
43 | ```
44 |
45 | | Input | Output |
46 | |---------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------|
47 | |
|
|
48 |
49 | Play around with the Spaces below and see if you notice a difference between generated images with and without a depth map!
50 |
51 |
57 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/using-diffusers/loading_overview.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Overview
14 |
15 | 🧨 Diffusers offers many pipelines, models, and schedulers for generative tasks. To make loading these components as simple as possible, we provide a single and unified method - `from_pretrained()` - that loads any of these components from either the Hugging Face [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) or your local machine. Whenever you load a pipeline or model, the latest files are automatically downloaded and cached so you can quickly reuse them next time without redownloading the files.
16 |
17 | This section will show you everything you need to know about loading pipelines, how to load different components in a pipeline, how to load checkpoint variants, and how to load community pipelines. You'll also learn how to load schedulers and compare the speed and quality trade-offs of using different schedulers. Finally, you'll see how to convert and load KerasCV checkpoints so you can use them in PyTorch with 🧨 Diffusers.
--------------------------------------------------------------------------------
/diffusers/docs/source/en/using-diffusers/other-modalities.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Using Diffusers with other modalities
14 |
15 | Diffusers is in the process of expanding to modalities other than images.
16 |
17 | Example type | Colab | Pipeline |
18 | :-------------------------:|:-------------------------:|:-------------------------:|
19 | [Molecule conformation](https://www.nature.com/subjects/molecular-conformation#:~:text=Definition,to%20changes%20in%20their%20environment.) generation | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/geodiff_molecule_conformation.ipynb) | ❌
20 |
21 | More coming soon!
--------------------------------------------------------------------------------
/diffusers/docs/source/en/using-diffusers/pipeline_overview.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Overview
14 |
15 | A pipeline is an end-to-end class that provides a quick and easy way to use a diffusion system for inference by bundling independently trained models and schedulers together. Certain combinations of models and schedulers define specific pipeline types, like [`StableDiffusionPipeline`] or [`StableDiffusionControlNetPipeline`], with specific capabilities. All pipeline types inherit from the base [`DiffusionPipeline`] class; pass it any checkpoint, and it'll automatically detect the pipeline type and load the necessary components.
16 |
17 | This section introduces you to some of the tasks supported by our pipelines such as unconditional image generation and different techniques and variations of text-to-image generation. You'll also learn how to gain more control over the generation process by setting a seed for reproducibility and weighting prompts to adjust the influence certain words in the prompt have over the output. Finally, you'll see how you can create a community pipeline for a custom task like generating images from speech.
--------------------------------------------------------------------------------
/diffusers/docs/source/en/using-diffusers/rl.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Using Diffusers for reinforcement learning
14 |
15 | Support for one RL model and related pipelines is included in the `experimental` source of diffusers.
16 | More models and examples coming soon!
17 |
18 | # Diffuser Value-guided Planning
19 |
20 | You can run the model from [*Planning with Diffusion for Flexible Behavior Synthesis*](https://arxiv.org/abs/2205.09991) with Diffusers.
21 | The script is located in the [RL Examples](https://github.com/huggingface/diffusers/tree/main/examples/rl) folder.
22 |
23 | Or, run this example in Colab [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/reinforcement_learning_with_diffusers.ipynb)
24 |
25 | [[autodoc]] diffusers.experimental.ValueGuidedRLPipeline
--------------------------------------------------------------------------------
/diffusers/docs/source/en/using-diffusers/unconditional_image_generation.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Unconditional image generation
14 |
15 | [[open-in-colab]]
16 |
17 | Unconditional image generation is a relatively straightforward task. The model only generates images - without any additional context like text or an image - resembling the training data it was trained on.
18 |
19 | The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference.
20 |
21 | Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download.
22 | You can use any of the 🧨 Diffusers [checkpoints](https://huggingface.co/models?library=diffusers&sort=downloads) from the Hub (the checkpoint you'll use generates images of butterflies).
23 |
24 |
25 |
26 | 💡 Want to train your own unconditional image generation model? Take a look at the training [guide](training/unconditional_training) to learn how to generate your own images.
27 |
28 |
29 |
30 | In this guide, you'll use [`DiffusionPipeline`] for unconditional image generation with [DDPM](https://arxiv.org/abs/2006.11239):
31 |
32 | ```python
33 | >>> from diffusers import DiffusionPipeline
34 |
35 | >>> generator = DiffusionPipeline.from_pretrained("anton-l/ddpm-butterflies-128")
36 | ```
37 |
38 | The [`DiffusionPipeline`] downloads and caches all modeling, tokenization, and scheduling components.
39 | Because the model consists of roughly 1.4 billion parameters, we strongly recommend running it on a GPU.
40 | You can move the generator object to a GPU, just like you would in PyTorch:
41 |
42 | ```python
43 | >>> generator.to("cuda")
44 | ```
45 |
46 | Now you can use the `generator` to generate an image:
47 |
48 | ```python
49 | >>> image = generator().images[0]
50 | ```
51 |
52 | The output is by default wrapped into a [`PIL.Image`](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class) object.
53 |
54 | You can save the image by calling:
55 |
56 | ```python
57 | >>> image.save("generated_image.png")
58 | ```
59 |
60 | Try out the Spaces below, and feel free to play around with the inference steps parameter to see how it affects the image quality!
61 |
62 |
68 |
69 |
70 |
--------------------------------------------------------------------------------
/diffusers/docs/source/en/using-diffusers/using_safetensors:
--------------------------------------------------------------------------------
1 | # What is safetensors ?
2 |
3 | [safetensors](https://github.com/huggingface/safetensors) is a different format
4 | from the classic `.bin` format, which is produced by PyTorch and relies on pickle.
5 |
6 | Pickle is notoriously unsafe: it allows a malicious file to execute arbitrary code.
7 | The hub itself tries to prevent issues from it, but it's not a silver bullet.
8 |
9 | The first and foremost goal of `safetensors` is to make loading machine learning models *safe*
10 | in the sense that no takeover of your computer can be done.
11 |
12 | # Why use safetensors ?
13 |
14 | **Safety** can be one reason, if you're attempting to use a little-known model and
15 | you're not sure about the source of the file.
16 |
17 | A secondary reason is **the speed of loading**. Safetensors can load models much faster
18 | than regular pickle files. If you spend a lot of time switching models, this can be
19 | a huge time saver.
20 |
--------------------------------------------------------------------------------
/diffusers/docs/source/ko/in_translation.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # 번역중
14 |
15 | 열심히 번역을 진행중입니다. 조금만 기다려주세요.
16 | 감사합니다!
--------------------------------------------------------------------------------
/diffusers/examples/community/one_step_unet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import torch
3 |
4 | from diffusers import DiffusionPipeline
5 |
6 |
class UnetSchedulerOneForwardPipeline(DiffusionPipeline):
    """Pipeline that runs one UNet forward pass followed by one scheduler step.

    The value returned by ``__call__`` is the scheduler output minus itself plus
    a tensor of ones, so for finite values it is all ones while still being
    computed from the UNet and scheduler results.
    """

    def __init__(self, unet, scheduler):
        """Register ``unet`` and ``scheduler`` as modules of this pipeline."""
        super().__init__()
        self.register_modules(unet=unet, scheduler=scheduler)

    def __call__(self):
        """Sample noise, run the UNet and scheduler once, and return the result."""
        unet_config = self.unet.config
        side = unet_config.sample_size

        # Single random sample shaped (1, in_channels, sample_size, sample_size).
        noise = torch.randn((1, unet_config.in_channels, side, side))
        step = 1

        prediction = self.unet(noise, step).sample
        previous = self.scheduler.step(prediction, step, noise).prev_sample

        # Deliberately expressed through `previous` rather than a bare
        # `torch.ones_like`, so the output still depends on the forward pass.
        return previous - previous + torch.ones_like(previous)
25 |
--------------------------------------------------------------------------------
/diffusers/examples/conftest.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # tests directory-specific settings - this file is run automatically
16 | # by pytest before any tests are run
17 |
18 | import sys
19 | import warnings
20 | from os.path import abspath, dirname, join
21 |
22 |
23 | # allow having multiple repository checkouts and not needing to remember to rerun
24 | # 'pip install -e .[dev]' when switching between checkouts and running tests.
25 | git_repo_path = abspath(join(dirname(dirname(dirname(__file__))), "src"))
26 | sys.path.insert(1, git_repo_path)
27 |
28 |
29 | # silence FutureWarning warnings in tests since often we can't act on them until
30 | # they become normal warnings - i.e. the tests still need to test the current functionality
31 | warnings.simplefilter(action="ignore", category=FutureWarning)
32 |
33 |
def pytest_addoption(parser):
    """Pytest hook: delegate option registration to the shared diffusers helper.

    Args:
        parser: the pytest option parser handed to the hook.
    """
    # Imported inside the hook rather than at module import time
    # (presumably so the sys.path tweak at the top of this file applies first).
    import diffusers.utils.testing_utils as testing_utils

    testing_utils.pytest_addoption_shared(parser)
38 |
39 |
def pytest_terminal_summary(terminalreporter):
    """Pytest hook: write the extra reports when ``--make-reports`` is set.

    Args:
        terminalreporter: the pytest terminal reporter handed to the hook.
    """
    import diffusers.utils.testing_utils as testing_utils

    report_id = terminalreporter.config.getoption("--make-reports")
    if not report_id:
        # Option absent or falsy: nothing to generate.
        return

    testing_utils.pytest_terminal_summary_main(terminalreporter, id=report_id)
46 |
--------------------------------------------------------------------------------
/diffusers/examples/controlnet/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate>=0.16.0
2 | torchvision
3 | transformers>=4.25.1
4 | ftfy
5 | tensorboard
6 | datasets
7 |
--------------------------------------------------------------------------------
/diffusers/examples/controlnet/requirements_flax.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.25.1
2 | datasets
3 | flax
4 | optax
5 | torch
6 | torchvision
7 | ftfy
8 | tensorboard
9 | Jinja2
10 |
--------------------------------------------------------------------------------
/diffusers/examples/dreambooth/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate>=0.16.0
2 | torchvision
3 | transformers>=4.25.1
4 | ftfy
5 | tensorboard
6 | Jinja2
7 |
--------------------------------------------------------------------------------
/diffusers/examples/dreambooth/requirements_flax.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.25.1
2 | flax
3 | optax
4 | torch
5 | torchvision
6 | ftfy
7 | tensorboard
8 | Jinja2
9 |
--------------------------------------------------------------------------------
/diffusers/examples/inference/README.md:
--------------------------------------------------------------------------------
1 | # Inference Examples
2 |
3 | **The inference examples folder is deprecated and will be removed in a future version**.
4 | **Officially supported inference examples can be found in the [Pipelines folder](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines)**.
5 |
6 | - For `Image-to-Image text-guided generation with Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples)
7 | - For `In-painting using Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples)
8 | - For `Tweak prompts reusing seeds and latents`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples)
9 |
--------------------------------------------------------------------------------
/diffusers/examples/inference/image_to_image.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from diffusers import StableDiffusionImg2ImgPipeline # noqa F401
4 |
5 |
6 | warnings.warn(
7 | "The `image_to_image.py` script is outdated. Please use directly `from diffusers import"
8 | " StableDiffusionImg2ImgPipeline` instead."
9 | )
10 |
--------------------------------------------------------------------------------
/diffusers/examples/inference/inpainting.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from diffusers import StableDiffusionInpaintPipeline as StableDiffusionInpaintPipeline # noqa F401
4 |
5 |
6 | warnings.warn(
7 | "The `inpainting.py` script is outdated. Please use directly `from diffusers import"
8 | " StableDiffusionInpaintPipeline` instead."
9 | )
10 |
--------------------------------------------------------------------------------
/diffusers/examples/instruct_pix2pix/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate>=0.16.0
2 | torchvision
3 | transformers>=4.25.1
4 | datasets
5 | ftfy
6 | tensorboard
--------------------------------------------------------------------------------
/diffusers/examples/rl/README.md:
--------------------------------------------------------------------------------
1 | # Overview
2 |
3 | These examples show how to run [Diffuser](https://arxiv.org/abs/2205.09991) in Diffusers.
4 | There are two ways to use the script, `run_diffuser_locomotion.py`.
5 |
6 | The key option is a change of the variable `n_guide_steps`.
7 | When `n_guide_steps=0`, the trajectories are sampled from the diffusion model, but not fine-tuned to maximize reward in the environment.
8 | By default, `n_guide_steps=2` to match the original implementation.
9 |
10 |
11 | You will need some RL specific requirements to run the examples:
12 |
13 | ```
14 | pip install -f https://download.pytorch.org/whl/torch_stable.html \
15 | free-mujoco-py \
16 | einops \
17 | gym==0.24.1 \
18 | protobuf==3.20.1 \
19 | git+https://github.com/rail-berkeley/d4rl.git \
20 | mediapy \
21 | Pillow==9.0.0
22 | ```
23 |
--------------------------------------------------------------------------------
/diffusers/examples/rl/run_diffuser_locomotion.py:
--------------------------------------------------------------------------------
1 | import d4rl # noqa
2 | import gym
3 | import tqdm
4 | from diffusers.experimental import ValueGuidedRLPipeline
5 |
6 |
# Sampling settings for the value-guided planner.
# NOTE(review): only "horizon" is read by this script; the remaining keys are
# kept for reference and are not passed to the pipeline here.
config = {
    "n_samples": 64,
    "horizon": 32,
    "num_inference_steps": 20,
    "n_guide_steps": 2,  # can set to 0 for faster sampling, does not use value network
    "scale_grad_by_std": True,
    "scale": 0.1,
    "eta": 0.0,
    "t_grad_cutoff": 2,
    "device": "cpu",
}


if __name__ == "__main__":
    env_name = "hopper-medium-v2"
    env = gym.make(env_name)

    pipeline = ValueGuidedRLPipeline.from_pretrained(
        "bglick13/hopper-medium-v2-value-function-hor32",
        env=env,
    )

    env.seed(0)
    obs = env.reset()
    total_reward = 0
    total_score = 0
    T = 1000
    rollout = [obs.copy()]
    try:
        for t in tqdm.tqdm(range(T)):
            # call the policy
            denorm_actions = pipeline(obs, planning_horizon=config["horizon"])

            # execute action in environment
            next_observation, reward, terminal, _ = env.step(denorm_actions)

            # update the return first so the score includes the reward just received
            total_reward += reward
            score = env.get_normalized_score(total_reward)
            total_score += score
            print(
                f"Step: {t}, Reward: {reward}, Total Reward: {total_reward}, Score: {score}, Total Score:"
                f" {total_score}"
            )

            # save observations for rendering
            rollout.append(next_observation.copy())

            # the episode is over; stepping the environment further is not meaningful
            if terminal:
                break

            obs = next_observation
    except KeyboardInterrupt:
        pass

    print(f"Total reward: {total_reward}")
60 |
--------------------------------------------------------------------------------
/diffusers/examples/text_to_image/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate>=0.16.0
2 | torchvision
3 | transformers>=4.25.1
4 | datasets
5 | ftfy
6 | tensorboard
7 | Jinja2
8 |
--------------------------------------------------------------------------------
/diffusers/examples/text_to_image/requirements_flax.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.25.1
2 | datasets
3 | flax
4 | optax
5 | torch
6 | torchvision
7 | ftfy
8 | tensorboard
9 | Jinja2
10 |
--------------------------------------------------------------------------------
/diffusers/examples/textual_inversion/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate>=0.16.0
2 | torchvision
3 | transformers>=4.25.1
4 | ftfy
5 | tensorboard
6 | Jinja2
7 |
--------------------------------------------------------------------------------
/diffusers/examples/textual_inversion/requirements_flax.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.25.1
2 | flax
3 | optax
4 | torch
5 | torchvision
6 | ftfy
7 | tensorboard
8 | Jinja2
9 |
--------------------------------------------------------------------------------
/diffusers/examples/unconditional_image_generation/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate>=0.16.0
2 | torchvision
3 | datasets
4 |
--------------------------------------------------------------------------------
/diffusers/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 119
3 | target-version = ['py37']
4 |
5 | [tool.ruff]
6 | # Never enforce `E501` (line length violations).
7 | ignore = ["C901", "E501", "E741", "W605"]
8 | select = ["C", "E", "F", "I", "W"]
9 | line-length = 119
10 |
11 | # Ignore import violations in all `__init__.py` files.
12 | [tool.ruff.per-file-ignores]
13 | "__init__.py" = ["E402", "F401", "F403", "F811"]
14 | "src/diffusers/utils/dummy_*.py" = ["F401"]
15 |
16 | [tool.ruff.isort]
17 | lines-after-imports = 2
18 | known-first-party = ["diffusers"]
19 |
--------------------------------------------------------------------------------
/diffusers/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/scripts/__init__.py
--------------------------------------------------------------------------------
/diffusers/scripts/conversion_ldm_uncond.py:
--------------------------------------------------------------------------------
import argparse

import torch
from omegaconf import OmegaConf

from diffusers import DDIMScheduler, LDMPipeline, UNetLDMModel, VQModel
7 |
8 |
def convert_ldm_original(checkpoint_path, config_path, output_path):
    """
    Convert an original unconditional latent-diffusion checkpoint into a saved `LDMPipeline`.

    Args:
        checkpoint_path: Path of the checkpoint; it is read with `torch.load` and its `"model"` entry is used as the
            state dict.
        config_path: Path of the OmegaConf config that describes the original model.
        output_path: Directory the assembled pipeline is written to with `save_pretrained`.
    """
    config = OmegaConf.load(config_path)
    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]

    def _strip_prefix(prefix):
        # Slice the prefix off instead of using `str.replace`, which would also delete any later occurrence of the
        # same text inside a key.
        return {key[len(prefix) :]: value for key, value in state_dict.items() if key.startswith(prefix)}

    # extract state_dict for VQVAE
    first_stage_dict = _strip_prefix("first_stage_model.")

    # extract state_dict for UNetLDM
    unet_state_dict = _strip_prefix("model.diffusion_model.")

    vqvae_init_args = config.model.params.first_stage_config.params
    unet_init_args = config.model.params.unet_config.params

    vqvae = VQModel(**vqvae_init_args).eval()
    vqvae.load_state_dict(first_stage_dict)

    # NOTE(review): `UNetLDMModel` is not among the classes exported by `diffusers.models` in this repository --
    # confirm that this script still matches the installed diffusers version.
    unet = UNetLDMModel(**unet_init_args).eval()
    unet.load_state_dict(unet_state_dict)

    noise_scheduler = DDIMScheduler(
        # the scheduler's parameter is `num_train_timesteps`; a `timesteps` keyword is rejected
        num_train_timesteps=config.model.params.timesteps,
        beta_schedule="scaled_linear",
        beta_start=config.model.params.linear_start,
        beta_end=config.model.params.linear_end,
        clip_sample=False,
    )

    pipeline = LDMPipeline(vqvae, unet, noise_scheduler)
    pipeline.save_pretrained(output_path)
47 |
48 |
if __name__ == "__main__":
    # Command-line entry point: all three paths are mandatory string options.
    cli = argparse.ArgumentParser()
    for option in ("--checkpoint_path", "--config_path", "--output_path"):
        cli.add_argument(option, type=str, required=True)
    options = cli.parse_args()

    convert_ldm_original(options.checkpoint_path, options.config_path, options.output_path)
57 |
--------------------------------------------------------------------------------
/diffusers/scripts/convert_unclip_txt2img_to_image_variation.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
4 |
5 | from diffusers import UnCLIPImageVariationPipeline, UnCLIPPipeline
6 |
7 |
if __name__ == "__main__":
    # Re-package a text-to-image unCLIP pipeline as an image-variation pipeline and save it to `--dump_path`.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output model.")
    arg_parser.add_argument(
        "--txt2img_unclip",
        default="kakaobrain/karlo-v1-alpha",
        type=str,
        required=False,
        help="The pretrained txt2img unclip.",
    )
    cli_args = arg_parser.parse_args()

    source_pipeline = UnCLIPPipeline.from_pretrained(cli_args.txt2img_unclip)

    # The two components the text-to-image pipeline does not provide.
    clip_processor = CLIPImageProcessor()
    clip_vision = CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-large-patch14")

    # Everything else is taken over unchanged from the loaded text-to-image pipeline.
    reused_names = (
        "decoder",
        "text_encoder",
        "tokenizer",
        "text_proj",
        "super_res_first",
        "super_res_last",
        "decoder_scheduler",
        "super_res_scheduler",
    )
    reused_components = {name: getattr(source_pipeline, name) for name in reused_names}

    variation_pipeline = UnCLIPImageVariationPipeline(
        feature_extractor=clip_processor,
        image_encoder=clip_vision,
        **reused_components,
    )

    variation_pipeline.save_pretrained(cli_args.dump_path)
42 |
--------------------------------------------------------------------------------
/diffusers/setup.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | default_section = FIRSTPARTY
3 | ensure_newline_before_comments = True
4 | force_grid_wrap = 0
5 | include_trailing_comma = True
6 | known_first_party = accelerate
7 | known_third_party =
8 | numpy
9 | torch
10 | torch_xla
11 |
12 | line_length = 119
13 | lines_after_imports = 2
14 | multi_line_output = 3
15 | use_parentheses = True
16 |
17 | [flake8]
18 | ignore = E203, E722, E501, E741, W503, W605
19 | max-line-length = 119
20 | per-file-ignores = __init__.py:F401
21 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/commands/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from abc import ABC, abstractmethod
16 | from argparse import ArgumentParser
17 |
18 |
class BaseDiffusersCLICommand(ABC):
    """
    Abstract base class for `diffusers-cli` sub-commands.

    Concrete commands must override both `register_subcommand` and `run`; the class cannot be instantiated until
    they do.
    """

    @staticmethod
    @abstractmethod
    def register_subcommand(parser: ArgumentParser):
        """Attach this command to `parser`. Must be overridden."""
        raise NotImplementedError

    @abstractmethod
    def run(self):
        """Execute the command. Must be overridden."""
        raise NotImplementedError
28 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/commands/diffusers_cli.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | from argparse import ArgumentParser
17 |
18 | from .env import EnvironmentCommand
19 |
20 |
def main():
    """
    Entry point of the `diffusers-cli` command-line tool.

    Builds the argument parser, registers the available sub-commands, parses `sys.argv` and runs the selected
    command.

    Raises:
        SystemExit: with status 1, after printing the help text, when the parsed arguments carry no `func`
            attribute.
    """
    parser = ArgumentParser("Diffusers CLI tool", usage="diffusers-cli <command> [<args>]")
    commands_parser = parser.add_subparsers(help="diffusers-cli command helpers")

    # Register commands
    EnvironmentCommand.register_subcommand(commands_parser)

    # Let's go
    args = parser.parse_args()

    # `func` is presumably set by the selected sub-command's parser; without it there is nothing to run.
    if not hasattr(args, "func"):
        parser.print_help()
        # raise SystemExit directly: the `exit()` helper is only installed by the `site` module
        raise SystemExit(1)

    # Run
    service = args.func(args)
    service.run()


if __name__ == "__main__":
    main()
42 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/dependency_versions_check.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import sys
15 |
16 | from .dependency_versions_table import deps
17 | from .utils.versions import require_version, require_version_core
18 |
19 |
# define which module versions we always want to check at run time
# (usually the ones defined in `install_requires` in setup.py)
#
# order specific notes:
# - tqdm must be checked before tokenizers
#
# NOTE(review): `python`, `tqdm`, `packaging` and `tokenizers` have no entry in the `deps` table of
# dependency_versions_table.py as it appears in this repository, so the loop below would raise ValueError as soon
# as this module is imported -- confirm whether this module is still used and which list is authoritative.

pkgs_to_check_at_runtime = "python tqdm regex requests packaging filelock numpy tokenizers".split()
# backport packages are only checked on interpreters older than the version given in each condition
if sys.version_info < (3, 7):
    pkgs_to_check_at_runtime.append("dataclasses")
if sys.version_info < (3, 8):
    pkgs_to_check_at_runtime.append("importlib_metadata")

# This loop runs at import time: every listed package must have an entry in `deps`.
for pkg in pkgs_to_check_at_runtime:
    if pkg in deps:
        if pkg == "tokenizers":
            # must be loaded here, or else tqdm check may fail
            from .utils import is_tokenizers_available

            if not is_tokenizers_available():
                continue  # not required, check version only if installed

        require_version_core(deps[pkg])
    else:
        raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py")
44 |
45 |
def dep_version_check(pkg, hint=None):
    """
    Check `pkg` against the requirement string recorded for it in the `deps` table.

    Args:
        pkg: Key into `deps`; an unknown key raises `KeyError`.
        hint: Optional value forwarded unchanged to `require_version`.
    """
    requirement = deps[pkg]
    require_version(requirement, hint)
48 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/dependency_versions_table.py:
--------------------------------------------------------------------------------
1 | # THIS FILE HAS BEEN AUTOGENERATED. To update:
2 | # 1. modify the `_deps` dict in setup.py
3 | # 2. run `make deps_table_update``
4 | deps = {
5 | "Pillow": "Pillow",
6 | "accelerate": "accelerate>=0.11.0",
7 | "compel": "compel==0.1.8",
8 | "black": "black~=23.1",
9 | "datasets": "datasets",
10 | "filelock": "filelock",
11 | "flax": "flax>=0.4.1",
12 | "hf-doc-builder": "hf-doc-builder>=0.3.0",
13 | "huggingface-hub": "huggingface-hub>=0.13.2",
14 | "requests-mock": "requests-mock==1.10.0",
15 | "importlib_metadata": "importlib_metadata",
16 | "isort": "isort>=5.5.4",
17 | "jax": "jax>=0.2.8,!=0.3.2",
18 | "jaxlib": "jaxlib>=0.1.65",
19 | "Jinja2": "Jinja2",
20 | "k-diffusion": "k-diffusion>=0.0.12",
21 | "librosa": "librosa",
22 | "note-seq": "note-seq",
23 | "numpy": "numpy",
24 | "parameterized": "parameterized",
25 | "protobuf": "protobuf>=3.20.3,<4",
26 | "pytest": "pytest",
27 | "pytest-timeout": "pytest-timeout",
28 | "pytest-xdist": "pytest-xdist",
29 | "ruff": "ruff>=0.0.241",
30 | "safetensors": "safetensors",
31 | "sentencepiece": "sentencepiece>=0.1.91,!=0.1.92",
32 | "scipy": "scipy",
33 | "regex": "regex!=2019.12.17",
34 | "requests": "requests",
35 | "tensorboard": "tensorboard",
36 | "torch": "torch>=1.4",
37 | "torchvision": "torchvision",
38 | "transformers": "transformers>=4.25.1",
39 | }
40 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/experimental/README.md:
--------------------------------------------------------------------------------
1 | # 🧨 Diffusers Experimental
2 |
3 | We are adding experimental code to support novel applications and usages of the Diffusers library.
4 | Currently, the following experiments are supported:
5 | * Reinforcement learning via an implementation of the [Diffuser](https://arxiv.org/abs/2205.09991) model.
--------------------------------------------------------------------------------
/diffusers/src/diffusers/experimental/__init__.py:
--------------------------------------------------------------------------------
1 | from .rl import ValueGuidedRLPipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/experimental/rl/__init__.py:
--------------------------------------------------------------------------------
1 | from .value_guided_sampling import ValueGuidedRLPipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/models/README.md:
--------------------------------------------------------------------------------
1 | # Models
2 |
3 | For more detail on the models, please refer to the [docs](https://huggingface.co/docs/diffusers/api/models).
--------------------------------------------------------------------------------
/diffusers/src/diffusers/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from ..utils import is_flax_available, is_torch_available
16 |
17 |
18 | if is_torch_available():
19 | from .autoencoder_kl import AutoencoderKL
20 | from .controlnet import ControlNetModel
21 | from .dual_transformer_2d import DualTransformer2DModel
22 | from .modeling_utils import ModelMixin
23 | from .prior_transformer import PriorTransformer
24 | from .t5_film_transformer import T5FilmDecoder
25 | from .transformer_2d import Transformer2DModel
26 | from .unet_1d import UNet1DModel
27 | from .unet_2d import UNet2DModel
28 | from .unet_2d_condition import UNet2DConditionModel
29 | from .unet_3d_condition import UNet3DConditionModel
30 | from .vq_model import VQModel
31 |
32 | if is_flax_available():
33 | from .controlnet_flax import FlaxControlNetModel
34 | from .unet_2d_condition_flax import FlaxUNet2DConditionModel
35 | from .vae_flax import FlaxAutoencoderKL
36 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipeline_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 |
14 | # limitations under the License.
15 |
# NOTE: This file is deprecated and will be removed in a future version.
# It only exists so that `from diffusers.pipeline_utils import DiffusionPipeline` temporarily keeps working.
18 |
19 | from .pipelines import DiffusionPipeline, ImagePipelineOutput # noqa: F401
20 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/alt_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Optional, Union
3 |
4 | import numpy as np
5 | import PIL
6 | from PIL import Image
7 |
8 | from ...utils import BaseOutput, is_torch_available, is_transformers_available
9 |
10 |
11 | @dataclass
12 | # Copied from diffusers.pipelines.stable_diffusion.__init__.StableDiffusionPipelineOutput with Stable->Alt
13 | class AltDiffusionPipelineOutput(BaseOutput):
14 | """
15 | Output class for Alt Diffusion pipelines.
16 |
17 | Args:
18 | images (`List[PIL.Image.Image]` or `np.ndarray`)
19 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
20 | num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
21 | nsfw_content_detected (`List[bool]`)
22 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
23 | (nsfw) content, or `None` if safety checking could not be performed.
24 | """
25 |
26 | images: Union[List[PIL.Image.Image], np.ndarray]
27 | nsfw_content_detected: Optional[List[bool]]
28 |
29 |
30 | if is_transformers_available() and is_torch_available():
31 | from .modeling_roberta_series import RobertaSeriesModelWithTransformation
32 | from .pipeline_alt_diffusion import AltDiffusionPipeline
33 | from .pipeline_alt_diffusion_img2img import AltDiffusionImg2ImgPipeline
34 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/audio_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from .mel import Mel
2 | from .pipeline_audio_diffusion import AudioDiffusionPipeline
3 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/audioldm/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import (
2 | OptionalDependencyNotAvailable,
3 | is_torch_available,
4 | is_transformers_available,
5 | is_transformers_version,
6 | )
7 |
8 |
9 | try:
10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.27.0")):
11 | raise OptionalDependencyNotAvailable()
12 | except OptionalDependencyNotAvailable:
13 | from ...utils.dummy_torch_and_transformers_objects import (
14 | AudioLDMPipeline,
15 | )
16 | else:
17 | from .pipeline_audioldm import AudioLDMPipeline
18 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/dance_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_dance_diffusion import DanceDiffusionPipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/ddim/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_ddim import DDIMPipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/ddpm/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_ddpm import DDPMPipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/dit/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_dit import DiTPipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/latent_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import is_transformers_available
2 | from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline
3 |
4 |
5 | if is_transformers_available():
6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline
7 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/latent_diffusion_uncond/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_latent_diffusion_uncond import LDMPipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/paint_by_example/__init__.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Optional, Union
3 |
4 | import numpy as np
5 | import PIL
6 | from PIL import Image
7 |
8 | from ...utils import is_torch_available, is_transformers_available
9 |
10 |
11 | if is_transformers_available() and is_torch_available():
12 | from .image_encoder import PaintByExampleImageEncoder
13 | from .pipeline_paint_by_example import PaintByExamplePipeline
14 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/paint_by_example/image_encoder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import torch
15 | from torch import nn
16 | from transformers import CLIPPreTrainedModel, CLIPVisionModel
17 |
18 | from ...models.attention import BasicTransformerBlock
19 | from ...utils import logging
20 |
21 |
22 | logger = logging.get_logger(__name__) # pylint: disable=invalid-name
23 |
24 |
class PaintByExampleImageEncoder(CLIPPreTrainedModel):
    """
    Image encoder built from a CLIP vision model followed by a mapper, a layer norm and a linear projection.

    Args:
        config: Configuration forwarded to `CLIPPreTrainedModel`, `CLIPVisionModel` and `PaintByExampleMapper`.
        proj_size: Output size of the final projection and last dimension of `uncond_vector`.
    """

    def __init__(self, config, proj_size=768):
        super().__init__(config)
        self.proj_size = proj_size

        hidden_size = config.hidden_size
        self.model = CLIPVisionModel(config)
        self.mapper = PaintByExampleMapper(config)
        self.final_layer_norm = nn.LayerNorm(hidden_size)
        self.proj_out = nn.Linear(hidden_size, self.proj_size)

        # uncondition for scaling
        self.uncond_vector = nn.Parameter(torch.randn((1, 1, self.proj_size)))

    def forward(self, pixel_values, return_uncond_vector=False):
        """
        Encode `pixel_values`.

        Returns the projected states, or the tuple `(projected states, self.uncond_vector)` when
        `return_uncond_vector` is truthy.
        """
        pooled = self.model(pixel_values=pixel_values).pooler_output
        # `[:, None]` inserts a new axis right after the first one before the mapper is applied
        mapped = self.mapper(pooled[:, None])
        projected = self.proj_out(self.final_layer_norm(mapped))

        if not return_uncond_vector:
            return projected
        return projected, self.uncond_vector
48 |
49 |
class PaintByExampleMapper(nn.Module):
    """
    Stack of `BasicTransformerBlock` layers applied one after another.

    The number of blocks is `(config.num_hidden_layers + 1) // 5`; each block is built with `config.hidden_size`,
    a single attention head, GELU activation and attention bias enabled.
    """

    def __init__(self, config):
        super().__init__()
        depth = (config.num_hidden_layers + 1) // 5
        width = config.hidden_size
        head_count = 1

        layers = []
        for _ in range(depth):
            layers.append(
                BasicTransformerBlock(width, head_count, width, activation_fn="gelu", attention_bias=True)
            )
        self.blocks = nn.ModuleList(layers)

    def forward(self, hidden_states):
        """Pass `hidden_states` through every block in order and return the result."""
        output = hidden_states
        for layer in self.blocks:
            output = layer(output)
        return output
68 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/pndm/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_pndm import PNDMPipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/repaint/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_repaint import RePaintPipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/score_sde_ve/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_score_sde_ve import ScoreSdeVePipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/semantic_stable_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from enum import Enum
3 | from typing import List, Optional, Union
4 |
5 | import numpy as np
6 | import PIL
7 | from PIL import Image
8 |
9 | from ...utils import BaseOutput, is_torch_available, is_transformers_available
10 |
11 |
@dataclass
class SemanticStableDiffusionPipelineOutput(BaseOutput):
    """
    Output class for Semantic Stable Diffusion pipelines.

    Args:
        images (`List[PIL.Image.Image]` or `np.ndarray`)
            List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
            num_channels)`. The PIL images or numpy array represent the denoised images of the diffusion pipeline.
        nsfw_content_detected (`List[bool]`)
            List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
            (nsfw) content, or `None` if safety checking could not be performed.
    """

    images: Union[List[PIL.Image.Image], np.ndarray]
    nsfw_content_detected: Optional[List[bool]]
28 |
29 |
30 | if is_transformers_available() and is_torch_available():
31 | from .pipeline_semantic_stable_diffusion import SemanticStableDiffusionPipeline
32 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/spectrogram_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from ...utils import is_note_seq_available, is_transformers_available, is_torch_available
3 | from ...utils import OptionalDependencyNotAvailable
4 |
5 |
6 | try:
7 | if not (is_transformers_available() and is_torch_available()):
8 | raise OptionalDependencyNotAvailable()
9 | except OptionalDependencyNotAvailable:
10 | from ...utils.dummy_torch_and_transformers_objects import * # noqa F403
11 | else:
12 | from .notes_encoder import SpectrogramNotesEncoder
13 | from .continous_encoder import SpectrogramContEncoder
14 | from .pipeline_spectrogram_diffusion import (
15 | SpectrogramContEncoder,
16 | SpectrogramDiffusionPipeline,
17 | T5FilmDecoder,
18 | )
19 |
20 | try:
21 | if not (is_transformers_available() and is_torch_available() and is_note_seq_available()):
22 | raise OptionalDependencyNotAvailable()
23 | except OptionalDependencyNotAvailable:
24 | from ...utils.dummy_transformers_and_torch_and_note_seq_objects import * # noqa F403
25 | else:
26 | from .midi_utils import MidiProcessor
27 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import Optional, Union
16 |
17 | import torch
18 | from torch import nn
19 |
20 | from ...configuration_utils import ConfigMixin, register_to_config
21 | from ...models.modeling_utils import ModelMixin
22 |
23 |
class StableUnCLIPImageNormalizer(ModelMixin, ConfigMixin):
    """
    Holds the mean and standard deviation of the CLIP embedder used in stable unCLIP.

    `scale` normalizes image embeddings with these statistics (done before noise is applied) and `unscale` reverses
    that normalization on the noised image embeddings.
    """

    @register_to_config
    def __init__(
        self,
        embedding_dim: int = 768,
    ):
        super().__init__()

        stats_shape = (1, embedding_dim)
        self.mean = nn.Parameter(torch.zeros(stats_shape))
        self.std = nn.Parameter(torch.ones(stats_shape))

    def to(
        self,
        torch_device: Optional[Union[str, torch.device]] = None,
        torch_dtype: Optional[torch.dtype] = None,
    ):
        """Move/cast both statistics and re-register each as a new `nn.Parameter`; returns `self`."""
        for stat_name in ("mean", "std"):
            converted = getattr(self, stat_name).to(torch_device).to(torch_dtype)
            setattr(self, stat_name, nn.Parameter(converted))
        return self

    def scale(self, embeds):
        """Return `embeds` with the mean subtracted and divided by the standard deviation."""
        centered = embeds - self.mean
        return centered * 1.0 / self.std

    def unscale(self, embeds):
        """Inverse of `scale`: multiply by the standard deviation, then add the mean."""
        return (embeds * self.std) + self.mean
58 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/stable_diffusion_safe/__init__.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from enum import Enum
3 | from typing import List, Optional, Union
4 |
5 | import numpy as np
6 | import PIL
7 | from PIL import Image
8 |
9 | from ...utils import BaseOutput, is_torch_available, is_transformers_available
10 |
11 |
@dataclass
class SafetyConfig:
    """
    Four named presets (`WEAK`, `MEDIUM`, `STRONG`, `MAX`) of `sld_*` settings.

    Each preset is a plain dict with the same five keys, so it can be unpacked as keyword arguments.
    """

    WEAK = dict(
        sld_warmup_steps=15,
        sld_guidance_scale=20,
        sld_threshold=0.0,
        sld_momentum_scale=0.0,
        sld_mom_beta=0.0,
    )
    MEDIUM = dict(
        sld_warmup_steps=10,
        sld_guidance_scale=1000,
        sld_threshold=0.01,
        sld_momentum_scale=0.3,
        sld_mom_beta=0.4,
    )
    STRONG = dict(
        sld_warmup_steps=7,
        sld_guidance_scale=2000,
        sld_threshold=0.025,
        sld_momentum_scale=0.5,
        sld_mom_beta=0.7,
    )
    MAX = dict(
        sld_warmup_steps=0,
        sld_guidance_scale=5000,
        sld_threshold=1.0,
        sld_momentum_scale=0.5,
        sld_mom_beta=0.7,
    )
42 |
43 |
@dataclass
class StableDiffusionSafePipelineOutput(BaseOutput):
    """
    Output class for Safe Stable Diffusion pipelines.

    Args:
        images (`List[PIL.Image.Image]` or `np.ndarray`)
            List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
            num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
        nsfw_content_detected (`List[bool]`)
            List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
            (nsfw) content, or `None` if safety checking could not be performed.
        unsafe_images (`List[PIL.Image.Image]` or `np.ndarray`)
            List of denoised PIL images that were flagged by the safety checker and may contain "not-safe-for-work"
            (nsfw) content, or `None` if no safety check was performed or no images were flagged.
        applied_safety_concept (`str`)
            The safety concept that was applied for safety guidance, or `None` if safety guidance was disabled
    """

    images: Union[List[PIL.Image.Image], np.ndarray]
    nsfw_content_detected: Optional[List[bool]]
    unsafe_images: Optional[Union[List[PIL.Image.Image], np.ndarray]]
    applied_safety_concept: Optional[str]
67 |
68 |
69 | if is_transformers_available() and is_torch_available():
70 | from .pipeline_stable_diffusion_safe import StableDiffusionPipelineSafe
71 | from .safety_checker import SafeStableDiffusionSafetyChecker
72 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/stochastic_karras_ve/__init__.py:
--------------------------------------------------------------------------------
1 | from .pipeline_stochastic_karras_ve import KarrasVePipeline
2 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/text_to_video_synthesis/__init__.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Optional, Union
3 |
4 | import numpy as np
5 | import torch
6 |
7 | from ...utils import BaseOutput, OptionalDependencyNotAvailable, is_torch_available, is_transformers_available
8 |
9 |
@dataclass
class TextToVideoSDPipelineOutput(BaseOutput):
    """
    Output class for text to video pipelines.

    Args:
        frames (`List[np.ndarray]` or `torch.FloatTensor`)
            List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as
            a `torch` tensor. The NumPy arrays represent the denoised images of the diffusion pipeline. The length of
            the list denotes the video length, i.e., the number of frames.
    """

    frames: Union[List[np.ndarray], torch.FloatTensor]
23 |
24 |
25 | try:
26 | if not (is_transformers_available() and is_torch_available()):
27 | raise OptionalDependencyNotAvailable()
28 | except OptionalDependencyNotAvailable:
29 | from ...utils.dummy_torch_and_transformers_objects import * # noqa F403
30 | else:
31 | from .pipeline_text_to_video_synth import TextToVideoSDPipeline # noqa: F401
32 | from .pipeline_text_to_video_zero import TextToVideoZeroPipeline
33 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/unclip/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import (
2 | OptionalDependencyNotAvailable,
3 | is_torch_available,
4 | is_transformers_available,
5 | is_transformers_version,
6 | )
7 |
8 |
9 | try:
10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")):
11 | raise OptionalDependencyNotAvailable()
12 | except OptionalDependencyNotAvailable:
13 | from ...utils.dummy_torch_and_transformers_objects import UnCLIPImageVariationPipeline, UnCLIPPipeline
14 | else:
15 | from .pipeline_unclip import UnCLIPPipeline
16 | from .pipeline_unclip_image_variation import UnCLIPImageVariationPipeline
17 | from .text_proj import UnCLIPTextProjModel
18 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/versatile_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import (
2 | OptionalDependencyNotAvailable,
3 | is_torch_available,
4 | is_transformers_available,
5 | is_transformers_version,
6 | )
7 |
8 |
9 | try:
10 | if not (is_transformers_available() and is_torch_available() and is_transformers_version(">=", "4.25.0")):
11 | raise OptionalDependencyNotAvailable()
12 | except OptionalDependencyNotAvailable:
13 | from ...utils.dummy_torch_and_transformers_objects import (
14 | VersatileDiffusionDualGuidedPipeline,
15 | VersatileDiffusionImageVariationPipeline,
16 | VersatileDiffusionPipeline,
17 | VersatileDiffusionTextToImagePipeline,
18 | )
19 | else:
20 | from .modeling_text_unet import UNetFlatConditionModel
21 | from .pipeline_versatile_diffusion import VersatileDiffusionPipeline
22 | from .pipeline_versatile_diffusion_dual_guided import VersatileDiffusionDualGuidedPipeline
23 | from .pipeline_versatile_diffusion_image_variation import VersatileDiffusionImageVariationPipeline
24 | from .pipeline_versatile_diffusion_text_to_image import VersatileDiffusionTextToImagePipeline
25 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/pipelines/vq_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from ...utils import is_torch_available, is_transformers_available
2 |
3 |
4 | if is_transformers_available() and is_torch_available():
5 | from .pipeline_vq_diffusion import LearnedClassifierFreeSamplingEmbeddings, VQDiffusionPipeline
6 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/schedulers/README.md:
--------------------------------------------------------------------------------
1 | # Schedulers
2 |
3 | For more information on the schedulers, please refer to the [docs](https://huggingface.co/docs/diffusers/api/schedulers/overview).
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/accelerate_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Accelerate utilities: Utilities related to accelerate
16 | """
17 |
18 | from packaging import version
19 |
20 | from .import_utils import is_accelerate_available
21 |
22 |
23 | if is_accelerate_available():
24 | import accelerate
25 |
26 |
def apply_forward_hook(method):
    """
    Decorator that applies a registered CpuOffload hook to an arbitrary function rather than `forward`. This is useful
    for cases where a PyTorch module provides functions other than `forward` that should trigger a move to the
    appropriate acceleration device. This is the case for `encode` and `decode` in [`AutoencoderKL`].

    This decorator looks inside the internal `_hf_hook` property to find a registered offload hook.

    The method is returned unchanged when accelerate is not available or its version is older than 0.17.0. Otherwise
    the returned wrapper keeps the name, docstring and signature metadata of `method` (via `functools.wraps`).

    :param method: The method to decorate. This method should be a method of a PyTorch module.
    :return: Either `method` itself or a wrapper that calls `self._hf_hook.pre_forward(self)` before `method`.
    """
    # Imported locally so the block does not depend on a module-level import that may not exist in this file.
    import functools

    if not is_accelerate_available():
        return method
    # `base_version` drops pre-release/dev suffixes so that e.g. "0.17.0.dev0" compares as "0.17.0".
    accelerate_version = version.parse(accelerate.__version__).base_version
    if version.parse(accelerate_version) < version.parse("0.17.0"):
        return method

    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        # Only trigger the hook when one is attached and it actually exposes `pre_forward`.
        if hasattr(self, "_hf_hook") and hasattr(self._hf_hook, "pre_forward"):
            self._hf_hook.pre_forward(self)
        return method(self, *args, **kwargs)

    return wrapper
49 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/constants.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import os
15 |
16 | from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE, hf_cache_home
17 |
18 |
# Alias of the cache location exported by `huggingface_hub`.
default_cache_path = HUGGINGFACE_HUB_CACHE


# String constants shared across the package (presumably the on-disk file names used when saving/loading
# configs and weights -- confirm against the loaders that import them).
CONFIG_NAME = "config.json"
WEIGHTS_NAME = "diffusion_pytorch_model.bin"
FLAX_WEIGHTS_NAME = "diffusion_flax_model.msgpack"
ONNX_WEIGHTS_NAME = "model.onnx"
SAFETENSORS_WEIGHTS_NAME = "diffusion_pytorch_model.safetensors"
ONNX_EXTERNAL_WEIGHTS_NAME = "weights.pb"
HUGGINGFACE_CO_RESOLVE_ENDPOINT = "https://huggingface.co"
# Same value as `default_cache_path` above.
DIFFUSERS_CACHE = default_cache_path
DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules"
# The `HF_MODULES_CACHE` environment variable wins; otherwise fall back to `<hf_cache_home>/modules`.
HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules"))
DEPRECATED_REVISION_ARGS = ["fp16", "non-ema"]
TEXT_ENCODER_TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "out_proj"]
34 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/deprecation_utils.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import warnings
3 | from typing import Any, Dict, Optional, Union
4 |
5 | from packaging import version
6 |
7 |
def deprecate(*args, take_from: Optional[Union[Dict, Any]] = None, standard_warn=True):
    """
    Emit `FutureWarning`s for one or more `(attribute, version_name, message)` entries and collect the deprecated
    values found on `take_from`.

    Args:
        *args: Either a single entry passed as three positional arguments (`attribute, version_name, message`), or
            one or more entries each passed as a tuple.
        take_from: Where deprecated values are looked up. For a dict, a matching key is popped; any other object is
            probed with `hasattr`/`getattr`; `None` only produces the generic "is deprecated" warning.
        standard_warn: When true, the generated "... is deprecated and will be removed in version ..." sentence is
            prepended to `message`; when false only `message` is emitted.

    Returns:
        `None` when no value was collected, the value itself when exactly one was collected, otherwise a tuple of the
        collected values in entry order.

    Raises:
        ValueError: If the base version of the installed package is already >= an entry's `version_name`.
        TypeError: If `take_from` is a dict that still contains keys after all entries were processed.
    """
    from .. import __version__

    deprecated_kwargs = take_from
    values = ()
    # Normalize the "three positional arguments" call form into a one-element tuple of entries.
    if not isinstance(args[0], tuple):
        args = (args,)

    for attribute, version_name, message in args:
        # `base_version` strips dev/pre-release suffixes before comparing against the removal version.
        if version.parse(version.parse(__version__).base_version) >= version.parse(version_name):
            raise ValueError(
                f"The deprecation tuple {(attribute, version_name, message)} should be removed since diffusers'"
                f" version {__version__} is >= {version_name}"
            )

        # Stays `None` (no warning emitted) when `take_from` was given but does not carry `attribute`.
        warning = None
        if isinstance(deprecated_kwargs, dict) and attribute in deprecated_kwargs:
            # Popping removes the key, so it is not reported as unexpected by the leftover check below.
            values += (deprecated_kwargs.pop(attribute),)
            warning = f"The `{attribute}` argument is deprecated and will be removed in version {version_name}."
        elif hasattr(deprecated_kwargs, attribute):
            values += (getattr(deprecated_kwargs, attribute),)
            warning = f"The `{attribute}` attribute is deprecated and will be removed in version {version_name}."
        elif deprecated_kwargs is None:
            warning = f"`{attribute}` is deprecated and will be removed in version {version_name}."

        if warning is not None:
            # Parses as `(warning + " ") if standard_warn else ""`: the standard sentence is dropped entirely
            # when `standard_warn` is false.
            warning = warning + " " if standard_warn else ""
            # stacklevel=2 attributes the warning to the caller of `deprecate`.
            warnings.warn(warning + message, FutureWarning, stacklevel=2)

    # Any key left in the dict was not declared as deprecated: report it using the caller's frame details.
    if isinstance(deprecated_kwargs, dict) and len(deprecated_kwargs) > 0:
        # Index 1 is the frame that called `deprecate` (index 0 is this function's own frame).
        call_frame = inspect.getouterframes(inspect.currentframe())[1]
        filename = call_frame.filename
        line_number = call_frame.lineno
        function = call_frame.function
        # Only the first leftover key is reported.
        key, value = next(iter(deprecated_kwargs.items()))
        raise TypeError(f"{function} in {filename} line {line_number-1} got an unexpected keyword argument `{key}`")

    if len(values) == 0:
        return
    elif len(values) == 1:
        return values[0]
    return values
50 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/doc_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Doc utilities: Utilities related to documentation
16 | """
17 | import re
18 |
19 |
def replace_example_docstring(example_docstring):
    """
    Build a decorator that swaps the `Examples:` placeholder line of a docstring for `example_docstring`.

    The decorated function's docstring is scanned line by line; the first line consisting only of `Example:` or
    `Examples:` (surrounding whitespace allowed) is replaced by `example_docstring`. The function object itself is
    returned, with only its `__doc__` updated.

    Args:
        example_docstring: Text inserted in place of the placeholder line.

    Returns:
        A decorator taking and returning the function.

    Raises:
        ValueError: (from the decorator) if the function has no docstring at all, or its docstring contains no
            placeholder line.
    """

    def docstring_decorator(fn):
        func_doc = fn.__doc__
        # `__doc__` is None for undocumented functions (and when docstrings are stripped, e.g. `python -OO`).
        # Treat that like "no placeholder" so the caller gets the descriptive ValueError below rather than an
        # AttributeError from `None.split`.
        lines = func_doc.split("\n") if func_doc is not None else []

        for index, line in enumerate(lines):
            if re.search(r"^\s*Examples?:\s*$", line) is not None:
                # Only the first placeholder is replaced.
                lines[index] = example_docstring
                break
        else:
            raise ValueError(
                f"The function {fn} should have an empty 'Examples:' in its docstring as placeholder, "
                f"current docstring is:\n{func_doc}"
            )

        fn.__doc__ = "\n".join(lines)
        return fn

    return docstring_decorator
39 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/dummy_flax_and_transformers_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class FlaxStableDiffusionControlNetPipeline(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "flax"/"transformers" list; that helper is defined outside this file and
    # presumably raises when a backend is missing -- verify there.
    _backends = ["flax", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["flax", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["flax", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["flax", "transformers"])
18 |
19 |
class FlaxStableDiffusionImg2ImgPipeline(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "flax"/"transformers" list; that helper is defined outside this file and
    # presumably raises when a backend is missing -- verify there.
    _backends = ["flax", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["flax", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["flax", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["flax", "transformers"])
33 |
34 |
class FlaxStableDiffusionInpaintPipeline(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "flax"/"transformers" list; that helper is defined outside this file and
    # presumably raises when a backend is missing -- verify there.
    _backends = ["flax", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["flax", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["flax", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["flax", "transformers"])
48 |
49 |
class FlaxStableDiffusionPipeline(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "flax"/"transformers" list; that helper is defined outside this file and
    # presumably raises when a backend is missing -- verify there.
    _backends = ["flax", "transformers"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["flax", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["flax", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["flax", "transformers"])
63 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/dummy_note_seq_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class MidiProcessor(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "note_seq" list; that helper is defined outside this file and presumably
    # raises when the backend is missing -- verify there.
    _backends = ["note_seq"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["note_seq"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["note_seq"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["note_seq"])
18 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/dummy_onnx_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class OnnxRuntimeModel(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "onnx" list; that helper is defined outside this file and presumably
    # raises when the backend is missing -- verify there.
    _backends = ["onnx"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["onnx"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["onnx"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["onnx"])
18 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/dummy_torch_and_librosa_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class AudioDiffusionPipeline(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "torch"/"librosa" list; that helper is defined outside this file and
    # presumably raises when a backend is missing -- verify there.
    _backends = ["torch", "librosa"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "librosa"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "librosa"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "librosa"])
18 |
19 |
class Mel(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "torch"/"librosa" list; that helper is defined outside this file and
    # presumably raises when a backend is missing -- verify there.
    _backends = ["torch", "librosa"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "librosa"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "librosa"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "librosa"])
33 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/dummy_torch_and_scipy_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class LMSDiscreteScheduler(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "torch"/"scipy" list; that helper is defined outside this file and
    # presumably raises when a backend is missing -- verify there.
    _backends = ["torch", "scipy"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "scipy"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "scipy"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "scipy"])
18 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/dummy_torch_and_transformers_and_k_diffusion_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class StableDiffusionKDiffusionPipeline(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "torch"/"transformers"/"k_diffusion" list; that helper is defined outside
    # this file and presumably raises when a backend is missing -- verify there.
    _backends = ["torch", "transformers", "k_diffusion"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "transformers", "k_diffusion"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers", "k_diffusion"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers", "k_diffusion"])
18 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/dummy_transformers_and_torch_and_note_seq_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | from ..utils import DummyObject, requires_backends
3 |
4 |
class SpectrogramDiffusionPipeline(metaclass=DummyObject):
    # Autogenerated stand-in (see the header of this file). The constructor and both loader classmethods only
    # call `requires_backends` with the "transformers"/"torch"/"note_seq" list; that helper is defined outside
    # this file and presumably raises when a backend is missing -- verify there.
    _backends = ["transformers", "torch", "note_seq"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["transformers", "torch", "note_seq"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["transformers", "torch", "note_seq"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["transformers", "torch", "note_seq"])
18 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/model_card_template.md:
--------------------------------------------------------------------------------
1 | ---
2 | {{ card_data }}
3 | ---
4 |
5 |
7 |
8 | # {{ model_name | default("Diffusion Model") }}
9 |
10 | ## Model description
11 |
12 | This diffusion model is trained with the [🤗 Diffusers](https://github.com/huggingface/diffusers) library
13 | on the `{{ dataset_name }}` dataset.
14 |
15 | ## Intended uses & limitations
16 |
17 | #### How to use
18 |
19 | ```python
20 | # TODO: add an example code snippet for running this diffusion pipeline
21 | ```
22 |
23 | #### Limitations and bias
24 |
25 | [TODO: provide examples of latent issues and potential remediations]
26 |
27 | ## Training data
28 |
29 | [TODO: describe the data used to train the model]
30 |
31 | ### Training hyperparameters
32 |
33 | The following hyperparameters were used during training:
34 | - learning_rate: {{ learning_rate }}
35 | - train_batch_size: {{ train_batch_size }}
36 | - eval_batch_size: {{ eval_batch_size }}
37 | - gradient_accumulation_steps: {{ gradient_accumulation_steps }}
38 | - optimizer: AdamW with betas=({{ adam_beta1 }}, {{ adam_beta2 }}), weight_decay={{ adam_weight_decay }} and epsilon={{ adam_epsilon }}
39 | - lr_scheduler: {{ lr_scheduler }}
40 | - lr_warmup_steps: {{ lr_warmup_steps }}
41 | - ema_inv_gamma: {{ ema_inv_gamma }}
42 | - ema_power: {{ ema_power }}
43 | - ema_max_decay: {{ ema_max_decay }}
44 | - mixed_precision: {{ mixed_precision }}
45 |
46 | ### Training results
47 |
48 | 📈 [TensorBoard logs](https://huggingface.co/{{ repo_name }}/tensorboard?#scalars)
49 |
50 |
51 |
--------------------------------------------------------------------------------
/diffusers/src/diffusers/utils/pil_utils.py:
--------------------------------------------------------------------------------
1 | import PIL.Image
2 | import PIL.ImageOps
3 | from packaging import version
4 |
5 |
# Maps interpolation names to Pillow resampling filters. From Pillow 9.1.0 on the filters are read from
# `PIL.Image.Resampling`; older versions use the constants that live directly on `PIL.Image`.
# Only the selected branch of the conditional expression is evaluated, so the other namespace is never touched.
PIL_INTERPOLATION = (
    {
        "linear": PIL.Image.Resampling.BILINEAR,
        "bilinear": PIL.Image.Resampling.BILINEAR,
        "bicubic": PIL.Image.Resampling.BICUBIC,
        "lanczos": PIL.Image.Resampling.LANCZOS,
        "nearest": PIL.Image.Resampling.NEAREST,
    }
    if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0")
    else {
        "linear": PIL.Image.LINEAR,
        "bilinear": PIL.Image.BILINEAR,
        "bicubic": PIL.Image.BICUBIC,
        "lanczos": PIL.Image.LANCZOS,
        "nearest": PIL.Image.NEAREST,
    }
)
22 |
--------------------------------------------------------------------------------
/diffusers/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/conftest.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # tests directory-specific settings - this file is run automatically
16 | # by pytest before any tests are run
17 |
18 | import sys
19 | import warnings
20 | from os.path import abspath, dirname, join
21 |
22 |
# allow having multiple repository checkouts and not needing to remember to rerun
# 'pip install -e .[dev]' when switching between checkouts and running tests.
# The path is the `src` directory next to the directory containing this file; it is inserted at index 1 of
# `sys.path`, i.e. right after the first entry.
git_repo_path = abspath(join(dirname(dirname(__file__)), "src"))
sys.path.insert(1, git_repo_path)

# silence FutureWarning warnings in tests since often we can't act on them until
# they become normal warnings - i.e. the tests still need to test the current functionality
warnings.simplefilter(action="ignore", category=FutureWarning)
31 |
32 |
def pytest_addoption(parser):
    """Pytest hook: hand the option `parser` to `pytest_addoption_shared` from `diffusers.utils.testing_utils`."""
    # Imported inside the hook rather than at module import time, as in the original.
    from diffusers.utils import testing_utils

    testing_utils.pytest_addoption_shared(parser)
37 |
38 |
def pytest_terminal_summary(terminalreporter):
    """Pytest hook: when `--make-reports` has a truthy value, call `pytest_terminal_summary_main` with it as `id`."""
    from diffusers.utils.testing_utils import pytest_terminal_summary_main

    report_id = terminalreporter.config.getoption("--make-reports")
    if not report_id:
        # Option not given (or empty): nothing to report.
        return
    pytest_terminal_summary_main(terminalreporter, id=report_id)
45 |
--------------------------------------------------------------------------------
/diffusers/tests/fixtures/elise_format0.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/fixtures/elise_format0.mid
--------------------------------------------------------------------------------
/diffusers/tests/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/models/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/models/test_attention_processor.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import torch
4 |
5 | from diffusers.models.attention_processor import Attention, AttnAddedKVProcessor
6 |
7 |
class AttnAddedKVProcessorTests(unittest.TestCase):
    """Checks how `Attention` with an `AttnAddedKVProcessor` behaves with and without `only_cross_attention`."""

    def get_constructor_arguments(self, only_cross_attention: bool = False):
        """Return the keyword arguments used to build `Attention` for the requested mode."""
        query_dim = 10

        if only_cross_attention:
            cross_attention_dim = 12
        else:
            # when only cross attention is not set, the cross attention dim must be the same as the query dim
            cross_attention_dim = query_dim

        return {
            "query_dim": query_dim,
            "cross_attention_dim": cross_attention_dim,
            "heads": 2,
            "dim_head": 4,
            "added_kv_proj_dim": 6,
            "norm_num_groups": 1,
            "only_cross_attention": only_cross_attention,
            "processor": AttnAddedKVProcessor(),
        }

    def get_forward_arguments(self, query_dim, added_kv_proj_dim):
        """Return random forward inputs: `hidden_states` of shape `(2, query_dim, 3, 2)`, `encoder_hidden_states`
        of shape `(2, 4, added_kv_proj_dim)` and no attention mask."""
        batch_size = 2

        hidden_states = torch.rand(batch_size, query_dim, 3, 2)
        encoder_hidden_states = torch.rand(batch_size, 4, added_kv_proj_dim)
        attention_mask = None

        return {
            "hidden_states": hidden_states,
            "encoder_hidden_states": encoder_hidden_states,
            "attention_mask": attention_mask,
        }

    def test_only_cross_attention(self):
        """Build both variants from the same seed and check that `to_k`/`to_v` exist only without
        `only_cross_attention`, and that the two outputs differ in every element."""
        # self and cross attention

        # The seed is reset before each variant so module construction and the random inputs start from the
        # same generator state.
        torch.manual_seed(0)

        constructor_args = self.get_constructor_arguments(only_cross_attention=False)
        attn = Attention(**constructor_args)

        self.assertTrue(attn.to_k is not None)
        self.assertTrue(attn.to_v is not None)

        forward_args = self.get_forward_arguments(
            query_dim=constructor_args["query_dim"], added_kv_proj_dim=constructor_args["added_kv_proj_dim"]
        )

        self_and_cross_attn_out = attn(**forward_args)

        # only cross attention

        torch.manual_seed(0)

        constructor_args = self.get_constructor_arguments(only_cross_attention=True)
        attn = Attention(**constructor_args)

        self.assertTrue(attn.to_k is None)
        self.assertTrue(attn.to_v is None)

        forward_args = self.get_forward_arguments(
            query_dim=constructor_args["query_dim"], added_kv_proj_dim=constructor_args["added_kv_proj_dim"]
        )

        only_cross_attn_out = attn(**forward_args)

        self.assertTrue((only_cross_attn_out != self_and_cross_attn_out).all())
76 |
--------------------------------------------------------------------------------
/diffusers/tests/models/test_modeling_common_flax.py:
--------------------------------------------------------------------------------
1 | import inspect
2 |
3 | from diffusers.utils import is_flax_available
4 | from diffusers.utils.testing_utils import require_flax
5 |
6 |
7 | if is_flax_available():
8 | import jax
9 |
10 |
@require_flax
class FlaxModelTesterMixin:
    """
    Shared checks for Flax model tests.

    The concrete test class is expected to provide `model_class` and `prepare_init_args_and_inputs_for_common()`,
    the latter returning `(init_dict, inputs_dict)` where `inputs_dict` has the keys `"prng_key"` and `"sample"`.
    """

    def _check_output_shape_matches_input(self, **init_overrides):
        """Build the model (with `init_overrides` applied on top of the common init args), run it once on the
        sample and assert that the output exists and has the sample's shape."""
        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
        init_dict.update(init_overrides)

        model = self.model_class(**init_dict)
        variables = model.init(inputs_dict["prng_key"], inputs_dict["sample"])
        # `stop_gradient` does not act in place: it returns the wrapped value, so the result has to be kept
        # (the bare call had no effect).
        variables = jax.lax.stop_gradient(variables)

        output = model.apply(variables, inputs_dict["sample"])

        # Dict-like outputs expose the tensor under `.sample`.
        if isinstance(output, dict):
            output = output.sample

        self.assertIsNotNone(output)
        expected_shape = inputs_dict["sample"].shape
        self.assertEqual(output.shape, expected_shape, "Input and output shapes do not match")

    def test_output(self):
        """The model output has the same shape as the input sample."""
        self._check_output_shape_matches_input()

    def test_forward_with_norm_groups(self):
        """Same shape check with `norm_num_groups=16` and `block_out_channels=(16, 32)`."""
        self._check_output_shape_matches_input(norm_num_groups=16, block_out_channels=(16, 32))

    def test_deprecated_kwargs(self):
        """`**kwargs` in `__init__` and a non-empty `_deprecated_kwargs` must either both be present or both be
        absent; a mismatch raises `ValueError`."""
        has_kwarg_in_model_class = "kwargs" in inspect.signature(self.model_class.__init__).parameters
        has_deprecated_kwarg = len(self.model_class._deprecated_kwargs) > 0

        if has_kwarg_in_model_class and not has_deprecated_kwarg:
            raise ValueError(
                f"{self.model_class} has `**kwargs` in its __init__ method but has not defined any deprecated kwargs"
                " under the `_deprecated_kwargs` class attribute. Make sure to either remove `**kwargs` if there are"
                " no deprecated arguments or add the deprecated argument with `_deprecated_kwargs ="
                " []`"
            )

        if not has_kwarg_in_model_class and has_deprecated_kwarg:
            raise ValueError(
                f"{self.model_class} doesn't have `**kwargs` in its __init__ method but has defined deprecated kwargs"
                " under the `_deprecated_kwargs` class attribute. Make sure to either add the `**kwargs` argument to"
                f" {self.model_class}.__init__ if there are deprecated arguments or remove the deprecated argument"
                " from `_deprecated_kwargs = []`"
            )
67 |
--------------------------------------------------------------------------------
/diffusers/tests/models/test_models_vae_flax.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from diffusers import FlaxAutoencoderKL
4 | from diffusers.utils import is_flax_available
5 | from diffusers.utils.testing_utils import require_flax
6 |
7 | from .test_modeling_common_flax import FlaxModelTesterMixin
8 |
9 |
10 | if is_flax_available():
11 | import jax
12 |
13 |
@require_flax
class FlaxAutoencoderKLTests(FlaxModelTesterMixin, unittest.TestCase):
    """Runs the common Flax model checks against ``FlaxAutoencoderKL``."""

    model_class = FlaxAutoencoderKL

    @property
    def dummy_input(self):
        """Return a random image batch together with the PRNG key used to draw it."""
        batch_size, num_channels, height, width = 4, 3, 32, 32

        key = jax.random.PRNGKey(0)
        sample = jax.random.uniform(key, (batch_size, num_channels, height, width))
        return {"sample": sample, "prng_key": key}

    def prepare_init_args_and_inputs_for_common(self):
        """Return ``(init_dict, inputs_dict)`` for a small two-block autoencoder."""
        init_dict = dict(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
        )
        return init_dict, self.dummy_input
40 |
--------------------------------------------------------------------------------
/diffusers/tests/others/test_hub_utils.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 HuggingFace Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | import unittest
16 | from pathlib import Path
17 | from tempfile import TemporaryDirectory
18 | from unittest.mock import Mock, patch
19 |
20 | import diffusers.utils.hub_utils
21 |
22 |
class CreateModelCardTest(unittest.TestCase):
    """Checks that ``create_model_card`` writes a README.md into the output directory."""

    @patch("diffusers.utils.hub_utils.get_full_repo_name")
    def test_create_model_card(self, repo_name_mock: Mock) -> None:
        """Render a model card from dummy training args and expect README.md to exist."""
        repo_name_mock.return_value = "full_repo_name"

        # Dummy argument values; only `output_dir` depends on the temporary directory.
        dummy_values = dict(
            local_rank=0,
            hub_token="hub_token",
            dataset_name="dataset_name",
            learning_rate=0.01,
            train_batch_size=100000,
            eval_batch_size=10000,
            gradient_accumulation_steps=0.01,
            adam_beta1=0.02,
            adam_beta2=0.03,
            adam_weight_decay=0.0005,
            adam_epsilon=0.000001,
            lr_scheduler=1,
            lr_warmup_steps=10,
            ema_inv_gamma=0.001,
            ema_power=0.1,
            ema_max_decay=0.2,
            mixed_precision=True,
        )

        with TemporaryDirectory() as tmpdir:
            # Keyword arguments to Mock become plain attributes on the mock.
            args = Mock(output_dir=tmpdir, **dummy_values)

            # The model card must be rendered and saved.
            diffusers.utils.hub_utils.create_model_card(args, model_name="model_name")
            self.assertTrue(Path(tmpdir, "README.md").is_file())
52 |
--------------------------------------------------------------------------------
/diffusers/tests/others/test_outputs.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from dataclasses import dataclass
3 | from typing import List, Union
4 |
5 | import numpy as np
6 | import PIL.Image
7 |
8 | from diffusers.utils.outputs import BaseOutput
9 |
10 |
@dataclass
class CustomOutput(BaseOutput):
    """Minimal ``BaseOutput`` subclass with a single ``images`` field, used by the tests below."""

    images: Union[List[PIL.Image.Image], np.ndarray]


class ConfigTester(unittest.TestCase):
    """Checks attribute, key and index access on a single-field output object."""

    def _check_array_access(self, outputs):
        """Every access path must yield the (1, 3, 4, 4) numpy array."""
        for images in (outputs.images, outputs["images"], outputs[0]):
            assert isinstance(images, np.ndarray)
            assert images.shape == (1, 3, 4, 4)

    def _check_image_list_access(self, outputs):
        """Every access path must yield a list whose first item is a PIL image."""
        for images in (outputs.images, outputs["images"], outputs[0]):
            assert isinstance(images, list)
            assert isinstance(images[0], PIL.Image.Image)

    def test_outputs_single_attribute(self):
        """Construct the output with a keyword argument."""
        # tensor-like attribute
        self._check_array_access(CustomOutput(images=np.random.rand(1, 3, 4, 4)))

        # non-tensor attribute
        self._check_image_list_access(CustomOutput(images=[PIL.Image.new("RGB", (4, 4))]))

    def test_outputs_dict_init(self):
        """Construct the output from a `dict`, for compatibility with `accelerate`."""
        # tensor-like attribute
        self._check_array_access(CustomOutput({"images": np.random.rand(1, 3, 4, 4)}))

        # non-tensor attribute
        self._check_image_list_access(CustomOutput({"images": [PIL.Image.new("RGB", (4, 4))]}))
61 |
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/altdiffusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/altdiffusion/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/audio_diffusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/audio_diffusion/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/audioldm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/audioldm/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/dance_diffusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/dance_diffusion/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/ddim/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/ddim/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/ddpm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/ddpm/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/dit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/dit/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/karras_ve/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/karras_ve/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/latent_diffusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/latent_diffusion/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/paint_by_example/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/paint_by_example/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/pndm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/pndm/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/repaint/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/repaint/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/score_sde_ve/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/score_sde_ve/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/semantic_stable_diffusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/semantic_stable_diffusion/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/spectrogram_diffusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/spectrogram_diffusion/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/stable_diffusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/stable_diffusion/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/stable_diffusion_2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/stable_diffusion_2/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/stable_diffusion_safe/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/stable_diffusion_safe/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/stable_unclip/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/stable_unclip/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/test_pipelines_onnx_common.py:
--------------------------------------------------------------------------------
1 | from diffusers.utils.testing_utils import require_onnxruntime
2 |
3 |
@require_onnxruntime
class OnnxPipelineTesterMixin:
    """
    Mixin meant to be combined with unittest.TestCase classes.

    It is the place for tests common to every ONNXRuntime pipeline (e.g. saving and loading the pipeline,
    equivalence of dict and tuple outputs). No shared tests are defined here yet.
    """
13 |
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/text_to_video/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/text_to_video/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/text_to_video/test_text_to_video_zero.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 HuggingFace Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import unittest
17 |
18 | import torch
19 |
20 | from diffusers import DDIMScheduler, TextToVideoZeroPipeline
21 | from diffusers.utils import load_pt, require_torch_gpu, slow
22 |
23 | from ..test_pipelines_common import assert_mean_pixel_difference
24 |
25 |
@slow
@require_torch_gpu
class TextToVideoZeroPipelineSlowTests(unittest.TestCase):
    """End-to-end check of ``TextToVideoZeroPipeline`` against a stored reference result."""

    def test_full_model(self):
        """Generate frames for a fixed prompt and seed and compare with the reference tensor."""
        pipe = TextToVideoZeroPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
        pipe = pipe.to("cuda")
        # Swap in a DDIM scheduler built from the pipeline's existing scheduler config.
        pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

        # Fixed seed so the output is comparable with the stored reference.
        generator = torch.Generator(device="cuda").manual_seed(0)
        result = pipe(prompt="A bear is playing a guitar on Times Square", generator=generator).images

        reference_url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text-to-video/A bear is playing a guitar on Times Square.pt"
        expected_result = load_pt(reference_url)

        assert_mean_pixel_difference(result, expected_result)
43 |
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/unclip/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/unclip/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/versatile_diffusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/versatile_diffusion/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/versatile_diffusion/test_versatile_diffusion_image_variation.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 HuggingFace Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import unittest
17 |
18 | import numpy as np
19 | import torch
20 |
21 | from diffusers import VersatileDiffusionImageVariationPipeline
22 | from diffusers.utils.testing_utils import load_image, require_torch_gpu, slow, torch_device
23 |
24 |
# Turn off TF32 matmuls on CUDA for this test module
# (presumably to keep the numerical comparisons below stable - TODO confirm).
torch.backends.cuda.matmul.allow_tf32 = False


class VersatileDiffusionImageVariationPipelineFastTests(unittest.TestCase):
    """Placeholder test case: no fast tests are defined for this pipeline."""
30 |
31 |
@slow
@require_torch_gpu
class VersatileDiffusionImageVariationPipelineIntegrationTests(unittest.TestCase):
    """Integration check of the image-variation pipeline against recorded pixel values."""

    def test_inference_image_variations(self):
        """Run 50 inference steps on a reference image and compare a corner slice."""
        pipe = VersatileDiffusionImageVariationPipeline.from_pretrained("shi-labs/versatile-diffusion")
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        source_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/versatile_diffusion/benz.jpg"
        )
        result = pipe(
            image=source_image,
            generator=torch.manual_seed(0),
            guidance_scale=7.5,
            num_inference_steps=50,
            output_type="numpy",
        )
        image = result.images

        # 3x3 patch taken from the last channel at rows/columns 253..255 of the first image.
        image_slice = image[0, 253:256, 253:256, -1]

        assert image.shape == (1, 512, 512, 3)

        expected_slice = np.array([0.0441, 0.0469, 0.0507, 0.0575, 0.0632, 0.0650, 0.0865, 0.0909, 0.0945])
        max_abs_diff = np.abs(image_slice.flatten() - expected_slice).max()
        assert max_abs_diff < 1e-2
58 |
--------------------------------------------------------------------------------
/diffusers/tests/pipelines/vq_diffusion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/pipelines/vq_diffusion/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/schedulers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/diffusers/tests/schedulers/__init__.py
--------------------------------------------------------------------------------
/diffusers/tests/schedulers/test_scheduler_vq_diffusion.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 | from diffusers import VQDiffusionScheduler
5 |
6 | from .test_schedulers import SchedulerCommonTest
7 |
8 |
class VQDiffusionSchedulerTest(SchedulerCommonTest):
    """Scheduler tests for ``VQDiffusionScheduler`` built on the common scheduler test base."""

    scheduler_classes = (VQDiffusionScheduler,)

    def get_scheduler_config(self, **kwargs):
        """Return the default scheduler config, with ``kwargs`` taking precedence."""
        return {
            "num_vec_classes": 4097,
            "num_train_timesteps": 100,
            **kwargs,
        }

    def dummy_sample(self, num_vec_classes):
        """Return random integer class indices of shape (batch_size, height * width)."""
        batch_size, height, width = 4, 8, 8
        return torch.randint(0, num_vec_classes, (batch_size, height * width))

    @property
    def dummy_sample_deter(self):
        # A deterministic sample is not provided for this scheduler; any access fails.
        assert False

    def dummy_model(self, num_vec_classes):
        """Return a stand-in model that emits random log-probabilities.

        The returned callable ignores the timestep and produces a tensor of
        shape (batch_size, num_vec_classes - 1, num_latent_pixels).
        """

        def model(sample, t, *args):
            batch_size, num_latent_pixels = sample.shape
            random_logits = torch.rand((batch_size, num_vec_classes - 1, num_latent_pixels))
            # log-softmax is evaluated in float64 and the result cast back to float32.
            return F.log_softmax(random_logits.double(), dim=1).float()

        return model

    def test_timesteps(self):
        for num_train_timesteps in (2, 5, 100, 1000):
            self.check_over_configs(num_train_timesteps=num_train_timesteps)

    def test_num_vec_classes(self):
        for class_count in (5, 100, 1000, 4000):
            self.check_over_configs(num_vec_classes=class_count)

    def test_time_indices(self):
        for time_step in (0, 50, 99):
            self.check_over_forward(time_step=time_step)

    def test_add_noise_device(self):
        # Overridden with an empty body, so this test does nothing for this scheduler.
        pass
57 |
--------------------------------------------------------------------------------
/diffusers/utils/get_modified_files.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The HuggingFace Inc. team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # this script reports modified .py files under the desired list of top-level sub-dirs passed as a list of arguments, e.g.:
17 | # python ./utils/get_modified_files.py utils src tests examples
18 | #
19 | # it uses git to find the forking point and which files were modified - i.e. files not under git won't be considered
20 | # since the output of this script is fed into Makefile commands it doesn't print a newline after the results
21 |
22 | import re
23 | import subprocess
24 | import sys
25 |
26 |
# Commit at which the current branch forked from `main` (raw git output, trailing newline included).
fork_point_sha = subprocess.check_output(["git", "merge-base", "main", "HEAD"]).decode("utf-8")

# Files changed relative to the fork point. `.split()` strips the trailing newline from the sha
# and breaks git's output on any whitespace.
diff_command = ["git", "diff", "--name-only", *fork_point_sha.split()]
modified_files = subprocess.check_output(diff_command).decode("utf-8").split()

# Keep only .py files whose path starts with one of the directories given on the command line.
joined_dirs = "|".join(sys.argv[1:])
regex = re.compile(rf"^({joined_dirs}).*?\.py$")
relevant_modified_files = list(filter(regex.match, modified_files))

# No trailing newline: the output is consumed by Makefile commands.
print(" ".join(relevant_modified_files), end="")
35 |
--------------------------------------------------------------------------------
/diffusers/utils/print_env.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # coding=utf-8
4 | # Copyright 2023 The HuggingFace Inc. team.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # this script dumps information about the environment
19 |
20 | import os
21 | import platform
22 | import sys
23 |
24 |
# Set before the optional imports below (presumably to silence TensorFlow's C++ logging - TODO confirm).
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

for label, value in (
    ("Python version:", sys.version),
    ("OS platform:", platform.platform()),
    ("OS architecture:", platform.machine()),
):
    print(label, value)

# torch is optional: report None when it cannot be imported.
try:
    import torch

    print("Torch version:", torch.__version__)
    print("Cuda available:", torch.cuda.is_available())
    print("Cuda version:", torch.version.cuda)
    print("CuDNN version:", torch.backends.cudnn.version())
    print("Number of GPUs available:", torch.cuda.device_count())
except ImportError:
    print("Torch version:", None)

# transformers is optional as well.
try:
    import transformers

    print("transformers version:", transformers.__version__)
except ImportError:
    print("transformers version:", None)
49 |
--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
pip install pyarrow==6.0.1
# pip install accelerate comet-ml matplotlib datasets tqdm tensorboard tensorboardX torchvision tensorflow-datasets einops pytorch-fid joblib PyYAML kaggle wandb torchsummary torchinfo
pip install -r requirements.txt

# Install the bundled diffusers package. The subshell leaves the caller's
# working directory untouched, and `&&` prevents `pip install .` from running
# in the wrong directory when `cd` fails.
(cd diffusers && pip install .)

# Output and dataset directories. `-p` creates the parents (measure, datasets)
# and makes the script safe to re-run when the directories already exist.
mkdir -p measure/CELEBA-HQ measure/CIFAR10 datasets/celeba_hq_256
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate==0.11.0
2 | comet-ml==3.31.12
3 | matplotlib==3.3.0
4 | datasets==2.5.2
5 | tqdm==4.64.1
6 | tensorboard==2.10.1
7 | tensorboardX==2.5.1
8 | torchvision==0.11.2+cu111
9 | tensorflow-datasets==4.6.0
10 | einops==0.4.1
11 | pytorch-fid==0.2.1
12 | joblib==1.1.0
13 | PyYAML==6.0
14 | kaggle==1.5.12
15 | wandb==0.13.4
16 | torchsummary==1.5.1
17 | torchinfo==1.7.1
18 |
--------------------------------------------------------------------------------
/static/cat_wo_bg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/static/cat_wo_bg.png
--------------------------------------------------------------------------------
/static/fedora-hat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/static/fedora-hat.png
--------------------------------------------------------------------------------
/static/glasses.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/static/glasses.png
--------------------------------------------------------------------------------
/static/hat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/static/hat.png
--------------------------------------------------------------------------------
/static/stop_sign_bg_blk.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/static/stop_sign_bg_blk.jpg
--------------------------------------------------------------------------------
/static/stop_sign_bg_w.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/static/stop_sign_bg_w.jpg
--------------------------------------------------------------------------------
/static/stop_sign_wo_bg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/BadDiffusion/c806a7fe8dc3420ff095d8729cd7a63a3b87b969/static/stop_sign_wo_bg.png
--------------------------------------------------------------------------------