├── tests ├── __init__.py ├── models │ ├── __init__.py │ ├── test_models_vae_flax.py │ ├── test_models_unet_1d.py │ └── test_models_vq.py ├── pipelines │ ├── __init__.py │ ├── ddim │ │ └── __init__.py │ ├── ddpm │ │ ├── __init__.py │ │ └── test_ddpm.py │ ├── pndm │ │ ├── __init__.py │ │ └── test_pndm.py │ ├── karras_ve │ │ ├── __init__.py │ │ └── test_karras_ve.py │ ├── score_sde_ve │ │ ├── __init__.py │ │ └── test_score_sde_ve.py │ ├── latent_diffusion │ │ └── __init__.py │ └── stable_diffusion │ │ ├── __init__.py │ │ ├── test_onnx_stable_diffusion_img2img.py │ │ ├── test_onnx_stable_diffusion_inpaint.py │ │ └── test_onnx_stable_diffusion.py ├── test_pipelines_common.py ├── test_pipelines_onnx_common.py ├── test_modeling_common_flax.py ├── conftest.py ├── test_outputs.py └── test_training.py ├── scripts ├── __init__.py ├── conversion_ldm_uncond.py └── change_naming_configs_and_checkpoints.py ├── pyproject.toml ├── MANIFEST.in ├── examples ├── textual_inversion │ ├── requirements.txt │ └── requirements_flax.txt ├── unconditional_image_generation │ └── requirements.txt ├── dreambooth │ ├── requirements.txt │ └── requirements_flax.txt ├── text_to_image │ ├── requirements.txt │ └── requirements_flax.txt ├── inference │ ├── image_to_image.py │ ├── inpainting.py │ └── README.md ├── community │ └── one_step_unet.py └── conftest.py ├── src └── diffusers │ ├── pipelines │ ├── ddim │ │ └── __init__.py │ ├── ddpm │ │ ├── __init__.py │ │ └── pipeline_ddpm.py │ ├── pndm │ │ ├── __init__.py │ │ └── pipeline_pndm.py │ ├── score_sde_ve │ │ ├── __init__.py │ │ └── pipeline_score_sde_ve.py │ ├── dance_diffusion │ │ └── __init__.py │ ├── latent_diffusion_uncond │ │ ├── __init__.py │ │ └── pipeline_latent_diffusion_uncond.py │ ├── stochastic_karras_ve │ │ └── __init__.py │ ├── latent_diffusion │ │ └── __init__.py │ ├── __init__.py │ └── stable_diffusion │ │ ├── __init__.py │ │ └── safety_checker_flax.py │ ├── models │ ├── README.md │ ├── __init__.py │ ├── embeddings_flax.py │ └── resnet_flax.py │ ├── schedulers │ ├── README.md │ ├── scheduling_utils.py │ ├── scheduling_utils_flax.py │ ├── __init__.py │ └── scheduling_sde_vp.py │ ├── utils │ ├── dummy_torch_and_scipy_objects.py │ ├── dummy_flax_and_transformers_objects.py │ ├── model_card_template.md │ ├── deprecation_utils.py │ ├── __init__.py │ ├── dummy_torch_and_transformers_and_onnx_objects.py │ ├── dummy_torch_and_transformers_objects.py │ └── outputs.py │ ├── commands │ ├── __init__.py │ ├── diffusers_cli.py │ └── env.py │ ├── dependency_versions_table.py │ ├── dependency_versions_check.py │ └── __init__.py ├── docs └── source │ ├── imgs │ └── diffusers_library.jpg │ ├── optimization │ ├── open_vino.mdx │ ├── onnx.mdx │ └── mps.mdx │ ├── conceptual │ ├── stable_diffusion.mdx │ └── philosophy.mdx │ ├── using-diffusers │ ├── configuration.mdx │ ├── loading.mdx │ ├── img2img.mdx │ ├── unconditional_image_generation.mdx │ ├── conditional_image_generation.mdx │ └── inpaint.mdx │ ├── api │ ├── configuration.mdx │ ├── pipelines │ │ ├── dance_diffusion.mdx │ │ ├── ddim.mdx │ │ ├── ddpm.mdx │ │ ├── stochastic_karras_ve.mdx │ │ ├── pndm.mdx │ │ ├── latent_diffusion_uncond.mdx │ │ ├── latent_diffusion.mdx │ │ └── score_sde_ve.mdx │ ├── diffusion_pipeline.mdx │ ├── outputs.mdx │ ├── models.mdx │ └── logging.mdx │ ├── _toctree.yml │ └── installation.mdx ├── .github ├── workflows │ ├── typos.yml │ ├── delete_doc_comment.yml │ ├── build_documentation.yml │ ├── build_pr_documentation.yml │ ├── stale.yml │ ├── pr_quality.yml │ ├── push_tests.yml │ └── pr_tests.yml └── 
ISSUE_TEMPLATE │ ├── feedback.md │ ├── config.yml │ ├── feature_request.md │ ├── new-model-addition.yml │ └── bug-report.yml ├── _typos.toml ├── setup.cfg ├── utils ├── print_env.py ├── get_modified_files.py ├── stale.py └── check_config_docstrings.py ├── .gitignore └── Makefile /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/stable_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py36'] 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include src/diffusers/utils/model_card_template.md 3 | -------------------------------------------------------------------------------- /examples/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.21.0 4 | -------------------------------------------------------------------------------- /examples/unconditional_image_generation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | datasets 4 | -------------------------------------------------------------------------------- 
/src/diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_ddim import DDIMPipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_ddpm import DDPMPipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_pndm import PNDMPipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/dance_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_dance_diffusion import DanceDiffusionPipeline 3 | -------------------------------------------------------------------------------- /docs/source/imgs/diffusers_library.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrazyBoyM/diffusers_dreambooth/main/docs/source/imgs/diffusers_library.jpg -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_latent_diffusion_uncond import LDMPipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 3 | -------------------------------------------------------------------------------- /examples/dreambooth/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers>=0.5.0 2 | accelerate 3 | torchvision 4 | transformers>=4.21.0 5 | ftfy 6 | tensorboard 7 | modelcards -------------------------------------------------------------------------------- /examples/text_to_image/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.4.1 2 | accelerate 3 | torchvision 4 | transformers>=4.21.0 5 | ftfy 6 | tensorboard 7 | modelcards -------------------------------------------------------------------------------- /src/diffusers/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | For more detail on the models, please refer to the [docs](https://huggingface.co/docs/diffusers/api/models).
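4 | 5 | As a minimal sketch of typical usage (the checkpoint name `google/ddpm-cifar10-32` is only an illustrative example, and the `in_channels`/`sample_size` attributes are assumed to be exposed on the model as they are elsewhere in this repository), a pretrained model can be loaded from the Hub and run on a noisy sample: 6 | 7 | ```python 8 | import torch 9 | 10 | from diffusers import UNet2DModel 11 | 12 | # download the model weights and configuration from the Hub 13 | model = UNet2DModel.from_pretrained("google/ddpm-cifar10-32") 14 | 15 | # a dummy batch of noisy samples and a single timestep 16 | noise = torch.randn(1, model.in_channels, model.sample_size, model.sample_size) 17 | timestep = 1 18 | 19 | with torch.no_grad(): 20 |     output = model(noise, timestep).sample 21 | 22 | # the model output has the same shape as the input sample 23 | print(output.shape) 24 | ```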
-------------------------------------------------------------------------------- /examples/dreambooth/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | diffusers>=0.5.1 2 | transformers>=4.21.0 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | modelcards -------------------------------------------------------------------------------- /examples/text_to_image/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | diffusers>=0.5.1 2 | transformers>=4.21.0 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | modelcards -------------------------------------------------------------------------------- /examples/textual_inversion/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | diffusers>=0.5.1 2 | transformers>=4.21.0 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | modelcards -------------------------------------------------------------------------------- /src/diffusers/schedulers/README.md: -------------------------------------------------------------------------------- 1 | # Schedulers 2 | 3 | For more information on the schedulers, please refer to the [docs](https://huggingface.co/docs/diffusers/api/schedulers). -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from ...utils import is_transformers_available 3 | 4 | 5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /.github/workflows/typos.yml: -------------------------------------------------------------------------------- 1 | name: Check typos 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: typos-action 14 | uses: crate-ci/typos@v1.12.4 15 | -------------------------------------------------------------------------------- /examples/inference/image_to_image.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionImg2ImgPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `image_to_image.py` script is outdated. Please directly use `from diffusers import" 8 | " StableDiffusionImg2ImgPipeline` instead." 9 | ) 10 | -------------------------------------------------------------------------------- /examples/inference/inpainting.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionInpaintPipeline as StableDiffusionInpaintPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `inpainting.py` script is outdated. Please directly use `from diffusers import" 8 | " StableDiffusionInpaintPipeline` instead."
9 | ) 10 | -------------------------------------------------------------------------------- /.github/workflows/delete_doc_comment.yml: -------------------------------------------------------------------------------- 1 | name: Delete dev documentation 2 | 3 | on: 4 | pull_request: 5 | types: [ closed ] 6 | 7 | 8 | jobs: 9 | delete: 10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main 11 | with: 12 | pr_number: ${{ github.event.number }} 13 | package: diffusers 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feedback.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "💬 Feedback about API Design" 3 | about: Give feedback about the current API design 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What API design would you like to have changed or added to the library? Why?** 11 | 12 | **What use case would this enable or better enable? Can you give us a code example?** 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | contact_links: 2 | - name: Forum 3 | url: https://discuss.huggingface.co/c/discussion-related-to-httpsgithubcomhuggingfacediffusers/63 4 | about: General usage questions and community discussions 5 | - name: Blank issue 6 | url: https://github.com/huggingface/diffusers/issues/new 7 | about: Please note that the Forum is usually the right place for discussions 8 | -------------------------------------------------------------------------------- /tests/test_pipelines_common.py: -------------------------------------------------------------------------------- 1 | from diffusers.utils.testing_utils import require_torch 2 | 3 | 4 | @require_torch 5 | class PipelineTesterMixin: 6 | """ 7 | This mixin is designed to be used with unittest.TestCase classes. 8 | It provides a set of common tests for each PyTorch pipeline, e.g. saving and loading the pipeline, 9 | equivalence of dict and tuple outputs, etc. 10 | """ 11 | 12 | pass 13 | -------------------------------------------------------------------------------- /tests/test_pipelines_onnx_common.py: -------------------------------------------------------------------------------- 1 | from diffusers.utils.testing_utils import require_onnxruntime 2 | 3 | 4 | @require_onnxruntime 5 | class OnnxPipelineTesterMixin: 6 | """ 7 | This mixin is designed to be used with unittest.TestCase classes. 8 | It provides a set of common tests for each ONNXRuntime pipeline, e.g. saving and loading the pipeline, 9 | equivalence of dict and tuple outputs, etc.
10 | """ 11 | 12 | pass 13 | -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - doc-builder* 8 | - v*-release 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.sha }} 15 | package: diffusers 16 | secrets: 17 | token: ${{ secrets.HUGGINGFACE_PUSH }} 18 | -------------------------------------------------------------------------------- /_typos.toml: -------------------------------------------------------------------------------- 1 | # Files for typos 2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started 3 | 4 | [default.extend-identifiers] 5 | 6 | [default.extend-words] 7 | NIN="NIN" # NIN is used in scripts/convert_ncsnpp_original_checkpoint_to_diffusers.py 8 | nd="np" # nd may be np (numpy) 9 | parms="parms" # parms is used in scripts/convert_original_stable_diffusion_to_diffusers.py 10 | 11 | 12 | [files] 13 | extend-exclude = ["_typos.toml"] 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = accelerate 7 | known_third_party = 8 | numpy 9 | torch 10 | torch_xla 11 | 12 | line_length = 119 13 | lines_after_imports = 2 14 | multi_line_output = 3 15 | use_parentheses = True 16 | 17 | [flake8] 18 | ignore = E203, E722, E501, E741, W503, W605 19 | max-line-length = 119 20 | per-file-ignores = __init__.py:F401 21 | -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | 6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.event.pull_request.head.sha }} 15 | pr_number: ${{ github.event.number }} 16 | package: diffusers 17 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_torch_and_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class LMSDiscreteScheduler(metaclass=DummyObject): 8 | _backends = ["torch", "scipy"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["torch", "scipy"]) 12 | 13 | @classmethod 14 | def from_config(cls, *args, **kwargs): 15 | requires_backends(cls, ["torch", "scipy"]) 16 | 17 | @classmethod 18 | def from_pretrained(cls, *args, **kwargs): 19 | requires_backends(cls, ["torch", "scipy"]) 20 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Stale Bot 2 | 3 | on: 4 | schedule: 5 | - cron: "0 15 * * *" 6 | 7 | jobs: 8 | close_stale_issues: 9 | name: Close Stale Issues 10 | if: github.repository == 'huggingface/diffusers' 11 | runs-on: ubuntu-latest 12 | env: 13 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - name: Setup Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: 3.7 21 | 22 | - name: Install requirements 23 | run: | 24 | pip install PyGithub 25 | - name: Close stale issues 26 | run: | 27 | python utils/stale.py 28 | -------------------------------------------------------------------------------- /docs/source/optimization/open_vino.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # OpenVINO 14 | 15 | Under construction 🚧 16 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_flax_and_transformers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class FlaxStableDiffusionPipeline(metaclass=DummyObject): 8 | _backends = ["flax", "transformers"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["flax", "transformers"]) 12 | 13 | @classmethod 14 | def from_config(cls, *args, **kwargs): 15 | requires_backends(cls, ["flax", "transformers"]) 16 | 17 | @classmethod 18 | def from_pretrained(cls, *args, **kwargs): 19 | requires_backends(cls, ["flax", "transformers"]) 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature request" 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /examples/community/one_step_unet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import torch 3 | 4 | from diffusers import DiffusionPipeline 5 | 6 | 7 | class UnetSchedulerOneForwardPipeline(DiffusionPipeline): 8 | def __init__(self, unet, scheduler): 9 | super().__init__() 10 | 11 | self.register_modules(unet=unet, scheduler=scheduler) 12 | 13 | def __call__(self): 14 | image = torch.randn( 15 | (1, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 16 | ) 17 | timestep = 1 18 | 19 | model_output = self.unet(image, timestep).sample 20 | scheduler_output = self.scheduler.step(model_output, timestep, image).prev_sample 21 | 22 | return scheduler_output 23 | -------------------------------------------------------------------------------- /docs/source/conceptual/stable_diffusion.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Stable Diffusion 14 | 15 | Please visit this [very detailed blog post](https://huggingface.co/blog/stable_diffusion) on Stable Diffusion! 16 | -------------------------------------------------------------------------------- /examples/inference/README.md: -------------------------------------------------------------------------------- 1 | # Inference Examples 2 | 3 | **The inference examples folder is deprecated and will be removed in a future version**. 4 | **Officially supported inference examples can be found in the [Pipelines folder](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines)**. 5 | 6 | - For `Image-to-Image text-guided generation with Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 7 | - For `In-painting using Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 8 | - For `Tweak prompts reusing seeds and latents`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 9 | -------------------------------------------------------------------------------- /docs/source/using-diffusers/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | 15 | # Configuration 16 | 17 | Configurations in Diffusers are handled with the `ConfigMixin` class. 18 | 19 | [[autodoc]] ConfigMixin 20 | 21 | Under further construction 🚧, open a [PR](https://github.com/huggingface/diffusers/compare) if you want to contribute! 22 | -------------------------------------------------------------------------------- /src/diffusers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseDiffusersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /docs/source/api/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration 14 | 15 | In Diffusers, schedulers of type [`schedulers.scheduling_utils.SchedulerMixin`] and models of type [`ModelMixin`] inherit from [`ConfigMixin`], which conveniently takes care of storing all parameters that are 16 | passed to the respective `__init__` methods in a JSON configuration file. 17 | 18 | TODO(PVP) - add example and better info here 19 | 20 | ## ConfigMixin 21 | [[autodoc]] ConfigMixin 22 | - from_config 23 | - save_config 24 | -------------------------------------------------------------------------------- /src/diffusers/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ..utils import is_flax_available, is_torch_available 16 | 17 | 18 | if is_torch_available(): 19 | from .unet_1d import UNet1DModel 20 | from .unet_2d import UNet2DModel 21 | from .unet_2d_condition import UNet2DConditionModel 22 | from .vae import AutoencoderKL, VQModel 23 | 24 | if is_flax_available(): 25 | from .unet_2d_condition_flax import FlaxUNet2DConditionModel 26 | from .vae_flax import FlaxAutoencoderKL 27 | -------------------------------------------------------------------------------- /src/diffusers/dependency_versions_table.py: -------------------------------------------------------------------------------- 1 | # THIS FILE HAS BEEN AUTOGENERATED. To update: 2 | # 1. modify the `_deps` dict in setup.py 3 | # 2.
run `make deps_table_update` 4 | deps = { 5 | "Pillow": "Pillow<10.0", 6 | "accelerate": "accelerate>=0.11.0", 7 | "black": "black==22.8", 8 | "datasets": "datasets", 9 | "filelock": "filelock", 10 | "flake8": "flake8>=3.8.3", 11 | "flax": "flax>=0.4.1", 12 | "hf-doc-builder": "hf-doc-builder>=0.3.0", 13 | "huggingface-hub": "huggingface-hub>=0.10.0", 14 | "importlib_metadata": "importlib_metadata", 15 | "isort": "isort>=5.5.4", 16 | "jax": "jax>=0.2.8,!=0.3.2,<=0.3.6", 17 | "jaxlib": "jaxlib>=0.1.65,<=0.3.6", 18 | "modelcards": "modelcards>=0.1.4", 19 | "numpy": "numpy", 20 | "onnxruntime": "onnxruntime", 21 | "parameterized": "parameterized", 22 | "pytest": "pytest", 23 | "pytest-timeout": "pytest-timeout", 24 | "pytest-xdist": "pytest-xdist", 25 | "scipy": "scipy", 26 | "regex": "regex!=2019.12.17", 27 | "requests": "requests", 28 | "tensorboard": "tensorboard", 29 | "torch": "torch>=1.4", 30 | "torchvision": "torchvision", 31 | "transformers": "transformers>=4.21.0", 32 | } 33 | -------------------------------------------------------------------------------- /tests/models/test_models_vae_flax.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from diffusers import FlaxAutoencoderKL 4 | from diffusers.utils import is_flax_available 5 | from diffusers.utils.testing_utils import require_flax 6 | 7 | from ..test_modeling_common_flax import FlaxModelTesterMixin 8 | 9 | 10 | if is_flax_available(): 11 | import jax 12 | 13 | 14 | @require_flax 15 | class FlaxAutoencoderKLTests(FlaxModelTesterMixin, unittest.TestCase): 16 | model_class = FlaxAutoencoderKL 17 | 18 | @property 19 | def dummy_input(self): 20 | batch_size = 4 21 | num_channels = 3 22 | sizes = (32, 32) 23 | 24 | prng_key = jax.random.PRNGKey(0) 25 | image = jax.random.uniform(prng_key, ((batch_size, num_channels) + sizes)) 26 | 27 | return {"sample": image, "prng_key": prng_key} 28 | 29 | def prepare_init_args_and_inputs_for_common(self): 30 | init_dict = { 31 | "block_out_channels": [32, 64], 32 | "in_channels": 3, 33 | "out_channels": 3, 34 | "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], 35 | "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], 36 | "latent_channels": 4, 37 | } 38 | inputs_dict = self.dummy_input 39 | return init_dict, inputs_dict 40 | -------------------------------------------------------------------------------- /docs/source/using-diffusers/loading.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Loading 14 | 15 | The core functionality for saving and loading in `Diffusers` is built on the Hugging Face Hub. 16 | 17 | [[autodoc]] modeling_utils.ModelMixin 18 | - from_pretrained 19 | - save_pretrained 20 | 21 | [[autodoc]] pipeline_utils.DiffusionPipeline 22 | - from_pretrained 23 | - save_pretrained 24 | 25 | [[autodoc]] modeling_flax_utils.FlaxModelMixin 26 | - from_pretrained 27 | - save_pretrained 28 | 29 | [[autodoc]] pipeline_flax_utils.FlaxDiffusionPipeline 30 | - from_pretrained 31 | - save_pretrained 32 | 33 | 34 | Under further construction 🚧, open a [PR](https://github.com/huggingface/diffusers/compare) if you want to contribute!
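35 | 36 | As a minimal sketch of the typical round trip (the checkpoint name `runwayml/stable-diffusion-v1-5` and the local folder name are only illustrative examples), a pipeline can be downloaded from the Hub, saved locally, and reloaded: 37 | 38 | ```python 39 | from diffusers import DiffusionPipeline 40 | 41 | # downloads all pipeline components (models, scheduler, configurations) from the Hub 42 | pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") 43 | 44 | # writes every component to a local folder ... 45 | pipeline.save_pretrained("./my-pipeline") 46 | 47 | # ... from which the pipeline can be restored without re-downloading 48 | pipeline = DiffusionPipeline.from_pretrained("./my-pipeline") 49 | ```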
35 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from ..utils import is_flax_available, is_onnx_available, is_torch_available, is_transformers_available 2 | 3 | 4 | if is_torch_available(): 5 | from .dance_diffusion import DanceDiffusionPipeline 6 | from .ddim import DDIMPipeline 7 | from .ddpm import DDPMPipeline 8 | from .latent_diffusion_uncond import LDMPipeline 9 | from .pndm import PNDMPipeline 10 | from .score_sde_ve import ScoreSdeVePipeline 11 | from .stochastic_karras_ve import KarrasVePipeline 12 | else: 13 | from ..utils.dummy_pt_objects import * # noqa F403 14 | 15 | if is_torch_available() and is_transformers_available(): 16 | from .latent_diffusion import LDMTextToImagePipeline 17 | from .stable_diffusion import ( 18 | StableDiffusionImg2ImgPipeline, 19 | StableDiffusionInpaintPipeline, 20 | StableDiffusionInpaintPipelineLegacy, 21 | StableDiffusionPipeline, 22 | ) 23 | 24 | if is_transformers_available() and is_onnx_available(): 25 | from .stable_diffusion import ( 26 | OnnxStableDiffusionImg2ImgPipeline, 27 | OnnxStableDiffusionInpaintPipeline, 28 | OnnxStableDiffusionPipeline, 29 | StableDiffusionOnnxPipeline, 30 | ) 31 | 32 | if is_transformers_available() and is_flax_available(): 33 | from .stable_diffusion import FlaxStableDiffusionPipeline 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-model-addition.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F31F New model/pipeline/scheduler addition" 2 | description: Submit a proposal/request to implement a new diffusion model / pipeline / scheduler 3 | labels: [ "New model/pipeline/scheduler" ] 4 | 5 | body: 6 | - type: textarea 7 | id: description-request 8 | validations: 9 | required: true 10 | attributes: 11 | label: Model/Pipeline/Scheduler description 12 | description: | 13 | Put any and all important information relative to the model/pipeline/scheduler 14 | 15 | - type: checkboxes 16 | id: information-tasks 17 | attributes: 18 | label: Open source status 19 | description: | 20 | Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `diffusers`. 21 | options: 22 | - label: "The model implementation is available" 23 | - label: "The model weights are available (Only relevant if addition is not a scheduler)." 24 | 25 | - type: textarea 26 | id: additional-info 27 | attributes: 28 | label: Provide useful links for the implementation 29 | description: | 30 | Please provide information regarding the implementation, the weights, and the authors. 31 | Please mention the authors by @gh-username if you're aware of their usernames. 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: Report a bug on diffusers 3 | labels: [ "bug" ] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Thanks for taking the time to fill out this bug report! 9 | - type: textarea 10 | id: bug-description 11 | attributes: 12 | label: Describe the bug 13 | description: A clear and concise description of what the bug is. 
If you intend to submit a pull request for this issue, tell us in the description. Thanks! 14 | placeholder: Bug description 15 | validations: 16 | required: true 17 | - type: textarea 18 | id: reproduction 19 | attributes: 20 | label: Reproduction 21 | description: Please provide a minimal reproducible example that we can copy/paste to reproduce the issue. 22 | placeholder: Reproduction 23 | - type: textarea 24 | id: logs 25 | attributes: 26 | label: Logs 27 | description: "Please include the Python logs if you can." 28 | render: shell 29 | - type: textarea 30 | id: system-info 31 | attributes: 32 | label: System Info 33 | description: Please share your system info with us. You can run the command `diffusers-cli env` and copy-paste its output below. 34 | placeholder: diffusers version, platform, python version, ... 35 | validations: 36 | required: true 37 | -------------------------------------------------------------------------------- /src/diffusers/commands/diffusers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from argparse import ArgumentParser 17 | 18 | from .env import EnvironmentCommand 19 | 20 | 21 | def main(): 22 | parser = ArgumentParser("Diffusers CLI tool", usage="diffusers-cli <command> [<args>]") 23 | commands_parser = parser.add_subparsers(help="diffusers-cli command helpers") 24 | 25 | # Register commands 26 | EnvironmentCommand.register_subcommand(commands_parser) 27 | 28 | # Let's go 29 | args = parser.parse_args() 30 | 31 | if not hasattr(args, "func"): 32 | parser.print_help() 33 | exit(1) 34 | 35 | # Run 36 | service = args.func(args) 37 | service.run() 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /src/diffusers/schedulers/scheduling_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass 15 | 16 | import torch 17 | 18 | from ..utils import BaseOutput 19 | 20 | 21 | SCHEDULER_CONFIG_NAME = "scheduler_config.json" 22 | 23 | 24 | @dataclass 25 | class SchedulerOutput(BaseOutput): 26 | """ 27 | Base class for the scheduler's step function output.
28 | 29 | Args: 30 | prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): 31 | Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the 32 | denoising loop. 33 | """ 34 | 35 | prev_sample: torch.FloatTensor 36 | 37 | 38 | class SchedulerMixin: 39 | """ 40 | Mixin containing common functions for the schedulers. 41 | """ 42 | 43 | config_name = SCHEDULER_CONFIG_NAME 44 | -------------------------------------------------------------------------------- /docs/source/api/pipelines/dance_diffusion.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Dance Diffusion 14 | 15 | ## Overview 16 | 17 | [Dance Diffusion](https://github.com/Harmonai-org/sample-generator) by Zach Evans. 18 | 19 | Dance Diffusion is the first in a suite of generative audio tools for producers and musicians to be released by Harmonai. 20 | For more info or to get involved in the development of these tools, please visit https://harmonai.org and fill out the form on the front page. 21 | 22 | The original codebase of this implementation can be found [here](https://github.com/Harmonai-org/sample-generator). 23 | 24 | ## Available Pipelines: 25 | 26 | | Pipeline | Tasks | Colab 27 | |---|---|:---:| 28 | | [pipeline_dance_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py) | *Unconditional Audio Generation* | - | 29 | 30 | 31 | ## DanceDiffusionPipeline 32 | [[autodoc]] DanceDiffusionPipeline 33 | - __call__ 34 | -------------------------------------------------------------------------------- /.github/workflows/pr_quality.yml: -------------------------------------------------------------------------------- 1 | name: Run code quality checks 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | push: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | check_code_quality: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Set up Python 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: "3.7" 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install .[quality] 28 | - name: Check quality 29 | run: | 30 | black --check --preview examples tests src utils scripts 31 | isort --check-only examples tests src utils scripts 32 | flake8 examples tests src utils scripts 33 | doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source 34 | 35 | check_repository_consistency: 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v3 39 | - name: Set up Python 40 | uses: actions/setup-python@v4 41 | with: 42 | python-version: "3.7" 43 | - name: Install dependencies 44 | run: | 45 | python -m pip install --upgrade pip 46 | pip install .[quality] 47 | - name: Check quality 48 | run: | 49 | python utils/check_copies.py 50 | python utils/check_dummies.py 51 | -------------------------------------------------------------------------------- /utils/print_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # coding=utf-8 4 | # Copyright 2022 The HuggingFace Inc. team. 
5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # this script dumps information about the environment 19 | 20 | import os 21 | import platform 22 | import sys 23 | 24 | 25 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 26 | 27 | print("Python version:", sys.version) 28 | 29 | print("OS platform:", platform.platform()) 30 | print("OS architecture:", platform.machine()) 31 | 32 | try: 33 | import torch 34 | 35 | print("Torch version:", torch.__version__) 36 | print("Cuda available:", torch.cuda.is_available()) 37 | print("Cuda version:", torch.version.cuda) 38 | print("CuDNN version:", torch.backends.cudnn.version()) 39 | print("Number of GPUs available:", torch.cuda.device_count()) 40 | except ImportError: 41 | print("Torch version:", None) 42 | 43 | try: 44 | import transformers 45 | 46 | print("transformers version:", transformers.__version__) 47 | except ImportError: 48 | print("transformers version:", None) 49 | -------------------------------------------------------------------------------- /utils/get_modified_files.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # this script reports modified .py files under the desired list of top-level sub-dirs passed as a list of arguments, e.g.: 17 | # python ./utils/get_modified_files.py utils src tests examples 18 | # 19 | # it uses git to find the forking point and which files were modified - i.e. 
files not under git won't be considered 20 | # since the output of this script is fed into Makefile commands it doesn't print a newline after the results 21 | 22 | import re 23 | import subprocess 24 | import sys 25 | 26 | 27 | fork_point_sha = subprocess.check_output("git merge-base main HEAD".split()).decode("utf-8") 28 | modified_files = subprocess.check_output(f"git diff --name-only {fork_point_sha}".split()).decode("utf-8").split() 29 | 30 | joined_dirs = "|".join(sys.argv[1:]) 31 | regex = re.compile(rf"^({joined_dirs}).*?\.py$") 32 | 33 | relevant_modified_files = [x for x in modified_files if regex.match(x)] 34 | print(" ".join(relevant_modified_files), end="") 35 | -------------------------------------------------------------------------------- /tests/models/test_models_unet_1d.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | 18 | import torch 19 | 20 | from diffusers import UNet1DModel 21 | from diffusers.utils import slow, torch_device 22 | 23 | 24 | torch.backends.cuda.matmul.allow_tf32 = False 25 | 26 | 27 | class UnetModel1DTests(unittest.TestCase): 28 | @slow 29 | def test_unet_1d_maestro(self): 30 | model_id = "harmonai/maestro-150k" 31 | model = UNet1DModel.from_pretrained(model_id, subfolder="unet", device_map="auto") 32 | model.to(torch_device) 33 | 34 | sample_size = 65536 35 | noise = torch.sin(torch.arange(sample_size)[None, None, :].repeat(1, 2, 1)).to(torch_device) 36 | timestep = torch.tensor([1]).to(torch_device) 37 | 38 | with torch.no_grad(): 39 | output = model(noise, timestep).sample 40 | 41 | output_sum = output.abs().sum() 42 | output_max = output.abs().max() 43 | 44 | assert (output_sum - 224.0896).abs() < 4e-2 45 | assert (output_max - 0.0607).abs() < 4e-4 46 | -------------------------------------------------------------------------------- /tests/test_modeling_common_flax.py: -------------------------------------------------------------------------------- 1 | from diffusers.utils import is_flax_available 2 | from diffusers.utils.testing_utils import require_flax 3 | 4 | 5 | if is_flax_available(): 6 | import jax 7 | 8 | 9 | @require_flax 10 | class FlaxModelTesterMixin: 11 | def test_output(self): 12 | init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() 13 | 14 | model = self.model_class(**init_dict) 15 | variables = model.init(inputs_dict["prng_key"], inputs_dict["sample"]) 16 | jax.lax.stop_gradient(variables) 17 | 18 | output = model.apply(variables, inputs_dict["sample"]) 19 | 20 | if isinstance(output, dict): 21 | output = output.sample 22 | 23 | self.assertIsNotNone(output) 24 | expected_shape = inputs_dict["sample"].shape 25 | self.assertEqual(output.shape, expected_shape, "Input and output shapes do not match") 26 | 27 | def test_forward_with_norm_groups(self): 28 | init_dict, inputs_dict = 
self.prepare_init_args_and_inputs_for_common() 29 | 30 | init_dict["norm_num_groups"] = 16 31 | init_dict["block_out_channels"] = (16, 32) 32 | 33 | model = self.model_class(**init_dict) 34 | variables = model.init(inputs_dict["prng_key"], inputs_dict["sample"]) 35 | jax.lax.stop_gradient(variables) 36 | 37 | output = model.apply(variables, inputs_dict["sample"]) 38 | 39 | if isinstance(output, dict): 40 | output = output.sample 41 | 42 | self.assertIsNotNone(output) 43 | expected_shape = inputs_dict["sample"].shape 44 | self.assertEqual(output.shape, expected_shape, "Input and output shapes do not match") 45 | -------------------------------------------------------------------------------- /src/diffusers/utils/model_card_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | {{ card_data }} 3 | --- 4 | 5 | 7 | 8 | # {{ model_name | default("Diffusion Model") }} 9 | 10 | ## Model description 11 | 12 | This diffusion model is trained with the [🤗 Diffusers](https://github.com/huggingface/diffusers) library 13 | on the `{{ dataset_name }}` dataset. 14 | 15 | ## Intended uses & limitations 16 | 17 | #### How to use 18 | 19 | ```python 20 | # TODO: add an example code snippet for running this diffusion pipeline 21 | ``` 22 | 23 | #### Limitations and bias 24 | 25 | [TODO: provide examples of latent issues and potential remediations] 26 | 27 | ## Training data 28 | 29 | [TODO: describe the data used to train the model] 30 | 31 | ### Training hyperparameters 32 | 33 | The following hyperparameters were used during training: 34 | - learning_rate: {{ learning_rate }} 35 | - train_batch_size: {{ train_batch_size }} 36 | - eval_batch_size: {{ eval_batch_size }} 37 | - gradient_accumulation_steps: {{ gradient_accumulation_steps }} 38 | - optimizer: AdamW with betas=({{ adam_beta1 }}, {{ adam_beta2 }}), weight_decay={{ adam_weight_decay }} and epsilon={{ adam_epsilon }} 39 | - lr_scheduler: {{ lr_scheduler }} 40 | - lr_warmup_steps: {{ lr_warmup_steps }} 41 | - ema_inv_gamma: {{ ema_inv_gamma }} 42 | - ema_power: {{ ema_power }} 43 | - ema_max_decay: {{ ema_max_decay }} 44 | - mixed_precision: {{ mixed_precision }} 45 | 46 | ### Training results 47 | 48 | 📈 [TensorBoard logs](https://huggingface.co/{{ repo_name }}/tensorboard?#scalars) 49 | 50 | 51 | -------------------------------------------------------------------------------- /src/diffusers/schedulers/scheduling_utils_flax.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass 15 | from typing import Tuple 16 | 17 | import jax.numpy as jnp 18 | 19 | from ..utils import BaseOutput 20 | 21 | 22 | SCHEDULER_CONFIG_NAME = "scheduler_config.json" 23 | 24 | 25 | @dataclass 26 | class FlaxSchedulerOutput(BaseOutput): 27 | """ 28 | Base class for the scheduler's step function output.
29 | 30 | Args: 31 | prev_sample (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)` for images): 32 | Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the 33 | denoising loop. 34 | """ 35 | 36 | prev_sample: jnp.ndarray 37 | 38 | 39 | class FlaxSchedulerMixin: 40 | """ 41 | Mixin containing common functions for the schedulers. 42 | """ 43 | 44 | config_name = SCHEDULER_CONFIG_NAME 45 | 46 | 47 | def broadcast_to_shape_from_left(x: jnp.ndarray, shape: Tuple[int]) -> jnp.ndarray: 48 | assert len(shape) >= x.ndim 49 | return jnp.broadcast_to(x.reshape(x.shape + (1,) * (len(shape) - x.ndim)), shape) 50 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import sys 19 | import warnings 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(dirname(__file__)), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | # silence FutureWarning warnings in tests since often we can't act on them until 29 | # they become normal warnings - i.e. the tests still need to test the current functionality 30 | warnings.simplefilter(action="ignore", category=FutureWarning) 31 | 32 | 33 | def pytest_addoption(parser): 34 | from diffusers.utils.testing_utils import pytest_addoption_shared 35 | 36 | pytest_addoption_shared(parser) 37 | 38 | 39 | def pytest_terminal_summary(terminalreporter): 40 | from diffusers.utils.testing_utils import pytest_terminal_summary_main 41 | 42 | make_reports = terminalreporter.config.getoption("--make-reports") 43 | if make_reports: 44 | pytest_terminal_summary_main(terminalreporter, id=make_reports) 45 | -------------------------------------------------------------------------------- /docs/source/api/diffusion_pipeline.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Pipelines 14 | 15 | The [`DiffusionPipeline`] is the easiest way to load any pretrained diffusion pipeline from the [Hub](https://huggingface.co/models?library=diffusers) and to use it in inference. 16 | 17 | <Tip> 18 | 19 | One should not use the Diffusion Pipeline class for training or fine-tuning a diffusion model. Individual 20 | components of diffusion pipelines are usually trained individually, so we suggest working directly 21 | with [`UNet2DModel`] and [`UNet2DConditionModel`].
22 | 23 | </Tip> 24 | 25 | Any diffusion pipeline that is loaded with [`~DiffusionPipeline.from_pretrained`] will automatically 26 | detect the pipeline type, *e.g.* [`StableDiffusionPipeline`] and consequently load each component of the 27 | pipeline and pass them into the `__init__` function of the pipeline, *e.g.* [`~StableDiffusionPipeline.__init__`]. 28 | 29 | Any pipeline object can be saved locally with [`~DiffusionPipeline.save_pretrained`]. 30 | 31 | ## DiffusionPipeline 32 | [[autodoc]] DiffusionPipeline 33 | - from_pretrained 34 | - save_pretrained 35 | - to 36 | - device 37 | - components 38 | 39 | ## ImagePipelineOutput 40 | By default, diffusion pipelines return an object of the following class: 41 | 42 | [[autodoc]] pipeline_utils.ImagePipelineOutput 43 | -------------------------------------------------------------------------------- /examples/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import sys 19 | import warnings 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(dirname(dirname(__file__))), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | 29 | # silence FutureWarning warnings in tests since often we can't act on them until 30 | # they become normal warnings - i.e. the tests still need to test the current functionality 31 | warnings.simplefilter(action="ignore", category=FutureWarning) 32 | 33 | 34 | def pytest_addoption(parser): 35 | from diffusers.utils.testing_utils import pytest_addoption_shared 36 | 37 | pytest_addoption_shared(parser) 38 | 39 | 40 | def pytest_terminal_summary(terminalreporter): 41 | from diffusers.utils.testing_utils import pytest_terminal_summary_main 42 | 43 | make_reports = terminalreporter.config.getoption("--make-reports") 44 | if make_reports: 45 | pytest_terminal_summary_main(terminalreporter, id=make_reports) 46 | -------------------------------------------------------------------------------- /docs/source/optimization/onnx.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | # How to use the ONNX Runtime for inference 15 | 16 | 🤗 Diffusers provides a Stable Diffusion pipeline compatible with the ONNX Runtime. This allows you to run Stable Diffusion on any hardware that supports ONNX (including CPUs), even where an accelerated version of PyTorch is not available. 17 | 18 | ## Installation 19 | 20 | - TODO 21 | 22 | ## Stable Diffusion Inference 23 | 24 | The snippet below demonstrates how to use the ONNX runtime.
You need to use `StableDiffusionOnnxPipeline` instead of `StableDiffusionPipeline`. You also need to download the weights from the `onnx` branch of the repository, and indicate the runtime provider you want to use.

```python
# make sure you're logged in with `huggingface-cli login`
from diffusers import StableDiffusionOnnxPipeline

pipe = StableDiffusionOnnxPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    revision="onnx",
    provider="CUDAExecutionProvider",
)

prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(prompt).images[0]
```

## Known Issues

- Generating multiple prompts in a batch seems to take too much memory. While we look into it, you may need to iterate instead of batching.
-------------------------------------------------------------------------------- /docs/source/api/outputs.mdx: --------------------------------------------------------------------------------
# BaseOutputs

All models have outputs that are instances of subclasses of [`~utils.BaseOutput`]. Those are
data structures containing all the information returned by the model, but that can also be used as tuples or
dictionaries.

Let's see how this looks in an example:

```python
from diffusers import DDIMPipeline

pipeline = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
outputs = pipeline()
```

The `outputs` object is an [`~pipeline_utils.ImagePipelineOutput`]. As we can see in the
documentation of that class below, this means it has an `images` attribute.

You can access each attribute as you would usually do, and if that attribute has not been returned by the model, you will get `None`:

```python
outputs.images
```

or via keyword lookup:

```python
outputs["images"]
```

When considering our `outputs` object as a tuple, it only considers the attributes that don't have `None` values.
Here for instance, we could retrieve images via indexing:

```python
outputs[:1]
```

which will return the tuple `(outputs.images,)`, for instance.

## BaseOutput

[[autodoc]] utils.BaseOutput
	- to_tuple
-------------------------------------------------------------------------------- /docs/source/conceptual/philosophy.mdx: --------------------------------------------------------------------------------
# Philosophy

- Readability and clarity are preferred over highly optimized code. Strong emphasis is placed on providing readable, intuitive, and elementary code design. *E.g.*, the provided [schedulers](https://github.com/huggingface/diffusers/tree/main/src/diffusers/schedulers) are separated from the provided [models](https://github.com/huggingface/diffusers/tree/main/src/diffusers/models) and use well-commented code that can be read alongside the original paper.
- Diffusers is **modality independent** and focuses on providing pretrained models and tools to build systems that generate **continuous outputs**, *e.g.* vision and audio. This is one of the guiding goals even if the initial pipelines are devoted to vision tasks.
- Diffusion models and schedulers are provided as concise, elementary building blocks.
In contrast, diffusion pipelines are a collection of end-to-end diffusion systems that can be used out-of-the-box, should stay as close as possible to their original implementations and can include components of other libraries, such as text encoders. Examples of diffusion pipelines are [Glide](https://github.com/openai/glide-text2im), [Latent Diffusion](https://github.com/CompVis/latent-diffusion) and [Stable Diffusion](https://github.com/compvis/stable-diffusion). 18 | -------------------------------------------------------------------------------- /src/diffusers/dependency_versions_check.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import sys 15 | 16 | from .dependency_versions_table import deps 17 | from .utils.versions import require_version, require_version_core 18 | 19 | 20 | # define which module versions we always want to check at run time 21 | # (usually the ones defined in `install_requires` in setup.py) 22 | # 23 | # order specific notes: 24 | # - tqdm must be checked before tokenizers 25 | 26 | pkgs_to_check_at_runtime = "python tqdm regex requests packaging filelock numpy tokenizers".split() 27 | if sys.version_info < (3, 7): 28 | pkgs_to_check_at_runtime.append("dataclasses") 29 | if sys.version_info < (3, 8): 30 | pkgs_to_check_at_runtime.append("importlib_metadata") 31 | 32 | for pkg in pkgs_to_check_at_runtime: 33 | if pkg in deps: 34 | if pkg == "tokenizers": 35 | # must be loaded here, or else tqdm check may fail 36 | from .utils import is_tokenizers_available 37 | 38 | if not is_tokenizers_available(): 39 | continue # not required, check version only if installed 40 | 41 | require_version_core(deps[pkg]) 42 | else: 43 | raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") 44 | 45 | 46 | def dep_version_check(pkg, hint=None): 47 | require_version(deps[pkg], hint) 48 | -------------------------------------------------------------------------------- /docs/source/using-diffusers/img2img.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Text-Guided Image-to-Image Generation 14 | 15 | The [`StableDiffusionImg2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images. 
16 | 17 | ```python 18 | import torch 19 | import requests 20 | from PIL import Image 21 | from io import BytesIO 22 | 23 | from diffusers import StableDiffusionImg2ImgPipeline 24 | 25 | # load the pipeline 26 | device = "cuda" 27 | pipe = StableDiffusionImg2ImgPipeline.from_pretrained( 28 | "runwayml/stable-diffusion-v1-5", revision="fp16", torch_dtype=torch.float16 29 | ).to(device) 30 | 31 | # let's download an initial image 32 | url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg" 33 | 34 | response = requests.get(url) 35 | init_image = Image.open(BytesIO(response.content)).convert("RGB") 36 | init_image = init_image.resize((768, 512)) 37 | 38 | prompt = "A fantasy landscape, trending on artstation" 39 | 40 | images = pipe(prompt=prompt, init_image=init_image, strength=0.75, guidance_scale=7.5).images 41 | 42 | images[0].save("fantasy_landscape.png") 43 | ``` 44 | You can also run this example on colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) 45 | 46 | -------------------------------------------------------------------------------- /tests/pipelines/ddpm/test_ddpm.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
import unittest

import numpy as np
import torch

from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel
from diffusers.utils.testing_utils import require_torch, slow, torch_device

from ...test_pipelines_common import PipelineTesterMixin


torch.backends.cuda.matmul.allow_tf32 = False


class DDPMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    # FIXME: add fast tests
    pass


@slow
@require_torch
class DDPMPipelineIntegrationTests(unittest.TestCase):
    def test_inference_cifar10(self):
        model_id = "google/ddpm-cifar10-32"

        unet = UNet2DModel.from_pretrained(model_id, device_map="auto")
        scheduler = DDPMScheduler.from_config(model_id)

        ddpm = DDPMPipeline(unet=unet, scheduler=scheduler)
        ddpm.to(torch_device)
        ddpm.set_progress_bar_config(disable=None)

        generator = torch.manual_seed(0)
        image = ddpm(generator=generator, output_type="numpy").images

        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 32, 32, 3)
        expected_slice = np.array([0.41995, 0.35885, 0.19385, 0.38475, 0.3382, 0.2647, 0.41545, 0.3582, 0.33845])
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
-------------------------------------------------------------------------------- /scripts/conversion_ldm_uncond.py: --------------------------------------------------------------------------------
import argparse

import torch

from omegaconf import OmegaConf
from diffusers import DDIMScheduler, LDMPipeline, UNetLDMModel, VQModel


def convert_ldm_original(checkpoint_path, config_path, output_path):
    config = OmegaConf.load(config_path)
    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
    keys = list(state_dict.keys())

    # extract state_dict for VQVAE
    first_stage_dict = {}
    first_stage_key = "first_stage_model."
    for key in keys:
        if key.startswith(first_stage_key):
            first_stage_dict[key.replace(first_stage_key, "")] = state_dict[key]

    # extract state_dict for UNetLDM
    unet_state_dict = {}
    unet_key = "model.diffusion_model."
    for key in keys:
        if key.startswith(unet_key):
            unet_state_dict[key.replace(unet_key, "")] = state_dict[key]

    vqvae_init_args = config.model.params.first_stage_config.params
    unet_init_args = config.model.params.unet_config.params

    vqvae = VQModel(**vqvae_init_args).eval()
    vqvae.load_state_dict(first_stage_dict)

    unet = UNetLDMModel(**unet_init_args).eval()
    unet.load_state_dict(unet_state_dict)

    noise_scheduler = DDIMScheduler(
        timesteps=config.model.params.timesteps,
        beta_schedule="scaled_linear",
        beta_start=config.model.params.linear_start,
        beta_end=config.model.params.linear_end,
        clip_sample=False,
    )

    pipeline = LDMPipeline(vqvae, unet, noise_scheduler)
    pipeline.save_pretrained(output_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--checkpoint_path", type=str, required=True)
    parser.add_argument("--config_path", type=str, required=True)
    parser.add_argument("--output_path", type=str, required=True)
    args = parser.parse_args()

    convert_ldm_original(args.checkpoint_path, args.config_path, args.output_path)
-------------------------------------------------------------------------------- /docs/source/using-diffusers/unconditional_image_generation.mdx: --------------------------------------------------------------------------------
# Unconditional Image Generation

The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference.

Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download.
You can use the [`DiffusionPipeline`] for any [Diffusers' checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads).
In this guide though, you'll use [`DiffusionPipeline`] for unconditional image generation with [DDPM](https://arxiv.org/abs/2006.11239):

```python
>>> from diffusers import DiffusionPipeline

>>> generator = DiffusionPipeline.from_pretrained("google/ddpm-celebahq-256")
```

The [`DiffusionPipeline`] downloads and caches all modeling and scheduling components.
Because sampling runs the model once per denoising step, we strongly recommend running it on GPU.
You can move the generator object to GPU, just like you would in PyTorch.

```python
>>> generator.to("cuda")
```

Now you can use the `generator` to sample an image:

```python
>>> image = generator().images[0]
```

The output is by default wrapped into a [PIL Image object](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class).

You can save the image by simply calling:

```python
>>> image.save("generated_image.png")
```
-------------------------------------------------------------------------------- /docs/source/api/pipelines/ddim.mdx: --------------------------------------------------------------------------------
# DDIM

## Overview

[Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) (DDIM) by Jiaming Song, Chenlin Meng and Stefano Ermon.
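Before the abstract, a minimal sampling sketch (the checkpoint is the CIFAR-10 DDPM checkpoint also used in the outputs documentation above; DDIM can sample from DDPM-trained models):

```python
from diffusers import DDIMPipeline

pipe = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")

# DDIM's non-Markovian reverse process allows far fewer steps than the original 1000-step chain.
image = pipe(num_inference_steps=50).images[0]
image.save("ddim_generated_image.png")
```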
The abstract of the paper is the following:

Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a Markovian diffusion process. We construct a class of non-Markovian diffusion processes that lead to the same training objective, but whose reverse process can be much faster to sample from. We empirically demonstrate that DDIMs can produce high quality samples 10× to 50× faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, and can perform semantically meaningful image interpolation directly in the latent space.

The original codebase of this paper can be found [here](https://github.com/ermongroup/ddim).

## Available Pipelines:

| Pipeline | Tasks | Colab
|---|---|:---:|
| [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddim/pipeline_ddim.py) | *Unconditional Image Generation* | - |


## DDIMPipeline
[[autodoc]] DDIMPipeline
	- __call__
-------------------------------------------------------------------------------- /docs/source/api/pipelines/ddpm.mdx: --------------------------------------------------------------------------------
# DDPM

## Overview

[Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239)
(DDPM) by Jonathan Ho, Ajay Jain and Pieter Abbeel proposes the diffusion-based model of the same name. In the context of the 🤗 Diffusers library, however, DDPM refers both to the discrete denoising scheduler from the paper and to the pipeline.

The abstract of the paper is the following:

We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN.

The original codebase of this paper can be found [here](https://github.com/hojonathanho/diffusion).
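A minimal unconditional sampling sketch, mirroring the usage exercised in `tests/pipelines/ddpm/test_ddpm.py` above:

```python
import torch

from diffusers import DDPMPipeline

pipe = DDPMPipeline.from_pretrained("google/ddpm-cifar10-32")

generator = torch.manual_seed(0)  # optional, for reproducible samples
image = pipe(generator=generator).images[0]
image.save("ddpm_generated_image.png")
```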
## Available Pipelines:

| Pipeline | Tasks | Colab
|---|---|:---:|
| [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddpm/pipeline_ddpm.py) | *Unconditional Image Generation* | - |


## DDPMPipeline
[[autodoc]] DDPMPipeline
	- __call__
-------------------------------------------------------------------------------- /docs/source/using-diffusers/conditional_image_generation.mdx: --------------------------------------------------------------------------------
# Conditional Image Generation

The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference.

Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download.
You can use the [`DiffusionPipeline`] for any [Diffusers' checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads).
In this guide though, you'll use [`DiffusionPipeline`] for text-to-image generation with [Latent Diffusion](https://huggingface.co/CompVis/ldm-text2im-large-256):

```python
>>> from diffusers import DiffusionPipeline

>>> generator = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")
```

The [`DiffusionPipeline`] downloads and caches all modeling, tokenization, and scheduling components.
Because the model consists of roughly 1.4 billion parameters, we strongly recommend running it on GPU.
You can move the generator object to GPU, just like you would in PyTorch.

```python
>>> generator.to("cuda")
```

Now you can use the `generator` on your text prompt:

```python
>>> image = generator("An image of a squirrel in Picasso style").images[0]
```

The output is by default wrapped into a [PIL Image object](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class).

You can save the image by simply calling:

```python
>>> image.save("image_of_squirrel_painting.png")
```
-------------------------------------------------------------------------------- /src/diffusers/utils/deprecation_utils.py: --------------------------------------------------------------------------------
import inspect
import warnings
from typing import Any, Dict, Optional, Union

from packaging import version


def deprecate(*args, take_from: Optional[Union[Dict, Any]] = None, standard_warn=True):
    from .. import __version__

    deprecated_kwargs = take_from
    values = ()
    if not isinstance(args[0], tuple):
        args = (args,)

    for attribute, version_name, message in args:
        if version.parse(version.parse(__version__).base_version) >= version.parse(version_name):
            raise ValueError(
                f"The deprecation tuple {(attribute, version_name, message)} should be removed since diffusers'"
                f" version {__version__} is >= {version_name}"
            )

        warning = None
        if isinstance(deprecated_kwargs, dict) and attribute in deprecated_kwargs:
            values += (deprecated_kwargs.pop(attribute),)
            warning = f"The `{attribute}` argument is deprecated and will be removed in version {version_name}."
27 | elif hasattr(deprecated_kwargs, attribute): 28 | values += (getattr(deprecated_kwargs, attribute),) 29 | warning = f"The `{attribute}` attribute is deprecated and will be removed in version {version_name}." 30 | elif deprecated_kwargs is None: 31 | warning = f"`{attribute}` is deprecated and will be removed in version {version_name}." 32 | 33 | if warning is not None: 34 | warning = warning + " " if standard_warn else "" 35 | warnings.warn(warning + message, DeprecationWarning) 36 | 37 | if isinstance(deprecated_kwargs, dict) and len(deprecated_kwargs) > 0: 38 | call_frame = inspect.getouterframes(inspect.currentframe())[1] 39 | filename = call_frame.filename 40 | line_number = call_frame.lineno 41 | function = call_frame.function 42 | key, value = next(iter(deprecated_kwargs.items())) 43 | raise TypeError(f"{function} in {filename} line {line_number-1} got an unexpected keyword argument `{key}`") 44 | 45 | if len(values) == 0: 46 | return 47 | elif len(values) == 1: 48 | return values[0] 49 | return values 50 | -------------------------------------------------------------------------------- /src/diffusers/schedulers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
from ..utils import is_flax_available, is_scipy_available, is_torch_available


if is_torch_available():
    from .scheduling_ddim import DDIMScheduler
    from .scheduling_ddpm import DDPMScheduler
    from .scheduling_euler_ancestral_discrete import EulerAncestralDiscreteScheduler
    from .scheduling_euler_discrete import EulerDiscreteScheduler
    from .scheduling_ipndm import IPNDMScheduler
    from .scheduling_karras_ve import KarrasVeScheduler
    from .scheduling_pndm import PNDMScheduler
    from .scheduling_sde_ve import ScoreSdeVeScheduler
    from .scheduling_sde_vp import ScoreSdeVpScheduler
    from .scheduling_utils import SchedulerMixin
else:
    from ..utils.dummy_pt_objects import *  # noqa F403

if is_flax_available():
    from .scheduling_ddim_flax import FlaxDDIMScheduler
    from .scheduling_ddpm_flax import FlaxDDPMScheduler
    from .scheduling_karras_ve_flax import FlaxKarrasVeScheduler
    from .scheduling_lms_discrete_flax import FlaxLMSDiscreteScheduler
    from .scheduling_pndm_flax import FlaxPNDMScheduler
    from .scheduling_sde_ve_flax import FlaxScoreSdeVeScheduler
    from .scheduling_utils_flax import FlaxSchedulerMixin, FlaxSchedulerOutput, broadcast_to_shape_from_left
else:
    from ..utils.dummy_flax_objects import *  # noqa F403


if is_scipy_available() and is_torch_available():
    from .scheduling_lms_discrete import LMSDiscreteScheduler
else:
    from ..utils.dummy_torch_and_scipy_objects import *  # noqa F403
-------------------------------------------------------------------------------- /docs/source/api/pipelines/stochastic_karras_ve.mdx: --------------------------------------------------------------------------------
# Stochastic Karras VE

## Overview

[Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) by Tero Karras, Miika Aittala, Timo Aila and Samuli Laine.

The abstract of the paper is the following:

We argue that the theory and practice of diffusion-based generative models are currently unnecessarily convoluted and seek to remedy the situation by presenting a design space that clearly separates the concrete design choices. This lets us identify several changes to both the sampling and training processes, as well as preconditioning of the score networks. Together, our improvements yield new state-of-the-art FID of 1.79 for CIFAR-10 in a class-conditional setting and 1.97 in an unconditional setting, with much faster sampling (35 network evaluations per image) than prior designs. To further demonstrate their modular nature, we show that our design changes dramatically improve both the efficiency and quality obtainable with pre-trained score networks from previous work, including improving the FID of an existing ImageNet-64 model from 2.07 to near-SOTA 1.55.

This pipeline implements the stochastic sampling procedure tailored to variance-exploding (VE) models.
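A minimal usage sketch (the model id below is a placeholder, not a real checkpoint name — substitute any checkpoint trained for the VE formulation):

```python
from diffusers import KarrasVePipeline

# "<model-id>" is a placeholder; point it at a compatible VE checkpoint on the Hub.
pipe = KarrasVePipeline.from_pretrained("<model-id>")
image = pipe(num_inference_steps=50).images[0]
```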
## Available Pipelines:

| Pipeline | Tasks | Colab
|---|---|:---:|
| [pipeline_stochastic_karras_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py) | *Unconditional Image Generation* | - |


## KarrasVePipeline
[[autodoc]] KarrasVePipeline
	- __call__
-------------------------------------------------------------------------------- /docs/source/api/models.mdx: --------------------------------------------------------------------------------
# Models

Diffusers contains pretrained models for popular algorithms and modules for creating the next set of diffusion models.
The primary function of these models is to denoise an input sample by modeling the distribution $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$.
The models are built on the base class [`ModelMixin`], which is a `torch.nn.Module` with basic functionality for saving and loading models, both locally and from the Hugging Face Hub.

## ModelMixin
[[autodoc]] ModelMixin

## UNet2DOutput
[[autodoc]] models.unet_2d.UNet2DOutput

## UNet1DModel
[[autodoc]] UNet1DModel

## UNet2DModel
[[autodoc]] UNet2DModel

## UNet2DConditionOutput
[[autodoc]] models.unet_2d_condition.UNet2DConditionOutput

## UNet2DConditionModel
[[autodoc]] UNet2DConditionModel

## DecoderOutput
[[autodoc]] models.vae.DecoderOutput

## VQEncoderOutput
[[autodoc]] models.vae.VQEncoderOutput

## VQModel
[[autodoc]] VQModel

## AutoencoderKLOutput
[[autodoc]] models.vae.AutoencoderKLOutput

## AutoencoderKL
[[autodoc]] AutoencoderKL

## FlaxModelMixin
[[autodoc]] FlaxModelMixin

## FlaxUNet2DConditionOutput
[[autodoc]] models.unet_2d_condition_flax.FlaxUNet2DConditionOutput

## FlaxUNet2DConditionModel
[[autodoc]] FlaxUNet2DConditionModel

## FlaxDecoderOutput
[[autodoc]] models.vae_flax.FlaxDecoderOutput

## FlaxAutoencoderKLOutput
[[autodoc]] models.vae_flax.FlaxAutoencoderKLOutput

## FlaxAutoencoderKL
[[autodoc]] FlaxAutoencoderKL
-------------------------------------------------------------------------------- /src/diffusers/utils/__init__.py: --------------------------------------------------------------------------------
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | 15 | 16 | import os 17 | 18 | from .deprecation_utils import deprecate 19 | from .import_utils import ( 20 | ENV_VARS_TRUE_AND_AUTO_VALUES, 21 | ENV_VARS_TRUE_VALUES, 22 | USE_JAX, 23 | USE_TF, 24 | USE_TORCH, 25 | DummyObject, 26 | is_accelerate_available, 27 | is_flax_available, 28 | is_inflect_available, 29 | is_modelcards_available, 30 | is_onnx_available, 31 | is_scipy_available, 32 | is_tf_available, 33 | is_torch_available, 34 | is_transformers_available, 35 | is_unidecode_available, 36 | requires_backends, 37 | ) 38 | from .logging import get_logger 39 | from .outputs import BaseOutput 40 | 41 | 42 | if is_torch_available(): 43 | from .testing_utils import ( 44 | floats_tensor, 45 | load_image, 46 | load_numpy, 47 | parse_flag_from_env, 48 | require_torch_gpu, 49 | slow, 50 | torch_all_close, 51 | torch_device, 52 | ) 53 | 54 | 55 | logger = get_logger(__name__) 56 | 57 | 58 | hf_cache_home = os.path.expanduser( 59 | os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) 60 | ) 61 | default_cache_path = os.path.join(hf_cache_home, "diffusers") 62 | 63 | 64 | CONFIG_NAME = "config.json" 65 | WEIGHTS_NAME = "diffusion_pytorch_model.bin" 66 | FLAX_WEIGHTS_NAME = "diffusion_flax_model.msgpack" 67 | ONNX_WEIGHTS_NAME = "model.onnx" 68 | HUGGINGFACE_CO_RESOLVE_ENDPOINT = "https://huggingface.co" 69 | DIFFUSERS_CACHE = default_cache_path 70 | DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" 71 | HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) 72 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_torch_and_transformers_and_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class OnnxStableDiffusionImg2ImgPipeline(metaclass=DummyObject): 8 | _backends = ["torch", "transformers", "onnx"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["torch", "transformers", "onnx"]) 12 | 13 | @classmethod 14 | def from_config(cls, *args, **kwargs): 15 | requires_backends(cls, ["torch", "transformers", "onnx"]) 16 | 17 | @classmethod 18 | def from_pretrained(cls, *args, **kwargs): 19 | requires_backends(cls, ["torch", "transformers", "onnx"]) 20 | 21 | 22 | class OnnxStableDiffusionInpaintPipeline(metaclass=DummyObject): 23 | _backends = ["torch", "transformers", "onnx"] 24 | 25 | def __init__(self, *args, **kwargs): 26 | requires_backends(self, ["torch", "transformers", "onnx"]) 27 | 28 | @classmethod 29 | def from_config(cls, *args, **kwargs): 30 | requires_backends(cls, ["torch", "transformers", "onnx"]) 31 | 32 | @classmethod 33 | def from_pretrained(cls, *args, **kwargs): 34 | requires_backends(cls, ["torch", "transformers", "onnx"]) 35 | 36 | 37 | class OnnxStableDiffusionPipeline(metaclass=DummyObject): 38 | _backends = ["torch", "transformers", "onnx"] 39 | 40 | def __init__(self, *args, **kwargs): 41 | requires_backends(self, ["torch", "transformers", "onnx"]) 42 | 43 | @classmethod 44 | def from_config(cls, *args, **kwargs): 45 | requires_backends(cls, ["torch", "transformers", "onnx"]) 46 | 47 | @classmethod 48 | def from_pretrained(cls, *args, **kwargs): 49 | requires_backends(cls, ["torch", "transformers", "onnx"]) 50 | 51 | 52 | class StableDiffusionOnnxPipeline(metaclass=DummyObject): 53 | _backends = ["torch", "transformers", "onnx"] 54 | 55 | def __init__(self, *args, **kwargs): 56 | requires_backends(self, ["torch", "transformers", "onnx"]) 57 | 58 | @classmethod 59 | def from_config(cls, *args, **kwargs): 60 | requires_backends(cls, ["torch", "transformers", "onnx"]) 61 | 62 | @classmethod 63 | def from_pretrained(cls, *args, **kwargs): 64 | requires_backends(cls, ["torch", "transformers", "onnx"]) 65 | -------------------------------------------------------------------------------- /tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import unittest 17 | 18 | import numpy as np 19 | 20 | from diffusers import OnnxStableDiffusionImg2ImgPipeline 21 | from diffusers.utils.testing_utils import load_image, require_onnxruntime, slow 22 | 23 | from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin 24 | 25 | 26 | class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): 27 | # FIXME: add fast tests 28 | pass 29 | 30 | 31 | @slow 32 | @require_onnxruntime 33 | class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): 34 | def test_inference(self): 35 | init_image = load_image( 36 | "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" 37 | "/img2img/sketch-mountains-input.jpg" 38 | ) 39 | init_image = init_image.resize((768, 512)) 40 | pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained( 41 | "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CPUExecutionProvider" 42 | ) 43 | pipe.set_progress_bar_config(disable=None) 44 | 45 | prompt = "A fantasy landscape, trending on artstation" 46 | 47 | np.random.seed(0) 48 | output = pipe( 49 | prompt=prompt, 50 | init_image=init_image, 51 | strength=0.75, 52 | guidance_scale=7.5, 53 | num_inference_steps=8, 54 | output_type="np", 55 | ) 56 | images = output.images 57 | image_slice = images[0, 255:258, 383:386, -1] 58 | 59 | assert images.shape == (1, 512, 768, 3) 60 | expected_slice = np.array([0.4830, 0.5242, 0.5603, 0.5016, 0.5131, 0.5111, 0.4928, 0.5025, 0.5055]) 61 | # TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues 62 | assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2 63 | -------------------------------------------------------------------------------- /tests/test_outputs.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from dataclasses import dataclass 3 | from typing import List, Union 4 | 5 | import numpy as np 6 | 7 | import PIL.Image 8 | from diffusers.utils.outputs import BaseOutput 9 | 10 | 11 | @dataclass 12 | class CustomOutput(BaseOutput): 13 | images: Union[List[PIL.Image.Image], np.ndarray] 14 | 15 | 16 | class ConfigTester(unittest.TestCase): 17 | def test_outputs_single_attribute(self): 18 | outputs = CustomOutput(images=np.random.rand(1, 3, 4, 4)) 19 | 20 | # check every way of getting the attribute 21 | assert isinstance(outputs.images, np.ndarray) 22 | assert outputs.images.shape == (1, 3, 4, 4) 23 | assert isinstance(outputs["images"], np.ndarray) 24 | assert outputs["images"].shape == (1, 3, 4, 4) 25 | assert isinstance(outputs[0], np.ndarray) 26 | assert outputs[0].shape == (1, 3, 4, 4) 27 | 28 | # test with a non-tensor attribute 29 | outputs = CustomOutput(images=[PIL.Image.new("RGB", (4, 4))]) 30 | 31 | # check every way of getting the attribute 32 | assert isinstance(outputs.images, list) 33 | assert isinstance(outputs.images[0], PIL.Image.Image) 34 | assert isinstance(outputs["images"], list) 35 | assert isinstance(outputs["images"][0], PIL.Image.Image) 36 | assert isinstance(outputs[0], list) 37 | assert isinstance(outputs[0][0], PIL.Image.Image) 38 | 39 | def test_outputs_dict_init(self): 40 | # test output reinitialization with a `dict` for compatibility with `accelerate` 41 | outputs = CustomOutput({"images": np.random.rand(1, 3, 4, 4)}) 42 | 43 | # check every way of getting the attribute 44 | assert isinstance(outputs.images, np.ndarray) 45 | assert outputs.images.shape == (1, 3, 4, 4) 46 | assert isinstance(outputs["images"], 
np.ndarray) 47 | assert outputs["images"].shape == (1, 3, 4, 4) 48 | assert isinstance(outputs[0], np.ndarray) 49 | assert outputs[0].shape == (1, 3, 4, 4) 50 | 51 | # test with a non-tensor attribute 52 | outputs = CustomOutput({"images": [PIL.Image.new("RGB", (4, 4))]}) 53 | 54 | # check every way of getting the attribute 55 | assert isinstance(outputs.images, list) 56 | assert isinstance(outputs.images[0], PIL.Image.Image) 57 | assert isinstance(outputs["images"], list) 58 | assert isinstance(outputs["images"][0], PIL.Image.Image) 59 | assert isinstance(outputs[0], list) 60 | assert isinstance(outputs[0][0], PIL.Image.Image) 61 | -------------------------------------------------------------------------------- /tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | 18 | import numpy as np 19 | 20 | from diffusers import OnnxStableDiffusionInpaintPipeline 21 | from diffusers.utils.testing_utils import load_image, require_onnxruntime, slow 22 | 23 | from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin 24 | 25 | 26 | class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): 27 | # FIXME: add fast tests 28 | pass 29 | 30 | 31 | @slow 32 | @require_onnxruntime 33 | class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): 34 | def test_stable_diffusion_inpaint_onnx(self): 35 | init_image = load_image( 36 | "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" 37 | "/in_paint/overture-creations-5sI6fQgYIuo.png" 38 | ) 39 | mask_image = load_image( 40 | "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" 41 | "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" 42 | ) 43 | 44 | pipe = OnnxStableDiffusionInpaintPipeline.from_pretrained( 45 | "runwayml/stable-diffusion-inpainting", revision="onnx", provider="CPUExecutionProvider" 46 | ) 47 | pipe.set_progress_bar_config(disable=None) 48 | 49 | prompt = "A red cat sitting on a park bench" 50 | 51 | np.random.seed(0) 52 | output = pipe( 53 | prompt=prompt, 54 | image=init_image, 55 | mask_image=mask_image, 56 | guidance_scale=7.5, 57 | num_inference_steps=8, 58 | output_type="np", 59 | ) 60 | images = output.images 61 | image_slice = images[0, 255:258, 255:258, -1] 62 | 63 | assert images.shape == (1, 512, 512, 3) 64 | expected_slice = np.array([0.2951, 0.2955, 0.2922, 0.2036, 0.1977, 0.2279, 0.1716, 0.1641, 0.1799]) 65 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 66 | -------------------------------------------------------------------------------- /src/diffusers/commands/env.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import platform
from argparse import ArgumentParser

import huggingface_hub

from .. import __version__ as version
from ..utils import is_torch_available, is_transformers_available
from . import BaseDiffusersCLICommand


def info_command_factory(_):
    return EnvironmentCommand()


class EnvironmentCommand(BaseDiffusersCLICommand):
    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        download_parser = parser.add_parser("env")
        download_parser.set_defaults(func=info_command_factory)

    def run(self):
        hub_version = huggingface_hub.__version__

        pt_version = "not installed"
        pt_cuda_available = "NA"
        if is_torch_available():
            import torch

            pt_version = torch.__version__
            pt_cuda_available = torch.cuda.is_available()

        transformers_version = "not installed"
        if is_transformers_available():
            import transformers

            transformers_version = transformers.__version__

        info = {
            "`diffusers` version": version,
            "Platform": platform.platform(),
            "Python version": platform.python_version(),
            "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})",
            "Huggingface_hub version": hub_version,
            "Transformers version": transformers_version,
            "Using GPU in script?": "<fill in>",
            "Using distributed or parallel set-up in script?": "<fill in>",
        }

        print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n")
        print(self.format_dict(info))

        return info

    @staticmethod
    def format_dict(d):
        return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n"
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
# Initially taken from GitHub's Python gitignore file

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# tests and logs
tests/fixtures/cached_*_text.txt
logs/
lightning_logs/
lang_code_data/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # Environments 98 | .env 99 | .venv 100 | env/ 101 | venv/ 102 | ENV/ 103 | env.bak/ 104 | venv.bak/ 105 | 106 | # Spyder project settings 107 | .spyderproject 108 | .spyproject 109 | 110 | # Rope project settings 111 | .ropeproject 112 | 113 | # mkdocs documentation 114 | /site 115 | 116 | # mypy 117 | .mypy_cache/ 118 | .dmypy.json 119 | dmypy.json 120 | 121 | # Pyre type checker 122 | .pyre/ 123 | 124 | # vscode 125 | .vs 126 | .vscode 127 | 128 | # Pycharm 129 | .idea 130 | 131 | # TF code 132 | tensorflow_code 133 | 134 | # Models 135 | proc_data 136 | 137 | # examples 138 | runs 139 | /runs_old 140 | /wandb 141 | /examples/runs 142 | /examples/**/*.args 143 | /examples/rag/sweep 144 | 145 | # data 146 | /data 147 | serialization_dir 148 | 149 | # emacs 150 | *.*~ 151 | debug.env 152 | 153 | # vim 154 | .*.swp 155 | 156 | #ctags 157 | tags 158 | 159 | # pre-commit 160 | .pre-commit* 161 | 162 | # .lock 163 | *.lock 164 | 165 | # DS_Store (MacOS) 166 | .DS_Store -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_torch_and_transformers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class LDMTextToImagePipeline(metaclass=DummyObject): 8 | _backends = ["torch", "transformers"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["torch", "transformers"]) 12 | 13 | @classmethod 14 | def from_config(cls, *args, **kwargs): 15 | requires_backends(cls, ["torch", "transformers"]) 16 | 17 | @classmethod 18 | def from_pretrained(cls, *args, **kwargs): 19 | requires_backends(cls, ["torch", "transformers"]) 20 | 21 | 22 | class StableDiffusionImg2ImgPipeline(metaclass=DummyObject): 23 | _backends = ["torch", "transformers"] 24 | 25 | def __init__(self, *args, **kwargs): 26 | requires_backends(self, ["torch", "transformers"]) 27 | 28 | @classmethod 29 | def from_config(cls, *args, **kwargs): 30 | requires_backends(cls, ["torch", "transformers"]) 31 | 32 | @classmethod 33 | def from_pretrained(cls, *args, **kwargs): 34 | requires_backends(cls, ["torch", "transformers"]) 35 | 36 | 37 | class StableDiffusionInpaintPipeline(metaclass=DummyObject): 38 | _backends = ["torch", "transformers"] 39 | 40 | def __init__(self, *args, **kwargs): 41 | requires_backends(self, ["torch", "transformers"]) 42 | 43 | @classmethod 44 | def from_config(cls, *args, **kwargs): 45 | requires_backends(cls, ["torch", "transformers"]) 46 | 47 | @classmethod 48 | def from_pretrained(cls, *args, **kwargs): 49 | requires_backends(cls, ["torch", "transformers"]) 50 | 51 | 52 | class StableDiffusionInpaintPipelineLegacy(metaclass=DummyObject): 53 | _backends = ["torch", "transformers"] 54 | 55 | def __init__(self, *args, **kwargs): 56 | requires_backends(self, ["torch", "transformers"]) 57 | 58 | @classmethod 59 | def from_config(cls, *args, **kwargs): 60 | requires_backends(cls, ["torch", "transformers"]) 61 | 62 | @classmethod 63 | def from_pretrained(cls, *args, **kwargs): 64 | requires_backends(cls, ["torch", "transformers"]) 65 | 66 | 67 | class StableDiffusionPipeline(metaclass=DummyObject): 68 | _backends = ["torch", "transformers"] 69 | 70 | def __init__(self, *args, **kwargs): 71 | requires_backends(self, ["torch", "transformers"]) 72 | 73 | @classmethod 74 | def from_config(cls, *args, **kwargs): 75 | requires_backends(cls, ["torch", "transformers"]) 76 | 77 | @classmethod 78 | def from_pretrained(cls, *args, **kwargs): 79 | requires_backends(cls, ["torch", "transformers"]) 80 | -------------------------------------------------------------------------------- /docs/source/api/pipelines/pndm.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # PNDM 14 | 15 | ## Overview 16 | 17 | [Pseudo Numerical methods for Diffusion Models on manifolds](https://arxiv.org/abs/2202.09778) (PNDM) by Luping Liu, Yi Ren, Zhijie Lin and Zhou Zhao. 18 | 19 | The abstract of the paper is the following: 20 | 21 | Denoising Diffusion Probabilistic Models (DDPMs) can generate high-quality samples such as image and audio samples. However, DDPMs require hundreds to thousands of iterations to produce final samples. Several prior works have successfully accelerated DDPMs through adjusting the variance schedule (e.g., Improved Denoising Diffusion Probabilistic Models) or the denoising equation (e.g., Denoising Diffusion Implicit Models (DDIMs)). However, these acceleration methods cannot maintain the quality of samples and even introduce new noise at a high speedup rate, which limit their practicability. 
To accelerate the inference process while keeping the sample quality, we provide a fresh perspective that DDPMs should be treated as solving differential equations on manifolds. Under such a perspective, we propose pseudo numerical methods for diffusion models (PNDMs). Specifically, we figure out how to solve differential equations on manifolds and show that DDIMs are simple cases of pseudo numerical methods. We change several classical numerical methods to corresponding pseudo numerical methods and find that the pseudo linear multi-step method is the best in most situations. According to our experiments, by directly using pre-trained models on Cifar10, CelebA and LSUN, PNDMs can generate higher quality synthetic images with only 50 steps compared with 1000-step DDIMs (20x speedup), significantly outperform DDIMs with 250 steps (by around 0.4 in FID) and have good generalization on different variance schedules.

The original codebase can be found [here](https://github.com/luping-liu/PNDM).

## Available Pipelines:

| Pipeline | Tasks | Colab
|---|---|:---:|
| [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pndm/pipeline_pndm.py) | *Unconditional Image Generation* | - |


## PNDMPipeline
[[autodoc]] pipelines.pndm.pipeline_pndm.PNDMPipeline
	- __call__
-------------------------------------------------------------------------------- /src/diffusers/models/embeddings_flax.py: --------------------------------------------------------------------------------
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import flax.linen as nn
import jax.numpy as jnp


# This is like models.embeddings.get_timestep_embedding (PyTorch) but
# less general (only handles the case we currently need).
def get_sinusoidal_embeddings(timesteps, embedding_dim, freq_shift: float = 1):
    """
    Create sinusoidal timestep embeddings. This matches the implementation in Denoising Diffusion Probabilistic
    Models.

    :param timesteps: a 1-D tensor of N indices, one per batch element. These may be fractional.
    :param embedding_dim: the dimension of the output.
    :param freq_shift: shifts the frequency spectrum of the embeddings.
    :return: an [N x embedding_dim] tensor of positional embeddings.
    """
    half_dim = embedding_dim // 2
    emb = math.log(10000) / (half_dim - freq_shift)
    emb = jnp.exp(jnp.arange(half_dim) * -emb)
    emb = timesteps[:, None] * emb[None, :]
    emb = jnp.concatenate([jnp.cos(emb), jnp.sin(emb)], -1)
    return emb


class FlaxTimestepEmbedding(nn.Module):
    r"""
    Time step Embedding Module. Learns embeddings for input time steps.
42 | 43 | Args: 44 | time_embed_dim (`int`, *optional*, defaults to `32`): 45 | Time step embedding dimension 46 | dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): 47 | Parameters `dtype` 48 | """ 49 | time_embed_dim: int = 32 50 | dtype: jnp.dtype = jnp.float32 51 | 52 | @nn.compact 53 | def __call__(self, temb): 54 | temb = nn.Dense(self.time_embed_dim, dtype=self.dtype, name="linear_1")(temb) 55 | temb = nn.silu(temb) 56 | temb = nn.Dense(self.time_embed_dim, dtype=self.dtype, name="linear_2")(temb) 57 | return temb 58 | 59 | 60 | class FlaxTimesteps(nn.Module): 61 | r""" 62 | Wrapper Module for sinusoidal Time step Embeddings as described in https://arxiv.org/abs/2006.11239 63 | 64 | Args: 65 | dim (`int`, *optional*, defaults to `32`): 66 | Time step embedding dimension 67 | """ 68 | dim: int = 32 69 | freq_shift: float = 1 70 | 71 | @nn.compact 72 | def __call__(self, timesteps): 73 | return get_sinusoidal_embeddings(timesteps, self.dim, freq_shift=self.freq_shift) 74 | -------------------------------------------------------------------------------- /docs/source/api/pipelines/latent_diffusion_uncond.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Unconditional Latent Diffusion 14 | 15 | ## Overview 16 | 17 | Unconditional Latent Diffusion was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer. 18 | 19 | The abstract of the paper is the following: 20 | 21 | *By decomposing the image formation process into a sequential application of denoising autoencoders, diffusion models (DMs) achieve state-of-the-art synthesis results on image data and beyond. Additionally, their formulation allows for a guiding mechanism to control the image generation process without retraining. However, since these models typically operate directly in pixel space, optimization of powerful DMs often consumes hundreds of GPU days and inference is expensive due to sequential evaluations. To enable DM training on limited computational resources while retaining their quality and flexibility, we apply them in the latent space of powerful pretrained autoencoders. In contrast to previous work, training diffusion models on such a representation allows for the first time to reach a near-optimal point between complexity reduction and detail preservation, greatly boosting visual fidelity. By introducing cross-attention layers into the model architecture, we turn diffusion models into powerful and flexible generators for general conditioning inputs such as text or bounding boxes and high-resolution synthesis becomes possible in a convolutional manner. Our latent diffusion models (LDMs) achieve a new state of the art for image inpainting and highly competitive performance on various tasks, including unconditional image generation, semantic scene synthesis, and super-resolution, while significantly reducing computational requirements compared to pixel-based DMs.* 22 | 23 | The original codebase can be found [here](https://github.com/CompVis/latent-diffusion). 
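A minimal usage sketch (the checkpoint id below is an assumption — any unconditional LDM checkpoint in the diffusers format should work):

```python
from diffusers import LDMPipeline

pipe = LDMPipeline.from_pretrained("CompVis/ldm-celebahq-256")  # assumed checkpoint id
image = pipe(num_inference_steps=200).images[0]
image.save("ldm_generated_image.png")
```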
24 | 25 | ## Tips: 26 | 27 | - 28 | - 29 | - 30 | 31 | ## Available Pipelines: 32 | 33 | | Pipeline | Tasks | Colab 34 | |---|---|:---:| 35 | | [pipeline_latent_diffusion_uncond.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py) | *Unconditional Image Generation* | - | 36 | 37 | ## Examples: 38 | 39 | ## LDMPipeline 40 | [[autodoc]] LDMPipeline 41 | - __call__ 42 | -------------------------------------------------------------------------------- /docs/source/api/pipelines/latent_diffusion.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Latent Diffusion 14 | 15 | ## Overview 16 | 17 | Latent Diffusion was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer. 18 | 19 | The abstract of the paper is the following: 20 | 21 | *By decomposing the image formation process into a sequential application of denoising autoencoders, diffusion models (DMs) achieve state-of-the-art synthesis results on image data and beyond. Additionally, their formulation allows for a guiding mechanism to control the image generation process without retraining. However, since these models typically operate directly in pixel space, optimization of powerful DMs often consumes hundreds of GPU days and inference is expensive due to sequential evaluations. To enable DM training on limited computational resources while retaining their quality and flexibility, we apply them in the latent space of powerful pretrained autoencoders. In contrast to previous work, training diffusion models on such a representation allows for the first time to reach a near-optimal point between complexity reduction and detail preservation, greatly boosting visual fidelity. By introducing cross-attention layers into the model architecture, we turn diffusion models into powerful and flexible generators for general conditioning inputs such as text or bounding boxes and high-resolution synthesis becomes possible in a convolutional manner. Our latent diffusion models (LDMs) achieve a new state of the art for image inpainting and highly competitive performance on various tasks, including unconditional image generation, semantic scene synthesis, and super-resolution, while significantly reducing computational requirements compared to pixel-based DMs.* 22 | 23 | The original codebase can be found [here](https://github.com/CompVis/latent-diffusion). 
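A minimal usage sketch, reusing the checkpoint from the conditional image generation guide earlier in these docs:

```python
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")
image = pipe("An image of a squirrel in Picasso style").images[0]
image.save("squirrel.png")
```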
24 | 
25 | ## Available Pipelines:
26 | 
27 | | Pipeline | Tasks | Colab |
28 | |---|---|:---:|
29 | | [pipeline_latent_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py) | *Text-to-Image Generation* | - |
30 | 
31 | ## LDMTextToImagePipeline
32 | [[autodoc]] pipelines.latent_diffusion.pipeline_latent_diffusion.LDMTextToImagePipeline
33 |     - __call__
34 | 
--------------------------------------------------------------------------------
/src/diffusers/pipelines/stable_diffusion/__init__.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import List, Optional, Union
 3 | 
 4 | import numpy as np
 5 | 
 6 | import PIL
 7 | from PIL import Image
 8 | 
 9 | from ...utils import BaseOutput, is_flax_available, is_onnx_available, is_torch_available, is_transformers_available
10 | 
11 | 
12 | @dataclass
13 | class StableDiffusionPipelineOutput(BaseOutput):
14 |     """
15 |     Output class for Stable Diffusion pipelines.
16 | 
17 |     Args:
18 |         images (`List[PIL.Image.Image]` or `np.ndarray`):
19 |             List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
20 |             num_channels)`. PIL images or numpy arrays represent the denoised images of the diffusion pipeline.
21 |         nsfw_content_detected (`List[bool]`):
22 |             List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
23 |             (nsfw) content, or `None` if safety checking could not be performed.
24 |     """
25 | 
26 |     images: Union[List[PIL.Image.Image], np.ndarray]
27 |     nsfw_content_detected: Optional[List[bool]]
28 | 
29 | 
30 | if is_transformers_available() and is_torch_available():
31 |     from .pipeline_stable_diffusion import StableDiffusionPipeline
32 |     from .pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline
33 |     from .pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline
34 |     from .pipeline_stable_diffusion_inpaint_legacy import StableDiffusionInpaintPipelineLegacy
35 |     from .safety_checker import StableDiffusionSafetyChecker
36 | 
37 | if is_transformers_available() and is_onnx_available():
38 |     from .pipeline_onnx_stable_diffusion import OnnxStableDiffusionPipeline, StableDiffusionOnnxPipeline
39 |     from .pipeline_onnx_stable_diffusion_img2img import OnnxStableDiffusionImg2ImgPipeline
40 |     from .pipeline_onnx_stable_diffusion_inpaint import OnnxStableDiffusionInpaintPipeline
41 | 
42 | if is_transformers_available() and is_flax_available():
43 |     import flax
44 | 
45 |     @flax.struct.dataclass
46 |     class FlaxStableDiffusionPipelineOutput(BaseOutput):
47 |         """
48 |         Output class for Stable Diffusion pipelines.
49 | 
50 |         Args:
51 |             images (`List[PIL.Image.Image]` or `np.ndarray`):
52 |                 List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
53 |                 num_channels)`. PIL images or numpy arrays represent the denoised images of the diffusion pipeline.
54 |             nsfw_content_detected (`List[bool]`):
55 |                 List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
56 |                 (nsfw) content.
57 |         """
58 | 
59 |         images: Union[List[PIL.Image.Image], np.ndarray]
60 |         nsfw_content_detected: List[bool]
61 | 
62 |     from ...schedulers.scheduling_pndm_flax import PNDMSchedulerState
63 |     from .pipeline_flax_stable_diffusion import FlaxStableDiffusionPipeline
64 |     from .safety_checker_flax import FlaxStableDiffusionSafetyChecker
65 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples
 2 | 
 3 | # make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
 4 | export PYTHONPATH = src
 5 | 
 6 | check_dirs := examples scripts src tests utils
 7 | 
 8 | modified_only_fixup:
 9 | 	$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
10 | 	@if test -n "$(modified_py_files)"; then \
11 | 		echo "Checking/fixing $(modified_py_files)"; \
12 | 		black --preview $(modified_py_files); \
13 | 		isort $(modified_py_files); \
14 | 		flake8 $(modified_py_files); \
15 | 	else \
16 | 		echo "No library .py files were modified"; \
17 | 	fi
18 | 
19 | # Update src/diffusers/dependency_versions_table.py
20 | 
21 | deps_table_update:
22 | 	@python setup.py deps_table_update
23 | 
24 | deps_table_check_updated:
25 | 	@md5sum src/diffusers/dependency_versions_table.py > md5sum.saved
26 | 	@python setup.py deps_table_update
27 | 	@md5sum -c --quiet md5sum.saved || (printf "\nError: the version dependency table is outdated.\nPlease run 'make fixup' or 'make style' and commit the changes.\n\n" && exit 1)
28 | 	@rm md5sum.saved
29 | 
30 | # autogenerating code
31 | 
32 | autogenerate_code: deps_table_update
33 | 
34 | # Check that the repo is in a good state
35 | 
36 | repo-consistency:
37 | 	python utils/check_dummies.py
38 | 	python utils/check_repo.py
39 | 	python utils/check_inits.py
40 | 
41 | # this target runs checks on all files
42 | 
43 | quality:
44 | 	black --check --preview $(check_dirs)
45 | 	isort --check-only $(check_dirs)
46 | 	flake8 $(check_dirs)
47 | 	doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
48 | 
49 | # Format source code automatically and check if there are any problems left that need manual fixing
50 | 
51 | extra_style_checks:
52 | 	python utils/custom_init_isort.py
53 | 	doc-builder style src/diffusers docs/source --max_len 119 --path_to_docs docs/source
54 | 
55 | # this target runs checks on all files and potentially modifies some of them
56 | 
57 | style:
58 | 	black --preview $(check_dirs)
59 | 	isort $(check_dirs)
60 | 	${MAKE} autogenerate_code
61 | 	${MAKE} extra_style_checks
62 | 
63 | # Super fast fix and check target that only works on relevant modified files since the branch was made
64 | 
65 | fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
66 | 
67 | # Make marked copies of snippets of code conform to the original
68 | 
69 | fix-copies:
70 | 	python utils/check_copies.py --fix_and_overwrite
71 | 	python utils/check_dummies.py --fix_and_overwrite
72 | 
73 | # Run tests for the library
74 | 
75 | test:
76 | 	python -m pytest -n auto --dist=loadfile -s -v ./tests/
77 | 
78 | # Run tests for examples
79 | 
80 | test-examples:
81 | 	python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/
82 | 
83 | 
84 | # Release stuff
85 | 
86 | pre-release:
87 | 	python utils/release.py
88 | 
89 | pre-patch:
90 | 	python utils/release.py --patch
91 | 
92 | 
post-release:
93 | 	python utils/release.py --post_release
94 | 
95 | post-patch:
96 | 	python utils/release.py --post_release --patch
97 | 
--------------------------------------------------------------------------------
/docs/source/api/pipelines/score_sde_ve.mdx:
--------------------------------------------------------------------------------
 1 | 
12 | 
13 | # Score SDE VE
14 | 
15 | ## Overview
16 | 
17 | Score SDE was proposed in [Score-Based Generative Modeling through Stochastic Differential Equations](https://arxiv.org/abs/2011.13456) by Yang Song, Jascha Sohl-Dickstein, Diederik P. Kingma, Abhishek Kumar, Stefano Ermon and Ben Poole.
18 | 
19 | The abstract of the paper is the following:
20 | 
21 | *Creating noise from data is easy; creating data from noise is generative modeling. We present a stochastic differential equation (SDE) that smoothly transforms a complex data distribution to a known prior distribution by slowly injecting noise, and a corresponding reverse-time SDE that transforms the prior distribution back into the data distribution by slowly removing the noise. Crucially, the reverse-time SDE depends only on the time-dependent gradient field (a.k.a., score) of the perturbed data distribution. By leveraging advances in score-based generative modeling, we can accurately estimate these scores with neural networks, and use numerical SDE solvers to generate samples. We show that this framework encapsulates previous approaches in score-based generative modeling and diffusion probabilistic modeling, allowing for new sampling procedures and new modeling capabilities. In particular, we introduce a predictor-corrector framework to correct errors in the evolution of the discretized reverse-time SDE. We also derive an equivalent neural ODE that samples from the same distribution as the SDE, but additionally enables exact likelihood computation, and improved sampling efficiency. In addition, we provide a new way to solve inverse problems with score-based models, as demonstrated with experiments on class-conditional generation, image inpainting, and colorization. Combined with multiple architectural improvements, we achieve record-breaking performance for unconditional image generation on CIFAR-10 with an Inception score of 9.89 and FID of 2.20, a competitive likelihood of 2.99 bits/dim, and demonstrate high fidelity generation of 1024 x 1024 images for the first time from a score-based generative model.*
22 | 
23 | The original codebase can be found [here](https://github.com/yang-song/score_sde_pytorch).
24 | 
25 | This pipeline implements the Variance Exploding (VE) variant of the method.
26 | 
27 | ## Available Pipelines:
28 | 
29 | | Pipeline | Tasks | Colab |
30 | |---|---|:---:|
31 | | [pipeline_score_sde_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py) | *Unconditional Image Generation* | - |
32 | 
33 | ## ScoreSdeVePipeline
34 | [[autodoc]] ScoreSdeVePipeline
35 |     - __call__
36 | 
37 | 
--------------------------------------------------------------------------------
/utils/stale.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 The HuggingFace Team, the AllenNLP library authors. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Script to close stale issues. Taken in part from the AllenNLP repository.
16 | https://github.com/allenai/allennlp.
17 | """
18 | import os
19 | from datetime import datetime as dt
20 | 
21 | from github import Github
22 | 
23 | 
24 | LABELS_TO_EXEMPT = [
25 |     "good first issue",
26 |     "good second issue",
27 |     "good difficult issue",
28 |     "enhancement",
29 |     "new pipeline/model",
30 |     "new scheduler",
31 |     "wip",
32 | ]
33 | 
34 | 
35 | def main():
36 |     g = Github(os.environ["GITHUB_TOKEN"])
37 |     repo = g.get_repo("huggingface/diffusers")
38 |     open_issues = repo.get_issues(state="open")
39 | 
40 |     for issue in open_issues:
41 |         comments = sorted([comment for comment in issue.get_comments()], key=lambda i: i.created_at, reverse=True)
42 |         last_comment = comments[0] if len(comments) > 0 else None
43 |         if (
44 |             last_comment is not None
45 |             and last_comment.user.login == "github-actions[bot]"
46 |             and (dt.utcnow() - issue.updated_at).days > 7
47 |             and (dt.utcnow() - issue.created_at).days >= 30
48 |             and not any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels())
49 |         ):
50 |             # Closes the issue after 7 days of inactivity since the Stalebot notification.
51 |             issue.edit(state="closed")
52 |         elif (
53 |             # get_labels() returns Label objects, so compare by name rather than by object
54 |             "stale" in [label.name.lower() for label in issue.get_labels()]
55 |             and last_comment is not None
56 |             and last_comment.user.login != "github-actions[bot]"
57 |         ):
58 |             # Opens the issue if someone other than Stalebot commented.
59 |             issue.edit(state="open")
60 |             issue.remove_from_labels("stale")
61 |         elif (
62 |             (dt.utcnow() - issue.updated_at).days > 23
63 |             and (dt.utcnow() - issue.created_at).days >= 30
64 |             and not any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels())
65 |         ):
66 |             # Post a Stalebot notification after 23 days of inactivity.
67 |             issue.create_comment(
68 |                 "This issue has been automatically marked as stale because it has not had "
69 |                 "recent activity. If you think this still needs to be addressed "
70 |                 "please comment on this thread.\n\nPlease note that issues that do not follow the "
71 |                 "[contributing guidelines](https://github.com/huggingface/diffusers/blob/main/CONTRIBUTING.md) "
72 |                 "are likely to be ignored."
73 |             )
74 |             issue.add_to_labels("stale")
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     main()
79 | 
--------------------------------------------------------------------------------
/utils/check_config_docstrings.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 The HuggingFace Inc. team.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import importlib
17 | import inspect
18 | import os
19 | import re
20 | 
21 | 
22 | # All paths are set with the intent you should run this script from the root of the repo with the command
23 | # python utils/check_config_docstrings.py
24 | PATH_TO_TRANSFORMERS = "src/transformers"
25 | 
26 | 
27 | # This is to make sure the transformers module imported is the one in the repo.
28 | spec = importlib.util.spec_from_file_location(
29 |     "transformers",
30 |     os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"),
31 |     submodule_search_locations=[PATH_TO_TRANSFORMERS],
32 | )
33 | transformers = spec.loader.load_module()
34 | 
35 | CONFIG_MAPPING = transformers.models.auto.configuration_auto.CONFIG_MAPPING
36 | 
37 | # Regex pattern (as a raw string, so the escapes are not interpreted as string escapes) used to find
38 | # the checkpoint mentioned in the docstring of `config_class`.
39 | # For example, `[bert-base-uncased](https://huggingface.co/bert-base-uncased)`
40 | _re_checkpoint = re.compile(r"\[(.+?)\]\((https://huggingface\.co/.+?)\)")
41 | 
42 | 
43 | CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK = {
44 |     "CLIPConfigMixin",
45 |     "DecisionTransformerConfigMixin",
46 |     "EncoderDecoderConfigMixin",
47 |     "RagConfigMixin",
48 |     "SpeechEncoderDecoderConfigMixin",
49 |     "VisionEncoderDecoderConfigMixin",
50 |     "VisionTextDualEncoderConfigMixin",
51 | }
52 | 
53 | 
54 | def check_config_docstrings_have_checkpoints():
55 |     configs_without_checkpoint = []
56 | 
57 |     for config_class in list(CONFIG_MAPPING.values()):
58 |         checkpoint_found = False
59 | 
60 |         # source code of `config_class`
61 |         config_source = inspect.getsource(config_class)
62 |         checkpoints = _re_checkpoint.findall(config_source)
63 | 
64 |         for checkpoint in checkpoints:
65 |             # Each `checkpoint` is a tuple of a checkpoint name and a checkpoint link.
66 |             # For example, `('bert-base-uncased', 'https://huggingface.co/bert-base-uncased')`
67 |             ckpt_name, ckpt_link = checkpoint
68 | 
69 |             # verify the checkpoint name corresponds to the checkpoint link
70 |             ckpt_link_from_name = f"https://huggingface.co/{ckpt_name}"
71 |             if ckpt_link == ckpt_link_from_name:
72 |                 checkpoint_found = True
73 |                 break
74 | 
75 |         name = config_class.__name__
76 |         if not checkpoint_found and name not in CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK:
77 |             configs_without_checkpoint.append(name)
78 | 
79 |     if len(configs_without_checkpoint) > 0:
80 |         message = "\n".join(sorted(configs_without_checkpoint))
81 |         raise ValueError(f"The following configurations don't contain any valid checkpoint:\n{message}")
82 | 
83 | 
84 | if __name__ == "__main__":
85 |     check_config_docstrings_have_checkpoints()
86 | 
--------------------------------------------------------------------------------
/tests/pipelines/pndm/test_pndm.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 HuggingFace Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | 18 | import numpy as np 19 | import torch 20 | 21 | from diffusers import PNDMPipeline, PNDMScheduler, UNet2DModel 22 | from diffusers.utils.testing_utils import require_torch, slow, torch_device 23 | 24 | from ...test_pipelines_common import PipelineTesterMixin 25 | 26 | 27 | torch.backends.cuda.matmul.allow_tf32 = False 28 | 29 | 30 | class PNDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase): 31 | @property 32 | def dummy_uncond_unet(self): 33 | torch.manual_seed(0) 34 | model = UNet2DModel( 35 | block_out_channels=(32, 64), 36 | layers_per_block=2, 37 | sample_size=32, 38 | in_channels=3, 39 | out_channels=3, 40 | down_block_types=("DownBlock2D", "AttnDownBlock2D"), 41 | up_block_types=("AttnUpBlock2D", "UpBlock2D"), 42 | ) 43 | return model 44 | 45 | def test_inference(self): 46 | unet = self.dummy_uncond_unet 47 | scheduler = PNDMScheduler() 48 | 49 | pndm = PNDMPipeline(unet=unet, scheduler=scheduler) 50 | pndm.to(torch_device) 51 | pndm.set_progress_bar_config(disable=None) 52 | 53 | generator = torch.manual_seed(0) 54 | image = pndm(generator=generator, num_inference_steps=20, output_type="numpy").images 55 | 56 | generator = torch.manual_seed(0) 57 | image_from_tuple = pndm(generator=generator, num_inference_steps=20, output_type="numpy", return_dict=False)[0] 58 | 59 | image_slice = image[0, -3:, -3:, -1] 60 | image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] 61 | 62 | assert image.shape == (1, 32, 32, 3) 63 | expected_slice = np.array([1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]) 64 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 65 | assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 66 | 67 | 68 | @slow 69 | @require_torch 70 | class PNDMPipelineIntegrationTests(unittest.TestCase): 71 | def test_inference_cifar10(self): 72 | model_id = "google/ddpm-cifar10-32" 73 | 74 | unet = UNet2DModel.from_pretrained(model_id, device_map="auto") 75 | scheduler = PNDMScheduler() 76 | 77 | pndm = PNDMPipeline(unet=unet, scheduler=scheduler) 78 | pndm.to(torch_device) 79 | pndm.set_progress_bar_config(disable=None) 80 | generator = torch.manual_seed(0) 81 | image = pndm(generator=generator, output_type="numpy").images 82 | 83 | image_slice = image[0, -3:, -3:, -1] 84 | 85 | assert image.shape == (1, 32, 32, 3) 86 | expected_slice = np.array([0.1564, 0.14645, 0.1406, 0.14715, 0.12425, 0.14045, 0.13115, 0.12175, 0.125]) 87 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 88 | -------------------------------------------------------------------------------- /tests/models/test_models_vq.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import unittest 17 | 18 | import torch 19 | 20 | from diffusers import VQModel 21 | from diffusers.utils import floats_tensor, torch_device 22 | 23 | from ..test_modeling_common import ModelTesterMixin 24 | 25 | 26 | torch.backends.cuda.matmul.allow_tf32 = False 27 | 28 | 29 | class VQModelTests(ModelTesterMixin, unittest.TestCase): 30 | model_class = VQModel 31 | 32 | @property 33 | def dummy_input(self, sizes=(32, 32)): 34 | batch_size = 4 35 | num_channels = 3 36 | 37 | image = floats_tensor((batch_size, num_channels) + sizes).to(torch_device) 38 | 39 | return {"sample": image} 40 | 41 | @property 42 | def input_shape(self): 43 | return (3, 32, 32) 44 | 45 | @property 46 | def output_shape(self): 47 | return (3, 32, 32) 48 | 49 | def prepare_init_args_and_inputs_for_common(self): 50 | init_dict = { 51 | "block_out_channels": [32, 64], 52 | "in_channels": 3, 53 | "out_channels": 3, 54 | "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], 55 | "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], 56 | "latent_channels": 3, 57 | } 58 | inputs_dict = self.dummy_input 59 | return init_dict, inputs_dict 60 | 61 | def test_forward_signature(self): 62 | pass 63 | 64 | def test_training(self): 65 | pass 66 | 67 | def test_from_pretrained_hub(self): 68 | model, loading_info = VQModel.from_pretrained("fusing/vqgan-dummy", output_loading_info=True) 69 | self.assertIsNotNone(model) 70 | self.assertEqual(len(loading_info["missing_keys"]), 0) 71 | 72 | model.to(torch_device) 73 | image = model(**self.dummy_input) 74 | 75 | assert image is not None, "Make sure output is not None" 76 | 77 | def test_output_pretrained(self): 78 | model = VQModel.from_pretrained("fusing/vqgan-dummy") 79 | model.to(torch_device).eval() 80 | 81 | torch.manual_seed(0) 82 | if torch.cuda.is_available(): 83 | torch.cuda.manual_seed_all(0) 84 | 85 | image = torch.randn(1, model.config.in_channels, model.config.sample_size, model.config.sample_size) 86 | image = image.to(torch_device) 87 | with torch.no_grad(): 88 | # Warmup pass when using mps (see #372) 89 | if torch_device == "mps": 90 | _ = model(image) 91 | output = model(image).sample 92 | 93 | output_slice = output[0, -1, -3:, -3:].flatten().cpu() 94 | # fmt: off 95 | expected_output_slice = torch.tensor([-0.0153, -0.4044, -0.1880, -0.5161, -0.2418, -0.4072, -0.1612, -0.0633, -0.0143]) 96 | # fmt: on 97 | self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3)) 98 | -------------------------------------------------------------------------------- /tests/pipelines/karras_ve/test_karras_ve.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import unittest 17 | 18 | import numpy as np 19 | import torch 20 | 21 | from diffusers import KarrasVePipeline, KarrasVeScheduler, UNet2DModel 22 | from diffusers.utils.testing_utils import require_torch, slow, torch_device 23 | 24 | from ...test_pipelines_common import PipelineTesterMixin 25 | 26 | 27 | torch.backends.cuda.matmul.allow_tf32 = False 28 | 29 | 30 | class KarrasVePipelineFastTests(PipelineTesterMixin, unittest.TestCase): 31 | @property 32 | def dummy_uncond_unet(self): 33 | torch.manual_seed(0) 34 | model = UNet2DModel( 35 | block_out_channels=(32, 64), 36 | layers_per_block=2, 37 | sample_size=32, 38 | in_channels=3, 39 | out_channels=3, 40 | down_block_types=("DownBlock2D", "AttnDownBlock2D"), 41 | up_block_types=("AttnUpBlock2D", "UpBlock2D"), 42 | ) 43 | return model 44 | 45 | def test_inference(self): 46 | unet = self.dummy_uncond_unet 47 | scheduler = KarrasVeScheduler() 48 | 49 | pipe = KarrasVePipeline(unet=unet, scheduler=scheduler) 50 | pipe.to(torch_device) 51 | pipe.set_progress_bar_config(disable=None) 52 | 53 | generator = torch.manual_seed(0) 54 | image = pipe(num_inference_steps=2, generator=generator, output_type="numpy").images 55 | 56 | generator = torch.manual_seed(0) 57 | image_from_tuple = pipe(num_inference_steps=2, generator=generator, output_type="numpy", return_dict=False)[0] 58 | 59 | image_slice = image[0, -3:, -3:, -1] 60 | image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] 61 | 62 | assert image.shape == (1, 32, 32, 3) 63 | expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]) 64 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 65 | assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 66 | 67 | 68 | @slow 69 | @require_torch 70 | class KarrasVePipelineIntegrationTests(unittest.TestCase): 71 | def test_inference(self): 72 | model_id = "google/ncsnpp-celebahq-256" 73 | model = UNet2DModel.from_pretrained(model_id, device_map="auto") 74 | scheduler = KarrasVeScheduler() 75 | 76 | pipe = KarrasVePipeline(unet=model, scheduler=scheduler) 77 | pipe.to(torch_device) 78 | pipe.set_progress_bar_config(disable=None) 79 | 80 | generator = torch.manual_seed(0) 81 | image = pipe(num_inference_steps=20, generator=generator, output_type="numpy").images 82 | 83 | image_slice = image[0, -3:, -3:, -1] 84 | assert image.shape == (1, 256, 256, 3) 85 | expected_slice = np.array([0.578, 0.5811, 0.5924, 0.5809, 0.587, 0.5886, 0.5861, 0.5802, 0.586]) 86 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 87 | -------------------------------------------------------------------------------- /.github/workflows/push_tests.yml: -------------------------------------------------------------------------------- 1 | name: Run all tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | env: 9 | DIFFUSERS_IS_CI: yes 10 | HF_HOME: /mnt/cache 11 | OMP_NUM_THREADS: 8 12 | MKL_NUM_THREADS: 8 13 | PYTEST_TIMEOUT: 1000 14 | RUN_SLOW: yes 15 | 16 | jobs: 17 | run_tests_single_gpu: 18 | name: Diffusers tests 19 | runs-on: [ self-hosted, docker-gpu, single-gpu ] 20 | container: 21 | image: nvcr.io/nvidia/pytorch:22.07-py3 22 | options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache 23 | 24 | steps: 25 | - name: Checkout diffusers 26 | uses: actions/checkout@v3 27 | with: 28 | fetch-depth: 2 29 | 30 | - name: NVIDIA-SMI 31 | run: | 32 | nvidia-smi 33 | 34 | - name: Install dependencies 35 | run: | 36 | python -m pip install --upgrade pip 37 | python -m pip uninstall -y 
torch torchvision torchtext 38 | python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu117 39 | python -m pip install -e .[quality,test] 40 | python -m pip install git+https://github.com/huggingface/accelerate 41 | 42 | - name: Environment 43 | run: | 44 | python utils/print_env.py 45 | 46 | - name: Run all (incl. slow) tests on GPU 47 | env: 48 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 49 | run: | 50 | python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=tests_torch_gpu tests/ 51 | 52 | - name: Failure short reports 53 | if: ${{ failure() }} 54 | run: cat reports/tests_torch_gpu_failures_short.txt 55 | 56 | - name: Test suite reports artifacts 57 | if: ${{ always() }} 58 | uses: actions/upload-artifact@v2 59 | with: 60 | name: torch_test_reports 61 | path: reports 62 | 63 | run_examples_single_gpu: 64 | name: Examples tests 65 | runs-on: [ self-hosted, docker-gpu, single-gpu ] 66 | container: 67 | image: nvcr.io/nvidia/pytorch:22.07-py3 68 | options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache 69 | 70 | steps: 71 | - name: Checkout diffusers 72 | uses: actions/checkout@v3 73 | with: 74 | fetch-depth: 2 75 | 76 | - name: NVIDIA-SMI 77 | run: | 78 | nvidia-smi 79 | 80 | - name: Install dependencies 81 | run: | 82 | python -m pip install --upgrade pip 83 | python -m pip uninstall -y torch torchvision torchtext 84 | python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu117 85 | python -m pip install -e .[quality,test,training] 86 | python -m pip install git+https://github.com/huggingface/accelerate 87 | 88 | - name: Environment 89 | run: | 90 | python utils/print_env.py 91 | 92 | - name: Run example tests on GPU 93 | env: 94 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 95 | run: | 96 | python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_gpu examples/ 97 | 98 | - name: Failure short reports 99 | if: ${{ failure() }} 100 | run: cat reports/examples_torch_gpu_failures_short.txt 101 | 102 | - name: Test suite reports artifacts 103 | if: ${{ always() }} 104 | uses: actions/upload-artifact@v2 105 | with: 106 | name: examples_test_reports 107 | path: reports 108 | -------------------------------------------------------------------------------- /tests/pipelines/score_sde_ve/test_score_sde_ve.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 
16 | import unittest
17 | 
18 | import numpy as np
19 | import torch
20 | 
21 | from diffusers import ScoreSdeVePipeline, ScoreSdeVeScheduler, UNet2DModel
22 | from diffusers.utils.testing_utils import require_torch, slow, torch_device
23 | 
24 | from ...test_pipelines_common import PipelineTesterMixin
25 | 
26 | 
27 | torch.backends.cuda.matmul.allow_tf32 = False
28 | 
29 | 
30 | class ScoreSdeVePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
31 |     @property
32 |     def dummy_uncond_unet(self):
33 |         torch.manual_seed(0)
34 |         model = UNet2DModel(
35 |             block_out_channels=(32, 64),
36 |             layers_per_block=2,
37 |             sample_size=32,
38 |             in_channels=3,
39 |             out_channels=3,
40 |             down_block_types=("DownBlock2D", "AttnDownBlock2D"),
41 |             up_block_types=("AttnUpBlock2D", "UpBlock2D"),
42 |         )
43 |         return model
44 | 
45 |     def test_inference(self):
46 |         unet = self.dummy_uncond_unet
47 |         scheduler = ScoreSdeVeScheduler()
48 | 
49 |         sde_ve = ScoreSdeVePipeline(unet=unet, scheduler=scheduler)
50 |         sde_ve.to(torch_device)
51 |         sde_ve.set_progress_bar_config(disable=None)
52 | 
53 |         generator = torch.manual_seed(0)
54 |         image = sde_ve(num_inference_steps=2, output_type="numpy", generator=generator).images
55 | 
56 |         generator = torch.manual_seed(0)
57 |         image_from_tuple = sde_ve(num_inference_steps=2, output_type="numpy", generator=generator, return_dict=False)[
58 |             0
59 |         ]
60 | 
61 |         image_slice = image[0, -3:, -3:, -1]
62 |         image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]
63 | 
64 |         assert image.shape == (1, 32, 32, 3)
65 |         expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
66 |         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
67 |         assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
68 | 
69 | 
70 | @slow
71 | @require_torch
72 | class ScoreSdeVePipelineIntegrationTests(unittest.TestCase):
73 |     def test_inference(self):
74 |         model_id = "google/ncsnpp-church-256"
75 |         model = UNet2DModel.from_pretrained(model_id, device_map="auto")
76 | 
77 |         scheduler = ScoreSdeVeScheduler.from_config(model_id)
78 | 
79 |         sde_ve = ScoreSdeVePipeline(unet=model, scheduler=scheduler)
80 |         sde_ve.to(torch_device)
81 |         sde_ve.set_progress_bar_config(disable=None)
82 | 
83 |         generator = torch.manual_seed(0)
84 |         image = sde_ve(num_inference_steps=10, output_type="numpy", generator=generator).images
85 | 
86 |         image_slice = image[0, -3:, -3:, -1]
87 | 
88 |         assert image.shape == (1, 256, 256, 3)
89 | 
90 |         expected_slice = np.array([0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0])
91 |         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
92 | 
--------------------------------------------------------------------------------
/docs/source/_toctree.yml:
--------------------------------------------------------------------------------
 1 | - sections:
 2 |   - local: index
 3 |     title: "🧨 Diffusers"
 4 |   - local: quicktour
 5 |     title: "Quicktour"
 6 |   - local: installation
 7 |     title: "Installation"
 8 |   title: "Get started"
 9 | - sections:
10 |   - sections:
11 |     - local: using-diffusers/loading
12 |       title: "Loading Pipelines, Models, and Schedulers"
13 |     - local: using-diffusers/configuration
14 |       title: "Configuring Pipelines, Models, and Schedulers"
15 |     - local: using-diffusers/custom_pipeline_overview
16 |       title: "Loading and Adding Custom Pipelines"
17 |     title: "Loading & Hub"
18 |   - sections:
19 |     - local: using-diffusers/unconditional_image_generation
20 |       title: "Unconditional Image Generation"
21 |     - local: using-diffusers/conditional_image_generation
22 |       
title: "Text-to-Image Generation" 23 | - local: using-diffusers/img2img 24 | title: "Text-Guided Image-to-Image" 25 | - local: using-diffusers/inpaint 26 | title: "Text-Guided Image-Inpainting" 27 | - local: using-diffusers/custom_pipeline_examples 28 | title: "Community Pipelines" 29 | - local: using-diffusers/contribute_pipeline 30 | title: "How to contribute a Pipeline" 31 | title: "Pipelines for Inference" 32 | title: "Using Diffusers" 33 | - sections: 34 | - local: optimization/fp16 35 | title: "Memory and Speed" 36 | - local: optimization/onnx 37 | title: "ONNX" 38 | - local: optimization/open_vino 39 | title: "OpenVINO" 40 | - local: optimization/mps 41 | title: "MPS" 42 | title: "Optimization/Special Hardware" 43 | - sections: 44 | - local: training/overview 45 | title: "Overview" 46 | - local: training/unconditional_training 47 | title: "Unconditional Image Generation" 48 | - local: training/text_inversion 49 | title: "Textual Inversion" 50 | - local: training/dreambooth 51 | title: "Dreambooth" 52 | - local: training/text2image 53 | title: "Text-to-image fine-tuning" 54 | title: "Training" 55 | - sections: 56 | - local: conceptual/stable_diffusion 57 | title: "Stable Diffusion" 58 | - local: conceptual/philosophy 59 | title: "Philosophy" 60 | - local: conceptual/contribution 61 | title: "How to contribute?" 62 | title: "Conceptual Guides" 63 | - sections: 64 | - sections: 65 | - local: api/models 66 | title: "Models" 67 | - local: api/schedulers 68 | title: "Schedulers" 69 | - local: api/diffusion_pipeline 70 | title: "Diffusion Pipeline" 71 | - local: api/logging 72 | title: "Logging" 73 | - local: api/configuration 74 | title: "Configuration" 75 | - local: api/outputs 76 | title: "Outputs" 77 | title: "Main Classes" 78 | - sections: 79 | - local: api/pipelines/overview 80 | title: "Overview" 81 | - local: api/pipelines/ddim 82 | title: "DDIM" 83 | - local: api/pipelines/ddpm 84 | title: "DDPM" 85 | - local: api/pipelines/latent_diffusion 86 | title: "Latent Diffusion" 87 | - local: api/pipelines/latent_diffusion_uncond 88 | title: "Unconditional Latent Diffusion" 89 | - local: api/pipelines/pndm 90 | title: "PNDM" 91 | - local: api/pipelines/score_sde_ve 92 | title: "Score SDE VE" 93 | - local: api/pipelines/stable_diffusion 94 | title: "Stable Diffusion" 95 | - local: api/pipelines/stochastic_karras_ve 96 | title: "Stochastic Karras VE" 97 | - local: api/pipelines/dance_diffusion 98 | title: "Dance Diffusion" 99 | title: "Pipelines" 100 | title: "API" 101 | -------------------------------------------------------------------------------- /docs/source/installation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Installation 14 | 15 | Install Diffusers for with PyTorch. Support for other libraries will come in the future 16 | 17 | 🤗 Diffusers is tested on Python 3.7+, and PyTorch 1.7.0+. 18 | 19 | ## Install with pip 20 | 21 | You should install 🤗 Diffusers in a [virtual environment](https://docs.python.org/3/library/venv.html). 22 | If you're unfamiliar with Python virtual environments, take a look at this [guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). 23 | A virtual environment makes it easier to manage different projects, and avoid compatibility issues between dependencies. 
24 | 
25 | Start by creating a virtual environment in your project directory:
26 | 
27 | ```bash
28 | python -m venv .env
29 | ```
30 | 
31 | Activate the virtual environment:
32 | 
33 | ```bash
34 | source .env/bin/activate
35 | ```
36 | 
37 | Now you're ready to install 🤗 Diffusers with the following command:
38 | 
39 | ```bash
40 | pip install diffusers
41 | ```
42 | 
43 | ## Install from source
44 | 
45 | Install 🤗 Diffusers from source with the following command:
46 | 
47 | ```bash
48 | pip install git+https://github.com/huggingface/diffusers
49 | ```
50 | 
51 | This command installs the bleeding edge `main` version rather than the latest `stable` version.
52 | The `main` version is useful for staying up-to-date with the latest developments, for instance
53 | if a bug has been fixed since the last official release but a new release hasn't been rolled out yet.
54 | However, this means the `main` version may not always be stable.
55 | We strive to keep the `main` version operational, and most issues are usually resolved within a few hours or a day.
56 | If you run into a problem, please open an [Issue](https://github.com/huggingface/diffusers/issues), so we can fix it even sooner!
57 | 
58 | ## Editable install
59 | 
60 | You will need an editable install if you'd like to:
61 | 
62 | * Use the `main` version of the source code.
63 | * Contribute to 🤗 Diffusers and need to test changes in the code.
64 | 
65 | Clone the repository and install 🤗 Diffusers with the following commands:
66 | 
67 | ```bash
68 | git clone https://github.com/huggingface/diffusers.git
69 | cd diffusers
70 | pip install -e .
71 | ```
72 | 
73 | These commands link the folder into which you cloned the repository with your Python library paths.
74 | Python will now look inside the folder you cloned to in addition to the normal library paths.
75 | For example, if your Python packages are typically installed in `~/anaconda3/envs/main/lib/python3.7/site-packages/`, Python will also search the folder you cloned to: `~/diffusers/`.
76 | 
77 | 
78 | 
79 | You must keep the `diffusers` folder if you want to keep using the library.
80 | 
81 | 
82 | 
83 | Now you can easily update your clone to the latest version of 🤗 Diffusers with the following command:
84 | 
85 | ```bash
86 | cd ~/diffusers/
87 | git pull
88 | ```
89 | 
90 | Your Python environment will find the `main` version of 🤗 Diffusers on the next run.
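
As a quick check that the editable install is picked up, you can print the installed version (a minimal sketch; the exact version string will vary with your checkout):

```python
import diffusers

# for an editable install from `main` this prints a dev version, e.g. "0.7.0.dev0"
print(diffusers.__version__)
```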
91 | -------------------------------------------------------------------------------- /.github/workflows/pr_tests.yml: -------------------------------------------------------------------------------- 1 | name: Run fast tests 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | env: 13 | DIFFUSERS_IS_CI: yes 14 | OMP_NUM_THREADS: 8 15 | MKL_NUM_THREADS: 8 16 | PYTEST_TIMEOUT: 60 17 | MPS_TORCH_VERSION: 1.13.0 18 | 19 | jobs: 20 | run_tests_cpu: 21 | name: CPU tests on Ubuntu 22 | runs-on: [ self-hosted, docker-gpu ] 23 | container: 24 | image: python:3.7 25 | options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ 26 | 27 | steps: 28 | - name: Checkout diffusers 29 | uses: actions/checkout@v3 30 | with: 31 | fetch-depth: 2 32 | 33 | - name: Install dependencies 34 | run: | 35 | python -m pip install --upgrade pip 36 | python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu 37 | python -m pip install -e .[quality,test] 38 | python -m pip install git+https://github.com/huggingface/accelerate 39 | 40 | - name: Environment 41 | run: | 42 | python utils/print_env.py 43 | 44 | - name: Run all fast tests on CPU 45 | env: 46 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 47 | run: | 48 | python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=tests_torch_cpu tests/ 49 | 50 | - name: Failure short reports 51 | if: ${{ failure() }} 52 | run: cat reports/tests_torch_cpu_failures_short.txt 53 | 54 | - name: Test suite reports artifacts 55 | if: ${{ always() }} 56 | uses: actions/upload-artifact@v2 57 | with: 58 | name: pr_torch_cpu_test_reports 59 | path: reports 60 | 61 | run_tests_apple_m1: 62 | name: MPS tests on Apple M1 63 | runs-on: [ self-hosted, apple-m1 ] 64 | 65 | steps: 66 | - name: Checkout diffusers 67 | uses: actions/checkout@v3 68 | with: 69 | fetch-depth: 2 70 | 71 | - name: Clean checkout 72 | shell: arch -arch arm64 bash {0} 73 | run: | 74 | git clean -fxd 75 | 76 | - name: Setup miniconda 77 | uses: ./.github/actions/setup-miniconda 78 | with: 79 | python-version: 3.9 80 | 81 | - name: Install dependencies 82 | shell: arch -arch arm64 bash {0} 83 | run: | 84 | ${CONDA_RUN} python -m pip install --upgrade pip 85 | ${CONDA_RUN} python -m pip install -e .[quality,test] 86 | ${CONDA_RUN} python -m pip install --pre torch==${MPS_TORCH_VERSION} --extra-index-url https://download.pytorch.org/whl/test/cpu 87 | ${CONDA_RUN} python -m pip install git+https://github.com/huggingface/accelerate 88 | 89 | - name: Environment 90 | shell: arch -arch arm64 bash {0} 91 | run: | 92 | ${CONDA_RUN} python utils/print_env.py 93 | 94 | - name: Run all fast tests on MPS 95 | shell: arch -arch arm64 bash {0} 96 | env: 97 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 98 | run: | 99 | ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps tests/ 100 | 101 | - name: Failure short reports 102 | if: ${{ failure() }} 103 | run: cat reports/tests_torch_mps_failures_short.txt 104 | 105 | - name: Test suite reports artifacts 106 | if: ${{ always() }} 107 | uses: actions/upload-artifact@v2 108 | with: 109 | name: pr_torch_mps_test_reports 110 | path: reports 111 | -------------------------------------------------------------------------------- /src/diffusers/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils 
import ( 2 | is_flax_available, 3 | is_inflect_available, 4 | is_onnx_available, 5 | is_scipy_available, 6 | is_torch_available, 7 | is_transformers_available, 8 | is_unidecode_available, 9 | ) 10 | 11 | 12 | __version__ = "0.7.0.dev0" 13 | 14 | from .configuration_utils import ConfigMixin 15 | from .onnx_utils import OnnxRuntimeModel 16 | from .utils import logging 17 | 18 | 19 | if is_torch_available(): 20 | from .modeling_utils import ModelMixin 21 | from .models import AutoencoderKL, UNet1DModel, UNet2DConditionModel, UNet2DModel, VQModel 22 | from .optimization import ( 23 | get_constant_schedule, 24 | get_constant_schedule_with_warmup, 25 | get_cosine_schedule_with_warmup, 26 | get_cosine_with_hard_restarts_schedule_with_warmup, 27 | get_linear_schedule_with_warmup, 28 | get_polynomial_decay_schedule_with_warmup, 29 | get_scheduler, 30 | ) 31 | from .pipeline_utils import DiffusionPipeline 32 | from .pipelines import ( 33 | DanceDiffusionPipeline, 34 | DDIMPipeline, 35 | DDPMPipeline, 36 | KarrasVePipeline, 37 | LDMPipeline, 38 | PNDMPipeline, 39 | ScoreSdeVePipeline, 40 | ) 41 | from .schedulers import ( 42 | DDIMScheduler, 43 | DDPMScheduler, 44 | EulerAncestralDiscreteScheduler, 45 | EulerDiscreteScheduler, 46 | IPNDMScheduler, 47 | KarrasVeScheduler, 48 | PNDMScheduler, 49 | SchedulerMixin, 50 | ScoreSdeVeScheduler, 51 | ) 52 | from .training_utils import EMAModel 53 | else: 54 | from .utils.dummy_pt_objects import * # noqa F403 55 | 56 | if is_torch_available() and is_scipy_available(): 57 | from .schedulers import LMSDiscreteScheduler 58 | else: 59 | from .utils.dummy_torch_and_scipy_objects import * # noqa F403 60 | 61 | if is_torch_available() and is_transformers_available(): 62 | from .pipelines import ( 63 | LDMTextToImagePipeline, 64 | StableDiffusionImg2ImgPipeline, 65 | StableDiffusionInpaintPipeline, 66 | StableDiffusionInpaintPipelineLegacy, 67 | StableDiffusionPipeline, 68 | ) 69 | else: 70 | from .utils.dummy_torch_and_transformers_objects import * # noqa F403 71 | 72 | if is_torch_available() and is_transformers_available() and is_onnx_available(): 73 | from .pipelines import ( 74 | OnnxStableDiffusionImg2ImgPipeline, 75 | OnnxStableDiffusionInpaintPipeline, 76 | OnnxStableDiffusionPipeline, 77 | StableDiffusionOnnxPipeline, 78 | ) 79 | else: 80 | from .utils.dummy_torch_and_transformers_and_onnx_objects import * # noqa F403 81 | 82 | if is_flax_available(): 83 | from .modeling_flax_utils import FlaxModelMixin 84 | from .models.unet_2d_condition_flax import FlaxUNet2DConditionModel 85 | from .models.vae_flax import FlaxAutoencoderKL 86 | from .pipeline_flax_utils import FlaxDiffusionPipeline 87 | from .schedulers import ( 88 | FlaxDDIMScheduler, 89 | FlaxDDPMScheduler, 90 | FlaxKarrasVeScheduler, 91 | FlaxLMSDiscreteScheduler, 92 | FlaxPNDMScheduler, 93 | FlaxSchedulerMixin, 94 | FlaxScoreSdeVeScheduler, 95 | ) 96 | else: 97 | from .utils.dummy_flax_objects import * # noqa F403 98 | 99 | if is_flax_available() and is_transformers_available(): 100 | from .pipelines import FlaxStableDiffusionPipeline 101 | else: 102 | from .utils.dummy_flax_and_transformers_objects import * # noqa F403 103 | -------------------------------------------------------------------------------- /src/diffusers/schedulers/scheduling_sde_vp.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch 16 | 17 | import math 18 | from typing import Union 19 | 20 | import torch 21 | 22 | from ..configuration_utils import ConfigMixin, register_to_config 23 | from .scheduling_utils import SchedulerMixin 24 | 25 | 26 | class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): 27 | """ 28 | The variance preserving stochastic differential equation (SDE) scheduler. 29 | 30 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 31 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 32 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 33 | [`~ConfigMixin.from_config`] functions. 34 | 35 | For more information, see the original paper: https://arxiv.org/abs/2011.13456 36 | 37 | UNDER CONSTRUCTION 38 | 39 | """ 40 | 41 | @register_to_config 42 | def __init__(self, num_train_timesteps=2000, beta_min=0.1, beta_max=20, sampling_eps=1e-3): 43 | self.sigmas = None 44 | self.discrete_sigmas = None 45 | self.timesteps = None 46 | 47 | def set_timesteps(self, num_inference_steps, device: Union[str, torch.device] = None): 48 | self.timesteps = torch.linspace(1, self.config.sampling_eps, num_inference_steps, device=device) 49 | 50 | def step_pred(self, score, x, t, generator=None): 51 | if self.timesteps is None: 52 | raise ValueError( 53 | "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler" 54 | ) 55 | 56 | # TODO(Patrick) better comments + non-PyTorch 57 | # postprocess model score 58 | log_mean_coeff = ( 59 | -0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min 60 | ) 61 | std = torch.sqrt(1.0 - torch.exp(2.0 * log_mean_coeff)) 62 | std = std.flatten() 63 | while len(std.shape) < len(score.shape): 64 | std = std.unsqueeze(-1) 65 | score = -score / std 66 | 67 | # compute 68 | dt = -1.0 / len(self.timesteps) 69 | 70 | beta_t = self.config.beta_min + t * (self.config.beta_max - self.config.beta_min) 71 | beta_t = beta_t.flatten() 72 | while len(beta_t.shape) < len(x.shape): 73 | beta_t = beta_t.unsqueeze(-1) 74 | drift = -0.5 * beta_t * x 75 | 76 | diffusion = torch.sqrt(beta_t) 77 | drift = drift - diffusion**2 * score 78 | x_mean = x + drift * dt 79 | 80 | # add noise 81 | noise = torch.randn(x.shape, layout=x.layout, generator=generator).to(x.device) 82 | x = x_mean + diffusion * math.sqrt(-dt) * noise 83 | 84 | return x, x_mean 85 | 86 | def __len__(self): 87 | return self.config.num_train_timesteps 88 | -------------------------------------------------------------------------------- /tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 
HuggingFace Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import unittest
17 | 
18 | import numpy as np
19 | 
20 | from diffusers import OnnxStableDiffusionPipeline
21 | from diffusers.utils.testing_utils import require_onnxruntime, slow
22 | 
23 | from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin
24 | 
25 | 
26 | class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase):
27 |     # FIXME: add fast tests
28 |     pass
29 | 
30 | 
31 | @slow
32 | @require_onnxruntime
33 | class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
34 |     def test_inference(self):
35 |         sd_pipe = OnnxStableDiffusionPipeline.from_pretrained(
36 |             "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CPUExecutionProvider"
37 |         )
38 | 
39 |         prompt = "A painting of a squirrel eating a burger"
40 |         np.random.seed(0)
41 |         output = sd_pipe([prompt], guidance_scale=6.0, num_inference_steps=5, output_type="np")
42 |         image = output.images
43 | 
44 |         image_slice = image[0, -3:, -3:, -1]
45 | 
46 |         assert image.shape == (1, 512, 512, 3)
47 |         expected_slice = np.array([0.3602, 0.3688, 0.3652, 0.3895, 0.3782, 0.3747, 0.3927, 0.4241, 0.4327])
48 |         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
49 | 
50 |     def test_intermediate_state(self):
51 |         number_of_steps = 0
52 | 
53 |         def test_callback_fn(step: int, timestep: int, latents: np.ndarray) -> None:
54 |             test_callback_fn.has_been_called = True
55 |             nonlocal number_of_steps
56 |             number_of_steps += 1
57 |             if step == 0:
58 |                 assert latents.shape == (1, 4, 64, 64)
59 |                 latents_slice = latents[0, -3:, -3:, -1]
60 |                 expected_slice = np.array(
61 |                     [-0.5950, -0.3039, -1.1672, 0.1594, -1.1572, 0.6719, -1.9712, -0.0403, 0.9592]
62 |                 )
63 |                 assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
64 |             elif step == 5:
65 |                 assert latents.shape == (1, 4, 64, 64)
66 |                 latents_slice = latents[0, -3:, -3:, -1]
67 |                 expected_slice = np.array(
68 |                     [-0.4776, -0.0119, -0.8519, -0.0275, -0.9764, 0.9820, -0.3843, 0.3788, 1.2264]
69 |                 )
70 |                 assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
71 | 
72 |         test_callback_fn.has_been_called = False
73 | 
74 |         pipe = OnnxStableDiffusionPipeline.from_pretrained(
75 |             "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CPUExecutionProvider"
76 |         )
77 |         pipe.set_progress_bar_config(disable=None)
78 | 
79 |         prompt = "Andromeda galaxy in a bottle"
80 | 
81 |         np.random.seed(0)
82 |         pipe(prompt=prompt, num_inference_steps=5, guidance_scale=7.5, callback=test_callback_fn, callback_steps=1)
83 |         assert test_callback_fn.has_been_called
84 |         assert number_of_steps == 6
85 | 
--------------------------------------------------------------------------------
/docs/source/optimization/mps.mdx:
--------------------------------------------------------------------------------
 1 | 
12 | 
13 | # How to use Stable Diffusion on Apple Silicon (M1/M2)
14 | 
15 | 🤗 Diffusers is compatible with Apple silicon for Stable Diffusion
inference, using the PyTorch `mps` device. These are the steps you need to follow to use your M1 or M2 computer with Stable Diffusion.
16 | 
17 | ## Requirements
18 | 
19 | - Mac computer with Apple silicon (M1/M2) hardware.
20 | - macOS 12.6 or later (13.0 or later recommended).
21 | - arm64 version of Python.
22 | - PyTorch 1.13.0 RC (Release Candidate). You can install it with `pip` using:
23 | 
24 | ```
25 | pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/test/cpu
26 | ```
27 | 
28 | ## Inference Pipeline
29 | 
30 | The snippet below demonstrates how to use the `mps` backend using the familiar `to()` interface to move the Stable Diffusion pipeline to your M1 or M2 device.
31 | 
32 | We recommend "priming" the pipeline with an additional one-time pass through it. This is a temporary workaround for a strange issue we have detected: the first inference pass produces slightly different results than subsequent ones. You only need to do this pass once, and it's OK to use just one inference step and discard the result.
33 | 
34 | ```python
35 | # make sure you're logged in with `huggingface-cli login`
36 | from diffusers import StableDiffusionPipeline
37 | 
38 | pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
39 | pipe = pipe.to("mps")
40 | 
41 | # Recommended if your computer has < 64 GB of RAM
42 | pipe.enable_attention_slicing()
43 | 
44 | prompt = "a photo of an astronaut riding a horse on mars"
45 | 
46 | # First-time "warmup" pass (see explanation above)
47 | _ = pipe(prompt, num_inference_steps=1)
48 | 
49 | # Results match those from the CPU device after the warmup pass.
50 | image = pipe(prompt).images[0]
51 | ```
52 | 
53 | ## Performance Recommendations
54 | 
55 | M1/M2 performance is very sensitive to memory pressure. The system will automatically swap if it needs to, but performance will degrade significantly when it does.
56 | 
57 | We recommend you use _attention slicing_ to reduce memory pressure during inference and prevent swapping, particularly if your computer has less than 64 GB of system RAM, or if you generate images at non-standard resolutions larger than 512 × 512 pixels. Attention slicing performs the costly attention operation in multiple steps instead of all at once. It usually has a performance impact of ~20% in computers without unified memory, but we have observed _better performance_ in most Apple Silicon computers, unless you have 64 GB or more.
58 | 
59 | ```python
60 | pipe.enable_attention_slicing()
61 | ```
62 | 
63 | ## Known Issues
64 | 
65 | - As mentioned above, we are investigating a strange [first-time inference issue](https://github.com/huggingface/diffusers/issues/372).
66 | - Generating multiple prompts in a batch [crashes or doesn't work reliably](https://github.com/huggingface/diffusers/issues/363). We believe this is related to the [`mps` backend in PyTorch](https://github.com/pytorch/pytorch/issues/84039). For now, we recommend iterating instead of batching.
67 | 
--------------------------------------------------------------------------------
/tests/test_training.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 HuggingFace Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | 18 | import torch 19 | 20 | from diffusers import DDIMScheduler, DDPMScheduler, UNet2DModel 21 | from diffusers.training_utils import set_seed 22 | from diffusers.utils.testing_utils import slow 23 | 24 | 25 | torch.backends.cuda.matmul.allow_tf32 = False 26 | 27 | 28 | class TrainingTests(unittest.TestCase): 29 | def get_model_optimizer(self, resolution=32): 30 | set_seed(0) 31 | model = UNet2DModel(sample_size=resolution, in_channels=3, out_channels=3) 32 | optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) 33 | return model, optimizer 34 | 35 | @slow 36 | def test_training_step_equality(self): 37 | device = "cpu"  # ensure full determinism without setting the CUBLAS_WORKSPACE_CONFIG env variable 38 | ddpm_scheduler = DDPMScheduler( 39 | num_train_timesteps=1000, 40 | beta_start=0.0001, 41 | beta_end=0.02, 42 | beta_schedule="linear", 43 | clip_sample=True, 44 | ) 45 | ddim_scheduler = DDIMScheduler( 46 | num_train_timesteps=1000, 47 | beta_start=0.0001, 48 | beta_end=0.02, 49 | beta_schedule="linear", 50 | clip_sample=True, 51 | ) 52 | 53 | assert ddpm_scheduler.config.num_train_timesteps == ddim_scheduler.config.num_train_timesteps 54 | 55 | # shared batches for DDPM and DDIM 56 | set_seed(0) 57 | clean_images = [torch.randn((4, 3, 32, 32)).clip(-1, 1).to(device) for _ in range(4)] 58 | noise = [torch.randn((4, 3, 32, 32)).to(device) for _ in range(4)] 59 | timesteps = [torch.randint(0, 1000, (4,)).long().to(device) for _ in range(4)] 60 | 61 | # train with a DDPM scheduler 62 | model, optimizer = self.get_model_optimizer(resolution=32) 63 | model.train().to(device) 64 | for i in range(4): 65 | optimizer.zero_grad() 66 | ddpm_noisy_images = ddpm_scheduler.add_noise(clean_images[i], noise[i], timesteps[i]) 67 | ddpm_noise_pred = model(ddpm_noisy_images, timesteps[i]).sample 68 | loss = torch.nn.functional.mse_loss(ddpm_noise_pred, noise[i]) 69 | loss.backward() 70 | optimizer.step() 71 | del model, optimizer 72 | 73 | # recreate the model and optimizer, and retry with DDIM 74 | model, optimizer = self.get_model_optimizer(resolution=32) 75 | model.train().to(device) 76 | for i in range(4): 77 | optimizer.zero_grad() 78 | ddim_noisy_images = ddim_scheduler.add_noise(clean_images[i], noise[i], timesteps[i]) 79 | ddim_noise_pred = model(ddim_noisy_images, timesteps[i]).sample 80 | loss = torch.nn.functional.mse_loss(ddim_noise_pred, noise[i]) 81 | loss.backward() 82 | optimizer.step() 83 | del model, optimizer 84 | 85 | self.assertTrue(torch.allclose(ddpm_noisy_images, ddim_noisy_images, atol=1e-5)) 86 | self.assertTrue(torch.allclose(ddpm_noise_pred, ddim_noise_pred, atol=1e-5)) 87 | -------------------------------------------------------------------------------- /docs/source/api/logging.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Logging 14 | 15 | 🧨 Diffusers has a centralized logging system, so you can easily set up the verbosity of the library. 16 | 17 | Currently the default verbosity of the library is `WARNING`.
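If you want to verify the current level programmatically, a minimal check looks like this (illustrative; `get_verbosity` and the level constants are documented further down this page):

```python
import diffusers

# The library default corresponds to the WARNING level (int value 30)
assert diffusers.logging.get_verbosity() == diffusers.logging.WARNING
```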
18 | 19 | To change the level of verbosity, just use one of the direct setters. For instance, here is how to change the verbosity 20 | to the INFO level. 21 | 22 | ```python 23 | import diffusers 24 | 25 | diffusers.logging.set_verbosity_info() 26 | ``` 27 | 28 | You can also use the environment variable `DIFFUSERS_VERBOSITY` to override the default verbosity. You can set it 29 | to one of the following: `debug`, `info`, `warning`, `error`, `critical`. For example: 30 | 31 | ```bash 32 | DIFFUSERS_VERBOSITY=error ./myprogram.py 33 | ``` 34 | 35 | Additionally, some `warnings` can be disabled by setting the environment variable 36 | `DIFFUSERS_NO_ADVISORY_WARNINGS` to a true value, like *1*. This will disable any warning that is logged using 37 | [`logger.warning_advice`]. For example: 38 | 39 | ```bash 40 | DIFFUSERS_NO_ADVISORY_WARNINGS=1 ./myprogram.py 41 | ``` 42 | 43 | Here is an example of how to use the same logger as the library in your own module or script: 44 | 45 | ```python 46 | from diffusers.utils import logging 47 | 48 | logging.set_verbosity_info() 49 | logger = logging.get_logger("diffusers") 50 | logger.info("INFO") 51 | logger.warning("WARN") 52 | ``` 53 | 54 | 55 | All the methods of this logging module are documented below; the main ones are 56 | [`logging.get_verbosity`] to get the current level of verbosity in the logger and 57 | [`logging.set_verbosity`] to set the verbosity to the level of your choice. In order (from the least 58 | verbose to the most verbose), those levels (with their corresponding int values in parentheses) are: 59 | 60 | - `diffusers.logging.CRITICAL` or `diffusers.logging.FATAL` (int value, 50): only report the most 61 | critical errors. 62 | - `diffusers.logging.ERROR` (int value, 40): only report errors. 63 | - `diffusers.logging.WARNING` or `diffusers.logging.WARN` (int value, 30): only report errors and 64 | warnings. This is the default level used by the library. 65 | - `diffusers.logging.INFO` (int value, 20): report errors, warnings, and basic information. 66 | - `diffusers.logging.DEBUG` (int value, 10): report all information. 67 | 68 | By default, `tqdm` progress bars will be displayed during model download. [`logging.disable_progress_bar`] and [`logging.enable_progress_bar`] can be used to disable or re-enable this behavior. 69 | 70 | ## Base setters 71 | 72 | [[autodoc]] logging.set_verbosity_error 73 | 74 | [[autodoc]] logging.set_verbosity_warning 75 | 76 | [[autodoc]] logging.set_verbosity_info 77 | 78 | [[autodoc]] logging.set_verbosity_debug 79 | 80 | ## Other functions 81 | 82 | [[autodoc]] logging.get_verbosity 83 | 84 | [[autodoc]] logging.set_verbosity 85 | 86 | [[autodoc]] logging.get_logger 87 | 88 | [[autodoc]] logging.enable_default_handler 89 | 90 | [[autodoc]] logging.disable_default_handler 91 | 92 | [[autodoc]] logging.enable_explicit_format 93 | 94 | [[autodoc]] logging.reset_format 95 | 96 | [[autodoc]] logging.enable_progress_bar 97 | 98 | [[autodoc]] logging.disable_progress_bar 99 | -------------------------------------------------------------------------------- /docs/source/using-diffusers/inpaint.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Text-Guided Image-Inpainting 14 | 15 | The [`StableDiffusionInpaintPipeline`] lets you edit specific parts of an image by providing a mask and a text prompt. It uses a version of Stable Diffusion specifically trained for in-painting tasks.
16 | 17 | 18 | Note that this model is distributed separately from the regular Stable Diffusion model, so you have to accept its license even if you accepted the Stable Diffusion one in the past. 19 | 20 | Please visit the [model card](https://huggingface.co/runwayml/stable-diffusion-inpainting), read the license carefully and tick the checkbox if you agree. You have to be a registered user on the 🤗 Hugging Face Hub, and you'll also need to use an access token for the code to work. For more information on access tokens, please refer to [this section](https://huggingface.co/docs/hub/security-tokens) of the documentation. 21 | 22 | 23 | ```python 24 | import PIL 25 | import requests 26 | import torch 27 | from io import BytesIO 28 | 29 | from diffusers import StableDiffusionInpaintPipeline 30 | 31 | 32 | def download_image(url): 33 | response = requests.get(url) 34 | return PIL.Image.open(BytesIO(response.content)).convert("RGB") 35 | 36 | 37 | img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png" 38 | mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png" 39 | 40 | init_image = download_image(img_url).resize((512, 512)) 41 | mask_image = download_image(mask_url).resize((512, 512)) 42 | 43 | pipe = StableDiffusionInpaintPipeline.from_pretrained( 44 | "runwayml/stable-diffusion-inpainting", 45 | revision="fp16", 46 | torch_dtype=torch.float16, 47 | ) 48 | pipe = pipe.to("cuda") 49 | 50 | prompt = "Face of a yellow cat, high resolution, sitting on a park bench" 51 | image = pipe(prompt=prompt, image=init_image, mask_image=mask_image).images[0] 52 | ``` 53 | 54 | `image` | `mask_image` | `prompt` | **Output** | 55 | :-------------------------:|:-------------------------:|:-------------------------:|-------------------------:| 56 | drawing | drawing | ***Face of a yellow cat, high resolution, sitting on a park bench*** | drawing | 57 | 58 | 59 | You can also run this example on colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) 60 | 61 | 62 | A previous experimental implementation of in-painting used a different, lower-quality process. To ensure backwards compatibility, loading a pretrained pipeline that doesn't contain the new model will still apply the old in-painting method. 63 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from typing import Optional, Tuple, Union 3 | 4 | import torch 5 | 6 | from ...models import UNet2DModel 7 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 8 | from ...schedulers import ScoreSdeVeScheduler 9 | 10 | 11 | class ScoreSdeVePipeline(DiffusionPipeline): 12 | r""" 13 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 14 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 15 | 16 | Parameters: 17 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image.
scheduler ([`SchedulerMixin`]): 18 | The [`ScoreSdeVeScheduler`] scheduler to be used in combination with `unet` to denoise the encoded image. 19 | """ 20 | unet: UNet2DModel 21 | scheduler: ScoreSdeVeScheduler 22 | 23 | def __init__(self, unet: UNet2DModel, scheduler: ScoreSdeVeScheduler): 24 | super().__init__() 25 | self.register_modules(unet=unet, scheduler=scheduler) 26 | 27 | @torch.no_grad() 28 | def __call__( 29 | self, 30 | batch_size: int = 1, 31 | num_inference_steps: int = 2000, 32 | generator: Optional[torch.Generator] = None, 33 | output_type: Optional[str] = "pil", 34 | return_dict: bool = True, 35 | **kwargs, 36 | ) -> Union[ImagePipelineOutput, Tuple]: 37 | r""" 38 | Args: 39 | batch_size (`int`, *optional*, defaults to 1): 40 | The number of images to generate. 41 | generator (`torch.Generator`, *optional*): 42 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 43 | deterministic. 44 | output_type (`str`, *optional*, defaults to `"pil"`): 45 | The output format of the generated image. Choose between 46 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. 47 | return_dict (`bool`, *optional*, defaults to `True`): 48 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 49 | 50 | Returns: 51 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 52 | `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the 53 | generated images. 54 | """ 55 | 56 | img_size = self.unet.config.sample_size 57 | shape = (batch_size, 3, img_size, img_size) 58 | 59 | model = self.unet 60 | 61 | sample = torch.randn(*shape, generator=generator) * self.scheduler.init_noise_sigma 62 | sample = sample.to(self.device) 63 | 64 | self.scheduler.set_timesteps(num_inference_steps) 65 | self.scheduler.set_sigmas(num_inference_steps) 66 | 67 | for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): 68 | sigma_t = self.scheduler.sigmas[i] * torch.ones(shape[0], device=self.device) 69 | 70 | # correction step 71 | for _ in range(self.scheduler.config.correct_steps): 72 | model_output = self.unet(sample, sigma_t).sample 73 | sample = self.scheduler.step_correct(model_output, sample, generator=generator).prev_sample 74 | 75 | # prediction step 76 | model_output = model(sample, sigma_t).sample 77 | output = self.scheduler.step_pred(model_output, t, sample, generator=generator) 78 | 79 | sample, sample_mean = output.prev_sample, output.prev_sample_mean 80 | 81 | sample = sample_mean.clamp(0, 1) 82 | sample = sample.cpu().permute(0, 2, 3, 1).numpy() 83 | if output_type == "pil": 84 | sample = self.numpy_to_pil(sample) 85 | 86 | if not return_dict: 87 | return (sample,) 88 | 89 | return ImagePipelineOutput(images=sample) 90 | -------------------------------------------------------------------------------- /src/diffusers/utils/outputs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Generic utilities 16 | """ 17 | 18 | from collections import OrderedDict 19 | from dataclasses import fields 20 | from typing import Any, Tuple 21 | 22 | import numpy as np 23 | 24 | from .import_utils import is_torch_available 25 | 26 | 27 | def is_tensor(x): 28 | """ 29 | Tests if `x` is a `torch.Tensor` or `np.ndarray`. 30 | """ 31 | if is_torch_available(): 32 | import torch 33 | 34 | if isinstance(x, torch.Tensor): 35 | return True 36 | 37 | return isinstance(x, np.ndarray) 38 | 39 | 40 | class BaseOutput(OrderedDict): 41 | """ 42 | Base class for all model outputs as dataclasses. Has a `__getitem__` that allows indexing by integer or slice 43 | (like a tuple) or by string (like a dictionary); indexing ignores `None` attributes. Otherwise, it behaves like a 44 | regular Python dictionary. 45 | 46 | 47 | 48 | You can't unpack a `BaseOutput` directly. Use the [`~utils.BaseOutput.to_tuple`] method to convert it to a tuple 49 | beforehand. 50 | 51 | 52 | """ 53 | 54 | def __post_init__(self): 55 | class_fields = fields(self) 56 | 57 | # Safety and consistency checks 58 | if not len(class_fields): 59 | raise ValueError(f"{self.__class__.__name__} has no fields.") 60 | 61 | first_field = getattr(self, class_fields[0].name) 62 | other_fields_are_none = all(getattr(self, field.name) is None for field in class_fields[1:]) 63 | 64 | if other_fields_are_none and isinstance(first_field, dict): 65 | for key, value in first_field.items(): 66 | self[key] = value 67 | else: 68 | for field in class_fields: 69 | v = getattr(self, field.name) 70 | if v is not None: 71 | self[field.name] = v 72 | 73 | def __delitem__(self, *args, **kwargs): 74 | raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.") 75 | 76 | def setdefault(self, *args, **kwargs): 77 | raise Exception(f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance.") 78 | 79 | def pop(self, *args, **kwargs): 80 | raise Exception(f"You cannot use ``pop`` on a {self.__class__.__name__} instance.") 81 | 82 | def update(self, *args, **kwargs): 83 | raise Exception(f"You cannot use ``update`` on a {self.__class__.__name__} instance.") 84 | 85 | def __getitem__(self, k): 86 | if isinstance(k, str): 87 | inner_dict = {k: v for (k, v) in self.items()} 88 | return inner_dict[k] 89 | else: 90 | return self.to_tuple()[k] 91 | 92 | def __setattr__(self, name, value): 93 | if name in self.keys() and value is not None: 94 | # Don't call self.__setitem__ to avoid recursion errors 95 | super().__setitem__(name, value) 96 | super().__setattr__(name, value) 97 | 98 | def __setitem__(self, key, value): 99 | # Will raise a KeyError if needed 100 | super().__setitem__(key, value) 101 | # Don't call self.__setattr__ to avoid recursion errors 102 | super().__setattr__(key, value) 103 | 104 | def to_tuple(self) -> Tuple[Any]: 105 | """ 106 | Convert self to a tuple containing all the attributes/keys that are not `None`.
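Example (illustrative): for an output with a single non-`None` field, such as `ImagePipelineOutput(images=images)`, `to_tuple()` returns the one-element tuple `(images,)`, so `images, = output.to_tuple()` unpacks it.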
107 | """ 108 | return tuple(self[k] for k in self.keys()) 109 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/safety_checker_flax.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | import jax 4 | import jax.numpy as jnp 5 | from flax import linen as nn 6 | from flax.core.frozen_dict import FrozenDict 7 | from transformers import CLIPConfig, FlaxPreTrainedModel 8 | from transformers.models.clip.modeling_flax_clip import FlaxCLIPVisionModule 9 | 10 | 11 | def jax_cosine_distance(emb_1, emb_2, eps=1e-12): 12 | norm_emb_1 = jnp.divide(emb_1.T, jnp.clip(jnp.linalg.norm(emb_1, axis=1), a_min=eps)).T 13 | norm_emb_2 = jnp.divide(emb_2.T, jnp.clip(jnp.linalg.norm(emb_2, axis=1), a_min=eps)).T 14 | return jnp.matmul(norm_emb_1, norm_emb_2.T) 15 | 16 | 17 | class FlaxStableDiffusionSafetyCheckerModule(nn.Module): 18 | config: CLIPConfig 19 | dtype: jnp.dtype = jnp.float32 20 | 21 | def setup(self): 22 | self.vision_model = FlaxCLIPVisionModule(self.config.vision_config) 23 | self.visual_projection = nn.Dense(self.config.projection_dim, use_bias=False, dtype=self.dtype) 24 | 25 | self.concept_embeds = self.param("concept_embeds", jax.nn.initializers.ones, (17, self.config.projection_dim)) 26 | self.special_care_embeds = self.param( 27 | "special_care_embeds", jax.nn.initializers.ones, (3, self.config.projection_dim) 28 | ) 29 | 30 | self.concept_embeds_weights = self.param("concept_embeds_weights", jax.nn.initializers.ones, (17,)) 31 | self.special_care_embeds_weights = self.param("special_care_embeds_weights", jax.nn.initializers.ones, (3,)) 32 | 33 | def __call__(self, clip_input): 34 | pooled_output = self.vision_model(clip_input)[1] 35 | image_embeds = self.visual_projection(pooled_output) 36 | 37 | special_cos_dist = jax_cosine_distance(image_embeds, self.special_care_embeds) 38 | cos_dist = jax_cosine_distance(image_embeds, self.concept_embeds) 39 | 40 | # increase this value to create a stronger `nfsw` filter 41 | # at the cost of increasing the possibility of filtering benign image inputs 42 | adjustment = 0.0 43 | 44 | special_scores = special_cos_dist - self.special_care_embeds_weights[None, :] + adjustment 45 | special_scores = jnp.round(special_scores, 3) 46 | is_special_care = jnp.any(special_scores > 0, axis=1, keepdims=True) 47 | # Use a lower threshold if an image has any special care concept 48 | special_adjustment = is_special_care * 0.01 49 | 50 | concept_scores = cos_dist - self.concept_embeds_weights[None, :] + special_adjustment 51 | concept_scores = jnp.round(concept_scores, 3) 52 | has_nsfw_concepts = jnp.any(concept_scores > 0, axis=1) 53 | 54 | return has_nsfw_concepts 55 | 56 | 57 | class FlaxStableDiffusionSafetyChecker(FlaxPreTrainedModel): 58 | config_class = CLIPConfig 59 | main_input_name = "clip_input" 60 | module_class = FlaxStableDiffusionSafetyCheckerModule 61 | 62 | def __init__( 63 | self, 64 | config: CLIPConfig, 65 | input_shape: Optional[Tuple] = None, 66 | seed: int = 0, 67 | dtype: jnp.dtype = jnp.float32, 68 | _do_init: bool = True, 69 | **kwargs, 70 | ): 71 | if input_shape is None: 72 | input_shape = (1, 224, 224, 3) 73 | module = self.module_class(config=config, dtype=dtype, **kwargs) 74 | super().__init__(config, module, input_shape=input_shape, seed=seed, dtype=dtype, _do_init=_do_init) 75 | 76 | def init_weights(self, rng: jax.random.PRNGKey, input_shape: Tuple, params: FrozenDict = None) -> 
FrozenDict: 77 | # init input tensor 78 | clip_input = jax.random.normal(rng, input_shape) 79 | 80 | params_rng, dropout_rng = jax.random.split(rng) 81 | rngs = {"params": params_rng, "dropout": dropout_rng} 82 | 83 | random_params = self.module.init(rngs, clip_input)["params"] 84 | 85 | return random_params 86 | 87 | def __call__( 88 | self, 89 | clip_input, 90 | params: dict = None, 91 | ): 92 | clip_input = jnp.transpose(clip_input, (0, 2, 3, 1)) 93 | 94 | return self.module.apply( 95 | {"params": params or self.params}, 96 | jnp.array(clip_input, dtype=jnp.float32), 97 | rngs={}, 98 | ) 99 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/ddpm/pipeline_ddpm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | 17 | from typing import Optional, Tuple, Union 18 | 19 | import torch 20 | 21 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 22 | 23 | 24 | class DDPMPipeline(DiffusionPipeline): 25 | r""" 26 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 27 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 28 | 29 | Parameters: 30 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. 31 | scheduler ([`SchedulerMixin`]): 32 | A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of 33 | [`DDPMScheduler`], or [`DDIMScheduler`]. 34 | """ 35 | 36 | def __init__(self, unet, scheduler): 37 | super().__init__() 38 | self.register_modules(unet=unet, scheduler=scheduler) 39 | 40 | @torch.no_grad() 41 | def __call__( 42 | self, 43 | batch_size: int = 1, 44 | generator: Optional[torch.Generator] = None, 45 | num_inference_steps: int = 1000, 46 | output_type: Optional[str] = "pil", 47 | return_dict: bool = True, 48 | **kwargs, 49 | ) -> Union[ImagePipelineOutput, Tuple]: 50 | r""" 51 | Args: 52 | batch_size (`int`, *optional*, defaults to 1): 53 | The number of images to generate. 54 | generator (`torch.Generator`, *optional*): 55 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 56 | deterministic. 57 | num_inference_steps (`int`, *optional*, defaults to 1000): 58 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 59 | expense of slower inference. 60 | output_type (`str`, *optional*, defaults to `"pil"`): 61 | The output format of the generated image. Choose between 62 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. 63 | return_dict (`bool`, *optional*, defaults to `True`): 64 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple.
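Example (an illustrative sketch, not part of the original docstring; it assumes the `google/ddpm-cat-256` checkpoint is available on the Hub):

    >>> from diffusers import DDPMPipeline
    >>> pipe = DDPMPipeline.from_pretrained("google/ddpm-cat-256")
    >>> image = pipe(num_inference_steps=1000).images[0]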
65 | 66 | Returns: 67 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 68 | `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the 69 | generated images. 70 | """ 71 | 72 | # Sample Gaussian noise to begin loop 73 | image = torch.randn( 74 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 75 | generator=generator, 76 | ) 77 | image = image.to(self.device) 78 | 79 | # set step values 80 | self.scheduler.set_timesteps(num_inference_steps) 81 | 82 | for t in self.progress_bar(self.scheduler.timesteps): 83 | # 1. predict noise model_output 84 | model_output = self.unet(image, t).sample 85 | 86 | # 2. compute previous image: x_t -> x_t-1 87 | image = self.scheduler.step(model_output, t, image, generator=generator).prev_sample 88 | 89 | image = (image / 2 + 0.5).clamp(0, 1) 90 | image = image.cpu().permute(0, 2, 3, 1).numpy() 91 | if output_type == "pil": 92 | image = self.numpy_to_pil(image) 93 | 94 | if not return_dict: 95 | return (image,) 96 | 97 | return ImagePipelineOutput(images=image) 98 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/pndm/pipeline_pndm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | 17 | from typing import Optional, Tuple, Union 18 | 19 | import torch 20 | 21 | from ...models import UNet2DModel 22 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 23 | from ...schedulers import PNDMScheduler 24 | 25 | 26 | class PNDMPipeline(DiffusionPipeline): 27 | r""" 28 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 29 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 30 | 31 | Parameters: 32 | unet (`UNet2DModel`): U-Net architecture to denoise the encoded image latents. 33 | scheduler ([`SchedulerMixin`]): 34 | The `PNDMScheduler` to be used in combination with `unet` to denoise the encoded image. 35 | """ 36 | 37 | unet: UNet2DModel 38 | scheduler: PNDMScheduler 39 | 40 | def __init__(self, unet: UNet2DModel, scheduler: PNDMScheduler): 41 | super().__init__() 42 | self.register_modules(unet=unet, scheduler=scheduler) 43 | 44 | @torch.no_grad() 45 | def __call__( 46 | self, 47 | batch_size: int = 1, 48 | num_inference_steps: int = 50, 49 | generator: Optional[torch.Generator] = None, 50 | output_type: Optional[str] = "pil", 51 | return_dict: bool = True, 52 | **kwargs, 53 | ) -> Union[ImagePipelineOutput, Tuple]: 54 | r""" 55 | Args: 56 | batch_size (`int`, *optional*, defaults to 1): The number of images to generate. 57 | num_inference_steps (`int`, *optional*, defaults to 50): 58 | The number of denoising steps.
More denoising steps usually lead to a higher quality image at the 59 | expense of slower inference. 60 | generator (`torch.Generator`, *optional*): A [torch 61 | generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 62 | deterministic. 63 | output_type (`str`, *optional*, defaults to `"pil"`): The output format of the generated image. Choose 64 | between [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. 65 | return_dict (`bool`, *optional*, defaults to `True`): Whether or not to return a 66 | [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 67 | 68 | Returns: 69 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 70 | `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the 71 | generated images. 72 | """ 73 | # For more information on the sampling method you can take a look at Algorithm 2 of 74 | # the official paper: https://arxiv.org/pdf/2202.09778.pdf 75 | 76 | # Sample Gaussian noise to begin loop 77 | image = torch.randn( 78 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 79 | generator=generator, 80 | ) 81 | image = image.to(self.device) 82 | 83 | self.scheduler.set_timesteps(num_inference_steps) 84 | for t in self.progress_bar(self.scheduler.timesteps): 85 | model_output = self.unet(image, t).sample 86 | 87 | image = self.scheduler.step(model_output, t, image).prev_sample 88 | 89 | image = (image / 2 + 0.5).clamp(0, 1) 90 | image = image.cpu().permute(0, 2, 3, 1).numpy() 91 | if output_type == "pil": 92 | image = self.numpy_to_pil(image) 93 | 94 | if not return_dict: 95 | return (image,) 96 | 97 | return ImagePipelineOutput(images=image) 98 | -------------------------------------------------------------------------------- /scripts/change_naming_configs_and_checkpoints.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ Conversion script for the LDM checkpoints.
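Usage (an illustrative sketch; both flags are defined below via `argparse`, and the paths are placeholders):

    python scripts/change_naming_configs_and_checkpoints.py --repo_path <path/to/model/repo> --dump_path <path/to/output>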
""" 16 | 17 | import argparse 18 | import json 19 | import os 20 | 21 | import torch 22 | 23 | from diffusers import UNet2DConditionModel, UNet2DModel 24 | from transformers.file_utils import has_file 25 | 26 | 27 | do_only_config = False 28 | do_only_weights = True 29 | do_only_renaming = False 30 | 31 | 32 | if __name__ == "__main__": 33 | parser = argparse.ArgumentParser() 34 | 35 | parser.add_argument( 36 | "--repo_path", 37 | default=None, 38 | type=str, 39 | required=True, 40 | help="The config json file corresponding to the architecture.", 41 | ) 42 | 43 | parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output model.") 44 | 45 | args = parser.parse_args() 46 | 47 | config_parameters_to_change = { 48 | "image_size": "sample_size", 49 | "num_res_blocks": "layers_per_block", 50 | "block_channels": "block_out_channels", 51 | "down_blocks": "down_block_types", 52 | "up_blocks": "up_block_types", 53 | "downscale_freq_shift": "freq_shift", 54 | "resnet_num_groups": "norm_num_groups", 55 | "resnet_act_fn": "act_fn", 56 | "resnet_eps": "norm_eps", 57 | "num_head_channels": "attention_head_dim", 58 | } 59 | 60 | key_parameters_to_change = { 61 | "time_steps": "time_proj", 62 | "mid": "mid_block", 63 | "downsample_blocks": "down_blocks", 64 | "upsample_blocks": "up_blocks", 65 | } 66 | 67 | subfolder = "" if has_file(args.repo_path, "config.json") else "unet" 68 | 69 | with open(os.path.join(args.repo_path, subfolder, "config.json"), "r", encoding="utf-8") as reader: 70 | text = reader.read() 71 | config = json.loads(text) 72 | 73 | if do_only_config: 74 | for key in config_parameters_to_change.keys(): 75 | config.pop(key, None) 76 | 77 | if has_file(args.repo_path, "config.json"): 78 | model = UNet2DModel(**config) 79 | else: 80 | class_name = UNet2DConditionModel if "ldm-text2im-large-256" in args.repo_path else UNet2DModel 81 | model = class_name(**config) 82 | 83 | if do_only_config: 84 | model.save_config(os.path.join(args.repo_path, subfolder)) 85 | 86 | config = dict(model.config) 87 | 88 | if do_only_renaming: 89 | for key, value in config_parameters_to_change.items(): 90 | if key in config: 91 | config[value] = config[key] 92 | del config[key] 93 | 94 | config["down_block_types"] = [k.replace("UNetRes", "") for k in config["down_block_types"]] 95 | config["up_block_types"] = [k.replace("UNetRes", "") for k in config["up_block_types"]] 96 | 97 | if do_only_weights: 98 | state_dict = torch.load(os.path.join(args.repo_path, subfolder, "diffusion_pytorch_model.bin")) 99 | 100 | new_state_dict = {} 101 | for param_key, param_value in state_dict.items(): 102 | if param_key.endswith(".op.bias") or param_key.endswith(".op.weight"): 103 | continue 104 | has_changed = False 105 | for key, new_key in key_parameters_to_change.items(): 106 | if not has_changed and param_key.split(".")[0] == key: 107 | new_state_dict[".".join([new_key] + param_key.split(".")[1:])] = param_value 108 | has_changed = True 109 | if not has_changed: 110 | new_state_dict[param_key] = param_value 111 | 112 | model.load_state_dict(new_state_dict) 113 | model.save_pretrained(os.path.join(args.repo_path, subfolder)) 114 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import Optional, Tuple, Union 3 | 4 | import torch 5 | 6 | from ...models import 
UNet2DModel, VQModel 7 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 8 | from ...schedulers import DDIMScheduler 9 | 10 | 11 | class LDMPipeline(DiffusionPipeline): 12 | r""" 13 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 14 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 15 | 16 | Parameters: 17 | vqvae ([`VQModel`]): 18 | Vector-quantized (VQ) Model to encode and decode images to and from latent representations. 19 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image latents. 20 | scheduler ([`SchedulerMixin`]): 21 | [`DDIMScheduler`] is to be used in combination with `unet` to denoise the encoded image latents. 22 | """ 23 | 24 | def __init__(self, vqvae: VQModel, unet: UNet2DModel, scheduler: DDIMScheduler): 25 | super().__init__() 26 | self.register_modules(vqvae=vqvae, unet=unet, scheduler=scheduler) 27 | 28 | @torch.no_grad() 29 | def __call__( 30 | self, 31 | batch_size: int = 1, 32 | generator: Optional[torch.Generator] = None, 33 | eta: float = 0.0, 34 | num_inference_steps: int = 50, 35 | output_type: Optional[str] = "pil", 36 | return_dict: bool = True, 37 | **kwargs, 38 | ) -> Union[Tuple, ImagePipelineOutput]: 39 | r""" 40 | Args: 41 | batch_size (`int`, *optional*, defaults to 1): 42 | Number of images to generate. 43 | generator (`torch.Generator`, *optional*): 44 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 45 | deterministic. 46 | num_inference_steps (`int`, *optional*, defaults to 50): 47 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 48 | expense of slower inference. 49 | output_type (`str`, *optional*, defaults to `"pil"`): 50 | The output format of the generated image. Choose between 51 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. 52 | return_dict (`bool`, *optional*, defaults to `True`): 53 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 54 | 55 | Returns: 56 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 57 | `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the 58 | generated images.
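Example (an illustrative sketch, not part of the original docstring; it assumes the `CompVis/ldm-celebahq-256` checkpoint on the Hub):

    >>> from diffusers import LDMPipeline
    >>> pipe = LDMPipeline.from_pretrained("CompVis/ldm-celebahq-256")
    >>> image = pipe(num_inference_steps=50).images[0]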
59 | """ 60 | 61 | latents = torch.randn( 62 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 63 | generator=generator, 64 | ) 65 | latents = latents.to(self.device) 66 | 67 | # scale the initial noise by the standard deviation required by the scheduler 68 | latents = latents * self.scheduler.init_noise_sigma 69 | 70 | self.scheduler.set_timesteps(num_inference_steps) 71 | 72 | # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature 73 | accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) 74 | 75 | extra_kwargs = {} 76 | if accepts_eta: 77 | extra_kwargs["eta"] = eta 78 | 79 | for t in self.progress_bar(self.scheduler.timesteps): 80 | latent_model_input = self.scheduler.scale_model_input(latents, t) 81 | # predict the noise residual 82 | noise_prediction = self.unet(latent_model_input, t).sample 83 | # compute the previous noisy sample x_t -> x_t-1 84 | latents = self.scheduler.step(noise_prediction, t, latents, **extra_kwargs).prev_sample 85 | 86 | # decode the image latents with the VAE 87 | image = self.vqvae.decode(latents).sample 88 | 89 | image = (image / 2 + 0.5).clamp(0, 1) 90 | image = image.cpu().permute(0, 2, 3, 1).numpy() 91 | if output_type == "pil": 92 | image = self.numpy_to_pil(image) 93 | 94 | if not return_dict: 95 | return (image,) 96 | 97 | return ImagePipelineOutput(images=image) 98 | -------------------------------------------------------------------------------- /src/diffusers/models/resnet_flax.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import flax.linen as nn 15 | import jax 16 | import jax.numpy as jnp 17 | 18 | 19 | class FlaxUpsample2D(nn.Module): 20 | out_channels: int 21 | dtype: jnp.dtype = jnp.float32 22 | 23 | def setup(self): 24 | self.conv = nn.Conv( 25 | self.out_channels, 26 | kernel_size=(3, 3), 27 | strides=(1, 1), 28 | padding=((1, 1), (1, 1)), 29 | dtype=self.dtype, 30 | ) 31 | 32 | def __call__(self, hidden_states): 33 | batch, height, width, channels = hidden_states.shape 34 | hidden_states = jax.image.resize( 35 | hidden_states, 36 | shape=(batch, height * 2, width * 2, channels), 37 | method="nearest", 38 | ) 39 | hidden_states = self.conv(hidden_states) 40 | return hidden_states 41 | 42 | 43 | class FlaxDownsample2D(nn.Module): 44 | out_channels: int 45 | dtype: jnp.dtype = jnp.float32 46 | 47 | def setup(self): 48 | self.conv = nn.Conv( 49 | self.out_channels, 50 | kernel_size=(3, 3), 51 | strides=(2, 2), 52 | padding=((1, 1), (1, 1)), # padding="VALID", 53 | dtype=self.dtype, 54 | ) 55 | 56 | def __call__(self, hidden_states): 57 | # pad = ((0, 0), (0, 1), (0, 1), (0, 0)) # pad height and width dim 58 | # hidden_states = jnp.pad(hidden_states, pad_width=pad) 59 | hidden_states = self.conv(hidden_states) 60 | return hidden_states 61 | 62 | 63 | class FlaxResnetBlock2D(nn.Module): 64 | in_channels: int 65 | out_channels: int = None 66 | dropout_prob: float = 0.0 67 | use_nin_shortcut: bool = None 68 | dtype: jnp.dtype = jnp.float32 69 | 70 | def setup(self): 71 | out_channels = self.in_channels if self.out_channels is None else self.out_channels 72 | 73 | self.norm1 = nn.GroupNorm(num_groups=32, epsilon=1e-5) 74 | self.conv1 = nn.Conv( 75 | out_channels, 76 | kernel_size=(3, 3), 77 | strides=(1, 1), 78 | padding=((1, 1), (1, 1)), 79 | dtype=self.dtype, 80 | ) 81 | 82 | self.time_emb_proj = nn.Dense(out_channels, dtype=self.dtype) 83 | 84 | self.norm2 = nn.GroupNorm(num_groups=32, epsilon=1e-5) 85 | self.dropout = nn.Dropout(self.dropout_prob) 86 | self.conv2 = nn.Conv( 87 | out_channels, 88 | kernel_size=(3, 3), 89 | strides=(1, 1), 90 | padding=((1, 1), (1, 1)), 91 | dtype=self.dtype, 92 | ) 93 | 94 | use_nin_shortcut = self.in_channels != out_channels if self.use_nin_shortcut is None else self.use_nin_shortcut 95 | 96 | self.conv_shortcut = None 97 | if use_nin_shortcut: 98 | self.conv_shortcut = nn.Conv( 99 | out_channels, 100 | kernel_size=(1, 1), 101 | strides=(1, 1), 102 | padding="VALID", 103 | dtype=self.dtype, 104 | ) 105 | 106 | def __call__(self, hidden_states, temb, deterministic=True): 107 | residual = hidden_states 108 | hidden_states = self.norm1(hidden_states) 109 | hidden_states = nn.swish(hidden_states) 110 | hidden_states = self.conv1(hidden_states) 111 | 112 | temb = self.time_emb_proj(nn.swish(temb)) 113 | temb = jnp.expand_dims(jnp.expand_dims(temb, 1), 1) 114 | hidden_states = hidden_states + temb 115 | 116 | hidden_states = self.norm2(hidden_states) 117 | hidden_states = nn.swish(hidden_states) 118 | hidden_states = self.dropout(hidden_states, deterministic) 119 | hidden_states = self.conv2(hidden_states) 120 | 121 | if self.conv_shortcut is not None: 122 | residual = self.conv_shortcut(residual) 123 | 124 | return hidden_states + residual 125 | --------------------------------------------------------------------------------
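To close, here is a minimal usage sketch for `FlaxResnetBlock2D` (not part of the library; the shapes and the `PRNGKey` seed are arbitrary assumptions). Flax convolutions expect NHWC inputs, and the time embedding is projected to `out_channels` inside the block:

```python
import jax
import jax.numpy as jnp

# Dummy NHWC feature map and time embedding (shapes are illustrative)
hidden_states = jnp.zeros((1, 16, 16, 32))
temb = jnp.zeros((1, 128))

block = FlaxResnetBlock2D(in_channels=32, out_channels=64)
params = block.init(jax.random.PRNGKey(0), hidden_states, temb)

out = block.apply(params, hidden_states, temb)
assert out.shape == (1, 16, 16, 64)  # channels projected by the 1x1 shortcut conv
```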