├── tests ├── __init__.py ├── models │ ├── __init__.py │ ├── test_models_vae_flax.py │ ├── test_models_unet_1d.py │ └── test_models_vq.py ├── pipelines │ ├── __init__.py │ ├── ddim │ │ └── __init__.py │ ├── ddpm │ │ ├── __init__.py │ │ └── test_ddpm.py │ ├── pndm │ │ ├── __init__.py │ │ └── test_pndm.py │ ├── karras_ve │ │ ├── __init__.py │ │ └── test_karras_ve.py │ ├── score_sde_ve │ │ ├── __init__.py │ │ └── test_score_sde_ve.py │ ├── latent_diffusion │ │ └── __init__.py │ └── stable_diffusion │ │ ├── __init__.py │ │ ├── test_onnx_stable_diffusion_img2img.py │ │ ├── test_onnx_stable_diffusion_inpaint.py │ │ └── test_onnx_stable_diffusion.py ├── test_pipelines_common.py ├── test_pipelines_onnx_common.py ├── test_modeling_common_flax.py ├── conftest.py ├── test_outputs.py └── test_training.py ├── scripts ├── __init__.py ├── conversion_ldm_uncond.py └── change_naming_configs_and_checkpoints.py ├── pyproject.toml ├── MANIFEST.in ├── examples ├── textual_inversion │ ├── requirements.txt │ └── requirements_flax.txt ├── unconditional_image_generation │ └── requirements.txt ├── dreambooth │ ├── requirements.txt │ └── requirements_flax.txt ├── text_to_image │ ├── requirements.txt │ └── requirements_flax.txt ├── inference │ ├── image_to_image.py │ ├── inpainting.py │ └── README.md ├── community │ └── one_step_unet.py └── conftest.py ├── src └── diffusers │ ├── pipelines │ ├── ddim │ │ └── __init__.py │ ├── ddpm │ │ ├── __init__.py │ │ └── pipeline_ddpm.py │ ├── pndm │ │ ├── __init__.py │ │ └── pipeline_pndm.py │ ├── score_sde_ve │ │ ├── __init__.py │ │ └── pipeline_score_sde_ve.py │ ├── dance_diffusion │ │ └── __init__.py │ ├── latent_diffusion_uncond │ │ ├── __init__.py │ │ └── pipeline_latent_diffusion_uncond.py │ ├── stochastic_karras_ve │ │ └── __init__.py │ ├── latent_diffusion │ │ └── __init__.py │ ├── __init__.py │ └── stable_diffusion │ │ ├── __init__.py │ │ └── safety_checker_flax.py │ ├── models │ ├── README.md │ ├── __init__.py │ ├── embeddings_flax.py │ └── resnet_flax.py │ ├── schedulers │ ├── README.md │ ├── scheduling_utils.py │ ├── scheduling_utils_flax.py │ ├── __init__.py │ └── scheduling_sde_vp.py │ ├── utils │ ├── dummy_torch_and_scipy_objects.py │ ├── dummy_flax_and_transformers_objects.py │ ├── model_card_template.md │ ├── deprecation_utils.py │ ├── __init__.py │ ├── dummy_torch_and_transformers_and_onnx_objects.py │ ├── dummy_torch_and_transformers_objects.py │ └── outputs.py │ ├── commands │ ├── __init__.py │ ├── diffusers_cli.py │ └── env.py │ ├── dependency_versions_table.py │ ├── dependency_versions_check.py │ └── __init__.py ├── docs └── source │ ├── imgs │ └── diffusers_library.jpg │ ├── optimization │ ├── open_vino.mdx │ ├── onnx.mdx │ └── mps.mdx │ ├── conceptual │ ├── stable_diffusion.mdx │ └── philosophy.mdx │ ├── using-diffusers │ ├── configuration.mdx │ ├── loading.mdx │ ├── img2img.mdx │ ├── unconditional_image_generation.mdx │ ├── conditional_image_generation.mdx │ └── inpaint.mdx │ ├── api │ ├── configuration.mdx │ ├── pipelines │ │ ├── dance_diffusion.mdx │ │ ├── ddim.mdx │ │ ├── ddpm.mdx │ │ ├── stochastic_karras_ve.mdx │ │ ├── pndm.mdx │ │ ├── latent_diffusion_uncond.mdx │ │ ├── latent_diffusion.mdx │ │ └── score_sde_ve.mdx │ ├── diffusion_pipeline.mdx │ ├── outputs.mdx │ ├── models.mdx │ └── logging.mdx │ ├── _toctree.yml │ └── installation.mdx ├── .github ├── workflows │ ├── typos.yml │ ├── delete_doc_comment.yml │ ├── build_documentation.yml │ ├── build_pr_documentation.yml │ ├── stale.yml │ ├── pr_quality.yml │ ├── push_tests.yml │ └── pr_tests.yml └── 
ISSUE_TEMPLATE │ ├── feedback.md │ ├── config.yml │ ├── feature_request.md │ ├── new-model-addition.yml │ └── bug-report.yml ├── _typos.toml ├── setup.cfg ├── utils ├── print_env.py ├── get_modified_files.py ├── stale.py └── check_config_docstrings.py ├── .gitignore └── Makefile /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipelines/stable_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py36'] 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include src/diffusers/utils/model_card_template.md 3 | -------------------------------------------------------------------------------- /examples/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers>=4.21.0 4 | -------------------------------------------------------------------------------- /examples/unconditional_image_generation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | datasets 4 | -------------------------------------------------------------------------------- 
/src/diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_ddim import DDIMPipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_ddpm import DDPMPipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_pndm import PNDMPipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/dance_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_dance_diffusion import DanceDiffusionPipeline 3 | -------------------------------------------------------------------------------- /docs/source/imgs/diffusers_library.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CrazyBoyM/diffusers_dreambooth/main/docs/source/imgs/diffusers_library.jpg -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_latent_diffusion_uncond import LDMPipeline 3 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 3 | -------------------------------------------------------------------------------- /examples/dreambooth/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers>=0.5.0 2 | accelerate 3 | torchvision 4 | transformers>=4.21.0 5 | ftfy 6 | tensorboard 7 | modelcards -------------------------------------------------------------------------------- /examples/text_to_image/requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers==0.4.1 2 | accelerate 3 | torchvision 4 | transformers>=4.21.0 5 | ftfy 6 | tensorboard 7 | modelcards -------------------------------------------------------------------------------- /src/diffusers/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | For more detail on the models, please refer to the [docs](https://huggingface.co/docs/diffusers/api/models).
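4 | 5 | As a minimal sketch of typical usage (the checkpoint name `google/ddpm-cifar10-32` is only an illustrative example, and the `in_channels`/`sample_size` attributes are assumed to be exposed on the model as they are elsewhere in this repository), a pretrained model can be loaded from the Hub and run on a noisy sample: 6 | 7 | ```python 8 | import torch 9 | 10 | from diffusers import UNet2DModel 11 | 12 | # download the model weights and configuration from the Hub 13 | model = UNet2DModel.from_pretrained("google/ddpm-cifar10-32") 14 | 15 | # a dummy batch of noisy samples and a single timestep 16 | noise = torch.randn(1, model.in_channels, model.sample_size, model.sample_size) 17 | timestep = 1 18 | 19 | with torch.no_grad(): 20 |     output = model(noise, timestep).sample 21 | 22 | # the model output has the same shape as the input sample 23 | print(output.shape) 24 | ```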
-------------------------------------------------------------------------------- /examples/dreambooth/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | diffusers>=0.5.1 2 | transformers>=4.21.0 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | modelcards -------------------------------------------------------------------------------- /examples/text_to_image/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | diffusers>=0.5.1 2 | transformers>=4.21.0 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | modelcards -------------------------------------------------------------------------------- /examples/textual_inversion/requirements_flax.txt: -------------------------------------------------------------------------------- 1 | diffusers>=0.5.1 2 | transformers>=4.21.0 3 | flax 4 | optax 5 | torch 6 | torchvision 7 | ftfy 8 | tensorboard 9 | modelcards -------------------------------------------------------------------------------- /src/diffusers/schedulers/README.md: -------------------------------------------------------------------------------- 1 | # Schedulers 2 | 3 | For more information on the schedulers, please refer to the [docs](https://huggingface.co/docs/diffusers/api/schedulers). -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from ...utils import is_transformers_available 3 | 4 | 5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /.github/workflows/typos.yml: -------------------------------------------------------------------------------- 1 | name: Check typos 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: typos-action 14 | uses: crate-ci/typos@v1.12.4 15 | -------------------------------------------------------------------------------- /examples/inference/image_to_image.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionImg2ImgPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `image_to_image.py` script is outdated. Please directly use `from diffusers import" 8 | " StableDiffusionImg2ImgPipeline` instead." 9 | ) 10 | -------------------------------------------------------------------------------- /examples/inference/inpainting.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionInpaintPipeline as StableDiffusionInpaintPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `inpainting.py` script is outdated. Please directly use `from diffusers import" 8 | " StableDiffusionInpaintPipeline` instead."
9 | ) 10 | -------------------------------------------------------------------------------- /.github/workflows/delete_doc_comment.yml: -------------------------------------------------------------------------------- 1 | name: Delete dev documentation 2 | 3 | on: 4 | pull_request: 5 | types: [ closed ] 6 | 7 | 8 | jobs: 9 | delete: 10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main 11 | with: 12 | pr_number: ${{ github.event.number }} 13 | package: diffusers 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feedback.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "💬 Feedback about API Design" 3 | about: Give feedback about the current API design 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What API design would you like to have changed or added to the library? Why?** 11 | 12 | **What use case would this enable or better enable? Can you give us a code example?** 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | contact_links: 2 | - name: Forum 3 | url: https://discuss.huggingface.co/c/discussion-related-to-httpsgithubcomhuggingfacediffusers/63 4 | about: General usage questions and community discussions 5 | - name: Blank issue 6 | url: https://github.com/huggingface/diffusers/issues/new 7 | about: Please note that the Forum is usually the right place for discussions 8 | -------------------------------------------------------------------------------- /tests/test_pipelines_common.py: -------------------------------------------------------------------------------- 1 | from diffusers.utils.testing_utils import require_torch 2 | 3 | 4 | @require_torch 5 | class PipelineTesterMixin: 6 | """ 7 | This mixin is designed to be used with unittest.TestCase classes. 8 | It provides a set of common tests for each PyTorch pipeline, e.g. saving and loading the pipeline, 9 | equivalence of dict and tuple outputs, etc. 10 | """ 11 | 12 | pass 13 | -------------------------------------------------------------------------------- /tests/test_pipelines_onnx_common.py: -------------------------------------------------------------------------------- 1 | from diffusers.utils.testing_utils import require_onnxruntime 2 | 3 | 4 | @require_onnxruntime 5 | class OnnxPipelineTesterMixin: 6 | """ 7 | This mixin is designed to be used with unittest.TestCase classes. 8 | It provides a set of common tests for each ONNXRuntime pipeline, e.g. saving and loading the pipeline, 9 | equivalence of dict and tuple outputs, etc.
10 | """ 11 | 12 | pass 13 | -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - doc-builder* 8 | - v*-release 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.sha }} 15 | package: diffusers 16 | secrets: 17 | token: ${{ secrets.HUGGINGFACE_PUSH }} 18 | -------------------------------------------------------------------------------- /_typos.toml: -------------------------------------------------------------------------------- 1 | # Files for typos 2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started 3 | 4 | [default.extend-identifiers] 5 | 6 | [default.extend-words] 7 | NIN="NIN" # NIN is used in scripts/convert_ncsnpp_original_checkpoint_to_diffusers.py 8 | nd="np" # nd may be np (numpy) 9 | parms="parms" # parms is used in scripts/convert_original_stable_diffusion_to_diffusers.py 10 | 11 | 12 | [files] 13 | extend-exclude = ["_typos.toml"] 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = accelerate 7 | known_third_party = 8 | numpy 9 | torch 10 | torch_xla 11 | 12 | line_length = 119 13 | lines_after_imports = 2 14 | multi_line_output = 3 15 | use_parentheses = True 16 | 17 | [flake8] 18 | ignore = E203, E722, E501, E741, W503, W605 19 | max-line-length = 119 20 | per-file-ignores = __init__.py:F401 21 | -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | 6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.event.pull_request.head.sha }} 15 | pr_number: ${{ github.event.number }} 16 | package: diffusers 17 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_torch_and_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class LMSDiscreteScheduler(metaclass=DummyObject): 8 | _backends = ["torch", "scipy"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["torch", "scipy"]) 12 | 13 | @classmethod 14 | def from_config(cls, *args, **kwargs): 15 | requires_backends(cls, ["torch", "scipy"]) 16 | 17 | @classmethod 18 | def from_pretrained(cls, *args, **kwargs): 19 | requires_backends(cls, ["torch", "scipy"]) 20 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Stale Bot 2 | 3 | on: 4 | schedule: 5 | - cron: "0 15 * * *" 6 | 7 | jobs: 8 | close_stale_issues: 9 | name: Close Stale Issues 10 | if: github.repository == 'huggingface/diffusers' 11 | runs-on: ubuntu-latest 12 | env: 13 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - name: Setup Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: 3.7 21 | 22 | - name: Install requirements 23 | run: | 24 | pip install PyGithub 25 | - name: Close stale issues 26 | run: | 27 | python utils/stale.py 28 | -------------------------------------------------------------------------------- /docs/source/optimization/open_vino.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # OpenVINO 14 | 15 | Under construction 🚧 16 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_flax_and_transformers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class FlaxStableDiffusionPipeline(metaclass=DummyObject): 8 | _backends = ["flax", "transformers"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["flax", "transformers"]) 12 | 13 | @classmethod 14 | def from_config(cls, *args, **kwargs): 15 | requires_backends(cls, ["flax", "transformers"]) 16 | 17 | @classmethod 18 | def from_pretrained(cls, *args, **kwargs): 19 | requires_backends(cls, ["flax", "transformers"]) 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature request" 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /examples/community/one_step_unet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import torch 3 | 4 | from diffusers import DiffusionPipeline 5 | 6 | 7 | class UnetSchedulerOneForwardPipeline(DiffusionPipeline): 8 | def __init__(self, unet, scheduler): 9 | super().__init__() 10 | 11 | self.register_modules(unet=unet, scheduler=scheduler) 12 | 13 | def __call__(self): 14 | image = torch.randn( 15 | (1, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 16 | ) 17 | timestep = 1 18 | 19 | model_output = self.unet(image, timestep).sample 20 | scheduler_output = self.scheduler.step(model_output, timestep, image).prev_sample 21 | 22 | return scheduler_output 23 | -------------------------------------------------------------------------------- /docs/source/conceptual/stable_diffusion.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Stable Diffusion 14 | 15 | Please visit this [very detailed blog post](https://huggingface.co/blog/stable_diffusion) on Stable Diffusion! 16 | -------------------------------------------------------------------------------- /examples/inference/README.md: -------------------------------------------------------------------------------- 1 | # Inference Examples 2 | 3 | **The inference examples folder is deprecated and will be removed in a future version**. 4 | **Officially supported inference examples can be found in the [Pipelines folder](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines)**. 5 | 6 | - For `Image-to-Image text-guided generation with Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 7 | - For `In-painting using Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 8 | - For `Tweak prompts reusing seeds and latents`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 9 | -------------------------------------------------------------------------------- /docs/source/using-diffusers/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | 15 | # Configuration 16 | 17 | Configurations in Diffusers are handled with the `ConfigMixin` class. 18 | 19 | [[autodoc]] ConfigMixin 20 | 21 | Under further construction 🚧, open a [PR](https://github.com/huggingface/diffusers/compare) if you want to contribute! 22 | -------------------------------------------------------------------------------- /src/diffusers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseDiffusersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /docs/source/api/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration 14 | 15 | In Diffusers, schedulers of type [`schedulers.scheduling_utils.SchedulerMixin`] and models of type [`ModelMixin`] inherit from [`ConfigMixin`], which conveniently takes care of storing all parameters that are 16 | passed to the respective `__init__` methods in a JSON configuration file. 17 | 18 | TODO(PVP) - add example and better info here 19 | 20 | ## ConfigMixin 21 | [[autodoc]] ConfigMixin 22 | - from_config 23 | - save_config 24 | -------------------------------------------------------------------------------- /src/diffusers/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ..utils import is_flax_available, is_torch_available 16 | 17 | 18 | if is_torch_available(): 19 | from .unet_1d import UNet1DModel 20 | from .unet_2d import UNet2DModel 21 | from .unet_2d_condition import UNet2DConditionModel 22 | from .vae import AutoencoderKL, VQModel 23 | 24 | if is_flax_available(): 25 | from .unet_2d_condition_flax import FlaxUNet2DConditionModel 26 | from .vae_flax import FlaxAutoencoderKL 27 | -------------------------------------------------------------------------------- /src/diffusers/dependency_versions_table.py: -------------------------------------------------------------------------------- 1 | # THIS FILE HAS BEEN AUTOGENERATED. To update: 2 | # 1. modify the `_deps` dict in setup.py 3 | # 2.
run `make deps_table_update` 4 | deps = { 5 | "Pillow": "Pillow<10.0", 6 | "accelerate": "accelerate>=0.11.0", 7 | "black": "black==22.8", 8 | "datasets": "datasets", 9 | "filelock": "filelock", 10 | "flake8": "flake8>=3.8.3", 11 | "flax": "flax>=0.4.1", 12 | "hf-doc-builder": "hf-doc-builder>=0.3.0", 13 | "huggingface-hub": "huggingface-hub>=0.10.0", 14 | "importlib_metadata": "importlib_metadata", 15 | "isort": "isort>=5.5.4", 16 | "jax": "jax>=0.2.8,!=0.3.2,<=0.3.6", 17 | "jaxlib": "jaxlib>=0.1.65,<=0.3.6", 18 | "modelcards": "modelcards>=0.1.4", 19 | "numpy": "numpy", 20 | "onnxruntime": "onnxruntime", 21 | "parameterized": "parameterized", 22 | "pytest": "pytest", 23 | "pytest-timeout": "pytest-timeout", 24 | "pytest-xdist": "pytest-xdist", 25 | "scipy": "scipy", 26 | "regex": "regex!=2019.12.17", 27 | "requests": "requests", 28 | "tensorboard": "tensorboard", 29 | "torch": "torch>=1.4", 30 | "torchvision": "torchvision", 31 | "transformers": "transformers>=4.21.0", 32 | } 33 | -------------------------------------------------------------------------------- /tests/models/test_models_vae_flax.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from diffusers import FlaxAutoencoderKL 4 | from diffusers.utils import is_flax_available 5 | from diffusers.utils.testing_utils import require_flax 6 | 7 | from ..test_modeling_common_flax import FlaxModelTesterMixin 8 | 9 | 10 | if is_flax_available(): 11 | import jax 12 | 13 | 14 | @require_flax 15 | class FlaxAutoencoderKLTests(FlaxModelTesterMixin, unittest.TestCase): 16 | model_class = FlaxAutoencoderKL 17 | 18 | @property 19 | def dummy_input(self): 20 | batch_size = 4 21 | num_channels = 3 22 | sizes = (32, 32) 23 | 24 | prng_key = jax.random.PRNGKey(0) 25 | image = jax.random.uniform(prng_key, ((batch_size, num_channels) + sizes)) 26 | 27 | return {"sample": image, "prng_key": prng_key} 28 | 29 | def prepare_init_args_and_inputs_for_common(self): 30 | init_dict = { 31 | "block_out_channels": [32, 64], 32 | "in_channels": 3, 33 | "out_channels": 3, 34 | "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], 35 | "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], 36 | "latent_channels": 4, 37 | } 38 | inputs_dict = self.dummy_input 39 | return init_dict, inputs_dict 40 | -------------------------------------------------------------------------------- /docs/source/using-diffusers/loading.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Loading 14 | 15 | The core functionality for saving and loading in `Diffusers` is built on the Hugging Face Hub. 16 | 17 | [[autodoc]] modeling_utils.ModelMixin 18 | - from_pretrained 19 | - save_pretrained 20 | 21 | [[autodoc]] pipeline_utils.DiffusionPipeline 22 | - from_pretrained 23 | - save_pretrained 24 | 25 | [[autodoc]] modeling_flax_utils.FlaxModelMixin 26 | - from_pretrained 27 | - save_pretrained 28 | 29 | [[autodoc]] pipeline_flax_utils.FlaxDiffusionPipeline 30 | - from_pretrained 31 | - save_pretrained 32 | 33 | 34 | Under further construction 🚧, open a [PR](https://github.com/huggingface/diffusers/compare) if you want to contribute!
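35 | 36 | As a minimal sketch of the typical round trip (the checkpoint name `runwayml/stable-diffusion-v1-5` and the local folder name are only illustrative examples), a pipeline can be downloaded from the Hub, saved locally, and reloaded: 37 | 38 | ```python 39 | from diffusers import DiffusionPipeline 40 | 41 | # downloads all pipeline components (models, scheduler, configurations) from the Hub 42 | pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5") 43 | 44 | # writes every component to a local folder ... 45 | pipeline.save_pretrained("./my-pipeline") 46 | 47 | # ... from which the pipeline can be restored without re-downloading 48 | pipeline = DiffusionPipeline.from_pretrained("./my-pipeline") 49 | ```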
35 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from ..utils import is_flax_available, is_onnx_available, is_torch_available, is_transformers_available 2 | 3 | 4 | if is_torch_available(): 5 | from .dance_diffusion import DanceDiffusionPipeline 6 | from .ddim import DDIMPipeline 7 | from .ddpm import DDPMPipeline 8 | from .latent_diffusion_uncond import LDMPipeline 9 | from .pndm import PNDMPipeline 10 | from .score_sde_ve import ScoreSdeVePipeline 11 | from .stochastic_karras_ve import KarrasVePipeline 12 | else: 13 | from ..utils.dummy_pt_objects import * # noqa F403 14 | 15 | if is_torch_available() and is_transformers_available(): 16 | from .latent_diffusion import LDMTextToImagePipeline 17 | from .stable_diffusion import ( 18 | StableDiffusionImg2ImgPipeline, 19 | StableDiffusionInpaintPipeline, 20 | StableDiffusionInpaintPipelineLegacy, 21 | StableDiffusionPipeline, 22 | ) 23 | 24 | if is_transformers_available() and is_onnx_available(): 25 | from .stable_diffusion import ( 26 | OnnxStableDiffusionImg2ImgPipeline, 27 | OnnxStableDiffusionInpaintPipeline, 28 | OnnxStableDiffusionPipeline, 29 | StableDiffusionOnnxPipeline, 30 | ) 31 | 32 | if is_transformers_available() and is_flax_available(): 33 | from .stable_diffusion import FlaxStableDiffusionPipeline 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-model-addition.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F31F New model/pipeline/scheduler addition" 2 | description: Submit a proposal/request to implement a new diffusion model / pipeline / scheduler 3 | labels: [ "New model/pipeline/scheduler" ] 4 | 5 | body: 6 | - type: textarea 7 | id: description-request 8 | validations: 9 | required: true 10 | attributes: 11 | label: Model/Pipeline/Scheduler description 12 | description: | 13 | Put any and all important information relative to the model/pipeline/scheduler 14 | 15 | - type: checkboxes 16 | id: information-tasks 17 | attributes: 18 | label: Open source status 19 | description: | 20 | Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `diffusers`. 21 | options: 22 | - label: "The model implementation is available" 23 | - label: "The model weights are available (Only relevant if addition is not a scheduler)." 24 | 25 | - type: textarea 26 | id: additional-info 27 | attributes: 28 | label: Provide useful links for the implementation 29 | description: | 30 | Please provide information regarding the implementation, the weights, and the authors. 31 | Please mention the authors by @gh-username if you're aware of their usernames. 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: Report a bug on diffusers 3 | labels: [ "bug" ] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Thanks for taking the time to fill out this bug report! 9 | - type: textarea 10 | id: bug-description 11 | attributes: 12 | label: Describe the bug 13 | description: A clear and concise description of what the bug is. 
If you intend to submit a pull request for this issue, tell us in the description. Thanks! 14 | placeholder: Bug description 15 | validations: 16 | required: true 17 | - type: textarea 18 | id: reproduction 19 | attributes: 20 | label: Reproduction 21 | description: Please provide a minimal reproducible example that we can copy/paste to reproduce the issue. 22 | placeholder: Reproduction 23 | - type: textarea 24 | id: logs 25 | attributes: 26 | label: Logs 27 | description: "Please include the Python logs if you can." 28 | render: shell 29 | - type: textarea 30 | id: system-info 31 | attributes: 32 | label: System Info 33 | description: Please share your system info with us. You can run the command `diffusers-cli env` and copy-paste its output below. 34 | placeholder: diffusers version, platform, python version, ... 35 | validations: 36 | required: true 37 | -------------------------------------------------------------------------------- /src/diffusers/commands/diffusers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from argparse import ArgumentParser 17 | 18 | from .env import EnvironmentCommand 19 | 20 | 21 | def main(): 22 | parser = ArgumentParser("Diffusers CLI tool", usage="diffusers-cli <command> [<args>]") 23 | commands_parser = parser.add_subparsers(help="diffusers-cli command helpers") 24 | 25 | # Register commands 26 | EnvironmentCommand.register_subcommand(commands_parser) 27 | 28 | # Let's go 29 | args = parser.parse_args() 30 | 31 | if not hasattr(args, "func"): 32 | parser.print_help() 33 | exit(1) 34 | 35 | # Run 36 | service = args.func(args) 37 | service.run() 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /src/diffusers/schedulers/scheduling_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass 15 | 16 | import torch 17 | 18 | from ..utils import BaseOutput 19 | 20 | 21 | SCHEDULER_CONFIG_NAME = "scheduler_config.json" 22 | 23 | 24 | @dataclass 25 | class SchedulerOutput(BaseOutput): 26 | """ 27 | Base class for the scheduler's step function output.
28 | 29 | Args: 30 | prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): 31 | Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the 32 | denoising loop. 33 | """ 34 | 35 | prev_sample: torch.FloatTensor 36 | 37 | 38 | class SchedulerMixin: 39 | """ 40 | Mixin containing common functions for the schedulers. 41 | """ 42 | 43 | config_name = SCHEDULER_CONFIG_NAME 44 | -------------------------------------------------------------------------------- /docs/source/api/pipelines/dance_diffusion.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Dance Diffusion 14 | 15 | ## Overview 16 | 17 | [Dance Diffusion](https://github.com/Harmonai-org/sample-generator) by Zach Evans. 18 | 19 | Dance Diffusion is the first in a suite of generative audio tools for producers and musicians to be released by Harmonai. 20 | For more info or to get involved in the development of these tools, please visit https://harmonai.org and fill out the form on the front page. 21 | 22 | The original codebase of this implementation can be found [here](https://github.com/Harmonai-org/sample-generator). 23 | 24 | ## Available Pipelines: 25 | 26 | | Pipeline | Tasks | Colab 27 | |---|---|:---:| 28 | | [pipeline_dance_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py) | *Unconditional Audio Generation* | - | 29 | 30 | 31 | ## DanceDiffusionPipeline 32 | [[autodoc]] DanceDiffusionPipeline 33 | - __call__ 34 | -------------------------------------------------------------------------------- /.github/workflows/pr_quality.yml: -------------------------------------------------------------------------------- 1 | name: Run code quality checks 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | push: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | check_code_quality: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Set up Python 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: "3.7" 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install .[quality] 28 | - name: Check quality 29 | run: | 30 | black --check --preview examples tests src utils scripts 31 | isort --check-only examples tests src utils scripts 32 | flake8 examples tests src utils scripts 33 | doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source 34 | 35 | check_repository_consistency: 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v3 39 | - name: Set up Python 40 | uses: actions/setup-python@v4 41 | with: 42 | python-version: "3.7" 43 | - name: Install dependencies 44 | run: | 45 | python -m pip install --upgrade pip 46 | pip install .[quality] 47 | - name: Check quality 48 | run: | 49 | python utils/check_copies.py 50 | python utils/check_dummies.py 51 | -------------------------------------------------------------------------------- /utils/print_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # coding=utf-8 4 | # Copyright 2022 The HuggingFace Inc. team. 
5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # this script dumps information about the environment 19 | 20 | import os 21 | import platform 22 | import sys 23 | 24 | 25 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 26 | 27 | print("Python version:", sys.version) 28 | 29 | print("OS platform:", platform.platform()) 30 | print("OS architecture:", platform.machine()) 31 | 32 | try: 33 | import torch 34 | 35 | print("Torch version:", torch.__version__) 36 | print("Cuda available:", torch.cuda.is_available()) 37 | print("Cuda version:", torch.version.cuda) 38 | print("CuDNN version:", torch.backends.cudnn.version()) 39 | print("Number of GPUs available:", torch.cuda.device_count()) 40 | except ImportError: 41 | print("Torch version:", None) 42 | 43 | try: 44 | import transformers 45 | 46 | print("transformers version:", transformers.__version__) 47 | except ImportError: 48 | print("transformers version:", None) 49 | -------------------------------------------------------------------------------- /utils/get_modified_files.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # this script reports modified .py files under the desired list of top-level sub-dirs passed as a list of arguments, e.g.: 17 | # python ./utils/get_modified_files.py utils src tests examples 18 | # 19 | # it uses git to find the forking point and which files were modified - i.e. 
files not under git won't be considered 20 | # since the output of this script is fed into Makefile commands it doesn't print a newline after the results 21 | 22 | import re 23 | import subprocess 24 | import sys 25 | 26 | 27 | fork_point_sha = subprocess.check_output("git merge-base main HEAD".split()).decode("utf-8") 28 | modified_files = subprocess.check_output(f"git diff --name-only {fork_point_sha}".split()).decode("utf-8").split() 29 | 30 | joined_dirs = "|".join(sys.argv[1:]) 31 | regex = re.compile(rf"^({joined_dirs}).*?\.py$") 32 | 33 | relevant_modified_files = [x for x in modified_files if regex.match(x)] 34 | print(" ".join(relevant_modified_files), end="") 35 | -------------------------------------------------------------------------------- /tests/models/test_models_unet_1d.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | 18 | import torch 19 | 20 | from diffusers import UNet1DModel 21 | from diffusers.utils import slow, torch_device 22 | 23 | 24 | torch.backends.cuda.matmul.allow_tf32 = False 25 | 26 | 27 | class UnetModel1DTests(unittest.TestCase): 28 | @slow 29 | def test_unet_1d_maestro(self): 30 | model_id = "harmonai/maestro-150k" 31 | model = UNet1DModel.from_pretrained(model_id, subfolder="unet", device_map="auto") 32 | model.to(torch_device) 33 | 34 | sample_size = 65536 35 | noise = torch.sin(torch.arange(sample_size)[None, None, :].repeat(1, 2, 1)).to(torch_device) 36 | timestep = torch.tensor([1]).to(torch_device) 37 | 38 | with torch.no_grad(): 39 | output = model(noise, timestep).sample 40 | 41 | output_sum = output.abs().sum() 42 | output_max = output.abs().max() 43 | 44 | assert (output_sum - 224.0896).abs() < 4e-2 45 | assert (output_max - 0.0607).abs() < 4e-4 46 | -------------------------------------------------------------------------------- /tests/test_modeling_common_flax.py: -------------------------------------------------------------------------------- 1 | from diffusers.utils import is_flax_available 2 | from diffusers.utils.testing_utils import require_flax 3 | 4 | 5 | if is_flax_available(): 6 | import jax 7 | 8 | 9 | @require_flax 10 | class FlaxModelTesterMixin: 11 | def test_output(self): 12 | init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common() 13 | 14 | model = self.model_class(**init_dict) 15 | variables = model.init(inputs_dict["prng_key"], inputs_dict["sample"]) 16 | jax.lax.stop_gradient(variables) 17 | 18 | output = model.apply(variables, inputs_dict["sample"]) 19 | 20 | if isinstance(output, dict): 21 | output = output.sample 22 | 23 | self.assertIsNotNone(output) 24 | expected_shape = inputs_dict["sample"].shape 25 | self.assertEqual(output.shape, expected_shape, "Input and output shapes do not match") 26 | 27 | def test_forward_with_norm_groups(self): 28 | init_dict, inputs_dict = 
self.prepare_init_args_and_inputs_for_common() 29 | 30 | init_dict["norm_num_groups"] = 16 31 | init_dict["block_out_channels"] = (16, 32) 32 | 33 | model = self.model_class(**init_dict) 34 | variables = model.init(inputs_dict["prng_key"], inputs_dict["sample"]) 35 | jax.lax.stop_gradient(variables) 36 | 37 | output = model.apply(variables, inputs_dict["sample"]) 38 | 39 | if isinstance(output, dict): 40 | output = output.sample 41 | 42 | self.assertIsNotNone(output) 43 | expected_shape = inputs_dict["sample"].shape 44 | self.assertEqual(output.shape, expected_shape, "Input and output shapes do not match") 45 | -------------------------------------------------------------------------------- /src/diffusers/utils/model_card_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | {{ card_data }} 3 | --- 4 | 5 | 7 | 8 | # {{ model_name | default("Diffusion Model") }} 9 | 10 | ## Model description 11 | 12 | This diffusion model is trained with the [🤗 Diffusers](https://github.com/huggingface/diffusers) library 13 | on the `{{ dataset_name }}` dataset. 14 | 15 | ## Intended uses & limitations 16 | 17 | #### How to use 18 | 19 | ```python 20 | # TODO: add an example code snippet for running this diffusion pipeline 21 | ``` 22 | 23 | #### Limitations and bias 24 | 25 | [TODO: provide examples of latent issues and potential remediations] 26 | 27 | ## Training data 28 | 29 | [TODO: describe the data used to train the model] 30 | 31 | ### Training hyperparameters 32 | 33 | The following hyperparameters were used during training: 34 | - learning_rate: {{ learning_rate }} 35 | - train_batch_size: {{ train_batch_size }} 36 | - eval_batch_size: {{ eval_batch_size }} 37 | - gradient_accumulation_steps: {{ gradient_accumulation_steps }} 38 | - optimizer: AdamW with betas=({{ adam_beta1 }}, {{ adam_beta2 }}), weight_decay={{ adam_weight_decay }} and epsilon={{ adam_epsilon }} 39 | - lr_scheduler: {{ lr_scheduler }} 40 | - lr_warmup_steps: {{ lr_warmup_steps }} 41 | - ema_inv_gamma: {{ ema_inv_gamma }} 42 | - ema_power: {{ ema_power }} 43 | - ema_max_decay: {{ ema_max_decay }} 44 | - mixed_precision: {{ mixed_precision }} 45 | 46 | ### Training results 47 | 48 | 📈 [TensorBoard logs](https://huggingface.co/{{ repo_name }}/tensorboard?#scalars) 49 | 50 | 51 | -------------------------------------------------------------------------------- /src/diffusers/schedulers/scheduling_utils_flax.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass 15 | from typing import Tuple 16 | 17 | import jax.numpy as jnp 18 | 19 | from ..utils import BaseOutput 20 | 21 | 22 | SCHEDULER_CONFIG_NAME = "scheduler_config.json" 23 | 24 | 25 | @dataclass 26 | class FlaxSchedulerOutput(BaseOutput): 27 | """ 28 | Base class for the scheduler's step function output.
29 | 30 | Args: 31 | prev_sample (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)` for images): 32 | Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the 33 | denoising loop. 34 | """ 35 | 36 | prev_sample: jnp.ndarray 37 | 38 | 39 | class FlaxSchedulerMixin: 40 | """ 41 | Mixin containing common functions for the schedulers. 42 | """ 43 | 44 | config_name = SCHEDULER_CONFIG_NAME 45 | 46 | 47 | def broadcast_to_shape_from_left(x: jnp.ndarray, shape: Tuple[int]) -> jnp.ndarray: 48 | assert len(shape) >= x.ndim 49 | return jnp.broadcast_to(x.reshape(x.shape + (1,) * (len(shape) - x.ndim)), shape) 50 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import sys 19 | import warnings 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(dirname(__file__)), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | # silence FutureWarning warnings in tests since often we can't act on them until 29 | # they become normal warnings - i.e. the tests still need to test the current functionality 30 | warnings.simplefilter(action="ignore", category=FutureWarning) 31 | 32 | 33 | def pytest_addoption(parser): 34 | from diffusers.utils.testing_utils import pytest_addoption_shared 35 | 36 | pytest_addoption_shared(parser) 37 | 38 | 39 | def pytest_terminal_summary(terminalreporter): 40 | from diffusers.utils.testing_utils import pytest_terminal_summary_main 41 | 42 | make_reports = terminalreporter.config.getoption("--make-reports") 43 | if make_reports: 44 | pytest_terminal_summary_main(terminalreporter, id=make_reports) 45 | -------------------------------------------------------------------------------- /docs/source/api/diffusion_pipeline.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Pipelines 14 | 15 | The [`DiffusionPipeline`] is the easiest way to load any pretrained diffusion pipeline from the [Hub](https://huggingface.co/models?library=diffusers) and to use it in inference. 16 | 17 | <Tip> 18 | 19 | One should not use the Diffusion Pipeline class for training or fine-tuning a diffusion model. Individual 20 | components of diffusion pipelines are usually trained individually, so we suggest working directly 21 | with [`UNet2DModel`] and [`UNet2DConditionModel`].
22 | 23 | </Tip> 24 | 25 | Any diffusion pipeline that is loaded with [`~DiffusionPipeline.from_pretrained`] will automatically 26 | detect the pipeline type, *e.g.* [`StableDiffusionPipeline`] and consequently load each component of the 27 | pipeline and pass them into the `__init__` function of the pipeline, *e.g.* [`~StableDiffusionPipeline.__init__`]. 28 | 29 | Any pipeline object can be saved locally with [`~DiffusionPipeline.save_pretrained`]. 30 | 31 | ## DiffusionPipeline 32 | [[autodoc]] DiffusionPipeline 33 | - from_pretrained 34 | - save_pretrained 35 | - to 36 | - device 37 | - components 38 | 39 | ## ImagePipelineOutput 40 | By default, diffusion pipelines return an object of the following class: 41 | 42 | [[autodoc]] pipeline_utils.ImagePipelineOutput 43 | -------------------------------------------------------------------------------- /examples/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import sys 19 | import warnings 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(dirname(dirname(__file__))), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | 29 | # silence FutureWarning warnings in tests since often we can't act on them until 30 | # they become normal warnings - i.e. the tests still need to test the current functionality 31 | warnings.simplefilter(action="ignore", category=FutureWarning) 32 | 33 | 34 | def pytest_addoption(parser): 35 | from diffusers.utils.testing_utils import pytest_addoption_shared 36 | 37 | pytest_addoption_shared(parser) 38 | 39 | 40 | def pytest_terminal_summary(terminalreporter): 41 | from diffusers.utils.testing_utils import pytest_terminal_summary_main 42 | 43 | make_reports = terminalreporter.config.getoption("--make-reports") 44 | if make_reports: 45 | pytest_terminal_summary_main(terminalreporter, id=make_reports) 46 | -------------------------------------------------------------------------------- /docs/source/optimization/onnx.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | # How to use the ONNX Runtime for inference 15 | 16 | 🤗 Diffusers provides a Stable Diffusion pipeline compatible with the ONNX Runtime. This allows you to run Stable Diffusion on any hardware that supports ONNX (including CPUs), even where an accelerated version of PyTorch is not available. 17 | 18 | ## Installation 19 | 20 | - TODO 21 | 22 | ## Stable Diffusion Inference 23 | 24 | The snippet below demonstrates how to use the ONNX runtime.
You need to use `StableDiffusionOnnxPipeline` instead of `StableDiffusionPipeline`. You also need to download the weights from the `onnx` branch of the repository, and indicate the runtime provider you want to use.

```python
# make sure you're logged in with `huggingface-cli login`
from diffusers import StableDiffusionOnnxPipeline

pipe = StableDiffusionOnnxPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    revision="onnx",
    provider="CUDAExecutionProvider",
)

prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(prompt).images[0]
```

## Known Issues

- Generating multiple prompts in a batch seems to take too much memory. While we look into it, you may need to iterate instead of batching.
-------------------------------------------------------------------------------- /docs/source/api/outputs.mdx: --------------------------------------------------------------------------------
# BaseOutputs

All models have outputs that are instances of subclasses of [`~utils.BaseOutput`]. Those are
data structures containing all the information returned by the model, but that can also be used as tuples or
dictionaries.

Let's see how this looks in an example:

```python
from diffusers import DDIMPipeline

pipeline = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
outputs = pipeline()
```

The `outputs` object is an [`~pipeline_utils.ImagePipelineOutput`]. As we can see in the
documentation of that class below, this means it has an `images` attribute.

You can access each attribute as you would usually do, and if that attribute has not been returned by the model, you will get `None`:

```python
outputs.images
```

or via keyword lookup:

```python
outputs["images"]
```

When considering our `outputs` object as a tuple, it only considers the attributes that don't have `None` values.
Here for instance, we could retrieve images via indexing:

```python
outputs[:1]
```

which will return the tuple `(outputs.images,)`, for instance.

## BaseOutput

[[autodoc]] utils.BaseOutput
	- to_tuple
-------------------------------------------------------------------------------- /docs/source/conceptual/philosophy.mdx: --------------------------------------------------------------------------------
# Philosophy

- Readability and clarity are preferred over highly optimized code. Strong emphasis is placed on providing readable, intuitive, and elementary code design. *E.g.*, the provided [schedulers](https://github.com/huggingface/diffusers/tree/main/src/diffusers/schedulers) are separated from the provided [models](https://github.com/huggingface/diffusers/tree/main/src/diffusers/models) and use well-commented code that can be read alongside the original paper.
- Diffusers is **modality independent** and focuses on providing pretrained models and tools to build systems that generate **continuous outputs**, *e.g.* vision and audio. This is one of the guiding goals even if the initial pipelines are devoted to vision tasks.
- Diffusion models and schedulers are provided as concise, elementary building blocks.
In contrast, diffusion pipelines are a collection of end-to-end diffusion systems that can be used out-of-the-box, should stay as close as possible to their original implementations and can include components of other libraries, such as text encoders. Examples of diffusion pipelines are [Glide](https://github.com/openai/glide-text2im), [Latent Diffusion](https://github.com/CompVis/latent-diffusion) and [Stable Diffusion](https://github.com/compvis/stable-diffusion). 18 | -------------------------------------------------------------------------------- /src/diffusers/dependency_versions_check.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import sys 15 | 16 | from .dependency_versions_table import deps 17 | from .utils.versions import require_version, require_version_core 18 | 19 | 20 | # define which module versions we always want to check at run time 21 | # (usually the ones defined in `install_requires` in setup.py) 22 | # 23 | # order specific notes: 24 | # - tqdm must be checked before tokenizers 25 | 26 | pkgs_to_check_at_runtime = "python tqdm regex requests packaging filelock numpy tokenizers".split() 27 | if sys.version_info < (3, 7): 28 | pkgs_to_check_at_runtime.append("dataclasses") 29 | if sys.version_info < (3, 8): 30 | pkgs_to_check_at_runtime.append("importlib_metadata") 31 | 32 | for pkg in pkgs_to_check_at_runtime: 33 | if pkg in deps: 34 | if pkg == "tokenizers": 35 | # must be loaded here, or else tqdm check may fail 36 | from .utils import is_tokenizers_available 37 | 38 | if not is_tokenizers_available(): 39 | continue # not required, check version only if installed 40 | 41 | require_version_core(deps[pkg]) 42 | else: 43 | raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") 44 | 45 | 46 | def dep_version_check(pkg, hint=None): 47 | require_version(deps[pkg], hint) 48 | -------------------------------------------------------------------------------- /docs/source/using-diffusers/img2img.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Text-Guided Image-to-Image Generation 14 | 15 | The [`StableDiffusionImg2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images. 
16 | 17 | ```python 18 | import torch 19 | import requests 20 | from PIL import Image 21 | from io import BytesIO 22 | 23 | from diffusers import StableDiffusionImg2ImgPipeline 24 | 25 | # load the pipeline 26 | device = "cuda" 27 | pipe = StableDiffusionImg2ImgPipeline.from_pretrained( 28 | "runwayml/stable-diffusion-v1-5", revision="fp16", torch_dtype=torch.float16 29 | ).to(device) 30 | 31 | # let's download an initial image 32 | url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg" 33 | 34 | response = requests.get(url) 35 | init_image = Image.open(BytesIO(response.content)).convert("RGB") 36 | init_image = init_image.resize((768, 512)) 37 | 38 | prompt = "A fantasy landscape, trending on artstation" 39 | 40 | images = pipe(prompt=prompt, init_image=init_image, strength=0.75, guidance_scale=7.5).images 41 | 42 | images[0].save("fantasy_landscape.png") 43 | ``` 44 | You can also run this example on colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) 45 | 46 | -------------------------------------------------------------------------------- /tests/pipelines/ddpm/test_ddpm.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
import unittest

import numpy as np
import torch

from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel
from diffusers.utils.testing_utils import require_torch, slow, torch_device

from ...test_pipelines_common import PipelineTesterMixin


torch.backends.cuda.matmul.allow_tf32 = False


class DDPMPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    # FIXME: add fast tests
    pass


@slow
@require_torch
class DDPMPipelineIntegrationTests(unittest.TestCase):
    def test_inference_cifar10(self):
        model_id = "google/ddpm-cifar10-32"

        unet = UNet2DModel.from_pretrained(model_id, device_map="auto")
        scheduler = DDPMScheduler.from_config(model_id)

        ddpm = DDPMPipeline(unet=unet, scheduler=scheduler)
        ddpm.to(torch_device)
        ddpm.set_progress_bar_config(disable=None)

        generator = torch.manual_seed(0)
        image = ddpm(generator=generator, output_type="numpy").images

        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 32, 32, 3)
        expected_slice = np.array([0.41995, 0.35885, 0.19385, 0.38475, 0.3382, 0.2647, 0.41545, 0.3582, 0.33845])
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
-------------------------------------------------------------------------------- /scripts/conversion_ldm_uncond.py: --------------------------------------------------------------------------------
import argparse

import torch

from omegaconf import OmegaConf
from diffusers import DDIMScheduler, LDMPipeline, UNetLDMModel, VQModel


def convert_ldm_original(checkpoint_path, config_path, output_path):
    config = OmegaConf.load(config_path)
    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
    keys = list(state_dict.keys())

    # extract state_dict for VQVAE
    first_stage_dict = {}
    first_stage_key = "first_stage_model."
    for key in keys:
        if key.startswith(first_stage_key):
            first_stage_dict[key.replace(first_stage_key, "")] = state_dict[key]

    # extract state_dict for UNetLDM
    unet_state_dict = {}
    unet_key = "model.diffusion_model."
    for key in keys:
        if key.startswith(unet_key):
            unet_state_dict[key.replace(unet_key, "")] = state_dict[key]

    vqvae_init_args = config.model.params.first_stage_config.params
    unet_init_args = config.model.params.unet_config.params

    vqvae = VQModel(**vqvae_init_args).eval()
    vqvae.load_state_dict(first_stage_dict)

    unet = UNetLDMModel(**unet_init_args).eval()
    unet.load_state_dict(unet_state_dict)

    noise_scheduler = DDIMScheduler(
        timesteps=config.model.params.timesteps,
        beta_schedule="scaled_linear",
        beta_start=config.model.params.linear_start,
        beta_end=config.model.params.linear_end,
        clip_sample=False,
    )

    pipeline = LDMPipeline(vqvae, unet, noise_scheduler)
    pipeline.save_pretrained(output_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--checkpoint_path", type=str, required=True)
    parser.add_argument("--config_path", type=str, required=True)
    parser.add_argument("--output_path", type=str, required=True)
    args = parser.parse_args()

    convert_ldm_original(args.checkpoint_path, args.config_path, args.output_path)
-------------------------------------------------------------------------------- /docs/source/using-diffusers/unconditional_image_generation.mdx: --------------------------------------------------------------------------------
# Unconditional Image Generation

The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference.

Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download.
You can use the [`DiffusionPipeline`] for any [Diffusers' checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads).
In this guide though, you'll use [`DiffusionPipeline`] for unconditional image generation with [DDPM](https://arxiv.org/abs/2006.11239):

```python
>>> from diffusers import DiffusionPipeline

>>> generator = DiffusionPipeline.from_pretrained("google/ddpm-celebahq-256")
```

The [`DiffusionPipeline`] downloads and caches all modeling and scheduling components.
Because sampling runs the model once per denoising step, we strongly recommend running it on GPU.
You can move the generator object to GPU, just like you would in PyTorch.

```python
>>> generator.to("cuda")
```

Now you can use the `generator` to sample an image:

```python
>>> image = generator().images[0]
```

The output is by default wrapped into a [PIL Image object](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class).

You can save the image by simply calling:

```python
>>> image.save("generated_image.png")
```
-------------------------------------------------------------------------------- /docs/source/api/pipelines/ddim.mdx: --------------------------------------------------------------------------------
# DDIM

## Overview

[Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) (DDIM) by Jiaming Song, Chenlin Meng and Stefano Ermon.
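Before the abstract, a minimal sampling sketch (the checkpoint is the CIFAR-10 DDPM checkpoint also used in the outputs documentation above; DDIM can sample from DDPM-trained models):

```python
from diffusers import DDIMPipeline

pipe = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")

# DDIM's non-Markovian reverse process allows far fewer steps than the original 1000-step chain.
image = pipe(num_inference_steps=50).images[0]
image.save("ddim_generated_image.png")
```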
The abstract of the paper is the following:

Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a Markovian diffusion process. We construct a class of non-Markovian diffusion processes that lead to the same training objective, but whose reverse process can be much faster to sample from. We empirically demonstrate that DDIMs can produce high quality samples 10× to 50× faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, and can perform semantically meaningful image interpolation directly in the latent space.

The original codebase of this paper can be found [here](https://github.com/ermongroup/ddim).

## Available Pipelines:

| Pipeline | Tasks | Colab
|---|---|:---:|
| [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddim/pipeline_ddim.py) | *Unconditional Image Generation* | - |


## DDIMPipeline
[[autodoc]] DDIMPipeline
	- __call__
-------------------------------------------------------------------------------- /docs/source/api/pipelines/ddpm.mdx: --------------------------------------------------------------------------------
# DDPM

## Overview

[Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239)
(DDPM) by Jonathan Ho, Ajay Jain and Pieter Abbeel proposes the diffusion-based model of the same name. In the context of the 🤗 Diffusers library, however, DDPM refers both to the discrete denoising scheduler from the paper and to the pipeline.

The abstract of the paper is the following:

We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN.

The original codebase of this paper can be found [here](https://github.com/hojonathanho/diffusion).
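A minimal unconditional sampling sketch, mirroring the usage exercised in `tests/pipelines/ddpm/test_ddpm.py` above:

```python
import torch

from diffusers import DDPMPipeline

pipe = DDPMPipeline.from_pretrained("google/ddpm-cifar10-32")

generator = torch.manual_seed(0)  # optional, for reproducible samples
image = pipe(generator=generator).images[0]
image.save("ddpm_generated_image.png")
```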
## Available Pipelines:

| Pipeline | Tasks | Colab
|---|---|:---:|
| [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddpm/pipeline_ddpm.py) | *Unconditional Image Generation* | - |


## DDPMPipeline
[[autodoc]] DDPMPipeline
	- __call__
-------------------------------------------------------------------------------- /docs/source/using-diffusers/conditional_image_generation.mdx: --------------------------------------------------------------------------------
# Conditional Image Generation

The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference.

Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download.
You can use the [`DiffusionPipeline`] for any [Diffusers' checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads).
In this guide though, you'll use [`DiffusionPipeline`] for text-to-image generation with [Latent Diffusion](https://huggingface.co/CompVis/ldm-text2im-large-256):

```python
>>> from diffusers import DiffusionPipeline

>>> generator = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")
```

The [`DiffusionPipeline`] downloads and caches all modeling, tokenization, and scheduling components.
Because the model consists of roughly 1.4 billion parameters, we strongly recommend running it on GPU.
You can move the generator object to GPU, just like you would in PyTorch.

```python
>>> generator.to("cuda")
```

Now you can use the `generator` on your text prompt:

```python
>>> image = generator("An image of a squirrel in Picasso style").images[0]
```

The output is by default wrapped into a [PIL Image object](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class).

You can save the image by simply calling:

```python
>>> image.save("image_of_squirrel_painting.png")
```
-------------------------------------------------------------------------------- /src/diffusers/utils/deprecation_utils.py: --------------------------------------------------------------------------------
import inspect
import warnings
from typing import Any, Dict, Optional, Union

from packaging import version


def deprecate(*args, take_from: Optional[Union[Dict, Any]] = None, standard_warn=True):
    from .. import __version__

    deprecated_kwargs = take_from
    values = ()
    if not isinstance(args[0], tuple):
        args = (args,)

    for attribute, version_name, message in args:
        if version.parse(version.parse(__version__).base_version) >= version.parse(version_name):
            raise ValueError(
                f"The deprecation tuple {(attribute, version_name, message)} should be removed since diffusers'"
                f" version {__version__} is >= {version_name}"
            )

        warning = None
        if isinstance(deprecated_kwargs, dict) and attribute in deprecated_kwargs:
            values += (deprecated_kwargs.pop(attribute),)
            warning = f"The `{attribute}` argument is deprecated and will be removed in version {version_name}."
27 | elif hasattr(deprecated_kwargs, attribute): 28 | values += (getattr(deprecated_kwargs, attribute),) 29 | warning = f"The `{attribute}` attribute is deprecated and will be removed in version {version_name}." 30 | elif deprecated_kwargs is None: 31 | warning = f"`{attribute}` is deprecated and will be removed in version {version_name}." 32 | 33 | if warning is not None: 34 | warning = warning + " " if standard_warn else "" 35 | warnings.warn(warning + message, DeprecationWarning) 36 | 37 | if isinstance(deprecated_kwargs, dict) and len(deprecated_kwargs) > 0: 38 | call_frame = inspect.getouterframes(inspect.currentframe())[1] 39 | filename = call_frame.filename 40 | line_number = call_frame.lineno 41 | function = call_frame.function 42 | key, value = next(iter(deprecated_kwargs.items())) 43 | raise TypeError(f"{function} in {filename} line {line_number-1} got an unexpected keyword argument `{key}`") 44 | 45 | if len(values) == 0: 46 | return 47 | elif len(values) == 1: 48 | return values[0] 49 | return values 50 | -------------------------------------------------------------------------------- /src/diffusers/schedulers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
from ..utils import is_flax_available, is_scipy_available, is_torch_available


if is_torch_available():
    from .scheduling_ddim import DDIMScheduler
    from .scheduling_ddpm import DDPMScheduler
    from .scheduling_euler_ancestral_discrete import EulerAncestralDiscreteScheduler
    from .scheduling_euler_discrete import EulerDiscreteScheduler
    from .scheduling_ipndm import IPNDMScheduler
    from .scheduling_karras_ve import KarrasVeScheduler
    from .scheduling_pndm import PNDMScheduler
    from .scheduling_sde_ve import ScoreSdeVeScheduler
    from .scheduling_sde_vp import ScoreSdeVpScheduler
    from .scheduling_utils import SchedulerMixin
else:
    from ..utils.dummy_pt_objects import *  # noqa F403

if is_flax_available():
    from .scheduling_ddim_flax import FlaxDDIMScheduler
    from .scheduling_ddpm_flax import FlaxDDPMScheduler
    from .scheduling_karras_ve_flax import FlaxKarrasVeScheduler
    from .scheduling_lms_discrete_flax import FlaxLMSDiscreteScheduler
    from .scheduling_pndm_flax import FlaxPNDMScheduler
    from .scheduling_sde_ve_flax import FlaxScoreSdeVeScheduler
    from .scheduling_utils_flax import FlaxSchedulerMixin, FlaxSchedulerOutput, broadcast_to_shape_from_left
else:
    from ..utils.dummy_flax_objects import *  # noqa F403


if is_scipy_available() and is_torch_available():
    from .scheduling_lms_discrete import LMSDiscreteScheduler
else:
    from ..utils.dummy_torch_and_scipy_objects import *  # noqa F403
-------------------------------------------------------------------------------- /docs/source/api/pipelines/stochastic_karras_ve.mdx: --------------------------------------------------------------------------------
# Stochastic Karras VE

## Overview

[Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) by Tero Karras, Miika Aittala, Timo Aila and Samuli Laine.

The abstract of the paper is the following:

We argue that the theory and practice of diffusion-based generative models are currently unnecessarily convoluted and seek to remedy the situation by presenting a design space that clearly separates the concrete design choices. This lets us identify several changes to both the sampling and training processes, as well as preconditioning of the score networks. Together, our improvements yield new state-of-the-art FID of 1.79 for CIFAR-10 in a class-conditional setting and 1.97 in an unconditional setting, with much faster sampling (35 network evaluations per image) than prior designs. To further demonstrate their modular nature, we show that our design changes dramatically improve both the efficiency and quality obtainable with pre-trained score networks from previous work, including improving the FID of an existing ImageNet-64 model from 2.07 to near-SOTA 1.55.

This pipeline implements the stochastic sampling procedure tailored to variance-exploding (VE) models.
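A minimal usage sketch (the model id below is a placeholder, not a real checkpoint name — substitute any checkpoint trained for the VE formulation):

```python
from diffusers import KarrasVePipeline

# "<model-id>" is a placeholder; point it at a compatible VE checkpoint on the Hub.
pipe = KarrasVePipeline.from_pretrained("<model-id>")
image = pipe(num_inference_steps=50).images[0]
```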
## Available Pipelines:

| Pipeline | Tasks | Colab
|---|---|:---:|
| [pipeline_stochastic_karras_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py) | *Unconditional Image Generation* | - |


## KarrasVePipeline
[[autodoc]] KarrasVePipeline
	- __call__
-------------------------------------------------------------------------------- /docs/source/api/models.mdx: --------------------------------------------------------------------------------
# Models

Diffusers contains pretrained models for popular algorithms and modules for creating the next set of diffusion models.
The primary function of these models is to denoise an input sample by modeling the distribution $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$.
The models are built on the base class [`ModelMixin`], which is a `torch.nn.Module` with basic functionality for saving and loading models, both locally and from the Hugging Face Hub.

## ModelMixin
[[autodoc]] ModelMixin

## UNet2DOutput
[[autodoc]] models.unet_2d.UNet2DOutput

## UNet1DModel
[[autodoc]] UNet1DModel

## UNet2DModel
[[autodoc]] UNet2DModel

## UNet2DConditionOutput
[[autodoc]] models.unet_2d_condition.UNet2DConditionOutput

## UNet2DConditionModel
[[autodoc]] UNet2DConditionModel

## DecoderOutput
[[autodoc]] models.vae.DecoderOutput

## VQEncoderOutput
[[autodoc]] models.vae.VQEncoderOutput

## VQModel
[[autodoc]] VQModel

## AutoencoderKLOutput
[[autodoc]] models.vae.AutoencoderKLOutput

## AutoencoderKL
[[autodoc]] AutoencoderKL

## FlaxModelMixin
[[autodoc]] FlaxModelMixin

## FlaxUNet2DConditionOutput
[[autodoc]] models.unet_2d_condition_flax.FlaxUNet2DConditionOutput

## FlaxUNet2DConditionModel
[[autodoc]] FlaxUNet2DConditionModel

## FlaxDecoderOutput
[[autodoc]] models.vae_flax.FlaxDecoderOutput

## FlaxAutoencoderKLOutput
[[autodoc]] models.vae_flax.FlaxAutoencoderKLOutput

## FlaxAutoencoderKL
[[autodoc]] FlaxAutoencoderKL
-------------------------------------------------------------------------------- /src/diffusers/utils/__init__.py: --------------------------------------------------------------------------------
# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | 15 | 16 | import os 17 | 18 | from .deprecation_utils import deprecate 19 | from .import_utils import ( 20 | ENV_VARS_TRUE_AND_AUTO_VALUES, 21 | ENV_VARS_TRUE_VALUES, 22 | USE_JAX, 23 | USE_TF, 24 | USE_TORCH, 25 | DummyObject, 26 | is_accelerate_available, 27 | is_flax_available, 28 | is_inflect_available, 29 | is_modelcards_available, 30 | is_onnx_available, 31 | is_scipy_available, 32 | is_tf_available, 33 | is_torch_available, 34 | is_transformers_available, 35 | is_unidecode_available, 36 | requires_backends, 37 | ) 38 | from .logging import get_logger 39 | from .outputs import BaseOutput 40 | 41 | 42 | if is_torch_available(): 43 | from .testing_utils import ( 44 | floats_tensor, 45 | load_image, 46 | load_numpy, 47 | parse_flag_from_env, 48 | require_torch_gpu, 49 | slow, 50 | torch_all_close, 51 | torch_device, 52 | ) 53 | 54 | 55 | logger = get_logger(__name__) 56 | 57 | 58 | hf_cache_home = os.path.expanduser( 59 | os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) 60 | ) 61 | default_cache_path = os.path.join(hf_cache_home, "diffusers") 62 | 63 | 64 | CONFIG_NAME = "config.json" 65 | WEIGHTS_NAME = "diffusion_pytorch_model.bin" 66 | FLAX_WEIGHTS_NAME = "diffusion_flax_model.msgpack" 67 | ONNX_WEIGHTS_NAME = "model.onnx" 68 | HUGGINGFACE_CO_RESOLVE_ENDPOINT = "https://huggingface.co" 69 | DIFFUSERS_CACHE = default_cache_path 70 | DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" 71 | HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) 72 | -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_torch_and_transformers_and_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class OnnxStableDiffusionImg2ImgPipeline(metaclass=DummyObject): 8 | _backends = ["torch", "transformers", "onnx"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["torch", "transformers", "onnx"]) 12 | 13 | @classmethod 14 | def from_config(cls, *args, **kwargs): 15 | requires_backends(cls, ["torch", "transformers", "onnx"]) 16 | 17 | @classmethod 18 | def from_pretrained(cls, *args, **kwargs): 19 | requires_backends(cls, ["torch", "transformers", "onnx"]) 20 | 21 | 22 | class OnnxStableDiffusionInpaintPipeline(metaclass=DummyObject): 23 | _backends = ["torch", "transformers", "onnx"] 24 | 25 | def __init__(self, *args, **kwargs): 26 | requires_backends(self, ["torch", "transformers", "onnx"]) 27 | 28 | @classmethod 29 | def from_config(cls, *args, **kwargs): 30 | requires_backends(cls, ["torch", "transformers", "onnx"]) 31 | 32 | @classmethod 33 | def from_pretrained(cls, *args, **kwargs): 34 | requires_backends(cls, ["torch", "transformers", "onnx"]) 35 | 36 | 37 | class OnnxStableDiffusionPipeline(metaclass=DummyObject): 38 | _backends = ["torch", "transformers", "onnx"] 39 | 40 | def __init__(self, *args, **kwargs): 41 | requires_backends(self, ["torch", "transformers", "onnx"]) 42 | 43 | @classmethod 44 | def from_config(cls, *args, **kwargs): 45 | requires_backends(cls, ["torch", "transformers", "onnx"]) 46 | 47 | @classmethod 48 | def from_pretrained(cls, *args, **kwargs): 49 | requires_backends(cls, ["torch", "transformers", "onnx"]) 50 | 51 | 52 | class StableDiffusionOnnxPipeline(metaclass=DummyObject): 53 | _backends = ["torch", "transformers", "onnx"] 54 | 55 | def __init__(self, *args, **kwargs): 56 | requires_backends(self, ["torch", "transformers", "onnx"]) 57 | 58 | @classmethod 59 | def from_config(cls, *args, **kwargs): 60 | requires_backends(cls, ["torch", "transformers", "onnx"]) 61 | 62 | @classmethod 63 | def from_pretrained(cls, *args, **kwargs): 64 | requires_backends(cls, ["torch", "transformers", "onnx"]) 65 | -------------------------------------------------------------------------------- /tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_img2img.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import unittest 17 | 18 | import numpy as np 19 | 20 | from diffusers import OnnxStableDiffusionImg2ImgPipeline 21 | from diffusers.utils.testing_utils import load_image, require_onnxruntime, slow 22 | 23 | from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin 24 | 25 | 26 | class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): 27 | # FIXME: add fast tests 28 | pass 29 | 30 | 31 | @slow 32 | @require_onnxruntime 33 | class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): 34 | def test_inference(self): 35 | init_image = load_image( 36 | "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" 37 | "/img2img/sketch-mountains-input.jpg" 38 | ) 39 | init_image = init_image.resize((768, 512)) 40 | pipe = OnnxStableDiffusionImg2ImgPipeline.from_pretrained( 41 | "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CPUExecutionProvider" 42 | ) 43 | pipe.set_progress_bar_config(disable=None) 44 | 45 | prompt = "A fantasy landscape, trending on artstation" 46 | 47 | np.random.seed(0) 48 | output = pipe( 49 | prompt=prompt, 50 | init_image=init_image, 51 | strength=0.75, 52 | guidance_scale=7.5, 53 | num_inference_steps=8, 54 | output_type="np", 55 | ) 56 | images = output.images 57 | image_slice = images[0, 255:258, 383:386, -1] 58 | 59 | assert images.shape == (1, 512, 768, 3) 60 | expected_slice = np.array([0.4830, 0.5242, 0.5603, 0.5016, 0.5131, 0.5111, 0.4928, 0.5025, 0.5055]) 61 | # TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues 62 | assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2 63 | -------------------------------------------------------------------------------- /tests/test_outputs.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from dataclasses import dataclass 3 | from typing import List, Union 4 | 5 | import numpy as np 6 | 7 | import PIL.Image 8 | from diffusers.utils.outputs import BaseOutput 9 | 10 | 11 | @dataclass 12 | class CustomOutput(BaseOutput): 13 | images: Union[List[PIL.Image.Image], np.ndarray] 14 | 15 | 16 | class ConfigTester(unittest.TestCase): 17 | def test_outputs_single_attribute(self): 18 | outputs = CustomOutput(images=np.random.rand(1, 3, 4, 4)) 19 | 20 | # check every way of getting the attribute 21 | assert isinstance(outputs.images, np.ndarray) 22 | assert outputs.images.shape == (1, 3, 4, 4) 23 | assert isinstance(outputs["images"], np.ndarray) 24 | assert outputs["images"].shape == (1, 3, 4, 4) 25 | assert isinstance(outputs[0], np.ndarray) 26 | assert outputs[0].shape == (1, 3, 4, 4) 27 | 28 | # test with a non-tensor attribute 29 | outputs = CustomOutput(images=[PIL.Image.new("RGB", (4, 4))]) 30 | 31 | # check every way of getting the attribute 32 | assert isinstance(outputs.images, list) 33 | assert isinstance(outputs.images[0], PIL.Image.Image) 34 | assert isinstance(outputs["images"], list) 35 | assert isinstance(outputs["images"][0], PIL.Image.Image) 36 | assert isinstance(outputs[0], list) 37 | assert isinstance(outputs[0][0], PIL.Image.Image) 38 | 39 | def test_outputs_dict_init(self): 40 | # test output reinitialization with a `dict` for compatibility with `accelerate` 41 | outputs = CustomOutput({"images": np.random.rand(1, 3, 4, 4)}) 42 | 43 | # check every way of getting the attribute 44 | assert isinstance(outputs.images, np.ndarray) 45 | assert outputs.images.shape == (1, 3, 4, 4) 46 | assert isinstance(outputs["images"], 
np.ndarray) 47 | assert outputs["images"].shape == (1, 3, 4, 4) 48 | assert isinstance(outputs[0], np.ndarray) 49 | assert outputs[0].shape == (1, 3, 4, 4) 50 | 51 | # test with a non-tensor attribute 52 | outputs = CustomOutput({"images": [PIL.Image.new("RGB", (4, 4))]}) 53 | 54 | # check every way of getting the attribute 55 | assert isinstance(outputs.images, list) 56 | assert isinstance(outputs.images[0], PIL.Image.Image) 57 | assert isinstance(outputs["images"], list) 58 | assert isinstance(outputs["images"][0], PIL.Image.Image) 59 | assert isinstance(outputs[0], list) 60 | assert isinstance(outputs[0][0], PIL.Image.Image) 61 | -------------------------------------------------------------------------------- /tests/pipelines/stable_diffusion/test_onnx_stable_diffusion_inpaint.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | 18 | import numpy as np 19 | 20 | from diffusers import OnnxStableDiffusionInpaintPipeline 21 | from diffusers.utils.testing_utils import load_image, require_onnxruntime, slow 22 | 23 | from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin 24 | 25 | 26 | class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase): 27 | # FIXME: add fast tests 28 | pass 29 | 30 | 31 | @slow 32 | @require_onnxruntime 33 | class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase): 34 | def test_stable_diffusion_inpaint_onnx(self): 35 | init_image = load_image( 36 | "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" 37 | "/in_paint/overture-creations-5sI6fQgYIuo.png" 38 | ) 39 | mask_image = load_image( 40 | "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" 41 | "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" 42 | ) 43 | 44 | pipe = OnnxStableDiffusionInpaintPipeline.from_pretrained( 45 | "runwayml/stable-diffusion-inpainting", revision="onnx", provider="CPUExecutionProvider" 46 | ) 47 | pipe.set_progress_bar_config(disable=None) 48 | 49 | prompt = "A red cat sitting on a park bench" 50 | 51 | np.random.seed(0) 52 | output = pipe( 53 | prompt=prompt, 54 | image=init_image, 55 | mask_image=mask_image, 56 | guidance_scale=7.5, 57 | num_inference_steps=8, 58 | output_type="np", 59 | ) 60 | images = output.images 61 | image_slice = images[0, 255:258, 255:258, -1] 62 | 63 | assert images.shape == (1, 512, 512, 3) 64 | expected_slice = np.array([0.2951, 0.2955, 0.2922, 0.2036, 0.1977, 0.2279, 0.1716, 0.1641, 0.1799]) 65 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3 66 | -------------------------------------------------------------------------------- /src/diffusers/commands/env.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import platform
from argparse import ArgumentParser

import huggingface_hub

from .. import __version__ as version
from ..utils import is_torch_available, is_transformers_available
from . import BaseDiffusersCLICommand


def info_command_factory(_):
    return EnvironmentCommand()


class EnvironmentCommand(BaseDiffusersCLICommand):
    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        download_parser = parser.add_parser("env")
        download_parser.set_defaults(func=info_command_factory)

    def run(self):
        hub_version = huggingface_hub.__version__

        pt_version = "not installed"
        pt_cuda_available = "NA"
        if is_torch_available():
            import torch

            pt_version = torch.__version__
            pt_cuda_available = torch.cuda.is_available()

        transformers_version = "not installed"
        if is_transformers_available():
            import transformers

            transformers_version = transformers.__version__

        info = {
            "`diffusers` version": version,
            "Platform": platform.platform(),
            "Python version": platform.python_version(),
            "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})",
            "Huggingface_hub version": hub_version,
            "Transformers version": transformers_version,
            "Using GPU in script?": "<fill in>",
            "Using distributed or parallel set-up in script?": "<fill in>",
        }

        print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n")
        print(self.format_dict(info))

        return info

    @staticmethod
    def format_dict(d):
        return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n"
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
# Initially taken from GitHub's Python gitignore file

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# tests and logs
tests/fixtures/cached_*_text.txt
logs/
lightning_logs/
lang_code_data/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # Environments 98 | .env 99 | .venv 100 | env/ 101 | venv/ 102 | ENV/ 103 | env.bak/ 104 | venv.bak/ 105 | 106 | # Spyder project settings 107 | .spyderproject 108 | .spyproject 109 | 110 | # Rope project settings 111 | .ropeproject 112 | 113 | # mkdocs documentation 114 | /site 115 | 116 | # mypy 117 | .mypy_cache/ 118 | .dmypy.json 119 | dmypy.json 120 | 121 | # Pyre type checker 122 | .pyre/ 123 | 124 | # vscode 125 | .vs 126 | .vscode 127 | 128 | # Pycharm 129 | .idea 130 | 131 | # TF code 132 | tensorflow_code 133 | 134 | # Models 135 | proc_data 136 | 137 | # examples 138 | runs 139 | /runs_old 140 | /wandb 141 | /examples/runs 142 | /examples/**/*.args 143 | /examples/rag/sweep 144 | 145 | # data 146 | /data 147 | serialization_dir 148 | 149 | # emacs 150 | *.*~ 151 | debug.env 152 | 153 | # vim 154 | .*.swp 155 | 156 | #ctags 157 | tags 158 | 159 | # pre-commit 160 | .pre-commit* 161 | 162 | # .lock 163 | *.lock 164 | 165 | # DS_Store (MacOS) 166 | .DS_Store -------------------------------------------------------------------------------- /src/diffusers/utils/dummy_torch_and_transformers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class LDMTextToImagePipeline(metaclass=DummyObject): 8 | _backends = ["torch", "transformers"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["torch", "transformers"]) 12 | 13 | @classmethod 14 | def from_config(cls, *args, **kwargs): 15 | requires_backends(cls, ["torch", "transformers"]) 16 | 17 | @classmethod 18 | def from_pretrained(cls, *args, **kwargs): 19 | requires_backends(cls, ["torch", "transformers"]) 20 | 21 | 22 | class StableDiffusionImg2ImgPipeline(metaclass=DummyObject): 23 | _backends = ["torch", "transformers"] 24 | 25 | def __init__(self, *args, **kwargs): 26 | requires_backends(self, ["torch", "transformers"]) 27 | 28 | @classmethod 29 | def from_config(cls, *args, **kwargs): 30 | requires_backends(cls, ["torch", "transformers"]) 31 | 32 | @classmethod 33 | def from_pretrained(cls, *args, **kwargs): 34 | requires_backends(cls, ["torch", "transformers"]) 35 | 36 | 37 | class StableDiffusionInpaintPipeline(metaclass=DummyObject): 38 | _backends = ["torch", "transformers"] 39 | 40 | def __init__(self, *args, **kwargs): 41 | requires_backends(self, ["torch", "transformers"]) 42 | 43 | @classmethod 44 | def from_config(cls, *args, **kwargs): 45 | requires_backends(cls, ["torch", "transformers"]) 46 | 47 | @classmethod 48 | def from_pretrained(cls, *args, **kwargs): 49 | requires_backends(cls, ["torch", "transformers"]) 50 | 51 | 52 | class StableDiffusionInpaintPipelineLegacy(metaclass=DummyObject): 53 | _backends = ["torch", "transformers"] 54 | 55 | def __init__(self, *args, **kwargs): 56 | requires_backends(self, ["torch", "transformers"]) 57 | 58 | @classmethod 59 | def from_config(cls, *args, **kwargs): 60 | requires_backends(cls, ["torch", "transformers"]) 61 | 62 | @classmethod 63 | def from_pretrained(cls, *args, **kwargs): 64 | requires_backends(cls, ["torch", "transformers"]) 65 | 66 | 67 | class StableDiffusionPipeline(metaclass=DummyObject): 68 | _backends = ["torch", "transformers"] 69 | 70 | def __init__(self, *args, **kwargs): 71 | requires_backends(self, ["torch", "transformers"]) 72 | 73 | @classmethod 74 | def from_config(cls, *args, **kwargs): 75 | requires_backends(cls, ["torch", "transformers"]) 76 | 77 | @classmethod 78 | def from_pretrained(cls, *args, **kwargs): 79 | requires_backends(cls, ["torch", "transformers"]) 80 | -------------------------------------------------------------------------------- /docs/source/api/pipelines/pndm.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # PNDM 14 | 15 | ## Overview 16 | 17 | [Pseudo Numerical methods for Diffusion Models on manifolds](https://arxiv.org/abs/2202.09778) (PNDM) by Luping Liu, Yi Ren, Zhijie Lin and Zhou Zhao. 18 | 19 | The abstract of the paper is the following: 20 | 21 | Denoising Diffusion Probabilistic Models (DDPMs) can generate high-quality samples such as image and audio samples. However, DDPMs require hundreds to thousands of iterations to produce final samples. Several prior works have successfully accelerated DDPMs through adjusting the variance schedule (e.g., Improved Denoising Diffusion Probabilistic Models) or the denoising equation (e.g., Denoising Diffusion Implicit Models (DDIMs)). However, these acceleration methods cannot maintain the quality of samples and even introduce new noise at a high speedup rate, which limit their practicability. 
To accelerate the inference process while keeping the sample quality, we provide a fresh perspective that DDPMs should be treated as solving differential equations on manifolds. Under such a perspective, we propose pseudo numerical methods for diffusion models (PNDMs). Specifically, we figure out how to solve differential equations on manifolds and show that DDIMs are simple cases of pseudo numerical methods. We change several classical numerical methods to corresponding pseudo numerical methods and find that the pseudo linear multi-step method is the best in most situations. According to our experiments, by directly using pre-trained models on Cifar10, CelebA and LSUN, PNDMs can generate higher quality synthetic images with only 50 steps compared with 1000-step DDIMs (20x speedup), significantly outperform DDIMs with 250 steps (by around 0.4 in FID) and have good generalization on different variance schedules.

The original codebase can be found [here](https://github.com/luping-liu/PNDM).

## Available Pipelines:

| Pipeline | Tasks | Colab
|---|---|:---:|
| [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pndm/pipeline_pndm.py) | *Unconditional Image Generation* | - |


## PNDMPipeline
[[autodoc]] pipelines.pndm.pipeline_pndm.PNDMPipeline
	- __call__
-------------------------------------------------------------------------------- /src/diffusers/models/embeddings_flax.py: --------------------------------------------------------------------------------
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import flax.linen as nn
import jax.numpy as jnp


# This is like models.embeddings.get_timestep_embedding (PyTorch) but
# less general (only handles the case we currently need).
def get_sinusoidal_embeddings(timesteps, embedding_dim, freq_shift: float = 1):
    """
    Create sinusoidal timestep embeddings. This matches the implementation in Denoising Diffusion Probabilistic
    Models.

    :param timesteps: a 1-D tensor of N indices, one per batch element. These may be fractional.
    :param embedding_dim: the dimension of the output.
    :param freq_shift: shifts the frequency spectrum of the embeddings.
    :return: an [N x embedding_dim] tensor of positional embeddings.
    """
    half_dim = embedding_dim // 2
    emb = math.log(10000) / (half_dim - freq_shift)
    emb = jnp.exp(jnp.arange(half_dim) * -emb)
    emb = timesteps[:, None] * emb[None, :]
    emb = jnp.concatenate([jnp.cos(emb), jnp.sin(emb)], -1)
    return emb


class FlaxTimestepEmbedding(nn.Module):
    r"""
    Time step Embedding Module. Learns embeddings for input time steps.
42 | 43 | Args: 44 | time_embed_dim (`int`, *optional*, defaults to `32`): 45 | Time step embedding dimension 46 | dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32): 47 | Parameters `dtype` 48 | """ 49 | time_embed_dim: int = 32 50 | dtype: jnp.dtype = jnp.float32 51 | 52 | @nn.compact 53 | def __call__(self, temb): 54 | temb = nn.Dense(self.time_embed_dim, dtype=self.dtype, name="linear_1")(temb) 55 | temb = nn.silu(temb) 56 | temb = nn.Dense(self.time_embed_dim, dtype=self.dtype, name="linear_2")(temb) 57 | return temb 58 | 59 | 60 | class FlaxTimesteps(nn.Module): 61 | r""" 62 | Wrapper Module for sinusoidal Time step Embeddings as described in https://arxiv.org/abs/2006.11239 63 | 64 | Args: 65 | dim (`int`, *optional*, defaults to `32`): 66 | Time step embedding dimension 67 | """ 68 | dim: int = 32 69 | freq_shift: float = 1 70 | 71 | @nn.compact 72 | def __call__(self, timesteps): 73 | return get_sinusoidal_embeddings(timesteps, self.dim, freq_shift=self.freq_shift) 74 | -------------------------------------------------------------------------------- /docs/source/api/pipelines/latent_diffusion_uncond.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Unconditional Latent Diffusion 14 | 15 | ## Overview 16 | 17 | Unconditional Latent Diffusion was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer. 18 | 19 | The abstract of the paper is the following: 20 | 21 | *By decomposing the image formation process into a sequential application of denoising autoencoders, diffusion models (DMs) achieve state-of-the-art synthesis results on image data and beyond. Additionally, their formulation allows for a guiding mechanism to control the image generation process without retraining. However, since these models typically operate directly in pixel space, optimization of powerful DMs often consumes hundreds of GPU days and inference is expensive due to sequential evaluations. To enable DM training on limited computational resources while retaining their quality and flexibility, we apply them in the latent space of powerful pretrained autoencoders. In contrast to previous work, training diffusion models on such a representation allows for the first time to reach a near-optimal point between complexity reduction and detail preservation, greatly boosting visual fidelity. By introducing cross-attention layers into the model architecture, we turn diffusion models into powerful and flexible generators for general conditioning inputs such as text or bounding boxes and high-resolution synthesis becomes possible in a convolutional manner. Our latent diffusion models (LDMs) achieve a new state of the art for image inpainting and highly competitive performance on various tasks, including unconditional image generation, semantic scene synthesis, and super-resolution, while significantly reducing computational requirements compared to pixel-based DMs.* 22 | 23 | The original codebase can be found [here](https://github.com/CompVis/latent-diffusion). 
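A minimal usage sketch (the checkpoint id below is an assumption — any unconditional LDM checkpoint in the diffusers format should work):

```python
from diffusers import LDMPipeline

pipe = LDMPipeline.from_pretrained("CompVis/ldm-celebahq-256")  # assumed checkpoint id
image = pipe(num_inference_steps=200).images[0]
image.save("ldm_generated_image.png")
```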
24 | 25 | ## Tips: 26 | 27 | - 28 | - 29 | - 30 | 31 | ## Available Pipelines: 32 | 33 | | Pipeline | Tasks | Colab 34 | |---|---|:---:| 35 | | [pipeline_latent_diffusion_uncond.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py) | *Unconditional Image Generation* | - | 36 | 37 | ## Examples: 38 | 39 | ## LDMPipeline 40 | [[autodoc]] LDMPipeline 41 | - __call__ 42 | -------------------------------------------------------------------------------- /docs/source/api/pipelines/latent_diffusion.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Latent Diffusion 14 | 15 | ## Overview 16 | 17 | Latent Diffusion was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer. 18 | 19 | The abstract of the paper is the following: 20 | 21 | *By decomposing the image formation process into a sequential application of denoising autoencoders, diffusion models (DMs) achieve state-of-the-art synthesis results on image data and beyond. Additionally, their formulation allows for a guiding mechanism to control the image generation process without retraining. However, since these models typically operate directly in pixel space, optimization of powerful DMs often consumes hundreds of GPU days and inference is expensive due to sequential evaluations. To enable DM training on limited computational resources while retaining their quality and flexibility, we apply them in the latent space of powerful pretrained autoencoders. In contrast to previous work, training diffusion models on such a representation allows for the first time to reach a near-optimal point between complexity reduction and detail preservation, greatly boosting visual fidelity. By introducing cross-attention layers into the model architecture, we turn diffusion models into powerful and flexible generators for general conditioning inputs such as text or bounding boxes and high-resolution synthesis becomes possible in a convolutional manner. Our latent diffusion models (LDMs) achieve a new state of the art for image inpainting and highly competitive performance on various tasks, including unconditional image generation, semantic scene synthesis, and super-resolution, while significantly reducing computational requirements compared to pixel-based DMs.* 22 | 23 | The original codebase can be found [here](https://github.com/CompVis/latent-diffusion). 
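A minimal usage sketch, reusing the checkpoint from the conditional image generation guide earlier in these docs:

```python
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")
image = pipe("An image of a squirrel in Picasso style").images[0]
image.save("squirrel.png")
```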
24 | 
25 | ## Available Pipelines:
26 | 
27 | | Pipeline | Tasks | Colab |
28 | |---|---|:---:|
29 | | [pipeline_latent_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py) | *Text-to-Image Generation* | - |
30 | 
31 | ## LDMTextToImagePipeline
32 | [[autodoc]] pipelines.latent_diffusion.pipeline_latent_diffusion.LDMTextToImagePipeline
33 |     - __call__
34 | 
--------------------------------------------------------------------------------
/src/diffusers/pipelines/stable_diffusion/__init__.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import List, Optional, Union
 3 | 
 4 | import numpy as np
 5 | 
 6 | import PIL
 7 | from PIL import Image
 8 | 
 9 | from ...utils import BaseOutput, is_flax_available, is_onnx_available, is_torch_available, is_transformers_available
10 | 
11 | 
12 | @dataclass
13 | class StableDiffusionPipelineOutput(BaseOutput):
14 |     """
15 |     Output class for Stable Diffusion pipelines.
16 | 
17 |     Args:
18 |         images (`List[PIL.Image.Image]` or `np.ndarray`):
19 |             List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
20 |             num_channels)`. PIL images or numpy arrays represent the denoised images of the diffusion pipeline.
21 |         nsfw_content_detected (`List[bool]`):
22 |             List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
23 |             (nsfw) content, or `None` if safety checking could not be performed.
24 |     """
25 | 
26 |     images: Union[List[PIL.Image.Image], np.ndarray]
27 |     nsfw_content_detected: Optional[List[bool]]
28 | 
29 | 
30 | if is_transformers_available() and is_torch_available():
31 |     from .pipeline_stable_diffusion import StableDiffusionPipeline
32 |     from .pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline
33 |     from .pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline
34 |     from .pipeline_stable_diffusion_inpaint_legacy import StableDiffusionInpaintPipelineLegacy
35 |     from .safety_checker import StableDiffusionSafetyChecker
36 | 
37 | if is_transformers_available() and is_onnx_available():
38 |     from .pipeline_onnx_stable_diffusion import OnnxStableDiffusionPipeline, StableDiffusionOnnxPipeline
39 |     from .pipeline_onnx_stable_diffusion_img2img import OnnxStableDiffusionImg2ImgPipeline
40 |     from .pipeline_onnx_stable_diffusion_inpaint import OnnxStableDiffusionInpaintPipeline
41 | 
42 | if is_transformers_available() and is_flax_available():
43 |     import flax
44 | 
45 |     @flax.struct.dataclass
46 |     class FlaxStableDiffusionPipelineOutput(BaseOutput):
47 |         """
48 |         Output class for Stable Diffusion pipelines.
49 | 
50 |         Args:
51 |             images (`List[PIL.Image.Image]` or `np.ndarray`):
52 |                 List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
53 |                 num_channels)`. PIL images or numpy arrays represent the denoised images of the diffusion pipeline.
54 |             nsfw_content_detected (`List[bool]`):
55 |                 List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
56 |                 (nsfw) content.
57 |         """
58 | 
59 |         images: Union[List[PIL.Image.Image], np.ndarray]
60 |         nsfw_content_detected: List[bool]
61 | 
62 |     from ...schedulers.scheduling_pndm_flax import PNDMSchedulerState
63 |     from .pipeline_flax_stable_diffusion import FlaxStableDiffusionPipeline
64 |     from .safety_checker_flax import FlaxStableDiffusionSafetyChecker
65 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples
 2 | 
 3 | # make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
 4 | export PYTHONPATH = src
 5 | 
 6 | check_dirs := examples scripts src tests utils
 7 | 
 8 | modified_only_fixup:
 9 | 	$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
10 | 	@if test -n "$(modified_py_files)"; then \
11 | 		echo "Checking/fixing $(modified_py_files)"; \
12 | 		black --preview $(modified_py_files); \
13 | 		isort $(modified_py_files); \
14 | 		flake8 $(modified_py_files); \
15 | 	else \
16 | 		echo "No library .py files were modified"; \
17 | 	fi
18 | 
19 | # Update src/diffusers/dependency_versions_table.py
20 | 
21 | deps_table_update:
22 | 	@python setup.py deps_table_update
23 | 
24 | deps_table_check_updated:
25 | 	@md5sum src/diffusers/dependency_versions_table.py > md5sum.saved
26 | 	@python setup.py deps_table_update
27 | 	@md5sum -c --quiet md5sum.saved || (printf "\nError: the version dependency table is outdated.\nPlease run 'make fixup' or 'make style' and commit the changes.\n\n" && exit 1)
28 | 	@rm md5sum.saved
29 | 
30 | # autogenerating code
31 | 
32 | autogenerate_code: deps_table_update
33 | 
34 | # Check that the repo is in a good state
35 | 
36 | repo-consistency:
37 | 	python utils/check_dummies.py
38 | 	python utils/check_repo.py
39 | 	python utils/check_inits.py
40 | 
41 | # this target runs checks on all files
42 | 
43 | quality:
44 | 	black --check --preview $(check_dirs)
45 | 	isort --check-only $(check_dirs)
46 | 	flake8 $(check_dirs)
47 | 	doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
48 | 
49 | # Format source code automatically and check if there are any problems left that need manual fixing
50 | 
51 | extra_style_checks:
52 | 	python utils/custom_init_isort.py
53 | 	doc-builder style src/diffusers docs/source --max_len 119 --path_to_docs docs/source
54 | 
55 | # this target runs checks on all files and potentially modifies some of them
56 | 
57 | style:
58 | 	black --preview $(check_dirs)
59 | 	isort $(check_dirs)
60 | 	${MAKE} autogenerate_code
61 | 	${MAKE} extra_style_checks
62 | 
63 | # Super fast fix and check target that only works on relevant modified files since the branch was made
64 | 
65 | fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
66 | 
67 | # Make marked copies of snippets of code conform to the original
68 | 
69 | fix-copies:
70 | 	python utils/check_copies.py --fix_and_overwrite
71 | 	python utils/check_dummies.py --fix_and_overwrite
72 | 
73 | # Run tests for the library
74 | 
75 | test:
76 | 	python -m pytest -n auto --dist=loadfile -s -v ./tests/
77 | 
78 | # Run tests for examples
79 | 
80 | test-examples:
81 | 	python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/
82 | 
83 | 
84 | # Release stuff
85 | 
86 | pre-release:
87 | 	python utils/release.py
88 | 
89 | pre-patch:
90 | 	python utils/release.py --patch
91 | 
92 | 
post-release:
93 | 	python utils/release.py --post_release
94 | 
95 | post-patch:
96 | 	python utils/release.py --post_release --patch
97 | 
--------------------------------------------------------------------------------
/docs/source/api/pipelines/score_sde_ve.mdx:
--------------------------------------------------------------------------------
 1 | 
12 | 
13 | # Score SDE VE
14 | 
15 | ## Overview
16 | 
17 | Score SDE was proposed in [Score-Based Generative Modeling through Stochastic Differential Equations](https://arxiv.org/abs/2011.13456) by Yang Song, Jascha Sohl-Dickstein, Diederik P. Kingma, Abhishek Kumar, Stefano Ermon and Ben Poole.
18 | 
19 | The abstract of the paper is the following:
20 | 
21 | *Creating noise from data is easy; creating data from noise is generative modeling. We present a stochastic differential equation (SDE) that smoothly transforms a complex data distribution to a known prior distribution by slowly injecting noise, and a corresponding reverse-time SDE that transforms the prior distribution back into the data distribution by slowly removing the noise. Crucially, the reverse-time SDE depends only on the time-dependent gradient field (a.k.a., score) of the perturbed data distribution. By leveraging advances in score-based generative modeling, we can accurately estimate these scores with neural networks, and use numerical SDE solvers to generate samples. We show that this framework encapsulates previous approaches in score-based generative modeling and diffusion probabilistic modeling, allowing for new sampling procedures and new modeling capabilities. In particular, we introduce a predictor-corrector framework to correct errors in the evolution of the discretized reverse-time SDE. We also derive an equivalent neural ODE that samples from the same distribution as the SDE, but additionally enables exact likelihood computation, and improved sampling efficiency. In addition, we provide a new way to solve inverse problems with score-based models, as demonstrated with experiments on class-conditional generation, image inpainting, and colorization. Combined with multiple architectural improvements, we achieve record-breaking performance for unconditional image generation on CIFAR-10 with an Inception score of 9.89 and FID of 2.20, a competitive likelihood of 2.99 bits/dim, and demonstrate high fidelity generation of 1024 x 1024 images for the first time from a score-based generative model.*
22 | 
23 | The original codebase can be found [here](https://github.com/yang-song/score_sde_pytorch).
24 | 
25 | This pipeline implements the Variance Exploding (VE) variant of the method.
26 | 
27 | ## Available Pipelines:
28 | 
29 | | Pipeline | Tasks | Colab |
30 | |---|---|:---:|
31 | | [pipeline_score_sde_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py) | *Unconditional Image Generation* | - |
32 | 
33 | ## ScoreSdeVePipeline
34 | [[autodoc]] ScoreSdeVePipeline
35 |     - __call__
36 | 
37 | 
--------------------------------------------------------------------------------
/utils/stale.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 The HuggingFace Team, the AllenNLP library authors. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Script to close stale issues. Taken in part from the AllenNLP repository.
16 | https://github.com/allenai/allennlp.
17 | """
18 | import os
19 | from datetime import datetime as dt
20 | 
21 | from github import Github
22 | 
23 | 
24 | LABELS_TO_EXEMPT = [
25 |     "good first issue",
26 |     "good second issue",
27 |     "good difficult issue",
28 |     "enhancement",
29 |     "new pipeline/model",
30 |     "new scheduler",
31 |     "wip",
32 | ]
33 | 
34 | 
35 | def main():
36 |     g = Github(os.environ["GITHUB_TOKEN"])
37 |     repo = g.get_repo("huggingface/diffusers")
38 |     open_issues = repo.get_issues(state="open")
39 | 
40 |     for issue in open_issues:
41 |         comments = sorted([comment for comment in issue.get_comments()], key=lambda i: i.created_at, reverse=True)
42 |         last_comment = comments[0] if len(comments) > 0 else None
43 |         if (
44 |             last_comment is not None
45 |             and last_comment.user.login == "github-actions[bot]"
46 |             and (dt.utcnow() - issue.updated_at).days > 7
47 |             and (dt.utcnow() - issue.created_at).days >= 30
48 |             and not any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels())
49 |         ):
50 |             # Closes the issue after 7 days of inactivity since the Stalebot notification.
51 |             issue.edit(state="closed")
52 |         elif (
53 |             # get_labels() returns Label objects, so compare by name rather than by object
54 |             "stale" in [label.name.lower() for label in issue.get_labels()]
55 |             and last_comment is not None
56 |             and last_comment.user.login != "github-actions[bot]"
57 |         ):
58 |             # Opens the issue if someone other than Stalebot commented.
59 |             issue.edit(state="open")
60 |             issue.remove_from_labels("stale")
61 |         elif (
62 |             (dt.utcnow() - issue.updated_at).days > 23
63 |             and (dt.utcnow() - issue.created_at).days >= 30
64 |             and not any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels())
65 |         ):
66 |             # Post a Stalebot notification after 23 days of inactivity.
67 |             issue.create_comment(
68 |                 "This issue has been automatically marked as stale because it has not had "
69 |                 "recent activity. If you think this still needs to be addressed "
70 |                 "please comment on this thread.\n\nPlease note that issues that do not follow the "
71 |                 "[contributing guidelines](https://github.com/huggingface/diffusers/blob/main/CONTRIBUTING.md) "
72 |                 "are likely to be ignored."
73 |             )
74 |             issue.add_to_labels("stale")
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     main()
79 | 
--------------------------------------------------------------------------------
/utils/check_config_docstrings.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 The HuggingFace Inc. team.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import importlib
17 | import inspect
18 | import os
19 | import re
20 | 
21 | 
22 | # All paths are set with the intent you should run this script from the root of the repo with the command
23 | # python utils/check_config_docstrings.py
24 | PATH_TO_TRANSFORMERS = "src/transformers"
25 | 
26 | 
27 | # This is to make sure the transformers module imported is the one in the repo.
28 | spec = importlib.util.spec_from_file_location(
29 |     "transformers",
30 |     os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"),
31 |     submodule_search_locations=[PATH_TO_TRANSFORMERS],
32 | )
33 | transformers = spec.loader.load_module()
34 | 
35 | CONFIG_MAPPING = transformers.models.auto.configuration_auto.CONFIG_MAPPING
36 | 
37 | # Regex pattern (as a raw string, so the escapes are not interpreted as string escapes) used to find
38 | # the checkpoint mentioned in the docstring of `config_class`.
39 | # For example, `[bert-base-uncased](https://huggingface.co/bert-base-uncased)`
40 | _re_checkpoint = re.compile(r"\[(.+?)\]\((https://huggingface\.co/.+?)\)")
41 | 
42 | 
43 | CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK = {
44 |     "CLIPConfigMixin",
45 |     "DecisionTransformerConfigMixin",
46 |     "EncoderDecoderConfigMixin",
47 |     "RagConfigMixin",
48 |     "SpeechEncoderDecoderConfigMixin",
49 |     "VisionEncoderDecoderConfigMixin",
50 |     "VisionTextDualEncoderConfigMixin",
51 | }
52 | 
53 | 
54 | def check_config_docstrings_have_checkpoints():
55 |     configs_without_checkpoint = []
56 | 
57 |     for config_class in list(CONFIG_MAPPING.values()):
58 |         checkpoint_found = False
59 | 
60 |         # source code of `config_class`
61 |         config_source = inspect.getsource(config_class)
62 |         checkpoints = _re_checkpoint.findall(config_source)
63 | 
64 |         for checkpoint in checkpoints:
65 |             # Each `checkpoint` is a tuple of a checkpoint name and a checkpoint link.
66 |             # For example, `('bert-base-uncased', 'https://huggingface.co/bert-base-uncased')`
67 |             ckpt_name, ckpt_link = checkpoint
68 | 
69 |             # verify the checkpoint name corresponds to the checkpoint link
70 |             ckpt_link_from_name = f"https://huggingface.co/{ckpt_name}"
71 |             if ckpt_link == ckpt_link_from_name:
72 |                 checkpoint_found = True
73 |                 break
74 | 
75 |         name = config_class.__name__
76 |         if not checkpoint_found and name not in CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK:
77 |             configs_without_checkpoint.append(name)
78 | 
79 |     if len(configs_without_checkpoint) > 0:
80 |         message = "\n".join(sorted(configs_without_checkpoint))
81 |         raise ValueError(f"The following configurations don't contain any valid checkpoint:\n{message}")
82 | 
83 | 
84 | if __name__ == "__main__":
85 |     check_config_docstrings_have_checkpoints()
86 | 
--------------------------------------------------------------------------------
/tests/pipelines/pndm/test_pndm.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 HuggingFace Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | 18 | import numpy as np 19 | import torch 20 | 21 | from diffusers import PNDMPipeline, PNDMScheduler, UNet2DModel 22 | from diffusers.utils.testing_utils import require_torch, slow, torch_device 23 | 24 | from ...test_pipelines_common import PipelineTesterMixin 25 | 26 | 27 | torch.backends.cuda.matmul.allow_tf32 = False 28 | 29 | 30 | class PNDMPipelineFastTests(PipelineTesterMixin, unittest.TestCase): 31 | @property 32 | def dummy_uncond_unet(self): 33 | torch.manual_seed(0) 34 | model = UNet2DModel( 35 | block_out_channels=(32, 64), 36 | layers_per_block=2, 37 | sample_size=32, 38 | in_channels=3, 39 | out_channels=3, 40 | down_block_types=("DownBlock2D", "AttnDownBlock2D"), 41 | up_block_types=("AttnUpBlock2D", "UpBlock2D"), 42 | ) 43 | return model 44 | 45 | def test_inference(self): 46 | unet = self.dummy_uncond_unet 47 | scheduler = PNDMScheduler() 48 | 49 | pndm = PNDMPipeline(unet=unet, scheduler=scheduler) 50 | pndm.to(torch_device) 51 | pndm.set_progress_bar_config(disable=None) 52 | 53 | generator = torch.manual_seed(0) 54 | image = pndm(generator=generator, num_inference_steps=20, output_type="numpy").images 55 | 56 | generator = torch.manual_seed(0) 57 | image_from_tuple = pndm(generator=generator, num_inference_steps=20, output_type="numpy", return_dict=False)[0] 58 | 59 | image_slice = image[0, -3:, -3:, -1] 60 | image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] 61 | 62 | assert image.shape == (1, 32, 32, 3) 63 | expected_slice = np.array([1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]) 64 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 65 | assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 66 | 67 | 68 | @slow 69 | @require_torch 70 | class PNDMPipelineIntegrationTests(unittest.TestCase): 71 | def test_inference_cifar10(self): 72 | model_id = "google/ddpm-cifar10-32" 73 | 74 | unet = UNet2DModel.from_pretrained(model_id, device_map="auto") 75 | scheduler = PNDMScheduler() 76 | 77 | pndm = PNDMPipeline(unet=unet, scheduler=scheduler) 78 | pndm.to(torch_device) 79 | pndm.set_progress_bar_config(disable=None) 80 | generator = torch.manual_seed(0) 81 | image = pndm(generator=generator, output_type="numpy").images 82 | 83 | image_slice = image[0, -3:, -3:, -1] 84 | 85 | assert image.shape == (1, 32, 32, 3) 86 | expected_slice = np.array([0.1564, 0.14645, 0.1406, 0.14715, 0.12425, 0.14045, 0.13115, 0.12175, 0.125]) 87 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 88 | -------------------------------------------------------------------------------- /tests/models/test_models_vq.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import unittest 17 | 18 | import torch 19 | 20 | from diffusers import VQModel 21 | from diffusers.utils import floats_tensor, torch_device 22 | 23 | from ..test_modeling_common import ModelTesterMixin 24 | 25 | 26 | torch.backends.cuda.matmul.allow_tf32 = False 27 | 28 | 29 | class VQModelTests(ModelTesterMixin, unittest.TestCase): 30 | model_class = VQModel 31 | 32 | @property 33 | def dummy_input(self, sizes=(32, 32)): 34 | batch_size = 4 35 | num_channels = 3 36 | 37 | image = floats_tensor((batch_size, num_channels) + sizes).to(torch_device) 38 | 39 | return {"sample": image} 40 | 41 | @property 42 | def input_shape(self): 43 | return (3, 32, 32) 44 | 45 | @property 46 | def output_shape(self): 47 | return (3, 32, 32) 48 | 49 | def prepare_init_args_and_inputs_for_common(self): 50 | init_dict = { 51 | "block_out_channels": [32, 64], 52 | "in_channels": 3, 53 | "out_channels": 3, 54 | "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], 55 | "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], 56 | "latent_channels": 3, 57 | } 58 | inputs_dict = self.dummy_input 59 | return init_dict, inputs_dict 60 | 61 | def test_forward_signature(self): 62 | pass 63 | 64 | def test_training(self): 65 | pass 66 | 67 | def test_from_pretrained_hub(self): 68 | model, loading_info = VQModel.from_pretrained("fusing/vqgan-dummy", output_loading_info=True) 69 | self.assertIsNotNone(model) 70 | self.assertEqual(len(loading_info["missing_keys"]), 0) 71 | 72 | model.to(torch_device) 73 | image = model(**self.dummy_input) 74 | 75 | assert image is not None, "Make sure output is not None" 76 | 77 | def test_output_pretrained(self): 78 | model = VQModel.from_pretrained("fusing/vqgan-dummy") 79 | model.to(torch_device).eval() 80 | 81 | torch.manual_seed(0) 82 | if torch.cuda.is_available(): 83 | torch.cuda.manual_seed_all(0) 84 | 85 | image = torch.randn(1, model.config.in_channels, model.config.sample_size, model.config.sample_size) 86 | image = image.to(torch_device) 87 | with torch.no_grad(): 88 | # Warmup pass when using mps (see #372) 89 | if torch_device == "mps": 90 | _ = model(image) 91 | output = model(image).sample 92 | 93 | output_slice = output[0, -1, -3:, -3:].flatten().cpu() 94 | # fmt: off 95 | expected_output_slice = torch.tensor([-0.0153, -0.4044, -0.1880, -0.5161, -0.2418, -0.4072, -0.1612, -0.0633, -0.0143]) 96 | # fmt: on 97 | self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3)) 98 | -------------------------------------------------------------------------------- /tests/pipelines/karras_ve/test_karras_ve.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import unittest 17 | 18 | import numpy as np 19 | import torch 20 | 21 | from diffusers import KarrasVePipeline, KarrasVeScheduler, UNet2DModel 22 | from diffusers.utils.testing_utils import require_torch, slow, torch_device 23 | 24 | from ...test_pipelines_common import PipelineTesterMixin 25 | 26 | 27 | torch.backends.cuda.matmul.allow_tf32 = False 28 | 29 | 30 | class KarrasVePipelineFastTests(PipelineTesterMixin, unittest.TestCase): 31 | @property 32 | def dummy_uncond_unet(self): 33 | torch.manual_seed(0) 34 | model = UNet2DModel( 35 | block_out_channels=(32, 64), 36 | layers_per_block=2, 37 | sample_size=32, 38 | in_channels=3, 39 | out_channels=3, 40 | down_block_types=("DownBlock2D", "AttnDownBlock2D"), 41 | up_block_types=("AttnUpBlock2D", "UpBlock2D"), 42 | ) 43 | return model 44 | 45 | def test_inference(self): 46 | unet = self.dummy_uncond_unet 47 | scheduler = KarrasVeScheduler() 48 | 49 | pipe = KarrasVePipeline(unet=unet, scheduler=scheduler) 50 | pipe.to(torch_device) 51 | pipe.set_progress_bar_config(disable=None) 52 | 53 | generator = torch.manual_seed(0) 54 | image = pipe(num_inference_steps=2, generator=generator, output_type="numpy").images 55 | 56 | generator = torch.manual_seed(0) 57 | image_from_tuple = pipe(num_inference_steps=2, generator=generator, output_type="numpy", return_dict=False)[0] 58 | 59 | image_slice = image[0, -3:, -3:, -1] 60 | image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1] 61 | 62 | assert image.shape == (1, 32, 32, 3) 63 | expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]) 64 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 65 | assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2 66 | 67 | 68 | @slow 69 | @require_torch 70 | class KarrasVePipelineIntegrationTests(unittest.TestCase): 71 | def test_inference(self): 72 | model_id = "google/ncsnpp-celebahq-256" 73 | model = UNet2DModel.from_pretrained(model_id, device_map="auto") 74 | scheduler = KarrasVeScheduler() 75 | 76 | pipe = KarrasVePipeline(unet=model, scheduler=scheduler) 77 | pipe.to(torch_device) 78 | pipe.set_progress_bar_config(disable=None) 79 | 80 | generator = torch.manual_seed(0) 81 | image = pipe(num_inference_steps=20, generator=generator, output_type="numpy").images 82 | 83 | image_slice = image[0, -3:, -3:, -1] 84 | assert image.shape == (1, 256, 256, 3) 85 | expected_slice = np.array([0.578, 0.5811, 0.5924, 0.5809, 0.587, 0.5886, 0.5861, 0.5802, 0.586]) 86 | assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 87 | -------------------------------------------------------------------------------- /.github/workflows/push_tests.yml: -------------------------------------------------------------------------------- 1 | name: Run all tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | env: 9 | DIFFUSERS_IS_CI: yes 10 | HF_HOME: /mnt/cache 11 | OMP_NUM_THREADS: 8 12 | MKL_NUM_THREADS: 8 13 | PYTEST_TIMEOUT: 1000 14 | RUN_SLOW: yes 15 | 16 | jobs: 17 | run_tests_single_gpu: 18 | name: Diffusers tests 19 | runs-on: [ self-hosted, docker-gpu, single-gpu ] 20 | container: 21 | image: nvcr.io/nvidia/pytorch:22.07-py3 22 | options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache 23 | 24 | steps: 25 | - name: Checkout diffusers 26 | uses: actions/checkout@v3 27 | with: 28 | fetch-depth: 2 29 | 30 | - name: NVIDIA-SMI 31 | run: | 32 | nvidia-smi 33 | 34 | - name: Install dependencies 35 | run: | 36 | python -m pip install --upgrade pip 37 | python -m pip uninstall -y 
torch torchvision torchtext 38 | python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu117 39 | python -m pip install -e .[quality,test] 40 | python -m pip install git+https://github.com/huggingface/accelerate 41 | 42 | - name: Environment 43 | run: | 44 | python utils/print_env.py 45 | 46 | - name: Run all (incl. slow) tests on GPU 47 | env: 48 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 49 | run: | 50 | python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=tests_torch_gpu tests/ 51 | 52 | - name: Failure short reports 53 | if: ${{ failure() }} 54 | run: cat reports/tests_torch_gpu_failures_short.txt 55 | 56 | - name: Test suite reports artifacts 57 | if: ${{ always() }} 58 | uses: actions/upload-artifact@v2 59 | with: 60 | name: torch_test_reports 61 | path: reports 62 | 63 | run_examples_single_gpu: 64 | name: Examples tests 65 | runs-on: [ self-hosted, docker-gpu, single-gpu ] 66 | container: 67 | image: nvcr.io/nvidia/pytorch:22.07-py3 68 | options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache 69 | 70 | steps: 71 | - name: Checkout diffusers 72 | uses: actions/checkout@v3 73 | with: 74 | fetch-depth: 2 75 | 76 | - name: NVIDIA-SMI 77 | run: | 78 | nvidia-smi 79 | 80 | - name: Install dependencies 81 | run: | 82 | python -m pip install --upgrade pip 83 | python -m pip uninstall -y torch torchvision torchtext 84 | python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu117 85 | python -m pip install -e .[quality,test,training] 86 | python -m pip install git+https://github.com/huggingface/accelerate 87 | 88 | - name: Environment 89 | run: | 90 | python utils/print_env.py 91 | 92 | - name: Run example tests on GPU 93 | env: 94 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 95 | run: | 96 | python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=examples_torch_gpu examples/ 97 | 98 | - name: Failure short reports 99 | if: ${{ failure() }} 100 | run: cat reports/examples_torch_gpu_failures_short.txt 101 | 102 | - name: Test suite reports artifacts 103 | if: ${{ always() }} 104 | uses: actions/upload-artifact@v2 105 | with: 106 | name: examples_test_reports 107 | path: reports 108 | -------------------------------------------------------------------------------- /tests/pipelines/score_sde_ve/test_score_sde_ve.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 
16 | import unittest
17 | 
18 | import numpy as np
19 | import torch
20 | 
21 | from diffusers import ScoreSdeVePipeline, ScoreSdeVeScheduler, UNet2DModel
22 | from diffusers.utils.testing_utils import require_torch, slow, torch_device
23 | 
24 | from ...test_pipelines_common import PipelineTesterMixin
25 | 
26 | 
27 | torch.backends.cuda.matmul.allow_tf32 = False
28 | 
29 | 
30 | class ScoreSdeVePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
31 |     @property
32 |     def dummy_uncond_unet(self):
33 |         torch.manual_seed(0)
34 |         model = UNet2DModel(
35 |             block_out_channels=(32, 64),
36 |             layers_per_block=2,
37 |             sample_size=32,
38 |             in_channels=3,
39 |             out_channels=3,
40 |             down_block_types=("DownBlock2D", "AttnDownBlock2D"),
41 |             up_block_types=("AttnUpBlock2D", "UpBlock2D"),
42 |         )
43 |         return model
44 | 
45 |     def test_inference(self):
46 |         unet = self.dummy_uncond_unet
47 |         scheduler = ScoreSdeVeScheduler()
48 | 
49 |         sde_ve = ScoreSdeVePipeline(unet=unet, scheduler=scheduler)
50 |         sde_ve.to(torch_device)
51 |         sde_ve.set_progress_bar_config(disable=None)
52 | 
53 |         generator = torch.manual_seed(0)
54 |         image = sde_ve(num_inference_steps=2, output_type="numpy", generator=generator).images
55 | 
56 |         generator = torch.manual_seed(0)
57 |         image_from_tuple = sde_ve(num_inference_steps=2, output_type="numpy", generator=generator, return_dict=False)[
58 |             0
59 |         ]
60 | 
61 |         image_slice = image[0, -3:, -3:, -1]
62 |         image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]
63 | 
64 |         assert image.shape == (1, 32, 32, 3)
65 |         expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
66 |         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
67 |         assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
68 | 
69 | 
70 | @slow
71 | @require_torch
72 | class ScoreSdeVePipelineIntegrationTests(unittest.TestCase):
73 |     def test_inference(self):
74 |         model_id = "google/ncsnpp-church-256"
75 |         model = UNet2DModel.from_pretrained(model_id, device_map="auto")
76 | 
77 |         scheduler = ScoreSdeVeScheduler.from_config(model_id)
78 | 
79 |         sde_ve = ScoreSdeVePipeline(unet=model, scheduler=scheduler)
80 |         sde_ve.to(torch_device)
81 |         sde_ve.set_progress_bar_config(disable=None)
82 | 
83 |         generator = torch.manual_seed(0)
84 |         image = sde_ve(num_inference_steps=10, output_type="numpy", generator=generator).images
85 | 
86 |         image_slice = image[0, -3:, -3:, -1]
87 | 
88 |         assert image.shape == (1, 256, 256, 3)
89 | 
90 |         expected_slice = np.array([0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0])
91 |         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
92 | 
--------------------------------------------------------------------------------
/docs/source/_toctree.yml:
--------------------------------------------------------------------------------
 1 | - sections:
 2 |   - local: index
 3 |     title: "🧨 Diffusers"
 4 |   - local: quicktour
 5 |     title: "Quicktour"
 6 |   - local: installation
 7 |     title: "Installation"
 8 |   title: "Get started"
 9 | - sections:
10 |   - sections:
11 |     - local: using-diffusers/loading
12 |       title: "Loading Pipelines, Models, and Schedulers"
13 |     - local: using-diffusers/configuration
14 |       title: "Configuring Pipelines, Models, and Schedulers"
15 |     - local: using-diffusers/custom_pipeline_overview
16 |       title: "Loading and Adding Custom Pipelines"
17 |     title: "Loading & Hub"
18 |   - sections:
19 |     - local: using-diffusers/unconditional_image_generation
20 |       title: "Unconditional Image Generation"
21 |     - local: using-diffusers/conditional_image_generation
22 |       
title: "Text-to-Image Generation" 23 | - local: using-diffusers/img2img 24 | title: "Text-Guided Image-to-Image" 25 | - local: using-diffusers/inpaint 26 | title: "Text-Guided Image-Inpainting" 27 | - local: using-diffusers/custom_pipeline_examples 28 | title: "Community Pipelines" 29 | - local: using-diffusers/contribute_pipeline 30 | title: "How to contribute a Pipeline" 31 | title: "Pipelines for Inference" 32 | title: "Using Diffusers" 33 | - sections: 34 | - local: optimization/fp16 35 | title: "Memory and Speed" 36 | - local: optimization/onnx 37 | title: "ONNX" 38 | - local: optimization/open_vino 39 | title: "OpenVINO" 40 | - local: optimization/mps 41 | title: "MPS" 42 | title: "Optimization/Special Hardware" 43 | - sections: 44 | - local: training/overview 45 | title: "Overview" 46 | - local: training/unconditional_training 47 | title: "Unconditional Image Generation" 48 | - local: training/text_inversion 49 | title: "Textual Inversion" 50 | - local: training/dreambooth 51 | title: "Dreambooth" 52 | - local: training/text2image 53 | title: "Text-to-image fine-tuning" 54 | title: "Training" 55 | - sections: 56 | - local: conceptual/stable_diffusion 57 | title: "Stable Diffusion" 58 | - local: conceptual/philosophy 59 | title: "Philosophy" 60 | - local: conceptual/contribution 61 | title: "How to contribute?" 62 | title: "Conceptual Guides" 63 | - sections: 64 | - sections: 65 | - local: api/models 66 | title: "Models" 67 | - local: api/schedulers 68 | title: "Schedulers" 69 | - local: api/diffusion_pipeline 70 | title: "Diffusion Pipeline" 71 | - local: api/logging 72 | title: "Logging" 73 | - local: api/configuration 74 | title: "Configuration" 75 | - local: api/outputs 76 | title: "Outputs" 77 | title: "Main Classes" 78 | - sections: 79 | - local: api/pipelines/overview 80 | title: "Overview" 81 | - local: api/pipelines/ddim 82 | title: "DDIM" 83 | - local: api/pipelines/ddpm 84 | title: "DDPM" 85 | - local: api/pipelines/latent_diffusion 86 | title: "Latent Diffusion" 87 | - local: api/pipelines/latent_diffusion_uncond 88 | title: "Unconditional Latent Diffusion" 89 | - local: api/pipelines/pndm 90 | title: "PNDM" 91 | - local: api/pipelines/score_sde_ve 92 | title: "Score SDE VE" 93 | - local: api/pipelines/stable_diffusion 94 | title: "Stable Diffusion" 95 | - local: api/pipelines/stochastic_karras_ve 96 | title: "Stochastic Karras VE" 97 | - local: api/pipelines/dance_diffusion 98 | title: "Dance Diffusion" 99 | title: "Pipelines" 100 | title: "API" 101 | -------------------------------------------------------------------------------- /docs/source/installation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Installation 14 | 15 | Install Diffusers for with PyTorch. Support for other libraries will come in the future 16 | 17 | 🤗 Diffusers is tested on Python 3.7+, and PyTorch 1.7.0+. 18 | 19 | ## Install with pip 20 | 21 | You should install 🤗 Diffusers in a [virtual environment](https://docs.python.org/3/library/venv.html). 22 | If you're unfamiliar with Python virtual environments, take a look at this [guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). 23 | A virtual environment makes it easier to manage different projects, and avoid compatibility issues between dependencies. 
24 | 
25 | Start by creating a virtual environment in your project directory:
26 | 
27 | ```bash
28 | python -m venv .env
29 | ```
30 | 
31 | Activate the virtual environment:
32 | 
33 | ```bash
34 | source .env/bin/activate
35 | ```
36 | 
37 | Now you're ready to install 🤗 Diffusers with the following command:
38 | 
39 | ```bash
40 | pip install diffusers
41 | ```
42 | 
43 | ## Install from source
44 | 
45 | Install 🤗 Diffusers from source with the following command:
46 | 
47 | ```bash
48 | pip install git+https://github.com/huggingface/diffusers
49 | ```
50 | 
51 | This command installs the bleeding edge `main` version rather than the latest `stable` version.
52 | The `main` version is useful for staying up-to-date with the latest developments, for instance
53 | if a bug has been fixed since the last official release but a new release hasn't been rolled out yet.
54 | However, this means the `main` version may not always be stable.
55 | We strive to keep the `main` version operational, and most issues are usually resolved within a few hours or a day.
56 | If you run into a problem, please open an [Issue](https://github.com/huggingface/diffusers/issues), so we can fix it even sooner!
57 | 
58 | ## Editable install
59 | 
60 | You will need an editable install if you'd like to:
61 | 
62 | * Use the `main` version of the source code.
63 | * Contribute to 🤗 Diffusers and need to test changes in the code.
64 | 
65 | Clone the repository and install 🤗 Diffusers with the following commands:
66 | 
67 | ```bash
68 | git clone https://github.com/huggingface/diffusers.git
69 | cd diffusers
70 | pip install -e .
71 | ```
72 | 
73 | These commands link the folder into which you cloned the repository with your Python library paths.
74 | Python will now look inside the folder you cloned to in addition to the normal library paths.
75 | For example, if your Python packages are typically installed in `~/anaconda3/envs/main/lib/python3.7/site-packages/`, Python will also search the folder you cloned to: `~/diffusers/`.
76 | 
77 | 
78 | 
79 | You must keep the `diffusers` folder if you want to keep using the library.
80 | 
81 | 
82 | 
83 | Now you can easily update your clone to the latest version of 🤗 Diffusers with the following command:
84 | 
85 | ```bash
86 | cd ~/diffusers/
87 | git pull
88 | ```
89 | 
90 | Your Python environment will find the `main` version of 🤗 Diffusers on the next run.
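
As a quick check that the editable install is picked up, you can print the installed version (a minimal sketch; the exact version string will vary with your checkout):

```python
import diffusers

# for an editable install from `main` this prints a dev version, e.g. "0.7.0.dev0"
print(diffusers.__version__)
```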
91 | -------------------------------------------------------------------------------- /.github/workflows/pr_tests.yml: -------------------------------------------------------------------------------- 1 | name: Run fast tests 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | env: 13 | DIFFUSERS_IS_CI: yes 14 | OMP_NUM_THREADS: 8 15 | MKL_NUM_THREADS: 8 16 | PYTEST_TIMEOUT: 60 17 | MPS_TORCH_VERSION: 1.13.0 18 | 19 | jobs: 20 | run_tests_cpu: 21 | name: CPU tests on Ubuntu 22 | runs-on: [ self-hosted, docker-gpu ] 23 | container: 24 | image: python:3.7 25 | options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ 26 | 27 | steps: 28 | - name: Checkout diffusers 29 | uses: actions/checkout@v3 30 | with: 31 | fetch-depth: 2 32 | 33 | - name: Install dependencies 34 | run: | 35 | python -m pip install --upgrade pip 36 | python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu 37 | python -m pip install -e .[quality,test] 38 | python -m pip install git+https://github.com/huggingface/accelerate 39 | 40 | - name: Environment 41 | run: | 42 | python utils/print_env.py 43 | 44 | - name: Run all fast tests on CPU 45 | env: 46 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 47 | run: | 48 | python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=tests_torch_cpu tests/ 49 | 50 | - name: Failure short reports 51 | if: ${{ failure() }} 52 | run: cat reports/tests_torch_cpu_failures_short.txt 53 | 54 | - name: Test suite reports artifacts 55 | if: ${{ always() }} 56 | uses: actions/upload-artifact@v2 57 | with: 58 | name: pr_torch_cpu_test_reports 59 | path: reports 60 | 61 | run_tests_apple_m1: 62 | name: MPS tests on Apple M1 63 | runs-on: [ self-hosted, apple-m1 ] 64 | 65 | steps: 66 | - name: Checkout diffusers 67 | uses: actions/checkout@v3 68 | with: 69 | fetch-depth: 2 70 | 71 | - name: Clean checkout 72 | shell: arch -arch arm64 bash {0} 73 | run: | 74 | git clean -fxd 75 | 76 | - name: Setup miniconda 77 | uses: ./.github/actions/setup-miniconda 78 | with: 79 | python-version: 3.9 80 | 81 | - name: Install dependencies 82 | shell: arch -arch arm64 bash {0} 83 | run: | 84 | ${CONDA_RUN} python -m pip install --upgrade pip 85 | ${CONDA_RUN} python -m pip install -e .[quality,test] 86 | ${CONDA_RUN} python -m pip install --pre torch==${MPS_TORCH_VERSION} --extra-index-url https://download.pytorch.org/whl/test/cpu 87 | ${CONDA_RUN} python -m pip install git+https://github.com/huggingface/accelerate 88 | 89 | - name: Environment 90 | shell: arch -arch arm64 bash {0} 91 | run: | 92 | ${CONDA_RUN} python utils/print_env.py 93 | 94 | - name: Run all fast tests on MPS 95 | shell: arch -arch arm64 bash {0} 96 | env: 97 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 98 | run: | 99 | ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps tests/ 100 | 101 | - name: Failure short reports 102 | if: ${{ failure() }} 103 | run: cat reports/tests_torch_mps_failures_short.txt 104 | 105 | - name: Test suite reports artifacts 106 | if: ${{ always() }} 107 | uses: actions/upload-artifact@v2 108 | with: 109 | name: pr_torch_mps_test_reports 110 | path: reports 111 | -------------------------------------------------------------------------------- /src/diffusers/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils 
import ( 2 | is_flax_available, 3 | is_inflect_available, 4 | is_onnx_available, 5 | is_scipy_available, 6 | is_torch_available, 7 | is_transformers_available, 8 | is_unidecode_available, 9 | ) 10 | 11 | 12 | __version__ = "0.7.0.dev0" 13 | 14 | from .configuration_utils import ConfigMixin 15 | from .onnx_utils import OnnxRuntimeModel 16 | from .utils import logging 17 | 18 | 19 | if is_torch_available(): 20 | from .modeling_utils import ModelMixin 21 | from .models import AutoencoderKL, UNet1DModel, UNet2DConditionModel, UNet2DModel, VQModel 22 | from .optimization import ( 23 | get_constant_schedule, 24 | get_constant_schedule_with_warmup, 25 | get_cosine_schedule_with_warmup, 26 | get_cosine_with_hard_restarts_schedule_with_warmup, 27 | get_linear_schedule_with_warmup, 28 | get_polynomial_decay_schedule_with_warmup, 29 | get_scheduler, 30 | ) 31 | from .pipeline_utils import DiffusionPipeline 32 | from .pipelines import ( 33 | DanceDiffusionPipeline, 34 | DDIMPipeline, 35 | DDPMPipeline, 36 | KarrasVePipeline, 37 | LDMPipeline, 38 | PNDMPipeline, 39 | ScoreSdeVePipeline, 40 | ) 41 | from .schedulers import ( 42 | DDIMScheduler, 43 | DDPMScheduler, 44 | EulerAncestralDiscreteScheduler, 45 | EulerDiscreteScheduler, 46 | IPNDMScheduler, 47 | KarrasVeScheduler, 48 | PNDMScheduler, 49 | SchedulerMixin, 50 | ScoreSdeVeScheduler, 51 | ) 52 | from .training_utils import EMAModel 53 | else: 54 | from .utils.dummy_pt_objects import * # noqa F403 55 | 56 | if is_torch_available() and is_scipy_available(): 57 | from .schedulers import LMSDiscreteScheduler 58 | else: 59 | from .utils.dummy_torch_and_scipy_objects import * # noqa F403 60 | 61 | if is_torch_available() and is_transformers_available(): 62 | from .pipelines import ( 63 | LDMTextToImagePipeline, 64 | StableDiffusionImg2ImgPipeline, 65 | StableDiffusionInpaintPipeline, 66 | StableDiffusionInpaintPipelineLegacy, 67 | StableDiffusionPipeline, 68 | ) 69 | else: 70 | from .utils.dummy_torch_and_transformers_objects import * # noqa F403 71 | 72 | if is_torch_available() and is_transformers_available() and is_onnx_available(): 73 | from .pipelines import ( 74 | OnnxStableDiffusionImg2ImgPipeline, 75 | OnnxStableDiffusionInpaintPipeline, 76 | OnnxStableDiffusionPipeline, 77 | StableDiffusionOnnxPipeline, 78 | ) 79 | else: 80 | from .utils.dummy_torch_and_transformers_and_onnx_objects import * # noqa F403 81 | 82 | if is_flax_available(): 83 | from .modeling_flax_utils import FlaxModelMixin 84 | from .models.unet_2d_condition_flax import FlaxUNet2DConditionModel 85 | from .models.vae_flax import FlaxAutoencoderKL 86 | from .pipeline_flax_utils import FlaxDiffusionPipeline 87 | from .schedulers import ( 88 | FlaxDDIMScheduler, 89 | FlaxDDPMScheduler, 90 | FlaxKarrasVeScheduler, 91 | FlaxLMSDiscreteScheduler, 92 | FlaxPNDMScheduler, 93 | FlaxSchedulerMixin, 94 | FlaxScoreSdeVeScheduler, 95 | ) 96 | else: 97 | from .utils.dummy_flax_objects import * # noqa F403 98 | 99 | if is_flax_available() and is_transformers_available(): 100 | from .pipelines import FlaxStableDiffusionPipeline 101 | else: 102 | from .utils.dummy_flax_and_transformers_objects import * # noqa F403 103 | -------------------------------------------------------------------------------- /src/diffusers/schedulers/scheduling_sde_vp.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch 16 | 17 | import math 18 | from typing import Union 19 | 20 | import torch 21 | 22 | from ..configuration_utils import ConfigMixin, register_to_config 23 | from .scheduling_utils import SchedulerMixin 24 | 25 | 26 | class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): 27 | """ 28 | The variance preserving stochastic differential equation (SDE) scheduler. 29 | 30 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 31 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 32 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 33 | [`~ConfigMixin.from_config`] functions. 34 | 35 | For more information, see the original paper: https://arxiv.org/abs/2011.13456 36 | 37 | UNDER CONSTRUCTION 38 | 39 | """ 40 | 41 | @register_to_config 42 | def __init__(self, num_train_timesteps=2000, beta_min=0.1, beta_max=20, sampling_eps=1e-3): 43 | self.sigmas = None 44 | self.discrete_sigmas = None 45 | self.timesteps = None 46 | 47 | def set_timesteps(self, num_inference_steps, device: Union[str, torch.device] = None): 48 | self.timesteps = torch.linspace(1, self.config.sampling_eps, num_inference_steps, device=device) 49 | 50 | def step_pred(self, score, x, t, generator=None): 51 | if self.timesteps is None: 52 | raise ValueError( 53 | "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler" 54 | ) 55 | 56 | # TODO(Patrick) better comments + non-PyTorch 57 | # postprocess model score 58 | log_mean_coeff = ( 59 | -0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min 60 | ) 61 | std = torch.sqrt(1.0 - torch.exp(2.0 * log_mean_coeff)) 62 | std = std.flatten() 63 | while len(std.shape) < len(score.shape): 64 | std = std.unsqueeze(-1) 65 | score = -score / std 66 | 67 | # compute 68 | dt = -1.0 / len(self.timesteps) 69 | 70 | beta_t = self.config.beta_min + t * (self.config.beta_max - self.config.beta_min) 71 | beta_t = beta_t.flatten() 72 | while len(beta_t.shape) < len(x.shape): 73 | beta_t = beta_t.unsqueeze(-1) 74 | drift = -0.5 * beta_t * x 75 | 76 | diffusion = torch.sqrt(beta_t) 77 | drift = drift - diffusion**2 * score 78 | x_mean = x + drift * dt 79 | 80 | # add noise 81 | noise = torch.randn(x.shape, layout=x.layout, generator=generator).to(x.device) 82 | x = x_mean + diffusion * math.sqrt(-dt) * noise 83 | 84 | return x, x_mean 85 | 86 | def __len__(self): 87 | return self.config.num_train_timesteps 88 | -------------------------------------------------------------------------------- /tests/pipelines/stable_diffusion/test_onnx_stable_diffusion.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 
HuggingFace Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import unittest
17 | 
18 | import numpy as np
19 | 
20 | from diffusers import OnnxStableDiffusionPipeline
21 | from diffusers.utils.testing_utils import require_onnxruntime, slow
22 | 
23 | from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin
24 | 
25 | 
26 | class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase):
27 |     # FIXME: add fast tests
28 |     pass
29 | 
30 | 
31 | @slow
32 | @require_onnxruntime
33 | class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
34 |     def test_inference(self):
35 |         sd_pipe = OnnxStableDiffusionPipeline.from_pretrained(
36 |             "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CPUExecutionProvider"
37 |         )
38 | 
39 |         prompt = "A painting of a squirrel eating a burger"
40 |         np.random.seed(0)
41 |         output = sd_pipe([prompt], guidance_scale=6.0, num_inference_steps=5, output_type="np")
42 |         image = output.images
43 | 
44 |         image_slice = image[0, -3:, -3:, -1]
45 | 
46 |         assert image.shape == (1, 512, 512, 3)
47 |         expected_slice = np.array([0.3602, 0.3688, 0.3652, 0.3895, 0.3782, 0.3747, 0.3927, 0.4241, 0.4327])
48 |         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
49 | 
50 |     def test_intermediate_state(self):
51 |         number_of_steps = 0
52 | 
53 |         def test_callback_fn(step: int, timestep: int, latents: np.ndarray) -> None:
54 |             test_callback_fn.has_been_called = True
55 |             nonlocal number_of_steps
56 |             number_of_steps += 1
57 |             if step == 0:
58 |                 assert latents.shape == (1, 4, 64, 64)
59 |                 latents_slice = latents[0, -3:, -3:, -1]
60 |                 expected_slice = np.array(
61 |                     [-0.5950, -0.3039, -1.1672, 0.1594, -1.1572, 0.6719, -1.9712, -0.0403, 0.9592]
62 |                 )
63 |                 assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
64 |             elif step == 5:
65 |                 assert latents.shape == (1, 4, 64, 64)
66 |                 latents_slice = latents[0, -3:, -3:, -1]
67 |                 expected_slice = np.array(
68 |                     [-0.4776, -0.0119, -0.8519, -0.0275, -0.9764, 0.9820, -0.3843, 0.3788, 1.2264]
69 |                 )
70 |                 assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
71 | 
72 |         test_callback_fn.has_been_called = False
73 | 
74 |         pipe = OnnxStableDiffusionPipeline.from_pretrained(
75 |             "CompVis/stable-diffusion-v1-4", revision="onnx", provider="CPUExecutionProvider"
76 |         )
77 |         pipe.set_progress_bar_config(disable=None)
78 | 
79 |         prompt = "Andromeda galaxy in a bottle"
80 | 
81 |         np.random.seed(0)
82 |         pipe(prompt=prompt, num_inference_steps=5, guidance_scale=7.5, callback=test_callback_fn, callback_steps=1)
83 |         assert test_callback_fn.has_been_called
84 |         assert number_of_steps == 6
85 | 
--------------------------------------------------------------------------------
/docs/source/optimization/mps.mdx:
--------------------------------------------------------------------------------
 1 | 
12 | 
13 | # How to use Stable Diffusion on Apple Silicon (M1/M2)
14 | 
15 | 🤗 Diffusers is compatible with Apple silicon for Stable Diffusion
inference, using the PyTorch `mps` device. These are the steps you need to follow to use your M1 or M2 computer with Stable Diffusion.
16 | 
17 | ## Requirements
18 | 
19 | - Mac computer with Apple silicon (M1/M2) hardware.
20 | - macOS 12.6 or later (13.0 or later recommended).
21 | - arm64 version of Python.
22 | - PyTorch 1.13.0 RC (Release Candidate). You can install it with `pip` using:
23 | 
24 | ```
25 | pip3 install --pre torch --extra-index-url https://download.pytorch.org/whl/test/cpu
26 | ```
27 | 
28 | ## Inference Pipeline
29 | 
30 | The snippet below demonstrates how to use the `mps` backend using the familiar `to()` interface to move the Stable Diffusion pipeline to your M1 or M2 device.
31 | 
32 | We recommend "priming" the pipeline with an additional one-time pass through it. This is a temporary workaround for a strange issue we have detected: the first inference pass produces slightly different results than subsequent ones. You only need to do this pass once, and it's OK to use just one inference step and discard the result.
33 | 
34 | ```python
35 | # make sure you're logged in with `huggingface-cli login`
36 | from diffusers import StableDiffusionPipeline
37 | 
38 | pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
39 | pipe = pipe.to("mps")
40 | 
41 | # Recommended if your computer has < 64 GB of RAM
42 | pipe.enable_attention_slicing()
43 | 
44 | prompt = "a photo of an astronaut riding a horse on mars"
45 | 
46 | # First-time "warmup" pass (see explanation above)
47 | _ = pipe(prompt, num_inference_steps=1)
48 | 
49 | # Results match those from the CPU device after the warmup pass.
50 | image = pipe(prompt).images[0]
51 | ```
52 | 
53 | ## Performance Recommendations
54 | 
55 | M1/M2 performance is very sensitive to memory pressure. The system will automatically swap if it needs to, but performance will degrade significantly when it does.
56 | 
57 | We recommend you use _attention slicing_ to reduce memory pressure during inference and prevent swapping, particularly if your computer has less than 64 GB of system RAM, or if you generate images at non-standard resolutions larger than 512 × 512 pixels. Attention slicing performs the costly attention operation in multiple steps instead of all at once. It usually has a performance impact of ~20% in computers without unified memory, but we have observed _better performance_ in most Apple Silicon computers, unless you have 64 GB or more.
58 | 
59 | ```python
60 | pipe.enable_attention_slicing()
61 | ```
62 | 
63 | ## Known Issues
64 | 
65 | - As mentioned above, we are investigating a strange [first-time inference issue](https://github.com/huggingface/diffusers/issues/372).
66 | - Generating multiple prompts in a batch [crashes or doesn't work reliably](https://github.com/huggingface/diffusers/issues/363). We believe this is related to the [`mps` backend in PyTorch](https://github.com/pytorch/pytorch/issues/84039). For now, we recommend iterating instead of batching.
67 | 
--------------------------------------------------------------------------------
/tests/test_training.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 HuggingFace Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import unittest 17 | 18 | import torch 19 | 20 | from diffusers import DDIMScheduler, DDPMScheduler, UNet2DModel 21 | from diffusers.training_utils import set_seed 22 | from diffusers.utils.testing_utils import slow 23 | 24 | 25 | torch.backends.cuda.matmul.allow_tf32 = False 26 | 27 | 28 | class TrainingTests(unittest.TestCase): 29 | def get_model_optimizer(self, resolution=32): 30 | set_seed(0) 31 | model = UNet2DModel(sample_size=resolution, in_channels=3, out_channels=3) 32 | optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) 33 | return model, optimizer 34 | 35 | @slow 36 | def test_training_step_equality(self): 37 | device = "cpu"  # ensure full determinism without setting the CUBLAS_WORKSPACE_CONFIG env variable 38 | ddpm_scheduler = DDPMScheduler( 39 | num_train_timesteps=1000, 40 | beta_start=0.0001, 41 | beta_end=0.02, 42 | beta_schedule="linear", 43 | clip_sample=True, 44 | ) 45 | ddim_scheduler = DDIMScheduler( 46 | num_train_timesteps=1000, 47 | beta_start=0.0001, 48 | beta_end=0.02, 49 | beta_schedule="linear", 50 | clip_sample=True, 51 | ) 52 | 53 | assert ddpm_scheduler.config.num_train_timesteps == ddim_scheduler.config.num_train_timesteps 54 | 55 | # shared batches for DDPM and DDIM 56 | set_seed(0) 57 | clean_images = [torch.randn((4, 3, 32, 32)).clip(-1, 1).to(device) for _ in range(4)] 58 | noise = [torch.randn((4, 3, 32, 32)).to(device) for _ in range(4)] 59 | timesteps = [torch.randint(0, 1000, (4,)).long().to(device) for _ in range(4)] 60 | 61 | # train with a DDPM scheduler 62 | model, optimizer = self.get_model_optimizer(resolution=32) 63 | model.train().to(device) 64 | for i in range(4): 65 | optimizer.zero_grad() 66 | ddpm_noisy_images = ddpm_scheduler.add_noise(clean_images[i], noise[i], timesteps[i]) 67 | ddpm_noise_pred = model(ddpm_noisy_images, timesteps[i]).sample 68 | loss = torch.nn.functional.mse_loss(ddpm_noise_pred, noise[i]) 69 | loss.backward() 70 | optimizer.step() 71 | del model, optimizer 72 | 73 | # recreate the model and optimizer, and retry with DDIM 74 | model, optimizer = self.get_model_optimizer(resolution=32) 75 | model.train().to(device) 76 | for i in range(4): 77 | optimizer.zero_grad() 78 | ddim_noisy_images = ddim_scheduler.add_noise(clean_images[i], noise[i], timesteps[i]) 79 | ddim_noise_pred = model(ddim_noisy_images, timesteps[i]).sample 80 | loss = torch.nn.functional.mse_loss(ddim_noise_pred, noise[i]) 81 | loss.backward() 82 | optimizer.step() 83 | del model, optimizer 84 | 85 | self.assertTrue(torch.allclose(ddpm_noisy_images, ddim_noisy_images, atol=1e-5)) 86 | self.assertTrue(torch.allclose(ddpm_noise_pred, ddim_noise_pred, atol=1e-5)) 87 | -------------------------------------------------------------------------------- /docs/source/api/logging.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Logging 14 | 15 | 🧨 Diffusers has a centralized logging system, so you can easily set up the verbosity of the library. 16 | 17 | Currently the default verbosity of the library is `WARNING`.
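If you want to verify the current level programmatically, a minimal check looks like this (illustrative; `get_verbosity` and the level constants are documented further down this page):

```python
import diffusers

# The library default corresponds to the WARNING level (int value 30)
assert diffusers.logging.get_verbosity() == diffusers.logging.WARNING
```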
18 | 19 | To change the level of verbosity, just use one of the direct setters. For instance, here is how to change the verbosity 20 | to the INFO level. 21 | 22 | ```python 23 | import diffusers 24 | 25 | diffusers.logging.set_verbosity_info() 26 | ``` 27 | 28 | You can also use the environment variable `DIFFUSERS_VERBOSITY` to override the default verbosity. You can set it 29 | to one of the following: `debug`, `info`, `warning`, `error`, `critical`. For example: 30 | 31 | ```bash 32 | DIFFUSERS_VERBOSITY=error ./myprogram.py 33 | ``` 34 | 35 | Additionally, some `warnings` can be disabled by setting the environment variable 36 | `DIFFUSERS_NO_ADVISORY_WARNINGS` to a true value, like *1*. This will disable any warning that is logged using 37 | [`logger.warning_advice`]. For example: 38 | 39 | ```bash 40 | DIFFUSERS_NO_ADVISORY_WARNINGS=1 ./myprogram.py 41 | ``` 42 | 43 | Here is an example of how to use the same logger as the library in your own module or script: 44 | 45 | ```python 46 | from diffusers.utils import logging 47 | 48 | logging.set_verbosity_info() 49 | logger = logging.get_logger("diffusers") 50 | logger.info("INFO") 51 | logger.warning("WARN") 52 | ``` 53 | 54 | 55 | All the methods of this logging module are documented below; the main ones are 56 | [`logging.get_verbosity`] to get the current level of verbosity in the logger and 57 | [`logging.set_verbosity`] to set the verbosity to the level of your choice. In order (from the least 58 | verbose to the most verbose), those levels (with their corresponding int values in parentheses) are: 59 | 60 | - `diffusers.logging.CRITICAL` or `diffusers.logging.FATAL` (int value, 50): only report the most 61 | critical errors. 62 | - `diffusers.logging.ERROR` (int value, 40): only report errors. 63 | - `diffusers.logging.WARNING` or `diffusers.logging.WARN` (int value, 30): only report errors and 64 | warnings. This is the default level used by the library. 65 | - `diffusers.logging.INFO` (int value, 20): report errors, warnings, and basic information. 66 | - `diffusers.logging.DEBUG` (int value, 10): report all information. 67 | 68 | By default, `tqdm` progress bars will be displayed during model download. [`logging.disable_progress_bar`] and [`logging.enable_progress_bar`] can be used to disable or re-enable this behavior. 69 | 70 | ## Base setters 71 | 72 | [[autodoc]] logging.set_verbosity_error 73 | 74 | [[autodoc]] logging.set_verbosity_warning 75 | 76 | [[autodoc]] logging.set_verbosity_info 77 | 78 | [[autodoc]] logging.set_verbosity_debug 79 | 80 | ## Other functions 81 | 82 | [[autodoc]] logging.get_verbosity 83 | 84 | [[autodoc]] logging.set_verbosity 85 | 86 | [[autodoc]] logging.get_logger 87 | 88 | [[autodoc]] logging.enable_default_handler 89 | 90 | [[autodoc]] logging.disable_default_handler 91 | 92 | [[autodoc]] logging.enable_explicit_format 93 | 94 | [[autodoc]] logging.reset_format 95 | 96 | [[autodoc]] logging.enable_progress_bar 97 | 98 | [[autodoc]] logging.disable_progress_bar 99 | -------------------------------------------------------------------------------- /docs/source/using-diffusers/inpaint.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Text-Guided Image-Inpainting 14 | 15 | The [`StableDiffusionInpaintPipeline`] lets you edit specific parts of an image by providing a mask and a text prompt. It uses a version of Stable Diffusion specifically trained for in-painting tasks.
16 | 17 | 18 | Note that this model is distributed separately from the regular Stable Diffusion model, so you have to accept its license even if you accepted the Stable Diffusion one in the past. 19 | 20 | Please visit the [model card](https://huggingface.co/runwayml/stable-diffusion-inpainting), read the license carefully and tick the checkbox if you agree. You have to be a registered user on the 🤗 Hugging Face Hub, and you'll also need to use an access token for the code to work. For more information on access tokens, please refer to [this section](https://huggingface.co/docs/hub/security-tokens) of the documentation. 21 | 22 | 23 | ```python 24 | import PIL 25 | import requests 26 | import torch 27 | from io import BytesIO 28 | 29 | from diffusers import StableDiffusionInpaintPipeline 30 | 31 | 32 | def download_image(url): 33 | response = requests.get(url) 34 | return PIL.Image.open(BytesIO(response.content)).convert("RGB") 35 | 36 | 37 | img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png" 38 | mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png" 39 | 40 | init_image = download_image(img_url).resize((512, 512)) 41 | mask_image = download_image(mask_url).resize((512, 512)) 42 | 43 | pipe = StableDiffusionInpaintPipeline.from_pretrained( 44 | "runwayml/stable-diffusion-inpainting", 45 | revision="fp16", 46 | torch_dtype=torch.float16, 47 | ) 48 | pipe = pipe.to("cuda") 49 | 50 | prompt = "Face of a yellow cat, high resolution, sitting on a park bench" 51 | image = pipe(prompt=prompt, image=init_image, mask_image=mask_image).images[0] 52 | ``` 53 | 54 | `image` | `mask_image` | `prompt` | **Output** | 55 | :-------------------------:|:-------------------------:|:-------------------------:|-------------------------:| 56 | drawing | drawing | ***Face of a yellow cat, high resolution, sitting on a park bench*** | drawing | 57 | 58 | 59 | You can also run this example on colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) 60 | 61 | 62 | A previous experimental implementation of in-painting used a different, lower-quality process. To ensure backwards compatibility, loading a pretrained pipeline that doesn't contain the new model will still apply the old in-painting method. 63 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from typing import Optional, Tuple, Union 3 | 4 | import torch 5 | 6 | from ...models import UNet2DModel 7 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 8 | from ...schedulers import ScoreSdeVeScheduler 9 | 10 | 11 | class ScoreSdeVePipeline(DiffusionPipeline): 12 | r""" 13 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 14 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 15 | 16 | Parameters: 17 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image.
scheduler ([`SchedulerMixin`]): 18 | The [`ScoreSdeVeScheduler`] scheduler to be used in combination with `unet` to denoise the encoded image. 19 | """ 20 | unet: UNet2DModel 21 | scheduler: ScoreSdeVeScheduler 22 | 23 | def __init__(self, unet: UNet2DModel, scheduler: ScoreSdeVeScheduler): 24 | super().__init__() 25 | self.register_modules(unet=unet, scheduler=scheduler) 26 | 27 | @torch.no_grad() 28 | def __call__( 29 | self, 30 | batch_size: int = 1, 31 | num_inference_steps: int = 2000, 32 | generator: Optional[torch.Generator] = None, 33 | output_type: Optional[str] = "pil", 34 | return_dict: bool = True, 35 | **kwargs, 36 | ) -> Union[ImagePipelineOutput, Tuple]: 37 | r""" 38 | Args: 39 | batch_size (`int`, *optional*, defaults to 1): 40 | The number of images to generate. 41 | generator (`torch.Generator`, *optional*): 42 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 43 | deterministic. 44 | output_type (`str`, *optional*, defaults to `"pil"`): 45 | The output format of the generated image. Choose between 46 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. 47 | return_dict (`bool`, *optional*, defaults to `True`): 48 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 49 | 50 | Returns: 51 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 52 | `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the 53 | generated images. 54 | """ 55 | 56 | img_size = self.unet.config.sample_size 57 | shape = (batch_size, 3, img_size, img_size) 58 | 59 | model = self.unet 60 | 61 | sample = torch.randn(*shape, generator=generator) * self.scheduler.init_noise_sigma 62 | sample = sample.to(self.device) 63 | 64 | self.scheduler.set_timesteps(num_inference_steps) 65 | self.scheduler.set_sigmas(num_inference_steps) 66 | 67 | for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): 68 | sigma_t = self.scheduler.sigmas[i] * torch.ones(shape[0], device=self.device) 69 | 70 | # correction step 71 | for _ in range(self.scheduler.config.correct_steps): 72 | model_output = self.unet(sample, sigma_t).sample 73 | sample = self.scheduler.step_correct(model_output, sample, generator=generator).prev_sample 74 | 75 | # prediction step 76 | model_output = model(sample, sigma_t).sample 77 | output = self.scheduler.step_pred(model_output, t, sample, generator=generator) 78 | 79 | sample, sample_mean = output.prev_sample, output.prev_sample_mean 80 | 81 | sample = sample_mean.clamp(0, 1) 82 | sample = sample.cpu().permute(0, 2, 3, 1).numpy() 83 | if output_type == "pil": 84 | sample = self.numpy_to_pil(sample) 85 | 86 | if not return_dict: 87 | return (sample,) 88 | 89 | return ImagePipelineOutput(images=sample) 90 | -------------------------------------------------------------------------------- /src/diffusers/utils/outputs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Generic utilities 16 | """ 17 | 18 | from collections import OrderedDict 19 | from dataclasses import fields 20 | from typing import Any, Tuple 21 | 22 | import numpy as np 23 | 24 | from .import_utils import is_torch_available 25 | 26 | 27 | def is_tensor(x): 28 | """ 29 | Tests if `x` is a `torch.Tensor` or `np.ndarray`. 30 | """ 31 | if is_torch_available(): 32 | import torch 33 | 34 | if isinstance(x, torch.Tensor): 35 | return True 36 | 37 | return isinstance(x, np.ndarray) 38 | 39 | 40 | class BaseOutput(OrderedDict): 41 | """ 42 | Base class for all model outputs as dataclasses. Has a `__getitem__` that allows indexing by integer or slice 43 | (like a tuple) or by string (like a dictionary); indexing ignores `None` attributes. Otherwise, it behaves like a 44 | regular Python dictionary. 45 | 46 | 47 | 48 | You can't unpack a `BaseOutput` directly. Use the [`~utils.BaseOutput.to_tuple`] method to convert it to a tuple 49 | beforehand. 50 | 51 | 52 | """ 53 | 54 | def __post_init__(self): 55 | class_fields = fields(self) 56 | 57 | # Safety and consistency checks 58 | if not len(class_fields): 59 | raise ValueError(f"{self.__class__.__name__} has no fields.") 60 | 61 | first_field = getattr(self, class_fields[0].name) 62 | other_fields_are_none = all(getattr(self, field.name) is None for field in class_fields[1:]) 63 | 64 | if other_fields_are_none and isinstance(first_field, dict): 65 | for key, value in first_field.items(): 66 | self[key] = value 67 | else: 68 | for field in class_fields: 69 | v = getattr(self, field.name) 70 | if v is not None: 71 | self[field.name] = v 72 | 73 | def __delitem__(self, *args, **kwargs): 74 | raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.") 75 | 76 | def setdefault(self, *args, **kwargs): 77 | raise Exception(f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance.") 78 | 79 | def pop(self, *args, **kwargs): 80 | raise Exception(f"You cannot use ``pop`` on a {self.__class__.__name__} instance.") 81 | 82 | def update(self, *args, **kwargs): 83 | raise Exception(f"You cannot use ``update`` on a {self.__class__.__name__} instance.") 84 | 85 | def __getitem__(self, k): 86 | if isinstance(k, str): 87 | inner_dict = {k: v for (k, v) in self.items()} 88 | return inner_dict[k] 89 | else: 90 | return self.to_tuple()[k] 91 | 92 | def __setattr__(self, name, value): 93 | if name in self.keys() and value is not None: 94 | # Don't call self.__setitem__ to avoid recursion errors 95 | super().__setitem__(name, value) 96 | super().__setattr__(name, value) 97 | 98 | def __setitem__(self, key, value): 99 | # Will raise a KeyError if needed 100 | super().__setitem__(key, value) 101 | # Don't call self.__setattr__ to avoid recursion errors 102 | super().__setattr__(key, value) 103 | 104 | def to_tuple(self) -> Tuple[Any]: 105 | """ 106 | Convert self to a tuple containing all the attributes/keys that are not `None`.
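Example (illustrative): for an output with a single non-`None` field, such as `ImagePipelineOutput(images=images)`, `to_tuple()` returns the one-element tuple `(images,)`, so `images, = output.to_tuple()` unpacks it.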
107 | """ 108 | return tuple(self[k] for k in self.keys()) 109 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/stable_diffusion/safety_checker_flax.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | import jax 4 | import jax.numpy as jnp 5 | from flax import linen as nn 6 | from flax.core.frozen_dict import FrozenDict 7 | from transformers import CLIPConfig, FlaxPreTrainedModel 8 | from transformers.models.clip.modeling_flax_clip import FlaxCLIPVisionModule 9 | 10 | 11 | def jax_cosine_distance(emb_1, emb_2, eps=1e-12): 12 | norm_emb_1 = jnp.divide(emb_1.T, jnp.clip(jnp.linalg.norm(emb_1, axis=1), a_min=eps)).T 13 | norm_emb_2 = jnp.divide(emb_2.T, jnp.clip(jnp.linalg.norm(emb_2, axis=1), a_min=eps)).T 14 | return jnp.matmul(norm_emb_1, norm_emb_2.T) 15 | 16 | 17 | class FlaxStableDiffusionSafetyCheckerModule(nn.Module): 18 | config: CLIPConfig 19 | dtype: jnp.dtype = jnp.float32 20 | 21 | def setup(self): 22 | self.vision_model = FlaxCLIPVisionModule(self.config.vision_config) 23 | self.visual_projection = nn.Dense(self.config.projection_dim, use_bias=False, dtype=self.dtype) 24 | 25 | self.concept_embeds = self.param("concept_embeds", jax.nn.initializers.ones, (17, self.config.projection_dim)) 26 | self.special_care_embeds = self.param( 27 | "special_care_embeds", jax.nn.initializers.ones, (3, self.config.projection_dim) 28 | ) 29 | 30 | self.concept_embeds_weights = self.param("concept_embeds_weights", jax.nn.initializers.ones, (17,)) 31 | self.special_care_embeds_weights = self.param("special_care_embeds_weights", jax.nn.initializers.ones, (3,)) 32 | 33 | def __call__(self, clip_input): 34 | pooled_output = self.vision_model(clip_input)[1] 35 | image_embeds = self.visual_projection(pooled_output) 36 | 37 | special_cos_dist = jax_cosine_distance(image_embeds, self.special_care_embeds) 38 | cos_dist = jax_cosine_distance(image_embeds, self.concept_embeds) 39 | 40 | # increase this value to create a stronger `nfsw` filter 41 | # at the cost of increasing the possibility of filtering benign image inputs 42 | adjustment = 0.0 43 | 44 | special_scores = special_cos_dist - self.special_care_embeds_weights[None, :] + adjustment 45 | special_scores = jnp.round(special_scores, 3) 46 | is_special_care = jnp.any(special_scores > 0, axis=1, keepdims=True) 47 | # Use a lower threshold if an image has any special care concept 48 | special_adjustment = is_special_care * 0.01 49 | 50 | concept_scores = cos_dist - self.concept_embeds_weights[None, :] + special_adjustment 51 | concept_scores = jnp.round(concept_scores, 3) 52 | has_nsfw_concepts = jnp.any(concept_scores > 0, axis=1) 53 | 54 | return has_nsfw_concepts 55 | 56 | 57 | class FlaxStableDiffusionSafetyChecker(FlaxPreTrainedModel): 58 | config_class = CLIPConfig 59 | main_input_name = "clip_input" 60 | module_class = FlaxStableDiffusionSafetyCheckerModule 61 | 62 | def __init__( 63 | self, 64 | config: CLIPConfig, 65 | input_shape: Optional[Tuple] = None, 66 | seed: int = 0, 67 | dtype: jnp.dtype = jnp.float32, 68 | _do_init: bool = True, 69 | **kwargs, 70 | ): 71 | if input_shape is None: 72 | input_shape = (1, 224, 224, 3) 73 | module = self.module_class(config=config, dtype=dtype, **kwargs) 74 | super().__init__(config, module, input_shape=input_shape, seed=seed, dtype=dtype, _do_init=_do_init) 75 | 76 | def init_weights(self, rng: jax.random.PRNGKey, input_shape: Tuple, params: FrozenDict = None) -> 
FrozenDict: 77 | # init input tensor 78 | clip_input = jax.random.normal(rng, input_shape) 79 | 80 | params_rng, dropout_rng = jax.random.split(rng) 81 | rngs = {"params": params_rng, "dropout": dropout_rng} 82 | 83 | random_params = self.module.init(rngs, clip_input)["params"] 84 | 85 | return random_params 86 | 87 | def __call__( 88 | self, 89 | clip_input, 90 | params: dict = None, 91 | ): 92 | clip_input = jnp.transpose(clip_input, (0, 2, 3, 1)) 93 | 94 | return self.module.apply( 95 | {"params": params or self.params}, 96 | jnp.array(clip_input, dtype=jnp.float32), 97 | rngs={}, 98 | ) 99 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/ddpm/pipeline_ddpm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | 17 | from typing import Optional, Tuple, Union 18 | 19 | import torch 20 | 21 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 22 | 23 | 24 | class DDPMPipeline(DiffusionPipeline): 25 | r""" 26 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 27 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 28 | 29 | Parameters: 30 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. 31 | scheduler ([`SchedulerMixin`]): 32 | A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of 33 | [`DDPMScheduler`], or [`DDIMScheduler`]. 34 | """ 35 | 36 | def __init__(self, unet, scheduler): 37 | super().__init__() 38 | self.register_modules(unet=unet, scheduler=scheduler) 39 | 40 | @torch.no_grad() 41 | def __call__( 42 | self, 43 | batch_size: int = 1, 44 | generator: Optional[torch.Generator] = None, 45 | num_inference_steps: int = 1000, 46 | output_type: Optional[str] = "pil", 47 | return_dict: bool = True, 48 | **kwargs, 49 | ) -> Union[ImagePipelineOutput, Tuple]: 50 | r""" 51 | Args: 52 | batch_size (`int`, *optional*, defaults to 1): 53 | The number of images to generate. 54 | generator (`torch.Generator`, *optional*): 55 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 56 | deterministic. 57 | num_inference_steps (`int`, *optional*, defaults to 1000): 58 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 59 | expense of slower inference. 60 | output_type (`str`, *optional*, defaults to `"pil"`): 61 | The output format of the generated image. Choose between 62 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. 63 | return_dict (`bool`, *optional*, defaults to `True`): 64 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple.
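Example (an illustrative sketch, not part of the original docstring; it assumes the `google/ddpm-cat-256` checkpoint is available on the Hub):

    >>> from diffusers import DDPMPipeline
    >>> pipe = DDPMPipeline.from_pretrained("google/ddpm-cat-256")
    >>> image = pipe(num_inference_steps=1000).images[0]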
65 | 66 | Returns: 67 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 68 | `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the 69 | generated images. 70 | """ 71 | 72 | # Sample Gaussian noise to begin loop 73 | image = torch.randn( 74 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 75 | generator=generator, 76 | ) 77 | image = image.to(self.device) 78 | 79 | # set step values 80 | self.scheduler.set_timesteps(num_inference_steps) 81 | 82 | for t in self.progress_bar(self.scheduler.timesteps): 83 | # 1. predict noise model_output 84 | model_output = self.unet(image, t).sample 85 | 86 | # 2. compute previous image: x_t -> x_t-1 87 | image = self.scheduler.step(model_output, t, image, generator=generator).prev_sample 88 | 89 | image = (image / 2 + 0.5).clamp(0, 1) 90 | image = image.cpu().permute(0, 2, 3, 1).numpy() 91 | if output_type == "pil": 92 | image = self.numpy_to_pil(image) 93 | 94 | if not return_dict: 95 | return (image,) 96 | 97 | return ImagePipelineOutput(images=image) 98 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/pndm/pipeline_pndm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | 17 | from typing import Optional, Tuple, Union 18 | 19 | import torch 20 | 21 | from ...models import UNet2DModel 22 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 23 | from ...schedulers import PNDMScheduler 24 | 25 | 26 | class PNDMPipeline(DiffusionPipeline): 27 | r""" 28 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 29 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 30 | 31 | Parameters: 32 | unet (`UNet2DModel`): U-Net architecture to denoise the encoded image latents. 33 | scheduler ([`SchedulerMixin`]): 34 | The `PNDMScheduler` to be used in combination with `unet` to denoise the encoded image. 35 | """ 36 | 37 | unet: UNet2DModel 38 | scheduler: PNDMScheduler 39 | 40 | def __init__(self, unet: UNet2DModel, scheduler: PNDMScheduler): 41 | super().__init__() 42 | self.register_modules(unet=unet, scheduler=scheduler) 43 | 44 | @torch.no_grad() 45 | def __call__( 46 | self, 47 | batch_size: int = 1, 48 | num_inference_steps: int = 50, 49 | generator: Optional[torch.Generator] = None, 50 | output_type: Optional[str] = "pil", 51 | return_dict: bool = True, 52 | **kwargs, 53 | ) -> Union[ImagePipelineOutput, Tuple]: 54 | r""" 55 | Args: 56 | batch_size (`int`, *optional*, defaults to 1): The number of images to generate. 57 | num_inference_steps (`int`, *optional*, defaults to 50): 58 | The number of denoising steps.
More denoising steps usually lead to a higher quality image at the 59 | expense of slower inference. 60 | generator (`torch.Generator`, *optional*): A [torch 61 | generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 62 | deterministic. 63 | output_type (`str`, *optional*, defaults to `"pil"`): The output format of the generated image. Choose 64 | between [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. 65 | return_dict (`bool`, *optional*, defaults to `True`): Whether or not to return a 66 | [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 67 | 68 | Returns: 69 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 70 | `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the 71 | generated images. 72 | """ 73 | # For more information on the sampling method you can take a look at Algorithm 2 of 74 | # the official paper: https://arxiv.org/pdf/2202.09778.pdf 75 | 76 | # Sample Gaussian noise to begin loop 77 | image = torch.randn( 78 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 79 | generator=generator, 80 | ) 81 | image = image.to(self.device) 82 | 83 | self.scheduler.set_timesteps(num_inference_steps) 84 | for t in self.progress_bar(self.scheduler.timesteps): 85 | model_output = self.unet(image, t).sample 86 | 87 | image = self.scheduler.step(model_output, t, image).prev_sample 88 | 89 | image = (image / 2 + 0.5).clamp(0, 1) 90 | image = image.cpu().permute(0, 2, 3, 1).numpy() 91 | if output_type == "pil": 92 | image = self.numpy_to_pil(image) 93 | 94 | if not return_dict: 95 | return (image,) 96 | 97 | return ImagePipelineOutput(images=image) 98 | -------------------------------------------------------------------------------- /scripts/change_naming_configs_and_checkpoints.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ Conversion script for the LDM checkpoints.
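Usage (an illustrative sketch; both flags are defined below via `argparse`, and the paths are placeholders):

    python scripts/change_naming_configs_and_checkpoints.py --repo_path <path/to/model/repo> --dump_path <path/to/output>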
""" 16 | 17 | import argparse 18 | import json 19 | import os 20 | 21 | import torch 22 | 23 | from diffusers import UNet2DConditionModel, UNet2DModel 24 | from transformers.file_utils import has_file 25 | 26 | 27 | do_only_config = False 28 | do_only_weights = True 29 | do_only_renaming = False 30 | 31 | 32 | if __name__ == "__main__": 33 | parser = argparse.ArgumentParser() 34 | 35 | parser.add_argument( 36 | "--repo_path", 37 | default=None, 38 | type=str, 39 | required=True, 40 | help="The config json file corresponding to the architecture.", 41 | ) 42 | 43 | parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output model.") 44 | 45 | args = parser.parse_args() 46 | 47 | config_parameters_to_change = { 48 | "image_size": "sample_size", 49 | "num_res_blocks": "layers_per_block", 50 | "block_channels": "block_out_channels", 51 | "down_blocks": "down_block_types", 52 | "up_blocks": "up_block_types", 53 | "downscale_freq_shift": "freq_shift", 54 | "resnet_num_groups": "norm_num_groups", 55 | "resnet_act_fn": "act_fn", 56 | "resnet_eps": "norm_eps", 57 | "num_head_channels": "attention_head_dim", 58 | } 59 | 60 | key_parameters_to_change = { 61 | "time_steps": "time_proj", 62 | "mid": "mid_block", 63 | "downsample_blocks": "down_blocks", 64 | "upsample_blocks": "up_blocks", 65 | } 66 | 67 | subfolder = "" if has_file(args.repo_path, "config.json") else "unet" 68 | 69 | with open(os.path.join(args.repo_path, subfolder, "config.json"), "r", encoding="utf-8") as reader: 70 | text = reader.read() 71 | config = json.loads(text) 72 | 73 | if do_only_config: 74 | for key in config_parameters_to_change.keys(): 75 | config.pop(key, None) 76 | 77 | if has_file(args.repo_path, "config.json"): 78 | model = UNet2DModel(**config) 79 | else: 80 | class_name = UNet2DConditionModel if "ldm-text2im-large-256" in args.repo_path else UNet2DModel 81 | model = class_name(**config) 82 | 83 | if do_only_config: 84 | model.save_config(os.path.join(args.repo_path, subfolder)) 85 | 86 | config = dict(model.config) 87 | 88 | if do_only_renaming: 89 | for key, value in config_parameters_to_change.items(): 90 | if key in config: 91 | config[value] = config[key] 92 | del config[key] 93 | 94 | config["down_block_types"] = [k.replace("UNetRes", "") for k in config["down_block_types"]] 95 | config["up_block_types"] = [k.replace("UNetRes", "") for k in config["up_block_types"]] 96 | 97 | if do_only_weights: 98 | state_dict = torch.load(os.path.join(args.repo_path, subfolder, "diffusion_pytorch_model.bin")) 99 | 100 | new_state_dict = {} 101 | for param_key, param_value in state_dict.items(): 102 | if param_key.endswith(".op.bias") or param_key.endswith(".op.weight"): 103 | continue 104 | has_changed = False 105 | for key, new_key in key_parameters_to_change.items(): 106 | if not has_changed and param_key.split(".")[0] == key: 107 | new_state_dict[".".join([new_key] + param_key.split(".")[1:])] = param_value 108 | has_changed = True 109 | if not has_changed: 110 | new_state_dict[param_key] = param_value 111 | 112 | model.load_state_dict(new_state_dict) 113 | model.save_pretrained(os.path.join(args.repo_path, subfolder)) 114 | -------------------------------------------------------------------------------- /src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import Optional, Tuple, Union 3 | 4 | import torch 5 | 6 | from ...models import 
UNet2DModel, VQModel 7 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 8 | from ...schedulers import DDIMScheduler 9 | 10 | 11 | class LDMPipeline(DiffusionPipeline): 12 | r""" 13 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 14 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 15 | 16 | Parameters: 17 | vqvae ([`VQModel`]): 18 | Vector-quantized (VQ) Model to encode and decode images to and from latent representations. 19 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image latents. 20 | scheduler ([`SchedulerMixin`]): 21 | [`DDIMScheduler`] is to be used in combination with `unet` to denoise the encoded image latents. 22 | """ 23 | 24 | def __init__(self, vqvae: VQModel, unet: UNet2DModel, scheduler: DDIMScheduler): 25 | super().__init__() 26 | self.register_modules(vqvae=vqvae, unet=unet, scheduler=scheduler) 27 | 28 | @torch.no_grad() 29 | def __call__( 30 | self, 31 | batch_size: int = 1, 32 | generator: Optional[torch.Generator] = None, 33 | eta: float = 0.0, 34 | num_inference_steps: int = 50, 35 | output_type: Optional[str] = "pil", 36 | return_dict: bool = True, 37 | **kwargs, 38 | ) -> Union[Tuple, ImagePipelineOutput]: 39 | r""" 40 | Args: 41 | batch_size (`int`, *optional*, defaults to 1): 42 | Number of images to generate. 43 | generator (`torch.Generator`, *optional*): 44 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 45 | deterministic. 46 | num_inference_steps (`int`, *optional*, defaults to 50): 47 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 48 | expense of slower inference. 49 | output_type (`str`, *optional*, defaults to `"pil"`): 50 | The output format of the generated image. Choose between 51 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. 52 | return_dict (`bool`, *optional*, defaults to `True`): 53 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 54 | 55 | Returns: 56 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 57 | `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the 58 | generated images.
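Example (an illustrative sketch, not part of the original docstring; it assumes the `CompVis/ldm-celebahq-256` checkpoint on the Hub):

    >>> from diffusers import LDMPipeline
    >>> pipe = LDMPipeline.from_pretrained("CompVis/ldm-celebahq-256")
    >>> image = pipe(num_inference_steps=50).images[0]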
59 | """ 60 | 61 | latents = torch.randn( 62 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 63 | generator=generator, 64 | ) 65 | latents = latents.to(self.device) 66 | 67 | # scale the initial noise by the standard deviation required by the scheduler 68 | latents = latents * self.scheduler.init_noise_sigma 69 | 70 | self.scheduler.set_timesteps(num_inference_steps) 71 | 72 | # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature 73 | accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) 74 | 75 | extra_kwargs = {} 76 | if accepts_eta: 77 | extra_kwargs["eta"] = eta 78 | 79 | for t in self.progress_bar(self.scheduler.timesteps): 80 | latent_model_input = self.scheduler.scale_model_input(latents, t) 81 | # predict the noise residual 82 | noise_prediction = self.unet(latent_model_input, t).sample 83 | # compute the previous noisy sample x_t -> x_t-1 84 | latents = self.scheduler.step(noise_prediction, t, latents, **extra_kwargs).prev_sample 85 | 86 | # decode the image latents with the VAE 87 | image = self.vqvae.decode(latents).sample 88 | 89 | image = (image / 2 + 0.5).clamp(0, 1) 90 | image = image.cpu().permute(0, 2, 3, 1).numpy() 91 | if output_type == "pil": 92 | image = self.numpy_to_pil(image) 93 | 94 | if not return_dict: 95 | return (image,) 96 | 97 | return ImagePipelineOutput(images=image) 98 | -------------------------------------------------------------------------------- /src/diffusers/models/resnet_flax.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import flax.linen as nn 15 | import jax 16 | import jax.numpy as jnp 17 | 18 | 19 | class FlaxUpsample2D(nn.Module): 20 | out_channels: int 21 | dtype: jnp.dtype = jnp.float32 22 | 23 | def setup(self): 24 | self.conv = nn.Conv( 25 | self.out_channels, 26 | kernel_size=(3, 3), 27 | strides=(1, 1), 28 | padding=((1, 1), (1, 1)), 29 | dtype=self.dtype, 30 | ) 31 | 32 | def __call__(self, hidden_states): 33 | batch, height, width, channels = hidden_states.shape 34 | hidden_states = jax.image.resize( 35 | hidden_states, 36 | shape=(batch, height * 2, width * 2, channels), 37 | method="nearest", 38 | ) 39 | hidden_states = self.conv(hidden_states) 40 | return hidden_states 41 | 42 | 43 | class FlaxDownsample2D(nn.Module): 44 | out_channels: int 45 | dtype: jnp.dtype = jnp.float32 46 | 47 | def setup(self): 48 | self.conv = nn.Conv( 49 | self.out_channels, 50 | kernel_size=(3, 3), 51 | strides=(2, 2), 52 | padding=((1, 1), (1, 1)), # padding="VALID", 53 | dtype=self.dtype, 54 | ) 55 | 56 | def __call__(self, hidden_states): 57 | # pad = ((0, 0), (0, 1), (0, 1), (0, 0)) # pad height and width dim 58 | # hidden_states = jnp.pad(hidden_states, pad_width=pad) 59 | hidden_states = self.conv(hidden_states) 60 | return hidden_states 61 | 62 | 63 | class FlaxResnetBlock2D(nn.Module): 64 | in_channels: int 65 | out_channels: int = None 66 | dropout_prob: float = 0.0 67 | use_nin_shortcut: bool = None 68 | dtype: jnp.dtype = jnp.float32 69 | 70 | def setup(self): 71 | out_channels = self.in_channels if self.out_channels is None else self.out_channels 72 | 73 | self.norm1 = nn.GroupNorm(num_groups=32, epsilon=1e-5) 74 | self.conv1 = nn.Conv( 75 | out_channels, 76 | kernel_size=(3, 3), 77 | strides=(1, 1), 78 | padding=((1, 1), (1, 1)), 79 | dtype=self.dtype, 80 | ) 81 | 82 | self.time_emb_proj = nn.Dense(out_channels, dtype=self.dtype) 83 | 84 | self.norm2 = nn.GroupNorm(num_groups=32, epsilon=1e-5) 85 | self.dropout = nn.Dropout(self.dropout_prob) 86 | self.conv2 = nn.Conv( 87 | out_channels, 88 | kernel_size=(3, 3), 89 | strides=(1, 1), 90 | padding=((1, 1), (1, 1)), 91 | dtype=self.dtype, 92 | ) 93 | 94 | use_nin_shortcut = self.in_channels != out_channels if self.use_nin_shortcut is None else self.use_nin_shortcut 95 | 96 | self.conv_shortcut = None 97 | if use_nin_shortcut: 98 | self.conv_shortcut = nn.Conv( 99 | out_channels, 100 | kernel_size=(1, 1), 101 | strides=(1, 1), 102 | padding="VALID", 103 | dtype=self.dtype, 104 | ) 105 | 106 | def __call__(self, hidden_states, temb, deterministic=True): 107 | residual = hidden_states 108 | hidden_states = self.norm1(hidden_states) 109 | hidden_states = nn.swish(hidden_states) 110 | hidden_states = self.conv1(hidden_states) 111 | 112 | temb = self.time_emb_proj(nn.swish(temb)) 113 | temb = jnp.expand_dims(jnp.expand_dims(temb, 1), 1) 114 | hidden_states = hidden_states + temb 115 | 116 | hidden_states = self.norm2(hidden_states) 117 | hidden_states = nn.swish(hidden_states) 118 | hidden_states = self.dropout(hidden_states, deterministic) 119 | hidden_states = self.conv2(hidden_states) 120 | 121 | if self.conv_shortcut is not None: 122 | residual = self.conv_shortcut(residual) 123 | 124 | return hidden_states + residual 125 | --------------------------------------------------------------------------------
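To close, here is a minimal usage sketch for `FlaxResnetBlock2D` (not part of the library; the shapes and the `PRNGKey` seed are arbitrary assumptions). Flax convolutions expect NHWC inputs, and the time embedding is projected to `out_channels` inside the block:

```python
import jax
import jax.numpy as jnp

# Dummy NHWC feature map and time embedding (shapes are illustrative)
hidden_states = jnp.zeros((1, 16, 16, 32))
temb = jnp.zeros((1, 128))

block = FlaxResnetBlock2D(in_channels=32, out_channels=64)
params = block.init(jax.random.PRNGKey(0), hidden_states, temb)

out = block.apply(params, hidden_states, temb)
assert out.shape == (1, 16, 16, 64)  # channels projected by the 1x1 shortcut conv
```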