├── .github
├── pull_request_template.md
├── stochastic_logo_dark.svg
├── stochastic_logo_light.svg
├── stochastic_x_dashboard.jpeg
├── stochasticai_demo.gif
└── workflows
│ └── semgrep.yml
├── .gitignore
├── .pre-commit-config.yaml
├── AITemplate
├── Dockerfile
├── README.md
├── benchmark.py
├── benchmark_pt.py
├── client.py
├── compile.py
├── demo.py
├── modeling
│ ├── attention.py
│ ├── clip.py
│ ├── embeddings.py
│ ├── resnet.py
│ ├── unet_2d_condition.py
│ ├── unet_blocks.py
│ └── vae.py
├── pipeline_stable_diffusion_ait.py
├── requirements.txt
└── server.py
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── FlashAttention
├── Dockerfile
├── README.md
├── client.py
├── demo.py
├── diffusers
│ ├── .github
│ │ ├── ISSUE_TEMPLATE
│ │ │ ├── bug-report.yml
│ │ │ ├── config.yml
│ │ │ ├── feature_request.md
│ │ │ ├── feedback.md
│ │ │ └── new-model-addition.yml
│ │ └── workflows
│ │ │ ├── build_documentation.yml
│ │ │ ├── build_pr_documentation.yml
│ │ │ ├── delete_doc_comment.yml
│ │ │ ├── pr_quality.yml
│ │ │ ├── pr_tests.yml
│ │ │ ├── push_tests.yml
│ │ │ ├── stale.yml
│ │ │ └── typos.yml
│ ├── .gitignore
│ ├── CODE_OF_CONDUCT.md
│ ├── CONTRIBUTING.md
│ ├── LICENSE
│ ├── MANIFEST.in
│ ├── Makefile
│ ├── README.md
│ ├── _typos.toml
│ ├── docs
│ │ └── source
│ │ │ ├── _toctree.yml
│ │ │ ├── api
│ │ │ ├── configuration.mdx
│ │ │ ├── diffusion_pipeline.mdx
│ │ │ ├── logging.mdx
│ │ │ ├── models.mdx
│ │ │ ├── outputs.mdx
│ │ │ ├── pipelines
│ │ │ │ ├── ddim.mdx
│ │ │ │ ├── ddpm.mdx
│ │ │ │ ├── latent_diffusion.mdx
│ │ │ │ ├── latent_diffusion_uncond.mdx
│ │ │ │ ├── overview.mdx
│ │ │ │ ├── pndm.mdx
│ │ │ │ ├── score_sde_ve.mdx
│ │ │ │ ├── stable_diffusion.mdx
│ │ │ │ └── stochastic_karras_ve.mdx
│ │ │ └── schedulers.mdx
│ │ │ ├── conceptual
│ │ │ ├── contribution.mdx
│ │ │ ├── philosophy.mdx
│ │ │ └── stable_diffusion.mdx
│ │ │ ├── imgs
│ │ │ └── diffusers_library.jpg
│ │ │ ├── index.mdx
│ │ │ ├── installation.mdx
│ │ │ ├── optimization
│ │ │ ├── fp16.mdx
│ │ │ ├── mps.mdx
│ │ │ ├── onnx.mdx
│ │ │ └── open_vino.mdx
│ │ │ ├── quicktour.mdx
│ │ │ ├── training
│ │ │ ├── overview.mdx
│ │ │ ├── text2image.mdx
│ │ │ ├── text_inversion.mdx
│ │ │ └── unconditional_training.mdx
│ │ │ └── using-diffusers
│ │ │ ├── conditional_image_generation.mdx
│ │ │ ├── configuration.mdx
│ │ │ ├── custom.mdx
│ │ │ ├── img2img.mdx
│ │ │ ├── inpaint.mdx
│ │ │ ├── loading.mdx
│ │ │ └── unconditional_image_generation.mdx
│ ├── examples
│ │ ├── README.md
│ │ ├── community
│ │ │ └── README.md
│ │ ├── inference
│ │ │ ├── README.md
│ │ │ ├── image_to_image.py
│ │ │ └── inpainting.py
│ │ ├── textual_inversion
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ └── textual_inversion.py
│ │ └── unconditional_image_generation
│ │ │ ├── README.md
│ │ │ ├── requirements.txt
│ │ │ └── train_unconditional.py
│ ├── pyproject.toml
│ ├── scripts
│ │ ├── __init__.py
│ │ ├── change_naming_configs_and_checkpoints.py
│ │ ├── conversion_ldm_uncond.py
│ │ ├── convert_ddpm_original_checkpoint_to_diffusers.py
│ │ ├── convert_ldm_original_checkpoint_to_diffusers.py
│ │ ├── convert_ncsnpp_original_checkpoint_to_diffusers.py
│ │ ├── convert_original_stable_diffusion_to_diffusers.py
│ │ ├── convert_stable_diffusion_checkpoint_to_onnx.py
│ │ └── generate_logits.py
│ ├── setup.cfg
│ ├── setup.py
│ ├── src
│ │ └── diffusers
│ │ │ ├── __init__.py
│ │ │ ├── commands
│ │ │ ├── __init__.py
│ │ │ ├── diffusers_cli.py
│ │ │ └── env.py
│ │ │ ├── configuration_utils.py
│ │ │ ├── dependency_versions_check.py
│ │ │ ├── dependency_versions_table.py
│ │ │ ├── dynamic_modules_utils.py
│ │ │ ├── hub_utils.py
│ │ │ ├── modeling_flax_utils.py
│ │ │ ├── modeling_utils.py
│ │ │ ├── models
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── attention.py
│ │ │ ├── attention_flax.py
│ │ │ ├── embeddings.py
│ │ │ ├── embeddings_flax.py
│ │ │ ├── resnet.py
│ │ │ ├── resnet_flax.py
│ │ │ ├── unet_2d.py
│ │ │ ├── unet_2d_condition.py
│ │ │ ├── unet_2d_condition_flax.py
│ │ │ ├── unet_blocks.py
│ │ │ ├── unet_blocks_flax.py
│ │ │ └── vae.py
│ │ │ ├── onnx_utils.py
│ │ │ ├── optimization.py
│ │ │ ├── pipeline_utils.py
│ │ │ ├── pipelines
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── ddim
│ │ │ │ ├── __init__.py
│ │ │ │ └── pipeline_ddim.py
│ │ │ ├── ddpm
│ │ │ │ ├── __init__.py
│ │ │ │ └── pipeline_ddpm.py
│ │ │ ├── latent_diffusion
│ │ │ │ ├── __init__.py
│ │ │ │ └── pipeline_latent_diffusion.py
│ │ │ ├── latent_diffusion_uncond
│ │ │ │ ├── __init__.py
│ │ │ │ └── pipeline_latent_diffusion_uncond.py
│ │ │ ├── pndm
│ │ │ │ ├── __init__.py
│ │ │ │ └── pipeline_pndm.py
│ │ │ ├── score_sde_ve
│ │ │ │ ├── __init__.py
│ │ │ │ └── pipeline_score_sde_ve.py
│ │ │ ├── stable_diffusion
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── pipeline_stable_diffusion.py
│ │ │ │ ├── pipeline_stable_diffusion_img2img.py
│ │ │ │ ├── pipeline_stable_diffusion_inpaint.py
│ │ │ │ ├── pipeline_stable_diffusion_onnx.py
│ │ │ │ └── safety_checker.py
│ │ │ └── stochastic_karras_ve
│ │ │ │ ├── __init__.py
│ │ │ │ └── pipeline_stochastic_karras_ve.py
│ │ │ ├── schedulers
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── scheduling_ddim.py
│ │ │ ├── scheduling_ddim_flax.py
│ │ │ ├── scheduling_ddpm.py
│ │ │ ├── scheduling_ddpm_flax.py
│ │ │ ├── scheduling_karras_ve.py
│ │ │ ├── scheduling_karras_ve_flax.py
│ │ │ ├── scheduling_lms_discrete.py
│ │ │ ├── scheduling_lms_discrete_flax.py
│ │ │ ├── scheduling_pndm.py
│ │ │ ├── scheduling_pndm_flax.py
│ │ │ ├── scheduling_sde_ve.py
│ │ │ ├── scheduling_sde_ve_flax.py
│ │ │ ├── scheduling_sde_vp.py
│ │ │ └── scheduling_utils.py
│ │ │ ├── testing_utils.py
│ │ │ ├── training_utils.py
│ │ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── dummy_flax_objects.py
│ │ │ ├── dummy_pt_objects.py
│ │ │ ├── dummy_torch_and_scipy_objects.py
│ │ │ ├── dummy_torch_and_transformers_and_onnx_objects.py
│ │ │ ├── dummy_torch_and_transformers_objects.py
│ │ │ ├── import_utils.py
│ │ │ ├── logging.py
│ │ │ ├── model_card_template.md
│ │ │ └── outputs.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_config.py
│ │ ├── test_layers_utils.py
│ │ ├── test_modeling_common.py
│ │ ├── test_models_unet.py
│ │ ├── test_models_vae.py
│ │ ├── test_models_vq.py
│ │ ├── test_pipelines.py
│ │ ├── test_scheduler.py
│ │ └── test_training.py
│ └── utils
│ │ ├── check_config_docstrings.py
│ │ ├── check_copies.py
│ │ ├── check_dummies.py
│ │ ├── check_inits.py
│ │ ├── check_repo.py
│ │ ├── check_table.py
│ │ ├── check_tf_ops.py
│ │ ├── custom_init_isort.py
│ │ ├── print_env.py
│ │ └── stale.py
├── requirements.txt
└── server.py
├── LICENSE
├── ONNX
├── Dockerfile
├── README.md
├── demo.py
├── model.py
├── requirements.txt
└── server.py
├── PyTorch
├── Dockerfile
├── README.md
├── demo.py
├── model.py
├── notebook.ipynb
├── requirements.txt
└── server.py
├── README.md
├── TensorRT
├── Dockerfile
├── Notebook.ipynb
├── README.md
├── client.py
├── convert_unet_to_tensorrt.py
├── demo.py
├── requirements.txt
├── server.py
└── trt_model.py
├── generated_images
├── AITemplate
│ ├── 0.png
│ ├── 1.png
│ ├── 2.png
│ ├── 3.png
│ ├── 4.png
│ ├── 5.png
│ ├── 6.png
│ ├── 7.png
│ ├── 8.png
│ ├── 9.png
│ └── README.md
├── FlashAttention
│ ├── 0.png
│ ├── 1.png
│ ├── 2.png
│ ├── 3.png
│ ├── 4.png
│ ├── 5.png
│ ├── 6.png
│ ├── 7.png
│ ├── 8.png
│ ├── 9.png
│ └── README.md
├── PyTorch
│ ├── 0.png
│ ├── 1.png
│ ├── 2.png
│ ├── 3.png
│ ├── 4.png
│ ├── 5.png
│ ├── 6.png
│ ├── 7.png
│ ├── 8.png
│ ├── 9.png
│ └── README.md
├── README.md
├── TensorRT
│ ├── 0.png
│ ├── 1.png
│ ├── 2.png
│ ├── 3.png
│ ├── 4.png
│ ├── 5.png
│ ├── 6.png
│ ├── 7.png
│ ├── 8.png
│ ├── 9.png
│ └── README.md
└── nvFuser
│ ├── 0.png
│ ├── 1.png
│ ├── 2.png
│ ├── 3.png
│ ├── 4.png
│ ├── 5.png
│ ├── 6.png
│ ├── 7.png
│ ├── 8.png
│ ├── 9.png
│ └── README.md
├── graphs
├── A100_GPU_batch.png
├── A100_GPU_latency.png
└── T4_GPU_latency.png
└── nvFuser
├── Dockerfile
├── README.md
├── client.py
├── create_unet_nvfuser_model.py
├── demo.py
├── requirements.txt
├── server.py
└── test.py
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ### Summary
2 |
3 |
4 |
5 | ### Checklist
6 |
7 |
8 | - [ ] Tested
9 | - [ ] Documented
10 |
11 | ### Additional Information
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.github/stochastic_x_dashboard.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/.github/stochastic_x_dashboard.jpeg
--------------------------------------------------------------------------------
/.github/stochasticai_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/.github/stochasticai_demo.gif
--------------------------------------------------------------------------------
/.github/workflows/semgrep.yml:
--------------------------------------------------------------------------------
1 | on:
2 | workflow_dispatch: {}
3 | pull_request: {}
4 | push:
5 | branches:
6 | - main
7 | - master
8 | paths:
9 | - .github/workflows/semgrep.yml
10 | schedule:
11 | # random HH:MM to avoid a load spike on GitHub Actions at 00:00
12 | - cron: 28 9 * * *
13 | name: Semgrep
14 | jobs:
15 | semgrep:
16 | name: semgrep/ci
17 | runs-on: ubuntu-20.04
18 | env:
19 | SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }}
20 | container:
21 | image: returntocorp/semgrep
22 | steps:
23 | - uses: actions/checkout@v3
24 | - run: semgrep ci
25 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/psf/black
3 | rev: 22.12.0
4 | hooks:
5 | - id: black
6 | # It is recommended to specify the latest version of Python
7 | # supported by your project here, or alternatively use
8 | # pre-commit's default_language_version, see
9 | # https://pre-commit.com/#top_level-default_language_version
10 | language_version: python3.9
11 |
--------------------------------------------------------------------------------
/AITemplate/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9-slim
2 |
3 | WORKDIR /code
4 |
5 | RUN pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html
6 |
7 | COPY requirements.txt /code/requirements.txt
8 |
9 | RUN apt-get update && apt-get -y install curl && apt -y install git
10 |
11 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
12 |
13 | RUN git clone --recursive https://github.com/facebookincubator/AITemplate
14 |
15 | RUN cd AITemplate/python && python3 setup.py bdist_wheel && pip install dist/*.whl --force-reinstall
16 |
17 | COPY . /code/
18 |
19 | EXPOSE 5000
20 |
21 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"]
--------------------------------------------------------------------------------
/AITemplate/README.md:
--------------------------------------------------------------------------------
1 | ## AITemplate Stable Diffusion Example
2 |
3 | ### Build Dependencies
4 |
5 | Install AITemplate
6 |
7 | ```
8 | git clone --recursive https://github.com/facebookincubator/AITemplate
9 | cd AITemplate/python
10 | python setup.py bdist_wheel
11 | pip install dist/*.whl --force-reinstall
12 | ```
13 |
14 | Install libraries
15 |
16 | ```
17 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html
18 | pip install -r requirements.txt
19 | ```
20 |
21 | Verify the library versions. We have tested with transformers 4.22, diffusers 0.4, and torch 1.12.
22 |
23 | ### Compile AITemplate models
24 |
25 | You need to register on the Hugging Face Hub. Get your access token from [Hugging Face account settings](https://huggingface.co/settings/tokens), then log in with the `huggingface-cli login` command.
26 |
27 | ```
28 | python3 compile.py
29 | ```
30 |
31 | Compiled models are stored in the `./tmp` folder.
32 |
33 | ### Benchmark
34 |
35 | ```
36 | python3 demo.py --benchmark
37 | ```
38 |
39 | Check the resulting image: `example_ait.png`
40 |
41 | ### Deploy as a REST API endpoint
42 |
43 | ```
44 | docker build -t ait_diffusion .
45 | docker run -p 5000:5000 -ti --gpus=all ait_diffusion
46 | ```
47 |
48 | ### Test API
49 |
50 | ```
51 | python3 client.py
52 | ```
53 |
54 | Check the resulting image: `output_api.png`
--------------------------------------------------------------------------------
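A quick way to exercise the deployed AITemplate endpoint, beyond the bundled `client.py`, is a single `requests` call. This is a minimal sketch, assuming the container from the README above is running on `localhost:5000`; the payload fields mirror the `Item` model in `AITemplate/server.py`, and the output filename is illustrative.

```python
import json
import requests

# One-off request against the AITemplate FastAPI server (sketch).
payload = {
    "prompt": ["A vision of paradise, Unreal Engine"],
    "img_height": 512,
    "img_width": 512,
    "num_inference_steps": 30,
    "guidance_scale": 7.5,
}
resp = requests.post("http://localhost:5000/predict/", data=json.dumps(payload))
resp.raise_for_status()

# The endpoint responds with the generated image as PNG bytes.
with open("quick_test.png", "wb") as f:
    f.write(resp.content)
```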
/AITemplate/benchmark_pt.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | import click
16 | import torch
17 |
18 | from aitemplate.testing.benchmark_pt import benchmark_torch_function
19 | from diffusers import StableDiffusionPipeline
20 |
21 |
22 | @click.command()
23 | @click.option("--token", default="", help="access token")
24 | @click.option("--prompt", default="A vision of paradise, Unreal Engine", help="prompt")
25 | @click.option(
26 | "--benchmark", type=bool, default=False, help="run stable diffusion e2e benchmark"
27 | )
28 | def run(token, prompt, benchmark):
29 | pipe = StableDiffusionPipeline.from_pretrained(
30 | "CompVis/stable-diffusion-v1-4",
31 | revision="fp16",
32 | torch_dtype=torch.float16,
33 | use_auth_token=token,
34 | ).to("cuda")
35 |
36 | with torch.autocast("cuda"):
37 | image = pipe(prompt).images[0]
38 | if benchmark:
39 | t = benchmark_torch_function(10, pipe, prompt)
40 | print(f"sd pt e2e: {t} ms")
41 |
42 | image.save("example_pt.png")
43 |
44 |
45 | if __name__ == "__main__":
46 | run()
47 |
--------------------------------------------------------------------------------
/AITemplate/client.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import time
4 |
5 | if __name__ == "__main__":
6 | text = "The Easter bunny riding a motorcycle in New York City"
7 | t0 = time.time()
8 | for i in range(50):
9 | print("Iteration: ", i)
10 | out = requests.post(
11 | "http://localhost:5000/predict/",
12 | data=json.dumps({"prompt": [text], "num_inference_steps": 30}),
13 | )
14 | t1 = time.time()
15 | print("Inference time is: ", (t1 - t0) / 50)
16 | with open("output_api_new.png", "wb") as f:
17 | f.write(out.content)
18 |
--------------------------------------------------------------------------------
/AITemplate/demo.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | import click
16 | import torch
17 |
18 | from aitemplate.testing.benchmark_pt import benchmark_torch_function
19 | from pipeline_stable_diffusion_ait import StableDiffusionAITPipeline
20 | from PIL import Image
21 |
22 |
23 | @click.command()
24 | @click.option("--prompt", default="A vision of paradise, Unreal Engine", help="prompt")
25 | @click.option(
26 | "--benchmark", type=bool, default=False, help="run stable diffusion e2e benchmark"
27 | )
28 | @click.option("--batch_size", type=int, default=1, help="batch size")
29 | def run(prompt, benchmark, batch_size):
30 | pipe = StableDiffusionAITPipeline()
31 | height = 512
32 | width = 512
33 | num_inference_steps = 50
34 | with torch.autocast("cuda"):
35 | images = pipe([prompt] * batch_size)
36 | if benchmark:
37 | t = benchmark_torch_function(
38 | 10, pipe, [prompt] * batch_size, height, width, num_inference_steps
39 | )
40 | print(f"sd e2e: {t} ms")
41 |
42 | images = (images * 255).round().astype("uint8")
43 | pil_images = [Image.fromarray(image) for image in images]
44 | pil_images[0].save("example_ait.png")
45 |
46 |
47 | if __name__ == "__main__":
48 | run()
49 |
--------------------------------------------------------------------------------
/AITemplate/modeling/embeddings.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | import math
16 |
17 | from aitemplate.compiler import ops
18 | from aitemplate.frontend import nn, Tensor
19 |
20 |
21 | def get_shape(x):
22 | shape = [it.value() for it in x._attrs["shape"]]
23 | return shape
24 |
25 |
26 | def get_timestep_embedding(
27 | timesteps: Tensor,
28 | embedding_dim: int,
29 | flip_sin_to_cos: bool = False,
30 | downscale_freq_shift: float = 1,
31 | scale: float = 1,
32 | max_period: int = 10000,
33 | ):
34 | """
35 | This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings.
36 |
37 | :param timesteps: a 1-D Tensor of N indices, one per batch element.
38 | These may be fractional.
39 | :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the
40 | embeddings. :return: an [N x dim] Tensor of positional embeddings.
41 | """
42 | assert len(get_shape(timesteps)) == 1, "Timesteps should be a 1d-array"
43 |
44 | half_dim = embedding_dim // 2
45 |
46 | exponent = (-math.log(max_period)) * Tensor(
47 | shape=[half_dim], dtype="float16", name="arange"
48 | )
49 |
50 | exponent = exponent * (1.0 / (half_dim - downscale_freq_shift))
51 |
52 | emb = ops.exp(exponent)
53 | emb = ops.reshape()(timesteps, [-1, 1]) * ops.reshape()(emb, [1, -1])
54 |
55 | # scale embeddings
56 | emb = scale * emb
57 |
58 | # concat sine and cosine embeddings
59 | if flip_sin_to_cos:
60 | emb = ops.concatenate()(
61 | [ops.cos(emb), ops.sin(emb)],
62 | dim=-1,
63 | )
64 | else:
65 | emb = ops.concatenate()(
66 | [ops.sin(emb), ops.cos(emb)],
67 | dim=-1,
68 | )
69 | return emb
70 |
71 |
72 | class TimestepEmbedding(nn.Module):
73 | def __init__(self, channel: int, time_embed_dim: int, act_fn: str = "silu"):
74 | super().__init__()
75 |
76 | self.linear_1 = nn.Linear(channel, time_embed_dim, specialization="swish")
77 | self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim)
78 |
79 | def forward(self, sample):
80 | sample = self.linear_1(sample)
81 | sample = self.linear_2(sample)
82 | return sample
83 |
84 |
85 | class Timesteps(nn.Module):
86 | def __init__(
87 | self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float
88 | ):
89 | super().__init__()
90 | self.num_channels = num_channels
91 | self.flip_sin_to_cos = flip_sin_to_cos
92 | self.downscale_freq_shift = downscale_freq_shift
93 |
94 | def forward(self, timesteps):
95 | t_emb = get_timestep_embedding(
96 | timesteps,
97 | self.num_channels,
98 | flip_sin_to_cos=self.flip_sin_to_cos,
99 | downscale_freq_shift=self.downscale_freq_shift,
100 | )
101 | return t_emb
102 |
--------------------------------------------------------------------------------
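For intuition, `get_timestep_embedding` above builds the standard DDPM-style sinusoidal timestep embedding as an AITemplate graph. Below is a minimal NumPy sketch of the same computation evaluated eagerly (assuming the defaults `flip_sin_to_cos=False` and `scale=1`); it is for reference only and is not part of the repository.

```python
import math
import numpy as np

def timestep_embedding_np(timesteps, embedding_dim, max_period=10000, downscale_freq_shift=1.0):
    # Eager equivalent of the AITemplate ops used in get_timestep_embedding.
    half_dim = embedding_dim // 2
    exponent = -math.log(max_period) * np.arange(half_dim) / (half_dim - downscale_freq_shift)
    freqs = np.exp(exponent)                                                   # [half_dim]
    args = np.asarray(timesteps, dtype=np.float32)[:, None] * freqs[None, :]   # [N, half_dim]
    return np.concatenate([np.sin(args), np.cos(args)], axis=-1)               # [N, embedding_dim]

print(timestep_embedding_np([0, 10, 999], 8).shape)  # (3, 8)
```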
/AITemplate/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==4.21.2
2 | diffusers==0.3.0
3 | #torch==1.12.1+cu116
4 | scipy
5 | uvicorn
6 | pydantic
7 | fastapi
--------------------------------------------------------------------------------
/AITemplate/server.py:
--------------------------------------------------------------------------------
1 | from fastapi import FastAPI
2 | from typing import List, Union
3 | from pydantic import BaseModel
4 | from pipeline_stable_diffusion_ait import StableDiffusionAITPipeline
5 | import torch
6 | from tqdm import tqdm
7 | from PIL import Image
8 | import io
9 | from fastapi import Response
10 |
11 | torch_device = torch.device("cuda:0")
12 |
13 |
14 | class Item(BaseModel):
15 | prompt: Union[str, List[str]]
16 | img_height: int = 512
17 | img_width: int = 512
18 | num_inference_steps: int = 50
19 | guidance_scale: float = 7.5
20 |
21 |
22 | app = FastAPI()
23 |
24 | pipe = StableDiffusionAITPipeline()
25 |
26 |
27 | @app.post("/predict/")
28 | async def predict(input_api: Item):
29 | with torch.autocast("cuda"):
30 | images = pipe(
31 | input_api.prompt,
32 | height=input_api.img_height,
33 | width=input_api.img_width,
34 | num_inference_steps=input_api.num_inference_steps,
35 | guidance_scale=input_api.guidance_scale,
36 | )
37 | if images.ndim == 3:
38 | images = images[None, ...]
39 | image = (images[0] * 255).round().astype("uint8")
40 | image = Image.fromarray(image)
41 |
42 | # save image to an in-memory bytes buffer
43 | with io.BytesIO() as buf:
44 | image.save(buf, format="PNG")
45 | im_bytes = buf.getvalue()
46 | headers = {"Content-Disposition": 'inline; filename="test.png"'}
47 | return Response(im_bytes, headers=headers, media_type="image/png")
48 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 |
2 | # Contributor Covenant Code of Conduct
3 |
4 | ## Our Pledge
5 |
6 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
7 |
8 | ## Our Standards
9 |
10 | Examples of behavior that contributes to a positive environment for our
11 | community include:
12 |
13 | * Demonstrating empathy and kindness toward other people
14 | * Being respectful of differing opinions, viewpoints, and experiences
15 | * Giving and gracefully accepting constructive feedback
16 | * Accepting responsibility and apologizing to those affected by our mistakes,
17 | and learning from the experience
18 | * Focusing on what is best not just for us as individuals, but for the overall
19 | community
20 |
21 | Examples of unacceptable behavior include:
22 |
23 | * The use of sexualized language or imagery, and sexual attention or advances of
24 | any kind
25 | * Trolling, insulting or derogatory comments, and personal or political attacks
26 | * Public or private harassment
27 | * Publishing others' private information, such as a physical or email address,
28 | without their explicit permission
29 | * Other conduct which could reasonably be considered inappropriate in a
30 | professional setting
31 |
32 | ## Scope
33 |
34 | This Code of Conduct applies within all community spaces, and also applies when
35 | an individual is officially representing the community in public spaces.
36 | Examples of representing our community include using an official e-mail address,
37 | posting via an official social media account, or acting as an appointed
38 | representative at an online or offline event.
39 |
40 | ## Enforcement
41 |
42 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the organization at hello@stochastic.ai. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances.
43 |
44 |
45 | ## Attribution
46 |
47 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
48 | version 2.1, available at
49 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
50 |
51 | Community Impact Guidelines were inspired by
52 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
53 |
54 | For answers to common questions about this code of conduct, see the FAQ at
55 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
56 | [https://www.contributor-covenant.org/translations][translations].
57 |
58 | [homepage]: https://www.contributor-covenant.org
59 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
60 | [Mozilla CoC]: https://github.com/mozilla/diversity
61 | [FAQ]: https://www.contributor-covenant.org/faq
62 | [translations]: https://www.contributor-covenant.org/translations
63 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to x-stable-diffusion
2 |
3 | Everyone is welcome to contribute, and we value everybody's contribution. Code
4 | is thus not the only way to help the community. Answering questions, helping
5 | others, reaching out and improving the documentation are immensely valuable to
6 | the community.
7 |
8 | It also helps us if you spread the word: through blog posts, shout-outs on Twitter, or simply starring the repo to say "thank you".
9 |
10 | Whichever way you choose to contribute, please be mindful to respect our
11 | [code of conduct](https://github.com/stochasticai/x-stable-diffusion/blob/main/CODE_OF_CONDUCT.md).
12 |
13 | ## Ways to contribute
14 |
15 | * Fixing outstanding issues
16 | * Implementing new optimizations
17 | * Contributing to the examples or to the documentation
18 | * Submitting issues related to bugs or desired new features
--------------------------------------------------------------------------------
/FlashAttention/Dockerfile:
--------------------------------------------------------------------------------
1 |
2 | FROM nvidia/cuda:11.6.0-devel-ubuntu20.04
3 |
4 | RUN apt-get update && apt-get install --no-install-recommends -y curl && apt-get -y install git
5 |
6 | ENV CONDA_AUTO_UPDATE_CONDA=false \
7 | PATH=/opt/miniconda/bin:$PATH
8 | RUN curl -sLo ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh \
9 | && chmod +x ~/miniconda.sh \
10 | && ~/miniconda.sh -b -p /opt/miniconda \
11 | && rm ~/miniconda.sh \
12 | && sed -i "$ a PATH=/opt/miniconda/bin:\$PATH" /etc/environment
13 |
14 | RUN python3 -m pip --no-cache-dir install --upgrade pip
15 |
16 | WORKDIR /code
17 |
18 | RUN pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 -f https://download.pytorch.org/whl/torch_stable.html
19 |
20 | COPY requirements.txt /code/requirements.txt
21 |
22 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
23 |
24 | RUN apt-get update && apt-get -y install wget
25 |
26 | RUN pip install git+https://github.com/facebookresearch/xformers@51dd119#egg=xformers
27 |
28 | COPY . /code/
29 |
30 | RUN cd diffusers && pip install -e .
31 |
32 | EXPOSE 5000
33 |
34 | ENV USE_MEMORY_EFFICIENT_ATTENTION=1
35 |
36 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"]
--------------------------------------------------------------------------------
/FlashAttention/README.md:
--------------------------------------------------------------------------------
1 | ## Flash-Attention Stable Diffusion Example
2 |
3 | ### Build Dependencies
4 |
5 | Requires Python 3.9 or 3.10 and PyTorch 1.12.1 with CUDA 11.6.
6 |
7 | ```
8 | conda create -n diffusion_fa python=3.10
9 | conda activate diffusion_fa
10 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
11 | pip install git+https://github.com/facebookresearch/xformers@51dd119#egg=xformers
12 | cd diffusers
13 | pip install -e .
14 | ```
15 |
16 | Install libraries
17 |
18 | ```
19 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html
20 | pip install -r requirements.txt
21 | ```
22 | ### Benchmark
23 |
24 | You need to register on the Hugging Face Hub. Get your access token from [Hugging Face account settings](https://huggingface.co/settings/tokens), then log in with the `huggingface-cli login` command.
25 |
26 | ```
27 | USE_MEMORY_EFFICIENT_ATTENTION=1 python3 demo.py --benchmark
28 | ```
29 |
30 | ### Deploy as a REST API endpoint
31 |
32 | Requirement: make sure the NVIDIA runtime is enabled when building the Docker image, as xFormers needs a GPU to set some build flags.
33 |
34 | You need to provide your Hugging Face token in `server.py`.
35 |
36 | ```
37 | docker build -t fa_diffusion .
38 | docker run -p 5000:5000 -ti --gpus=all fa_diffusion
39 | ```
40 |
41 | Note: building xFormers takes about 35 minutes, so be patient.
42 |
43 | ### Test API
44 |
45 | ```
46 | python3 client.py
47 | ```
48 |
49 | Check the resulting image: `output_api.png`
--------------------------------------------------------------------------------
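The memory-efficient attention path is toggled entirely through the `USE_MEMORY_EFFICIENT_ATTENTION` environment variable read by the patched `diffusers` in this folder (the same variable the Dockerfile sets). Below is a minimal sketch of driving it from Python rather than the shell, assuming the patched `diffusers` and xFormers are installed and a Hugging Face token is configured.

```python
import os

# Assumption: the flag must be set before the patched diffusers fork is imported.
os.environ["USE_MEMORY_EFFICIENT_ATTENTION"] = "1"

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    revision="fp16",
    torch_dtype=torch.float16,
    use_auth_token=True,
).to("cuda")

with torch.inference_mode(), torch.autocast("cuda"):
    image = pipe("A vision of paradise, Unreal Engine", num_inference_steps=50).images[0]
image.save("fa_example.png")
```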
/FlashAttention/client.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import time
4 |
5 | if __name__ == "__main__":
6 | text = "The Easter bunny riding a motorcycle in New York City"
7 | t0 = time.time()
8 | for i in range(50):
9 | print("Iteration: ", i)
10 | out = requests.post(
11 | "http://localhost:5000/predict/", data=json.dumps({"prompt": [text]})
12 | )
13 | t1 = time.time()
14 | print("Inference time is: ", (t1 - t0) / 50)
15 | with open("output_api.png", "wb") as f:
16 | f.write(out.content)
17 |
--------------------------------------------------------------------------------
/FlashAttention/demo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import argparse
3 | import time
4 | from tqdm import tqdm
5 | from diffusers import StableDiffusionPipeline
6 |
7 |
8 | def get_args():
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument(
11 | "--prompt",
12 | default="Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci",
13 | help="input prompt",
14 | )
15 | parser.add_argument("--batch_size", default=1, type=int, help="batch size")
16 | parser.add_argument(
17 | "--img_size", default=(512, 512), help="Unet input image size (h,w)"
18 | )
19 | parser.add_argument(
20 | "--benchmark",
21 | action="store_true",
22 | help="Running benchmark by average num iteration",
23 | )
24 | parser.add_argument(
25 | "--n_iters", default=50, help="Running benchmark by average num iteration"
26 | )
27 |
28 | return parser.parse_args()
29 |
30 |
31 | if __name__ == "__main__":
32 | args = get_args()
33 | pipe = StableDiffusionPipeline.from_pretrained(
34 | "CompVis/stable-diffusion-v1-4",
35 | revision="fp16",
36 | torch_dtype=torch.float16,
37 | use_auth_token=True,
38 | ).to("cuda")
39 | if args.benchmark:
40 | n_iters = args.n_iters
41 | # warm up
42 | for i in tqdm(range(3)):
43 | with torch.inference_mode(), torch.autocast("cuda"):
44 | images = pipe(args.prompt, num_inference_steps=50)
45 | else:
46 | n_iters = 1
47 |
48 | start = time.time()
49 | for i in tqdm(range(n_iters)):
50 | with torch.inference_mode(), torch.autocast("cuda"):
51 | images = pipe(args.prompt, num_inference_steps=50)
52 | end = time.time()
53 | if args.benchmark:
54 | print("Average inference time is: ", (end - start) / n_iters)
55 | images.images[0].save("image_generated.png")
56 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/ISSUE_TEMPLATE/bug-report.yml:
--------------------------------------------------------------------------------
1 | name: "\U0001F41B Bug Report"
2 | description: Report a bug on diffusers
3 | labels: [ "bug" ]
4 | body:
5 | - type: markdown
6 | attributes:
7 | value: |
8 | Thanks for taking the time to fill out this bug report!
9 | - type: textarea
10 | id: bug-description
11 | attributes:
12 | label: Describe the bug
13 | description: A clear and concise description of what the bug is. If you intend to submit a pull request for this issue, tell us in the description. Thanks!
14 | placeholder: Bug description
15 | validations:
16 | required: true
17 | - type: textarea
18 | id: reproduction
19 | attributes:
20 | label: Reproduction
21 | description: Please provide a minimal reproducible code which we can copy/paste and reproduce the issue.
22 | placeholder: Reproduction
23 | - type: textarea
24 | id: logs
25 | attributes:
26 | label: Logs
27 | description: "Please include the Python logs if you can."
28 | render: shell
29 | - type: textarea
30 | id: system-info
31 | attributes:
32 | label: System Info
33 | description: Please share your system info with us. You can run the command `diffusers-cli env` and copy-paste its output below.
34 | placeholder: diffusers version, platform, python version, ...
35 | validations:
36 | required: true
37 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | contact_links:
2 | - name: Forum
3 | url: https://discuss.huggingface.co/c/discussion-related-to-httpsgithubcomhuggingfacediffusers/63
4 | about: General usage questions and community discussions
5 | - name: Blank issue
6 | url: https://github.com/huggingface/diffusers/issues/new
7 | about: Please note that the Forum is in most places the right place for discussions
8 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "\U0001F680 Feature request"
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/ISSUE_TEMPLATE/feedback.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: "💬 Feedback about API Design"
3 | about: Give feedback about the current API design
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **What API design would you like to have changed or added to the library? Why?**
11 |
12 | **What use case would this enable or better enable? Can you give us a code example?**
13 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/ISSUE_TEMPLATE/new-model-addition.yml:
--------------------------------------------------------------------------------
1 | name: "\U0001F31F New model/pipeline/scheduler addition"
2 | description: Submit a proposal/request to implement a new diffusion model / pipeline / scheduler
3 | labels: [ "New model/pipeline/scheduler" ]
4 |
5 | body:
6 | - type: textarea
7 | id: description-request
8 | validations:
9 | required: true
10 | attributes:
11 | label: Model/Pipeline/Scheduler description
12 | description: |
13 | Put any and all important information relative to the model/pipeline/scheduler
14 |
15 | - type: checkboxes
16 | id: information-tasks
17 | attributes:
18 | label: Open source status
19 | description: |
20 | Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `diffusers`.
21 | options:
22 | - label: "The model implementation is available"
23 | - label: "The model weights are available (Only relevant if addition is not a scheduler)."
24 |
25 | - type: textarea
26 | id: additional-info
27 | attributes:
28 | label: Provide useful links for the implementation
29 | description: |
30 | Please provide information regarding the implementation, the weights, and the authors.
31 | Please mention the authors by @gh-username if you're aware of their usernames.
32 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/workflows/build_documentation.yml:
--------------------------------------------------------------------------------
1 | name: Build documentation
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | - doc-builder*
8 | - v*-release
9 |
10 | jobs:
11 | build:
12 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
13 | with:
14 | commit_sha: ${{ github.sha }}
15 | package: diffusers
16 | secrets:
17 | token: ${{ secrets.HUGGINGFACE_PUSH }}
18 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/workflows/build_pr_documentation.yml:
--------------------------------------------------------------------------------
1 | name: Build PR Documentation
2 |
3 | on:
4 | pull_request:
5 |
6 | concurrency:
7 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
8 | cancel-in-progress: true
9 |
10 | jobs:
11 | build:
12 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
13 | with:
14 | commit_sha: ${{ github.event.pull_request.head.sha }}
15 | pr_number: ${{ github.event.number }}
16 | package: diffusers
17 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/workflows/delete_doc_comment.yml:
--------------------------------------------------------------------------------
1 | name: Delete dev documentation
2 |
3 | on:
4 | pull_request:
5 | types: [ closed ]
6 |
7 |
8 | jobs:
9 | delete:
10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main
11 | with:
12 | pr_number: ${{ github.event.number }}
13 | package: diffusers
14 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/workflows/pr_quality.yml:
--------------------------------------------------------------------------------
1 | name: Run code quality checks
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - main
7 | push:
8 | branches:
9 | - main
10 |
11 | concurrency:
12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
13 | cancel-in-progress: true
14 |
15 | jobs:
16 | check_code_quality:
17 | runs-on: ubuntu-latest
18 | steps:
19 | - uses: actions/checkout@v3
20 | - name: Set up Python
21 | uses: actions/setup-python@v4
22 | with:
23 | python-version: "3.7"
24 | - name: Install dependencies
25 | run: |
26 | python -m pip install --upgrade pip
27 | pip install .[quality]
28 | - name: Check quality
29 | run: |
30 | black --check --preview examples tests src utils scripts
31 | isort --check-only examples tests src utils scripts
32 | flake8 examples tests src utils scripts
33 | doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
34 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/workflows/pr_tests.yml:
--------------------------------------------------------------------------------
1 | name: Run non-slow tests
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - main
7 |
8 | concurrency:
9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
10 | cancel-in-progress: true
11 |
12 | env:
13 | HF_HOME: /mnt/cache
14 | OMP_NUM_THREADS: 8
15 | MKL_NUM_THREADS: 8
16 | PYTEST_TIMEOUT: 60
17 |
18 | jobs:
19 | run_tests_cpu:
20 | name: Diffusers tests
21 | runs-on: [ self-hosted, docker-gpu ]
22 | container:
23 | image: python:3.7
24 | options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
25 |
26 | steps:
27 | - name: Checkout diffusers
28 | uses: actions/checkout@v3
29 | with:
30 | fetch-depth: 2
31 |
32 | - name: Install dependencies
33 | run: |
34 | python -m pip install --upgrade pip
35 | python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
36 | python -m pip install -e .[quality,test]
37 |
38 | - name: Environment
39 | run: |
40 | python utils/print_env.py
41 |
42 | - name: Run all non-slow selected tests on CPU
43 | run: |
44 | python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile -s tests/
45 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/workflows/push_tests.yml:
--------------------------------------------------------------------------------
1 | name: Run all tests
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | env:
9 | HF_HOME: /mnt/cache
10 | OMP_NUM_THREADS: 8
11 | MKL_NUM_THREADS: 8
12 | PYTEST_TIMEOUT: 1000
13 | RUN_SLOW: yes
14 |
15 | jobs:
16 | run_tests_single_gpu:
17 | name: Diffusers tests
18 | strategy:
19 | fail-fast: false
20 | matrix:
21 | machine_type: [ single-gpu ]
22 | runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ]
23 | container:
24 | image: nvcr.io/nvidia/pytorch:22.07-py3
25 | options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
26 |
27 | steps:
28 | - name: Checkout diffusers
29 | uses: actions/checkout@v3
30 | with:
31 | fetch-depth: 2
32 |
33 | - name: NVIDIA-SMI
34 | run: |
35 | nvidia-smi
36 |
37 | - name: Install dependencies
38 | run: |
39 | python -m pip install --upgrade pip
40 | python -m pip uninstall -y torch torchvision torchtext
41 | python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116
42 | python -m pip install -e .[quality,test]
43 |
44 | - name: Environment
45 | run: |
46 | python utils/print_env.py
47 |
48 | - name: Run all (incl. slow) tests on GPU
49 | env:
50 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
51 | run: |
52 | python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s tests/
53 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | name: Stale Bot
2 |
3 | on:
4 | schedule:
5 | - cron: "0 15 * * *"
6 |
7 | jobs:
8 | close_stale_issues:
9 | name: Close Stale Issues
10 | if: github.repository == 'huggingface/diffusers'
11 | runs-on: ubuntu-latest
12 | env:
13 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
14 | steps:
15 | - uses: actions/checkout@v2
16 |
17 | - name: Setup Python
18 | uses: actions/setup-python@v1
19 | with:
20 | python-version: 3.7
21 |
22 | - name: Install requirements
23 | run: |
24 | pip install PyGithub
25 | - name: Close stale issues
26 | run: |
27 | python utils/stale.py
28 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.github/workflows/typos.yml:
--------------------------------------------------------------------------------
1 | name: Check typos
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | build:
8 | runs-on: ubuntu-latest
9 |
10 | steps:
11 | - uses: actions/checkout@v3
12 |
13 | - name: typos-action
14 | uses: crate-ci/typos@v1.12.4
15 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/.gitignore:
--------------------------------------------------------------------------------
1 | # Initially taken from Github's Python gitignore file
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # tests and logs
12 | tests/fixtures/cached_*_text.txt
13 | logs/
14 | lightning_logs/
15 | lang_code_data/
16 |
17 | # Distribution / packaging
18 | .Python
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .nox/
50 | .coverage
51 | .coverage.*
52 | .cache
53 | nosetests.xml
54 | coverage.xml
55 | *.cover
56 | .hypothesis/
57 | .pytest_cache/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 | db.sqlite3
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | .python-version
90 |
91 | # celery beat schedule file
92 | celerybeat-schedule
93 |
94 | # SageMath parsed files
95 | *.sage.py
96 |
97 | # Environments
98 | .env
99 | .venv
100 | env/
101 | venv/
102 | ENV/
103 | env.bak/
104 | venv.bak/
105 |
106 | # Spyder project settings
107 | .spyderproject
108 | .spyproject
109 |
110 | # Rope project settings
111 | .ropeproject
112 |
113 | # mkdocs documentation
114 | /site
115 |
116 | # mypy
117 | .mypy_cache/
118 | .dmypy.json
119 | dmypy.json
120 |
121 | # Pyre type checker
122 | .pyre/
123 |
124 | # vscode
125 | .vs
126 | .vscode
127 |
128 | # Pycharm
129 | .idea
130 |
131 | # TF code
132 | tensorflow_code
133 |
134 | # Models
135 | proc_data
136 |
137 | # examples
138 | runs
139 | /runs_old
140 | /wandb
141 | /examples/runs
142 | /examples/**/*.args
143 | /examples/rag/sweep
144 |
145 | # data
146 | /data
147 | serialization_dir
148 |
149 | # emacs
150 | *.*~
151 | debug.env
152 |
153 | # vim
154 | .*.swp
155 |
156 | #ctags
157 | tags
158 |
159 | # pre-commit
160 | .pre-commit*
161 |
162 | # .lock
163 | *.lock
164 |
165 | # DS_Store (MacOS)
166 | .DS_Store
--------------------------------------------------------------------------------
/FlashAttention/diffusers/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include src/diffusers/utils/model_card_template.md
2 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples
2 |
3 | # make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
4 | export PYTHONPATH = src
5 |
6 | check_dirs := examples scripts src tests utils
7 |
8 | modified_only_fixup:
9 | $(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
10 | @if test -n "$(modified_py_files)"; then \
11 | echo "Checking/fixing $(modified_py_files)"; \
12 | black --preview $(modified_py_files); \
13 | isort $(modified_py_files); \
14 | flake8 $(modified_py_files); \
15 | else \
16 | echo "No library .py files were modified"; \
17 | fi
18 |
19 | # Update src/diffusers/dependency_versions_table.py
20 |
21 | deps_table_update:
22 | @python setup.py deps_table_update
23 |
24 | deps_table_check_updated:
25 | @md5sum src/diffusers/dependency_versions_table.py > md5sum.saved
26 | @python setup.py deps_table_update
27 | @md5sum -c --quiet md5sum.saved || (printf "\nError: the version dependency table is outdated.\nPlease run 'make fixup' or 'make style' and commit the changes.\n\n" && exit 1)
28 | @rm md5sum.saved
29 |
30 | # autogenerating code
31 |
32 | autogenerate_code: deps_table_update
33 |
34 | # Check that the repo is in a good state
35 |
36 | repo-consistency:
37 | python utils/check_dummies.py
38 | python utils/check_repo.py
39 | python utils/check_inits.py
40 |
41 | # this target runs checks on all files
42 |
43 | quality:
44 | black --check --preview $(check_dirs)
45 | isort --check-only $(check_dirs)
46 | flake8 $(check_dirs)
47 | doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
48 |
49 | # Format source code automatically and check is there are any problems left that need manual fixing
50 |
51 | extra_style_checks:
52 | python utils/custom_init_isort.py
53 | doc-builder style src/diffusers docs/source --max_len 119 --path_to_docs docs/source
54 |
55 | # this target runs checks on all files and potentially modifies some of them
56 |
57 | style:
58 | black --preview $(check_dirs)
59 | isort $(check_dirs)
60 | ${MAKE} autogenerate_code
61 | ${MAKE} extra_style_checks
62 |
63 | # Super fast fix and check target that only works on relevant modified files since the branch was made
64 |
65 | fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
66 |
67 | # Make marked copies of snippets of codes conform to the original
68 |
69 | fix-copies:
70 | python utils/check_dummies.py --fix_and_overwrite
71 |
72 | # Run tests for the library
73 |
74 | test:
75 | python -m pytest -n auto --dist=loadfile -s -v ./tests/
76 |
77 | # Run tests for examples
78 |
79 | test-examples:
80 | python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/
81 |
82 |
83 | # Release stuff
84 |
85 | pre-release:
86 | python utils/release.py
87 |
88 | pre-patch:
89 | python utils/release.py --patch
90 |
91 | post-release:
92 | python utils/release.py --post_release
93 |
94 | post-patch:
95 | python utils/release.py --post_release --patch
96 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/_typos.toml:
--------------------------------------------------------------------------------
1 | # Files for typos
2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started
3 |
4 | [default.extend-identifiers]
5 |
6 | [default.extend-words]
7 | NIN_="NIN" # NIN is used in scripts/convert_ncsnpp_original_checkpoint_to_diffusers.py
8 | nd="np" # nd may be np (numpy)
9 |
10 |
11 | [files]
12 | extend-exclude = ["_typos.toml"]
13 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/_toctree.yml:
--------------------------------------------------------------------------------
1 | - sections:
2 | - local: index
3 | title: "🧨 Diffusers"
4 | - local: quicktour
5 | title: "Quicktour"
6 | - local: installation
7 | title: "Installation"
8 | title: "Get started"
9 | - sections:
10 | - sections:
11 | - local: using-diffusers/loading
12 | title: "Loading Pipelines, Models, and Schedulers"
13 | - local: using-diffusers/configuration
14 | title: "Configuring Pipelines, Models, and Schedulers"
15 | title: "Loading"
16 | - sections:
17 | - local: using-diffusers/unconditional_image_generation
18 | title: "Unconditional Image Generation"
19 | - local: using-diffusers/conditional_image_generation
20 | title: "Text-to-Image Generation"
21 | - local: using-diffusers/img2img
22 | title: "Text-Guided Image-to-Image"
23 | - local: using-diffusers/inpaint
24 | title: "Text-Guided Image-Inpainting"
25 | - local: using-diffusers/custom
26 | title: "Create a custom pipeline"
27 | title: "Pipelines for Inference"
28 | title: "Using Diffusers"
29 | - sections:
30 | - local: optimization/fp16
31 | title: "Memory and Speed"
32 | - local: optimization/onnx
33 | title: "ONNX"
34 | - local: optimization/open_vino
35 | title: "Open Vino"
36 | - local: optimization/mps
37 | title: "MPS"
38 | title: "Optimization/Special Hardware"
39 | - sections:
40 | - local: training/overview
41 | title: "Overview"
42 | - local: training/unconditional_training
43 | title: "Unconditional Image Generation"
44 | - local: training/text_inversion
45 | title: "Text Inversion"
46 | - local: training/text2image
47 | title: "Text-to-image"
48 | title: "Training"
49 | - sections:
50 | - local: conceptual/stable_diffusion
51 | title: "Stable Diffusion"
52 | - local: conceptual/philosophy
53 | title: "Philosophy"
54 | - local: conceptual/contribution
55 | title: "How to contribute?"
56 | title: "Conceptual Guides"
57 | - sections:
58 | - sections:
59 | - local: api/models
60 | title: "Models"
61 | - local: api/schedulers
62 | title: "Schedulers"
63 | - local: api/diffusion_pipeline
64 | title: "Diffusion Pipeline"
65 | - local: api/logging
66 | title: "Logging"
67 | - local: api/configuration
68 | title: "Configuration"
69 | - local: api/outputs
70 | title: "Outputs"
71 | title: "Main Classes"
72 | - sections:
73 | - local: api/pipelines/overview
74 | title: "Overview"
75 | - local: api/pipelines/ddim
76 | title: "DDIM"
77 | - local: api/pipelines/ddpm
78 | title: "DDPM"
79 | - local: api/pipelines/latent_diffusion
80 | title: "Latent Diffusion"
81 | - local: api/pipelines/latent_diffusion_uncond
82 | title: "Unconditional Latent Diffusion"
83 | - local: api/pipelines/pndm
84 | title: "PNDM"
85 | - local: api/pipelines/score_sde_ve
86 | title: "Score SDE VE"
87 | - local: api/pipelines/stable_diffusion
88 | title: "Stable Diffusion"
89 | - local: api/pipelines/stochastic_karras_ve
90 | title: "Stochastic Karras VE"
91 | title: "Pipelines"
92 | title: "API"
93 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/configuration.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Configuration
14 |
15 | In Diffusers, schedulers of type [`schedulers.scheduling_utils.SchedulerMixin`] and models of type [`ModelMixin`] inherit from [`ConfigMixin`], which conveniently takes care of storing all parameters that are
16 | passed to the respective `__init__` methods in a JSON configuration file.
17 |
18 | TODO(PVP) - add example and better info here
19 |
20 | ## ConfigMixin
21 | [[autodoc]] ConfigMixin
22 | - from_config
23 | - save_config
24 |
--------------------------------------------------------------------------------
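The `TODO` in the page above still asks for an example. As an illustration of what `ConfigMixin` provides, here is a short sketch showing that scheduler `__init__` arguments are captured and can be round-tripped through `save_config` / `from_config` (the method names listed in the autodoc block above; exact signatures depend on the bundled diffusers version).

```python
from diffusers import DDPMScheduler

# __init__ arguments are recorded by ConfigMixin and exposed as `.config`.
scheduler = DDPMScheduler(num_train_timesteps=1000, beta_schedule="linear")
print(scheduler.config)

# Round-trip the configuration through a JSON file on disk.
scheduler.save_config("./ddpm-scheduler")
reloaded = DDPMScheduler.from_config("./ddpm-scheduler")
print(reloaded.config)
```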
/FlashAttention/diffusers/docs/source/api/diffusion_pipeline.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Pipelines
14 |
15 | The [`DiffusionPipeline`] is the easiest way to load any pretrained diffusion pipeline from the [Hub](https://huggingface.co/models?library=diffusers) and to use it in inference.
16 |
17 |
18 |
19 | One should not use the Diffusion Pipeline class for training or fine-tuning a diffusion model. Individual
20 | components of diffusion pipelines are usually trained individually, so we suggest working directly
21 | with [`UNetModel`] and [`UNetConditionModel`].
22 |
23 |
24 |
25 | Any diffusion pipeline that is loaded with [`~DiffusionPipeline.from_pretrained`] will automatically
26 | detect the pipeline type, *e.g.* [`StableDiffusionPipeline`] and consequently load each component of the
27 | pipeline and pass them into the `__init__` function of the pipeline, *e.g.* [`~StableDiffusionPipeline.__init__`].
28 |
29 | Any pipeline object can be saved locally with [`~DiffusionPipeline.save_pretrained`].
30 |
31 | ## DiffusionPipeline
32 | [[autodoc]] DiffusionPipeline
33 | - from_pretrained
34 | - save_pretrained
35 |
36 | ## ImagePipelineOutput
37 | By default, diffusion pipelines return an object of class [`~pipeline_utils.ImagePipelineOutput`]:
38 |
39 | [[autodoc]] pipeline_utils.ImagePipelineOutput
40 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/logging.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Logging
14 |
15 | 🧨 Diffusers has a centralized logging system, so that you can easily set up the verbosity of the library.
16 |
17 | Currently the default verbosity of the library is `WARNING`.
18 |
19 | To change the level of verbosity, just use one of the direct setters. For instance, here is how to change the verbosity
20 | to the INFO level.
21 |
22 | ```python
23 | import diffusers
24 |
25 | diffusers.logging.set_verbosity_info()
26 | ```
27 |
28 | You can also use the environment variable `DIFFUSERS_VERBOSITY` to override the default verbosity. You can set it
29 | to one of the following: `debug`, `info`, `warning`, `error`, `critical`. For example:
30 |
31 | ```bash
32 | DIFFUSERS_VERBOSITY=error ./myprogram.py
33 | ```
34 |
35 | Additionally, some `warnings` can be disabled by setting the environment variable
36 | `DIFFUSERS_NO_ADVISORY_WARNINGS` to a true value, like *1*. This will disable any warning that is logged using
37 | [`logger.warning_advice`]. For example:
38 |
39 | ```bash
40 | DIFFUSERS_NO_ADVISORY_WARNINGS=1 ./myprogram.py
41 | ```
42 |
43 | Here is an example of how to use the same logger as the library in your own module or script:
44 |
45 | ```python
46 | from diffusers.utils import logging
47 |
48 | logging.set_verbosity_info()
49 | logger = logging.get_logger("diffusers")
50 | logger.info("INFO")
51 | logger.warning("WARN")
52 | ```
53 |
54 |
55 | All the methods of this logging module are documented below; the main ones are
56 | [`logging.get_verbosity`] to get the current level of verbosity in the logger and
57 | [`logging.set_verbosity`] to set the verbosity to the level of your choice. In order (from the least
58 | verbose to the most verbose), those levels (with their corresponding int values in parentheses) are:
59 |
60 | - `diffusers.logging.CRITICAL` or `diffusers.logging.FATAL` (int value, 50): only report the most
61 | critical errors.
62 | - `diffusers.logging.ERROR` (int value, 40): only report errors.
63 | - `diffusers.logging.WARNING` or `diffusers.logging.WARN` (int value, 30): only reports errors and
64 | warnings. This is the default level used by the library.
65 | - `diffusers.logging.INFO` (int value, 20): reports errors, warnings and basic information.
66 | - `diffusers.logging.DEBUG` (int value, 10): reports all information.
67 |
68 | By default, `tqdm` progress bars will be displayed during model download. [`logging.disable_progress_bar`] and [`logging.enable_progress_bar`] can be used to suppress or unsuppress this behavior.
69 |
70 | ## Base setters
71 |
72 | [[autodoc]] logging.set_verbosity_error
73 |
74 | [[autodoc]] logging.set_verbosity_warning
75 |
76 | [[autodoc]] logging.set_verbosity_info
77 |
78 | [[autodoc]] logging.set_verbosity_debug
79 |
80 | ## Other functions
81 |
82 | [[autodoc]] logging.get_verbosity
83 |
84 | [[autodoc]] logging.set_verbosity
85 |
86 | [[autodoc]] logging.get_logger
87 |
88 | [[autodoc]] logging.enable_default_handler
89 |
90 | [[autodoc]] logging.disable_default_handler
91 |
92 | [[autodoc]] logging.enable_explicit_format
93 |
94 | [[autodoc]] logging.reset_format
95 |
96 | [[autodoc]] logging.enable_progress_bar
97 |
98 | [[autodoc]] logging.disable_progress_bar
99 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/models.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Models
14 |
15 | Diffusers contains pretrained models for popular algorithms and modules for creating the next set of diffusion models.
16 | The primary function of these models is to denoise an input sample by modeling the distribution $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$.
17 | The models are built on the base class [`ModelMixin`], which is a `torch.nn.Module` with basic functionality for saving and loading models both locally and from the Hugging Face Hub.
18 |
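As a small example of this save/load functionality (the constructor arguments are illustrative, not a recommended configuration):

```python
from diffusers import UNet2DModel

# A tiny unconditional UNet; every __init__ argument is stored in the model config.
model = UNet2DModel(sample_size=32, in_channels=3, out_channels=3)

# Save weights and config locally, then restore an identical model.
model.save_pretrained("./my-unet")
restored = UNet2DModel.from_pretrained("./my-unet")
```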
19 | ## ModelMixin
20 | [[autodoc]] ModelMixin
21 |
22 | ## UNet2DOutput
23 | [[autodoc]] models.unet_2d.UNet2DOutput
24 |
25 | ## UNet2DModel
26 | [[autodoc]] UNet2DModel
27 |
28 | ## UNet2DConditionOutput
29 | [[autodoc]] models.unet_2d_condition.UNet2DConditionOutput
30 |
31 | ## UNet2DConditionModel
32 | [[autodoc]] UNet2DConditionModel
33 |
34 | ## DecoderOutput
35 | [[autodoc]] models.vae.DecoderOutput
36 |
37 | ## VQEncoderOutput
38 | [[autodoc]] models.vae.VQEncoderOutput
39 |
40 | ## VQModel
41 | [[autodoc]] VQModel
42 |
43 | ## AutoencoderKLOutput
44 | [[autodoc]] models.vae.AutoencoderKLOutput
45 |
46 | ## AutoencoderKL
47 | [[autodoc]] AutoencoderKL
48 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/outputs.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # BaseOutputs
14 |
15 | All models have outputs that are instances of subclasses of [`~utils.BaseOutput`]. Those are
16 | data structures containing all the information returned by the model, but that can also be used as tuples or
17 | dictionaries.
18 |
19 | Let's see how this looks in an example:
20 |
21 | ```python
22 | from diffusers import DDIMPipeline
23 |
24 | pipeline = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
25 | outputs = pipeline()
26 | ```
27 |
28 | The `outputs` object is a [`~pipeline_utils.ImagePipelineOutput`]. As we can see in the
29 | documentation of that class below, this means it has an `images` attribute.
30 |
31 | You can access each attribute as you would usually do, and if that attribute has not been returned by the model, you will get `None`:
32 |
33 | ```python
34 | outputs.images
35 | ```
36 |
37 | or via keyword lookup
38 |
39 | ```python
40 | outputs["images"]
41 | ```
42 |
43 | When considering our `outputs` object as a tuple, it only considers the attributes that don't have `None` values.
44 | Here for instance, we could retrieve images via indexing:
45 |
46 | ```python
47 | outputs[:1]
48 | ```
49 |
50 | which, in this instance, will return the one-element tuple `(outputs.images,)`.
51 |
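The same conversion can be made explicit with `to_tuple`, which likewise keeps only the fields that are not `None`:

```python
images_tuple = outputs.to_tuple()
```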
52 | ## BaseOutput
53 |
54 | [[autodoc]] utils.BaseOutput
55 | - to_tuple
56 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/pipelines/ddim.mdx:
--------------------------------------------------------------------------------
1 | # DDIM
2 |
3 | ## Overview
4 |
5 | [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) (DDIM) by Jiaming Song, Chenlin Meng and Stefano Ermon.
6 |
7 | The abstract of the paper is the following:
8 |
9 | Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a Markovian diffusion process. We construct a class of non-Markovian diffusion processes that lead to the same training objective, but whose reverse process can be much faster to sample from. We empirically demonstrate that DDIMs can produce high quality samples 10× to 50× faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, and can perform semantically meaningful image interpolation directly in the latent space.
10 |
11 | The original codebase of this paper can be found [here](https://github.com/ermongroup/ddim).
12 |
13 | ## Available Pipelines:
14 |
15 | | Pipeline | Tasks | Colab
16 | |---|---|:---:|
17 | | [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddim/pipeline_ddim.py) | *Unconditional Image Generation* | - |
18 |
19 |
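As a short usage sketch (reusing the DDPM CIFAR-10 checkpoint that appears elsewhere in these docs; any compatible unconditional checkpoint works):

```python
from diffusers import DDIMPipeline

# Load an unconditional checkpoint and sample with the DDIM sampler.
pipe = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")

# The pipeline returns an ImagePipelineOutput with an `images` attribute.
image = pipe().images[0]
image.save("ddim_sample.png")
```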
20 | ## DDIMPipeline
21 | [[autodoc]] DDIMPipeline
22 | - __call__
23 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/pipelines/ddpm.mdx:
--------------------------------------------------------------------------------
1 | # DDPM
2 |
3 | ## Overview
4 |
5 | [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239)
6 | (DDPM) by Jonathan Ho, Ajay Jain and Pieter Abbeel proposes the diffusion based model of the same name, but in the context of the 🤗 Diffusers library, DDPM refers to the discrete denoising scheduler from the paper as well as the pipeline.
7 |
8 | The abstract of the paper is the following:
9 |
10 | We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN.
11 |
12 | The original codebase of this paper can be found [here](https://github.com/hojonathanho/diffusion).
13 |
14 |
15 | ## Available Pipelines:
16 |
17 | | Pipeline | Tasks | Colab
18 | |---|---|:---:|
19 | | [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddpm/pipeline_ddpm.py) | *Unconditional Image Generation* | - |
20 |
21 |
22 | ## DDPMPipeline
23 | [[autodoc]] DDPMPipeline
24 | - __call__
25 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/pipelines/latent_diffusion.mdx:
--------------------------------------------------------------------------------
1 | # Latent Diffusion
2 |
3 | ## Overview
4 |
5 | Latent Diffusion was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer.
6 |
7 | The abstract of the paper is the following:
8 |
9 | *By decomposing the image formation process into a sequential application of denoising autoencoders, diffusion models (DMs) achieve state-of-the-art synthesis results on image data and beyond. Additionally, their formulation allows for a guiding mechanism to control the image generation process without retraining. However, since these models typically operate directly in pixel space, optimization of powerful DMs often consumes hundreds of GPU days and inference is expensive due to sequential evaluations. To enable DM training on limited computational resources while retaining their quality and flexibility, we apply them in the latent space of powerful pretrained autoencoders. In contrast to previous work, training diffusion models on such a representation allows for the first time to reach a near-optimal point between complexity reduction and detail preservation, greatly boosting visual fidelity. By introducing cross-attention layers into the model architecture, we turn diffusion models into powerful and flexible generators for general conditioning inputs such as text or bounding boxes and high-resolution synthesis becomes possible in a convolutional manner. Our latent diffusion models (LDMs) achieve a new state of the art for image inpainting and highly competitive performance on various tasks, including unconditional image generation, semantic scene synthesis, and super-resolution, while significantly reducing computational requirements compared to pixel-based DMs.*
10 |
11 | The original codebase can be found [here](https://github.com/CompVis/latent-diffusion).
12 |
13 | ## Tips:
14 |
15 | -
16 | -
17 | -
18 |
19 | ## Available Pipelines:
20 |
21 | | Pipeline | Tasks | Colab
22 | |---|---|:---:|
23 | | [pipeline_latent_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py) | *Text-to-Image Generation* | - |
24 |
25 | ## Examples:
26 |
27 |
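A minimal text-to-image sketch using the LDM checkpoint referenced later in these docs (prompt and output path are arbitrary):

```python
from diffusers import DiffusionPipeline

# "CompVis/ldm-text2im-large-256" resolves to an LDMTextToImagePipeline.
pipe = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")
pipe = pipe.to("cuda")

image = pipe("An image of a squirrel in Picasso style").images[0]
image.save("squirrel.png")
```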
28 | ## LDMTextToImagePipeline
29 | [[autodoc]] pipelines.latent_diffusion.pipeline_latent_diffusion.LDMTextToImagePipeline
30 | - __call__
31 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/pipelines/latent_diffusion_uncond.mdx:
--------------------------------------------------------------------------------
1 | # Unconditional Latent Diffusion
2 |
3 | ## Overview
4 |
5 | Unconditional Latent Diffusion was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer.
6 |
7 | The abstract of the paper is the following:
8 |
9 | *By decomposing the image formation process into a sequential application of denoising autoencoders, diffusion models (DMs) achieve state-of-the-art synthesis results on image data and beyond. Additionally, their formulation allows for a guiding mechanism to control the image generation process without retraining. However, since these models typically operate directly in pixel space, optimization of powerful DMs often consumes hundreds of GPU days and inference is expensive due to sequential evaluations. To enable DM training on limited computational resources while retaining their quality and flexibility, we apply them in the latent space of powerful pretrained autoencoders. In contrast to previous work, training diffusion models on such a representation allows for the first time to reach a near-optimal point between complexity reduction and detail preservation, greatly boosting visual fidelity. By introducing cross-attention layers into the model architecture, we turn diffusion models into powerful and flexible generators for general conditioning inputs such as text or bounding boxes and high-resolution synthesis becomes possible in a convolutional manner. Our latent diffusion models (LDMs) achieve a new state of the art for image inpainting and highly competitive performance on various tasks, including unconditional image generation, semantic scene synthesis, and super-resolution, while significantly reducing computational requirements compared to pixel-based DMs.*
10 |
11 | The original codebase can be found [here](https://github.com/CompVis/latent-diffusion).
12 |
13 | ## Tips:
14 |
15 | -
16 | -
17 | -
18 |
19 | ## Available Pipelines:
20 |
21 | | Pipeline | Tasks | Colab
22 | |---|---|:---:|
23 | | [pipeline_latent_diffusion_uncond.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py) | *Unconditional Image Generation* | - |
24 |
25 | ## Examples:
26 |
27 | ## LDMPipeline
28 | [[autodoc]] LDMPipeline
29 | - __call__
30 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/pipelines/pndm.mdx:
--------------------------------------------------------------------------------
1 | # PNDM
2 |
3 | ## Overview
4 |
5 | [Pseudo Numerical methods for Diffusion Models on manifolds](https://arxiv.org/abs/2202.09778) (PNDM) by Luping Liu, Yi Ren, Zhijie Lin and Zhou Zhao.
6 |
7 | The abstract of the paper is the following:
8 |
9 | Denoising Diffusion Probabilistic Models (DDPMs) can generate high-quality samples such as image and audio samples. However, DDPMs require hundreds to thousands of iterations to produce final samples. Several prior works have successfully accelerated DDPMs through adjusting the variance schedule (e.g., Improved Denoising Diffusion Probabilistic Models) or the denoising equation (e.g., Denoising Diffusion Implicit Models (DDIMs)). However, these acceleration methods cannot maintain the quality of samples and even introduce new noise at a high speedup rate, which limit their practicability. To accelerate the inference process while keeping the sample quality, we provide a fresh perspective that DDPMs should be treated as solving differential equations on manifolds. Under such a perspective, we propose pseudo numerical methods for diffusion models (PNDMs). Specifically, we figure out how to solve differential equations on manifolds and show that DDIMs are simple cases of pseudo numerical methods. We change several classical numerical methods to corresponding pseudo numerical methods and find that the pseudo linear multi-step method is the best in most situations. According to our experiments, by directly using pre-trained models on Cifar10, CelebA and LSUN, PNDMs can generate higher quality synthetic images with only 50 steps compared with 1000-step DDIMs (20x speedup), significantly outperform DDIMs with 250 steps (by around 0.4 in FID) and have good generalization on different variance schedules.
10 |
11 | The original codebase can be found [here](https://github.com/luping-liu/PNDM).
12 |
13 | ## Available Pipelines:
14 |
15 | | Pipeline | Tasks | Colab
16 | |---|---|:---:|
17 | | [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pndm/pipeline_pndm.py) | *Unconditional Image Generation* | - |
18 |
19 |
20 | ## PNDMPipeline
21 | [[autodoc]] pipelines.pndm.pipeline_pndm.PNDMPipeline
22 | - __call__
23 |
24 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/pipelines/score_sde_ve.mdx:
--------------------------------------------------------------------------------
1 | # Score SDE VE
2 |
3 | ## Overview
4 |
5 | [Score-Based Generative Modeling through Stochastic Differential Equations](https://arxiv.org/abs/2011.13456) (Score SDE) by Yang Song, Jascha Sohl-Dickstein, Diederik P. Kingma, Abhishek Kumar, Stefano Ermon and Ben Poole.
6 |
7 | The abstract of the paper is the following:
8 |
9 | Creating noise from data is easy; creating data from noise is generative modeling. We present a stochastic differential equation (SDE) that smoothly transforms a complex data distribution to a known prior distribution by slowly injecting noise, and a corresponding reverse-time SDE that transforms the prior distribution back into the data distribution by slowly removing the noise. Crucially, the reverse-time SDE depends only on the time-dependent gradient field (\aka, score) of the perturbed data distribution. By leveraging advances in score-based generative modeling, we can accurately estimate these scores with neural networks, and use numerical SDE solvers to generate samples. We show that this framework encapsulates previous approaches in score-based generative modeling and diffusion probabilistic modeling, allowing for new sampling procedures and new modeling capabilities. In particular, we introduce a predictor-corrector framework to correct errors in the evolution of the discretized reverse-time SDE. We also derive an equivalent neural ODE that samples from the same distribution as the SDE, but additionally enables exact likelihood computation, and improved sampling efficiency. In addition, we provide a new way to solve inverse problems with score-based models, as demonstrated with experiments on class-conditional generation, image inpainting, and colorization. Combined with multiple architectural improvements, we achieve record-breaking performance for unconditional image generation on CIFAR-10 with an Inception score of 9.89 and FID of 2.20, a competitive likelihood of 2.99 bits/dim, and demonstrate high fidelity generation of 1024 x 1024 images for the first time from a score-based generative model.
10 |
11 | The original codebase can be found [here](https://github.com/yang-song/score_sde_pytorch).
12 |
13 | This pipeline implements the Variance Exploding (VE) variant of the method.
14 |
15 | ## Available Pipelines:
16 |
17 | | Pipeline | Tasks | Colab
18 | |---|---|:---:|
19 | | [pipeline_score_sde_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py) | *Unconditional Image Generation* | - |
20 |
21 | ## ScoreSdeVePipeline
22 | [[autodoc]] ScoreSdeVePipeline
23 | - __call__
24 |
25 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/pipelines/stable_diffusion.mdx:
--------------------------------------------------------------------------------
1 | # Stable diffusion pipelines
2 |
3 | Stable Diffusion is a text-to-image _latent diffusion_ model created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/) and [LAION](https://laion.ai/). It's trained on 512x512 images from a subset of the [LAION-5B](https://laion.ai/blog/laion-5b/) dataset. This model uses a frozen CLIP ViT-L/14 text encoder to condition the model on text prompts. With its 860M UNet and 123M text encoder, the model is relatively lightweight and can run on consumer GPUs.
4 |
5 | Latent diffusion is the research on top of which Stable Diffusion was built. It was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer. You can learn more details about it in the [specific pipeline for latent diffusion](pipelines/latent_diffusion) that is part of 🤗 Diffusers.
6 |
7 | For more details about how Stable Diffusion works and how it differs from the base latent diffusion model, please refer to the official [launch announcement post](https://stability.ai/blog/stable-diffusion-announcement) and [this section of our own blog post](https://huggingface.co/blog/stable_diffusion#how-does-stable-diffusion-work).
8 |
9 | *Tips*:
10 | - To tweak your prompts on a specific result you liked, you can generate your own latents, as demonstrated in the following notebook: [](https://colab.research.google.com/github/pcuenca/diffusers-examples/blob/main/notebooks/stable-diffusion-seeds.ipynb)
11 |
12 | *Overview*:
13 |
14 | | Pipeline | Tasks | Colab | Demo
15 | |---|---|:---:|:---:|
16 | | [pipeline_stable_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py) | *Text-to-Image Generation* | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb) | [🤗 Stable Diffusion](https://huggingface.co/spaces/stabilityai/stable-diffusion)
17 | | [pipeline_stable_diffusion_img2img.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py) | *Image-to-Image Text-Guided Generation* | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) | [🤗 Diffuse the Rest](https://huggingface.co/spaces/huggingface/diffuse-the-rest)
18 | | [pipeline_stable_diffusion_inpaint.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py) | **Experimental** – *Text-Guided Image Inpainting* | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) | Coming soon
19 |
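As a complement to the reproducibility tip above, a simple way to make generations repeatable is to pass a seeded `torch.Generator` to the pipeline call (a hedged sketch; the checkpoint, seed and prompt are only examples):

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", use_auth_token=True
).to("cuda")

# Fixing the generator seed fixes the sampled latents, so the same prompt
# reproduces the same image.
generator = torch.Generator(device="cuda").manual_seed(1024)
image = pipe("a photo of an astronaut riding a horse on mars", generator=generator).images[0]
image.save("astronaut.png")
```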
20 | ## StableDiffusionPipelineOutput
21 | [[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
22 |
23 | ## StableDiffusionPipeline
24 | [[autodoc]] StableDiffusionPipeline
25 | - __call__
26 | - enable_attention_slicing
27 | - disable_attention_slicing
28 |
29 | ## StableDiffusionImg2ImgPipeline
30 | [[autodoc]] StableDiffusionImg2ImgPipeline
31 | - __call__
32 | - enable_attention_slicing
33 | - disable_attention_slicing
34 |
35 | ## StableDiffusionInpaintPipeline
36 | [[autodoc]] StableDiffusionInpaintPipeline
37 | - __call__
38 | - enable_attention_slicing
39 | - disable_attention_slicing
40 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/api/pipelines/stochastic_karras_ve.mdx:
--------------------------------------------------------------------------------
1 | # Stochastic Karras VE
2 |
3 | ## Overview
4 |
5 | [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) by Tero Karras, Miika Aittala, Timo Aila and Samuli Laine.
6 |
7 | The abstract of the paper is the following:
8 |
9 | We argue that the theory and practice of diffusion-based generative models are currently unnecessarily convoluted and seek to remedy the situation by presenting a design space that clearly separates the concrete design choices. This lets us identify several changes to both the sampling and training processes, as well as preconditioning of the score networks. Together, our improvements yield new state-of-the-art FID of 1.79 for CIFAR-10 in a class-conditional setting and 1.97 in an unconditional setting, with much faster sampling (35 network evaluations per image) than prior designs. To further demonstrate their modular nature, we show that our design changes dramatically improve both the efficiency and quality obtainable with pre-trained score networks from previous work, including improving the FID of an existing ImageNet-64 model from 2.07 to near-SOTA 1.55.
10 |
11 | This pipeline implements the stochastic sampling tailored to Variance Exploding (VE) models.
12 |
13 |
14 | ## Available Pipelines:
15 |
16 | | Pipeline | Tasks | Colab
17 | |---|---|:---:|
18 | | [pipeline_stochastic_karras_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py) | *Unconditional Image Generation* | - |
19 |
20 |
21 | ## KarrasVePipeline
22 | [[autodoc]] KarrasVePipeline
23 | - __call__
24 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/conceptual/philosophy.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Philosophy
14 |
15 | - Readability and clarity are preferred over highly optimized code. A strong importance is put on providing readable, intuitive and elementary code design. *E.g.*, the provided [schedulers](https://github.com/huggingface/diffusers/tree/main/src/diffusers/schedulers) are separated from the provided [models](https://github.com/huggingface/diffusers/tree/main/src/diffusers/models) and use well-commented code that can be read alongside the original paper.
16 | - Diffusers is **modality independent** and focuses on providing pretrained models and tools to build systems that generate **continuous outputs**, *e.g.* vision and audio. This is one of the guiding goals even if the initial pipelines are devoted to vision tasks.
17 | - Diffusion models and schedulers are provided as concise, elementary building blocks. In contrast, diffusion pipelines are a collection of end-to-end diffusion systems that can be used out-of-the-box, should stay as close as possible to their original implementations and can include components of other libraries, such as text encoders. Examples of diffusion pipelines are [Glide](https://github.com/openai/glide-text2im), [Latent Diffusion](https://github.com/CompVis/latent-diffusion) and [Stable Diffusion](https://github.com/compvis/stable-diffusion).
18 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/conceptual/stable_diffusion.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Stable Diffusion
14 |
15 | Under construction 🚧
16 |
17 | For now please visit this [very in-detail blog post](https://huggingface.co/blog/stable_diffusion)
18 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/imgs/diffusers_library.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/FlashAttention/diffusers/docs/source/imgs/diffusers_library.jpg
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/installation.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Installation
14 |
15 | Install 🤗 Diffusers with PyTorch support. Support for other libraries will come in the future.
16 |
17 | 🤗 Diffusers is tested on Python 3.7+, and PyTorch 1.7.0+.
18 |
19 | ## Install with pip
20 |
21 | You should install 🤗 Diffusers in a [virtual environment](https://docs.python.org/3/library/venv.html).
22 | If you're unfamiliar with Python virtual environments, take a look at this [guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/).
23 | A virtual environment makes it easier to manage different projects, and avoid compatibility issues between dependencies.
24 |
25 | Start by creating a virtual environment in your project directory:
26 |
27 | ```bash
28 | python -m venv .env
29 | ```
30 |
31 | Activate the virtual environment:
32 |
33 | ```bash
34 | source .env/bin/activate
35 | ```
36 |
37 | Now you're ready to install 🤗 Diffusers with the following command:
38 |
39 | ```bash
40 | pip install diffusers
41 | ```
42 |
43 | ## Install from source
44 |
45 | Install 🤗 Diffusers from source with the following command:
46 |
47 | ```bash
48 | pip install git+https://github.com/huggingface/diffusers
49 | ```
50 |
51 | This command installs the bleeding edge `main` version rather than the latest `stable` version.
52 | The `main` version is useful for staying up-to-date with the latest developments.
53 | For instance, if a bug has been fixed since the last official release but a new release hasn't been rolled out yet.
54 | However, this means the `main` version may not always be stable.
55 | We strive to keep the `main` version operational, and most issues are usually resolved within a few hours or a day.
56 | If you run into a problem, please open an [Issue](https://github.com/huggingface/diffusers/issues) so we can fix it even sooner!
57 |
58 | ## Editable install
59 |
60 | You will need an editable install if you'd like to:
61 |
62 | * Use the `main` version of the source code.
63 | * Contribute to 🤗 Diffusers and need to test changes in the code.
64 |
65 | Clone the repository and install 🤗 Diffusers with the following commands:
66 |
67 | ```bash
68 | git clone https://github.com/huggingface/diffusers.git
69 | cd diffusers
70 | pip install -e .
71 | ```
72 |
73 | These commands will link the folder you cloned the repository into with your Python library paths.
74 | Python will now look inside the folder you cloned to in addition to the normal library paths.
75 | For example, if your Python packages are typically installed in `~/anaconda3/envs/main/lib/python3.7/site-packages/`, Python will also search the folder you cloned to: `~/diffusers/`.
76 |
77 |
78 |
79 | You must keep the `diffusers` folder if you want to keep using the library.
80 |
81 |
82 |
83 | Now you can easily update your clone to the latest version of 🤗 Diffusers with the following command:
84 |
85 | ```bash
86 | cd ~/diffusers/
87 | git pull
88 | ```
89 |
90 | Your Python environment will find the `main` version of 🤗 Diffusers on the next run.
91 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/optimization/fp16.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Memory and speed
14 |
15 | We present some techniques and ideas to optimize 🤗 Diffusers _inference_ for memory or speed.
16 |
17 | ## CUDA `autocast`
18 |
19 | If you use a CUDA GPU, you can take advantage of `torch.autocast` to perform inference roughly twice as fast at the cost of slightly lower precision. All you need to do is put your inference call inside an `autocast` context manager. The following example shows how to do it using Stable Diffusion text-to-image generation as an example:
20 |
21 | ```Python
22 | from torch import autocast
23 | from diffusers import StableDiffusionPipeline
24 |
25 | pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=True)
26 | pipe = pipe.to("cuda")
27 |
28 | prompt = "a photo of an astronaut riding a horse on mars"
29 | with autocast("cuda"):
30 | image = pipe(prompt).images[0]
31 | ```
32 |
33 | Despite the precision loss, in our experience the final image results look the same as the `float32` versions. Feel free to experiment and report back!
34 |
35 | ## Half precision weights
36 |
37 | To save more GPU memory, you can load the model weights directly in half precision. This involves loading the float16 version of the weights, which was saved to a branch named `fp16`, and telling PyTorch to use the `float16` type when loading them:
38 |
39 | ```python
40 | import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
41 | "CompVis/stable-diffusion-v1-4",
42 | revision="fp16",
43 | torch_dtype=torch.float16,
44 | use_auth_token=True
45 | )
46 | ```
47 |
48 | ## Sliced attention for additional memory savings
49 |
50 | For even additional memory savings, you can use a sliced version of attention that performs the computation in steps instead of all at once.
51 |
52 |
53 | Attention slicing is useful even if a batch size of just 1 is used - as long as the model uses more than one attention head. If there is more than one attention head, the *QK^T* attention matrix can be computed sequentially for each head, which can save a significant amount of memory.
54 |
55 |
56 | To perform the attention computation sequentially over each head, you only need to invoke [`~StableDiffusionPipeline.enable_attention_slicing`] in your pipeline before inference, like here:
57 |
58 | ```Python
59 | import torch
60 | from diffusers import StableDiffusionPipeline
61 |
62 | pipe = StableDiffusionPipeline.from_pretrained(
63 | "CompVis/stable-diffusion-v1-4",
64 | revision="fp16",
65 | torch_dtype=torch.float16,
66 | use_auth_token=True
67 | )
68 | pipe = pipe.to("cuda")
69 |
70 | prompt = "a photo of an astronaut riding a horse on mars"
71 | pipe.enable_attention_slicing()
72 | with torch.autocast("cuda"):
73 | image = pipe(prompt).images[0]
74 | ```
75 |
76 | There's a small performance penalty of about 10% slower inference times, but this method allows you to use Stable Diffusion in as little as 3.2 GB of VRAM!
77 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/optimization/mps.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # How to use Stable Diffusion in Apple Silicon (M1/M2)
14 |
15 | 🤗 Diffusers is compatible with Apple silicon for Stable Diffusion inference, using the PyTorch `mps` device. These are the steps you need to follow to use your M1 or M2 computer with Stable Diffusion.
16 |
17 | ## Requirements
18 |
19 | - Mac computer with Apple silicon (M1/M2) hardware.
20 | - macOS 12.3 or later.
21 | - arm64 version of Python.
22 | - PyTorch [Preview (Nightly)](https://pytorch.org/get-started/locally/), version `1.13.0.dev20220830` or later.
23 |
24 | ## Inference Pipeline
25 |
26 | The snippet below demonstrates how to use the `mps` backend using the familiar `to()` interface to move the Stable Diffusion pipeline to your M1 or M2 device.
27 |
28 | We recommend "priming" the pipeline with an additional one-time pass through it. This is a temporary workaround for an issue we have detected: the first inference pass produces slightly different results than subsequent ones. You only need to do this pass once, and it's okay to use just one inference step and discard the result.
29 |
30 | ```python
31 | # make sure you're logged in with `huggingface-cli login`
32 | from diffusers import StableDiffusionPipeline
33 |
34 | pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=True)
35 | pipe = pipe.to("mps")
36 |
37 | prompt = "a photo of an astronaut riding a horse on mars"
38 |
39 | # First-time "warmup" pass (see explanation above)
40 | _ = pipe(prompt, num_inference_steps=1)
41 |
42 | # Results match those from the CPU device after the warmup pass.
43 | image = pipe(prompt).images[0]
44 | ```
45 |
46 | ## Known Issues
47 |
48 | - As mentioned above, we are investigating a strange [first-time inference issue](https://github.com/huggingface/diffusers/issues/372).
49 | - Generating multiple prompts in a batch [crashes or doesn't work reliably](https://github.com/huggingface/diffusers/issues/363). We believe this might be related to the [`mps` backend in PyTorch](https://github.com/pytorch/pytorch/issues/84039#issuecomment-1237735249), but we need to investigate in more depth. For now, we recommend iterating instead of batching.
50 |
51 | ## Performance
52 |
53 | These are the results we got on a M1 Max MacBook Pro with 64 GB of RAM, running macOS Ventura Version 13.0 Beta (22A5331f). We performed Stable Diffusion text-to-image generation of the same prompt for 50 inference steps, using a guidance scale of 7.5.
54 |
55 | | Device | Steps | Time |
56 | |--------|-------|---------|
57 | | CPU | 50 | 213.46s |
58 | | MPS | 50 | 30.81s |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/optimization/onnx.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 |
14 | # How to use the ONNX Runtime for inference
15 |
16 | 🤗 Diffusers provides a Stable Diffusion pipeline compatible with the ONNX Runtime. This allows you to run Stable Diffusion on any hardware that supports ONNX (including CPUs), and where an accelerated version of PyTorch is not available.
17 |
18 | ## Installation
19 |
20 | - TODO
21 |
22 | ## Stable Diffusion Inference
23 |
24 | The snippet below demonstrates how to use the ONNX runtime. You need to use `StableDiffusionOnnxPipeline` instead of `StableDiffusionPipeline`. You also need to download the weights from the `onnx` branch of the repository, and indicate the runtime provider you want to use.
25 |
26 | ```python
27 | # make sure you're logged in with `huggingface-cli login`
28 | from diffusers import StableDiffusionOnnxPipeline
29 |
30 | pipe = StableDiffusionOnnxPipeline.from_pretrained(
31 | "CompVis/stable-diffusion-v1-4",
32 | revision="onnx",
33 | provider="CUDAExecutionProvider",
34 | use_auth_token=True,
35 | )
36 |
37 | prompt = "a photo of an astronaut riding a horse on mars"
38 | image = pipe(prompt).images[0]
39 | ```
40 |
41 | ## Known Issues
42 |
43 | - Generating multiple prompts in a batch seems to take too much memory. While we look into it, you may need to iterate instead of batching.
44 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/optimization/open_vino.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # OpenVINO
14 |
15 | Under construction 🚧
16 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/training/text2image.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 |
14 | # Text-to-Image Training
15 |
16 | Under construction 🚧
17 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/using-diffusers/conditional_image_generation.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Conditional Image Generation
14 |
15 | The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference.
16 |
17 | Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download.
18 | You can use the [`DiffusionPipeline`] for any [Diffusers' checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads).
19 | In this guide though, you'll use [`DiffusionPipeline`] for text-to-image generation with [Latent Diffusion](https://huggingface.co/CompVis/ldm-text2im-large-256):
20 |
21 | ```python
22 | >>> from diffusers import DiffusionPipeline
23 |
24 | >>> generator = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")
25 | ```
26 | The [`DiffusionPipeline`] downloads and caches all modeling, tokenization, and scheduling components.
27 | Because the model consists of roughly 1.4 billion parameters, we strongly recommend running it on GPU.
28 | You can move the generator object to GPU, just like you would in PyTorch.
29 |
30 | ```python
31 | >>> generator.to("cuda")
32 | ```
33 |
34 | Now you can use the `generator` on your text prompt:
35 |
36 | ```python
37 | >>> image = generator("An image of a squirrel in Picasso style").images[0]
38 | ```
39 |
40 | The output is by default wrapped into a [PIL Image object](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class).
41 |
42 | You can save the image by simply calling:
43 |
44 | ```python
45 | >>> image.save("image_of_squirrel_painting.png")
46 | ```
47 |
48 |
49 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/using-diffusers/configuration.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 |
14 |
15 | # Quicktour
16 |
17 | Start using Diffusers🧨 quickly!
18 | To start, use the [`DiffusionPipeline`] for quick inference and sample generations!
19 |
20 | ```
21 | pip install diffusers
22 | ```
23 |
24 | ## Main classes
25 |
26 | ### Models
27 |
28 | ### Schedulers
29 |
30 | ### Pipelines
31 |
32 |
33 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/using-diffusers/custom.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Custom Pipeline
14 |
15 | Under construction 🚧
16 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/using-diffusers/img2img.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Text-Guided Image-to-Image Generation
14 |
15 | The [`StableDiffusionImg2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images.
16 |
17 | ```python
18 | import torch
from torch import autocast
19 | import requests
20 | from PIL import Image
21 | from io import BytesIO
22 |
23 | from diffusers import StableDiffusionImg2ImgPipeline
24 |
25 | # load the pipeline
26 | device = "cuda"
27 | pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
28 | "CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, use_auth_token=True
29 | ).to(device)
30 |
31 | # let's download an initial image
32 | url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
33 |
34 | response = requests.get(url)
35 | init_image = Image.open(BytesIO(response.content)).convert("RGB")
36 | init_image = init_image.resize((768, 512))
37 |
38 | prompt = "A fantasy landscape, trending on artstation"
39 |
40 | with autocast("cuda"):
41 | images = pipe(prompt=prompt, init_image=init_image, strength=0.75, guidance_scale=7.5).images
42 |
43 | images[0].save("fantasy_landscape.png")
44 | ```
45 | You can also run this example on colab [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb)
46 |
47 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/using-diffusers/inpaint.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Text-Guided Image-Inpainting
14 |
15 | The [`StableDiffusionInpaintPipeline`] lets you edit specific parts of an image by providing a mask and text prompt.
16 |
17 | ```python
18 | from io import BytesIO
19 |
20 | import torch
from torch import autocast
21 | import requests
22 | import PIL
23 |
24 | from diffusers import StableDiffusionInpaintPipeline
25 |
26 |
27 | def download_image(url):
28 | response = requests.get(url)
29 | return PIL.Image.open(BytesIO(response.content)).convert("RGB")
30 |
31 |
32 | img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
33 | mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
34 |
35 | init_image = download_image(img_url).resize((512, 512))
36 | mask_image = download_image(mask_url).resize((512, 512))
37 |
38 | device = "cuda"
39 | pipe = StableDiffusionInpaintPipeline.from_pretrained(
40 | "CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, use_auth_token=True
41 | ).to(device)
42 |
43 | prompt = "a cat sitting on a bench"
44 | with autocast("cuda"):
45 | images = pipe(prompt=prompt, init_image=init_image, mask_image=mask_image, strength=0.75).images
46 |
47 | images[0].save("cat_on_bench.png")
48 | ```
49 |
50 | You can also run this example on colab [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb)
51 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/using-diffusers/loading.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Loading
14 |
15 | Under construction 🚧
16 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/using-diffusers/unconditional_image_generation.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 |
14 |
15 | # Unconditional Image Generation
16 |
17 | The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference.
18 |
19 | Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download.
20 | You can use the [`DiffusionPipeline`] for any [Diffusers' checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads).
21 | In this guide though, you'll use [`DiffusionPipeline`] for unconditional image generation with [DDPM](https://arxiv.org/abs/2006.11239):
22 |
23 | ```python
24 | >>> from diffusers import DiffusionPipeline
25 |
26 | >>> generator = DiffusionPipeline.from_pretrained("google/ddpm-celebahq-256")
27 | ```
28 | The [`DiffusionPipeline`] downloads and caches all modeling and scheduling components.
29 | Because running the full diffusion process is computationally intensive, we strongly recommend running it on GPU.
30 | You can move the generator object to GPU, just like you would in PyTorch.
31 |
32 | ```python
33 | >>> generator.to("cuda")
34 | ```
35 |
36 | Now you can use the `generator` to generate an image:
37 |
38 | ```python
39 | >>> image = generator().images[0]
40 | ```
41 |
42 | The output is by default wrapped into a [PIL Image object](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class).
43 |
44 | You can save the image by simply calling:
45 |
46 | ```python
47 | >>> image.save("generated_image.png")
48 | ```
49 |
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/examples/community/README.md:
--------------------------------------------------------------------------------
1 | # Community Examples
2 |
3 | **Community** examples consist of both inference and training examples that have been added by the community.
4 |
5 | | Example | Description | Author | |
6 | |:----------|:-------------|:-------------|------:|
7 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/examples/inference/README.md:
--------------------------------------------------------------------------------
1 | # Inference Examples
2 |
3 | **The inference examples folder is deprecated and will be removed in a future version**.
4 | **Officially supported inference examples can be found in the [Pipelines folder](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines)**.
5 |
6 | - For `Image-to-Image text-guided generation with Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples)
7 | - For `In-painting using Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples)
8 | - For `Tweak prompts reusing seeds and latents`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples)
9 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/examples/inference/image_to_image.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from diffusers import StableDiffusionImg2ImgPipeline # noqa F401
4 |
5 |
6 | warnings.warn(
7 | "The `image_to_image.py` script is outdated. Please use directly `from diffusers import"
8 | " StableDiffusionImg2ImgPipeline` instead."
9 | )
10 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/examples/inference/inpainting.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from diffusers import (
4 | StableDiffusionInpaintPipeline as StableDiffusionInpaintPipeline,
5 | ) # noqa F401
6 |
7 |
8 | warnings.warn(
9 | "The `inpainting.py` script is outdated. Please use directly `from diffusers import"
10 | " StableDiffusionInpaintPipeline` instead."
11 | )
12 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/examples/textual_inversion/README.md:
--------------------------------------------------------------------------------
1 | ## Textual Inversion fine-tuning example
2 |
3 | [Textual inversion](https://arxiv.org/abs/2208.01618) is a method to personalize text2image models like stable diffusion on your own images using just 3-5 examples.
4 | The `textual_inversion.py` script shows how to implement the training procedure and adapt it for stable diffusion.
5 |
6 | ## Running on Colab
7 |
8 | Colab for training
9 | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_textual_inversion_training.ipynb)
10 |
11 | Colab for inference
12 | [](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_conceptualizer_inference.ipynb)
13 |
14 | ## Running locally
15 | ### Installing the dependencies
16 |
17 | Before running the scripts, make sure to install the library's training dependencies:
18 |
19 | ```bash
20 | pip install diffusers[training] accelerate transformers
21 | ```
22 |
23 | And initialize an [🤗Accelerate](https://github.com/huggingface/accelerate/) environment with:
24 |
25 | ```bash
26 | accelerate config
27 | ```
28 |
29 |
30 | ### Cat toy example
31 |
32 | You need to accept the model license before downloading or using the weights. In this example we'll use model version `v1-4`, so you'll need to visit [its card](https://huggingface.co/CompVis/stable-diffusion-v1-4), read the license and tick the checkbox if you agree.
33 |
34 | You have to be a registered user in 🤗 Hugging Face Hub, and you'll also need to use an access token for the code to work. For more information on access tokens, please refer to [this section of the documentation](https://huggingface.co/docs/hub/security-tokens).
35 |
36 | Run the following command to authenticate your token
37 |
38 | ```bash
39 | huggingface-cli login
40 | ```
41 |
42 | If you have already cloned the repo, then you won't need to go through these steps. You can simply remove the `--use_auth_token` arg from the following command.
43 |
44 |
45 |
46 | Now let's get our dataset. Download 3-4 images from [here](https://drive.google.com/drive/folders/1fmJMs25nxS_rSNqS5hTcRdLem_YQXbq5) and save them in a directory. This will be our training data.
47 |
48 | And launch the training using
49 |
50 | ```bash
51 | export MODEL_NAME="CompVis/stable-diffusion-v1-4"
52 | export DATA_DIR="path-to-dir-containing-images"
53 |
54 | accelerate launch textual_inversion.py \
55 | --pretrained_model_name_or_path=$MODEL_NAME --use_auth_token \
56 | --train_data_dir=$DATA_DIR \
57 | --learnable_property="object" \
58 | --placeholder_token="" --initializer_token="toy" \
59 | --resolution=512 \
60 | --train_batch_size=1 \
61 | --gradient_accumulation_steps=4 \
62 | --max_train_steps=3000 \
63 | --learning_rate=5.0e-04 --scale_lr \
64 | --lr_scheduler="constant" \
65 | --lr_warmup_steps=0 \
66 | --output_dir="textual_inversion_cat"
67 | ```
68 |
69 | A full training run takes ~1 hour on one V100 GPU.
70 |
71 |
72 | ### Inference
73 |
74 | Once you have trained a model using the above command, inference can be done simply using the `StableDiffusionPipeline`. Make sure to include the `placeholder_token` in your prompt.
75 |
76 | ```python
77 | import torch
78 | from torch import autocast
79 | from diffusers import StableDiffusionPipeline
80 |
81 | model_id = "path-to-your-trained-model"
82 | pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
83 |
84 | prompt = "A backpack"
85 |
86 | with autocast("cuda"):
87 | image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0]
88 |
89 | image.save("cat-backpack.png")
90 | ```
91 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/examples/textual_inversion/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | transformers
4 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/examples/unconditional_image_generation/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate
2 | torchvision
3 | datasets
4 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 119
3 | target-version = ['py36']
4 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/FlashAttention/diffusers/scripts/__init__.py
--------------------------------------------------------------------------------
/FlashAttention/diffusers/scripts/conversion_ldm_uncond.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import torch
4 |
5 | from omegaconf import OmegaConf
6 | from diffusers import DDIMScheduler, LDMPipeline, UNetLDMModel, VQModel
7 |
8 |
9 | def convert_ldm_original(checkpoint_path, config_path, output_path):
10 | config = OmegaConf.load(config_path)
11 | state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
12 | keys = list(state_dict.keys())
13 |
14 | # extract state_dict for VQVAE
15 | first_stage_dict = {}
16 | first_stage_key = "first_stage_model."
17 | for key in keys:
18 | if key.startswith(first_stage_key):
19 | first_stage_dict[key.replace(first_stage_key, "")] = state_dict[key]
20 |
21 | # extract state_dict for UNetLDM
22 | unet_state_dict = {}
23 | unet_key = "model.diffusion_model."
24 | for key in keys:
25 | if key.startswith(unet_key):
26 | unet_state_dict[key.replace(unet_key, "")] = state_dict[key]
27 |
28 | vqvae_init_args = config.model.params.first_stage_config.params
29 | unet_init_args = config.model.params.unet_config.params
30 |
31 | vqvae = VQModel(**vqvae_init_args).eval()
32 | vqvae.load_state_dict(first_stage_dict)
33 |
34 | unet = UNetLDMModel(**unet_init_args).eval()
35 | unet.load_state_dict(unet_state_dict)
36 |
37 | noise_scheduler = DDIMScheduler(
38 | num_train_timesteps=config.model.params.timesteps,
39 | beta_schedule="scaled_linear",
40 | beta_start=config.model.params.linear_start,
41 | beta_end=config.model.params.linear_end,
42 | clip_sample=False,
43 | )
44 |
45 | pipeline = LDMPipeline(vqvae, unet, noise_scheduler)
46 | pipeline.save_pretrained(output_path)
47 |
48 |
49 | if __name__ == "__main__":
50 | parser = argparse.ArgumentParser()
51 | parser.add_argument("--checkpoint_path", type=str, required=True)
52 | parser.add_argument("--config_path", type=str, required=True)
53 | parser.add_argument("--output_path", type=str, required=True)
54 | args = parser.parse_args()
55 |
56 | convert_ldm_original(args.checkpoint_path, args.config_path, args.output_path)
57 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/setup.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | default_section = FIRSTPARTY
3 | ensure_newline_before_comments = True
4 | force_grid_wrap = 0
5 | include_trailing_comma = True
6 | known_first_party = accelerate
7 | known_third_party =
8 | numpy
9 | torch
10 | torch_xla
11 |
12 | line_length = 119
13 | lines_after_imports = 2
14 | multi_line_output = 3
15 | use_parentheses = True
16 |
17 | [flake8]
18 | ignore = E203, E722, E501, E741, W503, W605
19 | max-line-length = 119
20 | per-file-ignores = __init__.py:F401
21 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import (
2 | is_flax_available,
3 | is_inflect_available,
4 | is_onnx_available,
5 | is_scipy_available,
6 | is_torch_available,
7 | is_transformers_available,
8 | is_unidecode_available,
9 | )
10 |
11 |
12 | __version__ = "0.4.0.dev0"
13 |
14 | from .configuration_utils import ConfigMixin
15 | from .onnx_utils import OnnxRuntimeModel
16 | from .utils import logging
17 |
18 |
19 | if is_torch_available():
20 | from .modeling_utils import ModelMixin
21 | from .models import AutoencoderKL, UNet2DConditionModel, UNet2DModel, VQModel
22 | from .optimization import (
23 | get_constant_schedule,
24 | get_constant_schedule_with_warmup,
25 | get_cosine_schedule_with_warmup,
26 | get_cosine_with_hard_restarts_schedule_with_warmup,
27 | get_linear_schedule_with_warmup,
28 | get_polynomial_decay_schedule_with_warmup,
29 | get_scheduler,
30 | )
31 | from .pipeline_utils import DiffusionPipeline
32 | from .pipelines import (
33 | DDIMPipeline,
34 | DDPMPipeline,
35 | KarrasVePipeline,
36 | LDMPipeline,
37 | PNDMPipeline,
38 | ScoreSdeVePipeline,
39 | )
40 | from .schedulers import (
41 | DDIMScheduler,
42 | DDPMScheduler,
43 | KarrasVeScheduler,
44 | PNDMScheduler,
45 | SchedulerMixin,
46 | ScoreSdeVeScheduler,
47 | )
48 | from .training_utils import EMAModel
49 | else:
50 | from .utils.dummy_pt_objects import * # noqa F403
51 |
52 | if is_torch_available() and is_scipy_available():
53 | from .schedulers import LMSDiscreteScheduler
54 | else:
55 | from .utils.dummy_torch_and_scipy_objects import * # noqa F403
56 |
57 | if is_torch_available() and is_transformers_available():
58 | from .pipelines import (
59 | LDMTextToImagePipeline,
60 | StableDiffusionImg2ImgPipeline,
61 | StableDiffusionInpaintPipeline,
62 | StableDiffusionPipeline,
63 | )
64 | else:
65 | from .utils.dummy_torch_and_transformers_objects import * # noqa F403
66 |
67 | if is_torch_available() and is_transformers_available() and is_onnx_available():
68 | from .pipelines import StableDiffusionOnnxPipeline
69 | else:
70 | from .utils.dummy_torch_and_transformers_and_onnx_objects import * # noqa F403
71 |
72 | if is_flax_available():
73 | from .modeling_flax_utils import FlaxModelMixin
74 | from .models.unet_2d_condition_flax import FlaxUNet2DConditionModel
75 | from .schedulers import (
76 | FlaxDDIMScheduler,
77 | FlaxDDPMScheduler,
78 | FlaxKarrasVeScheduler,
79 | FlaxLMSDiscreteScheduler,
80 | FlaxPNDMScheduler,
81 | FlaxScoreSdeVeScheduler,
82 | )
83 | else:
84 | from .utils.dummy_flax_objects import * # noqa F403
85 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/commands/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from abc import ABC, abstractmethod
16 | from argparse import ArgumentParser
17 |
18 |
19 | class BaseDiffusersCLICommand(ABC):
20 | @staticmethod
21 | @abstractmethod
22 | def register_subcommand(parser: ArgumentParser):
23 | raise NotImplementedError()
24 |
25 | @abstractmethod
26 | def run(self):
27 | raise NotImplementedError()
28 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/commands/diffusers_cli.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Copyright 2022 The HuggingFace Team. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | from argparse import ArgumentParser
17 |
18 | from .env import EnvironmentCommand
19 |
20 |
21 | def main():
22 | parser = ArgumentParser(
23 | "Diffusers CLI tool", usage="diffusers-cli []"
24 | )
25 | commands_parser = parser.add_subparsers(help="diffusers-cli command helpers")
26 |
27 | # Register commands
28 | EnvironmentCommand.register_subcommand(commands_parser)
29 |
30 | # Let's go
31 | args = parser.parse_args()
32 |
33 | if not hasattr(args, "func"):
34 | parser.print_help()
35 | exit(1)
36 |
37 | # Run
38 | service = args.func(args)
39 | service.run()
40 |
41 |
42 | if __name__ == "__main__":
43 | main()
44 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/commands/env.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import platform
16 | from argparse import ArgumentParser
17 |
18 | import huggingface_hub
19 |
20 | from .. import __version__ as version
21 | from ..utils import is_torch_available, is_transformers_available
22 | from . import BaseDiffusersCLICommand
23 |
24 |
25 | def info_command_factory(_):
26 | return EnvironmentCommand()
27 |
28 |
29 | class EnvironmentCommand(BaseDiffusersCLICommand):
30 | @staticmethod
31 | def register_subcommand(parser: ArgumentParser):
32 | download_parser = parser.add_parser("env")
33 | download_parser.set_defaults(func=info_command_factory)
34 |
35 | def run(self):
36 | hub_version = huggingface_hub.__version__
37 |
38 | pt_version = "not installed"
39 | pt_cuda_available = "NA"
40 | if is_torch_available():
41 | import torch
42 |
43 | pt_version = torch.__version__
44 | pt_cuda_available = torch.cuda.is_available()
45 |
46 | transformers_version = "not installed"
47 | if is_transformers_available():
48 | import transformers
49 |
50 | transformers_version = transformers.__version__
51 |
52 | info = {
53 | "`diffusers` version": version,
54 | "Platform": platform.platform(),
55 | "Python version": platform.python_version(),
56 | "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})",
57 | "Huggingface_hub version": hub_version,
58 | "Transformers version": transformers_version,
59 | "Using GPU in script?": "",
60 | "Using distributed or parallel set-up in script?": "",
61 | }
62 |
63 | print(
64 | "\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n"
65 | )
66 | print(self.format_dict(info))
67 |
68 | return info
69 |
70 | @staticmethod
71 | def format_dict(d):
72 | return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n"
73 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/dependency_versions_check.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import sys
15 |
16 | from .dependency_versions_table import deps
17 | from .utils.versions import require_version, require_version_core
18 |
19 |
20 | # define which module versions we always want to check at run time
21 | # (usually the ones defined in `install_requires` in setup.py)
22 | #
23 | # order specific notes:
24 | # - tqdm must be checked before tokenizers
25 |
26 | pkgs_to_check_at_runtime = (
27 | "python tqdm regex requests packaging filelock numpy tokenizers".split()
28 | )
29 | if sys.version_info < (3, 7):
30 | pkgs_to_check_at_runtime.append("dataclasses")
31 | if sys.version_info < (3, 8):
32 | pkgs_to_check_at_runtime.append("importlib_metadata")
33 |
34 | for pkg in pkgs_to_check_at_runtime:
35 | if pkg in deps:
36 | if pkg == "tokenizers":
37 | # must be loaded here, or else tqdm check may fail
38 | from .utils import is_tokenizers_available
39 |
40 | if not is_tokenizers_available():
41 | continue # not required, check version only if installed
42 |
43 | require_version_core(deps[pkg])
44 | else:
45 | raise ValueError(
46 | f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py"
47 | )
48 |
49 |
50 | def dep_version_check(pkg, hint=None):
51 | require_version(deps[pkg], hint)
52 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/dependency_versions_table.py:
--------------------------------------------------------------------------------
1 | # THIS FILE HAS BEEN AUTOGENERATED. To update:
2 | # 1. modify the `_deps` dict in setup.py
3 | # 2. run `make deps_table_update`
4 | deps = {
5 | "Pillow": "Pillow",
6 | "accelerate": "accelerate>=0.11.0",
7 | "black": "black==22.8",
8 | "datasets": "datasets",
9 | "filelock": "filelock",
10 | "flake8": "flake8>=3.8.3",
11 | "flax": "flax>=0.4.1",
12 | "hf-doc-builder": "hf-doc-builder>=0.3.0",
13 | "huggingface-hub": "huggingface-hub>=0.8.1",
14 | "importlib_metadata": "importlib_metadata",
15 | "isort": "isort>=5.5.4",
16 | "jax": "jax>=0.2.8,!=0.3.2,<=0.3.6",
17 | "jaxlib": "jaxlib>=0.1.65,<=0.3.6",
18 | "modelcards": "modelcards==0.1.4",
19 | "numpy": "numpy",
20 | "pytest": "pytest",
21 | "pytest-timeout": "pytest-timeout",
22 | "pytest-xdist": "pytest-xdist",
23 | "scipy": "scipy",
24 | "regex": "regex!=2019.12.17",
25 | "requests": "requests",
26 | "tensorboard": "tensorboard",
27 | "torch": "torch>=1.4",
28 | "transformers": "transformers>=4.21.0",
29 | }
30 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/models/README.md:
--------------------------------------------------------------------------------
1 | # Models
2 |
3 | - Models: Neural networks that model $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$ and are trained end-to-end to denoise a noisy input into an image (see the short example at the end of this file). Examples: UNet, Conditioned UNet, 3D UNet, Transformer UNet
4 |
5 | ## API
6 |
7 | TODO(Suraj, Patrick)
8 |
9 | ## Examples
10 |
11 | TODO(Suraj, Patrick)
12 |
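13 | Until this section is fleshed out, here is a minimal, hedged sketch of the model API: a `UNet2DModel` maps a noisy sample and a timestep to a predicted noise residual. The model below is tiny and randomly initialized purely for illustration; the sizes are made up and do not correspond to any released checkpoint.
14 |
15 | ```python
16 | import torch
17 | from diffusers import UNet2DModel
18 |
19 | # tiny, randomly initialized UNet purely for illustration
20 | model = UNet2DModel(
21 |     sample_size=32,
22 |     in_channels=3,
23 |     out_channels=3,
24 |     layers_per_block=1,
25 |     block_out_channels=(32, 64),
26 |     down_block_types=("DownBlock2D", "AttnDownBlock2D"),
27 |     up_block_types=("AttnUpBlock2D", "UpBlock2D"),
28 | )
29 |
30 | noisy_sample = torch.randn(1, 3, 32, 32)  # x_t
31 | timestep = torch.tensor([10])             # t
32 | with torch.no_grad():
33 |     noise_pred = model(noisy_sample, timestep).sample  # predicted noise residual
34 | print(noise_pred.shape)  # torch.Size([1, 3, 32, 32])
35 | ```
36 |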
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/models/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .unet_2d import UNet2DModel
16 | from .unet_2d_condition import UNet2DConditionModel
17 | from .vae import AutoencoderKL, VQModel
18 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/models/embeddings_flax.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import math
15 |
16 | import flax.linen as nn
17 | import jax.numpy as jnp
18 |
19 |
20 | # This is like models.embeddings.get_timestep_embedding (PyTorch) but
21 | # less general (only handles the case we currently need).
22 | def get_sinusoidal_embeddings(timesteps, embedding_dim):
23 | """
24 | This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings.
25 |
26 | :param timesteps: a 1-D tensor of N indices, one per batch element.
27 | These may be fractional.
28 | :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the
29 | embeddings. :return: an [N x dim] tensor of positional embeddings.
30 | """
31 | half_dim = embedding_dim // 2
32 | emb = math.log(10000) / (half_dim - 1)
33 | emb = jnp.exp(jnp.arange(half_dim) * -emb)
34 | emb = timesteps[:, None] * emb[None, :]
35 | emb = jnp.concatenate([jnp.cos(emb), jnp.sin(emb)], -1)
36 | return emb
37 |
38 |
39 | class FlaxTimestepEmbedding(nn.Module):
40 | time_embed_dim: int = 32
41 | dtype: jnp.dtype = jnp.float32
42 |
43 | @nn.compact
44 | def __call__(self, temb):
45 | temb = nn.Dense(self.time_embed_dim, dtype=self.dtype, name="linear_1")(temb)
46 | temb = nn.silu(temb)
47 | temb = nn.Dense(self.time_embed_dim, dtype=self.dtype, name="linear_2")(temb)
48 | return temb
49 |
50 |
51 | class FlaxTimesteps(nn.Module):
52 | dim: int = 32
53 |
54 | @nn.compact
55 | def __call__(self, timesteps):
56 | return get_sinusoidal_embeddings(timesteps, self.dim)
57 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | from ..utils import is_onnx_available, is_transformers_available
2 | from .ddim import DDIMPipeline
3 | from .ddpm import DDPMPipeline
4 | from .latent_diffusion_uncond import LDMPipeline
5 | from .pndm import PNDMPipeline
6 | from .score_sde_ve import ScoreSdeVePipeline
7 | from .stochastic_karras_ve import KarrasVePipeline
8 |
9 |
10 | if is_transformers_available():
11 | from .latent_diffusion import LDMTextToImagePipeline
12 | from .stable_diffusion import (
13 | StableDiffusionImg2ImgPipeline,
14 | StableDiffusionInpaintPipeline,
15 | StableDiffusionPipeline,
16 | )
17 |
18 | if is_transformers_available() and is_onnx_available():
19 | from .stable_diffusion import StableDiffusionOnnxPipeline
20 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/pipelines/ddim/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .pipeline_ddim import DDIMPipeline
3 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/pipelines/ddpm/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .pipeline_ddpm import DDPMPipeline
3 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/pipelines/latent_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from ...utils import is_transformers_available
3 |
4 |
5 | if is_transformers_available():
6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline
7 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/pipelines/latent_diffusion_uncond/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .pipeline_latent_diffusion_uncond import LDMPipeline
3 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/pipelines/pndm/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .pipeline_pndm import PNDMPipeline
3 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/pipelines/score_sde_ve/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .pipeline_score_sde_ve import ScoreSdeVePipeline
3 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/pipelines/stable_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import List, Union
3 |
4 | import numpy as np
5 |
6 | import PIL
7 | from PIL import Image
8 |
9 | from ...utils import BaseOutput, is_onnx_available, is_transformers_available
10 |
11 |
12 | @dataclass
13 | class StableDiffusionPipelineOutput(BaseOutput):
14 | """
15 | Output class for Stable Diffusion pipelines.
16 |
17 | Args:
18 | images (`List[PIL.Image.Image]` or `np.ndarray`)
19 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
20 | num_channels)`. PIL images or numpy array represent the denoised images of the diffusion pipeline.
21 | nsfw_content_detected (`List[bool]`)
22 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
23 | (nsfw) content.
24 | """
25 |
26 | images: Union[List[PIL.Image.Image], np.ndarray]
27 | nsfw_content_detected: List[bool]
28 |
29 |
30 | if is_transformers_available():
31 | from .pipeline_stable_diffusion import StableDiffusionPipeline
32 | from .pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline
33 | from .pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline
34 | from .safety_checker import StableDiffusionSafetyChecker
35 |
36 | if is_transformers_available() and is_onnx_available():
37 | from .pipeline_stable_diffusion_onnx import StableDiffusionOnnxPipeline
38 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/pipelines/stochastic_karras_ve/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .pipeline_stochastic_karras_ve import KarrasVePipeline
3 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/schedulers/README.md:
--------------------------------------------------------------------------------
1 | # Schedulers
2 |
3 | - Schedulers are the algorithms used to run diffusion models at inference time as well as during training. They include the noise schedules and define algorithm-specific diffusion steps.
4 | - Schedulers can be used interchangeably between diffusion models in inference to find the preferred trade-off between speed and generation quality.
5 | - Schedulers are available in NumPy, but can easily be converted to PyTorch.
6 |
7 | ## API
8 |
9 | - Schedulers should provide one or more `def step(...)` functions that are called iteratively to unroll the diffusion loop during
10 | the forward pass (a minimal example of such a loop is sketched at the end of this README).
11 | - Schedulers should be framework-agnostic, but provide simple functionality to convert the scheduler into a specific framework, such as PyTorch,
12 | with a `set_format(...)` method.
13 |
14 | ## Examples
15 |
16 | - The DDPM scheduler was proposed in [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) and can be found in [scheduling_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddpm.py). An example of how to use this scheduler can be found in [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddpm.py).
17 | - The DDIM scheduler was proposed in [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) and can be found in [scheduling_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddim.py). An example of how to use this scheduler can be found in [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddim.py).
18 | - The PNDM scheduler was proposed in [Pseudo Numerical Methods for Diffusion Models on Manifolds](https://arxiv.org/abs/2202.09778) and can be found in [scheduling_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_pndm.py). An example of how to use this scheduler can be found in [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py).
19 |
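20 | As a complement to the links above, the following is a rough, hedged sketch of how a scheduler's `step(...)` function unrolls the diffusion loop at inference time. It pairs a `DDPMScheduler` with a tiny, randomly initialized `UNet2DModel` (the sizes are made up for illustration), so the result is noise rather than a meaningful image; with a trained model the same loop produces samples.
21 |
22 | ```python
23 | import torch
24 | from diffusers import DDPMScheduler, UNet2DModel
25 |
26 | # tiny, randomly initialized model purely to illustrate the loop
27 | model = UNet2DModel(
28 |     sample_size=32, in_channels=3, out_channels=3, layers_per_block=1,
29 |     block_out_channels=(32, 64),
30 |     down_block_types=("DownBlock2D", "AttnDownBlock2D"),
31 |     up_block_types=("AttnUpBlock2D", "UpBlock2D"),
32 | )
33 | scheduler = DDPMScheduler(num_train_timesteps=1000, tensor_format="pt")
34 | scheduler.set_timesteps(50)
35 |
36 | sample = torch.randn(1, 3, 32, 32)  # start from pure noise
37 | for t in scheduler.timesteps:
38 |     with torch.no_grad():
39 |         model_output = model(sample, t).sample  # predict the noise residual
40 |     sample = scheduler.step(model_output, t, sample).prev_sample  # x_t -> x_{t-1}
41 | ```
42 |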
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/schedulers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | from ..utils import is_flax_available, is_scipy_available, is_torch_available
17 |
18 |
19 | if is_torch_available():
20 | from .scheduling_ddim import DDIMScheduler
21 | from .scheduling_ddpm import DDPMScheduler
22 | from .scheduling_karras_ve import KarrasVeScheduler
23 | from .scheduling_pndm import PNDMScheduler
24 | from .scheduling_sde_ve import ScoreSdeVeScheduler
25 | from .scheduling_sde_vp import ScoreSdeVpScheduler
26 | from .scheduling_utils import SchedulerMixin
27 | else:
28 | from ..utils.dummy_pt_objects import * # noqa F403
29 |
30 | if is_flax_available():
31 | from .scheduling_ddim_flax import FlaxDDIMScheduler
32 | from .scheduling_ddpm_flax import FlaxDDPMScheduler
33 | from .scheduling_karras_ve_flax import FlaxKarrasVeScheduler
34 | from .scheduling_lms_discrete_flax import FlaxLMSDiscreteScheduler
35 | from .scheduling_pndm_flax import FlaxPNDMScheduler
36 | from .scheduling_sde_ve_flax import FlaxScoreSdeVeScheduler
37 | else:
38 | from ..utils.dummy_flax_objects import * # noqa F403
39 |
40 | if is_scipy_available():
41 | from .scheduling_lms_discrete import LMSDiscreteScheduler
42 | else:
43 | from ..utils.dummy_torch_and_scipy_objects import * # noqa F403
44 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/schedulers/scheduling_sde_vp.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch
16 |
17 | # TODO(Patrick, Anton, Suraj) - make scheduler framework independent and clean-up a bit
18 |
19 | import numpy as np
20 | import torch
21 |
22 | from ..configuration_utils import ConfigMixin, register_to_config
23 | from .scheduling_utils import SchedulerMixin
24 |
25 |
26 | class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin):
27 | """
28 | The variance preserving stochastic differential equation (SDE) scheduler.
29 |
30 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
31 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
32 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and
33 | [`~ConfigMixin.from_config`] functions.
34 |
35 | For more information, see the original paper: https://arxiv.org/abs/2011.13456
36 |
37 | UNDER CONSTRUCTION
38 |
39 | """
40 |
41 | @register_to_config
42 | def __init__(
43 | self,
44 | num_train_timesteps=2000,
45 | beta_min=0.1,
46 | beta_max=20,
47 | sampling_eps=1e-3,
48 | tensor_format="np",
49 | ):
50 | self.sigmas = None
51 | self.discrete_sigmas = None
52 | self.timesteps = None
53 |
54 | def set_timesteps(self, num_inference_steps):
55 | self.timesteps = torch.linspace(
56 | 1, self.config.sampling_eps, num_inference_steps
57 | )
58 |
59 | def step_pred(self, score, x, t):
60 | if self.timesteps is None:
61 | raise ValueError(
62 | "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler"
63 | )
64 |
65 | # TODO(Patrick) better comments + non-PyTorch
66 | # postprocess model score
67 | log_mean_coeff = (
68 | -0.25 * t**2 * (self.config.beta_max - self.config.beta_min)
69 | - 0.5 * t * self.config.beta_min
70 | )
71 | std = torch.sqrt(1.0 - torch.exp(2.0 * log_mean_coeff))
72 | score = -score / std[:, None, None, None]
73 |
74 | # compute
75 | dt = -1.0 / len(self.timesteps)
76 |
77 | beta_t = self.config.beta_min + t * (
78 | self.config.beta_max - self.config.beta_min
79 | )
80 | drift = -0.5 * beta_t[:, None, None, None] * x
81 | diffusion = torch.sqrt(beta_t)
82 | drift = drift - diffusion[:, None, None, None] ** 2 * score
83 | x_mean = x + drift * dt
84 |
85 | # add noise
86 | noise = torch.randn_like(x)
87 | x = x_mean + diffusion[:, None, None, None] * np.sqrt(-dt) * noise
88 |
89 | return x, x_mean
90 |
91 | def __len__(self):
92 | return self.config.num_train_timesteps
93 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/testing_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import unittest
4 | from distutils.util import strtobool
5 | from typing import Union
6 |
7 | import torch
8 |
9 | import PIL.Image
10 | import PIL.ImageOps
11 | import requests
12 | from packaging import version
13 |
14 |
15 | global_rng = random.Random()
16 | torch_device = "cuda" if torch.cuda.is_available() else "cpu"
17 | is_torch_higher_equal_than_1_12 = version.parse(
18 | version.parse(torch.__version__).base_version
19 | ) >= version.parse("1.12")
20 |
21 | if is_torch_higher_equal_than_1_12:
22 | torch_device = "mps" if torch.backends.mps.is_available() else torch_device
23 |
24 |
25 | def parse_flag_from_env(key, default=False):
26 | try:
27 | value = os.environ[key]
28 | except KeyError:
29 | # KEY isn't set, default to `default`.
30 | _value = default
31 | else:
32 | # KEY is set, convert it to True or False.
33 | try:
34 | _value = strtobool(value)
35 | except ValueError:
36 | # More values are supported, but let's keep the message simple.
37 | raise ValueError(f"If set, {key} must be yes or no.")
38 | return _value
39 |
40 |
41 | _run_slow_tests = parse_flag_from_env("RUN_SLOW", default=False)
42 |
43 |
44 | def floats_tensor(shape, scale=1.0, rng=None, name=None):
45 | """Creates a random float32 tensor"""
46 | if rng is None:
47 | rng = global_rng
48 |
49 | total_dims = 1
50 | for dim in shape:
51 | total_dims *= dim
52 |
53 | values = []
54 | for _ in range(total_dims):
55 | values.append(rng.random() * scale)
56 |
57 | return torch.tensor(data=values, dtype=torch.float).view(shape).contiguous()
58 |
59 |
60 | def slow(test_case):
61 | """
62 | Decorator marking a test as slow.
63 |
64 | Slow tests are skipped by default. Set the RUN_SLOW environment variable to a truthy value to run them.
65 |
66 | """
67 | return unittest.skipUnless(_run_slow_tests, "test is slow")(test_case)
68 |
69 |
70 | def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image:
71 | """
72 | Loads `image` to a PIL Image.
73 | Args:
74 | image (`str` or `PIL.Image.Image`):
75 | The image to convert to the PIL Image format.
76 | Returns:
77 | `PIL.Image.Image`: A PIL Image.
78 | """
79 | if isinstance(image, str):
80 | if image.startswith("http://") or image.startswith("https://"):
81 | image = PIL.Image.open(requests.get(image, stream=True).raw)
82 | elif os.path.isfile(image):
83 | image = PIL.Image.open(image)
84 | else:
85 | raise ValueError(
86 | f"Incorrect path or url, URLs must start with `http://` or `https://`, and {image} is not a valid path"
87 | )
88 | elif isinstance(image, PIL.Image.Image):
89 | image = image
90 | else:
91 | raise ValueError(
92 | "Incorrect format used for image. Should be an url linking to an image, a local path, or a PIL image."
93 | )
94 | image = PIL.ImageOps.exif_transpose(image)
95 | image = image.convert("RGB")
96 | return image
97 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 The HuggingFace Inc. team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | import os
17 |
18 | from .import_utils import (
19 | ENV_VARS_TRUE_AND_AUTO_VALUES,
20 | ENV_VARS_TRUE_VALUES,
21 | USE_JAX,
22 | USE_TF,
23 | USE_TORCH,
24 | DummyObject,
25 | is_flax_available,
26 | is_inflect_available,
27 | is_modelcards_available,
28 | is_onnx_available,
29 | is_scipy_available,
30 | is_tf_available,
31 | is_torch_available,
32 | is_transformers_available,
33 | is_unidecode_available,
34 | requires_backends,
35 | )
36 | from .logging import get_logger
37 | from .outputs import BaseOutput
38 |
39 |
40 | logger = get_logger(__name__)
41 |
42 |
43 | hf_cache_home = os.path.expanduser(
44 | os.getenv(
45 | "HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")
46 | )
47 | )
48 | default_cache_path = os.path.join(hf_cache_home, "diffusers")
49 |
50 |
51 | CONFIG_NAME = "config.json"
52 | HUGGINGFACE_CO_RESOLVE_ENDPOINT = "https://huggingface.co"
53 | DIFFUSERS_CACHE = default_cache_path
54 | DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules"
55 | HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules"))
56 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/utils/dummy_flax_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | # flake8: noqa
3 |
4 | from ..utils import DummyObject, requires_backends
5 |
6 |
7 | class FlaxModelMixin(metaclass=DummyObject):
8 | _backends = ["flax"]
9 |
10 | def __init__(self, *args, **kwargs):
11 | requires_backends(self, ["flax"])
12 |
13 |
14 | class FlaxDDIMScheduler(metaclass=DummyObject):
15 | _backends = ["flax"]
16 |
17 | def __init__(self, *args, **kwargs):
18 | requires_backends(self, ["flax"])
19 |
20 |
21 | class FlaxDDPMScheduler(metaclass=DummyObject):
22 | _backends = ["flax"]
23 |
24 | def __init__(self, *args, **kwargs):
25 | requires_backends(self, ["flax"])
26 |
27 |
28 | class FlaxKarrasVeScheduler(metaclass=DummyObject):
29 | _backends = ["flax"]
30 |
31 | def __init__(self, *args, **kwargs):
32 | requires_backends(self, ["flax"])
33 |
34 |
35 | class FlaxLMSDiscreteScheduler(metaclass=DummyObject):
36 | _backends = ["flax"]
37 |
38 | def __init__(self, *args, **kwargs):
39 | requires_backends(self, ["flax"])
40 |
41 |
42 | class FlaxPNDMScheduler(metaclass=DummyObject):
43 | _backends = ["flax"]
44 |
45 | def __init__(self, *args, **kwargs):
46 | requires_backends(self, ["flax"])
47 |
48 |
49 | class FlaxUNet2DConditionModel(metaclass=DummyObject):
50 | _backends = ["flax"]
51 |
52 | def __init__(self, *args, **kwargs):
53 | requires_backends(self, ["flax"])
54 |
55 |
56 | class FlaxScoreSdeVeScheduler(metaclass=DummyObject):
57 | _backends = ["flax"]
58 |
59 | def __init__(self, *args, **kwargs):
60 | requires_backends(self, ["flax"])
61 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/utils/dummy_torch_and_scipy_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | # flake8: noqa
3 |
4 | from ..utils import DummyObject, requires_backends
5 |
6 |
7 | class LMSDiscreteScheduler(metaclass=DummyObject):
8 | _backends = ["torch", "scipy"]
9 |
10 | def __init__(self, *args, **kwargs):
11 | requires_backends(self, ["torch", "scipy"])
12 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/utils/dummy_torch_and_transformers_and_onnx_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | # flake8: noqa
3 |
4 | from ..utils import DummyObject, requires_backends
5 |
6 |
7 | class StableDiffusionOnnxPipeline(metaclass=DummyObject):
8 | _backends = ["torch", "transformers", "onnx"]
9 |
10 | def __init__(self, *args, **kwargs):
11 | requires_backends(self, ["torch", "transformers", "onnx"])
12 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/utils/dummy_torch_and_transformers_objects.py:
--------------------------------------------------------------------------------
1 | # This file is autogenerated by the command `make fix-copies`, do not edit.
2 | # flake8: noqa
3 |
4 | from ..utils import DummyObject, requires_backends
5 |
6 |
7 | class LDMTextToImagePipeline(metaclass=DummyObject):
8 | _backends = ["torch", "transformers"]
9 |
10 | def __init__(self, *args, **kwargs):
11 | requires_backends(self, ["torch", "transformers"])
12 |
13 |
14 | class StableDiffusionImg2ImgPipeline(metaclass=DummyObject):
15 | _backends = ["torch", "transformers"]
16 |
17 | def __init__(self, *args, **kwargs):
18 | requires_backends(self, ["torch", "transformers"])
19 |
20 |
21 | class StableDiffusionInpaintPipeline(metaclass=DummyObject):
22 | _backends = ["torch", "transformers"]
23 |
24 | def __init__(self, *args, **kwargs):
25 | requires_backends(self, ["torch", "transformers"])
26 |
27 |
28 | class StableDiffusionPipeline(metaclass=DummyObject):
29 | _backends = ["torch", "transformers"]
30 |
31 | def __init__(self, *args, **kwargs):
32 | requires_backends(self, ["torch", "transformers"])
33 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/src/diffusers/utils/model_card_template.md:
--------------------------------------------------------------------------------
1 | ---
2 | {{ card_data }}
3 | ---
4 |
5 |
7 |
8 | # {{ model_name | default("Diffusion Model") }}
9 |
10 | ## Model description
11 |
12 | This diffusion model is trained with the [🤗 Diffusers](https://github.com/huggingface/diffusers) library
13 | on the `{{ dataset_name }}` dataset.
14 |
15 | ## Intended uses & limitations
16 |
17 | #### How to use
18 |
19 | ```python
20 | # TODO: add an example code snippet for running this diffusion pipeline
21 | ```
22 |
23 | #### Limitations and bias
24 |
25 | [TODO: provide examples of latent issues and potential remediations]
26 |
27 | ## Training data
28 |
29 | [TODO: describe the data used to train the model]
30 |
31 | ### Training hyperparameters
32 |
33 | The following hyperparameters were used during training:
34 | - learning_rate: {{ learning_rate }}
35 | - train_batch_size: {{ train_batch_size }}
36 | - eval_batch_size: {{ eval_batch_size }}
37 | - gradient_accumulation_steps: {{ gradient_accumulation_steps }}
38 | - optimizer: AdamW with betas=({{ adam_beta1 }}, {{ adam_beta2 }}), weight_decay={{ adam_weight_decay }} and epsilon={{ adam_epsilon }}
39 | - lr_scheduler: {{ lr_scheduler }}
40 | - lr_warmup_steps: {{ lr_warmup_steps }}
41 | - ema_inv_gamma: {{ ema_inv_gamma }}
42 | - ema_power: {{ ema_power }}
43 | - ema_max_decay: {{ ema_max_decay }}
44 | - mixed_precision: {{ mixed_precision }}
45 |
46 | ### Training results
47 |
48 | 📈 [TensorBoard logs](https://huggingface.co/{{ repo_name }}/tensorboard?#scalars)
49 |
50 |
51 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/FlashAttention/diffusers/tests/__init__.py
--------------------------------------------------------------------------------
/FlashAttention/diffusers/tests/test_config.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2022 HuggingFace Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import tempfile
17 | import unittest
18 |
19 | from diffusers.configuration_utils import ConfigMixin, register_to_config
20 |
21 |
22 | class SampleObject(ConfigMixin):
23 | config_name = "config.json"
24 |
25 | @register_to_config
26 | def __init__(
27 | self,
28 | a=2,
29 | b=5,
30 | c=(2, 5),
31 | d="for diffusion",
32 | e=[1, 3],
33 | ):
34 | pass
35 |
36 |
37 | class ConfigTester(unittest.TestCase):
38 | def test_load_not_from_mixin(self):
39 | with self.assertRaises(ValueError):
40 | ConfigMixin.from_config("dummy_path")
41 |
42 | def test_register_to_config(self):
43 | obj = SampleObject()
44 | config = obj.config
45 | assert config["a"] == 2
46 | assert config["b"] == 5
47 | assert config["c"] == (2, 5)
48 | assert config["d"] == "for diffusion"
49 | assert config["e"] == [1, 3]
50 |
51 | # init ignore private arguments
52 | obj = SampleObject(_name_or_path="lalala")
53 | config = obj.config
54 | assert config["a"] == 2
55 | assert config["b"] == 5
56 | assert config["c"] == (2, 5)
57 | assert config["d"] == "for diffusion"
58 | assert config["e"] == [1, 3]
59 |
60 | # can override default
61 | obj = SampleObject(c=6)
62 | config = obj.config
63 | assert config["a"] == 2
64 | assert config["b"] == 5
65 | assert config["c"] == 6
66 | assert config["d"] == "for diffusion"
67 | assert config["e"] == [1, 3]
68 |
69 | # can use positional arguments.
70 | obj = SampleObject(1, c=6)
71 | config = obj.config
72 | assert config["a"] == 1
73 | assert config["b"] == 5
74 | assert config["c"] == 6
75 | assert config["d"] == "for diffusion"
76 | assert config["e"] == [1, 3]
77 |
78 | def test_save_load(self):
79 | obj = SampleObject()
80 | config = obj.config
81 |
82 | assert config["a"] == 2
83 | assert config["b"] == 5
84 | assert config["c"] == (2, 5)
85 | assert config["d"] == "for diffusion"
86 | assert config["e"] == [1, 3]
87 |
88 | with tempfile.TemporaryDirectory() as tmpdirname:
89 | obj.save_config(tmpdirname)
90 | new_obj = SampleObject.from_config(tmpdirname)
91 | new_config = new_obj.config
92 |
93 | # unfreeze configs
94 | config = dict(config)
95 | new_config = dict(new_config)
96 |
97 | assert config.pop("c") == (2, 5) # instantiated as tuple
98 | assert new_config.pop("c") == [2, 5] # saved & loaded as list because of json
99 | assert config == new_config
100 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/tests/test_models_vq.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2022 HuggingFace Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import unittest
17 |
18 | import torch
19 |
20 | from diffusers import VQModel
21 | from diffusers.testing_utils import floats_tensor, torch_device
22 |
23 | from .test_modeling_common import ModelTesterMixin
24 |
25 |
26 | torch.backends.cuda.matmul.allow_tf32 = False
27 |
28 |
29 | class VQModelTests(ModelTesterMixin, unittest.TestCase):
30 | model_class = VQModel
31 |
32 | @property
33 | def dummy_input(self, sizes=(32, 32)):
34 | batch_size = 4
35 | num_channels = 3
36 |
37 | image = floats_tensor((batch_size, num_channels) + sizes).to(torch_device)
38 |
39 | return {"sample": image}
40 |
41 | @property
42 | def input_shape(self):
43 | return (3, 32, 32)
44 |
45 | @property
46 | def output_shape(self):
47 | return (3, 32, 32)
48 |
49 | def prepare_init_args_and_inputs_for_common(self):
50 | init_dict = {
51 | "block_out_channels": [32, 64],
52 | "in_channels": 3,
53 | "out_channels": 3,
54 | "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"],
55 | "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"],
56 | "latent_channels": 3,
57 | }
58 | inputs_dict = self.dummy_input
59 | return init_dict, inputs_dict
60 |
61 | def test_forward_signature(self):
62 | pass
63 |
64 | def test_training(self):
65 | pass
66 |
67 | def test_from_pretrained_hub(self):
68 | model, loading_info = VQModel.from_pretrained(
69 | "fusing/vqgan-dummy", output_loading_info=True
70 | )
71 | self.assertIsNotNone(model)
72 | self.assertEqual(len(loading_info["missing_keys"]), 0)
73 |
74 | model.to(torch_device)
75 | image = model(**self.dummy_input)
76 |
77 | assert image is not None, "Make sure output is not None"
78 |
79 | def test_output_pretrained(self):
80 | model = VQModel.from_pretrained("fusing/vqgan-dummy")
81 | model.to(torch_device).eval()
82 |
83 | torch.manual_seed(0)
84 | if torch.cuda.is_available():
85 | torch.cuda.manual_seed_all(0)
86 |
87 | image = torch.randn(
88 | 1,
89 | model.config.in_channels,
90 | model.config.sample_size,
91 | model.config.sample_size,
92 | )
93 | image = image.to(torch_device)
94 | with torch.no_grad():
95 | # Warmup pass when using mps (see #372)
96 | if torch_device == "mps":
97 | _ = model(image)
98 | output = model(image).sample
99 |
100 | output_slice = output[0, -1, -3:, -3:].flatten().cpu()
101 | # fmt: off
102 | expected_output_slice = torch.tensor([-0.0153, -0.4044, -0.1880, -0.5161, -0.2418, -0.4072, -0.1612, -0.0633, -0.0143])
103 | # fmt: on
104 | self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3))
105 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/utils/check_config_docstrings.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2022 The HuggingFace Inc. team.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import importlib
17 | import inspect
18 | import os
19 | import re
20 |
21 |
22 | # All paths are set with the intent you should run this script from the root of the repo with the command
23 | # python utils/check_config_docstrings.py
24 | PATH_TO_TRANSFORMERS = "src/transformers"
25 |
26 |
27 | # This is to make sure the transformers module imported is the one in the repo.
28 | spec = importlib.util.spec_from_file_location(
29 | "transformers",
30 | os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"),
31 | submodule_search_locations=[PATH_TO_TRANSFORMERS],
32 | )
33 | transformers = spec.loader.load_module()
34 |
35 | CONFIG_MAPPING = transformers.models.auto.configuration_auto.CONFIG_MAPPING
36 |
37 | # Regex pattern used to find the checkpoint mentioned in the docstring of `config_class`.
38 | # For example, `[bert-base-uncased](https://huggingface.co/bert-base-uncased)`
39 | _re_checkpoint = re.compile("\[(.+?)\]\((https://huggingface\.co/.+?)\)")
40 |
41 |
42 | CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK = {
43 | "CLIPConfigMixin",
44 | "DecisionTransformerConfigMixin",
45 | "EncoderDecoderConfigMixin",
46 | "RagConfigMixin",
47 | "SpeechEncoderDecoderConfigMixin",
48 | "VisionEncoderDecoderConfigMixin",
49 | "VisionTextDualEncoderConfigMixin",
50 | }
51 |
52 |
53 | def check_config_docstrings_have_checkpoints():
54 | configs_without_checkpoint = []
55 |
56 | for config_class in list(CONFIG_MAPPING.values()):
57 | checkpoint_found = False
58 |
59 | # source code of `config_class`
60 | config_source = inspect.getsource(config_class)
61 | checkpoints = _re_checkpoint.findall(config_source)
62 |
63 | for checkpoint in checkpoints:
64 | # Each `checkpoint` is a tuple of a checkpoint name and a checkpoint link.
65 | # For example, `('bert-base-uncased', 'https://huggingface.co/bert-base-uncased')`
66 | ckpt_name, ckpt_link = checkpoint
67 |
68 | # verify the checkpoint name corresponds to the checkpoint link
69 | ckpt_link_from_name = f"https://huggingface.co/{ckpt_name}"
70 | if ckpt_link == ckpt_link_from_name:
71 | checkpoint_found = True
72 | break
73 |
74 | name = config_class.__name__
75 | if (
76 | not checkpoint_found
77 | and name not in CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK
78 | ):
79 | configs_without_checkpoint.append(name)
80 |
81 | if len(configs_without_checkpoint) > 0:
82 | message = "\n".join(sorted(configs_without_checkpoint))
83 | raise ValueError(
84 | f"The following configurations don't contain any valid checkpoint:\n{message}"
85 | )
86 |
87 |
88 | if __name__ == "__main__":
89 | check_config_docstrings_have_checkpoints()
90 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/utils/print_env.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # coding=utf-8
4 | # Copyright 2022 The HuggingFace Inc. team.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # this script dumps information about the environment
19 |
20 | import os
21 | import platform
22 | import sys
23 |
24 |
25 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
26 |
27 | print("Python version:", sys.version)
28 |
29 | print("OS platform:", platform.platform())
30 | print("OS architecture:", platform.machine())
31 |
32 | try:
33 | import torch
34 |
35 | print("Torch version:", torch.__version__)
36 | print("Cuda available:", torch.cuda.is_available())
37 | print("Cuda version:", torch.version.cuda)
38 | print("CuDNN version:", torch.backends.cudnn.version())
39 | print("Number of GPUs available:", torch.cuda.device_count())
40 | except ImportError:
41 | print("Torch version:", None)
42 |
43 | try:
44 | import transformers
45 |
46 | print("transformers version:", transformers.__version__)
47 | except ImportError:
48 | print("transformers version:", None)
49 |
--------------------------------------------------------------------------------
/FlashAttention/diffusers/utils/stale.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 The HuggingFace Team, the AllenNLP library authors. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Script to close stale issue. Taken in part from the AllenNLP repository.
16 | https://github.com/allenai/allennlp.
17 | """
18 | import os
19 | from datetime import datetime as dt
20 |
21 | from github import Github
22 |
23 |
24 | LABELS_TO_EXEMPT = [
25 | "good first issue",
26 | "good second issue",
27 | "good difficult issue",
28 | "enhancement",
29 | "new pipeline/model",
30 | "new scheduler",
31 | "wip",
32 | ]
33 |
34 |
35 | def main():
36 | g = Github(os.environ["GITHUB_TOKEN"])
37 | repo = g.get_repo("huggingface/diffusers")
38 | open_issues = repo.get_issues(state="open")
39 |
40 | for issue in open_issues:
41 | comments = sorted(
42 | [comment for comment in issue.get_comments()],
43 | key=lambda i: i.created_at,
44 | reverse=True,
45 | )
46 | last_comment = comments[0] if len(comments) > 0 else None
47 | if (
48 | last_comment is not None
49 | and last_comment.user.login != "github-actions[bot]"
50 | and (dt.utcnow() - issue.updated_at).days > 23
51 | and (dt.utcnow() - issue.created_at).days >= 30
52 | and not any(
53 | label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels()
54 | )
55 | ):
56 | issue.create_comment(
57 | "This issue has been automatically marked as stale because it has not had "
58 | "recent activity. If you think this still needs to be addressed "
59 | "please comment on this thread.\n\nPlease note that issues that do not follow the "
60 | "[contributing guidelines](https://github.com/huggingface/diffusers/blob/main/CONTRIBUTING.md) "
61 | "are likely to be ignored."
62 | )
63 | issue.edit(labels=["stale"])
64 |
65 |
66 | if __name__ == "__main__":
67 | main()
68 |
--------------------------------------------------------------------------------
/FlashAttention/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==4.21.2
2 | diffusers==0.3.0
3 | #torch==1.12.1+cu116
4 | scipy
5 | uvicorn
6 | pydantic
7 | fastapi
8 | huggingface_hub
--------------------------------------------------------------------------------
/FlashAttention/server.py:
--------------------------------------------------------------------------------
1 | from huggingface_hub import HfApi
2 | from huggingface_hub.commands.user import _login
3 |
4 | _login(HfApi(), token="")  # provide your Hugging Face access token here
5 | from fastapi import FastAPI
6 | from typing import List, Union
7 | from pydantic import BaseModel
8 | from diffusers import StableDiffusionPipeline
9 | import torch
10 | import io
11 | from fastapi import Response
12 |
13 | torch_device = torch.device("cuda:0")
14 |
15 |
16 | class Item(BaseModel):
17 | prompt: Union[str, List[str]]
18 | img_height: int = 512
19 | img_width: int = 512
20 | num_inference_steps: int = 50
21 | guidance_scale: float = 7.5
22 |
23 |
24 | app = FastAPI()
25 | pipe = StableDiffusionPipeline.from_pretrained(
26 | "CompVis/stable-diffusion-v1-4",
27 | revision="fp16",
28 | torch_dtype=torch.float16,
29 | use_auth_token=True,
30 | ).to("cuda")
31 |
32 |
33 | @app.post("/predict/")
34 | async def predict(input_api: Item):
35 | with torch.inference_mode(), torch.autocast("cuda"):
36 |         images = pipe(input_api.prompt, height=input_api.img_height, width=input_api.img_width, num_inference_steps=input_api.num_inference_steps, guidance_scale=input_api.guidance_scale)
37 | im = images.images[0]
38 |
39 | # save image to an in-memory bytes buffer
40 | with io.BytesIO() as buf:
41 | im.save(buf, format="PNG")
42 | im_bytes = buf.getvalue()
43 | headers = {"Content-Disposition": 'inline; filename="test.png"'}
44 | return Response(im_bytes, headers=headers, media_type="image/png")
45 |
--------------------------------------------------------------------------------
/ONNX/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9-slim
2 |
3 | WORKDIR /app
4 |
5 | ARG model_dir_path
6 | ARG onnx_execution_provider=CUDAExecutionProvider
7 |
8 | ENV ONNX_EXECUTION_PROVIDER=$onnx_execution_provider MODEL_DIR_PATH=/app/model
9 |
10 | WORKDIR /app
11 | COPY requirements.txt requirements.txt
12 | RUN python -m pip install --upgrade pip && \
13 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html && \
14 | pip install -r /app/requirements.txt
15 | COPY $model_dir_path /app/model
16 | COPY server.py model.py ./
17 | EXPOSE 5000
18 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"]
19 |
20 | # Build Docker image example
21 | # sudo docker build --build-arg model_dir_path=stable_diffusion_onnx_model --build-arg onnx_execution_provider=CUDAExecutionProvider -f Dockerfile -t stable_diffusion_onnx_img .
22 |
23 | # Run Docker image example
24 | # sudo docker run --gpus all -p 5000:5000 stable_diffusion_onnx_img
25 |
--------------------------------------------------------------------------------
/ONNX/README.md:
--------------------------------------------------------------------------------
1 | # ONNX Stable Diffusion Example
2 |
3 | ## 1. Requirements
4 | The Stable Diffusion model will be downloaded from the Hugging Face Hub, so before running any of the scripts (`demo.py` or `server.py`) you will have to log in to the Hugging Face Hub using the following command:
5 |
6 | ```
7 | huggingface-cli login
8 | ```
9 |
10 | Alternatively, you can download the same model from the following URL: `https://downloads.stochastic.ai/stable-diffusion/onnx_model.zip` (unzip it and point `server.py` to the extracted directory via the `MODEL_DIR_PATH` environment variable, or pass the directory as the `model_dir_path` Docker build argument).
11 |
12 | ### 1.1. Docker execution
13 | [Install Docker](https://docs.docker.com/engine/install/)
14 |
15 |
16 | ### 1.2. Python execution
17 | [Install Python](https://www.python.org/downloads/) and the required libraries:
18 | ```
19 | pip install -r requirements.txt
20 | ```
21 |
22 | ## 2. REST API
23 |
24 | ### 2.1. Docker execution
25 |
26 | 1. Build the Docker image
27 | ```
28 | docker build --build-arg model_dir_path=/path/to/stable_diffusion/model -f Dockerfile -t stable_diffusion_img .
29 | ```
30 |
31 | 2. Execute the Docker Container
32 | ```
33 | sudo docker run --gpus all -p 5000:5000 stable_diffusion_img
34 | ```
35 |
36 | ### 2.2. Python execution
37 |
38 | To deploy the Stable Diffusion model as an API, execute the following command (an example client is shown in section 4 below):
39 | ```
40 | uvicorn server:app --host 0.0.0.0 --port 5000
41 | ```
42 |
43 | ## 3. Demo App
44 |
45 | To generate images as a command line tool, execute the following command:
46 | ```
47 | python demo.py --prompt "an astronaut riding a horse"
48 | ```
49 |
50 | Check all the options of the command line tool with `python demo.py --help`
51 |
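52 | ## 4. Example API client
53 |
54 | Once the REST API from section 2 is running, you can query it from Python. The sketch below is a minimal example: the field names follow `server.py` in this directory, while the prompt and output filenames are just placeholder values. The server returns the generated images as nested lists of pixel values, so the client rebuilds PIL images from them:
55 |
56 | ```python
57 | import requests
58 | import numpy as np
59 | from PIL import Image
60 |
61 | payload = {
62 |     "prompt": "an astronaut riding a horse",
63 |     "num_inference_steps": 50,
64 |     "guidance_scale": 7.5,
65 | }
66 | response = requests.post("http://localhost:5000/predict/", json=payload)
67 | response.raise_for_status()
68 | body = response.json()
69 |
70 | # Rebuild and save the returned images (each one is a nested list of uint8 pixel values).
71 | for i, img in enumerate(body["images"]):
72 |     Image.fromarray(np.array(img, dtype=np.uint8)).save(f"output_{i}.png")
73 |
74 | print("Generation time (s):", body["generation_time_in_secs"])
75 | ```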
--------------------------------------------------------------------------------
/ONNX/model.py:
--------------------------------------------------------------------------------
1 | from diffusers import StableDiffusionOnnxPipeline
2 | import torch
3 | from typing import List, Union
4 | import time
5 | from PIL import Image
6 |
7 |
8 | def load_model(
9 | model_name_or_path="CompVis/stable-diffusion-v1-4", provider="CUDAExecutionProvider"
10 | ) -> StableDiffusionOnnxPipeline:
11 | """Loads the model
12 |
13 | :param model_name_or_path: model name or path, defaults to "CompVis/stable-diffusion-v1-4"
14 | :param provider: execution provider - Onnx Runtime, defaults to "CUDAExecutionProvider"
15 | :return: the model
16 | """
17 |
18 | pipe = StableDiffusionOnnxPipeline.from_pretrained(
19 | model_name_or_path,
20 | revision="onnx",
21 | provider=provider,
22 | use_auth_token=True,
23 | )
24 |
25 | return pipe
26 |
27 |
28 | def inference(
29 | model: StableDiffusionOnnxPipeline,
30 | prompt: Union[str, List[str]],
31 | img_height: int = 512,
32 | img_width: int = 512,
33 | num_inference_steps: int = 50,
34 | guidance_scale: float = 7.5,
35 | num_images_per_prompt: int = 1,
36 | seed: int = None,
37 | return_time=False,
38 | ) -> Image:
39 | """Function to start generating images
40 |
41 | :param model: model
42 | :param prompt: prompt
43 | :param img_height: image height, defaults to 512
44 | :param img_width: image width, defaults to 512
45 | :param num_inference_steps: number of inference steps, defaults to 50
46 | :param guidance_scale: guidance scale, defaults to 7.5
47 | :param num_images_per_prompt: number of images per prompt, defaults to 1
48 | :param seed: seed, defaults to None
49 | :param return_time: if the time to generate should be returned, defaults to False
50 | :return: the generated images and the time if return_time is True
51 | """
52 | generator = None
53 | if seed is not None:
54 | generator = torch.Generator(device="cuda")
55 | generator = generator.manual_seed(seed)
56 |
57 | start_time = time.time()
58 | output = model(
59 | prompt=prompt,
60 | height=img_height,
61 | width=img_width,
62 | num_inference_steps=num_inference_steps,
63 | guidance_scale=guidance_scale,
64 | num_images_per_prompt=num_images_per_prompt,
65 | generator=generator,
66 | )
67 | end_time = time.time()
68 |
69 | if return_time:
70 | return output.images, end_time - start_time
71 |
72 | return output.images
73 |
--------------------------------------------------------------------------------
/ONNX/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==4.21.2
2 | diffusers==0.3.0
3 | torch==1.12.1+cu116
4 | fastapi==0.85.0
5 | uvicorn[standard]==0.18.3
6 | onnxruntime-gpu==1.12.1
7 | numpy==1.23.4
--------------------------------------------------------------------------------
/ONNX/server.py:
--------------------------------------------------------------------------------
1 | from fastapi import FastAPI
2 | from model import load_model, inference
3 | from pydantic import BaseModel
4 | from typing import Union, List
5 | import torch
6 | import numpy as np
7 | import os
8 | from typing import Dict, Union
9 | from PIL import Image
10 |
11 |
12 | class Item(BaseModel):
13 | prompt: Union[str, List[str]]
14 | img_height: int = 512
15 | img_width: int = 512
16 | num_inference_steps: int = 50
17 | guidance_scale: float = 7.5
18 | num_images_per_prompt: int = 1
19 | seed: int = None
20 |
21 |
22 | execution_provider = os.getenv("ONNX_EXECUTION_PROVIDER")
23 |
24 | if execution_provider is None and torch.cuda.is_available():
25 |     print("[+] Moving the model to the GPU")
26 |     execution_provider = "CUDAExecutionProvider"
27 | elif execution_provider is None:
28 |     print("[+] Your model will be executed on the CPU. The execution might be very slow.")
29 |     execution_provider = "CPUExecutionProvider"
30 |
31 |
32 | app = FastAPI()
33 | print("[+] Loading model")
34 |
35 | model = load_model(
36 | model_name_or_path="CompVis/stable-diffusion-v1-4"
37 | if os.getenv("MODEL_DIR_PATH") is None
38 | else os.getenv("MODEL_DIR_PATH"),
39 |     provider=execution_provider,
40 | )
41 | print("[+] Model loaded")
42 |
43 |
44 | @app.post("/predict/")
45 | async def predict(input_api: Item) -> Dict:
46 |     """POST method that receives the prompts
47 |
48 | :param input_api: input
49 | :return: the images and the time to generate the images
50 | """
51 | model_input = {**input_api.dict(), **{"return_time": True}}
52 |
53 | images, time = inference(model=model, **model_input)
54 |
55 | images = np.array([np.array(img) for img in images]).tolist()
56 |
57 | return {"images": images, "generation_time_in_secs": time}
58 |
--------------------------------------------------------------------------------
/PyTorch/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9-slim
2 |
3 | # Download the HuggingFace model in your local machine and specify the directory path
4 | ARG model_dir_path
5 |
6 | WORKDIR /code
7 | ENV MODEL_DIR_PATH=/code/model
8 | COPY requirements.txt requirements.txt
9 | RUN python -m pip install --upgrade pip && \
10 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html && \
11 | pip install -r /code/requirements.txt
12 | COPY $model_dir_path /code/model
13 | COPY server.py model.py ./
14 | EXPOSE 5000
15 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"]
16 |
17 | # Build Docker image example
18 | # sudo docker build --build-arg model_dir_path=stable_diffusion_torch_model -f Dockerfile -t stable_diffusion_img .
19 |
20 | # Run Docker image example
21 | # sudo docker run --gpus all -p 5000:5000 stable_diffusion_img
--------------------------------------------------------------------------------
/PyTorch/README.md:
--------------------------------------------------------------------------------
1 | # PyTorch FP16 Stable Diffusion Example
2 |
3 | ## 1. Requirements
4 | The Stable Diffusion model will be downloaded from the Hugging Face Hub, so before running any of the scripts (`demo.py` or `server.py`) you will have to log in to the Hugging Face Hub using the following command:
5 |
6 | ```
7 | huggingface-cli login
8 | ```
9 |
10 | Alternatively, you can download the same model from the following URL: `https://downloads.stochastic.ai/stable-diffusion/pytorch_model.zip` (unzip it and point `server.py` to the extracted directory via the `MODEL_DIR_PATH` environment variable, or pass the directory as the `model_dir_path` Docker build argument).
11 |
12 | ### 1.1. Docker execution
13 | [Install Docker](https://docs.docker.com/engine/install/)
14 |
15 |
16 | ### 1.2. Python execution
17 | [Install Python](https://www.python.org/downloads/) and the required libraries:
18 | ```
19 | pip install -r requirements.txt
20 | ```
21 |
22 | ## 2. REST API
23 |
24 | ### 2.1. Docker execution
25 |
26 | 1. Build the Docker image
27 | ```
28 | docker build --build-arg model_dir_path=/path/to/stable_diffusion/model -f Dockerfile -t stable_diffusion_img .
29 | ```
30 |
31 | 2. Execute the Docker Container
32 | ```bash
33 | sudo docker run --gpus all -p 5000:5000 stable_diffusion_img
34 | ```
35 |
36 | ### 2.2. Python execution
37 |
38 | To deploy the Stable Diffusion model as an API, execute the following command:
39 | ```
40 | uvicorn server:app --host 0.0.0.0 --port 5000
41 | ```
42 |
43 | ## 3. Demo App
44 |
45 | To generate images as a command line tool, execute the following command:
46 | ```bash
47 | python demo.py --prompt "an astronaut riding a horse"
48 | ```
49 |
50 | Check all the options of the command line tool with `python demo.py --help`
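51 |
52 | ## 4. Using the pipeline from Python
53 |
54 | If you prefer to call the pipeline from your own code instead of the command-line tool, here is a minimal sketch using the helpers defined in `model.py` (run it from this directory; the prompt, seed, and output filename are just example values):
55 |
56 | ```python
57 | from model import load_model, inference
58 |
59 | pipe = load_model()  # downloads the checkpoint from the Hugging Face Hub on first use
60 | images, seconds = inference(
61 |     model=pipe,
62 |     prompt="an astronaut riding a horse",
63 |     num_inference_steps=50,
64 |     guidance_scale=7.5,
65 |     seed=42,
66 |     return_time=True,
67 | )
68 | images[0].save("astronaut.png")
69 | print(f"Generated in {seconds:.1f} s")
70 | ```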
--------------------------------------------------------------------------------
/PyTorch/demo.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from model import load_model, inference
3 | from pathlib import Path
4 | import uuid
5 |
6 |
7 | def get_args():
8 | """Configure argparser
9 |
10 | :return: arguments
11 | """
12 | parser = argparse.ArgumentParser()
13 | parser.add_argument(
14 | "--prompt",
15 | default="Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci",
16 | help="input prompt",
17 | )
18 | parser.add_argument(
19 | "--img_height",
20 | type=int,
21 | default=512,
22 | help="The height in pixels of the generated image.",
23 | )
24 | parser.add_argument(
25 | "--img_width",
26 | type=int,
27 | default=512,
28 | help="The width in pixels of the generated image.",
29 | )
30 | parser.add_argument(
31 | "--num_inference_steps",
32 | type=int,
33 | default=50,
34 | help="The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference",
35 | )
36 | parser.add_argument(
37 | "--guidance_scale", type=float, default=7.5, help="Guidance scale"
38 | )
39 | parser.add_argument(
40 | "--num_images_per_prompt",
41 | type=int,
42 | default=1,
43 | help="The number of images to generate per prompt.",
44 | )
45 | parser.add_argument(
46 | "--seed", type=int, default=None, help="Seed to make generation deterministic"
47 | )
48 | parser.add_argument(
49 | "--saving_path",
50 | type=str,
51 | default="generated_images",
52 | help="Directory where the generated images will be saved",
53 | )
54 |
55 | return parser.parse_args()
56 |
57 |
58 | if __name__ == "__main__":
59 | args = get_args()
60 |
61 | # Create directory to save images if it does not exist
62 | saving_path = Path(args.saving_path)
63 | if not saving_path.exists():
64 | saving_path.mkdir(exist_ok=True, parents=True)
65 |
66 | print("[+] Loading the model")
67 | model = load_model()
68 | print("[+] Model loaded")
69 |
70 | print("[+] Generating images...")
71 | # PIL images
72 | images, time = inference(
73 | model=model,
74 | prompt=args.prompt,
75 | img_height=args.img_height,
76 | img_width=args.img_width,
77 | num_inference_steps=args.num_inference_steps,
78 | guidance_scale=args.guidance_scale,
79 | num_images_per_prompt=args.num_images_per_prompt,
80 | seed=args.seed,
81 | return_time=True,
82 | )
83 |
84 | print("[+] Time needed to generate the images: {} seconds".format(time))
85 |
86 | # Save PIL images with a random name
87 | for img in images:
88 | img.save("{}/{}.png".format(saving_path.as_posix(), uuid.uuid4()))
89 |
90 | print("[+] Images saved in the following path: {}".format(saving_path.as_posix()))
91 |
--------------------------------------------------------------------------------
/PyTorch/model.py:
--------------------------------------------------------------------------------
1 | from diffusers import StableDiffusionPipeline
2 | import torch
3 | from typing import List, Union
4 | import time
5 |
6 |
7 | def load_model(
8 | model_name_or_path="stabilityai/stable-diffusion-2-1",
9 | ) -> StableDiffusionPipeline:
10 | """Load model
11 |
12 |     :param model_name_or_path: model name (downloaded from HF Hub) or model path (local), defaults to "stabilityai/stable-diffusion-2-1"
13 | :return: the Stable Diffusion pipeline
14 | """
15 | pipe = StableDiffusionPipeline.from_pretrained(
16 | model_name_or_path,
17 | # revision="fp16",
18 | torch_dtype=torch.float16,
19 | # use_auth_token=True,
20 | )
21 | pipe = pipe.to("cuda")
22 |
23 | return pipe
24 |
25 |
26 | def inference(
27 | model: StableDiffusionPipeline,
28 | prompt: Union[str, List[str]],
29 | img_height: int = 512,
30 | img_width: int = 512,
31 | num_inference_steps: int = 50,
32 | guidance_scale: float = 7.5,
33 | num_images_per_prompt: int = 1,
34 | seed: int = None,
35 | return_time=False,
36 | ):
37 | """Do inference
38 |
39 | :param model: the Stable Diffusion pipeline
40 | :param prompt: the prompt
41 | :param img_height: height of the generated image, defaults to 512
42 | :param img_width: width of the generated image, defaults to 512
43 | :param num_inference_steps: the number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference, defaults to 50
44 | :param guidance_scale: guidance scale, defaults to 7.5
45 | :param num_images_per_prompt: the number of images to generate per prompt, defaults to 1
46 | :param seed: Seed to make generation deterministic, defaults to None
47 | :param return_time: specify if time taken to generate the images should be returned, defaults to False
48 | :return: the output images and the time (if return time is True)
49 | """
50 | generator = None
51 | if seed is not None:
52 | generator = torch.Generator(device="cuda")
53 | generator = generator.manual_seed(seed)
54 |
55 | start_time = time.time()
56 | with torch.autocast("cuda"):
57 | output = model(
58 | prompt=prompt,
59 | height=img_height,
60 | width=img_width,
61 | num_inference_steps=num_inference_steps,
62 | guidance_scale=guidance_scale,
63 | num_images_per_prompt=num_images_per_prompt,
64 | generator=generator,
65 | )
66 | end_time = time.time()
67 |
68 | if return_time:
69 | return output.images, end_time - start_time
70 |
71 | return output.images
72 |
--------------------------------------------------------------------------------
/PyTorch/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==4.30.1
2 | diffusers==0.3.0
3 | torch==2.0.1+cu117
4 | fastapi==0.85.0
5 | uvicorn[standard]==0.18.3
6 | accelerate==0.20.3
7 | safetensors==0.3.1
8 | scipy==1.10.1
9 | torchvision==0.15.2+cu117
--------------------------------------------------------------------------------
/PyTorch/server.py:
--------------------------------------------------------------------------------
1 | import os
2 | from fastapi import FastAPI
3 | from model import load_model, inference
4 | from pydantic import BaseModel
5 | from typing import Union, List
6 | import torch
7 | import numpy as np
8 |
9 |
10 | class Item(BaseModel):
11 | prompt: Union[str, List[str]]
12 | img_height: int = 512
13 | img_width: int = 512
14 | num_inference_steps: int = 50
15 | guidance_scale: float = 7.5
16 | num_images_per_prompt: int = 1
17 | seed: int = None
18 |
19 |
20 | app = FastAPI()
21 | print("[+] Loading model")
22 | model = load_model(
23 | model_name_or_path="CompVis/stable-diffusion-v1-4"
24 | if os.getenv("MODEL_DIR_PATH") is None
25 | else os.getenv("MODEL_DIR_PATH")
26 | )
27 | print("[+] Model loaded")
28 |
29 | if torch.cuda.is_available():
30 | print("[+] Moving the model to the GPU")
31 | model = model.to("cuda")
32 | else:
33 |     print("[+] Your model will be executed on the CPU. The execution might be very slow.")
34 |
35 |
36 | @app.post("/predict/")
37 | async def predict(input_api: Item):
38 | model_input = {**input_api.dict(), **{"return_time": True}}
39 |
40 | images, time = inference(model=model, **model_input)
41 |
42 | images = np.array([np.array(img) for img in images]).tolist()
43 |
44 | return {"images": images, "generation_time_in_secs": time}
45 |
--------------------------------------------------------------------------------
/TensorRT/Dockerfile:
--------------------------------------------------------------------------------
1 |
2 | FROM nvidia/cuda:11.6.0-devel-ubuntu20.04
3 |
4 | RUN apt-get update && apt-get install --no-install-recommends -y curl && apt-get -y install git
5 |
6 | ENV CONDA_AUTO_UPDATE_CONDA=false \
7 | PATH=/opt/miniconda/bin:$PATH
8 | RUN curl -sLo ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh \
9 | && chmod +x ~/miniconda.sh \
10 | && ~/miniconda.sh -b -p /opt/miniconda \
11 | && rm ~/miniconda.sh \
12 | && sed -i "$ a PATH=/opt/miniconda/bin:\$PATH" /etc/environment
13 |
14 | RUN python3 -m pip --no-cache-dir install --upgrade pip
15 |
16 | WORKDIR /code
17 |
18 | RUN pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 -f https://download.pytorch.org/whl/torch_stable.html
19 |
20 | COPY requirements.txt /code/requirements.txt
21 |
22 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
23 |
24 | RUN apt-get update && apt-get -y install wget
25 |
26 | RUN wget https://developer.download.nvidia.com/compute/redist/nvidia-tensorrt/nvidia_tensorrt-8.4.2.4-cp39-none-linux_x86_64.whl
27 |
28 | RUN pip install nvidia_tensorrt-8.4.2.4-cp39-none-linux_x86_64.whl
29 |
30 | COPY . /code/
31 |
32 | EXPOSE 5000
33 |
34 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"]
--------------------------------------------------------------------------------
/TensorRT/README.md:
--------------------------------------------------------------------------------
1 | ## TensorRT Stable Diffusion Example
2 |
3 | ### Build Dependencies
4 |
5 | Install TensorRT 8.4.2.4
6 |
7 | ```
8 | wget https://developer.download.nvidia.com/compute/redist/nvidia-tensorrt/nvidia_tensorrt-8.4.2.4-cp39-none-linux_x86_64.whl
9 | pip install nvidia_tensorrt-8.4.2.4-cp39-none-linux_x86_64.whl
10 | ```
11 |
12 | Install libraries
13 |
14 | ```
15 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html
16 | pip install -r requirements.txt
17 | ```
18 |
19 | Verify the library versions. This example has been tested with transformers 4.21.2 (the version pinned in `requirements.txt`), diffusers 0.3 and torch 1.12.
20 |
21 | ### Convert Unet Onnx model to TensorRT model
22 |
23 | You need to download the UNet ONNX model before converting. You can download it from the [HuggingFace hub](https://huggingface.co/kamalkraj/stable-diffusion-v1-4-onnx/resolve/main/models.tar.gz). Extract the tar file; the UNet ONNX model is stored in `./models/unet/unet.onnx`. If your extracted path differs, pass it with `--onnx_unet_path` (the conversion script defaults to `./models/unet/1/unet.onnx`).
24 |
25 | You also need to be registered on the Hugging Face Hub. Get your access token from [Hugging Face account settings](https://huggingface.co/settings/tokens). Then log in using the `huggingface-cli login` command.
26 |
27 | ```
28 | python3 convert_unet_to_tensorrt.py
29 | ```
30 |
31 | The UNet TensorRT engine is stored in `./unet.engine`.
32 |
33 | ### Benchmark
34 |
35 | ```
36 | python3 demo.py --benchmark
37 | ```
38 |
39 | ### Deploy as rest-api end-point
40 |
41 | You need to provide your Hugging Face token in the file `server.py`.
42 |
43 | ```
44 | docker build -t tensorrt_diffusion .
45 | docker run -p 5000:5000 -ti --gpus=all tensorrt_diffusion
46 | ```
47 |
48 | ### Test API
49 |
50 | ```
51 | python3 client.py
52 | ```
53 |
54 | Check the resulting image: `output_api.png`
55 |
--------------------------------------------------------------------------------
/TensorRT/client.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import time
4 |
5 | if __name__ == "__main__":
6 | text = "The Easter bunny riding a motorcycle in New York City"
7 | t0 = time.time()
8 | for i in range(50):
9 | print("Iteration: ", i)
10 | out = requests.post(
11 | "http://localhost:5000/predict/", data=json.dumps({"prompt": [text]})
12 | )
13 | t1 = time.time()
14 | print("Inference time is: ", (t1 - t0) / 50)
15 | with open("output_api.png", "wb") as f:
16 | f.write(out.content)
17 |
--------------------------------------------------------------------------------
/TensorRT/convert_unet_to_tensorrt.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import argparse
3 |
4 | import tensorrt as trt
5 |
6 | # pycuda.autoinit creates a CUDA context; without it, "LogicError: explicit_context_dependent failed: invalid device context - no currently active context?"
7 | import pycuda.autoinit
8 |
9 | def get_args():
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument(
12 | "--onnx_unet_path",
13 | default="./models/unet/1/unet.onnx",
14 | type=str,
15 | help="Onnx unet model path",
16 | )
17 | parser.add_argument(
18 | "--save_path", default="unet.engine", type=str, help="TensorRT saved path"
19 | )
20 | parser.add_argument("--batch_size", default=1, type=int, help="batch size")
21 | parser.add_argument(
22 | "--img_size", default=(512, 512), help="Unet input image size (h,w)"
23 | )
24 | parser.add_argument(
25 | "--max_seq_length", default=64, help="Maximum sequence length of input text"
26 | )
27 |
28 | return parser.parse_args()
29 |
30 |
31 | def convert(args):
32 | TRT_LOGGER = trt.Logger(trt.Logger.INFO)
33 | TRT_BUILDER = trt.Builder(TRT_LOGGER)
34 | network = TRT_BUILDER.create_network(
35 | 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
36 | )
37 | onnx_parser = trt.OnnxParser(network, TRT_LOGGER)
38 | parse_success = onnx_parser.parse_from_file(args.onnx_unet_path)
39 | for idx in range(onnx_parser.num_errors):
40 | print(onnx_parser.get_error(idx))
41 | if not parse_success:
42 | sys.exit("ONNX model parsing failed")
43 | config = TRT_BUILDER.create_builder_config()
44 | profile = TRT_BUILDER.create_optimization_profile()
45 |
46 |     latents_shape = (
47 |         args.batch_size * 2,  # batch is doubled: unconditional + conditional pass (classifier-free guidance)
48 |         4,  # latent channels
49 |         args.img_size[0] // 8,  # latent height: the VAE downsamples by a factor of 8
50 |         args.img_size[1] // 8,  # latent width
51 |     )
52 |     embed_shape = (args.batch_size * 2, args.max_seq_length, 768)  # CLIP text embeddings (hidden size 768)
53 | timestep_shape = (args.batch_size,)
54 |
55 | profile.set_shape("sample", latents_shape, latents_shape, latents_shape)
56 | profile.set_shape("encoder_hidden_states", embed_shape, embed_shape, embed_shape)
57 | profile.set_shape("timestep", timestep_shape, timestep_shape, timestep_shape)
58 | config.add_optimization_profile(profile)
59 |
60 | # config.max_workspace_size = 4096 * (1 << 20)
61 | config.set_flag(trt.BuilderFlag.FP16)
62 | serialized_engine = TRT_BUILDER.build_serialized_network(network, config)
63 |
64 | ## save TRT engine
65 | with open(args.save_path, "wb") as f:
66 | f.write(serialized_engine)
67 | print(f"Engine is saved to {args.save_path}")
68 |
69 |
70 | if __name__ == "__main__":
71 | args = get_args()
72 | convert(args)
73 |
--------------------------------------------------------------------------------
/TensorRT/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==4.21.2
2 | diffusers==0.3.0
3 | #torch==1.12.1+cu116
4 | scipy
5 | uvicorn
6 | pydantic
7 | fastapi
8 | pycuda
9 | huggingface_hub
--------------------------------------------------------------------------------
/generated_images/AITemplate/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/0.png
--------------------------------------------------------------------------------
/generated_images/AITemplate/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/1.png
--------------------------------------------------------------------------------
/generated_images/AITemplate/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/2.png
--------------------------------------------------------------------------------
/generated_images/AITemplate/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/3.png
--------------------------------------------------------------------------------
/generated_images/AITemplate/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/4.png
--------------------------------------------------------------------------------
/generated_images/AITemplate/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/5.png
--------------------------------------------------------------------------------
/generated_images/AITemplate/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/6.png
--------------------------------------------------------------------------------
/generated_images/AITemplate/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/7.png
--------------------------------------------------------------------------------
/generated_images/AITemplate/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/8.png
--------------------------------------------------------------------------------
/generated_images/AITemplate/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/9.png
--------------------------------------------------------------------------------
/generated_images/AITemplate/README.md:
--------------------------------------------------------------------------------
1 | # Stable Diffusion images with AITemplate
2 |
3 | | Prompt | Generated image |
4 | | --- | ---
5 | | Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci | 
6 | | The Easter bunny riding a motorcycle in New York City | 
7 | | Lecco in the winter in the year 2055 | 
8 | | photorealistic orange 1935 ford in ancient Qgypt pyramid of Giza in background | 
9 | | the boulevards are crowded today | 
10 | | A photo of cat riding on a bicycle | 
11 | | Bird-eye view of a highway in Los Angeles | 
12 | | A beautiful sunrise on mars. High-definition. | 
13 | | A panda bear driving a car | 
14 | | Drone flythrough of a tropical jungle convered in snow | 
--------------------------------------------------------------------------------
/generated_images/FlashAttention/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/0.png
--------------------------------------------------------------------------------
/generated_images/FlashAttention/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/1.png
--------------------------------------------------------------------------------
/generated_images/FlashAttention/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/2.png
--------------------------------------------------------------------------------
/generated_images/FlashAttention/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/3.png
--------------------------------------------------------------------------------
/generated_images/FlashAttention/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/4.png
--------------------------------------------------------------------------------
/generated_images/FlashAttention/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/5.png
--------------------------------------------------------------------------------
/generated_images/FlashAttention/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/6.png
--------------------------------------------------------------------------------
/generated_images/FlashAttention/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/7.png
--------------------------------------------------------------------------------
/generated_images/FlashAttention/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/8.png
--------------------------------------------------------------------------------
/generated_images/FlashAttention/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/9.png
--------------------------------------------------------------------------------
/generated_images/FlashAttention/README.md:
--------------------------------------------------------------------------------
1 | # Stable Diffusion images with FlashAttention
2 |
3 | | Prompt | Generated image |
4 | | --- | ---
5 | | Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci | 
6 | | The Easter bunny riding a motorcycle in New York City | 
7 | | Lecco in the winter in the year 2055 | 
8 | | photorealistic orange 1935 ford in ancient Qgypt pyramid of Giza in background | 
9 | | the boulevards are crowded today | 
10 | | TA photo of cat riding on a bicycle | 
11 | | Bird-eye view of a highway in Los Angeles | 
12 | | A beautiful sunrise on mars. High-definition. | 
13 | | A panda bear driving a car | 
14 | | Drone flythrough of a tropical jungle convered in snow | 
--------------------------------------------------------------------------------
/generated_images/PyTorch/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/0.png
--------------------------------------------------------------------------------
/generated_images/PyTorch/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/1.png
--------------------------------------------------------------------------------
/generated_images/PyTorch/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/2.png
--------------------------------------------------------------------------------
/generated_images/PyTorch/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/3.png
--------------------------------------------------------------------------------
/generated_images/PyTorch/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/4.png
--------------------------------------------------------------------------------
/generated_images/PyTorch/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/5.png
--------------------------------------------------------------------------------
/generated_images/PyTorch/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/6.png
--------------------------------------------------------------------------------
/generated_images/PyTorch/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/7.png
--------------------------------------------------------------------------------
/generated_images/PyTorch/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/8.png
--------------------------------------------------------------------------------
/generated_images/PyTorch/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/9.png
--------------------------------------------------------------------------------
/generated_images/PyTorch/README.md:
--------------------------------------------------------------------------------
1 | # Stable Diffusion images with PyTorch
2 |
3 | | Prompt | Generated image |
4 | | --- | ---
5 | | Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci | 
6 | | The Easter bunny riding a motorcycle in New York City | 
7 | | Lecco in the winter in the year 2055 | 
8 | | photorealistic orange 1935 ford in ancient Qgypt pyramid of Giza in background | 
9 | | the boulevards are crowded today | 
10 | | TA photo of cat riding on a bicycle | 
11 | | Bird-eye view of a highway in Los Angeles | 
12 | | A beautiful sunrise on mars. High-definition. | 
13 | | A panda bear driving a car | 
14 | | Drone flythrough of a tropical jungle convered in snow | 
--------------------------------------------------------------------------------
/generated_images/README.md:
--------------------------------------------------------------------------------
1 | # Preview of generated images
2 | Generated images are categorized in directories. You can preview all generated images by going to the README.md of any directory.
3 |
4 | - [AITemplate](./AITemplate/README.md)
5 | - [FlashAttention](./FlashAttention/README.md)
6 | - [nvFuser](./nvFuser/README.md)
7 | - [PyTorch](./PyTorch/README.md)
8 | - [TensorRT](./TensorRT/README.md)
--------------------------------------------------------------------------------
/generated_images/TensorRT/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/0.png
--------------------------------------------------------------------------------
/generated_images/TensorRT/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/1.png
--------------------------------------------------------------------------------
/generated_images/TensorRT/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/2.png
--------------------------------------------------------------------------------
/generated_images/TensorRT/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/3.png
--------------------------------------------------------------------------------
/generated_images/TensorRT/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/4.png
--------------------------------------------------------------------------------
/generated_images/TensorRT/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/5.png
--------------------------------------------------------------------------------
/generated_images/TensorRT/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/6.png
--------------------------------------------------------------------------------
/generated_images/TensorRT/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/7.png
--------------------------------------------------------------------------------
/generated_images/TensorRT/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/8.png
--------------------------------------------------------------------------------
/generated_images/TensorRT/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/9.png
--------------------------------------------------------------------------------
/generated_images/TensorRT/README.md:
--------------------------------------------------------------------------------
1 | # Stable Diffusion images with TensorRT
2 |
3 | | Prompt | Generated image |
4 | | --- | ---
5 | | Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci | 
6 | | The Easter bunny riding a motorcycle in New York City | 
7 | | Lecco in the winter in the year 2055 | 
8 | | photorealistic orange 1935 ford in ancient Qgypt pyramid of Giza in background | 
9 | | the boulevards are crowded today | 
10 | | TA photo of cat riding on a bicycle | 
11 | | Bird-eye view of a highway in Los Angeles | 
12 | | A beautiful sunrise on mars. High-definition. | 
13 | | A panda bear driving a car | 
14 | | Drone flythrough of a tropical jungle convered in snow | 
--------------------------------------------------------------------------------
/generated_images/nvFuser/0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/0.png
--------------------------------------------------------------------------------
/generated_images/nvFuser/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/1.png
--------------------------------------------------------------------------------
/generated_images/nvFuser/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/2.png
--------------------------------------------------------------------------------
/generated_images/nvFuser/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/3.png
--------------------------------------------------------------------------------
/generated_images/nvFuser/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/4.png
--------------------------------------------------------------------------------
/generated_images/nvFuser/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/5.png
--------------------------------------------------------------------------------
/generated_images/nvFuser/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/6.png
--------------------------------------------------------------------------------
/generated_images/nvFuser/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/7.png
--------------------------------------------------------------------------------
/generated_images/nvFuser/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/8.png
--------------------------------------------------------------------------------
/generated_images/nvFuser/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/9.png
--------------------------------------------------------------------------------
/generated_images/nvFuser/README.md:
--------------------------------------------------------------------------------
1 | # Stable Diffusion images with NvFuser
2 |
3 | | Prompt | Generated image |
4 | | --- | ---
5 | | Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci | 
6 | | The Easter bunny riding a motorcycle in New York City | 
7 | | Lecco in the winter in the year 2055 | 
8 | | photorealistic orange 1935 ford in ancient Qgypt pyramid of Giza in background | 
9 | | the boulevards are crowded today | 
10 | | TA photo of cat riding on a bicycle | 
11 | | Bird-eye view of a highway in Los Angeles | 
12 | | A beautiful sunrise on mars. High-definition. | 
13 | | A panda bear driving a car | 
14 | | Drone flythrough of a tropical jungle convered in snow | 
--------------------------------------------------------------------------------
/graphs/A100_GPU_batch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/graphs/A100_GPU_batch.png
--------------------------------------------------------------------------------
/graphs/A100_GPU_latency.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/graphs/A100_GPU_latency.png
--------------------------------------------------------------------------------
/graphs/T4_GPU_latency.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/graphs/T4_GPU_latency.png
--------------------------------------------------------------------------------
/nvFuser/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9-slim
2 |
3 | WORKDIR /code
4 |
5 | RUN pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html
6 |
7 | COPY requirements.txt /code/requirements.txt
8 |
9 | RUN apt-get update && apt-get -y install curl && apt -y install git
10 |
11 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
12 |
13 | COPY . /code/
14 |
15 | EXPOSE 5000
16 |
17 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"]
--------------------------------------------------------------------------------
/nvFuser/README.md:
--------------------------------------------------------------------------------
1 | ## Nvfuser fp16 Stable Diffusion Example
2 |
3 | ### Build Dependencies
4 |
5 | Install libraries
6 |
7 | ```
8 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html
9 | pip install -r requirements.txt
10 | ```
11 |
12 | ### Convert Unet model to Nvfuser torchscript fp16
13 |
14 | You also need to be registered on the Hugging Face Hub. Get your access token from [Hugging Face account settings](https://huggingface.co/settings/tokens). Then log in using the `huggingface-cli login` command.
15 |
16 | ```
17 | python3 create_unet_nvfuser_model.py
18 | ```
19 |
20 | The UNet nvFuser fp16 TorchScript model is stored in `./unet_jit.pt`.
21 |
22 | ### Benchmark
23 |
24 | ```
25 | python3 demo.py --benchmark
26 | ```
27 |
28 | ### Deploy as rest-api end-point
29 |
30 | You need to provide your Hugging Face token in the file `server.py`.
31 |
32 | ```
33 | docker build -t nvfuser_diffusion .
34 | docker run -p 5000:5000 -ti --gpus=all nvfuser_diffusion
35 | ```
36 |
37 | ### Test API
38 |
39 | ```
40 | python3 client.py
41 | ```
42 |
43 | Check the resulting image: `output_api.png`
--------------------------------------------------------------------------------
/nvFuser/client.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import time
4 |
5 | if __name__ == "__main__":
6 | text = "The Easter bunny riding a motorcycle in New York City"
7 | t0 = time.time()
8 | for i in range(50):
9 | print("Iteration: ", i)
10 | out = requests.post(
11 | "http://localhost:5000/predict/", data=json.dumps({"prompt": [text]})
12 | )
13 | t1 = time.time()
14 | print("Inference time is: ", (t1 - t0) / 50)
15 | with open("output_api.png", "wb") as f:
16 | f.write(out.content)
17 |
--------------------------------------------------------------------------------
/nvFuser/create_unet_nvfuser_model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import argparse
3 | from diffusers import UNet2DConditionModel
4 |
5 |
6 | def get_args():
7 | parser = argparse.ArgumentParser()
8 | parser.add_argument(
9 | "--save_path", default="./unet_jit.pt", type=str, help="Nvfuser saved path"
10 | )
11 | parser.add_argument("--batch_size", default=1, type=int, help="batch size")
12 | parser.add_argument(
13 | "--img_size", default=(512, 512), help="Unet input image size (h,w)"
14 | )
15 | parser.add_argument(
16 | "--max_seq_length", default=64, help="Maximum sequence length of input text"
17 | )
18 |
19 | return parser.parse_args()
20 |
21 |
22 | def convert(args):
23 | device = torch.device("cuda")
24 | unet = UNet2DConditionModel.from_pretrained(
25 | "CompVis/stable-diffusion-v1-4", subfolder="unet", use_auth_token=True
26 | ).to(device)
27 | unet.eval()
28 |
29 | latents = torch.randn(
30 | (args.batch_size, 4, args.img_size[0] // 8, args.img_size[1] // 8)
31 | )
32 | latent_model_input = torch.cat([latents] * 2).to(device)
33 | text_embeddings = (
34 | torch.randn((args.batch_size, args.max_seq_length, 768)).float().to(device)
35 | )
36 | text_embeddings = torch.cat([text_embeddings, text_embeddings])
37 | timestep_ = torch.tensor([10]).to(device)
38 | with torch.no_grad():
39 | with torch.autocast("cuda"):
40 | traced_applymodel_half = torch.jit.trace(
41 | unet,
42 | (latent_model_input, timestep_, text_embeddings),
43 | check_trace=False,
44 | )
45 |
46 | traced_applymodel_half.save(args.save_path)
47 |
48 |
49 | if __name__ == "__main__":
50 | args = get_args()
51 | convert(args)
52 |
--------------------------------------------------------------------------------
/nvFuser/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==4.21.2
2 | diffusers==0.3.0
3 | torch==1.12.1+cu116
4 | scipy
--------------------------------------------------------------------------------
/nvFuser/test.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import torch
4 | from transformers import CLIPTextModel, CLIPTokenizer
5 | from tqdm import tqdm
6 |
7 | device = torch.device("cuda")
8 |
9 | # Traced UNet produced by create_unet_nvfuser_model.py; it expects
10 | # (latent_model_input, timestep, text_embeddings) as positional inputs.
11 | sd_fused = torch.jit.load("unet_jit.pt")
12 | sd_fused = sd_fused.to(device)
13 |
14 | tokenizer = CLIPTokenizer.from_pretrained(
15 |     "CompVis/stable-diffusion-v1-4", subfolder="tokenizer", use_auth_token=True
16 | )
17 | text_encoder = CLIPTextModel.from_pretrained(
18 |     "CompVis/stable-diffusion-v1-4", subfolder="text_encoder", use_auth_token=True
19 | ).to(device)
20 |
21 | prompt = "Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci"
22 | text_input = tokenizer(
23 |     prompt, padding="max_length", max_length=64, truncation=True, return_tensors="pt"
24 | ).input_ids.to(device)
25 | uncond_input = tokenizer(
26 |     [""] * 1, padding="max_length", max_length=64, return_tensors="pt"
27 | ).input_ids.to(device)
28 |
29 | batch_size = 1
30 | img_size = (512, 512)
31 | with torch.no_grad():
32 |     # Concatenate unconditional and conditional embeddings (classifier-free guidance batch).
33 |     text_embeddings = torch.cat(
34 |         [text_encoder(uncond_input)[0], text_encoder(text_input)[0]]
35 |     )
36 | latents = torch.randn((batch_size, 4, img_size[0] // 8, img_size[1] // 8), device=device)
37 | latent_model_input = torch.cat([latents] * 2)
38 | timestep = torch.tensor([10]).to(device)
39 |
40 | with torch.no_grad(), torch.autocast("cuda"):
41 |     # Warm-up iterations so CUDA kernels and nvFuser fusions are compiled before timing.
42 |     for _ in tqdm(range(5)):
43 |         out = sd_fused(latent_model_input, timestep, text_embeddings)
44 |     torch.cuda.synchronize()
45 |     start = time.perf_counter()
46 |     for _ in tqdm(range(100)):
47 |         out = sd_fused(latent_model_input, timestep, text_embeddings)
48 |     torch.cuda.synchronize()
49 |     elapsed = time.perf_counter() - start
50 |
51 | print("Average UNet forward time: {:.4f} s".format(elapsed / 100))
--------------------------------------------------------------------------------