├── .github ├── pull_request_template.md ├── stochastic_logo_dark.svg ├── stochastic_logo_light.svg ├── stochastic_x_dashboard.jpeg ├── stochasticai_demo.gif └── workflows │ └── semgrep.yml ├── .gitignore ├── .pre-commit-config.yaml ├── AITemplate ├── Dockerfile ├── README.md ├── benchmark.py ├── benchmark_pt.py ├── client.py ├── compile.py ├── demo.py ├── modeling │ ├── attention.py │ ├── clip.py │ ├── embeddings.py │ ├── resnet.py │ ├── unet_2d_condition.py │ ├── unet_blocks.py │ └── vae.py ├── pipeline_stable_diffusion_ait.py ├── requirements.txt └── server.py ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── FlashAttention ├── Dockerfile ├── README.md ├── client.py ├── demo.py ├── diffusers │ ├── .github │ │ ├── ISSUE_TEMPLATE │ │ │ ├── bug-report.yml │ │ │ ├── config.yml │ │ │ ├── feature_request.md │ │ │ ├── feedback.md │ │ │ └── new-model-addition.yml │ │ └── workflows │ │ │ ├── build_documentation.yml │ │ │ ├── build_pr_documentation.yml │ │ │ ├── delete_doc_comment.yml │ │ │ ├── pr_quality.yml │ │ │ ├── pr_tests.yml │ │ │ ├── push_tests.yml │ │ │ ├── stale.yml │ │ │ └── typos.yml │ ├── .gitignore │ ├── CODE_OF_CONDUCT.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── MANIFEST.in │ ├── Makefile │ ├── README.md │ ├── _typos.toml │ ├── docs │ │ └── source │ │ │ ├── _toctree.yml │ │ │ ├── api │ │ │ ├── configuration.mdx │ │ │ ├── diffusion_pipeline.mdx │ │ │ ├── logging.mdx │ │ │ ├── models.mdx │ │ │ ├── outputs.mdx │ │ │ ├── pipelines │ │ │ │ ├── ddim.mdx │ │ │ │ ├── ddpm.mdx │ │ │ │ ├── latent_diffusion.mdx │ │ │ │ ├── latent_diffusion_uncond.mdx │ │ │ │ ├── overview.mdx │ │ │ │ ├── pndm.mdx │ │ │ │ ├── score_sde_ve.mdx │ │ │ │ ├── stable_diffusion.mdx │ │ │ │ └── stochastic_karras_ve.mdx │ │ │ └── schedulers.mdx │ │ │ ├── conceptual │ │ │ ├── contribution.mdx │ │ │ ├── philosophy.mdx │ │ │ └── stable_diffusion.mdx │ │ │ ├── imgs │ │ │ └── diffusers_library.jpg │ │ │ ├── index.mdx │ │ │ ├── installation.mdx │ │ │ ├── optimization │ │ │ ├── fp16.mdx │ │ │ ├── mps.mdx │ │ │ ├── onnx.mdx │ │ │ └── open_vino.mdx │ │ │ ├── quicktour.mdx │ │ │ ├── training │ │ │ ├── overview.mdx │ │ │ ├── text2image.mdx │ │ │ ├── text_inversion.mdx │ │ │ └── unconditional_training.mdx │ │ │ └── using-diffusers │ │ │ ├── conditional_image_generation.mdx │ │ │ ├── configuration.mdx │ │ │ ├── custom.mdx │ │ │ ├── img2img.mdx │ │ │ ├── inpaint.mdx │ │ │ ├── loading.mdx │ │ │ └── unconditional_image_generation.mdx │ ├── examples │ │ ├── README.md │ │ ├── community │ │ │ └── README.md │ │ ├── inference │ │ │ ├── README.md │ │ │ ├── image_to_image.py │ │ │ └── inpainting.py │ │ ├── textual_inversion │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── textual_inversion.py │ │ └── unconditional_image_generation │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ └── train_unconditional.py │ ├── pyproject.toml │ ├── scripts │ │ ├── __init__.py │ │ ├── change_naming_configs_and_checkpoints.py │ │ ├── conversion_ldm_uncond.py │ │ ├── convert_ddpm_original_checkpoint_to_diffusers.py │ │ ├── convert_ldm_original_checkpoint_to_diffusers.py │ │ ├── convert_ncsnpp_original_checkpoint_to_diffusers.py │ │ ├── convert_original_stable_diffusion_to_diffusers.py │ │ ├── convert_stable_diffusion_checkpoint_to_onnx.py │ │ └── generate_logits.py │ ├── setup.cfg │ ├── setup.py │ ├── src │ │ └── diffusers │ │ │ ├── __init__.py │ │ │ ├── commands │ │ │ ├── __init__.py │ │ │ ├── diffusers_cli.py │ │ │ └── env.py │ │ │ ├── configuration_utils.py │ │ │ ├── dependency_versions_check.py │ │ │ ├── dependency_versions_table.py │ │ │ ├── 
dynamic_modules_utils.py │ │ │ ├── hub_utils.py │ │ │ ├── modeling_flax_utils.py │ │ │ ├── modeling_utils.py │ │ │ ├── models │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── attention_flax.py │ │ │ ├── embeddings.py │ │ │ ├── embeddings_flax.py │ │ │ ├── resnet.py │ │ │ ├── resnet_flax.py │ │ │ ├── unet_2d.py │ │ │ ├── unet_2d_condition.py │ │ │ ├── unet_2d_condition_flax.py │ │ │ ├── unet_blocks.py │ │ │ ├── unet_blocks_flax.py │ │ │ └── vae.py │ │ │ ├── onnx_utils.py │ │ │ ├── optimization.py │ │ │ ├── pipeline_utils.py │ │ │ ├── pipelines │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── ddim │ │ │ │ ├── __init__.py │ │ │ │ └── pipeline_ddim.py │ │ │ ├── ddpm │ │ │ │ ├── __init__.py │ │ │ │ └── pipeline_ddpm.py │ │ │ ├── latent_diffusion │ │ │ │ ├── __init__.py │ │ │ │ └── pipeline_latent_diffusion.py │ │ │ ├── latent_diffusion_uncond │ │ │ │ ├── __init__.py │ │ │ │ └── pipeline_latent_diffusion_uncond.py │ │ │ ├── pndm │ │ │ │ ├── __init__.py │ │ │ │ └── pipeline_pndm.py │ │ │ ├── score_sde_ve │ │ │ │ ├── __init__.py │ │ │ │ └── pipeline_score_sde_ve.py │ │ │ ├── stable_diffusion │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── pipeline_stable_diffusion.py │ │ │ │ ├── pipeline_stable_diffusion_img2img.py │ │ │ │ ├── pipeline_stable_diffusion_inpaint.py │ │ │ │ ├── pipeline_stable_diffusion_onnx.py │ │ │ │ └── safety_checker.py │ │ │ └── stochastic_karras_ve │ │ │ │ ├── __init__.py │ │ │ │ └── pipeline_stochastic_karras_ve.py │ │ │ ├── schedulers │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── scheduling_ddim.py │ │ │ ├── scheduling_ddim_flax.py │ │ │ ├── scheduling_ddpm.py │ │ │ ├── scheduling_ddpm_flax.py │ │ │ ├── scheduling_karras_ve.py │ │ │ ├── scheduling_karras_ve_flax.py │ │ │ ├── scheduling_lms_discrete.py │ │ │ ├── scheduling_lms_discrete_flax.py │ │ │ ├── scheduling_pndm.py │ │ │ ├── scheduling_pndm_flax.py │ │ │ ├── scheduling_sde_ve.py │ │ │ ├── scheduling_sde_ve_flax.py │ │ │ ├── scheduling_sde_vp.py │ │ │ └── scheduling_utils.py │ │ │ ├── testing_utils.py │ │ │ ├── training_utils.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── dummy_flax_objects.py │ │ │ ├── dummy_pt_objects.py │ │ │ ├── dummy_torch_and_scipy_objects.py │ │ │ ├── dummy_torch_and_transformers_and_onnx_objects.py │ │ │ ├── dummy_torch_and_transformers_objects.py │ │ │ ├── import_utils.py │ │ │ ├── logging.py │ │ │ ├── model_card_template.md │ │ │ └── outputs.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_config.py │ │ ├── test_layers_utils.py │ │ ├── test_modeling_common.py │ │ ├── test_models_unet.py │ │ ├── test_models_vae.py │ │ ├── test_models_vq.py │ │ ├── test_pipelines.py │ │ ├── test_scheduler.py │ │ └── test_training.py │ └── utils │ │ ├── check_config_docstrings.py │ │ ├── check_copies.py │ │ ├── check_dummies.py │ │ ├── check_inits.py │ │ ├── check_repo.py │ │ ├── check_table.py │ │ ├── check_tf_ops.py │ │ ├── custom_init_isort.py │ │ ├── print_env.py │ │ └── stale.py ├── requirements.txt └── server.py ├── LICENSE ├── ONNX ├── Dockerfile ├── README.md ├── demo.py ├── model.py ├── requirements.txt └── server.py ├── PyTorch ├── Dockerfile ├── README.md ├── demo.py ├── model.py ├── notebook.ipynb ├── requirements.txt └── server.py ├── README.md ├── TensorRT ├── Dockerfile ├── Notebook.ipynb ├── README.md ├── client.py ├── convert_unet_to_tensorrt.py ├── demo.py ├── requirements.txt ├── server.py └── trt_model.py ├── generated_images ├── AITemplate │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png 
│ ├── 9.png │ └── README.md ├── FlashAttention │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ ├── 9.png │ └── README.md ├── PyTorch │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ ├── 9.png │ └── README.md ├── README.md ├── TensorRT │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ ├── 9.png │ └── README.md └── nvFuser │ ├── 0.png │ ├── 1.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ ├── 9.png │ └── README.md ├── graphs ├── A100_GPU_batch.png ├── A100_GPU_latency.png └── T4_GPU_latency.png └── nvFuser ├── Dockerfile ├── README.md ├── client.py ├── create_unet_nvfuser_model.py ├── demo.py ├── requirements.txt ├── server.py └── test.py /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Summary 2 | 3 | 4 | 5 | ### Checklist 6 | 7 | 8 | - [ ] Tested 9 | - [ ] Documented 10 | 11 | ### Additional Information 12 | 13 | 14 | -------------------------------------------------------------------------------- /.github/stochastic_x_dashboard.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/.github/stochastic_x_dashboard.jpeg -------------------------------------------------------------------------------- /.github/stochasticai_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/.github/stochasticai_demo.gif -------------------------------------------------------------------------------- /.github/workflows/semgrep.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: {} 3 | pull_request: {} 4 | push: 5 | branches: 6 | - main 7 | - master 8 | paths: 9 | - .github/workflows/semgrep.yml 10 | schedule: 11 | # random HH:MM to avoid a load spike on GitHub Actions at 00:00 12 | - cron: 28 9 * * * 13 | name: Semgrep 14 | jobs: 15 | semgrep: 16 | name: semgrep/ci 17 | runs-on: ubuntu-20.04 18 | env: 19 | SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }} 20 | container: 21 | image: returntocorp/semgrep 22 | steps: 23 | - uses: actions/checkout@v3 24 | - run: semgrep ci 25 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 22.12.0 4 | hooks: 5 | - id: black 6 | # It is recommended to specify the latest version of Python 7 | # supported by your project here, or alternatively use 8 | # pre-commit's default_language_version, see 9 | # https://pre-commit.com/#top_level-default_language_version 10 | language_version: python3.9 11 | -------------------------------------------------------------------------------- /AITemplate/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | WORKDIR /code 4 | 5 | RUN pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html 6 | 7 | COPY requirements.txt /code/requirements.txt 8 | 9 | RUN apt-get update 
&& apt-get -y install curl && apt -y install git 10 | 11 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt 12 | 13 | RUN git clone --recursive https://github.com/facebookincubator/AITemplate 14 | 15 | RUN cd AITemplate/python && python3 setup.py bdist_wheel && pip install dist/*.whl --force-reinstall 16 | 17 | COPY . /code/ 18 | 19 | EXPOSE 5000 20 | 21 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"] -------------------------------------------------------------------------------- /AITemplate/README.md: -------------------------------------------------------------------------------- 1 | ## AITemplate Stable Diffusion Example 2 | 3 | ### Build Dependencies 4 | 5 | Install AITemplate 6 | 7 | ``` 8 | git clone --recursive https://github.com/facebookincubator/AITemplate 9 | cd AITemplate/python 10 | python setup.py bdist_wheel 11 | pip install dist/*.whl --force-reinstall 12 | ``` 13 | 14 | Install libraries 15 | 16 | ``` 17 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | Verify the library versions. We have tested with transformers 4.22, diffusers 0.4, and torch 1.12. 22 | 23 | ### Compile AITemplate models 24 | 25 | You need to register on the Hugging Face Hub. Get your access token from [Hugging Face account settings](https://huggingface.co/settings/tokens). Then log in using the `huggingface-cli login` command. 26 | 27 | ``` 28 | python3 compile.py 29 | ``` 30 | 31 | Compiled models are stored in the `./tmp` folder. 32 | 33 | ### Benchmark 34 | 35 | ``` 36 | python3 demo.py --benchmark 37 | ``` 38 | 39 | Check the resulting image: `example_ait.png` 40 | 41 | ### Deploy as a REST API endpoint 42 | 43 | ``` 44 | docker build -t ait_diffusion . 45 | docker run -p 5000:5000 -ti --gpus=all ait_diffusion 46 | ``` 47 | 48 | ### Test API 49 | 50 | ``` 51 | python3 client.py 52 | ``` 53 | 54 | Check the resulting image: `output_api.png` -------------------------------------------------------------------------------- /AITemplate/benchmark_pt.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # 15 | import click 16 | import torch 17 | 18 | from aitemplate.testing.benchmark_pt import benchmark_torch_function 19 | from diffusers import StableDiffusionPipeline 20 | 21 | 22 | @click.command() 23 | @click.option("--token", default="", help="access token") 24 | @click.option("--prompt", default="A vision of paradise, Unreal Engine", help="prompt") 25 | @click.option( 26 | "--benchmark", type=bool, default=False, help="run stable diffusion e2e benchmark" 27 | ) 28 | def run(token, prompt, benchmark): 29 | pipe = StableDiffusionPipeline.from_pretrained( 30 | "CompVis/stable-diffusion-v1-4", 31 | revision="fp16", 32 | torch_dtype=torch.float16, 33 | use_auth_token=token, 34 | ).to("cuda") 35 | 36 | with torch.autocast("cuda"): 37 | image = pipe(prompt).images[0] 38 | if benchmark: 39 | t = benchmark_torch_function(10, pipe, prompt) 40 | print(f"sd pt e2e: {t} ms") 41 | 42 | image.save("example_pt.png") 43 | 44 | 45 | if __name__ == "__main__": 46 | run() 47 | -------------------------------------------------------------------------------- /AITemplate/client.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | 5 | if __name__ == "__main__": 6 | text = "The Easter bunny riding a motorcycle in New York City" 7 | t0 = time.time() 8 | for i in range(50): 9 | print("Iteration: ", i) 10 | out = requests.post( 11 | "http://localhost:5000/predict/", 12 | data=json.dumps({"prompt": [text], "num_inference_steps": 30}), 13 | ) 14 | t1 = time.time() 15 | print("Inference time is: ", (t1 - t0) / 50) 16 | with open("output_api_new.png", "wb") as f: 17 | f.write(out.content) 18 | -------------------------------------------------------------------------------- /AITemplate/demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | import click 16 | import torch 17 | 18 | from aitemplate.testing.benchmark_pt import benchmark_torch_function 19 | from pipeline_stable_diffusion_ait import StableDiffusionAITPipeline 20 | from PIL import Image 21 | 22 | 23 | @click.command() 24 | @click.option("--prompt", default="A vision of paradise, Unreal Engine", help="prompt") 25 | @click.option( 26 | "--benchmark", type=bool, default=False, help="run stable diffusion e2e benchmark" 27 | ) 28 | @click.option("--batch_size", type=int, default=1, help="batch size") 29 | def run(prompt, benchmark, batch_size): 30 | pipe = StableDiffusionAITPipeline() 31 | height = 512 32 | width = 512 33 | num_inference_steps = 50 34 | with torch.autocast("cuda"): 35 | images = pipe([prompt] * batch_size) 36 | if benchmark: 37 | t = benchmark_torch_function( 38 | 10, pipe, [prompt] * batch_size, height, width, num_inference_steps 39 | ) 40 | print(f"sd e2e: {t} ms") 41 | 42 | images = (images * 255).round().astype("uint8") 43 | pil_images = [Image.fromarray(image) for image in images] 44 | pil_images[0].save("example_ait.png") 45 | 46 | 47 | if __name__ == "__main__": 48 | run() 49 | -------------------------------------------------------------------------------- /AITemplate/modeling/embeddings.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | import math 16 | 17 | from aitemplate.compiler import ops 18 | from aitemplate.frontend import nn, Tensor 19 | 20 | 21 | def get_shape(x): 22 | shape = [it.value() for it in x._attrs["shape"]] 23 | return shape 24 | 25 | 26 | def get_timestep_embedding( 27 | timesteps: Tensor, 28 | embedding_dim: int, 29 | flip_sin_to_cos: bool = False, 30 | downscale_freq_shift: float = 1, 31 | scale: float = 1, 32 | max_period: int = 10000, 33 | ): 34 | """ 35 | This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. 36 | 37 | :param timesteps: a 1-D Tensor of N indices, one per batch element. 38 | These may be fractional. 39 | :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the 40 | embeddings. :return: an [N x dim] Tensor of positional embeddings. 
41 | """ 42 | assert len(get_shape(timesteps)) == 1, "Timesteps should be a 1d-array" 43 | 44 | half_dim = embedding_dim // 2 45 | 46 | exponent = (-math.log(max_period)) * Tensor( 47 | shape=[half_dim], dtype="float16", name="arange" 48 | ) 49 | 50 | exponent = exponent * (1.0 / (half_dim - downscale_freq_shift)) 51 | 52 | emb = ops.exp(exponent) 53 | emb = ops.reshape()(timesteps, [-1, 1]) * ops.reshape()(emb, [1, -1]) 54 | 55 | # scale embeddings 56 | emb = scale * emb 57 | 58 | # concat sine and cosine embeddings 59 | if flip_sin_to_cos: 60 | emb = ops.concatenate()( 61 | [ops.cos(emb), ops.sin(emb)], 62 | dim=-1, 63 | ) 64 | else: 65 | emb = ops.concatenate()( 66 | [ops.sin(emb), ops.cos(emb)], 67 | dim=-1, 68 | ) 69 | return emb 70 | 71 | 72 | class TimestepEmbedding(nn.Module): 73 | def __init__(self, channel: int, time_embed_dim: int, act_fn: str = "silu"): 74 | super().__init__() 75 | 76 | self.linear_1 = nn.Linear(channel, time_embed_dim, specialization="swish") 77 | self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim) 78 | 79 | def forward(self, sample): 80 | sample = self.linear_1(sample) 81 | sample = self.linear_2(sample) 82 | return sample 83 | 84 | 85 | class Timesteps(nn.Module): 86 | def __init__( 87 | self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float 88 | ): 89 | super().__init__() 90 | self.num_channels = num_channels 91 | self.flip_sin_to_cos = flip_sin_to_cos 92 | self.downscale_freq_shift = downscale_freq_shift 93 | 94 | def forward(self, timesteps): 95 | t_emb = get_timestep_embedding( 96 | timesteps, 97 | self.num_channels, 98 | flip_sin_to_cos=self.flip_sin_to_cos, 99 | downscale_freq_shift=self.downscale_freq_shift, 100 | ) 101 | return t_emb 102 | -------------------------------------------------------------------------------- /AITemplate/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.21.2 2 | diffusers==0.3.0 3 | #torch==1.12.1+cu116 4 | scipy 5 | uvicorn 6 | pydantic 7 | fastapi -------------------------------------------------------------------------------- /AITemplate/server.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from typing import List, Union 3 | from pydantic import BaseModel 4 | from pipeline_stable_diffusion_ait import StableDiffusionAITPipeline 5 | import torch 6 | from tqdm import tqdm 7 | from PIL import Image 8 | import io 9 | from fastapi import Response 10 | 11 | torch_device = torch.device("cuda:0") 12 | 13 | 14 | class Item(BaseModel): 15 | prompt: Union[str, List[str]] 16 | img_height: int = 512 17 | img_width: int = 512 18 | num_inference_steps: int = 50 19 | guidance_scale: float = 7.5 20 | 21 | 22 | app = FastAPI() 23 | 24 | pipe = StableDiffusionAITPipeline() 25 | 26 | 27 | @app.post("/predict/") 28 | async def predict(input_api: Item): 29 | with torch.autocast("cuda"): 30 | images = pipe( 31 | input_api.prompt, 32 | height=input_api.img_height, 33 | width=input_api.img_width, 34 | num_inference_steps=input_api.num_inference_steps, 35 | guidance_scale=input_api.guidance_scale, 36 | ) 37 | if images.ndim == 3: 38 | images = images[None, ...] 
39 | image = (images[0] * 255).round().astype("uint8") 40 | image = Image.fromarray(image) 41 | 42 | # save image to an in-memory bytes buffer 43 | with io.BytesIO() as buf: 44 | image.save(buf, format="PNG") 45 | im_bytes = buf.getvalue() 46 | headers = {"Content-Disposition": 'inline; filename="test.png"'} 47 | return Response(im_bytes, headers=headers, media_type="image/png") 48 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 7 | 8 | ## Our Standards 9 | 10 | Examples of behavior that contributes to a positive environment for our 11 | community include: 12 | 13 | * Demonstrating empathy and kindness toward other people 14 | * Being respectful of differing opinions, viewpoints, and experiences 15 | * Giving and gracefully accepting constructive feedback 16 | * Accepting responsibility and apologizing to those affected by our mistakes, 17 | and learning from the experience 18 | * Focusing on what is best not just for us as individuals, but for the overall 19 | community 20 | 21 | Examples of unacceptable behavior include: 22 | 23 | * The use of sexualized language or imagery, and sexual attention or advances of 24 | any kind 25 | * Trolling, insulting or derogatory comments, and personal or political attacks 26 | * Public or private harassment 27 | * Publishing others' private information, such as a physical or email address, 28 | without their explicit permission 29 | * Other conduct which could reasonably be considered inappropriate in a 30 | professional setting 31 | 32 | ## Scope 33 | 34 | This Code of Conduct applies within all community spaces, and also applies when 35 | an individual is officially representing the community in public spaces. 36 | Examples of representing our community include using an official e-mail address, 37 | posting via an official social media account, or acting as an appointed 38 | representative at an online or offline event. 39 | 40 | ## Enforcement 41 | 42 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the organization at hello@stochastic.ai. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. 43 | 44 | 45 | ## Attribution 46 | 47 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 48 | version 2.1, available at 49 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 50 | 51 | Community Impact Guidelines were inspired by 52 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 53 | 54 | For answers to common questions about this code of conduct, see the FAQ at 55 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 56 | [https://www.contributor-covenant.org/translations][translations]. 
57 | 58 | [homepage]: https://www.contributor-covenant.org 59 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 60 | [Mozilla CoC]: https://github.com/mozilla/diversity 61 | [FAQ]: https://www.contributor-covenant.org/faq 62 | [translations]: https://www.contributor-covenant.org/translations 63 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to x-stable-diffusion 2 | 3 | Everyone is welcome to contribute, and we value everybody's contribution. Code 4 | is thus not the only way to help the community. Answering questions, helping 5 | others, reaching out and improving the documentations are immensely valuable to 6 | the community. 7 | 8 | It also helps us if you spread the word: through blog posts, shout out on Twitter, or simply starring the repo to say "thank you". 9 | 10 | Whichever way you choose to contribute, please be mindful to respect our 11 | [code of conduct](https://github.com/stochasticai/x-stable-diffusion/blob/main/CODE_OF_CONDUCT.md). 12 | 13 | ## Ways to contribute 14 | 15 | * Fixing outstanding issues 16 | * Implementing new optimizations 17 | * Contributing to the examples or to the documentation 18 | * Submitting issues related to bugs or desired new features -------------------------------------------------------------------------------- /FlashAttention/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | FROM nvidia/cuda:11.6.0-devel-ubuntu20.04 3 | 4 | RUN apt-get update && apt-get install --no-install-recommends -y curl && apt-get -y install git 5 | 6 | ENV CONDA_AUTO_UPDATE_CONDA=false \ 7 | PATH=/opt/miniconda/bin:$PATH 8 | RUN curl -sLo ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh \ 9 | && chmod +x ~/miniconda.sh \ 10 | && ~/miniconda.sh -b -p /opt/miniconda \ 11 | && rm ~/miniconda.sh \ 12 | && sed -i "$ a PATH=/opt/miniconda/bin:\$PATH" /etc/environment 13 | 14 | RUN python3 -m pip --no-cache-dir install --upgrade pip 15 | 16 | WORKDIR /code 17 | 18 | RUN pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 -f https://download.pytorch.org/whl/torch_stable.html 19 | 20 | COPY requirements.txt /code/requirements.txt 21 | 22 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt 23 | 24 | RUN apt-get update && apt-get -y install wget 25 | 26 | RUN pip install git+https://github.com/facebookresearch/xformers@51dd119#egg=xformers 27 | 28 | COPY . /code/ 29 | 30 | RUN cd diffusers && pip install -e . 31 | 32 | EXPOSE 5000 33 | 34 | ENV USE_MEMORY_EFFICIENT_ATTENTION=1 35 | 36 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"] -------------------------------------------------------------------------------- /FlashAttention/README.md: -------------------------------------------------------------------------------- 1 | ## Flash-Attention Stable Diffusion Example 2 | 3 | ### Build Dependencies 4 | 5 | Require python 3.9 or python 3.10, Pytorch 1.12.1-cuda11.6. 6 | 7 | ``` 8 | conda create -n diffusion_fa python=3.10 9 | conda activate diffusion_fa 10 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116 11 | pip install git+https://github.com/facebookresearch/xformers@51dd119#egg=xformers 12 | cd diffusers 13 | pip install -e . 
14 | ``` 15 | 16 | Install libraries 17 | 18 | ``` 19 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html 20 | pip install -r requirements.txt 21 | ``` 22 | ### Benchmark 23 | 24 | You need to register on the Hugging Face Hub. Get your access token from [Hugging Face account settings](https://huggingface.co/settings/tokens). Then log in using the `huggingface-cli login` command. 25 | 26 | ``` 27 | USE_MEMORY_EFFICIENT_ATTENTION=1 python3 demo.py --benchmark 28 | ``` 29 | 30 | ### Deploy as a REST API endpoint 31 | 32 | Requirement: make sure the NVIDIA runtime is enabled when building the Docker image, as xformers requires a GPU to turn on some build flags. 33 | 34 | You need to provide your Hugging Face token in `server.py`. 35 | 36 | ``` 37 | docker build -t fa_diffusion . 38 | docker run -p 5000:5000 -ti --gpus=all fa_diffusion 39 | ``` 40 | 41 | Note: building xformers takes about 35 minutes, so be patient. 42 | 43 | ### Test API 44 | 45 | ``` 46 | python3 client.py 47 | ``` 48 | 49 | Check the resulting image: `output_api.png` -------------------------------------------------------------------------------- /FlashAttention/client.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | 5 | if __name__ == "__main__": 6 | text = "The Easter bunny riding a motorcycle in New York City" 7 | t0 = time.time() 8 | for i in range(50): 9 | print("Iteration: ", i) 10 | out = requests.post( 11 | "http://localhost:5000/predict/", data=json.dumps({"prompt": [text]}) 12 | ) 13 | t1 = time.time() 14 | print("Inference time is: ", (t1 - t0) / 50) 15 | with open("output_api.png", "wb") as f: 16 | f.write(out.content) 17 | -------------------------------------------------------------------------------- /FlashAttention/demo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | import time 4 | from tqdm import tqdm 5 | from diffusers import StableDiffusionPipeline 6 | 7 | 8 | def get_args(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument( 11 | "--prompt", 12 | default="Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci", 13 | help="input prompt", 14 | ) 15 | parser.add_argument("--batch_size", default=1, type=int, help="batch size") 16 | parser.add_argument( 17 | "--img_size", default=(512, 512), help="Unet input image size (h,w)" 18 | ) 19 | parser.add_argument( 20 | "--benchmark", 21 | action="store_true", 22 | help="Running benchmark by average num iteration", 23 | ) 24 | parser.add_argument( 25 | "--n_iters", default=50, help="Running benchmark by average num iteration" 26 | ) 27 | 28 | return parser.parse_args() 29 | 30 | 31 | if __name__ == "__main__": 32 | args = get_args() 33 | pipe = StableDiffusionPipeline.from_pretrained( 34 | "CompVis/stable-diffusion-v1-4", 35 | revision="fp16", 36 | torch_dtype=torch.float16, 37 | use_auth_token=True, 38 | ).to("cuda") 39 | if args.benchmark: 40 | n_iters = args.n_iters 41 | # warm up 42 | for i in tqdm(range(3)): 43 | with torch.inference_mode(), torch.autocast("cuda"): 44 | images = pipe(args.prompt, num_inference_steps=50) 45 | else: 46 | n_iters = 1 47 | 48 | start = time.time() 49 | for i in tqdm(range(n_iters)): 50 | with torch.inference_mode(), torch.autocast("cuda"): 51 | images = pipe(args.prompt, num_inference_steps=50) 52 | end = time.time() 53 | if args.benchmark: 54 | print("Average inference time is: ", 
(end - start) / n_iters) 55 | images.images[0].save("image_generated.png") 56 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: Report a bug on diffusers 3 | labels: [ "bug" ] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Thanks for taking the time to fill out this bug report! 9 | - type: textarea 10 | id: bug-description 11 | attributes: 12 | label: Describe the bug 13 | description: A clear and concise description of what the bug is. If you intend to submit a pull request for this issue, tell us in the description. Thanks! 14 | placeholder: Bug description 15 | validations: 16 | required: true 17 | - type: textarea 18 | id: reproduction 19 | attributes: 20 | label: Reproduction 21 | description: Please provide a minimal reproducible code which we can copy/paste and reproduce the issue. 22 | placeholder: Reproduction 23 | - type: textarea 24 | id: logs 25 | attributes: 26 | label: Logs 27 | description: "Please include the Python logs if you can." 28 | render: shell 29 | - type: textarea 30 | id: system-info 31 | attributes: 32 | label: System Info 33 | description: Please share your system info with us. You can run the command `diffusers-cli env` and copy-paste its output below. 34 | placeholder: diffusers version, platform, python version, ... 35 | validations: 36 | required: true 37 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | contact_links: 2 | - name: Forum 3 | url: https://discuss.huggingface.co/c/discussion-related-to-httpsgithubcomhuggingfacediffusers/63 4 | about: General usage questions and community discussions 5 | - name: Blank issue 6 | url: https://github.com/huggingface/diffusers/issues/new 7 | about: Please note that the Forum is in most places the right place for discussions 8 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature request" 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/ISSUE_TEMPLATE/feedback.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "💬 Feedback about API Design" 3 | about: Give feedback about the current API design 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What API design would you like to have changed or added to the library? Why?** 11 | 12 | **What use case would this enable or better enable? Can you give us a code example?** 13 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/ISSUE_TEMPLATE/new-model-addition.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F31F New model/pipeline/scheduler addition" 2 | description: Submit a proposal/request to implement a new diffusion model / pipeline / scheduler 3 | labels: [ "New model/pipeline/scheduler" ] 4 | 5 | body: 6 | - type: textarea 7 | id: description-request 8 | validations: 9 | required: true 10 | attributes: 11 | label: Model/Pipeline/Scheduler description 12 | description: | 13 | Put any and all important information relative to the model/pipeline/scheduler 14 | 15 | - type: checkboxes 16 | id: information-tasks 17 | attributes: 18 | label: Open source status 19 | description: | 20 | Please note that if the model implementation isn't available or if the weights aren't open-source, we are less likely to implement it in `diffusers`. 21 | options: 22 | - label: "The model implementation is available" 23 | - label: "The model weights are available (Only relevant if addition is not a scheduler)." 24 | 25 | - type: textarea 26 | id: additional-info 27 | attributes: 28 | label: Provide useful links for the implementation 29 | description: | 30 | Please provide information regarding the implementation, the weights, and the authors. 31 | Please mention the authors by @gh-username if you're aware of their usernames. 
32 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - doc-builder* 8 | - v*-release 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.sha }} 15 | package: diffusers 16 | secrets: 17 | token: ${{ secrets.HUGGINGFACE_PUSH }} 18 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | 6 | concurrency: 7 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 8 | cancel-in-progress: true 9 | 10 | jobs: 11 | build: 12 | uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main 13 | with: 14 | commit_sha: ${{ github.event.pull_request.head.sha }} 15 | pr_number: ${{ github.event.number }} 16 | package: diffusers 17 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/workflows/delete_doc_comment.yml: -------------------------------------------------------------------------------- 1 | name: Delete dev documentation 2 | 3 | on: 4 | pull_request: 5 | types: [ closed ] 6 | 7 | 8 | jobs: 9 | delete: 10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main 11 | with: 12 | pr_number: ${{ github.event.number }} 13 | package: diffusers 14 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/workflows/pr_quality.yml: -------------------------------------------------------------------------------- 1 | name: Run code quality checks 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | push: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | check_code_quality: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Set up Python 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: "3.7" 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install .[quality] 28 | - name: Check quality 29 | run: | 30 | black --check --preview examples tests src utils scripts 31 | isort --check-only examples tests src utils scripts 32 | flake8 examples tests src utils scripts 33 | doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source 34 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/workflows/pr_tests.yml: -------------------------------------------------------------------------------- 1 | name: Run non-slow tests 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | env: 13 | HF_HOME: /mnt/cache 14 | OMP_NUM_THREADS: 8 15 | MKL_NUM_THREADS: 8 16 | PYTEST_TIMEOUT: 60 17 | 18 | jobs: 19 | run_tests_cpu: 20 | name: Diffusers tests 21 | runs-on: [ self-hosted, docker-gpu ] 22 | 
container: 23 | image: python:3.7 24 | options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ 25 | 26 | steps: 27 | - name: Checkout diffusers 28 | uses: actions/checkout@v3 29 | with: 30 | fetch-depth: 2 31 | 32 | - name: Install dependencies 33 | run: | 34 | python -m pip install --upgrade pip 35 | python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cpu 36 | python -m pip install -e .[quality,test] 37 | 38 | - name: Environment 39 | run: | 40 | python utils/print_env.py 41 | 42 | - name: Run all non-slow selected tests on CPU 43 | run: | 44 | python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile -s tests/ 45 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/workflows/push_tests.yml: -------------------------------------------------------------------------------- 1 | name: Run all tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | env: 9 | HF_HOME: /mnt/cache 10 | OMP_NUM_THREADS: 8 11 | MKL_NUM_THREADS: 8 12 | PYTEST_TIMEOUT: 1000 13 | RUN_SLOW: yes 14 | 15 | jobs: 16 | run_tests_single_gpu: 17 | name: Diffusers tests 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | machine_type: [ single-gpu ] 22 | runs-on: [ self-hosted, docker-gpu, '${{ matrix.machine_type }}' ] 23 | container: 24 | image: nvcr.io/nvidia/pytorch:22.07-py3 25 | options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ 26 | 27 | steps: 28 | - name: Checkout diffusers 29 | uses: actions/checkout@v3 30 | with: 31 | fetch-depth: 2 32 | 33 | - name: NVIDIA-SMI 34 | run: | 35 | nvidia-smi 36 | 37 | - name: Install dependencies 38 | run: | 39 | python -m pip install --upgrade pip 40 | python -m pip uninstall -y torch torchvision torchtext 41 | python -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu116 42 | python -m pip install -e .[quality,test] 43 | 44 | - name: Environment 45 | run: | 46 | python utils/print_env.py 47 | 48 | - name: Run all (incl. 
slow) tests on GPU 49 | env: 50 | HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} 51 | run: | 52 | python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s tests/ 53 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Stale Bot 2 | 3 | on: 4 | schedule: 5 | - cron: "0 15 * * *" 6 | 7 | jobs: 8 | close_stale_issues: 9 | name: Close Stale Issues 10 | if: github.repository == 'huggingface/diffusers' 11 | runs-on: ubuntu-latest 12 | env: 13 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - name: Setup Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: 3.7 21 | 22 | - name: Install requirements 23 | run: | 24 | pip install PyGithub 25 | - name: Close stale issues 26 | run: | 27 | python utils/stale.py 28 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.github/workflows/typos.yml: -------------------------------------------------------------------------------- 1 | name: Check typos 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | 13 | - name: typos-action 14 | uses: crate-ci/typos@v1.12.4 15 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/.gitignore: -------------------------------------------------------------------------------- 1 | # Initially taken from Github's Python gitignore file 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # tests and logs 12 | tests/fixtures/cached_*_text.txt 13 | logs/ 14 | lightning_logs/ 15 | lang_code_data/ 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # Environments 98 | .env 99 | .venv 100 | env/ 101 | venv/ 102 | ENV/ 103 | env.bak/ 104 | venv.bak/ 105 | 106 | # Spyder project settings 107 | .spyderproject 108 | .spyproject 109 | 110 | # Rope project settings 111 | .ropeproject 112 | 113 | # mkdocs documentation 114 | /site 115 | 116 | # mypy 117 | .mypy_cache/ 118 | .dmypy.json 119 | dmypy.json 120 | 121 | # Pyre type checker 122 | .pyre/ 123 | 124 | # vscode 125 | .vs 126 | .vscode 127 | 128 | # Pycharm 129 | .idea 130 | 131 | # TF code 132 | tensorflow_code 133 | 134 | # Models 135 | proc_data 136 | 137 | # examples 138 | runs 139 | /runs_old 140 | /wandb 141 | /examples/runs 142 | /examples/**/*.args 143 | /examples/rag/sweep 144 | 145 | # data 146 | /data 147 | serialization_dir 148 | 149 | # emacs 150 | *.*~ 151 | debug.env 152 | 153 | # vim 154 | .*.swp 155 | 156 | #ctags 157 | tags 158 | 159 | # pre-commit 160 | .pre-commit* 161 | 162 | # .lock 163 | *.lock 164 | 165 | # DS_Store (MacOS) 166 | .DS_Store -------------------------------------------------------------------------------- /FlashAttention/diffusers/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include src/diffusers/utils/model_card_template.md 2 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: deps_table_update modified_only_fixup extra_style_checks quality style fixup fix-copies test test-examples 2 | 3 | # make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!) 
4 | export PYTHONPATH = src 5 | 6 | check_dirs := examples scripts src tests utils 7 | 8 | modified_only_fixup: 9 | $(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs))) 10 | @if test -n "$(modified_py_files)"; then \ 11 | echo "Checking/fixing $(modified_py_files)"; \ 12 | black --preview $(modified_py_files); \ 13 | isort $(modified_py_files); \ 14 | flake8 $(modified_py_files); \ 15 | else \ 16 | echo "No library .py files were modified"; \ 17 | fi 18 | 19 | # Update src/diffusers/dependency_versions_table.py 20 | 21 | deps_table_update: 22 | @python setup.py deps_table_update 23 | 24 | deps_table_check_updated: 25 | @md5sum src/diffusers/dependency_versions_table.py > md5sum.saved 26 | @python setup.py deps_table_update 27 | @md5sum -c --quiet md5sum.saved || (printf "\nError: the version dependency table is outdated.\nPlease run 'make fixup' or 'make style' and commit the changes.\n\n" && exit 1) 28 | @rm md5sum.saved 29 | 30 | # autogenerating code 31 | 32 | autogenerate_code: deps_table_update 33 | 34 | # Check that the repo is in a good state 35 | 36 | repo-consistency: 37 | python utils/check_dummies.py 38 | python utils/check_repo.py 39 | python utils/check_inits.py 40 | 41 | # this target runs checks on all files 42 | 43 | quality: 44 | black --check --preview $(check_dirs) 45 | isort --check-only $(check_dirs) 46 | flake8 $(check_dirs) 47 | doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source 48 | 49 | # Format source code automatically and check is there are any problems left that need manual fixing 50 | 51 | extra_style_checks: 52 | python utils/custom_init_isort.py 53 | doc-builder style src/diffusers docs/source --max_len 119 --path_to_docs docs/source 54 | 55 | # this target runs checks on all files and potentially modifies some of them 56 | 57 | style: 58 | black --preview $(check_dirs) 59 | isort $(check_dirs) 60 | ${MAKE} autogenerate_code 61 | ${MAKE} extra_style_checks 62 | 63 | # Super fast fix and check target that only works on relevant modified files since the branch was made 64 | 65 | fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency 66 | 67 | # Make marked copies of snippets of codes conform to the original 68 | 69 | fix-copies: 70 | python utils/check_dummies.py --fix_and_overwrite 71 | 72 | # Run tests for the library 73 | 74 | test: 75 | python -m pytest -n auto --dist=loadfile -s -v ./tests/ 76 | 77 | # Run tests for examples 78 | 79 | test-examples: 80 | python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/ 81 | 82 | 83 | # Release stuff 84 | 85 | pre-release: 86 | python utils/release.py 87 | 88 | pre-patch: 89 | python utils/release.py --patch 90 | 91 | post-release: 92 | python utils/release.py --post_release 93 | 94 | post-patch: 95 | python utils/release.py --post_release --patch 96 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/_typos.toml: -------------------------------------------------------------------------------- 1 | # Files for typos 2 | # Instruction: https://github.com/marketplace/actions/typos-action#getting-started 3 | 4 | [default.extend-identifiers] 5 | 6 | [default.extend-words] 7 | NIN_="NIN" # NIN is used in scripts/convert_ncsnpp_original_checkpoint_to_diffusers.py 8 | nd="np" # nd may be np (numpy) 9 | 10 | 11 | [files] 12 | extend-exclude = ["_typos.toml"] 13 | -------------------------------------------------------------------------------- 
/FlashAttention/diffusers/docs/source/_toctree.yml: -------------------------------------------------------------------------------- 1 | - sections: 2 | - local: index 3 | title: "🧨 Diffusers" 4 | - local: quicktour 5 | title: "Quicktour" 6 | - local: installation 7 | title: "Installation" 8 | title: "Get started" 9 | - sections: 10 | - sections: 11 | - local: using-diffusers/loading 12 | title: "Loading Pipelines, Models, and Schedulers" 13 | - local: using-diffusers/configuration 14 | title: "Configuring Pipelines, Models, and Schedulers" 15 | title: "Loading" 16 | - sections: 17 | - local: using-diffusers/unconditional_image_generation 18 | title: "Unconditional Image Generation" 19 | - local: using-diffusers/conditional_image_generation 20 | title: "Text-to-Image Generation" 21 | - local: using-diffusers/img2img 22 | title: "Text-Guided Image-to-Image" 23 | - local: using-diffusers/inpaint 24 | title: "Text-Guided Image-Inpainting" 25 | - local: using-diffusers/custom 26 | title: "Create a custom pipeline" 27 | title: "Pipelines for Inference" 28 | title: "Using Diffusers" 29 | - sections: 30 | - local: optimization/fp16 31 | title: "Memory and Speed" 32 | - local: optimization/onnx 33 | title: "ONNX" 34 | - local: optimization/open_vino 35 | title: "Open Vino" 36 | - local: optimization/mps 37 | title: "MPS" 38 | title: "Optimization/Special Hardware" 39 | - sections: 40 | - local: training/overview 41 | title: "Overview" 42 | - local: training/unconditional_training 43 | title: "Unconditional Image Generation" 44 | - local: training/text_inversion 45 | title: "Text Inversion" 46 | - local: training/text2image 47 | title: "Text-to-image" 48 | title: "Training" 49 | - sections: 50 | - local: conceptual/stable_diffusion 51 | title: "Stable Diffusion" 52 | - local: conceptual/philosophy 53 | title: "Philosophy" 54 | - local: conceptual/contribution 55 | title: "How to contribute?" 
56 | title: "Conceptual Guides" 57 | - sections: 58 | - sections: 59 | - local: api/models 60 | title: "Models" 61 | - local: api/schedulers 62 | title: "Schedulers" 63 | - local: api/diffusion_pipeline 64 | title: "Diffusion Pipeline" 65 | - local: api/logging 66 | title: "Logging" 67 | - local: api/configuration 68 | title: "Configuration" 69 | - local: api/outputs 70 | title: "Outputs" 71 | title: "Main Classes" 72 | - sections: 73 | - local: api/pipelines/overview 74 | title: "Overview" 75 | - local: api/pipelines/ddim 76 | title: "DDIM" 77 | - local: api/pipelines/ddpm 78 | title: "DDPM" 79 | - local: api/pipelines/latent_diffusion 80 | title: "Latent Diffusion" 81 | - local: api/pipelines/latent_diffusion_uncond 82 | title: "Unconditional Latent Diffusion" 83 | - local: api/pipelines/pndm 84 | title: "PNDM" 85 | - local: api/pipelines/score_sde_ve 86 | title: "Score SDE VE" 87 | - local: api/pipelines/stable_diffusion 88 | title: "Stable Diffusion" 89 | - local: api/pipelines/stochastic_karras_ve 90 | title: "Stochastic Karras VE" 91 | title: "Pipelines" 92 | title: "API" 93 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration 14 | 15 | In Diffusers, schedulers of type [`schedulers.scheduling_utils.SchedulerMixin`], and models of type [`ModelMixin`] inherit from [`ConfigMixin`] which conveniently takes care of storing all parameters that are 16 | passed to the respective `__init__` methods in a JSON-configuration file. 17 | 18 | TODO(PVP) - add example and better info here 19 | 20 | ## ConfigMixin 21 | [[autodoc]] ConfigMixin 22 | - from_config 23 | - save_config 24 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/diffusion_pipeline.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Pipelines 14 | 15 | The [`DiffusionPipeline`] is the easiest way to load any pretrained diffusion pipeline from the [Hub](https://huggingface.co/models?library=diffusers) and to use it in inference. 16 | 17 | 18 | 19 | One should not use the Diffusion Pipeline class for training or fine-tuning a diffusion model. Individual 20 | components of diffusion pipelines are usually trained individually, so we suggest to directly work 21 | with [`UNetModel`] and [`UNetConditionModel`]. 22 | 23 | 24 | 25 | Any diffusion pipeline that is loaded with [`~DiffusionPipeline.from_pretrained`] will automatically 26 | detect the pipeline type, *e.g.* [`StableDiffusionPipeline`] and consequently load each component of the 27 | pipeline and pass them into the `__init__` function of the pipeline, *e.g.* [`~StableDiffusionPipeline.__init__`]. 28 | 29 | Any pipeline object can be saved locally with [`~DiffusionPipeline.save_pretrained`]. 
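As a minimal sketch of that round trip (the checkpoint id and local path below are only examples; gated checkpoints may additionally require `use_auth_token`):

```python
from diffusers import DiffusionPipeline

# from_pretrained detects the pipeline class stored in the checkpoint
# (e.g. StableDiffusionPipeline) and loads all of its components.
pipeline = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")

# save_pretrained writes every component plus the pipeline config to a folder...
pipeline.save_pretrained("./my-pipeline")

# ...which can later be loaded back with from_pretrained.
pipeline = DiffusionPipeline.from_pretrained("./my-pipeline")
```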
30 | 31 | ## DiffusionPipeline 32 | [[autodoc]] DiffusionPipeline 33 | - from_pretrained 34 | - save_pretrained 35 | 36 | ## ImagePipelineOutput 37 | By default diffusion pipelines return an object of class 38 | 39 | [[autodoc]] pipeline_utils.ImagePipelineOutput 40 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/logging.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Logging 14 | 15 | 🧨 Diffusers has a centralized logging system, so that you can setup the verbosity of the library easily. 16 | 17 | Currently the default verbosity of the library is `WARNING`. 18 | 19 | To change the level of verbosity, just use one of the direct setters. For instance, here is how to change the verbosity 20 | to the INFO level. 21 | 22 | ```python 23 | import diffusers 24 | 25 | diffusers.logging.set_verbosity_info() 26 | ``` 27 | 28 | You can also use the environment variable `DIFFUSERS_VERBOSITY` to override the default verbosity. You can set it 29 | to one of the following: `debug`, `info`, `warning`, `error`, `critical`. For example: 30 | 31 | ```bash 32 | DIFFUSERS_VERBOSITY=error ./myprogram.py 33 | ``` 34 | 35 | Additionally, some `warnings` can be disabled by setting the environment variable 36 | `DIFFUSERS_NO_ADVISORY_WARNINGS` to a true value, like *1*. This will disable any warning that is logged using 37 | [`logger.warning_advice`]. For example: 38 | 39 | ```bash 40 | DIFFUSERS_NO_ADVISORY_WARNINGS=1 ./myprogram.py 41 | ``` 42 | 43 | Here is an example of how to use the same logger as the library in your own module or script: 44 | 45 | ```python 46 | from diffusers.utils import logging 47 | 48 | logging.set_verbosity_info() 49 | logger = logging.get_logger("diffusers") 50 | logger.info("INFO") 51 | logger.warning("WARN") 52 | ``` 53 | 54 | 55 | All the methods of this logging module are documented below, the main ones are 56 | [`logging.get_verbosity`] to get the current level of verbosity in the logger and 57 | [`logging.set_verbosity`] to set the verbosity to the level of your choice. In order (from the least 58 | verbose to the most verbose), those levels (with their corresponding int values in parenthesis) are: 59 | 60 | - `diffusers.logging.CRITICAL` or `diffusers.logging.FATAL` (int value, 50): only report the most 61 | critical errors. 62 | - `diffusers.logging.ERROR` (int value, 40): only report errors. 63 | - `diffusers.logging.WARNING` or `diffusers.logging.WARN` (int value, 30): only reports error and 64 | warnings. This the default level used by the library. 65 | - `diffusers.logging.INFO` (int value, 20): reports error, warnings and basic information. 66 | - `diffusers.logging.DEBUG` (int value, 10): report all information. 67 | 68 | By default, `tqdm` progress bars will be displayed during model download. [`logging.disable_progress_bar`] and [`logging.enable_progress_bar`] can be used to suppress or unsuppress this behavior. 
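For example, a short sketch:

```python
from diffusers.utils import logging

# Hide the tqdm progress bars shown while model weights are downloaded...
logging.disable_progress_bar()

# ...and re-enable them later if needed.
logging.enable_progress_bar()
```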
69 | 70 | ## Base setters 71 | 72 | [[autodoc]] logging.set_verbosity_error 73 | 74 | [[autodoc]] logging.set_verbosity_warning 75 | 76 | [[autodoc]] logging.set_verbosity_info 77 | 78 | [[autodoc]] logging.set_verbosity_debug 79 | 80 | ## Other functions 81 | 82 | [[autodoc]] logging.get_verbosity 83 | 84 | [[autodoc]] logging.set_verbosity 85 | 86 | [[autodoc]] logging.get_logger 87 | 88 | [[autodoc]] logging.enable_default_handler 89 | 90 | [[autodoc]] logging.disable_default_handler 91 | 92 | [[autodoc]] logging.enable_explicit_format 93 | 94 | [[autodoc]] logging.reset_format 95 | 96 | [[autodoc]] logging.enable_progress_bar 97 | 98 | [[autodoc]] logging.disable_progress_bar 99 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/models.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Models 14 | 15 | Diffusers contains pretrained models for popular algorithms and modules for creating the next set of diffusion models. 16 | The primary function of these models is to denoise an input sample, by modeling the distribution $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$. 17 | The models are built on the base class [`ModelMixin`], which is a `torch.nn.Module` with basic functionality for saving and loading models both locally and from the Hugging Face Hub. 18 | 19 | ## ModelMixin 20 | [[autodoc]] ModelMixin 21 | 22 | ## UNet2DOutput 23 | [[autodoc]] models.unet_2d.UNet2DOutput 24 | 25 | ## UNet2DModel 26 | [[autodoc]] UNet2DModel 27 | 28 | ## UNet2DConditionOutput 29 | [[autodoc]] models.unet_2d_condition.UNet2DConditionOutput 30 | 31 | ## UNet2DConditionModel 32 | [[autodoc]] UNet2DConditionModel 33 | 34 | ## DecoderOutput 35 | [[autodoc]] models.vae.DecoderOutput 36 | 37 | ## VQEncoderOutput 38 | [[autodoc]] models.vae.VQEncoderOutput 39 | 40 | ## VQModel 41 | [[autodoc]] VQModel 42 | 43 | ## AutoencoderKLOutput 44 | [[autodoc]] models.vae.AutoencoderKLOutput 45 | 46 | ## AutoencoderKL 47 | [[autodoc]] AutoencoderKL 48 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/outputs.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # BaseOutputs 14 | 15 | All models have outputs that are instances of subclasses of [`~utils.BaseOutput`]. Those are 16 | data structures containing all the information returned by the model, but they can also be used as tuples or 17 | dictionaries. 18 | 19 | Let's see how this looks in an example: 20 | 21 | ```python 22 | from diffusers import DDIMPipeline 23 | 24 | pipeline = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32") 25 | outputs = pipeline() 26 | ``` 27 | 28 | The `outputs` object is a [`~pipeline_utils.ImagePipelineOutput`]; as we can see in the 29 | documentation of that class below, it has an `images` attribute. 30 | 31 | You can access each attribute as you would usually do, and if that attribute has not been returned by the model, you will get `None`: 32 | 33 | ```python 34 | outputs.images 35 | ``` 36 | 37 | or via keyword lookup: 38 | 39 | ```python 40 | outputs["images"] 41 | ``` 42 | 43 | When treating our `outputs` object as a tuple, it only considers the attributes that don't have `None` values. 44 | Here, for instance, we could retrieve the images via indexing: 45 | 46 | ```python 47 | outputs[:1] 48 | ``` 49 | 50 | which will return the tuple `(outputs.images,)`.
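To make the tuple and dictionary behaviour explicit, here is a short sketch reusing the `google/ddpm-cifar10-32` pipeline from above together with the `to_tuple` method documented below:

```python
from diffusers import DDIMPipeline

pipeline = DDIMPipeline.from_pretrained("google/ddpm-cifar10-32")
outputs = pipeline()

images = outputs.images          # attribute access
same_images = outputs["images"]  # dictionary-style lookup
as_tuple = outputs.to_tuple()    # plain tuple containing only the non-None fields
```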
51 | 52 | ## BaseOutput 53 | 54 | [[autodoc]] utils.BaseOutput 55 | - to_tuple 56 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/pipelines/ddim.mdx: -------------------------------------------------------------------------------- 1 | # DDIM 2 | 3 | ## Overview 4 | 5 | [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) (DDIM) by Jiaming Song, Chenlin Meng and Stefano Ermon. 6 | 7 | The abstract of the paper is the following: 8 | 9 | Denoising diffusion probabilistic models (DDPMs) have achieved high quality image generation without adversarial training, yet they require simulating a Markov chain for many steps to produce a sample. To accelerate sampling, we present denoising diffusion implicit models (DDIMs), a more efficient class of iterative implicit probabilistic models with the same training procedure as DDPMs. In DDPMs, the generative process is defined as the reverse of a Markovian diffusion process. We construct a class of non-Markovian diffusion processes that lead to the same training objective, but whose reverse process can be much faster to sample from. We empirically demonstrate that DDIMs can produce high quality samples 10× to 50× faster in terms of wall-clock time compared to DDPMs, allow us to trade off computation for sample quality, and can perform semantically meaningful image interpolation directly in the latent space. 10 | 11 | The original codebase of this paper can be found [here](https://github.com/ermongroup/ddim). 12 | 13 | ## Available Pipelines: 14 | 15 | | Pipeline | Tasks | Colab 16 | |---|---|:---:| 17 | | [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddim/pipeline_ddim.py) | *Unconditional Image Generation* | - | 18 | 19 | 20 | ## DDIMPipeline 21 | [[autodoc]] DDIMPipeline 22 | - __call__ 23 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/pipelines/ddpm.mdx: -------------------------------------------------------------------------------- 1 | # DDPM 2 | 3 | ## Overview 4 | 5 | [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) 6 | (DDPM) by Jonathan Ho, Ajay Jain and Pieter Abbeel proposes the diffusion based model of the same name, but in the context of the 🤗 Diffusers library, DDPM refers to the discrete denoising scheduler from the paper as well as the pipeline. 7 | 8 | The abstract of the paper is the following: 9 | 10 | We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN. 11 | 12 | The original codebase of this paper can be found [here](https://github.com/hojonathanho/diffusion). 
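As a minimal usage sketch (the `google/ddpm-cifar10-32` checkpoint is just one example of a compatible Hub checkpoint), unconditional sampling with the pipeline documented below could look like:

```python
from diffusers import DDPMPipeline

# Load an unconditional CIFAR-10 checkpoint and draw a single sample
pipe = DDPMPipeline.from_pretrained("google/ddpm-cifar10-32")
image = pipe().images[0]
image.save("ddpm_generated_image.png")
```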
13 | 14 | 15 | ## Available Pipelines: 16 | 17 | | Pipeline | Tasks | Colab 18 | |---|---|:---:| 19 | | [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/ddpm/pipeline_ddpm.py) | *Unconditional Image Generation* | - | 20 | 21 | 22 | # DDPMPipeline 23 | [[autodoc]] DDPMPipeline 24 | - __call__ 25 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/pipelines/latent_diffusion.mdx: -------------------------------------------------------------------------------- 1 | # Latent Diffusion 2 | 3 | ## Overview 4 | 5 | Latent Diffusion was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer. 6 | 7 | The abstract of the paper is the following: 8 | 9 | *By decomposing the image formation process into a sequential application of denoising autoencoders, diffusion models (DMs) achieve state-of-the-art synthesis results on image data and beyond. Additionally, their formulation allows for a guiding mechanism to control the image generation process without retraining. However, since these models typically operate directly in pixel space, optimization of powerful DMs often consumes hundreds of GPU days and inference is expensive due to sequential evaluations. To enable DM training on limited computational resources while retaining their quality and flexibility, we apply them in the latent space of powerful pretrained autoencoders. In contrast to previous work, training diffusion models on such a representation allows for the first time to reach a near-optimal point between complexity reduction and detail preservation, greatly boosting visual fidelity. By introducing cross-attention layers into the model architecture, we turn diffusion models into powerful and flexible generators for general conditioning inputs such as text or bounding boxes and high-resolution synthesis becomes possible in a convolutional manner. Our latent diffusion models (LDMs) achieve a new state of the art for image inpainting and highly competitive performance on various tasks, including unconditional image generation, semantic scene synthesis, and super-resolution, while significantly reducing computational requirements compared to pixel-based DMs.* 10 | 11 | The original codebase can be found [here](https://github.com/CompVis/latent-diffusion). 12 | 13 | ## Tips: 14 | 15 | - 16 | - 17 | - 18 | 19 | ## Available Pipelines: 20 | 21 | | Pipeline | Tasks | Colab 22 | |---|---|:---:| 23 | | [pipeline_latent_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py) | *Text-to-Image Generation* | - | 24 | 25 | ## Examples: 26 | 27 | 28 | ## LDMTextToImagePipeline 29 | [[autodoc]] pipelines.latent_diffusion.pipeline_latent_diffusion.LDMTextToImagePipeline 30 | - __call__ 31 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/pipelines/latent_diffusion_uncond.mdx: -------------------------------------------------------------------------------- 1 | # Unconditional Latent Diffusion 2 | 3 | ## Overview 4 | 5 | Unconditional Latent Diffusion was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer. 
6 | 7 | The abstract of the paper is the following: 8 | 9 | *By decomposing the image formation process into a sequential application of denoising autoencoders, diffusion models (DMs) achieve state-of-the-art synthesis results on image data and beyond. Additionally, their formulation allows for a guiding mechanism to control the image generation process without retraining. However, since these models typically operate directly in pixel space, optimization of powerful DMs often consumes hundreds of GPU days and inference is expensive due to sequential evaluations. To enable DM training on limited computational resources while retaining their quality and flexibility, we apply them in the latent space of powerful pretrained autoencoders. In contrast to previous work, training diffusion models on such a representation allows for the first time to reach a near-optimal point between complexity reduction and detail preservation, greatly boosting visual fidelity. By introducing cross-attention layers into the model architecture, we turn diffusion models into powerful and flexible generators for general conditioning inputs such as text or bounding boxes and high-resolution synthesis becomes possible in a convolutional manner. Our latent diffusion models (LDMs) achieve a new state of the art for image inpainting and highly competitive performance on various tasks, including unconditional image generation, semantic scene synthesis, and super-resolution, while significantly reducing computational requirements compared to pixel-based DMs.* 10 | 11 | The original codebase can be found [here](https://github.com/CompVis/latent-diffusion). 12 | 13 | ## Tips: 14 | 15 | - 16 | - 17 | - 18 | 19 | ## Available Pipelines: 20 | 21 | | Pipeline | Tasks | Colab 22 | |---|---|:---:| 23 | | [pipeline_latent_diffusion_uncond.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py) | *Unconditional Image Generation* | - | 24 | 25 | ## Examples: 26 | 27 | ## LDMPipeline 28 | [[autodoc]] LDMPipeline 29 | - __call__ 30 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/pipelines/pndm.mdx: -------------------------------------------------------------------------------- 1 | # PNDM 2 | 3 | ## Overview 4 | 5 | [Pseudo Numerical methods for Diffusion Models on manifolds](https://arxiv.org/abs/2202.09778) (PNDM) by Luping Liu, Yi Ren, Zhijie Lin and Zhou Zhao. 6 | 7 | The abstract of the paper is the following: 8 | 9 | Denoising Diffusion Probabilistic Models (DDPMs) can generate high-quality samples such as image and audio samples. However, DDPMs require hundreds to thousands of iterations to produce final samples. Several prior works have successfully accelerated DDPMs through adjusting the variance schedule (e.g., Improved Denoising Diffusion Probabilistic Models) or the denoising equation (e.g., Denoising Diffusion Implicit Models (DDIMs)). However, these acceleration methods cannot maintain the quality of samples and even introduce new noise at a high speedup rate, which limit their practicability. To accelerate the inference process while keeping the sample quality, we provide a fresh perspective that DDPMs should be treated as solving differential equations on manifolds. Under such a perspective, we propose pseudo numerical methods for diffusion models (PNDMs). 
Specifically, we figure out how to solve differential equations on manifolds and show that DDIMs are simple cases of pseudo numerical methods. We change several classical numerical methods to corresponding pseudo numerical methods and find that the pseudo linear multi-step method is the best in most situations. According to our experiments, by directly using pre-trained models on Cifar10, CelebA and LSUN, PNDMs can generate higher quality synthetic images with only 50 steps compared with 1000-step DDIMs (20x speedup), significantly outperform DDIMs with 250 steps (by around 0.4 in FID) and have good generalization on different variance schedules. 10 | 11 | The original codebase can be found [here](https://github.com/luping-liu/PNDM). 12 | 13 | ## Available Pipelines: 14 | 15 | | Pipeline | Tasks | Colab 16 | |---|---|:---:| 17 | | [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pndm/pipeline_pndm.py) | *Unconditional Image Generation* | - | 18 | 19 | 20 | ## PNDMPipeline 21 | [[autodoc]] pipelines.pndm.pipeline_pndm.PNDMPipeline 22 | - __call__ 23 | 24 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/pipelines/score_sde_ve.mdx: -------------------------------------------------------------------------------- 1 | # Score SDE VE 2 | 3 | ## Overview 4 | 5 | [Score-Based Generative Modeling through Stochastic Differential Equations](https://arxiv.org/abs/2011.13456) (Score SDE) by Yang Song, Jascha Sohl-Dickstein, Diederik P. Kingma, Abhishek Kumar, Stefano Ermon and Ben Poole. 6 | 7 | The abstract of the paper is the following: 8 | 9 | Creating noise from data is easy; creating data from noise is generative modeling. We present a stochastic differential equation (SDE) that smoothly transforms a complex data distribution to a known prior distribution by slowly injecting noise, and a corresponding reverse-time SDE that transforms the prior distribution back into the data distribution by slowly removing the noise. Crucially, the reverse-time SDE depends only on the time-dependent gradient field (\aka, score) of the perturbed data distribution. By leveraging advances in score-based generative modeling, we can accurately estimate these scores with neural networks, and use numerical SDE solvers to generate samples. We show that this framework encapsulates previous approaches in score-based generative modeling and diffusion probabilistic modeling, allowing for new sampling procedures and new modeling capabilities. In particular, we introduce a predictor-corrector framework to correct errors in the evolution of the discretized reverse-time SDE. We also derive an equivalent neural ODE that samples from the same distribution as the SDE, but additionally enables exact likelihood computation, and improved sampling efficiency. In addition, we provide a new way to solve inverse problems with score-based models, as demonstrated with experiments on class-conditional generation, image inpainting, and colorization. Combined with multiple architectural improvements, we achieve record-breaking performance for unconditional image generation on CIFAR-10 with an Inception score of 9.89 and FID of 2.20, a competitive likelihood of 2.99 bits/dim, and demonstrate high fidelity generation of 1024 x 1024 images for the first time from a score-based generative model. 10 | 11 | The original codebase can be found [here](https://github.com/yang-song/score_sde_pytorch). 
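For orientation, a minimal unconditional sampling sketch could look as follows; the `google/ncsnpp-church-256` checkpoint and the step count are assumptions, and sampling is slow because the SDE solver takes many steps:

```python
from diffusers import ScoreSdeVePipeline

pipe = ScoreSdeVePipeline.from_pretrained("google/ncsnpp-church-256")
# The reverse-time SDE is discretized into many predictor-corrector steps
image = pipe(num_inference_steps=2000).images[0]
image.save("sde_ve_generated_image.png")
```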
12 | 13 | This pipeline implements the Variance Expanding (VE) variant of the method. 14 | 15 | ## Available Pipelines: 16 | 17 | | Pipeline | Tasks | Colab 18 | |---|---|:---:| 19 | | [pipeline_score_sde_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py) | *Unconditional Image Generation* | - | 20 | 21 | ## ScoreSdeVePipeline 22 | [[autodoc]] ScoreSdeVePipeline 23 | - __call__ 24 | 25 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/pipelines/stable_diffusion.mdx: -------------------------------------------------------------------------------- 1 | # Stable diffusion pipelines 2 | 3 | Stable Diffusion is a text-to-image _latent diffusion_ model created by the researchers and engineers from [CompVis](https://github.com/CompVis), [Stability AI](https://stability.ai/) and [LAION](https://laion.ai/). It's trained on 512x512 images from a subset of the [LAION-5B](https://laion.ai/blog/laion-5b/) dataset. This model uses a frozen CLIP ViT-L/14 text encoder to condition the model on text prompts. With its 860M UNet and 123M text encoder, the model is relatively lightweight and can run on consumer GPUs. 4 | 5 | Latent diffusion is the research on top of which Stable Diffusion was built. It was proposed in [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) by Robin Rombach, Andreas Blattmann, Dominik Lorenz, Patrick Esser, Björn Ommer. You can learn more details about it in the [specific pipeline for latent diffusion](pipelines/latent_diffusion) that is part of 🤗 Diffusers. 6 | 7 | For more details about how Stable Diffusion works and how it differs from the base latent diffusion model, please refer to the official [launch announcement post](https://stability.ai/blog/stable-diffusion-announcement) and [this section of our own blog post](https://huggingface.co/blog/stable_diffusion#how-does-stable-diffusion-work). 
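For a first impression, a minimal text-to-image sketch (assuming access to the `CompVis/stable-diffusion-v1-4` weights and a CUDA GPU) could look like:

```python
from torch import autocast
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", use_auth_token=True
).to("cuda")

prompt = "a photograph of an astronaut riding a horse"
with autocast("cuda"):
    image = pipe(prompt, guidance_scale=7.5).images[0]

image.save("astronaut_rides_horse.png")
```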
8 | 9 | *Tips*: 10 | - To tweak your prompts on a specific result you liked, you can generate your own latents, as demonstrated in the following notebook: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pcuenca/diffusers-examples/blob/main/notebooks/stable-diffusion-seeds.ipynb) 11 | 12 | *Overview*: 13 | 14 | | Pipeline | Tasks | Colab | Demo 15 | |---|---|:---:|:---:| 16 | | [pipeline_stable_diffusion.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py) | *Text-to-Image Generation* | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_diffusion.ipynb) | [🤗 Stable Diffusion](https://huggingface.co/spaces/stabilityai/stable-diffusion) 17 | | [pipeline_stable_diffusion_img2img.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py) | *Image-to-Image Text-Guided Generation* | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) | [🤗 Diffuse the Rest](https://huggingface.co/spaces/huggingface/diffuse-the-rest) 18 | | [pipeline_stable_diffusion_inpaint.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py) | **Experimental** – *Text-Guided Image Inpainting* | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) | Coming soon 19 | 20 | ## StableDiffusionPipelineOutput 21 | [[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput 22 | 23 | ## StableDiffusionPipeline 24 | [[autodoc]] StableDiffusionPipeline 25 | - __call__ 26 | - enable_attention_slicing 27 | - disable_attention_slicing 28 | 29 | ## StableDiffusionImg2ImgPipeline 30 | [[autodoc]] StableDiffusionImg2ImgPipeline 31 | - __call__ 32 | - enable_attention_slicing 33 | - disable_attention_slicing 34 | 35 | ## StableDiffusionInpaintPipeline 36 | [[autodoc]] StableDiffusionInpaintPipeline 37 | - __call__ 38 | - enable_attention_slicing 39 | - disable_attention_slicing 40 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/api/pipelines/stochastic_karras_ve.mdx: -------------------------------------------------------------------------------- 1 | # Stochastic Karras VE 2 | 3 | ## Overview 4 | 5 | [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364) by Tero Karras, Miika Aittala, Timo Aila and Samuli Laine. 6 | 7 | The abstract of the paper is the following: 8 | 9 | We argue that the theory and practice of diffusion-based generative models are currently unnecessarily convoluted and seek to remedy the situation by presenting a design space that clearly separates the concrete design choices. This lets us identify several changes to both the sampling and training processes, as well as preconditioning of the score networks. 
Together, our improvements yield new state-of-the-art FID of 1.79 for CIFAR-10 in a class-conditional setting and 1.97 in an unconditional setting, with much faster sampling (35 network evaluations per image) than prior designs. To further demonstrate their modular nature, we show that our design changes dramatically improve both the efficiency and quality obtainable with pre-trained score networks from previous work, including improving the FID of an existing ImageNet-64 model from 2.07 to near-SOTA 1.55. 10 | 11 | This pipeline implements the Stochastic sampling tailored to the Variance-Expanding (VE) models. 12 | 13 | 14 | ## Available Pipelines: 15 | 16 | | Pipeline | Tasks | Colab 17 | |---|---|:---:| 18 | | [pipeline_stochastic_karras_ve.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py) | *Unconditional Image Generation* | - | 19 | 20 | 21 | ## KarrasVePipeline 22 | [[autodoc]] KarrasVePipeline 23 | - __call__ 24 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/conceptual/philosophy.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Philosophy 14 | 15 | - Readability and clarity are preferred over highly optimized code. A strong importance is put on providing readable, intuitive and elementary code design. *E.g.*, the provided [schedulers](https://github.com/huggingface/diffusers/tree/main/src/diffusers/schedulers) are separated from the provided [models](https://github.com/huggingface/diffusers/tree/main/src/diffusers/models) and use well-commented code that can be read alongside the original paper. 16 | - Diffusers is **modality independent** and focuses on providing pretrained models and tools to build systems that generate **continuous outputs**, *e.g.* vision and audio. This is one of the guiding goals even if the initial pipelines are devoted to vision tasks. 17 | - Diffusion models and schedulers are provided as concise, elementary building blocks. In contrast, diffusion pipelines are a collection of end-to-end diffusion systems that can be used out-of-the-box, should stay as close as possible to their original implementations and can include components of other libraries, such as text encoders. Examples of diffusion pipelines are [Glide](https://github.com/openai/glide-text2im), [Latent Diffusion](https://github.com/CompVis/latent-diffusion) and [Stable Diffusion](https://github.com/compvis/stable-diffusion). 
18 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/conceptual/stable_diffusion.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Stable Diffusion 14 | 15 | Under construction 🚧 16 | 17 | For now please visit this [very in-detail blog post](https://huggingface.co/blog/stable_diffusion) 18 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/imgs/diffusers_library.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/FlashAttention/diffusers/docs/source/imgs/diffusers_library.jpg -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/installation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Installation 14 | 15 | Install Diffusers for with PyTorch. Support for other libraries will come in the future 16 | 17 | 🤗 Diffusers is tested on Python 3.7+, and PyTorch 1.7.0+. 18 | 19 | ## Install with pip 20 | 21 | You should install 🤗 Diffusers in a [virtual environment](https://docs.python.org/3/library/venv.html). 22 | If you're unfamiliar with Python virtual environments, take a look at this [guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). 23 | A virtual environment makes it easier to manage different projects, and avoid compatibility issues between dependencies. 24 | 25 | Start by creating a virtual environment in your project directory: 26 | 27 | ```bash 28 | python -m venv .env 29 | ``` 30 | 31 | Activate the virtual environment: 32 | 33 | ```bash 34 | source .env/bin/activate 35 | ``` 36 | 37 | Now you're ready to install 🤗 Diffusers with the following command: 38 | 39 | ```bash 40 | pip install diffusers 41 | ``` 42 | 43 | ## Install from source 44 | 45 | Install 🤗 Diffusers from source with the following command: 46 | 47 | ```bash 48 | pip install git+https://github.com/huggingface/diffusers 49 | ``` 50 | 51 | This command installs the bleeding edge `main` version rather than the latest `stable` version. 52 | The `main` version is useful for staying up-to-date with the latest developments. 53 | For instance, if a bug has been fixed since the last official release but a new release hasn't been rolled out yet. 54 | However, this means the `main` version may not always be stable. 55 | We strive to keep the `main` version operational, and most issues are usually resolved within a few hours or a day. 56 | If you run into a problem, please open an [Issue](https://github.com/huggingface/transformers/issues) so we can fix it even sooner! 57 | 58 | ## Editable install 59 | 60 | You will need an editable install if you'd like to: 61 | 62 | * Use the `main` version of the source code. 63 | * Contribute to 🤗 Diffusers and need to test changes in the code. 64 | 65 | Clone the repository and install 🤗 Diffusers with the following commands: 66 | 67 | ```bash 68 | git clone https://github.com/huggingface/diffusers.git 69 | cd diffusers 70 | pip install -e . 71 | ``` 72 | 73 | These commands will link the folder you cloned the repository to and your Python library paths. 74 | Python will now look inside the folder you cloned to in addition to the normal library paths. 
75 | For example, if your Python packages are typically installed in `~/anaconda3/envs/main/lib/python3.7/site-packages/`, Python will also search the folder you cloned to: `~/diffusers/`. 76 | 77 | 78 | 79 | You must keep the `diffusers` folder if you want to keep using the library. 80 | 81 | 82 | 83 | Now you can easily update your clone to the latest version of 🤗 Diffusers with the following command: 84 | 85 | ```bash 86 | cd ~/diffusers/ 87 | git pull 88 | ``` 89 | 90 | Your Python environment will find the `main` version of 🤗 Diffusers on the next run. 91 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/optimization/fp16.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Memory and speed 14 | 15 | We present some techniques and ideas to optimize 🤗 Diffusers _inference_ for memory or speed. 16 | 17 | ## CUDA `autocast` 18 | 19 | If you use a CUDA GPU, you can take advantage of `torch.autocast` to perform inference roughly twice as fast at the cost of slightly lower precision. All you need to do is put your inference call inside an `autocast` context manager. The following example shows how to do it using Stable Diffusion text-to-image generation as an example: 20 | 21 | ```Python 22 | from torch import autocast 23 | from diffusers import StableDiffusionPipeline 24 | 25 | pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=True) 26 | pipe = pipe.to("cuda") 27 | 28 | prompt = "a photo of an astronaut riding a horse on mars" 29 | with autocast("cuda"): 30 | image = pipe(prompt).images[0] 31 | ``` 32 | 33 | Despite the precision loss, in our experience the final image results look the same as the `float32` versions. Feel free to experiment and report back! 34 | 35 | ## Half precision weights 36 | 37 | To save more GPU memory, you can load the model weights directly in half precision. This involves loading the float16 version of the weights, which was saved to a branch named `fp16`, and telling PyTorch to use the `float16` type when loading them: 38 | 39 | ```Python 40 | pipe = StableDiffusionPipeline.from_pretrained( 41 | "CompVis/stable-diffusion-v1-4", 42 | revision="fp16", 43 | torch_dtype=torch.float16, 44 | use_auth_token=True 45 | ) 46 | ``` 47 | 48 | ## Sliced attention for additional memory savings 49 | 50 | For even additional memory savings, you can use a sliced version of attention that performs the computation in steps instead of all at once. 51 | 52 | 53 | Attention slicing is useful even if a batch size of just 1 is used - as long as the model uses more than one attention head. If there is more than one attention head the *QK^T* attention matrix can be computed sequentially for each head which can save a significant amount of memory. 
54 | 55 | 56 | To perform the attention computation sequentially over each head, you only need to invoke [`~StableDiffusionPipeline.enable_attention_slicing`] in your pipeline before inference, like here: 57 | 58 | ```Python 59 | import torch 60 | from diffusers import StableDiffusionPipeline 61 | 62 | pipe = StableDiffusionPipeline.from_pretrained( 63 | "CompVis/stable-diffusion-v1-4", 64 | revision="fp16", 65 | torch_dtype=torch.float16, 66 | use_auth_token=True 67 | ) 68 | pipe = pipe.to("cuda") 69 | 70 | prompt = "a photo of an astronaut riding a horse on mars" 71 | pipe.enable_attention_slicing() 72 | with torch.autocast("cuda"): 73 | image = pipe(prompt).images[0] 74 | ``` 75 | 76 | There's a small performance penalty of about 10% slower inference times, but this method allows you to use Stable Diffusion in as little as 3.2 GB of VRAM! 77 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/optimization/mps.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # How to use Stable Diffusion in Apple Silicon (M1/M2) 14 | 15 | 🤗 Diffusers is compatible with Apple silicon for Stable Diffusion inference, using the PyTorch `mps` device. These are the steps you need to follow to use your M1 or M2 computer with Stable Diffusion. 16 | 17 | ## Requirements 18 | 19 | - Mac computer with Apple silicon (M1/M2) hardware. 20 | - macOS 12.3 or later. 21 | - arm64 version of Python. 22 | - PyTorch [Preview (Nightly)](https://pytorch.org/get-started/locally/), version `1.13.0.dev20220830` or later. 23 | 24 | ## Inference Pipeline 25 | 26 | The snippet below demonstrates how to use the `mps` backend using the familiar `to()` interface to move the Stable Diffusion pipeline to your M1 or M2 device. 27 | 28 | We recommend to "prime" the pipeline using an additional one-time pass through it. This is a temporary workaround for a weird issue we have detected: the first inference pass produces slightly different results than subsequent ones. You only need to do this pass once, and it's ok to use just one inference step and discard the result. 29 | 30 | ```python 31 | # make sure you're logged in with `huggingface-cli login` 32 | from diffusers import StableDiffusionPipeline 33 | 34 | pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=True) 35 | pipe = pipe.to("mps") 36 | 37 | prompt = "a photo of an astronaut riding a horse on mars" 38 | 39 | # First-time "warmup" pass (see explanation above) 40 | _ = pipe(prompt, num_inference_steps=1) 41 | 42 | # Results match those from the CPU device after the warmup pass. 43 | image = pipe(prompt).images[0] 44 | ``` 45 | 46 | ## Known Issues 47 | 48 | - As mentioned above, we are investigating a strange [first-time inference issue](https://github.com/huggingface/diffusers/issues/372). 49 | - Generating multiple prompts in a batch [crashes or doesn't work reliably](https://github.com/huggingface/diffusers/issues/363). We believe this might be related to the [`mps` backend in PyTorch](https://github.com/pytorch/pytorch/issues/84039#issuecomment-1237735249), but we need to investigate in more depth. For now, we recommend to iterate instead of batching. 50 | 51 | ## Performance 52 | 53 | These are the results we got on a M1 Max MacBook Pro with 64 GB of RAM, running macOS Ventura Version 13.0 Beta (22A5331f). 
We performed Stable Diffusion text-to-image generation of the same prompt for 50 inference steps, using a guidance scale of 7.5. 54 | 55 | | Device | Steps | Time | 56 | |--------|-------|---------| 57 | | CPU | 50 | 213.46s | 58 | | MPS | 50 | 30.81s | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/optimization/onnx.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | # How to use the ONNX Runtime for inference 15 | 16 | 🤗 Diffusers provides a Stable Diffusion pipeline compatible with the ONNX Runtime. This allows you to run Stable Diffusion on any hardware that supports ONNX (including CPUs), and where an accelerated version of PyTorch is not available. 17 | 18 | ## Installation 19 | 20 | - TODO 21 | 22 | ## Stable Diffusion Inference 23 | 24 | The snippet below demonstrates how to use the ONNX runtime. You need to use `StableDiffusionOnnxPipeline` instead of `StableDiffusionPipeline`. You also need to download the weights from the `onnx` branch of the repository, and indicate the runtime provider you want to use. 25 | 26 | ```python 27 | # make sure you're logged in with `huggingface-cli login` 28 | from diffusers import StableDiffusionOnnxPipeline 29 | 30 | pipe = StableDiffusionOnnxPipeline.from_pretrained( 31 | "CompVis/stable-diffusion-v1-4", 32 | revision="onnx", 33 | provider="CUDAExecutionProvider", 34 | use_auth_token=True, 35 | ) 36 | 37 | prompt = "a photo of an astronaut riding a horse on mars" 38 | image = pipe(prompt).images[0] 39 | ``` 40 | 41 | ## Known Issues 42 | 43 | - Generating multiple prompts in a batch seems to take too much memory. While we look into it, you may need to iterate instead of batching. 44 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/optimization/open_vino.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # OpenVINO 14 | 15 | Under construction 🚧 16 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/training/text2image.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | # Text-to-Image Training 15 | 16 | Under construction 🚧 17 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/using-diffusers/conditional_image_generation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Conditional Image Generation 14 | 15 | The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference 16 | 17 | Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download. 18 | You can use the [`DiffusionPipeline`] for any [Diffusers' checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads). 19 | In this guide though, you'll use [`DiffusionPipeline`] for text-to-image generation with [Latent Diffusion](https://huggingface.co/CompVis/ldm-text2im-large-256): 20 | 21 | ```python 22 | >>> from diffusers import DiffusionPipeline 23 | 24 | >>> generator = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256") 25 | ``` 26 | The [`DiffusionPipeline`] downloads and caches all modeling, tokenization, and scheduling components. 
27 | Because the model consists of roughly 1.4 billion parameters, we strongly recommend running it on GPU. 28 | You can move the generator object to GPU, just like you would in PyTorch. 29 | 30 | ```python 31 | >>> generator.to("cuda") 32 | ``` 33 | 34 | Now you can use the `generator` on your text prompt: 35 | 36 | ```python 37 | >>> image = generator("An image of a squirrel in Picasso style").images[0] 38 | ``` 39 | 40 | The output is by default wrapped into a [PIL Image object](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class). 41 | 42 | You can save the image by simply calling: 43 | 44 | ```python 45 | >>> image.save("image_of_squirrel_painting.png") 46 | ``` 47 | 48 | 49 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/using-diffusers/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | 15 | # Quicktour 16 | 17 | Start using 🧨 Diffusers quickly! 18 | To start, use the [`DiffusionPipeline`] for quick inference and sample generations! 19 | 20 | ```bash 21 | pip install diffusers 22 | ``` 23 | 24 | ## Main classes 25 | 26 | ### Models 27 | 28 | ### Schedulers 29 | 30 | ### Pipelines 31 | 32 | 33 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/using-diffusers/custom.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Custom Pipeline 14 | 15 | Under construction 🚧 16 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/using-diffusers/img2img.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Text-Guided Image-to-Image Generation 14 | 15 | The [`StableDiffusionImg2ImgPipeline`] lets you pass a text prompt and an initial image to condition the generation of new images.
16 | 17 | ```python 18 | import torch 19 | import requests 20 | from PIL import Image 21 | from io import BytesIO 22 | 23 | from diffusers import StableDiffusionImg2ImgPipeline 24 | 25 | # load the pipeline 26 | device = "cuda" 27 | pipe = StableDiffusionImg2ImgPipeline.from_pretrained( 28 | "CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, use_auth_token=True 29 | ).to(device) 30 | 31 | # let's download an initial image 32 | url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg" 33 | 34 | response = requests.get(url) 35 | init_image = Image.open(BytesIO(response.content)).convert("RGB") 36 | init_image = init_image.resize((768, 512)) 37 | 38 | prompt = "A fantasy landscape, trending on artstation" 39 | 40 | with torch.autocast("cuda"): 41 | images = pipe(prompt=prompt, init_image=init_image, strength=0.75, guidance_scale=7.5).images 42 | 43 | images[0].save("fantasy_landscape.png") 44 | ``` 45 | You can also run this example on colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/image_2_image_using_diffusers.ipynb) 46 | 47 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/using-diffusers/inpaint.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Text-Guided Image-Inpainting 14 | 15 | The [`StableDiffusionInpaintPipeline`] lets you edit specific parts of an image by providing a mask and a text prompt. 16 | 17 | ```python 18 | from io import BytesIO 19 | 20 | import torch 21 | import requests 22 | import PIL 23 | 24 | from diffusers import StableDiffusionInpaintPipeline 25 | 26 | 27 | def download_image(url): 28 | response = requests.get(url) 29 | return PIL.Image.open(BytesIO(response.content)).convert("RGB") 30 | 31 | 32 | img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png" 33 | mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png" 34 | 35 | init_image = download_image(img_url).resize((512, 512)) 36 | mask_image = download_image(mask_url).resize((512, 512)) 37 | 38 | device = "cuda" 39 | pipe = StableDiffusionInpaintPipeline.from_pretrained( 40 | "CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16, use_auth_token=True 41 | ).to(device) 42 | 43 | prompt = "a cat sitting on a bench" 44 | with torch.autocast("cuda"): 45 | images = pipe(prompt=prompt, init_image=init_image, mask_image=mask_image, strength=0.75).images 46 | 47 | images[0].save("cat_on_bench.png") 48 | ``` 49 | 50 | You can also run this example on colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/in_painting_with_stable_diffusion_using_diffusers.ipynb) 51 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/docs/source/using-diffusers/loading.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Loading 14 | 15 | Under construction 🚧 16 | --------------------------------------------------------------------------------
/FlashAttention/diffusers/docs/source/using-diffusers/unconditional_image_generation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | 15 | # Unconditional Image Generation 16 | 17 | The [`DiffusionPipeline`] is the easiest way to use a pre-trained diffusion system for inference. 18 | 19 | Start by creating an instance of [`DiffusionPipeline`] and specify which pipeline checkpoint you would like to download. 20 | You can use the [`DiffusionPipeline`] for any [Diffusers' checkpoint](https://huggingface.co/models?library=diffusers&sort=downloads). 21 | In this guide though, you'll use [`DiffusionPipeline`] for unconditional image generation with [DDPM](https://arxiv.org/abs/2006.11239): 22 | 23 | ```python 24 | >>> from diffusers import DiffusionPipeline 25 | 26 | >>> generator = DiffusionPipeline.from_pretrained("google/ddpm-celebahq-256") 27 | ``` 28 | The [`DiffusionPipeline`] downloads and caches all modeling, tokenization, and scheduling components. 29 | Because sampling runs the model through many denoising steps, we strongly recommend running it on GPU. 30 | You can move the generator object to GPU, just like you would in PyTorch. 31 | 32 | ```python 33 | >>> generator.to("cuda") 34 | ``` 35 | 36 | Now you can use the `generator` to sample an image: 37 | 38 | ```python 39 | >>> image = generator().images[0] 40 | ``` 41 | 42 | The output is by default wrapped into a [PIL Image object](https://pillow.readthedocs.io/en/stable/reference/Image.html?highlight=image#the-image-class). 43 | 44 | You can save the image by simply calling: 45 | 46 | ```python 47 | >>> image.save("generated_image.png") 48 | ``` 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/examples/community/README.md: -------------------------------------------------------------------------------- 1 | # Community Examples 2 | 3 | **Community** examples consist of both inference and training examples that have been added by the community. 4 | 5 | | Example | Description | Author | | 6 | |:----------|:-------------|:-------------|------:| 7 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/examples/inference/README.md: -------------------------------------------------------------------------------- 1 | # Inference Examples 2 | 3 | **The inference examples folder is deprecated and will be removed in a future version**. 4 | **Officially supported inference examples can be found in the [Pipelines folder](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines)**.
5 | 6 | - For `Image-to-Image text-guided generation with Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 7 | - For `In-painting using Stable Diffusion`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 8 | - For `Tweak prompts reusing seeds and latents`, please have a look at the official [Pipeline examples](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines#examples) 9 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/examples/inference/image_to_image.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import StableDiffusionImg2ImgPipeline # noqa F401 4 | 5 | 6 | warnings.warn( 7 | "The `image_to_image.py` script is outdated. Please use directly `from diffusers import" 8 | " StableDiffusionImg2ImgPipeline` instead." 9 | ) 10 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/examples/inference/inpainting.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from diffusers import ( 4 | StableDiffusionInpaintPipeline as StableDiffusionInpaintPipeline, 5 | ) # noqa F401 6 | 7 | 8 | warnings.warn( 9 | "The `inpainting.py` script is outdated. Please use directly `from diffusers import" 10 | " StableDiffusionInpaintPipeline` instead." 11 | ) 12 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/examples/textual_inversion/README.md: -------------------------------------------------------------------------------- 1 | ## Textual Inversion fine-tuning example 2 | 3 | [Textual inversion](https://arxiv.org/abs/2208.01618) is a method to personalize text2image models like stable diffusion on your own images using just 3-5 examples. 4 | The `textual_inversion.py` script shows how to implement the training procedure and adapt it for stable diffusion. 5 | 6 | ## Running on Colab 7 | 8 | Colab for training 9 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_textual_inversion_training.ipynb) 10 | 11 | Colab for inference 12 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/stable_conceptualizer_inference.ipynb) 13 | 14 | ## Running locally 15 | ### Installing the dependencies 16 | 17 | Before running the scripts, make sure to install the library's training dependencies: 18 | 19 | ```bash 20 | pip install diffusers[training] accelerate transformers 21 | ``` 22 | 23 | And initialize an [🤗Accelerate](https://github.com/huggingface/accelerate/) environment with: 24 | 25 | ```bash 26 | accelerate config 27 | ``` 28 | 29 | 30 | ### Cat toy example 31 | 32 | You need to accept the model license before downloading or using the weights. In this example we'll use model version `v1-4`, so you'll need to visit [its card](https://huggingface.co/CompVis/stable-diffusion-v1-4), read the license and tick the checkbox if you agree. 33 | 34 | You have to be a registered user in 🤗 Hugging Face Hub, and you'll also need to use an access token for the code to work. 
For more information on access tokens, please refer to [this section of the documentation](https://huggingface.co/docs/hub/security-tokens). 35 | 36 | Run the following command to authenticate your token: 37 | 38 | ```bash 39 | huggingface-cli login 40 | ``` 41 | 42 | If you have already cloned the repo, then you won't need to go through these steps. You can simply remove the `--use_auth_token` arg from the following command. 43 | 44 |
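If you are working in a notebook instead of a terminal, a programmatic login via `huggingface_hub` (already installed as a dependency of `diffusers`) is a possible alternative:

```python
# Notebook alternative to `huggingface-cli login`
from huggingface_hub import notebook_login

notebook_login()
```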
45 | 46 | Now let's get our dataset.Download 3-4 images from [here](https://drive.google.com/drive/folders/1fmJMs25nxS_rSNqS5hTcRdLem_YQXbq5) and save them in a directory. This will be our training data. 47 | 48 | And launch the training using 49 | 50 | ```bash 51 | export MODEL_NAME="CompVis/stable-diffusion-v1-4" 52 | export DATA_DIR="path-to-dir-containing-images" 53 | 54 | accelerate launch textual_inversion.py \ 55 | --pretrained_model_name_or_path=$MODEL_NAME --use_auth_token \ 56 | --train_data_dir=$DATA_DIR \ 57 | --learnable_property="object" \ 58 | --placeholder_token="" --initializer_token="toy" \ 59 | --resolution=512 \ 60 | --train_batch_size=1 \ 61 | --gradient_accumulation_steps=4 \ 62 | --max_train_steps=3000 \ 63 | --learning_rate=5.0e-04 --scale_lr \ 64 | --lr_scheduler="constant" \ 65 | --lr_warmup_steps=0 \ 66 | --output_dir="textual_inversion_cat" 67 | ``` 68 | 69 | A full training run takes ~1 hour on one V100 GPU. 70 | 71 | 72 | ### Inference 73 | 74 | Once you have trained a model using above command, the inference can be done simply using the `StableDiffusionPipeline`. Make sure to include the `placeholder_token` in your prompt. 75 | 76 | ```python 77 | 78 | from torch import autocast 79 | from diffusers import StableDiffusionPipeline 80 | 81 | model_id = "path-to-your-trained-model" 82 | pipe = StableDiffusionPipeline.from_pretrained(model_id,torch_dtype=torch.float16).to("cuda") 83 | 84 | prompt = "A backpack" 85 | 86 | with autocast("cuda"): 87 | image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0] 88 | 89 | image.save("cat-backpack.png") 90 | ``` 91 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/examples/textual_inversion/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | transformers 4 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/examples/unconditional_image_generation/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | torchvision 3 | datasets 4 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py36'] 4 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/FlashAttention/diffusers/scripts/__init__.py -------------------------------------------------------------------------------- /FlashAttention/diffusers/scripts/conversion_ldm_uncond.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | 5 | import OmegaConf 6 | from diffusers import DDIMScheduler, LDMPipeline, UNetLDMModel, VQModel 7 | 8 | 9 | def convert_ldm_original(checkpoint_path, config_path, output_path): 10 | config = OmegaConf.load(config_path) 11 | state_dict = torch.load(checkpoint_path, map_location="cpu")["model"] 12 | keys = list(state_dict.keys()) 13 | 14 | # extract state_dict for VQVAE 15 | first_stage_dict = {} 16 | first_stage_key = 
"first_stage_model." 17 | for key in keys: 18 | if key.startswith(first_stage_key): 19 | first_stage_dict[key.replace(first_stage_key, "")] = state_dict[key] 20 | 21 | # extract state_dict for UNetLDM 22 | unet_state_dict = {} 23 | unet_key = "model.diffusion_model." 24 | for key in keys: 25 | if key.startswith(unet_key): 26 | unet_state_dict[key.replace(unet_key, "")] = state_dict[key] 27 | 28 | vqvae_init_args = config.model.params.first_stage_config.params 29 | unet_init_args = config.model.params.unet_config.params 30 | 31 | vqvae = VQModel(**vqvae_init_args).eval() 32 | vqvae.load_state_dict(first_stage_dict) 33 | 34 | unet = UNetLDMModel(**unet_init_args).eval() 35 | unet.load_state_dict(unet_state_dict) 36 | 37 | noise_scheduler = DDIMScheduler( 38 | timesteps=config.model.params.timesteps, 39 | beta_schedule="scaled_linear", 40 | beta_start=config.model.params.linear_start, 41 | beta_end=config.model.params.linear_end, 42 | clip_sample=False, 43 | ) 44 | 45 | pipeline = LDMPipeline(vqvae, unet, noise_scheduler) 46 | pipeline.save_pretrained(output_path) 47 | 48 | 49 | if __name__ == "__main__": 50 | parser = argparse.ArgumentParser() 51 | parser.add_argument("--checkpoint_path", type=str, required=True) 52 | parser.add_argument("--config_path", type=str, required=True) 53 | parser.add_argument("--output_path", type=str, required=True) 54 | args = parser.parse_args() 55 | 56 | convert_ldm_original(args.checkpoint_path, args.config_path, args.output_path) 57 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = accelerate 7 | known_third_party = 8 | numpy 9 | torch 10 | torch_xla 11 | 12 | line_length = 119 13 | lines_after_imports = 2 14 | multi_line_output = 3 15 | use_parentheses = True 16 | 17 | [flake8] 18 | ignore = E203, E722, E501, E741, W503, W605 19 | max-line-length = 119 20 | per-file-ignores = __init__.py:F401 21 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import ( 2 | is_flax_available, 3 | is_inflect_available, 4 | is_onnx_available, 5 | is_scipy_available, 6 | is_torch_available, 7 | is_transformers_available, 8 | is_unidecode_available, 9 | ) 10 | 11 | 12 | __version__ = "0.4.0.dev0" 13 | 14 | from .configuration_utils import ConfigMixin 15 | from .onnx_utils import OnnxRuntimeModel 16 | from .utils import logging 17 | 18 | 19 | if is_torch_available(): 20 | from .modeling_utils import ModelMixin 21 | from .models import AutoencoderKL, UNet2DConditionModel, UNet2DModel, VQModel 22 | from .optimization import ( 23 | get_constant_schedule, 24 | get_constant_schedule_with_warmup, 25 | get_cosine_schedule_with_warmup, 26 | get_cosine_with_hard_restarts_schedule_with_warmup, 27 | get_linear_schedule_with_warmup, 28 | get_polynomial_decay_schedule_with_warmup, 29 | get_scheduler, 30 | ) 31 | from .pipeline_utils import DiffusionPipeline 32 | from .pipelines import ( 33 | DDIMPipeline, 34 | DDPMPipeline, 35 | KarrasVePipeline, 36 | LDMPipeline, 37 | PNDMPipeline, 38 | ScoreSdeVePipeline, 39 | ) 40 | from .schedulers import ( 41 | DDIMScheduler, 42 | DDPMScheduler, 43 | 
KarrasVeScheduler, 44 | PNDMScheduler, 45 | SchedulerMixin, 46 | ScoreSdeVeScheduler, 47 | ) 48 | from .training_utils import EMAModel 49 | else: 50 | from .utils.dummy_pt_objects import * # noqa F403 51 | 52 | if is_torch_available() and is_scipy_available(): 53 | from .schedulers import LMSDiscreteScheduler 54 | else: 55 | from .utils.dummy_torch_and_scipy_objects import * # noqa F403 56 | 57 | if is_torch_available() and is_transformers_available(): 58 | from .pipelines import ( 59 | LDMTextToImagePipeline, 60 | StableDiffusionImg2ImgPipeline, 61 | StableDiffusionInpaintPipeline, 62 | StableDiffusionPipeline, 63 | ) 64 | else: 65 | from .utils.dummy_torch_and_transformers_objects import * # noqa F403 66 | 67 | if is_torch_available() and is_transformers_available() and is_onnx_available(): 68 | from .pipelines import StableDiffusionOnnxPipeline 69 | else: 70 | from .utils.dummy_torch_and_transformers_and_onnx_objects import * # noqa F403 71 | 72 | if is_flax_available(): 73 | from .modeling_flax_utils import FlaxModelMixin 74 | from .models.unet_2d_condition_flax import FlaxUNet2DConditionModel 75 | from .schedulers import ( 76 | FlaxDDIMScheduler, 77 | FlaxDDPMScheduler, 78 | FlaxKarrasVeScheduler, 79 | FlaxLMSDiscreteScheduler, 80 | FlaxPNDMScheduler, 81 | FlaxScoreSdeVeScheduler, 82 | ) 83 | else: 84 | from .utils.dummy_flax_objects import * # noqa F403 85 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseDiffusersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/commands/diffusers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
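# Entry point for the `diffusers-cli` command-line tool: it builds the top-level
# argument parser, registers the available subcommands (currently only `env`), and
# dispatches to whichever command was selected on the command line.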
15 | 16 | from argparse import ArgumentParser 17 | 18 | from .env import EnvironmentCommand 19 | 20 | 21 | def main(): 22 | parser = ArgumentParser( 23 | "Diffusers CLI tool", usage="diffusers-cli []" 24 | ) 25 | commands_parser = parser.add_subparsers(help="diffusers-cli command helpers") 26 | 27 | # Register commands 28 | EnvironmentCommand.register_subcommand(commands_parser) 29 | 30 | # Let's go 31 | args = parser.parse_args() 32 | 33 | if not hasattr(args, "func"): 34 | parser.print_help() 35 | exit(1) 36 | 37 | # Run 38 | service = args.func(args) 39 | service.run() 40 | 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/commands/env.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import platform 16 | from argparse import ArgumentParser 17 | 18 | import huggingface_hub 19 | 20 | from .. import __version__ as version 21 | from ..utils import is_torch_available, is_transformers_available 22 | from . import BaseDiffusersCLICommand 23 | 24 | 25 | def info_command_factory(_): 26 | return EnvironmentCommand() 27 | 28 | 29 | class EnvironmentCommand(BaseDiffusersCLICommand): 30 | @staticmethod 31 | def register_subcommand(parser: ArgumentParser): 32 | download_parser = parser.add_parser("env") 33 | download_parser.set_defaults(func=info_command_factory) 34 | 35 | def run(self): 36 | hub_version = huggingface_hub.__version__ 37 | 38 | pt_version = "not installed" 39 | pt_cuda_available = "NA" 40 | if is_torch_available(): 41 | import torch 42 | 43 | pt_version = torch.__version__ 44 | pt_cuda_available = torch.cuda.is_available() 45 | 46 | transformers_version = "not installed" 47 | if is_transformers_available: 48 | import transformers 49 | 50 | transformers_version = transformers.__version__ 51 | 52 | info = { 53 | "`diffusers` version": version, 54 | "Platform": platform.platform(), 55 | "Python version": platform.python_version(), 56 | "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})", 57 | "Huggingface_hub version": hub_version, 58 | "Transformers version": transformers_version, 59 | "Using GPU in script?": "", 60 | "Using distributed or parallel set-up in script?": "", 61 | } 62 | 63 | print( 64 | "\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n" 65 | ) 66 | print(self.format_dict(info)) 67 | 68 | return info 69 | 70 | @staticmethod 71 | def format_dict(d): 72 | return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n" 73 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/dependency_versions_check.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. 
All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import sys 15 | 16 | from .dependency_versions_table import deps 17 | from .utils.versions import require_version, require_version_core 18 | 19 | 20 | # define which module versions we always want to check at run time 21 | # (usually the ones defined in `install_requires` in setup.py) 22 | # 23 | # order specific notes: 24 | # - tqdm must be checked before tokenizers 25 | 26 | pkgs_to_check_at_runtime = ( 27 | "python tqdm regex requests packaging filelock numpy tokenizers".split() 28 | ) 29 | if sys.version_info < (3, 7): 30 | pkgs_to_check_at_runtime.append("dataclasses") 31 | if sys.version_info < (3, 8): 32 | pkgs_to_check_at_runtime.append("importlib_metadata") 33 | 34 | for pkg in pkgs_to_check_at_runtime: 35 | if pkg in deps: 36 | if pkg == "tokenizers": 37 | # must be loaded here, or else tqdm check may fail 38 | from .utils import is_tokenizers_available 39 | 40 | if not is_tokenizers_available(): 41 | continue # not required, check version only if installed 42 | 43 | require_version_core(deps[pkg]) 44 | else: 45 | raise ValueError( 46 | f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py" 47 | ) 48 | 49 | 50 | def dep_version_check(pkg, hint=None): 51 | require_version(deps[pkg], hint) 52 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/dependency_versions_table.py: -------------------------------------------------------------------------------- 1 | # THIS FILE HAS BEEN AUTOGENERATED. To update: 2 | # 1. modify the `_deps` dict in setup.py 3 | # 2. run `make deps_table_update`` 4 | deps = { 5 | "Pillow": "Pillow", 6 | "accelerate": "accelerate>=0.11.0", 7 | "black": "black==22.8", 8 | "datasets": "datasets", 9 | "filelock": "filelock", 10 | "flake8": "flake8>=3.8.3", 11 | "flax": "flax>=0.4.1", 12 | "hf-doc-builder": "hf-doc-builder>=0.3.0", 13 | "huggingface-hub": "huggingface-hub>=0.8.1", 14 | "importlib_metadata": "importlib_metadata", 15 | "isort": "isort>=5.5.4", 16 | "jax": "jax>=0.2.8,!=0.3.2,<=0.3.6", 17 | "jaxlib": "jaxlib>=0.1.65,<=0.3.6", 18 | "modelcards": "modelcards==0.1.4", 19 | "numpy": "numpy", 20 | "pytest": "pytest", 21 | "pytest-timeout": "pytest-timeout", 22 | "pytest-xdist": "pytest-xdist", 23 | "scipy": "scipy", 24 | "regex": "regex!=2019.12.17", 25 | "requests": "requests", 26 | "tensorboard": "tensorboard", 27 | "torch": "torch>=1.4", 28 | "transformers": "transformers>=4.21.0", 29 | } 30 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | - Models: Neural network that models $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$ (see image below) and is trained end-to-end to denoise a noisy input to an image. 
Examples: UNet, Conditioned UNet, 3D UNet, Transformer UNet 4 | 5 | ## API 6 | 7 | TODO(Suraj, Patrick) 8 | 9 | ## Examples 10 | 11 | TODO(Suraj, Patrick) 12 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .unet_2d import UNet2DModel 16 | from .unet_2d_condition import UNet2DConditionModel 17 | from .vae import AutoencoderKL, VQModel 18 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/models/embeddings_flax.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import math 15 | 16 | import flax.linen as nn 17 | import jax.numpy as jnp 18 | 19 | 20 | # This is like models.embeddings.get_timestep_embedding (PyTorch) but 21 | # less general (only handles the case we currently need). 22 | def get_sinusoidal_embeddings(timesteps, embedding_dim): 23 | """ 24 | This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. 25 | 26 | :param timesteps: a 1-D tensor of N indices, one per batch element. 27 | These may be fractional. 28 | :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the 29 | embeddings. :return: an [N x dim] tensor of positional embeddings. 
30 | """ 31 | half_dim = embedding_dim // 2 32 | emb = math.log(10000) / (half_dim - 1) 33 | emb = jnp.exp(jnp.arange(half_dim) * -emb) 34 | emb = timesteps[:, None] * emb[None, :] 35 | emb = jnp.concatenate([jnp.cos(emb), jnp.sin(emb)], -1) 36 | return emb 37 | 38 | 39 | class FlaxTimestepEmbedding(nn.Module): 40 | time_embed_dim: int = 32 41 | dtype: jnp.dtype = jnp.float32 42 | 43 | @nn.compact 44 | def __call__(self, temb): 45 | temb = nn.Dense(self.time_embed_dim, dtype=self.dtype, name="linear_1")(temb) 46 | temb = nn.silu(temb) 47 | temb = nn.Dense(self.time_embed_dim, dtype=self.dtype, name="linear_2")(temb) 48 | return temb 49 | 50 | 51 | class FlaxTimesteps(nn.Module): 52 | dim: int = 32 53 | 54 | @nn.compact 55 | def __call__(self, timesteps): 56 | return get_sinusoidal_embeddings(timesteps, self.dim) 57 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from ..utils import is_onnx_available, is_transformers_available 2 | from .ddim import DDIMPipeline 3 | from .ddpm import DDPMPipeline 4 | from .latent_diffusion_uncond import LDMPipeline 5 | from .pndm import PNDMPipeline 6 | from .score_sde_ve import ScoreSdeVePipeline 7 | from .stochastic_karras_ve import KarrasVePipeline 8 | 9 | 10 | if is_transformers_available(): 11 | from .latent_diffusion import LDMTextToImagePipeline 12 | from .stable_diffusion import ( 13 | StableDiffusionImg2ImgPipeline, 14 | StableDiffusionInpaintPipeline, 15 | StableDiffusionPipeline, 16 | ) 17 | 18 | if is_transformers_available() and is_onnx_available(): 19 | from .stable_diffusion import StableDiffusionOnnxPipeline 20 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_ddim import DDIMPipeline 3 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_ddpm import DDPMPipeline 3 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from ...utils import is_transformers_available 3 | 4 | 5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_latent_diffusion_uncond import LDMPipeline 3 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_pndm import PNDMPipeline 3 | -------------------------------------------------------------------------------- 
/FlashAttention/diffusers/src/diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 3 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/pipelines/stable_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Union 3 | 4 | import numpy as np 5 | 6 | import PIL 7 | from PIL import Image 8 | 9 | from ...utils import BaseOutput, is_onnx_available, is_transformers_available 10 | 11 | 12 | @dataclass 13 | class StableDiffusionPipelineOutput(BaseOutput): 14 | """ 15 | Output class for Stable Diffusion pipelines. 16 | 17 | Args: 18 | images (`List[PIL.Image.Image]` or `np.ndarray`) 19 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, 20 | num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline. 21 | nsfw_content_detected (`List[bool]`) 22 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work" 23 | (nsfw) content. 24 | """ 25 | 26 | images: Union[List[PIL.Image.Image], np.ndarray] 27 | nsfw_content_detected: List[bool] 28 | 29 | 30 | if is_transformers_available(): 31 | from .pipeline_stable_diffusion import StableDiffusionPipeline 32 | from .pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline 33 | from .pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline 34 | from .safety_checker import StableDiffusionSafetyChecker 35 | 36 | if is_transformers_available() and is_onnx_available(): 37 | from .pipeline_stable_diffusion_onnx import StableDiffusionOnnxPipeline 38 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 3 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/schedulers/README.md: -------------------------------------------------------------------------------- 1 | # Schedulers 2 | 3 | - Schedulers are the algorithms to use diffusion models in inference as well as for training. They include the noise schedules and define algorithm-specific diffusion steps. 4 | - Schedulers can be used interchangeable between diffusion models in inference to find the preferred trade-off between speed and generation quality. 5 | - Schedulers are available in numpy, but can easily be transformed into PyTorch. 6 | 7 | ## API 8 | 9 | - Schedulers should provide one or more `def step(...)` functions that should be called iteratively to unroll the diffusion loop during 10 | the forward pass. 11 | - Schedulers should be framework-agnostic, but provide a simple functionality to convert the scheduler into a specific framework, such as PyTorch 12 | with a `set_format(...)` method. 13 | 14 | ## Examples 15 | 16 | - The DDPM scheduler was proposed in [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) and can be found in [scheduling_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddpm.py). 
An example of how to use this scheduler can be found in [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddpm.py). 17 | - The DDIM scheduler was proposed in [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) and can be found in [scheduling_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddim.py). An example of how to use this scheduler can be found in [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddim.py). 18 | - The PNDM scheduler was proposed in [Pseudo Numerical Methods for Diffusion Models on Manifolds](https://arxiv.org/abs/2202.09778) and can be found in [scheduling_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_pndm.py). An example of how to use this scheduler can be found in [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py). 19 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/schedulers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from ..utils import is_flax_available, is_scipy_available, is_torch_available 17 | 18 | 19 | if is_torch_available(): 20 | from .scheduling_ddim import DDIMScheduler 21 | from .scheduling_ddpm import DDPMScheduler 22 | from .scheduling_karras_ve import KarrasVeScheduler 23 | from .scheduling_pndm import PNDMScheduler 24 | from .scheduling_sde_ve import ScoreSdeVeScheduler 25 | from .scheduling_sde_vp import ScoreSdeVpScheduler 26 | from .scheduling_utils import SchedulerMixin 27 | else: 28 | from ..utils.dummy_pt_objects import * # noqa F403 29 | 30 | if is_flax_available(): 31 | from .scheduling_ddim_flax import FlaxDDIMScheduler 32 | from .scheduling_ddpm_flax import FlaxDDPMScheduler 33 | from .scheduling_karras_ve_flax import FlaxKarrasVeScheduler 34 | from .scheduling_lms_discrete_flax import FlaxLMSDiscreteScheduler 35 | from .scheduling_pndm_flax import FlaxPNDMScheduler 36 | from .scheduling_sde_ve_flax import FlaxScoreSdeVeScheduler 37 | else: 38 | from ..utils.dummy_flax_objects import * # noqa F403 39 | 40 | if is_scipy_available(): 41 | from .scheduling_lms_discrete import LMSDiscreteScheduler 42 | else: 43 | from ..utils.dummy_torch_and_scipy_objects import * # noqa F403 44 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/schedulers/scheduling_sde_vp.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. 
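# Illustrative sketch (assumptions: a pretrained `UNet2DModel` bound to `unet` and a
# `DDPMScheduler` bound to `scheduler`). The schedulers README above describes the
# iterative `step(...)` API; a denoising loop built on that API looks roughly like:
#
#     scheduler.set_timesteps(50)
#     sample = torch.randn((1, unet.config.in_channels, unet.config.sample_size, unet.config.sample_size))
#     for t in scheduler.timesteps:
#         with torch.no_grad():
#             noise_pred = unet(sample, t).sample
#         sample = scheduler.step(noise_pred, t, sample).prev_sample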
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch 16 | 17 | # TODO(Patrick, Anton, Suraj) - make scheduler framework independent and clean-up a bit 18 | 19 | import numpy as np 20 | import torch 21 | 22 | from ..configuration_utils import ConfigMixin, register_to_config 23 | from .scheduling_utils import SchedulerMixin 24 | 25 | 26 | class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): 27 | """ 28 | The variance preserving stochastic differential equation (SDE) scheduler. 29 | 30 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 31 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 32 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 33 | [`~ConfigMixin.from_config`] functions. 34 | 35 | For more information, see the original paper: https://arxiv.org/abs/2011.13456 36 | 37 | UNDER CONSTRUCTION 38 | 39 | """ 40 | 41 | @register_to_config 42 | def __init__( 43 | self, 44 | num_train_timesteps=2000, 45 | beta_min=0.1, 46 | beta_max=20, 47 | sampling_eps=1e-3, 48 | tensor_format="np", 49 | ): 50 | self.sigmas = None 51 | self.discrete_sigmas = None 52 | self.timesteps = None 53 | 54 | def set_timesteps(self, num_inference_steps): 55 | self.timesteps = torch.linspace( 56 | 1, self.config.sampling_eps, num_inference_steps 57 | ) 58 | 59 | def step_pred(self, score, x, t): 60 | if self.timesteps is None: 61 | raise ValueError( 62 | "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler" 63 | ) 64 | 65 | # TODO(Patrick) better comments + non-PyTorch 66 | # postprocess model score 67 | log_mean_coeff = ( 68 | -0.25 * t**2 * (self.config.beta_max - self.config.beta_min) 69 | - 0.5 * t * self.config.beta_min 70 | ) 71 | std = torch.sqrt(1.0 - torch.exp(2.0 * log_mean_coeff)) 72 | score = -score / std[:, None, None, None] 73 | 74 | # compute 75 | dt = -1.0 / len(self.timesteps) 76 | 77 | beta_t = self.config.beta_min + t * ( 78 | self.config.beta_max - self.config.beta_min 79 | ) 80 | drift = -0.5 * beta_t[:, None, None, None] * x 81 | diffusion = torch.sqrt(beta_t) 82 | drift = drift - diffusion[:, None, None, None] ** 2 * score 83 | x_mean = x + drift * dt 84 | 85 | # add noise 86 | noise = torch.randn_like(x) 87 | x = x_mean + diffusion[:, None, None, None] * np.sqrt(-dt) * noise 88 | 89 | return x, x_mean 90 | 91 | def __len__(self): 92 | return self.config.num_train_timesteps 93 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/testing_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import unittest 4 | from distutils.util import strtobool 5 | from typing import Union 6 | 7 | import torch 8 | 9 
| import PIL.Image 10 | import PIL.ImageOps 11 | import requests 12 | from packaging import version 13 | 14 | 15 | global_rng = random.Random() 16 | torch_device = "cuda" if torch.cuda.is_available() else "cpu" 17 | is_torch_higher_equal_than_1_12 = version.parse( 18 | version.parse(torch.__version__).base_version 19 | ) >= version.parse("1.12") 20 | 21 | if is_torch_higher_equal_than_1_12: 22 | torch_device = "mps" if torch.backends.mps.is_available() else torch_device 23 | 24 | 25 | def parse_flag_from_env(key, default=False): 26 | try: 27 | value = os.environ[key] 28 | except KeyError: 29 | # KEY isn't set, default to `default`. 30 | _value = default 31 | else: 32 | # KEY is set, convert it to True or False. 33 | try: 34 | _value = strtobool(value) 35 | except ValueError: 36 | # More values are supported, but let's keep the message simple. 37 | raise ValueError(f"If set, {key} must be yes or no.") 38 | return _value 39 | 40 | 41 | _run_slow_tests = parse_flag_from_env("RUN_SLOW", default=False) 42 | 43 | 44 | def floats_tensor(shape, scale=1.0, rng=None, name=None): 45 | """Creates a random float32 tensor""" 46 | if rng is None: 47 | rng = global_rng 48 | 49 | total_dims = 1 50 | for dim in shape: 51 | total_dims *= dim 52 | 53 | values = [] 54 | for _ in range(total_dims): 55 | values.append(rng.random() * scale) 56 | 57 | return torch.tensor(data=values, dtype=torch.float).view(shape).contiguous() 58 | 59 | 60 | def slow(test_case): 61 | """ 62 | Decorator marking a test as slow. 63 | 64 | Slow tests are skipped by default. Set the RUN_SLOW environment variable to a truthy value to run them. 65 | 66 | """ 67 | return unittest.skipUnless(_run_slow_tests, "test is slow")(test_case) 68 | 69 | 70 | def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image: 71 | """ 72 | Args: 73 | Loads `image` to a PIL Image. 74 | image (`str` or `PIL.Image.Image`): 75 | The image to convert to the PIL Image format. 76 | Returns: 77 | `PIL.Image.Image`: A PIL Image. 78 | """ 79 | if isinstance(image, str): 80 | if image.startswith("http://") or image.startswith("https://"): 81 | image = PIL.Image.open(requests.get(image, stream=True).raw) 82 | elif os.path.isfile(image): 83 | image = PIL.Image.open(image) 84 | else: 85 | raise ValueError( 86 | f"Incorrect path or url, URLs must start with `http://` or `https://`, and {image} is not a valid path" 87 | ) 88 | elif isinstance(image, PIL.Image.Image): 89 | image = image 90 | else: 91 | raise ValueError( 92 | "Incorrect format used for image. Should be an url linking to an image, a local path, or a PIL image." 93 | ) 94 | image = PIL.ImageOps.exif_transpose(image) 95 | image = image.convert("RGB") 96 | return image 97 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import os 17 | 18 | from .import_utils import ( 19 | ENV_VARS_TRUE_AND_AUTO_VALUES, 20 | ENV_VARS_TRUE_VALUES, 21 | USE_JAX, 22 | USE_TF, 23 | USE_TORCH, 24 | DummyObject, 25 | is_flax_available, 26 | is_inflect_available, 27 | is_modelcards_available, 28 | is_onnx_available, 29 | is_scipy_available, 30 | is_tf_available, 31 | is_torch_available, 32 | is_transformers_available, 33 | is_unidecode_available, 34 | requires_backends, 35 | ) 36 | from .logging import get_logger 37 | from .outputs import BaseOutput 38 | 39 | 40 | logger = get_logger(__name__) 41 | 42 | 43 | hf_cache_home = os.path.expanduser( 44 | os.getenv( 45 | "HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface") 46 | ) 47 | ) 48 | default_cache_path = os.path.join(hf_cache_home, "diffusers") 49 | 50 | 51 | CONFIG_NAME = "config.json" 52 | HUGGINGFACE_CO_RESOLVE_ENDPOINT = "https://huggingface.co" 53 | DIFFUSERS_CACHE = default_cache_path 54 | DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" 55 | HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) 56 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/utils/dummy_flax_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class FlaxModelMixin(metaclass=DummyObject): 8 | _backends = ["flax"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["flax"]) 12 | 13 | 14 | class FlaxDDIMScheduler(metaclass=DummyObject): 15 | _backends = ["flax"] 16 | 17 | def __init__(self, *args, **kwargs): 18 | requires_backends(self, ["flax"]) 19 | 20 | 21 | class FlaxDDPMScheduler(metaclass=DummyObject): 22 | _backends = ["flax"] 23 | 24 | def __init__(self, *args, **kwargs): 25 | requires_backends(self, ["flax"]) 26 | 27 | 28 | class FlaxKarrasVeScheduler(metaclass=DummyObject): 29 | _backends = ["flax"] 30 | 31 | def __init__(self, *args, **kwargs): 32 | requires_backends(self, ["flax"]) 33 | 34 | 35 | class FlaxLMSDiscreteScheduler(metaclass=DummyObject): 36 | _backends = ["flax"] 37 | 38 | def __init__(self, *args, **kwargs): 39 | requires_backends(self, ["flax"]) 40 | 41 | 42 | class FlaxPNDMScheduler(metaclass=DummyObject): 43 | _backends = ["flax"] 44 | 45 | def __init__(self, *args, **kwargs): 46 | requires_backends(self, ["flax"]) 47 | 48 | 49 | class FlaxUNet2DConditionModel(metaclass=DummyObject): 50 | _backends = ["flax"] 51 | 52 | def __init__(self, *args, **kwargs): 53 | requires_backends(self, ["flax"]) 54 | 55 | 56 | class FlaxScoreSdeVeScheduler(metaclass=DummyObject): 57 | _backends = ["flax"] 58 | 59 | def __init__(self, *args, **kwargs): 60 | requires_backends(self, ["flax"]) 61 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/utils/dummy_torch_and_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
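# Placeholder ("dummy") objects that stand in for classes requiring both `torch` and
# `scipy`: instantiating one raises an informative error via `requires_backends`
# instead of failing with an ImportError when those libraries are missing.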
2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class LMSDiscreteScheduler(metaclass=DummyObject): 8 | _backends = ["torch", "scipy"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["torch", "scipy"]) 12 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/utils/dummy_torch_and_transformers_and_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class StableDiffusionOnnxPipeline(metaclass=DummyObject): 8 | _backends = ["torch", "transformers", "onnx"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["torch", "transformers", "onnx"]) 12 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/utils/dummy_torch_and_transformers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class LDMTextToImagePipeline(metaclass=DummyObject): 8 | _backends = ["torch", "transformers"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["torch", "transformers"]) 12 | 13 | 14 | class StableDiffusionImg2ImgPipeline(metaclass=DummyObject): 15 | _backends = ["torch", "transformers"] 16 | 17 | def __init__(self, *args, **kwargs): 18 | requires_backends(self, ["torch", "transformers"]) 19 | 20 | 21 | class StableDiffusionInpaintPipeline(metaclass=DummyObject): 22 | _backends = ["torch", "transformers"] 23 | 24 | def __init__(self, *args, **kwargs): 25 | requires_backends(self, ["torch", "transformers"]) 26 | 27 | 28 | class StableDiffusionPipeline(metaclass=DummyObject): 29 | _backends = ["torch", "transformers"] 30 | 31 | def __init__(self, *args, **kwargs): 32 | requires_backends(self, ["torch", "transformers"]) 33 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/src/diffusers/utils/model_card_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | {{ card_data }} 3 | --- 4 | 5 | 7 | 8 | # {{ model_name | default("Diffusion Model") }} 9 | 10 | ## Model description 11 | 12 | This diffusion model is trained with the [🤗 Diffusers](https://github.com/huggingface/diffusers) library 13 | on the `{{ dataset_name }}` dataset. 
14 | 15 | ## Intended uses & limitations 16 | 17 | #### How to use 18 | 19 | ```python 20 | # TODO: add an example code snippet for running this diffusion pipeline 21 | ``` 22 | 23 | #### Limitations and bias 24 | 25 | [TODO: provide examples of latent issues and potential remediations] 26 | 27 | ## Training data 28 | 29 | [TODO: describe the data used to train the model] 30 | 31 | ### Training hyperparameters 32 | 33 | The following hyperparameters were used during training: 34 | - learning_rate: {{ learning_rate }} 35 | - train_batch_size: {{ train_batch_size }} 36 | - eval_batch_size: {{ eval_batch_size }} 37 | - gradient_accumulation_steps: {{ gradient_accumulation_steps }} 38 | - optimizer: AdamW with betas=({{ adam_beta1 }}, {{ adam_beta2 }}), weight_decay={{ adam_weight_decay }} and epsilon={{ adam_epsilon }} 39 | - lr_scheduler: {{ lr_scheduler }} 40 | - lr_warmup_steps: {{ lr_warmup_steps }} 41 | - ema_inv_gamma: {{ ema_inv_gamma }} 42 | - ema_inv_gamma: {{ ema_power }} 43 | - ema_inv_gamma: {{ ema_max_decay }} 44 | - mixed_precision: {{ mixed_precision }} 45 | 46 | ### Training results 47 | 48 | 📈 [TensorBoard logs](https://huggingface.co/{{ repo_name }}/tensorboard?#scalars) 49 | 50 | 51 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/FlashAttention/diffusers/tests/__init__.py -------------------------------------------------------------------------------- /FlashAttention/diffusers/tests/test_config.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
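# Tests for `ConfigMixin` / `register_to_config`: default, keyword, and positional
# arguments are captured in `obj.config`, and configs survive a `save_config` /
# `from_config` round trip (tuples come back as lists due to JSON serialization).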
15 | 16 | import tempfile 17 | import unittest 18 | 19 | from diffusers.configuration_utils import ConfigMixin, register_to_config 20 | 21 | 22 | class SampleObject(ConfigMixin): 23 | config_name = "config.json" 24 | 25 | @register_to_config 26 | def __init__( 27 | self, 28 | a=2, 29 | b=5, 30 | c=(2, 5), 31 | d="for diffusion", 32 | e=[1, 3], 33 | ): 34 | pass 35 | 36 | 37 | class ConfigTester(unittest.TestCase): 38 | def test_load_not_from_mixin(self): 39 | with self.assertRaises(ValueError): 40 | ConfigMixin.from_config("dummy_path") 41 | 42 | def test_register_to_config(self): 43 | obj = SampleObject() 44 | config = obj.config 45 | assert config["a"] == 2 46 | assert config["b"] == 5 47 | assert config["c"] == (2, 5) 48 | assert config["d"] == "for diffusion" 49 | assert config["e"] == [1, 3] 50 | 51 | # init ignore private arguments 52 | obj = SampleObject(_name_or_path="lalala") 53 | config = obj.config 54 | assert config["a"] == 2 55 | assert config["b"] == 5 56 | assert config["c"] == (2, 5) 57 | assert config["d"] == "for diffusion" 58 | assert config["e"] == [1, 3] 59 | 60 | # can override default 61 | obj = SampleObject(c=6) 62 | config = obj.config 63 | assert config["a"] == 2 64 | assert config["b"] == 5 65 | assert config["c"] == 6 66 | assert config["d"] == "for diffusion" 67 | assert config["e"] == [1, 3] 68 | 69 | # can use positional arguments. 70 | obj = SampleObject(1, c=6) 71 | config = obj.config 72 | assert config["a"] == 1 73 | assert config["b"] == 5 74 | assert config["c"] == 6 75 | assert config["d"] == "for diffusion" 76 | assert config["e"] == [1, 3] 77 | 78 | def test_save_load(self): 79 | obj = SampleObject() 80 | config = obj.config 81 | 82 | assert config["a"] == 2 83 | assert config["b"] == 5 84 | assert config["c"] == (2, 5) 85 | assert config["d"] == "for diffusion" 86 | assert config["e"] == [1, 3] 87 | 88 | with tempfile.TemporaryDirectory() as tmpdirname: 89 | obj.save_config(tmpdirname) 90 | new_obj = SampleObject.from_config(tmpdirname) 91 | new_config = new_obj.config 92 | 93 | # unfreeze configs 94 | config = dict(config) 95 | new_config = dict(new_config) 96 | 97 | assert config.pop("c") == (2, 5) # instantiated as tuple 98 | assert new_config.pop("c") == [2, 5] # saved & loaded as list because of json 99 | assert config == new_config 100 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/tests/test_models_vq.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
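# Tests for `VQModel`: common model-tester behaviour with dummy inputs, loading the
# `fusing/vqgan-dummy` checkpoint from the Hub, and comparing an output slice against
# reference values.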
15 | 16 | import unittest 17 | 18 | import torch 19 | 20 | from diffusers import VQModel 21 | from diffusers.testing_utils import floats_tensor, torch_device 22 | 23 | from .test_modeling_common import ModelTesterMixin 24 | 25 | 26 | torch.backends.cuda.matmul.allow_tf32 = False 27 | 28 | 29 | class VQModelTests(ModelTesterMixin, unittest.TestCase): 30 | model_class = VQModel 31 | 32 | @property 33 | def dummy_input(self, sizes=(32, 32)): 34 | batch_size = 4 35 | num_channels = 3 36 | 37 | image = floats_tensor((batch_size, num_channels) + sizes).to(torch_device) 38 | 39 | return {"sample": image} 40 | 41 | @property 42 | def input_shape(self): 43 | return (3, 32, 32) 44 | 45 | @property 46 | def output_shape(self): 47 | return (3, 32, 32) 48 | 49 | def prepare_init_args_and_inputs_for_common(self): 50 | init_dict = { 51 | "block_out_channels": [32, 64], 52 | "in_channels": 3, 53 | "out_channels": 3, 54 | "down_block_types": ["DownEncoderBlock2D", "DownEncoderBlock2D"], 55 | "up_block_types": ["UpDecoderBlock2D", "UpDecoderBlock2D"], 56 | "latent_channels": 3, 57 | } 58 | inputs_dict = self.dummy_input 59 | return init_dict, inputs_dict 60 | 61 | def test_forward_signature(self): 62 | pass 63 | 64 | def test_training(self): 65 | pass 66 | 67 | def test_from_pretrained_hub(self): 68 | model, loading_info = VQModel.from_pretrained( 69 | "fusing/vqgan-dummy", output_loading_info=True 70 | ) 71 | self.assertIsNotNone(model) 72 | self.assertEqual(len(loading_info["missing_keys"]), 0) 73 | 74 | model.to(torch_device) 75 | image = model(**self.dummy_input) 76 | 77 | assert image is not None, "Make sure output is not None" 78 | 79 | def test_output_pretrained(self): 80 | model = VQModel.from_pretrained("fusing/vqgan-dummy") 81 | model.to(torch_device).eval() 82 | 83 | torch.manual_seed(0) 84 | if torch.cuda.is_available(): 85 | torch.cuda.manual_seed_all(0) 86 | 87 | image = torch.randn( 88 | 1, 89 | model.config.in_channels, 90 | model.config.sample_size, 91 | model.config.sample_size, 92 | ) 93 | image = image.to(torch_device) 94 | with torch.no_grad(): 95 | # Warmup pass when using mps (see #372) 96 | if torch_device == "mps": 97 | _ = model(image) 98 | output = model(image).sample 99 | 100 | output_slice = output[0, -1, -3:, -3:].flatten().cpu() 101 | # fmt: off 102 | expected_output_slice = torch.tensor([-0.0153, -0.4044, -0.1880, -0.5161, -0.2418, -0.4072, -0.1612, -0.0633, -0.0143]) 103 | # fmt: on 104 | self.assertTrue(torch.allclose(output_slice, expected_output_slice, atol=1e-3)) 105 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/utils/check_config_docstrings.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
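# Consistency check: every configuration class in `CONFIG_MAPPING` must reference at
# least one checkpoint in its docstring, e.g. `[bert-base-uncased](https://huggingface.co/bert-base-uncased)`,
# where the link matches the checkpoint name; classes in the ignore set are exempt.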
15 | 16 | import importlib 17 | import inspect 18 | import os 19 | import re 20 | 21 | 22 | # All paths are set with the intent you should run this script from the root of the repo with the command 23 | # python utils/check_config_docstrings.py 24 | PATH_TO_TRANSFORMERS = "src/transformers" 25 | 26 | 27 | # This is to make sure the transformers module imported is the one in the repo. 28 | spec = importlib.util.spec_from_file_location( 29 | "transformers", 30 | os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), 31 | submodule_search_locations=[PATH_TO_TRANSFORMERS], 32 | ) 33 | transformers = spec.loader.load_module() 34 | 35 | CONFIG_MAPPING = transformers.models.auto.configuration_auto.CONFIG_MAPPING 36 | 37 | # Regex pattern used to find the checkpoint mentioned in the docstring of `config_class`. 38 | # For example, `[bert-base-uncased](https://huggingface.co/bert-base-uncased)` 39 | _re_checkpoint = re.compile("\[(.+?)\]\((https://huggingface\.co/.+?)\)") 40 | 41 | 42 | CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK = { 43 | "CLIPConfigMixin", 44 | "DecisionTransformerConfigMixin", 45 | "EncoderDecoderConfigMixin", 46 | "RagConfigMixin", 47 | "SpeechEncoderDecoderConfigMixin", 48 | "VisionEncoderDecoderConfigMixin", 49 | "VisionTextDualEncoderConfigMixin", 50 | } 51 | 52 | 53 | def check_config_docstrings_have_checkpoints(): 54 | configs_without_checkpoint = [] 55 | 56 | for config_class in list(CONFIG_MAPPING.values()): 57 | checkpoint_found = False 58 | 59 | # source code of `config_class` 60 | config_source = inspect.getsource(config_class) 61 | checkpoints = _re_checkpoint.findall(config_source) 62 | 63 | for checkpoint in checkpoints: 64 | # Each `checkpoint` is a tuple of a checkpoint name and a checkpoint link. 65 | # For example, `('bert-base-uncased', 'https://huggingface.co/bert-base-uncased')` 66 | ckpt_name, ckpt_link = checkpoint 67 | 68 | # verify the checkpoint name corresponds to the checkpoint link 69 | ckpt_link_from_name = f"https://huggingface.co/{ckpt_name}" 70 | if ckpt_link == ckpt_link_from_name: 71 | checkpoint_found = True 72 | break 73 | 74 | name = config_class.__name__ 75 | if ( 76 | not checkpoint_found 77 | and name not in CONFIG_CLASSES_TO_IGNORE_FOR_DOCSTRING_CHECKPOINT_CHECK 78 | ): 79 | configs_without_checkpoint.append(name) 80 | 81 | if len(configs_without_checkpoint) > 0: 82 | message = "\n".join(sorted(configs_without_checkpoint)) 83 | raise ValueError( 84 | f"The following configurations don't contain any valid checkpoint:\n{message}" 85 | ) 86 | 87 | 88 | if __name__ == "__main__": 89 | check_config_docstrings_have_checkpoints() 90 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/utils/print_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # coding=utf-8 4 | # Copyright 2022 The HuggingFace Inc. team. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | # this script dumps information about the environment 19 | 20 | import os 21 | import platform 22 | import sys 23 | 24 | 25 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 26 | 27 | print("Python version:", sys.version) 28 | 29 | print("OS platform:", platform.platform()) 30 | print("OS architecture:", platform.machine()) 31 | 32 | try: 33 | import torch 34 | 35 | print("Torch version:", torch.__version__) 36 | print("Cuda available:", torch.cuda.is_available()) 37 | print("Cuda version:", torch.version.cuda) 38 | print("CuDNN version:", torch.backends.cudnn.version()) 39 | print("Number of GPUs available:", torch.cuda.device_count()) 40 | except ImportError: 41 | print("Torch version:", None) 42 | 43 | try: 44 | import transformers 45 | 46 | print("transformers version:", transformers.__version__) 47 | except ImportError: 48 | print("transformers version:", None) 49 | -------------------------------------------------------------------------------- /FlashAttention/diffusers/utils/stale.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team, the AllenNLP library authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Script to close stale issue. Taken in part from the AllenNLP repository. 16 | https://github.com/allenai/allennlp. 17 | """ 18 | import os 19 | from datetime import datetime as dt 20 | 21 | from github import Github 22 | 23 | 24 | LABELS_TO_EXEMPT = [ 25 | "good first issue", 26 | "good second issue", 27 | "good difficult issue", 28 | "enhancement", 29 | "new pipeline/model", 30 | "new scheduler", 31 | "wip", 32 | ] 33 | 34 | 35 | def main(): 36 | g = Github(os.environ["GITHUB_TOKEN"]) 37 | repo = g.get_repo("huggingface/diffusers") 38 | open_issues = repo.get_issues(state="open") 39 | 40 | for issue in open_issues: 41 | comments = sorted( 42 | [comment for comment in issue.get_comments()], 43 | key=lambda i: i.created_at, 44 | reverse=True, 45 | ) 46 | last_comment = comments[0] if len(comments) > 0 else None 47 | if ( 48 | last_comment is not None 49 | and last_comment.user.login != "github-actions[bot]" 50 | and (dt.utcnow() - issue.updated_at).days > 23 51 | and (dt.utcnow() - issue.created_at).days >= 30 52 | and not any( 53 | label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels() 54 | ) 55 | ): 56 | issue.create_comment( 57 | "This issue has been automatically marked as stale because it has not had " 58 | "recent activity. If you think this still needs to be addressed " 59 | "please comment on this thread.\n\nPlease note that issues that do not follow the " 60 | "[contributing guidelines](https://github.com/huggingface/diffusers/blob/main/CONTRIBUTING.md) " 61 | "are likely to be ignored." 
62 | ) 63 | issue.edit(labels=["stale"]) 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /FlashAttention/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.21.2 2 | diffusers==0.3.0 3 | #torch==1.12.1+cu116 4 | scipy 5 | uvicorn 6 | pydantic 7 | fastapi 8 | huggingface_hub -------------------------------------------------------------------------------- /FlashAttention/server.py: -------------------------------------------------------------------------------- 1 | from huggingface_hub import HfApi 2 | from huggingface_hub.commands.user import _login 3 | 4 | _login(HfApi(), token="") 5 | from fastapi import FastAPI 6 | from typing import List, Union 7 | from pydantic import BaseModel 8 | from diffusers import StableDiffusionPipeline 9 | import torch 10 | import io 11 | from fastapi import Response 12 | 13 | torch_device = torch.device("cuda:0") 14 | 15 | 16 | class Item(BaseModel): 17 | prompt: Union[str, List[str]] 18 | img_height: int = 512 19 | img_width: int = 512 20 | num_inference_steps: int = 50 21 | guidance_scale: float = 7.5 22 | 23 | 24 | app = FastAPI() 25 | pipe = StableDiffusionPipeline.from_pretrained( 26 | "CompVis/stable-diffusion-v1-4", 27 | revision="fp16", 28 | torch_dtype=torch.float16, 29 | use_auth_token=True, 30 | ).to("cuda") 31 | 32 | 33 | @app.post("/predict/") 34 | async def predict(input_api: Item): 35 | with torch.inference_mode(), torch.autocast("cuda"): 36 | images = pipe(input_api.prompt) 37 | im = images.images[0] 38 | 39 | # save image to an in-memory bytes buffer 40 | with io.BytesIO() as buf: 41 | im.save(buf, format="PNG") 42 | im_bytes = buf.getvalue() 43 | headers = {"Content-Disposition": 'inline; filename="test.png"'} 44 | return Response(im_bytes, headers=headers, media_type="image/png") 45 | -------------------------------------------------------------------------------- /ONNX/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | WORKDIR /app 4 | 5 | ARG model_dir_path 6 | ARG onnx_execution_provider=CUDAExecutionProvider 7 | 8 | ENV ONNX_EXECUTION_PROVIDER=$onnx_execution_provider 9 | 10 | WORKDIR /app 11 | COPY requirements.txt requirements.txt 12 | RUN python -m pip install --upgrade pip && \ 13 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html && \ 14 | pip install -r /app/requirements.txt 15 | COPY $model_dir_path /app/model 16 | COPY server.py model.py ./ 17 | EXPOSE 5000 18 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"] 19 | 20 | # Build Docker image example 21 | # sudo docker build --build-arg model_dir_path=stable_diffusion_onnx_model --build-arg onnx_execution_provider=CUDAExecutionProvider -f Dockerfile -t stable_diffusion_onnx_img . 22 | 23 | # Run Docker image example 24 | # sudo docker run --gpus all -p 5000:5000 stable_diffusion_onnx_img 25 | -------------------------------------------------------------------------------- /ONNX/README.md: -------------------------------------------------------------------------------- 1 | # ONNX Stable Diffusion Example 2 | 3 | ## 1. Requirements 4 | The Stable Diffusion model will be downloaded from the Hugging Face Hub. 
That's why before running any of the scripts (`demo.py` or `server.py`) you will have to login in the Hugging Face Hub using the following command: 5 | 6 | ``` 7 | huggingface-cli login 8 | ``` 9 | 10 | If not, you can download the same model from the following path: `https://downloads.stochastic.ai/stable-diffusion/onnx_model.zip` 11 | 12 | ### 1.1. Docker execution 13 | [Install Docker](https://docs.docker.com/engine/install/) 14 | 15 | 16 | ### 1.2. Python execution 17 | [Install Python](https://www.python.org/downloads/) and the required libraries: 18 | ``` 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | ## 2. REST API 23 | 24 | ### 2.1. Docker execution 25 | 26 | 1. Build the Docker image 27 | ``` 28 | docker build --build-arg model_dir_path=/path/to/stable_diffusion/model -f Dockerfile -t stable_diffusion_img . 29 | ``` 30 | 31 | 2. Execute the Docker Container 32 | ``` 33 | sudo docker run --gpus all -p 5000:5000 stable_diffusion_img 34 | ``` 35 | 36 | ### 2.2. Python execution 37 | 38 | To deploy the Stable Diffusion model as an API, execute the following command: 39 | ``` 40 | uvicorn server:app --host 0.0.0.0 --port 5000 41 | ``` 42 | 43 | ## 3. Demo App 44 | 45 | To generate images as a command line tool, execute the following command: 46 | ``` 47 | python demo.py --prompt "an astronaut riding a horse" 48 | ``` 49 | 50 | Check all the options of the command line tool with `python demo.py --help` 51 | -------------------------------------------------------------------------------- /ONNX/model.py: -------------------------------------------------------------------------------- 1 | from diffusers import StableDiffusionOnnxPipeline 2 | import torch 3 | from typing import List, Union 4 | import time 5 | from PIL import Image 6 | 7 | 8 | def load_model( 9 | model_name_or_path="CompVis/stable-diffusion-v1-4", provider="CUDAExecutionProvider" 10 | ) -> StableDiffusionOnnxPipeline: 11 | """Loads the model 12 | 13 | :param model_name_or_path: model name or path, defaults to "CompVis/stable-diffusion-v1-4" 14 | :param provider: execution provider - Onnx Runtime, defaults to "CUDAExecutionProvider" 15 | :return: the model 16 | """ 17 | 18 | pipe = StableDiffusionOnnxPipeline.from_pretrained( 19 | model_name_or_path, 20 | revision="onnx", 21 | provider=provider, 22 | use_auth_token=True, 23 | ) 24 | 25 | return pipe 26 | 27 | 28 | def inference( 29 | model: StableDiffusionOnnxPipeline, 30 | prompt: Union[str, List[str]], 31 | img_height: int = 512, 32 | img_width: int = 512, 33 | num_inference_steps: int = 50, 34 | guidance_scale: float = 7.5, 35 | num_images_per_prompt: int = 1, 36 | seed: int = None, 37 | return_time=False, 38 | ) -> Image: 39 | """Function to start generating images 40 | 41 | :param model: model 42 | :param prompt: prompt 43 | :param img_height: image height, defaults to 512 44 | :param img_width: image width, defaults to 512 45 | :param num_inference_steps: number of inference steps, defaults to 50 46 | :param guidance_scale: guidance scale, defaults to 7.5 47 | :param num_images_per_prompt: number of images per prompt, defaults to 1 48 | :param seed: seed, defaults to None 49 | :param return_time: if the time to generate should be returned, defaults to False 50 | :return: the generated images and the time if return_time is True 51 | """ 52 | generator = None 53 | if seed is not None: 54 | generator = torch.Generator(device="cuda") 55 | generator = generator.manual_seed(seed) 56 | 57 | start_time = time.time() 58 | output = model( 59 | prompt=prompt, 60 | 
height=img_height, 61 | width=img_width, 62 | num_inference_steps=num_inference_steps, 63 | guidance_scale=guidance_scale, 64 | num_images_per_prompt=num_images_per_prompt, 65 | generator=generator, 66 | ) 67 | end_time = time.time() 68 | 69 | if return_time: 70 | return output.images, end_time - start_time 71 | 72 | return output.images 73 | -------------------------------------------------------------------------------- /ONNX/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.21.2 2 | diffusers==0.3.0 3 | torch==1.12.1+cu116 4 | fastapi==0.85.0 5 | uvicorn[standard]==0.18.3 6 | onnxruntime-gpu==1.12.1 7 | numpy==1.23.4 -------------------------------------------------------------------------------- /ONNX/server.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from model import load_model, inference 3 | from pydantic import BaseModel 4 | from typing import Union, List 5 | import torch 6 | import numpy as np 7 | import os 8 | from typing import Dict, Union 9 | from PIL import Image 10 | 11 | 12 | class Item(BaseModel): 13 | prompt: Union[str, List[str]] 14 | img_height: int = 512 15 | img_width: int = 512 16 | num_inference_steps: int = 50 17 | guidance_scale: float = 7.5 18 | num_images_per_prompt: int = 1 19 | seed: int = None 20 | 21 | 22 | exeuction_provider = os.getenv("ONNX_EXECUTION_PROVIDER") 23 | 24 | if exeuction_provider is None and torch.cuda.is_available(): 25 | print("[+] Moving the model to the GPU") 26 | exeuction_provider = "CUDAExecutionProvider" 27 | elif exeuction_provider is None: 28 | print("[+] Your model will be executed in CPU. The execution might be very slow.") 29 | exeuction_provider = "CPUExecutionProvider" 30 | 31 | 32 | app = FastAPI() 33 | print("[+] Loading model") 34 | 35 | model = load_model( 36 | model_name_or_path="CompVis/stable-diffusion-v1-4" 37 | if os.getenv("MODEL_DIR_PATH") is None 38 | else os.getenv("MODEL_DIR_PATH"), 39 | provider=exeuction_provider, 40 | ) 41 | print("[+] Model loaded") 42 | 43 | 44 | @app.post("/predict/") 45 | async def predict(input_api: Item) -> Dict: 46 | """POST method that received the prompts 47 | 48 | :param input_api: input 49 | :return: the images and the time to generate the images 50 | """ 51 | model_input = {**input_api.dict(), **{"return_time": True}} 52 | 53 | images, time = inference(model=model, **model_input) 54 | 55 | images = np.array([np.array(img) for img in images]).tolist() 56 | 57 | return {"images": images, "generation_time_in_secs": time} 58 | -------------------------------------------------------------------------------- /PyTorch/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | # Download the HuggingFace model in your local machine and specify the directory path 4 | ARG model_dir_path 5 | 6 | WORKDIR /code 7 | ENV MODEL_DIR_PATH=/code/model 8 | COPY requirements.txt requirements.txt 9 | RUN python -m pip install --upgrade pip && \ 10 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html && \ 11 | pip install -r /code/requirements.txt 12 | COPY $model_dir_path /code/model 13 | COPY server.py model.py ./ 14 | EXPOSE 5000 15 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"] 16 | 17 | # Build Docker image example 18 | # sudo docker build --build-arg 
model_dir_path=stable_diffusion_torch_model -f Dockerfile -t stable_diffusion_img . 19 | 20 | # Run Docker image example 21 | # sudo docker run --gpus all -p 5000:5000 stable_diffusion_img -------------------------------------------------------------------------------- /PyTorch/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch FP16 Stable Diffusion Example 2 | 3 | ## 1. Requirements 4 | The Stable Diffusion model will be downloaded from the Hugging Face Hub. That's why before running any of the scripts (`demo.py` or `server.py`) you will have to login in the Hugging Face Hub using the following command: 5 | 6 | ``` 7 | huggingface-cli login 8 | ``` 9 | 10 | If not, you can download the same model from the following S3 path: `https://downloads.stochastic.ai/stable-diffusion/pytorch_model.zip` 11 | 12 | ### 1.1. Docker execution 13 | [Install Docker](https://docs.docker.com/engine/install/) 14 | 15 | 16 | ### 1.2. Python execution 17 | [Install Python](https://www.python.org/downloads/) and the required libraries: 18 | ``` 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | ## 2. REST API 23 | 24 | ### 2.1. Docker execution 25 | 26 | 1. Build the Docker image 27 | ``` 28 | docker build --build-arg model_dir_path=/path/to/stable_diffusion/model -f Dockerfile -t stable_diffusion_img . 29 | ``` 30 | 31 | 2. Execute the Docker Container 32 | ```bash 33 | sudo docker run --gpus all -p 5000:5000 stable_diffusion_img 34 | ``` 35 | 36 | ### 2.2. Python execution 37 | 38 | To deploy the Stable Diffusion model as an API, execute the following command: 39 | ``` 40 | uvicorn server:app --host 0.0.0.0 --port 5000 41 | ``` 42 | 43 | ## 3. Demo App 44 | 45 | To generate images as a command line tool, execute the following command: 46 | ```bash 47 | python demo.py --prompt "an astronaut riding a horse" 48 | ``` 49 | 50 | Check all the options of the command line tool with `python demo.py --help` -------------------------------------------------------------------------------- /PyTorch/demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from model import load_model, inference 3 | from pathlib import Path 4 | import uuid 5 | 6 | 7 | def get_args(): 8 | """Configure argparser 9 | 10 | :return: arguments 11 | """ 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument( 14 | "--prompt", 15 | default="Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci", 16 | help="input prompt", 17 | ) 18 | parser.add_argument( 19 | "--img_height", 20 | type=int, 21 | default=512, 22 | help="The height in pixels of the generated image.", 23 | ) 24 | parser.add_argument( 25 | "--img_width", 26 | type=int, 27 | default=512, 28 | help="The width in pixels of the generated image.", 29 | ) 30 | parser.add_argument( 31 | "--num_inference_steps", 32 | type=int, 33 | default=50, 34 | help="The number of denoising steps. 
More denoising steps usually lead to a higher quality image at the expense of slower inference", 35 | ) 36 | parser.add_argument( 37 | "--guidance_scale", type=float, default=7.5, help="Guidance scale" 38 | ) 39 | parser.add_argument( 40 | "--num_images_per_prompt", 41 | type=int, 42 | default=1, 43 | help="The number of images to generate per prompt.", 44 | ) 45 | parser.add_argument( 46 | "--seed", type=int, default=None, help="Seed to make generation deterministic" 47 | ) 48 | parser.add_argument( 49 | "--saving_path", 50 | type=str, 51 | default="generated_images", 52 | help="Directory where the generated images will be saved", 53 | ) 54 | 55 | return parser.parse_args() 56 | 57 | 58 | if __name__ == "__main__": 59 | args = get_args() 60 | 61 | # Create directory to save images if it does not exist 62 | saving_path = Path(args.saving_path) 63 | if not saving_path.exists(): 64 | saving_path.mkdir(exist_ok=True, parents=True) 65 | 66 | print("[+] Loading the model") 67 | model = load_model() 68 | print("[+] Model loaded") 69 | 70 | print("[+] Generating images...") 71 | # PIL images 72 | images, time = inference( 73 | model=model, 74 | prompt=args.prompt, 75 | img_height=args.img_height, 76 | img_width=args.img_width, 77 | num_inference_steps=args.num_inference_steps, 78 | guidance_scale=args.guidance_scale, 79 | num_images_per_prompt=args.num_images_per_prompt, 80 | seed=args.seed, 81 | return_time=True, 82 | ) 83 | 84 | print("[+] Time needed to generate the images: {} seconds".format(time)) 85 | 86 | # Save PIL images with a random name 87 | for img in images: 88 | img.save("{}/{}.png".format(saving_path.as_posix(), uuid.uuid4())) 89 | 90 | print("[+] Images saved in the following path: {}".format(saving_path.as_posix())) 91 | -------------------------------------------------------------------------------- /PyTorch/model.py: -------------------------------------------------------------------------------- 1 | from diffusers import StableDiffusionPipeline 2 | import torch 3 | from typing import List, Union 4 | import time 5 | 6 | 7 | def load_model( 8 | model_name_or_path="stabilityai/stable-diffusion-2-1", 9 | ) -> StableDiffusionPipeline: 10 | """Load model 11 | 12 | :param model_name_or_path: model name (downloaded from HF Hub) or model path (local), defaults to "CompVis/stable-diffusion-v1-4" 13 | :return: the Stable Diffusion pipeline 14 | """ 15 | pipe = StableDiffusionPipeline.from_pretrained( 16 | model_name_or_path, 17 | # revision="fp16", 18 | torch_dtype=torch.float16, 19 | # use_auth_token=True, 20 | ) 21 | pipe = pipe.to("cuda") 22 | 23 | return pipe 24 | 25 | 26 | def inference( 27 | model: StableDiffusionPipeline, 28 | prompt: Union[str, List[str]], 29 | img_height: int = 512, 30 | img_width: int = 512, 31 | num_inference_steps: int = 50, 32 | guidance_scale: float = 7.5, 33 | num_images_per_prompt: int = 1, 34 | seed: int = None, 35 | return_time=False, 36 | ): 37 | """Do inference 38 | 39 | :param model: the Stable Diffusion pipeline 40 | :param prompt: the prompt 41 | :param img_height: height of the generated image, defaults to 512 42 | :param img_width: width of the generated image, defaults to 512 43 | :param num_inference_steps: the number of denoising steps. 
More denoising steps usually lead to a higher quality image at the expense of slower inference, defaults to 50 44 | :param guidance_scale: guidance scale, defaults to 7.5 45 | :param num_images_per_prompt: the number of images to generate per prompt, defaults to 1 46 | :param seed: Seed to make generation deterministic, defaults to None 47 | :param return_time: specify if time taken to generate the images should be returned, defaults to False 48 | :return: the output images and the time (if return time is True) 49 | """ 50 | generator = None 51 | if seed is not None: 52 | generator = torch.Generator(device="cuda") 53 | generator = generator.manual_seed(seed) 54 | 55 | start_time = time.time() 56 | with torch.autocast("cuda"): 57 | output = model( 58 | prompt=prompt, 59 | height=img_height, 60 | width=img_width, 61 | num_inference_steps=num_inference_steps, 62 | guidance_scale=guidance_scale, 63 | num_images_per_prompt=num_images_per_prompt, 64 | generator=generator, 65 | ) 66 | end_time = time.time() 67 | 68 | if return_time: 69 | return output.images, end_time - start_time 70 | 71 | return output.images 72 | -------------------------------------------------------------------------------- /PyTorch/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.30.1 2 | diffusers==0.3.0 3 | torch==2.0.1+cu117 4 | fastapi==0.85.0 5 | uvicorn[standard]==0.18.3 6 | accelerate==0.20.3 7 | safetensors==0.3.1 8 | scipy==1.10.1 9 | torchvision==0.15.2+cu117 -------------------------------------------------------------------------------- /PyTorch/server.py: -------------------------------------------------------------------------------- 1 | import os 2 | from fastapi import FastAPI 3 | from model import load_model, inference 4 | from pydantic import BaseModel 5 | from typing import Union, List 6 | import torch 7 | import numpy as np 8 | 9 | 10 | class Item(BaseModel): 11 | prompt: Union[str, List[str]] 12 | img_height: int = 512 13 | img_width: int = 512 14 | num_inference_steps: int = 50 15 | guidance_scale: float = 7.5 16 | num_images_per_prompt: int = 1 17 | seed: int = None 18 | 19 | 20 | app = FastAPI() 21 | print("[+] Loading model") 22 | model = load_model( 23 | model_name_or_path="CompVis/stable-diffusion-v1-4" 24 | if os.getenv("MODEL_DIR_PATH") is None 25 | else os.getenv("MODEL_DIR_PATH") 26 | ) 27 | print("[+] Model loaded") 28 | 29 | if torch.cuda.is_available(): 30 | print("[+] Moving the model to the GPU") 31 | model = model.to("cuda") 32 | else: 33 | print("[+] Your model will be executed in CPU. 
The execution might be very slow.") 34 | 35 | 36 | @app.post("/predict/") 37 | async def predict(input_api: Item): 38 | model_input = {**input_api.dict(), **{"return_time": True}} 39 | 40 | images, time = inference(model=model, **model_input) 41 | 42 | images = np.array([np.array(img) for img in images]).tolist() 43 | 44 | return {"images": images, "generation_time_in_secs": time} 45 | -------------------------------------------------------------------------------- /TensorRT/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | FROM nvidia/cuda:11.6.0-devel-ubuntu20.04 3 | 4 | RUN apt-get update && apt-get install --no-install-recommends -y curl && apt-get -y install git 5 | 6 | ENV CONDA_AUTO_UPDATE_CONDA=false \ 7 | PATH=/opt/miniconda/bin:$PATH 8 | RUN curl -sLo ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh \ 9 | && chmod +x ~/miniconda.sh \ 10 | && ~/miniconda.sh -b -p /opt/miniconda \ 11 | && rm ~/miniconda.sh \ 12 | && sed -i "$ a PATH=/opt/miniconda/bin:\$PATH" /etc/environment 13 | 14 | RUN python3 -m pip --no-cache-dir install --upgrade pip 15 | 16 | WORKDIR /code 17 | 18 | RUN pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 -f https://download.pytorch.org/whl/torch_stable.html 19 | 20 | COPY requirements.txt /code/requirements.txt 21 | 22 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt 23 | 24 | RUN apt-get update && apt-get -y install wget 25 | 26 | RUN wget https://developer.download.nvidia.com/compute/redist/nvidia-tensorrt/nvidia_tensorrt-8.4.2.4-cp39-none-linux_x86_64.whl 27 | 28 | RUN pip install nvidia_tensorrt-8.4.2.4-cp39-none-linux_x86_64.whl 29 | 30 | COPY . /code/ 31 | 32 | EXPOSE 5000 33 | 34 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"] -------------------------------------------------------------------------------- /TensorRT/README.md: -------------------------------------------------------------------------------- 1 | ## TensorRT Stable Diffusion Example 2 | 3 | ### Build Dependencies 4 | 5 | Install TensorRT 8.4.2.4 6 | 7 | ``` 8 | wget https://developer.download.nvidia.com/compute/redist/nvidia-tensorrt/nvidia_tensorrt-8.4.2.4-cp39-none-linux_x86_64.whl 9 | pip install nvidia_tensorrt-8.4.2.4-cp39-none-linux_x86_64.whl 10 | ``` 11 | 12 | Install libraries 13 | 14 | ``` 15 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | Verify the library versions. We have tested with transformers 4.22, diffusers 0.3 and torch 1.12. 20 | 21 | ### Convert Unet Onnx model to TensorRT model 22 | 23 | You need to download the Unet ONNX model before converting. You can download it from the [HuggingFace hub](https://huggingface.co/kamalkraj/stable-diffusion-v1-4-onnx/resolve/main/models.tar.gz). Extract the tar file; the Unet ONNX model is stored in `./models/unet/unet.onnx`. 24 | 25 | You also need to register on the HuggingFace hub. Get your access token from [Hugging Face account settings](https://huggingface.co/settings/tokens). Then log in using the `huggingface-cli login` command. 26 | 27 | ``` 28 | python3 convert_unet_to_tensorrt.py 29 | ``` 30 | 31 | The Unet TensorRT model is stored in `./unet.engine` 32 | 33 | ### Benchmark 34 | 35 | ``` 36 | python3 demo.py --benchmark 37 | ``` 38 | 39 | ### Deploy as rest-api end-point 40 | 41 | You need to provide the HuggingFace token in the file `server.py`.
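If this `server.py` follows the same pattern as `FlashAttention/server.py` shown earlier in this repository (a direct call to `huggingface_hub`'s `_login` helper with an empty `token=""`), providing the token simply means pasting your access token into that call. A minimal sketch, assuming that pattern; the token value is a placeholder:

```
from huggingface_hub import HfApi
from huggingface_hub.commands.user import _login

# Paste the access token from https://huggingface.co/settings/tokens here.
# "hf_xxx" below is a placeholder, not a real token.
_login(HfApi(), token="hf_xxx")
```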
42 | 43 | ``` 44 | docker build -t tensorrt_diffusion . 45 | docker run -p 5000:5000 -ti --gpus=all tensorrt_diffusion 46 | ``` 47 | 48 | ### Test API 49 | 50 | ``` 51 | python3 client.py 52 | ``` 53 | 54 | Check the resulted image: `output_api.png` 55 | -------------------------------------------------------------------------------- /TensorRT/client.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | 5 | if __name__ == "__main__": 6 | text = "The Easter bunny riding a motorcycle in New York City" 7 | t0 = time.time() 8 | for i in range(50): 9 | print("Iteration: ", i) 10 | out = requests.post( 11 | "http://localhost:5000/predict/", data=json.dumps({"prompt": [text]}) 12 | ) 13 | t1 = time.time() 14 | print("Inference time is: ", (t1 - t0) / 50) 15 | with open("output_api.png", "wb") as f: 16 | f.write(out.content) 17 | -------------------------------------------------------------------------------- /TensorRT/convert_unet_to_tensorrt.py: -------------------------------------------------------------------------------- 1 | import tensorrt as trt 2 | import os, sys, argparse 3 | import numpy as np 4 | import pycuda.driver as cuda 5 | import pycuda.autoinit # without this, "LogicError: explicit_context_dependent failed: invalid device context - no currently active context?" 6 | from time import time 7 | 8 | 9 | def get_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "--onnx_unet_path", 13 | default="./models/unet/1/unet.onnx", 14 | type=str, 15 | help="Onnx unet model path", 16 | ) 17 | parser.add_argument( 18 | "--save_path", default="unet.engine", type=str, help="TensorRT saved path" 19 | ) 20 | parser.add_argument("--batch_size", default=1, type=int, help="batch size") 21 | parser.add_argument( 22 | "--img_size", default=(512, 512), help="Unet input image size (h,w)" 23 | ) 24 | parser.add_argument( 25 | "--max_seq_length", default=64, help="Maximum sequence length of input text" 26 | ) 27 | 28 | return parser.parse_args() 29 | 30 | 31 | def convert(args): 32 | TRT_LOGGER = trt.Logger(trt.Logger.INFO) 33 | TRT_BUILDER = trt.Builder(TRT_LOGGER) 34 | network = TRT_BUILDER.create_network( 35 | 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) 36 | ) 37 | onnx_parser = trt.OnnxParser(network, TRT_LOGGER) 38 | parse_success = onnx_parser.parse_from_file(args.onnx_unet_path) 39 | for idx in range(onnx_parser.num_errors): 40 | print(onnx_parser.get_error(idx)) 41 | if not parse_success: 42 | sys.exit("ONNX model parsing failed") 43 | config = TRT_BUILDER.create_builder_config() 44 | profile = TRT_BUILDER.create_optimization_profile() 45 | 46 | latents_shape = ( 47 | args.batch_size * 2, 48 | 4, 49 | args.img_size[0] // 8, 50 | args.img_size[1] // 8, 51 | ) 52 | embed_shape = (args.batch_size * 2, args.max_seq_length, 768) 53 | timestep_shape = (args.batch_size,) 54 | 55 | profile.set_shape("sample", latents_shape, latents_shape, latents_shape) 56 | profile.set_shape("encoder_hidden_states", embed_shape, embed_shape, embed_shape) 57 | profile.set_shape("timestep", timestep_shape, timestep_shape, timestep_shape) 58 | config.add_optimization_profile(profile) 59 | 60 | # config.max_workspace_size = 4096 * (1 << 20) 61 | config.set_flag(trt.BuilderFlag.FP16) 62 | serialized_engine = TRT_BUILDER.build_serialized_network(network, config) 63 | 64 | ## save TRT engine 65 | with open(args.save_path, "wb") as f: 66 | f.write(serialized_engine) 67 | print(f"Engine is saved to 
{args.save_path}") 68 | 69 | 70 | if __name__ == "__main__": 71 | args = get_args() 72 | convert(args) 73 | -------------------------------------------------------------------------------- /TensorRT/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.21.2 2 | diffusers==0.3.0 3 | #torch==1.12.1+cu116 4 | scipy 5 | uvicorn 6 | pydantic 7 | fastapi 8 | pycuda 9 | huggingface_hub -------------------------------------------------------------------------------- /generated_images/AITemplate/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/0.png -------------------------------------------------------------------------------- /generated_images/AITemplate/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/1.png -------------------------------------------------------------------------------- /generated_images/AITemplate/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/2.png -------------------------------------------------------------------------------- /generated_images/AITemplate/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/3.png -------------------------------------------------------------------------------- /generated_images/AITemplate/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/4.png -------------------------------------------------------------------------------- /generated_images/AITemplate/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/5.png -------------------------------------------------------------------------------- /generated_images/AITemplate/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/6.png -------------------------------------------------------------------------------- /generated_images/AITemplate/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/7.png -------------------------------------------------------------------------------- /generated_images/AITemplate/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/8.png 
-------------------------------------------------------------------------------- /generated_images/AITemplate/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/AITemplate/9.png -------------------------------------------------------------------------------- /generated_images/AITemplate/README.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion images with AITemplate 2 | 3 | | Prompt | Generated image | 4 | | --- | --- 5 | | Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci | ![stable_diffusion-super_mario_airport](./0.png) 6 | | The Easter bunny riding a motorcycle in New York City | ![stable_diffusion-easter_bunny](./1.png) 7 | | Lecco in the winter in the year 2055 | ![stable_diffusion-lecco_winter](./2.png) 8 | | photorealistic orange 1935 ford in ancient Qgypt pyramid of Giza in background | ![stable_diffusion-pyramids_ford](./3.png) 9 | | the boulevards are crowded today | ![stable_diffusion-boulevards](./4.png) 10 | | A photo of cat riding on a bicycle | ![stable_diffusion-cat_riding_bicycle](./5.png) 11 | | Bird-eye view of a highway in Los Angeles | ![stable_diffusion-bird_highway](./6.png) 12 | | A beautiful sunrise on mars. High-definition. | ![stable_diffusion-sunrise_mars](./7.png) 13 | | A panda bear driving a car | ![stable_diffusion-panda_bear](./8.png) 14 | | Drone flythrough of a tropical jungle convered in snow | ![stable_diffusion-drone_tropical_jungle](./9.png) -------------------------------------------------------------------------------- /generated_images/FlashAttention/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/0.png -------------------------------------------------------------------------------- /generated_images/FlashAttention/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/1.png -------------------------------------------------------------------------------- /generated_images/FlashAttention/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/2.png -------------------------------------------------------------------------------- /generated_images/FlashAttention/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/3.png -------------------------------------------------------------------------------- /generated_images/FlashAttention/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/4.png -------------------------------------------------------------------------------- /generated_images/FlashAttention/5.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/5.png -------------------------------------------------------------------------------- /generated_images/FlashAttention/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/6.png -------------------------------------------------------------------------------- /generated_images/FlashAttention/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/7.png -------------------------------------------------------------------------------- /generated_images/FlashAttention/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/8.png -------------------------------------------------------------------------------- /generated_images/FlashAttention/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/FlashAttention/9.png -------------------------------------------------------------------------------- /generated_images/FlashAttention/README.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion images with FlashAttention 2 | 3 | | Prompt | Generated image | 4 | | --- | --- 5 | | Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci | ![stable_diffusion-super_mario_airport](./0.png) 6 | | The Easter bunny riding a motorcycle in New York City | ![stable_diffusion-easter_bunny](./1.png) 7 | | Lecco in the winter in the year 2055 | ![stable_diffusion-lecco_winter](./2.png) 8 | | photorealistic orange 1935 ford in ancient Qgypt pyramid of Giza in background | ![stable_diffusion-pyramids_ford](./3.png) 9 | | the boulevards are crowded today | ![stable_diffusion-boulevards](./4.png) 10 | | TA photo of cat riding on a bicycle | ![stable_diffusion-cat_riding_bicycle](./5.png) 11 | | Bird-eye view of a highway in Los Angeles | ![stable_diffusion-bird_highway](./6.png) 12 | | A beautiful sunrise on mars. High-definition. 
| ![stable_diffusion-sunrise_mars](./7.png) 13 | | A panda bear driving a car | ![stable_diffusion-panda_bear](./8.png) 14 | | Drone flythrough of a tropical jungle convered in snow | ![stable_diffusion-drone_tropical_jungle](./9.png) -------------------------------------------------------------------------------- /generated_images/PyTorch/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/0.png -------------------------------------------------------------------------------- /generated_images/PyTorch/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/1.png -------------------------------------------------------------------------------- /generated_images/PyTorch/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/2.png -------------------------------------------------------------------------------- /generated_images/PyTorch/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/3.png -------------------------------------------------------------------------------- /generated_images/PyTorch/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/4.png -------------------------------------------------------------------------------- /generated_images/PyTorch/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/5.png -------------------------------------------------------------------------------- /generated_images/PyTorch/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/6.png -------------------------------------------------------------------------------- /generated_images/PyTorch/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/7.png -------------------------------------------------------------------------------- /generated_images/PyTorch/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/8.png -------------------------------------------------------------------------------- /generated_images/PyTorch/9.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/PyTorch/9.png -------------------------------------------------------------------------------- /generated_images/PyTorch/README.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion images with PyTorch 2 | 3 | | Prompt | Generated image | 4 | | --- | --- 5 | | Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci | ![stable_diffusion-super_mario_airport](./0.png) 6 | | The Easter bunny riding a motorcycle in New York City | ![stable_diffusion-easter_bunny](./1.png) 7 | | Lecco in the winter in the year 2055 | ![stable_diffusion-lecco_winter](./2.png) 8 | | photorealistic orange 1935 ford in ancient Qgypt pyramid of Giza in background | ![stable_diffusion-pyramids_ford](./3.png) 9 | | the boulevards are crowded today | ![stable_diffusion-boulevards](./4.png) 10 | | TA photo of cat riding on a bicycle | ![stable_diffusion-cat_riding_bicycle](./5.png) 11 | | Bird-eye view of a highway in Los Angeles | ![stable_diffusion-bird_highway](./6.png) 12 | | A beautiful sunrise on mars. High-definition. | ![stable_diffusion-sunrise_mars](./7.png) 13 | | A panda bear driving a car | ![stable_diffusion-panda_bear](./8.png) 14 | | Drone flythrough of a tropical jungle convered in snow | ![stable_diffusion-drone_tropical_jungle](./9.png) -------------------------------------------------------------------------------- /generated_images/README.md: -------------------------------------------------------------------------------- 1 | # Preview of generated images 2 | Generated images are categorized in directories. You can preview all generated images by going to the README.md of any directory. 
3 | 4 | - [AITemplate](./AITemplate/README.md) 5 | - [FlashAttention](./FlashAttention/README.md) 6 | - [nvFuser](./nvFuser/README.md) 7 | - [PyTorch](./PyTorch/README.md) 8 | - [TensorRT](./TensorRT/README.md) -------------------------------------------------------------------------------- /generated_images/TensorRT/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/0.png -------------------------------------------------------------------------------- /generated_images/TensorRT/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/1.png -------------------------------------------------------------------------------- /generated_images/TensorRT/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/2.png -------------------------------------------------------------------------------- /generated_images/TensorRT/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/3.png -------------------------------------------------------------------------------- /generated_images/TensorRT/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/4.png -------------------------------------------------------------------------------- /generated_images/TensorRT/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/5.png -------------------------------------------------------------------------------- /generated_images/TensorRT/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/6.png -------------------------------------------------------------------------------- /generated_images/TensorRT/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/7.png -------------------------------------------------------------------------------- /generated_images/TensorRT/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/8.png -------------------------------------------------------------------------------- /generated_images/TensorRT/9.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/TensorRT/9.png -------------------------------------------------------------------------------- /generated_images/TensorRT/README.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion images with TensorRT 2 | 3 | | Prompt | Generated image | 4 | | --- | --- 5 | | Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci | ![stable_diffusion-super_mario_airport](./0.png) 6 | | The Easter bunny riding a motorcycle in New York City | ![stable_diffusion-easter_bunny](./1.png) 7 | | Lecco in the winter in the year 2055 | ![stable_diffusion-lecco_winter](./2.png) 8 | | photorealistic orange 1935 ford in ancient Qgypt pyramid of Giza in background | ![stable_diffusion-pyramids_ford](./3.png) 9 | | the boulevards are crowded today | ![stable_diffusion-boulevards](./4.png) 10 | | TA photo of cat riding on a bicycle | ![stable_diffusion-cat_riding_bicycle](./5.png) 11 | | Bird-eye view of a highway in Los Angeles | ![stable_diffusion-bird_highway](./6.png) 12 | | A beautiful sunrise on mars. High-definition. | ![stable_diffusion-sunrise_mars](./7.png) 13 | | A panda bear driving a car | ![stable_diffusion-panda_bear](./8.png) 14 | | Drone flythrough of a tropical jungle convered in snow | ![stable_diffusion-drone_tropical_jungle](./9.png) -------------------------------------------------------------------------------- /generated_images/nvFuser/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/0.png -------------------------------------------------------------------------------- /generated_images/nvFuser/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/1.png -------------------------------------------------------------------------------- /generated_images/nvFuser/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/2.png -------------------------------------------------------------------------------- /generated_images/nvFuser/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/3.png -------------------------------------------------------------------------------- /generated_images/nvFuser/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/4.png -------------------------------------------------------------------------------- /generated_images/nvFuser/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/5.png -------------------------------------------------------------------------------- /generated_images/nvFuser/6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/6.png -------------------------------------------------------------------------------- /generated_images/nvFuser/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/7.png -------------------------------------------------------------------------------- /generated_images/nvFuser/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/8.png -------------------------------------------------------------------------------- /generated_images/nvFuser/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/generated_images/nvFuser/9.png -------------------------------------------------------------------------------- /generated_images/nvFuser/README.md: -------------------------------------------------------------------------------- 1 | # Stable Diffusion images with NvFuser 2 | 3 | | Prompt | Generated image | 4 | | --- | --- 5 | | Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci | ![stable_diffusion-super_mario_airport](./0.png) 6 | | The Easter bunny riding a motorcycle in New York City | ![stable_diffusion-easter_bunny](./1.png) 7 | | Lecco in the winter in the year 2055 | ![stable_diffusion-lecco_winter](./2.png) 8 | | photorealistic orange 1935 ford in ancient Qgypt pyramid of Giza in background | ![stable_diffusion-pyramids_ford](./3.png) 9 | | the boulevards are crowded today | ![stable_diffusion-boulevards](./4.png) 10 | | TA photo of cat riding on a bicycle | ![stable_diffusion-cat_riding_bicycle](./5.png) 11 | | Bird-eye view of a highway in Los Angeles | ![stable_diffusion-bird_highway](./6.png) 12 | | A beautiful sunrise on mars. High-definition. 
| ![stable_diffusion-sunrise_mars](./7.png) 13 | | A panda bear driving a car | ![stable_diffusion-panda_bear](./8.png) 14 | | Drone flythrough of a tropical jungle convered in snow | ![stable_diffusion-drone_tropical_jungle](./9.png) -------------------------------------------------------------------------------- /graphs/A100_GPU_batch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/graphs/A100_GPU_batch.png -------------------------------------------------------------------------------- /graphs/A100_GPU_latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/graphs/A100_GPU_latency.png -------------------------------------------------------------------------------- /graphs/T4_GPU_latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stochasticai/x-stable-diffusion/56c8fc81a2caa042e864d9466825a018b7836321/graphs/T4_GPU_latency.png -------------------------------------------------------------------------------- /nvFuser/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | WORKDIR /code 4 | 5 | RUN pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html 6 | 7 | COPY requirements.txt /code/requirements.txt 8 | 9 | RUN apt-get update && apt-get -y install curl && apt -y install git 10 | 11 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt 12 | 13 | COPY . /code/ 14 | 15 | EXPOSE 5000 16 | 17 | CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5000", "--workers", "1"] -------------------------------------------------------------------------------- /nvFuser/README.md: -------------------------------------------------------------------------------- 1 | ## Nvfuser fp16 Stable Diffusion Example 2 | 3 | ### Build Dependencies 4 | 5 | Install libraries 6 | 7 | ``` 8 | pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html 9 | pip install -r requirements.txt 10 | ``` 11 | 12 | ### Convert Unet model to Nvfuser torchscript fp16 13 | 14 | You also need to register on the HuggingFace hub. Get your access token from [Hugging Face account settings](https://huggingface.co/settings/tokens). Then log in using the `huggingface-cli login` command. 15 | 16 | ``` 17 | python3 create_unet_nvfuser_model.py 18 | ``` 19 | 20 | The Unet Nvfuser fp16 model is stored in `./unet_jit.pt` 21 | 22 | ### Benchmark 23 | 24 | ``` 25 | python3 demo.py --benchmark 26 | ``` 27 | 28 | ### Deploy as rest-api end-point 29 | 30 | You need to provide the HuggingFace token in the file `server.py`. 31 | 32 | ``` 33 | docker build -t nvfuser_diffusion .
34 | docker run -p 5000:5000 -ti --gpus=all nvfuser_diffusion 35 | ``` 36 | 37 | ### Test API 38 | 39 | ``` 40 | python3 client.py 41 | ``` 42 | 43 | Check the resulted image: `output_api.png` -------------------------------------------------------------------------------- /nvFuser/client.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | 5 | if __name__ == "__main__": 6 | text = "The Easter bunny riding a motorcycle in New York City" 7 | t0 = time.time() 8 | for i in range(50): 9 | print("Iteration: ", i) 10 | out = requests.post( 11 | "http://localhost:5000/predict/", data=json.dumps({"prompt": [text]}) 12 | ) 13 | t1 = time.time() 14 | print("Inference time is: ", (t1 - t0) / 50) 15 | with open("output_api.png", "wb") as f: 16 | f.write(out.content) 17 | -------------------------------------------------------------------------------- /nvFuser/create_unet_nvfuser_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | from diffusers import UNet2DConditionModel 4 | 5 | 6 | def get_args(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument( 9 | "--save_path", default="./unet_jit.pt", type=str, help="Nvfuser saved path" 10 | ) 11 | parser.add_argument("--batch_size", default=1, type=int, help="batch size") 12 | parser.add_argument( 13 | "--img_size", default=(512, 512), help="Unet input image size (h,w)" 14 | ) 15 | parser.add_argument( 16 | "--max_seq_length", default=64, help="Maximum sequence length of input text" 17 | ) 18 | 19 | return parser.parse_args() 20 | 21 | 22 | def convert(args): 23 | device = torch.device("cuda") 24 | unet = UNet2DConditionModel.from_pretrained( 25 | "CompVis/stable-diffusion-v1-4", subfolder="unet", use_auth_token=True 26 | ).to(device) 27 | unet.eval() 28 | 29 | latents = torch.randn( 30 | (args.batch_size, 4, args.img_size[0] // 8, args.img_size[1] // 8) 31 | ) 32 | latent_model_input = torch.cat([latents] * 2).to(device) 33 | text_embeddings = ( 34 | torch.randn((args.batch_size, args.max_seq_length, 768)).float().to(device) 35 | ) 36 | text_embeddings = torch.cat([text_embeddings, text_embeddings]) 37 | timestep_ = torch.tensor([10]).to(device) 38 | with torch.no_grad(): 39 | with torch.autocast("cuda"): 40 | traced_applymodel_half = torch.jit.trace( 41 | unet, 42 | (latent_model_input, timestep_, text_embeddings), 43 | check_trace=False, 44 | ) 45 | 46 | traced_applymodel_half.save(args.save_path) 47 | 48 | 49 | if __name__ == "__main__": 50 | args = get_args() 51 | convert(args) 52 | -------------------------------------------------------------------------------- /nvFuser/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.21.2 2 | diffusers==0.3.0 3 | torch==1.12.1+cu116 4 | scipy -------------------------------------------------------------------------------- /nvFuser/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | from diffusers import UNet2DConditionModel 4 | from transformers import CLIPTextModel, CLIPTokenizer 5 | from diffusers import AutoencoderKL 6 | from diffusers import LMSDiscreteScheduler 7 | from torch import autocast 8 | from tqdm import tqdm 9 | from time import time 10 | 11 | device = torch.device("cuda") 12 | sd_fused = torch.jit.load("unet_jit.pt") 13 | sd_fused = sd_fused.to(device) 14 | tokenizer = 
CLIPTokenizer.from_pretrained( 15 | "CompVis/stable-diffusion-v1-4", subfolder="tokenizer", use_auth_token=True 16 | ) 17 | prompt = "Super Mario learning to fly in an airport, Painting by Leonardo Da Vinci" 18 | text_input = tokenizer( 19 | prompt, padding="max_length", max_length=64, truncation=True, return_tensors="pt" 20 | ).input_ids.cuda() 21 | uncond_input = tokenizer( 22 | [""] * 1, padding="max_length", max_length=64, return_tensors="pt" 23 | ).input_ids.cuda() 24 | batch_size = 1 25 | img_size = (512, 512) 26 | latents = torch.randn((batch_size, 4, img_size[0] // 8, img_size[1] // 8)).cuda() 27 | 28 | for _ in tqdm(range(5)): 29 | out = sd_fused(text_input, uncond_input, latents) 30 | torch.cuda.synchronize() 31 | start = time()  # 'time' is imported as a function above; time.perf_counter() would raise AttributeError 32 | for i in tqdm(range(100)): 33 | out = sd_fused(text_input, uncond_input, latents) 34 | torch.cuda.synchronize() 35 | print("Average inference time: {:.4f} seconds".format((time() - start) / 100)) 36 | --------------------------------------------------------------------------------
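A note on the timing script above: `create_unet_nvfuser_model.py` traces the UNet with `(latent_model_input, timestep, text_embeddings)` as inputs, while the loop in `test.py` passes token ids and latents directly, so the two do not line up. The sketch below is only an illustration, not code from the repository: it times the traced UNet with inputs shaped the same way as the example inputs in the conversion script, and it assumes `unet_jit.pt` has already been created.

```
import torch
from time import time
from tqdm import tqdm

device = torch.device("cuda")
sd_fused = torch.jit.load("unet_jit.pt").to(device)

batch_size, max_seq_length = 1, 64
img_size = (512, 512)

# Same shapes as the example inputs used when tracing the UNet
latents = torch.randn(batch_size * 2, 4, img_size[0] // 8, img_size[1] // 8, device=device)
text_embeddings = torch.randn(batch_size * 2, max_seq_length, 768, device=device)
timestep = torch.tensor([10], device=device)

with torch.no_grad():
    for _ in tqdm(range(5)):  # warm-up iterations
        sd_fused(latents, timestep, text_embeddings)
    torch.cuda.synchronize()
    start = time()
    for _ in tqdm(range(100)):
        sd_fused(latents, timestep, text_embeddings)
    torch.cuda.synchronize()
    print("Average UNet forward time: {:.4f} seconds".format((time() - start) / 100))
```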