├── README.md ├── image-editing ├── BDIA_experiments.ipynb ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── README.md ├── bdia_edict_functions.py ├── bdia_edit.py ├── environment.yaml ├── experiment_images │ ├── catgrass_original.png │ ├── charlotte-2069642_1280.jpg │ ├── cow.jpg │ ├── imagenet_dog_1.jpg │ ├── imagenet_dog_2.jpg │ ├── man-67467_1280.jpg │ ├── pixabay_boy.jpg │ ├── rooster.JPEG │ ├── truebsee-5337646_1280.jpg │ └── woman-657753_512.jpg ├── hf_auth └── my_diffusers │ ├── __init__.py │ ├── commands │ ├── __init__.py │ ├── diffusers_cli.py │ └── env.py │ ├── configuration_utils.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── dynamic_modules_utils.py │ ├── hub_utils.py │ ├── modeling_utils.py │ ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── attention.cpython-310.pyc │ │ ├── embeddings.cpython-310.pyc │ │ ├── resnet.cpython-310.pyc │ │ ├── unet_2d.cpython-310.pyc │ │ ├── unet_2d_condition.cpython-310.pyc │ │ ├── unet_blocks.cpython-310.pyc │ │ └── vae.cpython-310.pyc │ ├── attention.py │ ├── embeddings.py │ ├── resnet.py │ ├── unet_2d.py │ ├── unet_2d_condition.py │ ├── unet_blocks.py │ └── vae.py │ ├── onnx_utils.py │ ├── optimization.py │ ├── pipeline_utils.py │ ├── pipelines │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── ddim │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_ddim.cpython-310.pyc │ │ └── pipeline_ddim.py │ ├── ddpm │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_ddpm.cpython-310.pyc │ │ └── pipeline_ddpm.py │ ├── latent_diffusion │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_latent_diffusion.cpython-310.pyc │ │ └── pipeline_latent_diffusion.py │ ├── latent_diffusion_uncond │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_latent_diffusion_uncond.cpython-310.pyc │ │ └── pipeline_latent_diffusion_uncond.py │ ├── pndm │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_pndm.cpython-310.pyc │ │ └── pipeline_pndm.py │ ├── score_sde_ve │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_score_sde_ve.cpython-310.pyc │ │ └── pipeline_score_sde_ve.py │ ├── stable_diffusion │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── pipeline_stable_diffusion.cpython-310.pyc │ │ │ ├── pipeline_stable_diffusion_img2img.cpython-310.pyc │ │ │ ├── pipeline_stable_diffusion_inpaint.cpython-310.pyc │ │ │ └── safety_checker.cpython-310.pyc │ │ ├── pipeline_stable_diffusion.py │ │ ├── pipeline_stable_diffusion_img2img.py │ │ ├── pipeline_stable_diffusion_inpaint.py │ │ ├── pipeline_stable_diffusion_onnx.py │ │ └── safety_checker.py │ └── stochastic_karras_ve │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── pipeline_stochastic_karras_ve.cpython-310.pyc │ │ └── pipeline_stochastic_karras_ve.py │ ├── schedulers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── scheduling_ddim.cpython-310.pyc │ │ ├── scheduling_ddpm.cpython-310.pyc │ │ ├── scheduling_karras_ve.cpython-310.pyc │ │ ├── scheduling_lms_discrete.cpython-310.pyc │ │ ├── scheduling_pndm.cpython-310.pyc │ │ ├── scheduling_sde_ve.cpython-310.pyc │ │ ├── scheduling_sde_vp.cpython-310.pyc │ │ └── scheduling_utils.cpython-310.pyc │ ├── scheduling_ddim.py │ ├── scheduling_ddpm.py │ ├── 
scheduling_karras_ve.py │ ├── scheduling_lms_discrete.py │ ├── scheduling_pndm.py │ ├── scheduling_sde_ve.py │ ├── scheduling_sde_vp.py │ └── scheduling_utils.py │ ├── testing_utils.py │ ├── training_utils.py │ └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── dummy_transformers_and_onnx_objects.cpython-310.pyc │ ├── import_utils.cpython-310.pyc │ ├── logging.cpython-310.pyc │ └── outputs.cpython-310.pyc │ ├── dummy_scipy_objects.py │ ├── dummy_transformers_and_inflect_and_unidecode_objects.py │ ├── dummy_transformers_and_onnx_objects.py │ ├── dummy_transformers_objects.py │ ├── import_utils.py │ ├── logging.py │ ├── model_card_template.md │ └── outputs.py ├── image_examples ├── BDIADDIM_t2i_20pairs.png ├── controlnet_BDIA.png ├── controlnet_BDIA_2nd.png ├── controlnet_BDIA_pro.png ├── image_editing_cat_lion.png └── woman_editing_2nd.png └── text-to-image ├── Readme.md └── stablediffusionV2 ├── LICENSE ├── LICENSE-MODEL ├── __pycache__ └── image_resize.cpython-39.pyc ├── checkpoints └── checkpoints.txt ├── configs ├── karlo │ ├── decoder_900M_vit_l.yaml │ ├── improved_sr_64_256_1.4B.yaml │ └── prior_1B_vit_l.yaml └── stable-diffusion │ ├── intel │ ├── v2-inference-bf16.yaml │ ├── v2-inference-fp32.yaml │ ├── v2-inference-v-bf16.yaml │ └── v2-inference-v-fp32.yaml │ ├── v2-1-stable-unclip-h-inference.yaml │ ├── v2-1-stable-unclip-l-inference.yaml │ ├── v2-inference-v.yaml │ ├── v2-inference.yaml │ ├── v2-inpainting-inference.yaml │ ├── v2-midas-inference.yaml │ └── x4-upscaling.yaml ├── cv2 ├── diffusion_inversion.py ├── doc └── UNCLIP.MD ├── environment.yaml ├── fid_score.py ├── image_resize.py ├── inception.py ├── ldm ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-39.pyc │ └── util.cpython-39.pyc ├── data │ ├── __init__.py │ └── util.py ├── models │ ├── __pycache__ │ │ └── autoencoder.cpython-39.pyc │ ├── autoencoder.py │ └── diffusion │ │ ├── BDIAddim.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── ABDIAddim.cpython-39.pyc │ │ ├── BDIAddim.cpython-39.pyc │ │ ├── BDIAddim_backup_2nd.cpython-39.pyc │ │ ├── BDIAddimv2.cpython-39.pyc │ │ ├── HIBDIAddim.cpython-39.pyc │ │ ├── IIAddim.cpython-39.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── ddim.cpython-39.pyc │ │ ├── ddpm.cpython-39.pyc │ │ ├── plms.cpython-39.pyc │ │ └── sampling_util.cpython-39.pyc │ │ ├── ddim.py │ │ ├── ddpm.py │ │ ├── dpm_solver │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── dpm_solver.cpython-39.pyc │ │ │ └── sampler.cpython-39.pyc │ │ ├── dpm_solver.py │ │ └── sampler.py │ │ ├── plms.py │ │ └── sampling_util.py ├── modules │ ├── __pycache__ │ │ ├── attention.cpython-39.pyc │ │ └── ema.cpython-39.pyc │ ├── attention.py │ ├── diffusionmodules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── model.cpython-39.pyc │ │ │ ├── openaimodel.cpython-39.pyc │ │ │ ├── upscaling.cpython-39.pyc │ │ │ └── util.cpython-39.pyc │ │ ├── model.py │ │ ├── openaimodel.py │ │ ├── upscaling.py │ │ └── util.py │ ├── distributions │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── distributions.cpython-39.pyc │ │ └── distributions.py │ ├── ema.py │ ├── encoders │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── modules.cpython-39.pyc │ │ └── modules.py │ ├── image_degradation │ │ ├── __init__.py │ │ ├── bsrgan.py │ │ ├── bsrgan_light.py │ │ ├── utils │ │ │ └── test.png │ │ └── utils_image.py │ ├── karlo │ │ ├── __init__.py │ │ ├── diffusers_pipeline.py │ │ └── kakao │ 
│ │ ├── __init__.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── clip.py │ │ │ ├── decoder_model.py │ │ │ ├── prior_model.py │ │ │ ├── sr_256_1k.py │ │ │ └── sr_64_256.py │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── diffusion │ │ │ │ ├── gaussian_diffusion.py │ │ │ │ └── respace.py │ │ │ ├── nn.py │ │ │ ├── resample.py │ │ │ ├── unet.py │ │ │ └── xf.py │ │ │ ├── sampler.py │ │ │ └── template.py │ └── midas │ │ ├── __init__.py │ │ ├── api.py │ │ ├── midas │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── blocks.py │ │ ├── dpt_depth.py │ │ ├── midas_net.py │ │ ├── midas_net_custom.py │ │ ├── transforms.py │ │ └── vit.py │ │ └── utils.py └── util.py ├── modelcard.md ├── pd ├── requirements.txt ├── sample.py ├── scripts ├── gradio │ ├── depth2img.py │ ├── inpainting.py │ └── superresolution.py ├── img2img.py ├── streamlit │ ├── depth2img.py │ ├── inpainting.py │ ├── stableunclip.py │ └── superresolution.py ├── tests │ └── test_watermark.py └── txt2img.py ├── setup.py ├── shutil ├── stable_diffusion.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── requires.txt └── top_level.txt └── txt2img.py /image-editing/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Comment line immediately above ownership line is reserved for related other information. Please be careful while editing. 2 | #ECCN:Open Source 3 | -------------------------------------------------------------------------------- /image-editing/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Salesforce Open Source Community Code of Conduct 2 | 3 | ## About the Code of Conduct 4 | 5 | Equality is a core value at Salesforce. We believe a diverse and inclusive 6 | community fosters innovation and creativity, and are committed to building a 7 | culture where everyone feels included. 8 | 9 | Salesforce open-source projects are committed to providing a friendly, safe, and 10 | welcoming environment for all, regardless of gender identity and expression, 11 | sexual orientation, disability, physical appearance, body size, ethnicity, nationality, 12 | race, age, religion, level of experience, education, socioeconomic status, or 13 | other similar personal characteristics. 14 | 15 | The goal of this code of conduct is to specify a baseline standard of behavior so 16 | that people with different social values and communication styles can work 17 | together effectively, productively, and respectfully in our open source community. 18 | It also establishes a mechanism for reporting issues and resolving conflicts. 19 | 20 | All questions and reports of abusive, harassing, or otherwise unacceptable behavior 21 | in a Salesforce open-source project may be reported by contacting the Salesforce 22 | Open Source Conduct Committee at ossconduct@salesforce.com. 23 | 24 | ## Our Pledge 25 | 26 | In the interest of fostering an open and welcoming environment, we as 27 | contributors and maintainers pledge to making participation in our project and 28 | our community a harassment-free experience for everyone, regardless of gender 29 | identity and expression, sexual orientation, disability, physical appearance, 30 | body size, ethnicity, nationality, race, age, religion, level of experience, education, 31 | socioeconomic status, or other similar personal characteristics. 
32 | 33 | ## Our Standards 34 | 35 | Examples of behavior that contributes to creating a positive environment 36 | include: 37 | 38 | * Using welcoming and inclusive language 39 | * Being respectful of differing viewpoints and experiences 40 | * Gracefully accepting constructive criticism 41 | * Focusing on what is best for the community 42 | * Showing empathy toward other community members 43 | 44 | Examples of unacceptable behavior by participants include: 45 | 46 | * The use of sexualized language or imagery and unwelcome sexual attention or 47 | advances 48 | * Personal attacks, insulting/derogatory comments, or trolling 49 | * Public or private harassment 50 | * Publishing, or threatening to publish, others' private information—such as 51 | a physical or electronic address—without explicit permission 52 | * Other conduct which could reasonably be considered inappropriate in a 53 | professional setting 54 | * Advocating for or encouraging any of the above behaviors 55 | 56 | ## Our Responsibilities 57 | 58 | Project maintainers are responsible for clarifying the standards of acceptable 59 | behavior and are expected to take appropriate and fair corrective action in 60 | response to any instances of unacceptable behavior. 61 | 62 | Project maintainers have the right and responsibility to remove, edit, or 63 | reject comments, commits, code, wiki edits, issues, and other contributions 64 | that are not aligned with this Code of Conduct, or to ban temporarily or 65 | permanently any contributor for other behaviors that they deem inappropriate, 66 | threatening, offensive, or harmful. 67 | 68 | ## Scope 69 | 70 | This Code of Conduct applies both within project spaces and in public spaces 71 | when an individual is representing the project or its community. Examples of 72 | representing a project or community include using an official project email 73 | address, posting via an official social media account, or acting as an appointed 74 | representative at an online or offline event. Representation of a project may be 75 | further defined and clarified by project maintainers. 76 | 77 | ## Enforcement 78 | 79 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 80 | reported by contacting the Salesforce Open Source Conduct Committee 81 | at ossconduct@salesforce.com. All complaints will be reviewed and investigated 82 | and will result in a response that is deemed necessary and appropriate to the 83 | circumstances. The committee is obligated to maintain confidentiality with 84 | regard to the reporter of an incident. Further details of specific enforcement 85 | policies may be posted separately. 86 | 87 | Project maintainers who do not follow or enforce the Code of Conduct in good 88 | faith may face temporary or permanent repercussions as determined by other 89 | members of the project's leadership and the Salesforce Open Source Conduct 90 | Committee. 91 | 92 | ## Attribution 93 | 94 | This Code of Conduct is adapted from the [Contributor Covenant][contributor-covenant-home], 95 | version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html. 96 | It includes adaptions and additions from [Go Community Code of Conduct][golang-coc], 97 | [CNCF Code of Conduct][cncf-coc], and [Microsoft Open Source Code of Conduct][microsoft-coc]. 98 | 99 | This Code of Conduct is licensed under the [Creative Commons Attribution 3.0 License][cc-by-3-us]. 
100 | 101 | [contributor-covenant-home]: https://www.contributor-covenant.org (https://www.contributor-covenant.org/) 102 | [golang-coc]: https://golang.org/conduct 103 | [cncf-coc]: https://github.com/cncf/foundation/blob/master/code-of-conduct.md 104 | [microsoft-coc]: https://opensource.microsoft.com/codeofconduct/ 105 | [cc-by-3-us]: https://creativecommons.org/licenses/by/3.0/us/ 106 | -------------------------------------------------------------------------------- /image-editing/README.md: -------------------------------------------------------------------------------- 1 | 2 | ### Procedure for running the code for round-trip image editing 3 | 1. Download the source code and put it in a folder in your Google Drive. 4 | 2. Upload and run BDIA_experiments.ipynb on Google Colab. (Note: you may need to change the working directory in BDIA_experiments.ipynb for it to run properly). 5 | 6 | Note 1: The parameter $\gamma$ in BDIA has a significant impact on the resulting edited images. The recommended range for $\gamma$ is [0.92, 1.0]. 7 | 8 | Note 2: BDIA-DDIM runs at the same speed as DDIM; it is not 10 times slower than DDIM, as claimed in "Fixed-point Inversion for Text-to-image diffusion models". 9 | 10 | ### Acknowledgement 11 | The BDIA implementation for round-trip image editing depends heavily on the open-source code of EDICT. The main Python function implementing BDIA-DDIM for round-trip image editing is BDIA_stablediffusion in bdia_edict_functions.py (see the hypothetical usage sketch after the experiment_images listing below). 12 | -------------------------------------------------------------------------------- /image-editing/bdia_edit.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /image-editing/experiment_images/catgrass_original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/catgrass_original.png -------------------------------------------------------------------------------- /image-editing/experiment_images/charlotte-2069642_1280.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/charlotte-2069642_1280.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/cow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/cow.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/imagenet_dog_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/imagenet_dog_1.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/imagenet_dog_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/imagenet_dog_2.jpg
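The README above names BDIA_stablediffusion in bdia_edict_functions.py as the entry point for round-trip editing but does not show how it is invoked. The minimal sketch below illustrates one plausible call: the module and function names come from the repository itself, while the argument names (init_image, base_prompt, edit_prompt, steps, gamma) and the import path are assumptions made purely for illustration, so the actual signature in bdia_edict_functions.py or BDIA_experiments.ipynb should be checked before use.

```python
# Hypothetical usage sketch for round-trip image editing with BDIA-DDIM.
# BDIA_stablediffusion and bdia_edict_functions.py exist in this repository,
# but every argument name below is an assumption; consult the notebook
# BDIA_experiments.ipynb for the actual signature.
from PIL import Image

from bdia_edict_functions import BDIA_stablediffusion  # assumed import path

init_image = Image.open("experiment_images/catgrass_original.png")

edited = BDIA_stablediffusion(
    init_image,                                 # image to invert and then edit (assumed parameter)
    base_prompt="a cat sitting in the grass",   # prompt describing the source image (assumed)
    edit_prompt="a lion sitting in the grass",  # prompt describing the desired edit (assumed)
    steps=50,                                   # number of BDIA-DDIM steps (assumed)
    gamma=0.96,                                 # BDIA gamma; the README recommends values in [0.92, 1.0]
)
```

Because $\gamma$ strongly affects the edited result, it is worth sweeping a few values inside the recommended [0.92, 1.0] range rather than committing to a single setting.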
-------------------------------------------------------------------------------- /image-editing/experiment_images/man-67467_1280.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/man-67467_1280.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/pixabay_boy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/pixabay_boy.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/rooster.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/rooster.JPEG -------------------------------------------------------------------------------- /image-editing/experiment_images/truebsee-5337646_1280.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/truebsee-5337646_1280.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/woman-657753_512.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/woman-657753_512.jpg -------------------------------------------------------------------------------- /image-editing/hf_auth: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import ( 2 | is_inflect_available, 3 | is_onnx_available, 4 | is_scipy_available, 5 | is_transformers_available, 6 | is_unidecode_available, 7 | ) 8 | 9 | 10 | __version__ = "0.3.0" 11 | 12 | from .configuration_utils import ConfigMixin 13 | from .modeling_utils import ModelMixin 14 | from .models import AutoencoderKL, UNet2DConditionModel, UNet2DModel, VQModel 15 | from .onnx_utils import OnnxRuntimeModel 16 | from .optimization import ( 17 | get_constant_schedule, 18 | get_constant_schedule_with_warmup, 19 | get_cosine_schedule_with_warmup, 20 | get_cosine_with_hard_restarts_schedule_with_warmup, 21 | get_linear_schedule_with_warmup, 22 | get_polynomial_decay_schedule_with_warmup, 23 | get_scheduler, 24 | ) 25 | from .pipeline_utils import DiffusionPipeline 26 | from .pipelines import DDIMPipeline, DDPMPipeline, KarrasVePipeline, LDMPipeline, PNDMPipeline, ScoreSdeVePipeline 27 | from .schedulers import ( 28 | DDIMScheduler, 29 | DDPMScheduler, 30 | KarrasVeScheduler, 31 | PNDMScheduler, 32 | SchedulerMixin, 33 | ScoreSdeVeScheduler, 34 | ) 35 | from .utils import logging 36 | 37 | 38 | if is_scipy_available(): 39 | from .schedulers import LMSDiscreteScheduler 40 | else: 41 | from .utils.dummy_scipy_objects import * # noqa F403 42 | 43 | from .training_utils import EMAModel 44 | 45 | 46 | if 
is_transformers_available(): 47 | from .pipelines import ( 48 | LDMTextToImagePipeline, 49 | StableDiffusionImg2ImgPipeline, 50 | StableDiffusionInpaintPipeline, 51 | StableDiffusionPipeline, 52 | ) 53 | else: 54 | from .utils.dummy_transformers_objects import * # noqa F403 55 | 56 | 57 | if is_transformers_available() and is_onnx_available(): 58 | from .pipelines import StableDiffusionOnnxPipeline 59 | else: 60 | from .utils.dummy_transformers_and_onnx_objects import * # noqa F403 61 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseDiffusersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/commands/diffusers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from argparse import ArgumentParser 17 | 18 | from .env import EnvironmentCommand 19 | 20 | 21 | def main(): 22 | parser = ArgumentParser("Diffusers CLI tool", usage="diffusers-cli []") 23 | commands_parser = parser.add_subparsers(help="diffusers-cli command helpers") 24 | 25 | # Register commands 26 | EnvironmentCommand.register_subcommand(commands_parser) 27 | 28 | # Let's go 29 | args = parser.parse_args() 30 | 31 | if not hasattr(args, "func"): 32 | parser.print_help() 33 | exit(1) 34 | 35 | # Run 36 | service = args.func(args) 37 | service.run() 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/commands/env.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import platform 16 | from argparse import ArgumentParser 17 | 18 | import huggingface_hub 19 | 20 | from .. import __version__ as version 21 | from ..utils import is_torch_available, is_transformers_available 22 | from . import BaseDiffusersCLICommand 23 | 24 | 25 | def info_command_factory(_): 26 | return EnvironmentCommand() 27 | 28 | 29 | class EnvironmentCommand(BaseDiffusersCLICommand): 30 | @staticmethod 31 | def register_subcommand(parser: ArgumentParser): 32 | download_parser = parser.add_parser("env") 33 | download_parser.set_defaults(func=info_command_factory) 34 | 35 | def run(self): 36 | hub_version = huggingface_hub.__version__ 37 | 38 | pt_version = "not installed" 39 | pt_cuda_available = "NA" 40 | if is_torch_available(): 41 | import torch 42 | 43 | pt_version = torch.__version__ 44 | pt_cuda_available = torch.cuda.is_available() 45 | 46 | transformers_version = "not installed" 47 | if is_transformers_available: 48 | import transformers 49 | 50 | transformers_version = transformers.__version__ 51 | 52 | info = { 53 | "`diffusers` version": version, 54 | "Platform": platform.platform(), 55 | "Python version": platform.python_version(), 56 | "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})", 57 | "Huggingface_hub version": hub_version, 58 | "Transformers version": transformers_version, 59 | "Using GPU in script?": "", 60 | "Using distributed or parallel set-up in script?": "", 61 | } 62 | 63 | print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n") 64 | print(self.format_dict(info)) 65 | 66 | return info 67 | 68 | @staticmethod 69 | def format_dict(d): 70 | return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n" 71 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/dependency_versions_check.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import sys 15 | 16 | from .dependency_versions_table import deps 17 | from .utils.versions import require_version, require_version_core 18 | 19 | 20 | # define which module versions we always want to check at run time 21 | # (usually the ones defined in `install_requires` in setup.py) 22 | # 23 | # order specific notes: 24 | # - tqdm must be checked before tokenizers 25 | 26 | pkgs_to_check_at_runtime = "python tqdm regex requests packaging filelock numpy tokenizers".split() 27 | if sys.version_info < (3, 7): 28 | pkgs_to_check_at_runtime.append("dataclasses") 29 | if sys.version_info < (3, 8): 30 | pkgs_to_check_at_runtime.append("importlib_metadata") 31 | 32 | for pkg in pkgs_to_check_at_runtime: 33 | if pkg in deps: 34 | if pkg == "tokenizers": 35 | # must be loaded here, or else tqdm check may fail 36 | from .utils import is_tokenizers_available 37 | 38 | if not is_tokenizers_available(): 39 | continue # not required, check version only if installed 40 | 41 | require_version_core(deps[pkg]) 42 | else: 43 | raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") 44 | 45 | 46 | def dep_version_check(pkg, hint=None): 47 | require_version(deps[pkg], hint) 48 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/dependency_versions_table.py: -------------------------------------------------------------------------------- 1 | # THIS FILE HAS BEEN AUTOGENERATED. To update: 2 | # 1. modify the `_deps` dict in setup.py 3 | # 2. run `make deps_table_update`` 4 | deps = { 5 | "Pillow": "Pillow", 6 | "accelerate": "accelerate>=0.11.0", 7 | "black": "black==22.3", 8 | "datasets": "datasets", 9 | "filelock": "filelock", 10 | "flake8": "flake8>=3.8.3", 11 | "hf-doc-builder": "hf-doc-builder>=0.3.0", 12 | "huggingface-hub": "huggingface-hub>=0.8.1", 13 | "importlib_metadata": "importlib_metadata", 14 | "isort": "isort>=5.5.4", 15 | "modelcards": "modelcards==0.1.4", 16 | "numpy": "numpy", 17 | "pytest": "pytest", 18 | "pytest-timeout": "pytest-timeout", 19 | "pytest-xdist": "pytest-xdist", 20 | "scipy": "scipy", 21 | "regex": "regex!=2019.12.17", 22 | "requests": "requests", 23 | "tensorboard": "tensorboard", 24 | "torch": "torch>=1.4", 25 | "transformers": "transformers>=4.21.0", 26 | } 27 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .unet_2d import UNet2DModel 16 | from .unet_2d_condition import UNet2DConditionModel 17 | from .vae import AutoencoderKL, VQModel 18 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/attention.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/embeddings.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/embeddings.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/resnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/resnet.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/unet_2d.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/unet_2d.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/unet_2d_condition.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/unet_2d_condition.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/unet_blocks.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/unet_blocks.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/vae.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/vae.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/embeddings.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import math 15 | 16 | import numpy as np 17 | import torch 18 | from torch import nn 19 | 20 | 21 | def get_timestep_embedding( 22 | timesteps: torch.Tensor, 23 | embedding_dim: int, 24 | flip_sin_to_cos: bool = False, 25 | downscale_freq_shift: float = 1, 26 | scale: float = 1, 27 | max_period: int = 10000, 28 | ): 29 | # print(timesteps) 30 | """ 31 | This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. 32 | 33 | :param timesteps: a 1-D Tensor of N indices, one per batch element. 34 | These may be fractional. 35 | :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the 36 | embeddings. :return: an [N x dim] Tensor of positional embeddings. 37 | """ 38 | assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array" 39 | 40 | half_dim = embedding_dim // 2 41 | exponent = -math.log(max_period) * torch.arange(start=0, end=half_dim, dtype=torch.float64) 42 | exponent = exponent / (half_dim - downscale_freq_shift) 43 | 44 | emb = torch.exp(exponent).to(device=timesteps.device) 45 | emb = timesteps[:, None].double() * emb[None, :] 46 | 47 | # scale embeddings 48 | emb = scale * emb 49 | 50 | # concat sine and cosine embeddings 51 | emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1) 52 | 53 | # flip sine and cosine embeddings 54 | if flip_sin_to_cos: 55 | emb = torch.cat([emb[:, half_dim:], emb[:, :half_dim]], dim=-1) 56 | 57 | # zero pad 58 | if embedding_dim % 2 == 1: 59 | emb = torch.nn.functional.pad(emb, (0, 1, 0, 0)) 60 | return emb 61 | 62 | 63 | class TimestepEmbedding(nn.Module): 64 | def __init__(self, channel: int, time_embed_dim: int, act_fn: str = "silu"): 65 | super().__init__() 66 | 67 | self.linear_1 = nn.Linear(channel, time_embed_dim) 68 | self.act = None 69 | if act_fn == "silu": 70 | self.act = nn.SiLU() 71 | self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim) 72 | 73 | def forward(self, sample): 74 | sample = self.linear_1(sample) 75 | 76 | if self.act is not None: 77 | sample = self.act(sample) 78 | 79 | sample = self.linear_2(sample) 80 | return sample 81 | 82 | 83 | class Timesteps(nn.Module): 84 | def __init__(self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float): 85 | super().__init__() 86 | self.num_channels = num_channels 87 | self.flip_sin_to_cos = flip_sin_to_cos 88 | self.downscale_freq_shift = downscale_freq_shift 89 | 90 | def forward(self, timesteps): 91 | t_emb = get_timestep_embedding( 92 | timesteps, 93 | self.num_channels, 94 | flip_sin_to_cos=self.flip_sin_to_cos, 95 | downscale_freq_shift=self.downscale_freq_shift, 96 | ) 97 | return t_emb 98 | 99 | 100 | class GaussianFourierProjection(nn.Module): 101 | """Gaussian Fourier embeddings for noise levels.""" 102 | 103 | def 
__init__(self, embedding_size: int = 256, scale: float = 1.0): 104 | super().__init__() 105 | self.weight = nn.Parameter(torch.randn(embedding_size) * scale, requires_grad=False) 106 | 107 | # to delete later 108 | self.W = nn.Parameter(torch.randn(embedding_size) * scale, requires_grad=False) 109 | 110 | self.weight = self.W 111 | 112 | def forward(self, x): 113 | x = torch.log(x) 114 | x_proj = x[:, None] * self.weight[None, :] * 2 * np.pi 115 | out = torch.cat([torch.sin(x_proj), torch.cos(x_proj)], dim=-1) 116 | return out 117 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from ..utils import is_onnx_available, is_transformers_available 2 | from .ddim import DDIMPipeline 3 | from .ddpm import DDPMPipeline 4 | from .latent_diffusion_uncond import LDMPipeline 5 | from .pndm import PNDMPipeline 6 | from .score_sde_ve import ScoreSdeVePipeline 7 | from .stochastic_karras_ve import KarrasVePipeline 8 | 9 | 10 | if is_transformers_available(): 11 | from .latent_diffusion import LDMTextToImagePipeline 12 | from .stable_diffusion import ( 13 | StableDiffusionImg2ImgPipeline, 14 | StableDiffusionInpaintPipeline, 15 | StableDiffusionPipeline, 16 | ) 17 | 18 | if is_transformers_available() and is_onnx_available(): 19 | from .stable_diffusion import StableDiffusionOnnxPipeline 20 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_ddim import DDIMPipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddim/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/ddim/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddim/__pycache__/pipeline_ddim.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/ddim/__pycache__/pipeline_ddim.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddim/pipeline_ddim.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | # limitations under the License. 15 | 16 | 17 | import warnings 18 | from typing import Optional, Tuple, Union 19 | 20 | import torch 21 | 22 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 23 | 24 | 25 | class DDIMPipeline(DiffusionPipeline): 26 | r""" 27 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 28 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 29 | 30 | Parameters: 31 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. 32 | scheduler ([`SchedulerMixin`]): 33 | A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of 34 | [`DDPMScheduler`], or [`DDIMScheduler`]. 35 | """ 36 | 37 | def __init__(self, unet, scheduler): 38 | super().__init__() 39 | scheduler = scheduler.set_format("pt") 40 | self.register_modules(unet=unet, scheduler=scheduler) 41 | 42 | @torch.no_grad() 43 | def __call__( 44 | self, 45 | batch_size: int = 1, 46 | generator: Optional[torch.Generator] = None, 47 | eta: float = 0.0, 48 | num_inference_steps: int = 50, 49 | output_type: Optional[str] = "pil", 50 | return_dict: bool = True, 51 | **kwargs, 52 | ) -> Union[ImagePipelineOutput, Tuple]: 53 | r""" 54 | Args: 55 | batch_size (`int`, *optional*, defaults to 1): 56 | The number of images to generate. 57 | generator (`torch.Generator`, *optional*): 58 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 59 | deterministic. 60 | eta (`float`, *optional*, defaults to 0.0): 61 | The eta parameter which controls the scale of the variance (0 is DDIM and 1 is one type of DDPM). 62 | num_inference_steps (`int`, *optional*, defaults to 50): 63 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 64 | expense of slower inference. 65 | output_type (`str`, *optional*, defaults to `"pil"`): 66 | The output format of the generate image. Choose between 67 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 68 | return_dict (`bool`, *optional*, defaults to `True`): 69 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 70 | 71 | Returns: 72 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 73 | `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the 74 | generated images. 75 | """ 76 | 77 | if "torch_device" in kwargs: 78 | device = kwargs.pop("torch_device") 79 | warnings.warn( 80 | "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." 81 | " Consider using `pipe.to(torch_device)` instead." 
82 | ) 83 | 84 | # Set device as before (to be removed in 0.3.0) 85 | if device is None: 86 | device = "cuda" if torch.cuda.is_available() else "cpu" 87 | self.to(device) 88 | 89 | # eta corresponds to η in paper and should be between [0, 1] 90 | 91 | # Sample gaussian noise to begin loop 92 | image = torch.randn( 93 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 94 | generator=generator, 95 | ) 96 | image = image.to(self.device) 97 | 98 | # set step values 99 | self.scheduler.set_timesteps(num_inference_steps) 100 | 101 | for t in self.progress_bar(self.scheduler.timesteps): 102 | # 1. predict noise model_output 103 | model_output = self.unet(image, t).sample 104 | 105 | # 2. predict previous mean of image x_t-1 and add variance depending on eta 106 | # do x_t -> x_t-1 107 | image = self.scheduler.step(model_output, t, image, eta).prev_sample 108 | 109 | image = (image / 2 + 0.5).clamp(0, 1) 110 | image = image.cpu().permute(0, 2, 3, 1).numpy() 111 | if output_type == "pil": 112 | image = self.numpy_to_pil(image) 113 | 114 | if not return_dict: 115 | return (image,) 116 | 117 | return ImagePipelineOutput(images=image) 118 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_ddpm import DDPMPipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddpm/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/ddpm/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddpm/__pycache__/pipeline_ddpm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/ddpm/__pycache__/pipeline_ddpm.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddpm/pipeline_ddpm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | # limitations under the License. 15 | 16 | 17 | import warnings 18 | from typing import Optional, Tuple, Union 19 | 20 | import torch 21 | 22 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 23 | 24 | 25 | class DDPMPipeline(DiffusionPipeline): 26 | r""" 27 | This model inherits from [`DiffusionPipeline`]. 
Check the superclass documentation for the generic methods the 28 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 29 | 30 | Parameters: 31 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. 32 | scheduler ([`SchedulerMixin`]): 33 | A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of 34 | [`DDPMScheduler`], or [`DDIMScheduler`]. 35 | """ 36 | 37 | def __init__(self, unet, scheduler): 38 | super().__init__() 39 | scheduler = scheduler.set_format("pt") 40 | self.register_modules(unet=unet, scheduler=scheduler) 41 | 42 | @torch.no_grad() 43 | def __call__( 44 | self, 45 | batch_size: int = 1, 46 | generator: Optional[torch.Generator] = None, 47 | output_type: Optional[str] = "pil", 48 | return_dict: bool = True, 49 | **kwargs, 50 | ) -> Union[ImagePipelineOutput, Tuple]: 51 | r""" 52 | Args: 53 | batch_size (`int`, *optional*, defaults to 1): 54 | The number of images to generate. 55 | generator (`torch.Generator`, *optional*): 56 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 57 | deterministic. 58 | output_type (`str`, *optional*, defaults to `"pil"`): 59 | The output format of the generate image. Choose between 60 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 61 | return_dict (`bool`, *optional*, defaults to `True`): 62 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 63 | 64 | Returns: 65 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 66 | `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the 67 | generated images. 68 | """ 69 | if "torch_device" in kwargs: 70 | device = kwargs.pop("torch_device") 71 | warnings.warn( 72 | "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." 73 | " Consider using `pipe.to(torch_device)` instead." 74 | ) 75 | 76 | # Set device as before (to be removed in 0.3.0) 77 | if device is None: 78 | device = "cuda" if torch.cuda.is_available() else "cpu" 79 | self.to(device) 80 | 81 | # Sample gaussian noise to begin loop 82 | image = torch.randn( 83 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 84 | generator=generator, 85 | ) 86 | image = image.to(self.device) 87 | 88 | # set step values 89 | self.scheduler.set_timesteps(1000) 90 | 91 | for t in self.progress_bar(self.scheduler.timesteps): 92 | # 1. predict noise model_output 93 | model_output = self.unet(image, t).sample 94 | 95 | # 2. 
compute previous image: x_t -> t_t-1 96 | image = self.scheduler.step(model_output, t, image, generator=generator).prev_sample 97 | 98 | image = (image / 2 + 0.5).clamp(0, 1) 99 | image = image.cpu().permute(0, 2, 3, 1).numpy() 100 | if output_type == "pil": 101 | image = self.numpy_to_pil(image) 102 | 103 | if not return_dict: 104 | return (image,) 105 | 106 | return ImagePipelineOutput(images=image) 107 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from ...utils import is_transformers_available 3 | 4 | 5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/latent_diffusion/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion/__pycache__/pipeline_latent_diffusion.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/latent_diffusion/__pycache__/pipeline_latent_diffusion.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_latent_diffusion_uncond import LDMPipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion_uncond/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/latent_diffusion_uncond/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion_uncond/__pycache__/pipeline_latent_diffusion_uncond.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/latent_diffusion_uncond/__pycache__/pipeline_latent_diffusion_uncond.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import warnings 3 | from typing import Optional, Tuple, Union 4 | 5 | import torch 6 | 7 | from ...models import UNet2DModel, VQModel 8 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 9 | from 
...schedulers import DDIMScheduler 10 | 11 | 12 | class LDMPipeline(DiffusionPipeline): 13 | r""" 14 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 15 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 16 | 17 | Parameters: 18 | vqvae ([`VQModel`]): 19 | Vector-quantized (VQ) Model to encode and decode images to and from latent representations. 20 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image latents. 21 | scheduler ([`SchedulerMixin`]): 22 | [`DDIMScheduler`] is to be used in combination with `unet` to denoise the encoded image latens. 23 | """ 24 | 25 | def __init__(self, vqvae: VQModel, unet: UNet2DModel, scheduler: DDIMScheduler): 26 | super().__init__() 27 | scheduler = scheduler.set_format("pt") 28 | self.register_modules(vqvae=vqvae, unet=unet, scheduler=scheduler) 29 | 30 | @torch.no_grad() 31 | def __call__( 32 | self, 33 | batch_size: int = 1, 34 | generator: Optional[torch.Generator] = None, 35 | eta: float = 0.0, 36 | num_inference_steps: int = 50, 37 | output_type: Optional[str] = "pil", 38 | return_dict: bool = True, 39 | **kwargs, 40 | ) -> Union[Tuple, ImagePipelineOutput]: 41 | 42 | r""" 43 | Args: 44 | batch_size (`int`, *optional*, defaults to 1): 45 | Number of images to generate. 46 | generator (`torch.Generator`, *optional*): 47 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 48 | deterministic. 49 | num_inference_steps (`int`, *optional*, defaults to 50): 50 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 51 | expense of slower inference. 52 | output_type (`str`, *optional*, defaults to `"pil"`): 53 | The output format of the generate image. Choose between 54 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 55 | return_dict (`bool`, *optional*, defaults to `True`): 56 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 57 | 58 | Returns: 59 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 60 | `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the 61 | generated images. 62 | """ 63 | 64 | if "torch_device" in kwargs: 65 | device = kwargs.pop("torch_device") 66 | warnings.warn( 67 | "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." 68 | " Consider using `pipe.to(torch_device)` instead." 
69 | ) 70 | 71 | # Set device as before (to be removed in 0.3.0) 72 | if device is None: 73 | device = "cuda" if torch.cuda.is_available() else "cpu" 74 | self.to(device) 75 | 76 | latents = torch.randn( 77 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 78 | generator=generator, 79 | ) 80 | latents = latents.to(self.device) 81 | 82 | self.scheduler.set_timesteps(num_inference_steps) 83 | 84 | # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature 85 | accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) 86 | 87 | extra_kwargs = {} 88 | if accepts_eta: 89 | extra_kwargs["eta"] = eta 90 | 91 | for t in self.progress_bar(self.scheduler.timesteps): 92 | # predict the noise residual 93 | noise_prediction = self.unet(latents, t).sample 94 | # compute the previous noisy sample x_t -> x_t-1 95 | latents = self.scheduler.step(noise_prediction, t, latents, **extra_kwargs).prev_sample 96 | 97 | # decode the image latents with the VAE 98 | image = self.vqvae.decode(latents).sample 99 | 100 | image = (image / 2 + 0.5).clamp(0, 1) 101 | image = image.cpu().permute(0, 2, 3, 1).numpy() 102 | if output_type == "pil": 103 | image = self.numpy_to_pil(image) 104 | 105 | if not return_dict: 106 | return (image,) 107 | 108 | return ImagePipelineOutput(images=image) 109 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_pndm import PNDMPipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/pndm/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/pndm/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/pndm/__pycache__/pipeline_pndm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/pndm/__pycache__/pipeline_pndm.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/pndm/pipeline_pndm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | # limitations under the License. 
15 | 16 | 17 | import warnings 18 | from typing import Optional, Tuple, Union 19 | 20 | import torch 21 | 22 | from ...models import UNet2DModel 23 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 24 | from ...schedulers import PNDMScheduler 25 | 26 | 27 | class PNDMPipeline(DiffusionPipeline): 28 | r""" 29 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 30 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 31 | 32 | Parameters: 33 | unet (`UNet2DModel`): U-Net architecture to denoise the encoded image latents. 34 | scheduler ([`SchedulerMixin`]): 35 | The `PNDMScheduler` to be used in combination with `unet` to denoise the encoded image. 36 | """ 37 | 38 | unet: UNet2DModel 39 | scheduler: PNDMScheduler 40 | 41 | def __init__(self, unet: UNet2DModel, scheduler: PNDMScheduler): 42 | super().__init__() 43 | scheduler = scheduler.set_format("pt") 44 | self.register_modules(unet=unet, scheduler=scheduler) 45 | 46 | @torch.no_grad() 47 | def __call__( 48 | self, 49 | batch_size: int = 1, 50 | num_inference_steps: int = 50, 51 | generator: Optional[torch.Generator] = None, 52 | output_type: Optional[str] = "pil", 53 | return_dict: bool = True, 54 | **kwargs, 55 | ) -> Union[ImagePipelineOutput, Tuple]: 56 | r""" 57 | Args: 58 | batch_size (`int`, `optional`, defaults to 1): The number of images to generate. 59 | num_inference_steps (`int`, `optional`, defaults to 50): 60 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 61 | expense of slower inference. 62 | generator (`torch.Generator`, `optional`): A [torch 63 | generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 64 | deterministic. 65 | output_type (`str`, `optional`, defaults to `"pil"`): The output format of the generate image. Choose 66 | between [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 67 | return_dict (`bool`, `optional`, defaults to `True`): Whether or not to return a 68 | [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 69 | 70 | Returns: 71 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 72 | `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the 73 | generated images. 74 | """ 75 | # For more information on the sampling method you can take a look at Algorithm 2 of 76 | # the official paper: https://arxiv.org/pdf/2202.09778.pdf 77 | 78 | if "torch_device" in kwargs: 79 | device = kwargs.pop("torch_device") 80 | warnings.warn( 81 | "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." 82 | " Consider using `pipe.to(torch_device)` instead." 
83 | ) 84 | 85 | # Set device as before (to be removed in 0.3.0) 86 | if device is None: 87 | device = "cuda" if torch.cuda.is_available() else "cpu" 88 | self.to(device) 89 | 90 | # Sample gaussian noise to begin loop 91 | image = torch.randn( 92 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 93 | generator=generator, 94 | ) 95 | image = image.to(self.device) 96 | 97 | self.scheduler.set_timesteps(num_inference_steps) 98 | for t in self.progress_bar(self.scheduler.timesteps): 99 | model_output = self.unet(image, t).sample 100 | 101 | image = self.scheduler.step(model_output, t, image).prev_sample 102 | 103 | image = (image / 2 + 0.5).clamp(0, 1) 104 | image = image.cpu().permute(0, 2, 3, 1).numpy() 105 | if output_type == "pil": 106 | image = self.numpy_to_pil(image) 107 | 108 | if not return_dict: 109 | return (image,) 110 | 111 | return ImagePipelineOutput(images=image) 112 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/score_sde_ve/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/score_sde_ve/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/score_sde_ve/__pycache__/pipeline_score_sde_ve.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/score_sde_ve/__pycache__/pipeline_score_sde_ve.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import warnings 3 | from typing import Optional, Tuple, Union 4 | 5 | import torch 6 | 7 | from ...models import UNet2DModel 8 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 9 | from ...schedulers import ScoreSdeVeScheduler 10 | 11 | 12 | class ScoreSdeVePipeline(DiffusionPipeline): 13 | r""" 14 | Parameters: 15 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 16 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 17 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. scheduler ([`SchedulerMixin`]): 18 | The [`ScoreSdeVeScheduler`] scheduler to be used in combination with `unet` to denoise the encoded image. 
19 | """ 20 | unet: UNet2DModel 21 | scheduler: ScoreSdeVeScheduler 22 | 23 | def __init__(self, unet: UNet2DModel, scheduler: DiffusionPipeline): 24 | super().__init__() 25 | self.register_modules(unet=unet, scheduler=scheduler) 26 | 27 | @torch.no_grad() 28 | def __call__( 29 | self, 30 | batch_size: int = 1, 31 | num_inference_steps: int = 2000, 32 | generator: Optional[torch.Generator] = None, 33 | output_type: Optional[str] = "pil", 34 | return_dict: bool = True, 35 | **kwargs, 36 | ) -> Union[ImagePipelineOutput, Tuple]: 37 | r""" 38 | Args: 39 | batch_size (`int`, *optional*, defaults to 1): 40 | The number of images to generate. 41 | generator (`torch.Generator`, *optional*): 42 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 43 | deterministic. 44 | output_type (`str`, *optional*, defaults to `"pil"`): 45 | The output format of the generate image. Choose between 46 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 47 | return_dict (`bool`, *optional*, defaults to `True`): 48 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 49 | 50 | Returns: 51 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 52 | `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the 53 | generated images. 54 | """ 55 | 56 | if "torch_device" in kwargs: 57 | device = kwargs.pop("torch_device") 58 | warnings.warn( 59 | "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." 60 | " Consider using `pipe.to(torch_device)` instead." 61 | ) 62 | 63 | # Set device as before (to be removed in 0.3.0) 64 | if device is None: 65 | device = "cuda" if torch.cuda.is_available() else "cpu" 66 | self.to(device) 67 | 68 | img_size = self.unet.config.sample_size 69 | shape = (batch_size, 3, img_size, img_size) 70 | 71 | model = self.unet 72 | 73 | sample = torch.randn(*shape, generator=generator) * self.scheduler.config.sigma_max 74 | sample = sample.to(self.device) 75 | 76 | self.scheduler.set_timesteps(num_inference_steps) 77 | self.scheduler.set_sigmas(num_inference_steps) 78 | 79 | for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): 80 | sigma_t = self.scheduler.sigmas[i] * torch.ones(shape[0], device=self.device) 81 | 82 | # correction step 83 | for _ in range(self.scheduler.correct_steps): 84 | model_output = self.unet(sample, sigma_t).sample 85 | sample = self.scheduler.step_correct(model_output, sample, generator=generator).prev_sample 86 | 87 | # prediction step 88 | model_output = model(sample, sigma_t).sample 89 | output = self.scheduler.step_pred(model_output, t, sample, generator=generator) 90 | 91 | sample, sample_mean = output.prev_sample, output.prev_sample_mean 92 | 93 | sample = sample_mean.clamp(0, 1) 94 | sample = sample.cpu().permute(0, 2, 3, 1).numpy() 95 | if output_type == "pil": 96 | sample = self.numpy_to_pil(sample) 97 | 98 | if not return_dict: 99 | return (sample,) 100 | 101 | return ImagePipelineOutput(images=sample) 102 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Union 3 | 4 | import numpy as np 5 | 6 | import PIL 7 | from PIL import Image 8 | 9 | from ...utils 
import BaseOutput, is_onnx_available, is_transformers_available 10 | 11 | 12 | @dataclass 13 | class StableDiffusionPipelineOutput(BaseOutput): 14 | """ 15 | Output class for Stable Diffusion pipelines. 16 | 17 | Args: 18 | images (`List[PIL.Image.Image]` or `np.ndarray`) 19 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, 20 | num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline. 21 | nsfw_content_detected (`List[bool]`) 22 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work" 23 | (nsfw) content. 24 | """ 25 | 26 | images: Union[List[PIL.Image.Image], np.ndarray] 27 | nsfw_content_detected: List[bool] 28 | 29 | 30 | if is_transformers_available(): 31 | from .pipeline_stable_diffusion import StableDiffusionPipeline 32 | from .pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline 33 | from .pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline 34 | from .safety_checker import StableDiffusionSafetyChecker 35 | 36 | if is_transformers_available() and is_onnx_available(): 37 | from .pipeline_stable_diffusion_onnx import StableDiffusionOnnxPipeline 38 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_img2img.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_img2img.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_inpaint.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_inpaint.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/safety_checker.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/safety_checker.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/safety_checker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel 6 | 7 | from ...utils import logging 8 | 9 | 10 | logger = logging.get_logger(__name__) 11 | 12 | 13 | def cosine_distance(image_embeds, text_embeds): 14 | normalized_image_embeds = nn.functional.normalize(image_embeds) 15 | normalized_text_embeds = nn.functional.normalize(text_embeds) 16 | return torch.mm(normalized_image_embeds, normalized_text_embeds.t()) 17 | 18 | 19 | class StableDiffusionSafetyChecker(PreTrainedModel): 20 | config_class = CLIPConfig 21 | 22 | def __init__(self, config: CLIPConfig): 23 | super().__init__(config) 24 | 25 | self.vision_model = CLIPVisionModel(config.vision_config) 26 | self.visual_projection = nn.Linear(config.vision_config.hidden_size, config.projection_dim, bias=False) 27 | 28 | self.concept_embeds = nn.Parameter(torch.ones(17, config.projection_dim), requires_grad=False) 29 | self.special_care_embeds = nn.Parameter(torch.ones(3, config.projection_dim), requires_grad=False) 30 | 31 | self.register_buffer("concept_embeds_weights", torch.ones(17)) 32 | self.register_buffer("special_care_embeds_weights", torch.ones(3)) 33 | 34 | @torch.no_grad() 35 | def forward(self, clip_input, images): 36 | pooled_output = self.vision_model(clip_input)[1] # pooled_output 37 | image_embeds = self.visual_projection(pooled_output) 38 | 39 | special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds).cpu().numpy() 40 | cos_dist = cosine_distance(image_embeds, self.concept_embeds).cpu().numpy() 41 | 42 | result = [] 43 | batch_size = image_embeds.shape[0] 44 | for i in range(batch_size): 45 | result_img = {"special_scores": {}, "special_care": [], "concept_scores": {}, "bad_concepts": []} 46 | 47 | # increase this value to create a stronger `nfsw` filter 48 | # at the cost of increasing the possibility of filtering benign images 49 | adjustment = 0.0 50 | 51 | for concet_idx in range(len(special_cos_dist[0])): 52 | concept_cos = special_cos_dist[i][concet_idx] 53 | concept_threshold = self.special_care_embeds_weights[concet_idx].item() 54 | result_img["special_scores"][concet_idx] = round(concept_cos - concept_threshold + adjustment, 3) 55 | if result_img["special_scores"][concet_idx] > 0: 56 | result_img["special_care"].append({concet_idx, result_img["special_scores"][concet_idx]}) 57 | adjustment = 0.01 58 | 59 | for concet_idx in range(len(cos_dist[0])): 60 | concept_cos = cos_dist[i][concet_idx] 61 | concept_threshold = self.concept_embeds_weights[concet_idx].item() 62 | result_img["concept_scores"][concet_idx] = round(concept_cos - concept_threshold + adjustment, 3) 63 | if result_img["concept_scores"][concet_idx] > 0: 64 | result_img["bad_concepts"].append(concet_idx) 65 | 66 | result.append(result_img) 67 | 68 | has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result] 69 | 70 | for idx, has_nsfw_concept in enumerate(has_nsfw_concepts): 71 | if has_nsfw_concept: 72 | images[idx] = np.zeros(images[idx].shape) # black image 73 | 74 | if any(has_nsfw_concepts): 75 | 
logger.warning( 76 | "Potential NSFW content was detected in one or more images. A black image will be returned instead." 77 | " Try again with a different prompt and/or seed." 78 | ) 79 | 80 | return images, has_nsfw_concepts 81 | 82 | @torch.inference_mode() 83 | def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor): 84 | pooled_output = self.vision_model(clip_input)[1] # pooled_output 85 | image_embeds = self.visual_projection(pooled_output) 86 | 87 | special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds) 88 | cos_dist = cosine_distance(image_embeds, self.concept_embeds) 89 | 90 | # increase this value to create a stronger `nsfw` filter 91 | # at the cost of increasing the possibility of filtering benign images 92 | adjustment = 0.0 93 | 94 | special_scores = special_cos_dist - self.special_care_embeds_weights + adjustment 95 | # special_scores = special_scores.round(decimals=3) 96 | special_care = torch.any(special_scores > 0, dim=1) 97 | special_adjustment = special_care * 0.01 98 | special_adjustment = special_adjustment.unsqueeze(1).expand(-1, cos_dist.shape[1]) 99 | 100 | concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment 101 | # concept_scores = concept_scores.round(decimals=3) 102 | has_nsfw_concepts = torch.any(concept_scores > 0, dim=1) 103 | 104 | images[has_nsfw_concepts] = 0.0 # black image 105 | 106 | return images, has_nsfw_concepts 107 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stochastic_karras_ve/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stochastic_karras_ve/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stochastic_karras_ve/__pycache__/pipeline_stochastic_karras_ve.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stochastic_karras_ve/__pycache__/pipeline_stochastic_karras_ve.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ..utils import is_scipy_available 16 | from .scheduling_ddim import DDIMScheduler 17 | from .scheduling_ddpm import DDPMScheduler 18 | from .scheduling_karras_ve import KarrasVeScheduler 19 | from .scheduling_pndm import PNDMScheduler 20 | from .scheduling_sde_ve import ScoreSdeVeScheduler 21 | from .scheduling_sde_vp import ScoreSdeVpScheduler 22 | from .scheduling_utils import SchedulerMixin 23 | 24 | 25 | if is_scipy_available(): 26 | from .scheduling_lms_discrete import LMSDiscreteScheduler 27 | else: 28 | from ..utils.dummy_scipy_objects import * # noqa F403 29 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_ddim.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_ddim.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_ddpm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_ddpm.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_karras_ve.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_karras_ve.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_lms_discrete.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_lms_discrete.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_pndm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_pndm.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_sde_ve.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_sde_ve.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_sde_vp.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_sde_vp.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_utils.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/scheduling_sde_vp.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch 16 | 17 | # TODO(Patrick, Anton, Suraj) - make scheduler framework indepedent and clean-up a bit 18 | 19 | import numpy as np 20 | import torch 21 | 22 | from ..configuration_utils import ConfigMixin, register_to_config 23 | from .scheduling_utils import SchedulerMixin 24 | 25 | 26 | class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): 27 | """ 28 | The variance preserving stochastic differential equation (SDE) scheduler. 29 | 30 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 31 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 32 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 33 | [`~ConfigMixin.from_config`] functios. 
34 | 35 | For more information, see the original paper: https://arxiv.org/abs/2011.13456 36 | 37 | UNDER CONSTRUCTION 38 | 39 | """ 40 | 41 | @register_to_config 42 | def __init__(self, num_train_timesteps=2000, beta_min=0.1, beta_max=20, sampling_eps=1e-3, tensor_format="np"): 43 | 44 | self.sigmas = None 45 | self.discrete_sigmas = None 46 | self.timesteps = None 47 | 48 | def set_timesteps(self, num_inference_steps): 49 | self.timesteps = torch.linspace(1, self.config.sampling_eps, num_inference_steps) 50 | 51 | def step_pred(self, score, x, t): 52 | if self.timesteps is None: 53 | raise ValueError( 54 | "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler" 55 | ) 56 | 57 | # TODO(Patrick) better comments + non-PyTorch 58 | # postprocess model score 59 | log_mean_coeff = ( 60 | -0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min 61 | ) 62 | std = torch.sqrt(1.0 - torch.exp(2.0 * log_mean_coeff)) 63 | score = -score / std[:, None, None, None] 64 | 65 | # compute 66 | dt = -1.0 / len(self.timesteps) 67 | 68 | beta_t = self.config.beta_min + t * (self.config.beta_max - self.config.beta_min) 69 | drift = -0.5 * beta_t[:, None, None, None] * x 70 | diffusion = torch.sqrt(beta_t) 71 | drift = drift - diffusion[:, None, None, None] ** 2 * score 72 | x_mean = x + drift * dt 73 | 74 | # add noise 75 | noise = torch.randn_like(x) 76 | x = x_mean + diffusion[:, None, None, None] * np.sqrt(-dt) * noise 77 | 78 | return x, x_mean 79 | 80 | def __len__(self): 81 | return self.config.num_train_timesteps 82 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/scheduling_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass 15 | from typing import Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from ..utils import BaseOutput 21 | 22 | 23 | SCHEDULER_CONFIG_NAME = "scheduler_config.json" 24 | 25 | 26 | @dataclass 27 | class SchedulerOutput(BaseOutput): 28 | """ 29 | Base class for the scheduler's step function output. 30 | 31 | Args: 32 | prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): 33 | Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the 34 | denoising loop. 35 | """ 36 | 37 | prev_sample: torch.FloatTensor 38 | 39 | 40 | class SchedulerMixin: 41 | """ 42 | Mixin containing common functions for the schedulers. 
43 | """ 44 | 45 | config_name = SCHEDULER_CONFIG_NAME 46 | ignore_for_config = ["tensor_format"] 47 | 48 | def set_format(self, tensor_format="pt"): 49 | self.tensor_format = tensor_format 50 | if tensor_format == "pt": 51 | for key, value in vars(self).items(): 52 | if isinstance(value, np.ndarray): 53 | setattr(self, key, torch.from_numpy(value)) 54 | 55 | return self 56 | 57 | def clip(self, tensor, min_value=None, max_value=None): 58 | tensor_format = getattr(self, "tensor_format", "pt") 59 | 60 | if tensor_format == "np": 61 | return np.clip(tensor, min_value, max_value) 62 | elif tensor_format == "pt": 63 | return torch.clamp(tensor, min_value, max_value) 64 | 65 | raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") 66 | 67 | def log(self, tensor): 68 | tensor_format = getattr(self, "tensor_format", "pt") 69 | 70 | if tensor_format == "np": 71 | return np.log(tensor) 72 | elif tensor_format == "pt": 73 | return torch.log(tensor) 74 | 75 | raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") 76 | 77 | def match_shape(self, values: Union[np.ndarray, torch.Tensor], broadcast_array: Union[np.ndarray, torch.Tensor]): 78 | """ 79 | Turns a 1-D array into an array or tensor with len(broadcast_array.shape) dims. 80 | 81 | Args: 82 | values: an array or tensor of values to extract. 83 | broadcast_array: an array with a larger shape of K dimensions with the batch 84 | dimension equal to the length of timesteps. 85 | Returns: 86 | a tensor of shape [batch_size, 1, ...] where the shape has K dims. 87 | """ 88 | 89 | tensor_format = getattr(self, "tensor_format", "pt") 90 | values = values.flatten() 91 | 92 | while len(values.shape) < len(broadcast_array.shape): 93 | values = values[..., None] 94 | if tensor_format == "pt": 95 | values = values.to(broadcast_array.device) 96 | 97 | return values 98 | 99 | def norm(self, tensor): 100 | tensor_format = getattr(self, "tensor_format", "pt") 101 | if tensor_format == "np": 102 | return np.linalg.norm(tensor) 103 | elif tensor_format == "pt": 104 | return torch.norm(tensor.reshape(tensor.shape[0], -1), dim=-1).mean() 105 | 106 | raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") 107 | 108 | def randn_like(self, tensor, generator=None): 109 | tensor_format = getattr(self, "tensor_format", "pt") 110 | if tensor_format == "np": 111 | return np.random.randn(*np.shape(tensor)) 112 | elif tensor_format == "pt": 113 | # return torch.randn_like(tensor) 114 | return torch.randn(tensor.shape, layout=tensor.layout, generator=generator).to(tensor.device) 115 | 116 | raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") 117 | 118 | def zeros_like(self, tensor): 119 | tensor_format = getattr(self, "tensor_format", "pt") 120 | if tensor_format == "np": 121 | return np.zeros_like(tensor) 122 | elif tensor_format == "pt": 123 | return torch.zeros_like(tensor) 124 | 125 | raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") 126 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/testing_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import unittest 4 | from distutils.util import strtobool 5 | 6 | import torch 7 | 8 | from packaging import version 9 | 10 | 11 | global_rng = random.Random() 12 | torch_device = "cuda" if torch.cuda.is_available() else "cpu" 13 | is_torch_higher_equal_than_1_12 = 
version.parse(version.parse(torch.__version__).base_version) >= version.parse("1.12") 14 | 15 | if is_torch_higher_equal_than_1_12: 16 | torch_device = "mps" if torch.backends.mps.is_available() else torch_device 17 | 18 | 19 | def parse_flag_from_env(key, default=False): 20 | try: 21 | value = os.environ[key] 22 | except KeyError: 23 | # KEY isn't set, default to `default`. 24 | _value = default 25 | else: 26 | # KEY is set, convert it to True or False. 27 | try: 28 | _value = strtobool(value) 29 | except ValueError: 30 | # More values are supported, but let's keep the message simple. 31 | raise ValueError(f"If set, {key} must be yes or no.") 32 | return _value 33 | 34 | 35 | _run_slow_tests = parse_flag_from_env("RUN_SLOW", default=False) 36 | 37 | 38 | def floats_tensor(shape, scale=1.0, rng=None, name=None): 39 | """Creates a random float32 tensor""" 40 | if rng is None: 41 | rng = global_rng 42 | 43 | total_dims = 1 44 | for dim in shape: 45 | total_dims *= dim 46 | 47 | values = [] 48 | for _ in range(total_dims): 49 | values.append(rng.random() * scale) 50 | 51 | return torch.tensor(data=values, dtype=torch.float).view(shape).contiguous() 52 | 53 | 54 | def slow(test_case): 55 | """ 56 | Decorator marking a test as slow. 57 | 58 | Slow tests are skipped by default. Set the RUN_SLOW environment variable to a truthy value to run them. 59 | 60 | """ 61 | return unittest.skipUnless(_run_slow_tests, "test is slow")(test_case) 62 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/training_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import torch 7 | 8 | 9 | def enable_full_determinism(seed: int): 10 | """ 11 | Helper function for reproducible behavior during distributed training. See 12 | - https://pytorch.org/docs/stable/notes/randomness.html for pytorch 13 | """ 14 | # set seed first 15 | set_seed(seed) 16 | 17 | # Enable PyTorch deterministic mode. This potentially requires either the environment 18 | # variable 'CUDA_LAUNCH_BLOCKING' or 'CUBLAS_WORKSPACE_CONFIG' to be set, 19 | # depending on the CUDA version, so we set them both here 20 | os.environ["CUDA_LAUNCH_BLOCKING"] = "1" 21 | os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8" 22 | torch.use_deterministic_algorithms(True) 23 | 24 | # Enable CUDNN deterministic mode 25 | torch.backends.cudnn.deterministic = True 26 | torch.backends.cudnn.benchmark = False 27 | 28 | 29 | def set_seed(seed: int): 30 | """ 31 | Args: 32 | Helper function for reproducible behavior to set the seed in `random`, `numpy`, `torch`. 33 | seed (`int`): The seed to set. 34 | """ 35 | random.seed(seed) 36 | np.random.seed(seed) 37 | torch.manual_seed(seed) 38 | torch.cuda.manual_seed_all(seed) 39 | # ^^ safe to call this function even if cuda is not available 40 | 41 | 42 | class EMAModel: 43 | """ 44 | Exponential Moving Average of models weights 45 | """ 46 | 47 | def __init__( 48 | self, 49 | model, 50 | update_after_step=0, 51 | inv_gamma=1.0, 52 | power=2 / 3, 53 | min_value=0.0, 54 | max_value=0.9999, 55 | device=None, 56 | ): 57 | """ 58 | @crowsonkb's notes on EMA Warmup: 59 | If gamma=1 and power=1, implements a simple average. 
gamma=1, power=2/3 are good values for models you plan 60 | to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps), 61 | gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999 62 | at 215.4k steps). 63 | Args: 64 | inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1. 65 | power (float): Exponential factor of EMA warmup. Default: 2/3. 66 | min_value (float): The minimum EMA decay rate. Default: 0. 67 | """ 68 | 69 | self.averaged_model = copy.deepcopy(model).eval() 70 | self.averaged_model.requires_grad_(False) 71 | 72 | self.update_after_step = update_after_step 73 | self.inv_gamma = inv_gamma 74 | self.power = power 75 | self.min_value = min_value 76 | self.max_value = max_value 77 | 78 | if device is not None: 79 | self.averaged_model = self.averaged_model.to(device=device) 80 | 81 | self.decay = 0.0 82 | self.optimization_step = 0 83 | 84 | def get_decay(self, optimization_step): 85 | """ 86 | Compute the decay factor for the exponential moving average. 87 | """ 88 | step = max(0, optimization_step - self.update_after_step - 1) 89 | value = 1 - (1 + step / self.inv_gamma) ** -self.power 90 | 91 | if step <= 0: 92 | return 0.0 93 | 94 | return max(self.min_value, min(value, self.max_value)) 95 | 96 | @torch.no_grad() 97 | def step(self, new_model): 98 | ema_state_dict = {} 99 | ema_params = self.averaged_model.state_dict() 100 | 101 | self.decay = self.get_decay(self.optimization_step) 102 | 103 | for key, param in new_model.named_parameters(): 104 | if isinstance(param, dict): 105 | continue 106 | try: 107 | ema_param = ema_params[key] 108 | except KeyError: 109 | ema_param = param.float().clone() if param.ndim == 1 else copy.deepcopy(param) 110 | ema_params[key] = ema_param 111 | 112 | if not param.requires_grad: 113 | ema_params[key].copy_(param.to(dtype=ema_param.dtype).data) 114 | ema_param = ema_params[key] 115 | else: 116 | ema_param.mul_(self.decay) 117 | ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=1 - self.decay) 118 | 119 | ema_state_dict[key] = ema_param 120 | 121 | for key, param in new_model.named_buffers(): 122 | ema_state_dict[key] = param 123 | 124 | self.averaged_model.load_state_dict(ema_state_dict, strict=False) 125 | self.optimization_step += 1 126 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | import os 17 | 18 | from .import_utils import ( 19 | ENV_VARS_TRUE_AND_AUTO_VALUES, 20 | ENV_VARS_TRUE_VALUES, 21 | USE_JAX, 22 | USE_TF, 23 | USE_TORCH, 24 | DummyObject, 25 | is_flax_available, 26 | is_inflect_available, 27 | is_modelcards_available, 28 | is_onnx_available, 29 | is_scipy_available, 30 | is_tf_available, 31 | is_torch_available, 32 | is_transformers_available, 33 | is_unidecode_available, 34 | requires_backends, 35 | ) 36 | from .logging import get_logger 37 | from .outputs import BaseOutput 38 | 39 | 40 | logger = get_logger(__name__) 41 | 42 | 43 | hf_cache_home = os.path.expanduser( 44 | os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) 45 | ) 46 | default_cache_path = os.path.join(hf_cache_home, "diffusers") 47 | 48 | 49 | CONFIG_NAME = "config.json" 50 | HUGGINGFACE_CO_RESOLVE_ENDPOINT = "https://huggingface.co" 51 | DIFFUSERS_CACHE = default_cache_path 52 | DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" 53 | HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) 54 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__pycache__/dummy_transformers_and_onnx_objects.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/utils/__pycache__/dummy_transformers_and_onnx_objects.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__pycache__/import_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/utils/__pycache__/import_utils.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__pycache__/logging.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/utils/__pycache__/logging.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__pycache__/outputs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/utils/__pycache__/outputs.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/dummy_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class LMSDiscreteScheduler(metaclass=DummyObject): 8 | _backends = ["scipy"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["scipy"]) 12 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/dummy_transformers_and_inflect_and_unidecode_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | from ..utils import DummyObject, requires_backends 4 | 5 | 6 | class GradTTSPipeline(metaclass=DummyObject): 7 | _backends = ["transformers", "inflect", "unidecode"] 8 | 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["transformers", "inflect", "unidecode"]) 11 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/dummy_transformers_and_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class StableDiffusionOnnxPipeline(metaclass=DummyObject): 8 | _backends = ["transformers", "onnx"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["transformers", "onnx"]) 12 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/dummy_transformers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class LDMTextToImagePipeline(metaclass=DummyObject): 8 | _backends = ["transformers"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["transformers"]) 12 | 13 | 14 | class StableDiffusionImg2ImgPipeline(metaclass=DummyObject): 15 | _backends = ["transformers"] 16 | 17 | def __init__(self, *args, **kwargs): 18 | requires_backends(self, ["transformers"]) 19 | 20 | 21 | class StableDiffusionInpaintPipeline(metaclass=DummyObject): 22 | _backends = ["transformers"] 23 | 24 | def __init__(self, *args, **kwargs): 25 | requires_backends(self, ["transformers"]) 26 | 27 | 28 | class StableDiffusionPipeline(metaclass=DummyObject): 29 | _backends = ["transformers"] 30 | 31 | def __init__(self, *args, **kwargs): 32 | requires_backends(self, ["transformers"]) 33 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/model_card_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | {{ card_data }} 3 | --- 4 | 5 | 7 | 8 | # {{ model_name | default("Diffusion Model") }} 9 | 10 | ## Model description 11 | 12 | This diffusion model is trained with the [🤗 Diffusers](https://github.com/huggingface/diffusers) library 13 | on the `{{ dataset_name }}` dataset. 
14 | 15 | ## Intended uses & limitations 16 | 17 | #### How to use 18 | 19 | ```python 20 | # TODO: add an example code snippet for running this diffusion pipeline 21 | ``` 22 | 23 | #### Limitations and bias 24 | 25 | [TODO: provide examples of latent issues and potential remediations] 26 | 27 | ## Training data 28 | 29 | [TODO: describe the data used to train the model] 30 | 31 | ### Training hyperparameters 32 | 33 | The following hyperparameters were used during training: 34 | - learning_rate: {{ learning_rate }} 35 | - train_batch_size: {{ train_batch_size }} 36 | - eval_batch_size: {{ eval_batch_size }} 37 | - gradient_accumulation_steps: {{ gradient_accumulation_steps }} 38 | - optimizer: AdamW with betas=({{ adam_beta1 }}, {{ adam_beta2 }}), weight_decay={{ adam_weight_decay }} and epsilon={{ adam_epsilon }} 39 | - lr_scheduler: {{ lr_scheduler }} 40 | - lr_warmup_steps: {{ lr_warmup_steps }} 41 | - ema_inv_gamma: {{ ema_inv_gamma }} 42 | - ema_power: {{ ema_power }} 43 | - ema_max_decay: {{ ema_max_decay }} 44 | - mixed_precision: {{ mixed_precision }} 45 | 46 | ### Training results 47 | 48 | 📈 [TensorBoard logs](https://huggingface.co/{{ repo_name }}/tensorboard?#scalars) 49 | 50 | 51 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/outputs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Generic utilities 16 | """ 17 | 18 | import warnings 19 | from collections import OrderedDict 20 | from dataclasses import fields 21 | from typing import Any, Tuple 22 | 23 | import numpy as np 24 | 25 | from .import_utils import is_torch_available 26 | 27 | 28 | def is_tensor(x): 29 | """ 30 | Tests if `x` is a `torch.Tensor` or `np.ndarray`. 31 | """ 32 | if is_torch_available(): 33 | import torch 34 | 35 | if isinstance(x, torch.Tensor): 36 | return True 37 | 38 | return isinstance(x, np.ndarray) 39 | 40 | 41 | class BaseOutput(OrderedDict): 42 | """ 43 | Base class for all model outputs as dataclass. Has a `__getitem__` that allows indexing by integer or slice (like a 44 | tuple) or strings (like a dictionary) that will ignore the `None` attributes. Otherwise behaves like a regular 45 | python dictionary. 46 | 47 | 48 | 49 | You can't unpack a `BaseOutput` directly. Use the [`~utils.BaseOutput.to_tuple`] method to convert it to a tuple 50 | before.
51 | 52 | 53 | """ 54 | 55 | def __post_init__(self): 56 | class_fields = fields(self) 57 | 58 | # Safety and consistency checks 59 | if not len(class_fields): 60 | raise ValueError(f"{self.__class__.__name__} has no fields.") 61 | 62 | for field in class_fields: 63 | v = getattr(self, field.name) 64 | if v is not None: 65 | self[field.name] = v 66 | 67 | def __delitem__(self, *args, **kwargs): 68 | raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.") 69 | 70 | def setdefault(self, *args, **kwargs): 71 | raise Exception(f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance.") 72 | 73 | def pop(self, *args, **kwargs): 74 | raise Exception(f"You cannot use ``pop`` on a {self.__class__.__name__} instance.") 75 | 76 | def update(self, *args, **kwargs): 77 | raise Exception(f"You cannot use ``update`` on a {self.__class__.__name__} instance.") 78 | 79 | def __getitem__(self, k): 80 | if isinstance(k, str): 81 | inner_dict = {k: v for (k, v) in self.items()} 82 | if self.__class__.__name__ in ["StableDiffusionPipelineOutput", "ImagePipelineOutput"] and k == "sample": 83 | warnings.warn( 84 | "The keyword 'samples' is deprecated and will be removed in version 0.4.0. Please use `.images` or" 85 | " `'images'` instead.", 86 | DeprecationWarning, 87 | ) 88 | return inner_dict["images"] 89 | return inner_dict[k] 90 | else: 91 | return self.to_tuple()[k] 92 | 93 | def __setattr__(self, name, value): 94 | if name in self.keys() and value is not None: 95 | # Don't call self.__setitem__ to avoid recursion errors 96 | super().__setitem__(name, value) 97 | super().__setattr__(name, value) 98 | 99 | def __setitem__(self, key, value): 100 | # Will raise a KeyException if needed 101 | super().__setitem__(key, value) 102 | # Don't call self.__setattr__ to avoid recursion errors 103 | super().__setattr__(key, value) 104 | 105 | def to_tuple(self) -> Tuple[Any]: 106 | """ 107 | Convert self to a tuple containing all the attributes/keys that are not `None`. 
108 | """ 109 | return tuple(self[k] for k in self.keys()) 110 | -------------------------------------------------------------------------------- /image_examples/BDIADDIM_t2i_20pairs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/BDIADDIM_t2i_20pairs.png -------------------------------------------------------------------------------- /image_examples/controlnet_BDIA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/controlnet_BDIA.png -------------------------------------------------------------------------------- /image_examples/controlnet_BDIA_2nd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/controlnet_BDIA_2nd.png -------------------------------------------------------------------------------- /image_examples/controlnet_BDIA_pro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/controlnet_BDIA_pro.png -------------------------------------------------------------------------------- /image_examples/image_editing_cat_lion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/image_editing_cat_lion.png -------------------------------------------------------------------------------- /image_examples/woman_editing_2nd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/woman_editing_2nd.png -------------------------------------------------------------------------------- /text-to-image/Readme.md: -------------------------------------------------------------------------------- 1 | We implemented BDIA-DDIM in Stable Diffusion V2. In particular, we introduced an additional file, "BDIAddim.py", in the folder "stablediffusionV2/ldm/models/diffusion" for BDIA-DDIM. 2 | 3 | Steps to run the code: 4 | 1. Download the pretrained model v2-1_512-ema-pruned.ckpt from the following link: https://huggingface.co/stabilityai/stable-diffusion-2-1-base/tree/main, and then put the model in the "checkpoints" folder. 5 | 2. Run the Python file "sample.py".
The generated images can be found in the "outputs" folder. 6 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Stability AI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/__pycache__/image_resize.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/__pycache__/image_resize.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/checkpoints/checkpoints.txt: -------------------------------------------------------------------------------- 1 | Put unCLIP checkpoints here.
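To make the run steps in text-to-image/Readme.md above more concrete, here is a minimal, hypothetical sketch of what text-to-image sampling with BDIA-DDIM could look like. It is not the repository's sample.py: the class name `BDIASampler` imported from BDIAddim.py and its `sample()` signature are assumptions, chosen to mirror the DDIMSampler interface in the ldm code base; the config path, checkpoint path, prompt, guidance scale, and step count are illustrative only.

```python
# Hypothetical sketch of BDIA-DDIM text-to-image sampling with Stable Diffusion V2.
# The sampler class name and its interface are ASSUMED to mirror ldm's DDIMSampler;
# check BDIAddim.py / sample.py in this repository for the actual API.
import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config
from ldm.models.diffusion.BDIAddim import BDIASampler  # assumed class name

# Load the SD V2.1-base model (checkpoint downloaded as described in the Readme).
config = OmegaConf.load("configs/stable-diffusion/v2-inference.yaml")
model = instantiate_from_config(config.model)
state_dict = torch.load("checkpoints/v2-1_512-ema-pruned.ckpt", map_location="cpu")["state_dict"]
model.load_state_dict(state_dict, strict=False)
model = model.cuda().eval()

sampler = BDIASampler(model)
prompt = "a professional photograph of an astronaut riding a horse"

with torch.no_grad(), torch.autocast("cuda"):
    cond = model.get_learned_conditioning([prompt])
    uncond = model.get_learned_conditioning([""])
    # 4 latent channels and 64x64 latents correspond to 512x512 output images.
    latents, _ = sampler.sample(
        S=50,
        batch_size=1,
        shape=(4, 64, 64),
        conditioning=cond,
        unconditional_conditioning=uncond,
        unconditional_guidance_scale=9.0,
        eta=0.0,
    )
    images = model.decode_first_stage(latents)        # values roughly in [-1, 1]
    images = ((images + 1.0) / 2.0).clamp(0.0, 1.0)   # map to [0, 1] before saving
```

sample.py presumably wraps a loop of this form and writes the resulting tensors to the "outputs" folder as image files.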
-------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/karlo/decoder_900M_vit_l.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: t2i-decoder 3 | diffusion_sampler: uniform 4 | hparams: 5 | image_size: 64 6 | num_channels: 320 7 | num_res_blocks: 3 8 | channel_mult: '' 9 | attention_resolutions: 32,16,8 10 | num_heads: -1 11 | num_head_channels: 64 12 | num_heads_upsample: -1 13 | use_scale_shift_norm: true 14 | dropout: 0.1 15 | clip_dim: 768 16 | clip_emb_mult: 4 17 | text_ctx: 77 18 | xf_width: 1536 19 | xf_layers: 0 20 | xf_heads: 0 21 | xf_final_ln: false 22 | resblock_updown: true 23 | learn_sigma: true 24 | text_drop: 0.3 25 | clip_emb_type: image 26 | clip_emb_drop: 0.1 27 | use_plm: true 28 | 29 | diffusion: 30 | steps: 1000 31 | learn_sigma: true 32 | sigma_small: false 33 | noise_schedule: squaredcos_cap_v2 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_learned_sigmas: true 37 | timestep_respacing: '' 38 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/karlo/improved_sr_64_256_1.4B.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: improved_sr_64_256 3 | diffusion_sampler: uniform 4 | hparams: 5 | channels: 320 6 | depth: 3 7 | channels_multiple: 8 | - 1 9 | - 2 10 | - 3 11 | - 4 12 | dropout: 0.0 13 | 14 | diffusion: 15 | steps: 1000 16 | learn_sigma: false 17 | sigma_small: true 18 | noise_schedule: squaredcos_cap_v2 19 | use_kl: false 20 | predict_xstart: false 21 | rescale_learned_sigmas: true 22 | timestep_respacing: '7' 23 | 24 | 25 | sampling: 26 | timestep_respacing: '7' # fix 27 | clip_denoise: true 28 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/karlo/prior_1B_vit_l.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: prior 3 | diffusion_sampler: uniform 4 | hparams: 5 | text_ctx: 77 6 | xf_width: 2048 7 | xf_layers: 20 8 | xf_heads: 32 9 | xf_final_ln: true 10 | text_drop: 0.2 11 | clip_dim: 768 12 | 13 | diffusion: 14 | steps: 1000 15 | learn_sigma: false 16 | sigma_small: true 17 | noise_schedule: squaredcos_cap_v2 18 | use_kl: false 19 | predict_xstart: true 20 | rescale_learned_sigmas: false 21 | timestep_respacing: '' 22 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/intel/v2-inference-bf16.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | linear_start: 0.00085 9 | linear_end: 0.0120 10 | num_timesteps_cond: 1 11 | log_every_t: 200 12 | timesteps: 1000 13 | first_stage_key: "jpg" 14 | cond_stage_key: "txt" 15 | image_size: 64 16 | channels: 4 17 | cond_stage_trainable: false 18 | conditioning_key: crossattn 19 | monitor: val/loss_simple_ema 20 | scale_factor: 0.18215 21 | use_ema: False # we set this to false because this is an inference only config 22 | 23 | unet_config: 24 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 25 | params: 26 | use_checkpoint: False 27 | use_fp16: False 28 | use_bf16: True 29 | image_size: 32 # unused 30 | 
in_channels: 4 31 | out_channels: 4 32 | model_channels: 320 33 | attention_resolutions: [ 4, 2, 1 ] 34 | num_res_blocks: 2 35 | channel_mult: [ 1, 2, 4, 4 ] 36 | num_head_channels: 64 # need to fix for flash-attn 37 | use_spatial_transformer: True 38 | use_linear_in_transformer: True 39 | transformer_depth: 1 40 | context_dim: 1024 41 | legacy: False 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: val/rec_loss 48 | ddconfig: 49 | #attn_type: "vanilla-xformers" 50 | double_z: true 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: 57 | - 1 58 | - 2 59 | - 4 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: [] 63 | dropout: 0.0 64 | lossconfig: 65 | target: torch.nn.Identity 66 | 67 | cond_stage_config: 68 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 69 | params: 70 | freeze: True 71 | layer: "penultimate" 72 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/intel/v2-inference-fp32.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | linear_start: 0.00085 9 | linear_end: 0.0120 10 | num_timesteps_cond: 1 11 | log_every_t: 200 12 | timesteps: 1000 13 | first_stage_key: "jpg" 14 | cond_stage_key: "txt" 15 | image_size: 64 16 | channels: 4 17 | cond_stage_trainable: false 18 | conditioning_key: crossattn 19 | monitor: val/loss_simple_ema 20 | scale_factor: 0.18215 21 | use_ema: False # we set this to false because this is an inference only config 22 | 23 | unet_config: 24 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 25 | params: 26 | use_checkpoint: False 27 | use_fp16: False 28 | image_size: 32 # unused 29 | in_channels: 4 30 | out_channels: 4 31 | model_channels: 320 32 | attention_resolutions: [ 4, 2, 1 ] 33 | num_res_blocks: 2 34 | channel_mult: [ 1, 2, 4, 4 ] 35 | num_head_channels: 64 # need to fix for flash-attn 36 | use_spatial_transformer: True 37 | use_linear_in_transformer: True 38 | transformer_depth: 1 39 | context_dim: 1024 40 | legacy: False 41 | 42 | first_stage_config: 43 | target: ldm.models.autoencoder.AutoencoderKL 44 | params: 45 | embed_dim: 4 46 | monitor: val/rec_loss 47 | ddconfig: 48 | #attn_type: "vanilla-xformers" 49 | double_z: true 50 | z_channels: 4 51 | resolution: 256 52 | in_channels: 3 53 | out_ch: 3 54 | ch: 128 55 | ch_mult: 56 | - 1 57 | - 2 58 | - 4 59 | - 4 60 | num_res_blocks: 2 61 | attn_resolutions: [] 62 | dropout: 0.0 63 | lossconfig: 64 | target: torch.nn.Identity 65 | 66 | cond_stage_config: 67 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 68 | params: 69 | freeze: True 70 | layer: "penultimate" 71 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/intel/v2-inference-v-bf16.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | parameterization: "v" 9 | linear_start: 0.00085 10 | linear_end: 0.0120 11 | num_timesteps_cond: 1 12 | log_every_t: 200 
13 | timesteps: 1000 14 | first_stage_key: "jpg" 15 | cond_stage_key: "txt" 16 | image_size: 64 17 | channels: 4 18 | cond_stage_trainable: false 19 | conditioning_key: crossattn 20 | monitor: val/loss_simple_ema 21 | scale_factor: 0.18215 22 | use_ema: False # we set this to false because this is an inference only config 23 | 24 | unet_config: 25 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 26 | params: 27 | use_checkpoint: False 28 | use_fp16: False 29 | use_bf16: True 30 | image_size: 32 # unused 31 | in_channels: 4 32 | out_channels: 4 33 | model_channels: 320 34 | attention_resolutions: [ 4, 2, 1 ] 35 | num_res_blocks: 2 36 | channel_mult: [ 1, 2, 4, 4 ] 37 | num_head_channels: 64 # need to fix for flash-attn 38 | use_spatial_transformer: True 39 | use_linear_in_transformer: True 40 | transformer_depth: 1 41 | context_dim: 1024 42 | legacy: False 43 | 44 | first_stage_config: 45 | target: ldm.models.autoencoder.AutoencoderKL 46 | params: 47 | embed_dim: 4 48 | monitor: val/rec_loss 49 | ddconfig: 50 | #attn_type: "vanilla-xformers" 51 | double_z: true 52 | z_channels: 4 53 | resolution: 256 54 | in_channels: 3 55 | out_ch: 3 56 | ch: 128 57 | ch_mult: 58 | - 1 59 | - 2 60 | - 4 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [] 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | 68 | cond_stage_config: 69 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 70 | params: 71 | freeze: True 72 | layer: "penultimate" 73 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/intel/v2-inference-v-fp32.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | parameterization: "v" 9 | linear_start: 0.00085 10 | linear_end: 0.0120 11 | num_timesteps_cond: 1 12 | log_every_t: 200 13 | timesteps: 1000 14 | first_stage_key: "jpg" 15 | cond_stage_key: "txt" 16 | image_size: 64 17 | channels: 4 18 | cond_stage_trainable: false 19 | conditioning_key: crossattn 20 | monitor: val/loss_simple_ema 21 | scale_factor: 0.18215 22 | use_ema: False # we set this to false because this is an inference only config 23 | 24 | unet_config: 25 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 26 | params: 27 | use_checkpoint: False 28 | use_fp16: False 29 | image_size: 32 # unused 30 | in_channels: 4 31 | out_channels: 4 32 | model_channels: 320 33 | attention_resolutions: [ 4, 2, 1 ] 34 | num_res_blocks: 2 35 | channel_mult: [ 1, 2, 4, 4 ] 36 | num_head_channels: 64 # need to fix for flash-attn 37 | use_spatial_transformer: True 38 | use_linear_in_transformer: True 39 | transformer_depth: 1 40 | context_dim: 1024 41 | legacy: False 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: val/rec_loss 48 | ddconfig: 49 | #attn_type: "vanilla-xformers" 50 | double_z: true 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: 57 | - 1 58 | - 2 59 | - 4 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: [] 63 | dropout: 0.0 64 | lossconfig: 65 | target: torch.nn.Identity 66 | 67 | cond_stage_config: 68 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 69 | params: 70 | freeze: True 71 | layer: "penultimate" 72 | 
-------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-1-stable-unclip-h-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion 4 | params: 5 | embedding_dropout: 0.25 6 | parameterization: "v" 7 | linear_start: 0.00085 8 | linear_end: 0.0120 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 96 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn-adm 17 | scale_factor: 0.18215 18 | monitor: val/loss_simple_ema 19 | use_ema: False 20 | 21 | embedder_config: 22 | target: ldm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder 23 | 24 | noise_aug_config: 25 | target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation 26 | params: 27 | timestep_dim: 1024 28 | noise_schedule_config: 29 | timesteps: 1000 30 | beta_schedule: squaredcos_cap_v2 31 | 32 | unet_config: 33 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 34 | params: 35 | num_classes: "sequential" 36 | adm_in_channels: 2048 37 | use_checkpoint: True 38 | image_size: 32 # unused 39 | in_channels: 4 40 | out_channels: 4 41 | model_channels: 320 42 | attention_resolutions: [ 4, 2, 1 ] 43 | num_res_blocks: 2 44 | channel_mult: [ 1, 2, 4, 4 ] 45 | num_head_channels: 64 # need to fix for flash-attn 46 | use_spatial_transformer: True 47 | use_linear_in_transformer: True 48 | transformer_depth: 1 49 | context_dim: 1024 50 | legacy: False 51 | 52 | first_stage_config: 53 | target: ldm.models.autoencoder.AutoencoderKL 54 | params: 55 | embed_dim: 4 56 | monitor: val/rec_loss 57 | ddconfig: 58 | attn_type: "vanilla-xformers" 59 | double_z: true 60 | z_channels: 4 61 | resolution: 256 62 | in_channels: 3 63 | out_ch: 3 64 | ch: 128 65 | ch_mult: 66 | - 1 67 | - 2 68 | - 4 69 | - 4 70 | num_res_blocks: 2 71 | attn_resolutions: [ ] 72 | dropout: 0.0 73 | lossconfig: 74 | target: torch.nn.Identity 75 | 76 | cond_stage_config: 77 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 78 | params: 79 | freeze: True 80 | layer: "penultimate" 81 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-1-stable-unclip-l-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion 4 | params: 5 | embedding_dropout: 0.25 6 | parameterization: "v" 7 | linear_start: 0.00085 8 | linear_end: 0.0120 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 96 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn-adm 17 | scale_factor: 0.18215 18 | monitor: val/loss_simple_ema 19 | use_ema: False 20 | 21 | embedder_config: 22 | target: ldm.modules.encoders.modules.ClipImageEmbedder 23 | params: 24 | model: "ViT-L/14" 25 | 26 | noise_aug_config: 27 | target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation 28 | params: 29 | clip_stats_path: "checkpoints/karlo_models/ViT-L-14_stats.th" 30 | timestep_dim: 768 31 | noise_schedule_config: 32 | timesteps: 1000 33 | beta_schedule: squaredcos_cap_v2 34 | 35 | unet_config: 36 | target: 
ldm.modules.diffusionmodules.openaimodel.UNetModel 37 | params: 38 | num_classes: "sequential" 39 | adm_in_channels: 1536 40 | use_checkpoint: True 41 | image_size: 32 # unused 42 | in_channels: 4 43 | out_channels: 4 44 | model_channels: 320 45 | attention_resolutions: [ 4, 2, 1 ] 46 | num_res_blocks: 2 47 | channel_mult: [ 1, 2, 4, 4 ] 48 | num_head_channels: 64 # need to fix for flash-attn 49 | use_spatial_transformer: True 50 | use_linear_in_transformer: True 51 | transformer_depth: 1 52 | context_dim: 1024 53 | legacy: False 54 | 55 | first_stage_config: 56 | target: ldm.models.autoencoder.AutoencoderKL 57 | params: 58 | embed_dim: 4 59 | monitor: val/rec_loss 60 | ddconfig: 61 | attn_type: "vanilla-xformers" 62 | double_z: true 63 | z_channels: 4 64 | resolution: 256 65 | in_channels: 3 66 | out_ch: 3 67 | ch: 128 68 | ch_mult: 69 | - 1 70 | - 2 71 | - 4 72 | - 4 73 | num_res_blocks: 2 74 | attn_resolutions: [ ] 75 | dropout: 0.0 76 | lossconfig: 77 | target: torch.nn.Identity 78 | 79 | cond_stage_config: 80 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 81 | params: 82 | freeze: True 83 | layer: "penultimate" -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-inference-v.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | parameterization: "v" 6 | linear_start: 0.00085 7 | linear_end: 0.0120 8 | num_timesteps_cond: 1 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 64 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn 17 | monitor: val/loss_simple_ema 18 | scale_factor: 0.18215 19 | use_ema: False # we set this to false because this is an inference only config 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | use_checkpoint: True 25 | use_fp16: False 26 | image_size: 32 # unused 27 | in_channels: 4 28 | out_channels: 4 29 | model_channels: 320 30 | attention_resolutions: [ 4, 2, 1 ] 31 | num_res_blocks: 2 32 | channel_mult: [ 1, 2, 4, 4 ] 33 | num_head_channels: 64 # need to fix for flash-attn 34 | use_spatial_transformer: True 35 | use_linear_in_transformer: True 36 | transformer_depth: 1 37 | context_dim: 1024 38 | legacy: False 39 | 40 | first_stage_config: 41 | target: ldm.models.autoencoder.AutoencoderKL 42 | params: 43 | embed_dim: 4 44 | monitor: val/rec_loss 45 | ddconfig: 46 | #attn_type: "vanilla-xformers" 47 | double_z: true 48 | z_channels: 4 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | - 4 58 | num_res_blocks: 2 59 | attn_resolutions: [] 60 | dropout: 0.0 61 | lossconfig: 62 | target: torch.nn.Identity 63 | 64 | cond_stage_config: 65 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 66 | params: 67 | freeze: True 68 | layer: "penultimate" 69 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 
1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False # we set this to false because this is an inference only config 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | use_checkpoint: True 24 | use_fp16: False 25 | image_size: 32 # unused 26 | in_channels: 4 27 | out_channels: 4 28 | model_channels: 320 29 | attention_resolutions: [ 4, 2, 1 ] 30 | num_res_blocks: 2 31 | channel_mult: [ 1, 2, 4, 4 ] 32 | num_head_channels: 64 # need to fix for flash-attn 33 | use_spatial_transformer: True 34 | use_linear_in_transformer: True 35 | transformer_depth: 1 36 | context_dim: 1024 37 | legacy: False 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | ddconfig: 45 | #attn_type: "vanilla-xformers" 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 65 | params: 66 | freeze: True 67 | layer: "penultimate" 68 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-inpainting-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-05 3 | target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: hybrid 16 | scale_factor: 0.18215 17 | monitor: val/loss_simple_ema 18 | finetune_keys: null 19 | use_ema: False 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | use_checkpoint: True 25 | image_size: 32 # unused 26 | in_channels: 9 27 | out_channels: 4 28 | model_channels: 320 29 | attention_resolutions: [ 4, 2, 1 ] 30 | num_res_blocks: 2 31 | channel_mult: [ 1, 2, 4, 4 ] 32 | num_head_channels: 64 # need to fix for flash-attn 33 | use_spatial_transformer: True 34 | use_linear_in_transformer: True 35 | transformer_depth: 1 36 | context_dim: 1024 37 | legacy: False 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | ddconfig: 45 | #attn_type: "vanilla-xformers" 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [ ] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 65 | params: 66 | freeze: True 67 | layer: "penultimate" 68 | 69 | 70 | data: 71 | target: ldm.data.laion.WebDataModuleFromConfig 72 | params: 73 | tar_base: null # for concat as in LAION-A 74 | p_unsafe_threshold: 0.1 75 | filter_word_list: "data/filters.yaml" 76 | 
max_pwatermark: 0.45 77 | batch_size: 8 78 | num_workers: 6 79 | multinode: True 80 | min_size: 512 81 | train: 82 | shards: 83 | - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -" 84 | - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -" 85 | - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -" 86 | - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -" 87 | - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar" 88 | shuffle: 10000 89 | image_key: jpg 90 | image_transforms: 91 | - target: torchvision.transforms.Resize 92 | params: 93 | size: 512 94 | interpolation: 3 95 | - target: torchvision.transforms.RandomCrop 96 | params: 97 | size: 512 98 | postprocess: 99 | target: ldm.data.laion.AddMask 100 | params: 101 | mode: "512train-large" 102 | p_drop: 0.25 103 | # NOTE use enough shards to avoid empty validation loops in workers 104 | validation: 105 | shards: 106 | - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - " 107 | shuffle: 0 108 | image_key: jpg 109 | image_transforms: 110 | - target: torchvision.transforms.Resize 111 | params: 112 | size: 512 113 | interpolation: 3 114 | - target: torchvision.transforms.CenterCrop 115 | params: 116 | size: 512 117 | postprocess: 118 | target: ldm.data.laion.AddMask 119 | params: 120 | mode: "512train-large" 121 | p_drop: 0.25 122 | 123 | lightning: 124 | find_unused_parameters: True 125 | modelcheckpoint: 126 | params: 127 | every_n_train_steps: 5000 128 | 129 | callbacks: 130 | metrics_over_trainsteps_checkpoint: 131 | params: 132 | every_n_train_steps: 10000 133 | 134 | image_logger: 135 | target: main.ImageLogger 136 | params: 137 | enable_autocast: False 138 | disabled: False 139 | batch_frequency: 1000 140 | max_images: 4 141 | increase_log_steps: False 142 | log_first_step: False 143 | log_images_kwargs: 144 | use_ema_scope: False 145 | inpaint: False 146 | plot_progressive_rows: False 147 | plot_diffusion_rows: False 148 | N: 4 149 | unconditional_guidance_scale: 5.0 150 | unconditional_guidance_label: [""] 151 | ddim_steps: 50 # todo check these out for depth2img, 152 | ddim_eta: 0.0 # todo check these out for depth2img, 153 | 154 | trainer: 155 | benchmark: True 156 | val_check_interval: 5000000 157 | num_sanity_val_steps: 0 158 | accumulate_grad_batches: 1 159 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-midas-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-07 3 | target: ldm.models.diffusion.ddpm.LatentDepth2ImageDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: hybrid 16 | scale_factor: 0.18215 17 | monitor: val/loss_simple_ema 18 | finetune_keys: null 19 | use_ema: False 20 | 21 | depth_stage_config: 22 | target: ldm.modules.midas.api.MiDaSInference 23 | params: 24 | model_type: "dpt_hybrid" 25 | 26 | unet_config: 27 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 28 | params: 29 | use_checkpoint: True 30 | image_size: 32 # unused 31 | in_channels: 5 32 | out_channels: 4 33 | model_channels: 320 34 | 
attention_resolutions: [ 4, 2, 1 ] 35 | num_res_blocks: 2 36 | channel_mult: [ 1, 2, 4, 4 ] 37 | num_head_channels: 64 # need to fix for flash-attn 38 | use_spatial_transformer: True 39 | use_linear_in_transformer: True 40 | transformer_depth: 1 41 | context_dim: 1024 42 | legacy: False 43 | 44 | first_stage_config: 45 | target: ldm.models.autoencoder.AutoencoderKL 46 | params: 47 | embed_dim: 4 48 | monitor: val/rec_loss 49 | ddconfig: 50 | #attn_type: "vanilla-xformers" 51 | double_z: true 52 | z_channels: 4 53 | resolution: 256 54 | in_channels: 3 55 | out_ch: 3 56 | ch: 128 57 | ch_mult: 58 | - 1 59 | - 2 60 | - 4 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [ ] 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | 68 | cond_stage_config: 69 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 70 | params: 71 | freeze: True 72 | layer: "penultimate" 73 | 74 | 75 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/x4-upscaling.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentUpscaleDiffusion 4 | params: 5 | parameterization: "v" 6 | low_scale_key: "lr" 7 | linear_start: 0.0001 8 | linear_end: 0.02 9 | num_timesteps_cond: 1 10 | log_every_t: 200 11 | timesteps: 1000 12 | first_stage_key: "jpg" 13 | cond_stage_key: "txt" 14 | image_size: 128 15 | channels: 4 16 | cond_stage_trainable: false 17 | conditioning_key: "hybrid-adm" 18 | monitor: val/loss_simple_ema 19 | scale_factor: 0.08333 20 | use_ema: False 21 | 22 | low_scale_config: 23 | target: ldm.modules.diffusionmodules.upscaling.ImageConcatWithNoiseAugmentation 24 | params: 25 | noise_schedule_config: # image space 26 | linear_start: 0.0001 27 | linear_end: 0.02 28 | max_noise_level: 350 29 | 30 | unet_config: 31 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 32 | params: 33 | use_checkpoint: True 34 | num_classes: 1000 # timesteps for noise conditioning (here constant, just need one) 35 | image_size: 128 36 | in_channels: 7 37 | out_channels: 4 38 | model_channels: 256 39 | attention_resolutions: [ 2,4,8] 40 | num_res_blocks: 2 41 | channel_mult: [ 1, 2, 2, 4] 42 | disable_self_attentions: [True, True, True, False] 43 | disable_middle_self_attn: False 44 | num_heads: 8 45 | use_spatial_transformer: True 46 | transformer_depth: 1 47 | context_dim: 1024 48 | legacy: False 49 | use_linear_in_transformer: True 50 | 51 | first_stage_config: 52 | target: ldm.models.autoencoder.AutoencoderKL 53 | params: 54 | embed_dim: 4 55 | ddconfig: 56 | # attn_type: "vanilla-xformers" this model needs efficient attention to be feasible on HR data, also the decoder seems to break in half precision (UNet is fine though) 57 | double_z: True 58 | z_channels: 4 59 | resolution: 256 60 | in_channels: 3 61 | out_ch: 3 62 | ch: 128 63 | ch_mult: [ 1,2,4 ] # num_down = len(ch_mult)-1 64 | num_res_blocks: 2 65 | attn_resolutions: [ ] 66 | dropout: 0.0 67 | 68 | lossconfig: 69 | target: torch.nn.Identity 70 | 71 | cond_stage_config: 72 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 73 | params: 74 | freeze: True 75 | layer: "penultimate" 76 | 77 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/cv2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/cv2 -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/doc/UNCLIP.MD: -------------------------------------------------------------------------------- 1 | ### Stable unCLIP 2 | 3 | [unCLIP](https://openai.com/dall-e-2/) is the approach behind OpenAI's [DALL·E 2](https://openai.com/dall-e-2/), 4 | trained to invert CLIP image embeddings. 5 | We finetuned SD 2.1 to accept a CLIP ViT-L/14 image embedding in addition to the text encodings. 6 | This means that the model can be used to produce image variations, but can also be combined with a text-to-image 7 | embedding prior to yield a full text-to-image model at 768x768 resolution. 8 | 9 | If you would like to try a demo of this model on the web, please visit https://clipdrop.co/stable-diffusion-reimagine 10 | 11 | We provide two models, trained on OpenAI CLIP-L and OpenCLIP-H image embeddings, respectively, 12 | available from [https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip](https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip/tree/main). 13 | To use them, download the weights from Hugging Face and put them into the `checkpoints` folder. 14 | 15 | #### Image Variations 16 | ![image-variations-l-1](../assets/stable-samples/stable-unclip/unclip-variations.png) 17 | 18 | Diffusers integration 19 | Stable UnCLIP Image Variations is integrated with the [🧨 diffusers](https://github.com/huggingface/diffusers) library: 20 | ```python 21 | #pip install git+https://github.com/huggingface/diffusers.git transformers accelerate 22 | import requests 23 | import torch 24 | from PIL import Image 25 | from io import BytesIO 26 | 27 | from diffusers import StableUnCLIPImg2ImgPipeline 28 | 29 | #Start the StableUnCLIP Image variations pipeline 30 | pipe = StableUnCLIPImg2ImgPipeline.from_pretrained( 31 | "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=torch.float16, variant="fp16" 32 | ) 33 | pipe = pipe.to("cuda") 34 | 35 | #Get image from URL 36 | url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/tarsila_do_amaral.png" 37 | response = requests.get(url) 38 | init_image = Image.open(BytesIO(response.content)).convert("RGB") 39 | 40 | #Pipe to make the variation 41 | images = pipe(init_image).images 42 | images[0].save("tarsila_variation.png") 43 | ``` 44 | Check out the [Stable UnCLIP pipeline docs here](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_unclip) 45 | 46 | Streamlit UI demo 47 | 48 | ``` 49 | streamlit run scripts/streamlit/stableunclip.py 50 | ``` 51 | to launch a streamlit script that can be used to make image variations with both models (CLIP-L and OpenCLIP-H). 52 | These models can process a `noise_level`, which specifies an amount of Gaussian noise added to the CLIP embeddings. 53 | This can be used to increase output variance as in the following examples. 54 | 55 | ![image-variations-noise](../assets/stable-samples/stable-unclip/unclip-variations_noise.png) 56 | 57 | 58 | ### Stable Diffusion Meets Karlo 59 | ![panda](../assets/stable-samples/stable-unclip/panda.jpg) 60 | 61 | Recently, [KakaoBrain](https://kakaobrain.com/) openly released [Karlo](https://github.com/kakaobrain/karlo), a pretrained, large-scale replication of [unCLIP](https://arxiv.org/abs/2204.06125).
62 | We introduce _Stable Karlo_, a combination of the Karlo CLIP image embedding prior and Stable Diffusion v2.1-768. 63 | 64 | To run the model, first download the KARLO checkpoints 65 | ```shell 66 | mkdir -p checkpoints/karlo_models 67 | cd checkpoints/karlo_models 68 | wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/096db1af569b284eb76b3881534822d9/ViT-L-14.pt 69 | wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/0b62380a75e56f073e2844ab5199153d/ViT-L-14_stats.th 70 | wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/85626483eaca9f581e2a78d31ff905ca/prior-ckpt-step%3D01000000-of-01000000.ckpt 71 | cd ../../ 72 | ``` 73 | and the finetuned SD2.1 unCLIP-L checkpoint from [here](https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip/blob/main/sd21-unclip-l.ckpt), and put the ckpt into the `checkpoints` folder. 74 | 75 | Then, run 76 | 77 | ``` 78 | streamlit run scripts/streamlit/stableunclip.py 79 | ``` 80 | and pick the `use_karlo` option in the GUI. 81 | The script optionally supports sampling from the full Karlo model. To use it, download the 64x64 decoder and 64->256 upscaler 82 | via 83 | ```shell 84 | cd checkpoints/karlo_models 85 | wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/efdf6206d8ed593961593dc029a8affa/decoder-ckpt-step%3D01000000-of-01000000.ckpt 86 | wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/4226b831ae0279020d134281f3c31590/improved-sr-ckpt-step%3D1.2M.ckpt 87 | cd ../../ 88 | ``` 89 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/environment.yaml: -------------------------------------------------------------------------------- 1 | name: ldm 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.8.5 7 | - pip=20.3 8 | - cudatoolkit=11.3 9 | - pytorch=1.12.1 10 | - torchvision=0.13.1 11 | - numpy=1.23.1 12 | - pip: 13 | - albumentations==1.3.0 14 | - opencv-python==4.6.0.66 15 | - imageio==2.9.0 16 | - imageio-ffmpeg==0.4.2 17 | - pytorch-lightning==1.4.2 18 | - omegaconf==2.1.1 19 | - test-tube>=0.7.5 20 | - streamlit==1.12.1 21 | - einops==0.3.0 22 | - transformers==4.19.2 23 | - webdataset==0.2.5 24 | - kornia==0.6 25 | - open_clip_torch==2.0.2 26 | - invisible-watermark>=0.1.5 27 | - streamlit-drawable-canvas==0.8.0 28 | - torchmetrics==0.6.0 29 | - -e .
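One small addition to the `noise_level` remark in doc/UNCLIP.MD above: in the diffusers integration the same knob is exposed as an argument of the pipeline call, so the snippet from that document can be extended as below. This is a sketch that reuses the `pipe` and `init_image` objects defined there; the value 500 is illustrative, and the keyword arguments should be checked against the installed diffusers version.

```python
# Add Gaussian noise to the CLIP image embedding to increase output variance.
# noise_level=0 keeps the embedding unchanged; larger values give looser variations.
variations = pipe(init_image, noise_level=500, num_images_per_prompt=4).images
for i, image in enumerate(variations):
    image.save(f"tarsila_variation_noise500_{i}.png")
```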
30 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/image_resize.py: -------------------------------------------------------------------------------- 1 | # bash commands to download the data 2 | #wget http://images.cocodataset.org/zips/val2014.zip 3 | #wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip 4 | #unzip annotations_trainval2014.zip -d coco/ 5 | #unzip val2014.zip -d coco/ 6 | 7 | 8 | 9 | import cv2 10 | import os 11 | import glob 12 | 13 | def image_resize(path_source, path_des): 14 | 15 | if not os.path.exists(path_des): 16 | os.makedirs(path_des) 17 | 18 | fileList = glob.glob(os.path.join(path_source, "*.png")) 19 | 20 | for img_file in fileList: 21 | img = cv2.imread(img_file).astype(float)/255 22 | hei, width, _ = img.shape 23 | 24 | 25 | dim = min(hei, width) 26 | resized = cv2.resize(img, (int(width*256/dim),int(hei*256/dim)), interpolation = cv2.INTER_AREA) 27 | 28 | img_name = img_file.split('/')[-1].split('.')[0] 29 | cv2.imwrite(os.path.join(path_des,img_name+".jpg"),(resized*255).astype("uint8")) 30 | 31 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/__init__.py: -------------------------------------------------------------------------------- 1 | from .util import * -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/__pycache__/util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/__pycache__/util.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/data/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/data/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ldm.modules.midas.api import load_midas_transform 4 | 5 | 6 | class AddMiDaS(object): 7 | def __init__(self, model_type): 8 | super().__init__() 9 | self.transform = load_midas_transform(model_type) 10 | 11 | def pt2np(self, x): 12 | x = ((x + 1.0) * .5).detach().cpu().numpy() 13 | return x 14 | 15 | def np2pt(self, x): 16 | x = torch.from_numpy(x) * 2 - 1. 
17 | return x 18 | 19 | def __call__(self, sample): 20 | # sample['jpg'] is tensor hwc in [-1, 1] at this point 21 | x = self.pt2np(sample['jpg']) 22 | x = self.transform({"image": x})["image"] 23 | sample['midas_in'] = x 24 | return sample -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/__pycache__/autoencoder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/__pycache__/autoencoder.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ABDIAddim.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ABDIAddim.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddim.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddim.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddim_backup_2nd.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddim_backup_2nd.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddimv2.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddimv2.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/HIBDIAddim.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/HIBDIAddim.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/IIAddim.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/IIAddim.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ddim.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ddim.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ddpm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ddpm.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/plms.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/plms.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/sampling_util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/sampling_util.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/dpm_solver.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/dpm_solver.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/sampler.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/sampler.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/sampler.py: -------------------------------------------------------------------------------- 1 | """SAMPLING ONLY.""" 2 | import torch 3 | 4 | from .dpm_solver import NoiseScheduleVP, model_wrapper, DPM_Solver 5 | 6 | MODEL_TYPES = { 7 | "eps": "noise", 8 | "v": "v" 9 | } 10 | 11 | 12 | class DPMSolverSampler(object): 13 | def __init__(self, model, device=torch.device("cuda"), **kwargs): 14 | super().__init__() 15 | self.model = model 16 | self.device = device 17 | to_torch = lambda x: x.clone().detach().to(torch.float32).to(model.device) 18 | self.register_buffer('alphas_cumprod', to_torch(model.alphas_cumprod)) 19 | 20 | def register_buffer(self, name, attr): 21 | if type(attr) == torch.Tensor: 22 | if attr.device != self.device: 23 | attr = attr.to(self.device) 24 | setattr(self, name, attr) 25 | 26 | @torch.no_grad() 27 | def sample(self, 28 | S, 29 | batch_size, 30 | shape, 31 | conditioning=None, 32 | callback=None, 33 | normals_sequence=None, 34 | img_callback=None, 35 | quantize_x0=False, 36 | eta=0., 37 | mask=None, 38 | x0=None, 39 | temperature=1., 40 | noise_dropout=0., 41 | score_corrector=None, 42 | corrector_kwargs=None, 43 | verbose=True, 44 | x_T=None, 45 | log_every_t=100, 46 | unconditional_guidance_scale=1., 47 | unconditional_conditioning=None, 48 | # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... 
49 | **kwargs 50 | ): 51 | 52 | if conditioning is not None: 53 | if isinstance(conditioning, dict): 54 | ctmp = conditioning[list(conditioning.keys())[0]] 55 | while isinstance(ctmp, list): ctmp = ctmp[0] 56 | if isinstance(ctmp, torch.Tensor): 57 | cbs = ctmp.shape[0] 58 | if cbs != batch_size: 59 | print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") 60 | elif isinstance(conditioning, list): 61 | for ctmp in conditioning: 62 | if ctmp.shape[0] != batch_size: 63 | print(f"Warning: Got {ctmp.shape[0]} conditionings but batch-size is {batch_size}") 64 | else: 65 | if isinstance(conditioning, torch.Tensor): 66 | if conditioning.shape[0] != batch_size: 67 | print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") 68 | 69 | # sampling 70 | C, H, W = shape 71 | size = (batch_size, C, H, W) 72 | 73 | print(f'Data shape for DPM-Solver sampling is {size}, sampling steps {S}') 74 | 75 | device = self.model.betas.device 76 | if x_T is None: 77 | img = torch.randn(size, device=device) 78 | else: 79 | img = x_T 80 | 81 | ns = NoiseScheduleVP('discrete', alphas_cumprod=self.alphas_cumprod) 82 | 83 | 84 | model_fn = model_wrapper( 85 | lambda x, t, c: self.model.apply_model(x, t, c), 86 | ns, 87 | model_type=MODEL_TYPES[self.model.parameterization], 88 | guidance_type="classifier-free", 89 | condition=conditioning, 90 | unconditional_condition=unconditional_conditioning, 91 | guidance_scale=unconditional_guidance_scale, 92 | ) 93 | 94 | dpm_solver = DPM_Solver(model_fn, ns, predict_x0=True, thresholding=False) 95 | x = dpm_solver.sample(img, steps=S, skip_type="time_uniform", method="multistep", order=2, 96 | lower_order_final=True) 97 | 98 | return x.to(device), None 99 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 
7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') 11 | return x[(...,) + (None,) * dims_to_append] 12 | 13 | 14 | def norm_thresholding(x0, value): 15 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 16 | return x0 * (value / s) 17 | 18 | 19 | def spatial_norm_thresholding(x0, value): 20 | # b c h w 21 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 22 | return x0 * (value / s) -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/__pycache__/attention.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/__pycache__/attention.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/__pycache__/ema.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/__pycache__/ema.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/model.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/upscaling.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/upscaling.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/util.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/upscaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from functools import partial 5 | 6 | from ldm.modules.diffusionmodules.util import extract_into_tensor, make_beta_schedule 7 | from ldm.util import default 8 | 9 | 10 | class AbstractLowScaleModel(nn.Module): 11 | # for concatenating a downsampled image to the latent representation 12 | def __init__(self, noise_schedule_config=None): 13 | super(AbstractLowScaleModel, self).__init__() 14 | if noise_schedule_config is not None: 15 | self.register_schedule(**noise_schedule_config) 16 | 17 | def register_schedule(self, beta_schedule="linear", timesteps=1000, 18 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 19 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, 20 | cosine_s=cosine_s) 21 | alphas = 1. - betas 22 | alphas_cumprod = np.cumprod(alphas, axis=0) 23 | alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 24 | 25 | timesteps, = betas.shape 26 | self.num_timesteps = int(timesteps) 27 | self.linear_start = linear_start 28 | self.linear_end = linear_end 29 | assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' 30 | 31 | to_torch = partial(torch.tensor, dtype=torch.float32) 32 | 33 | self.register_buffer('betas', to_torch(betas)) 34 | self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) 35 | self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) 36 | 37 | # calculations for diffusion q(x_t | x_{t-1}) and others 38 | self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) 39 | self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) 40 | self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) 41 | self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) 42 | self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. 
/ alphas_cumprod - 1))) 43 | 44 | def q_sample(self, x_start, t, noise=None): 45 | noise = default(noise, lambda: torch.randn_like(x_start)) 46 | return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + 47 | extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) 48 | 49 | def forward(self, x): 50 | return x, None 51 | 52 | def decode(self, x): 53 | return x 54 | 55 | 56 | class SimpleImageConcat(AbstractLowScaleModel): 57 | # no noise level conditioning 58 | def __init__(self): 59 | super(SimpleImageConcat, self).__init__(noise_schedule_config=None) 60 | self.max_noise_level = 0 61 | 62 | def forward(self, x): 63 | # fix to constant noise level 64 | return x, torch.zeros(x.shape[0], device=x.device).long() 65 | 66 | 67 | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): 68 | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cuda=False): 69 | super().__init__(noise_schedule_config=noise_schedule_config) 70 | self.max_noise_level = max_noise_level 71 | 72 | def forward(self, x, noise_level=None): 73 | if noise_level is None: 74 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 75 | else: 76 | assert isinstance(noise_level, torch.Tensor) 77 | z = self.q_sample(x, noise_level) 78 | return z, noise_level 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/distributions/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/distributions/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/distributions/__pycache__/distributions.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/distributions/__pycache__/distributions.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | 
self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 | def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 
81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 
78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) 81 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/encoders/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/encoders/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/encoders/__pycache__/modules.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/encoders/__pycache__/modules.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.image_degradation.bsrgan import degradation_bsrgan_variant as degradation_fn_bsr 2 | from ldm.modules.image_degradation.bsrgan_light import degradation_bsrgan_variant as degradation_fn_bsr_light 3 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/karlo/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/models/__init__.py -------------------------------------------------------------------------------- 
/text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/models/prior_model.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Karlo-v1.0.alpha 3 | # Copyright (c) 2022 KakaoBrain. All Rights Reserved. 4 | # ------------------------------------------------------------------------------------ 5 | 6 | import copy 7 | import torch 8 | 9 | from ldm.modules.karlo.kakao.modules import create_gaussian_diffusion 10 | from ldm.modules.karlo.kakao.modules.xf import PriorTransformer 11 | 12 | 13 | class PriorDiffusionModel(torch.nn.Module): 14 | """ 15 | A prior that generates clip image feature based on the text prompt. 16 | 17 | :param config: yaml config to define the decoder. 18 | :param tokenizer: tokenizer used in clip. 19 | :param clip_mean: mean to normalize the clip image feature (zero-mean, unit variance). 20 | :param clip_std: std to noramlize the clip image feature (zero-mean, unit variance). 21 | """ 22 | 23 | def __init__(self, config, tokenizer, clip_mean, clip_std): 24 | super().__init__() 25 | 26 | self._conf = config 27 | self._model_conf = config.model.hparams 28 | self._diffusion_kwargs = dict( 29 | steps=config.diffusion.steps, 30 | learn_sigma=config.diffusion.learn_sigma, 31 | sigma_small=config.diffusion.sigma_small, 32 | noise_schedule=config.diffusion.noise_schedule, 33 | use_kl=config.diffusion.use_kl, 34 | predict_xstart=config.diffusion.predict_xstart, 35 | rescale_learned_sigmas=config.diffusion.rescale_learned_sigmas, 36 | timestep_respacing=config.diffusion.timestep_respacing, 37 | ) 38 | self._tokenizer = tokenizer 39 | 40 | self.register_buffer("clip_mean", clip_mean[None, :], persistent=False) 41 | self.register_buffer("clip_std", clip_std[None, :], persistent=False) 42 | 43 | causal_mask = self.get_causal_mask() 44 | self.register_buffer("causal_mask", causal_mask, persistent=False) 45 | 46 | self.model = PriorTransformer( 47 | text_ctx=self._model_conf.text_ctx, 48 | xf_width=self._model_conf.xf_width, 49 | xf_layers=self._model_conf.xf_layers, 50 | xf_heads=self._model_conf.xf_heads, 51 | xf_final_ln=self._model_conf.xf_final_ln, 52 | clip_dim=self._model_conf.clip_dim, 53 | ) 54 | 55 | cf_token, cf_mask = self.set_cf_text_tensor() 56 | self.register_buffer("cf_token", cf_token, persistent=False) 57 | self.register_buffer("cf_mask", cf_mask, persistent=False) 58 | 59 | @classmethod 60 | def load_from_checkpoint( 61 | cls, config, tokenizer, clip_mean, clip_std, ckpt_path, strict: bool = True 62 | ): 63 | ckpt = torch.load(ckpt_path, map_location="cpu")["state_dict"] 64 | 65 | model = cls(config, tokenizer, clip_mean, clip_std) 66 | model.load_state_dict(ckpt, strict=strict) 67 | return model 68 | 69 | def set_cf_text_tensor(self): 70 | return self._tokenizer.padded_tokens_and_mask([""], self.model.text_ctx) 71 | 72 | def get_sample_fn(self, timestep_respacing): 73 | use_ddim = timestep_respacing.startswith(("ddim", "fast")) 74 | 75 | diffusion_kwargs = copy.deepcopy(self._diffusion_kwargs) 76 | diffusion_kwargs.update(timestep_respacing=timestep_respacing) 77 | diffusion = create_gaussian_diffusion(**diffusion_kwargs) 78 | sample_fn = diffusion.ddim_sample_loop if use_ddim else diffusion.p_sample_loop 79 | 80 | return sample_fn 81 | 82 | def get_causal_mask(self): 83 | seq_len = self._model_conf.text_ctx + 4 84 | mask = torch.empty(seq_len, seq_len) 85 | mask.fill_(float("-inf")) 86 | mask.triu_(1) 87 | mask = mask[None, ...] 
88 | return mask 89 | 90 | def forward( 91 | self, 92 | txt_feat, 93 | txt_feat_seq, 94 | mask, 95 | cf_guidance_scales=None, 96 | timestep_respacing=None, 97 | denoised_fn=True, 98 | ): 99 | # cfg should be enabled in inference 100 | assert cf_guidance_scales is not None and all(cf_guidance_scales > 0.0) 101 | 102 | bsz_ = txt_feat.shape[0] 103 | bsz = bsz_ // 2 104 | 105 | def guided_model_fn(x_t, ts, **kwargs): 106 | half = x_t[: len(x_t) // 2] 107 | combined = torch.cat([half, half], dim=0) 108 | model_out = self.model(combined, ts, **kwargs) 109 | eps, rest = ( 110 | model_out[:, : int(x_t.shape[1])], 111 | model_out[:, int(x_t.shape[1]) :], 112 | ) 113 | cond_eps, uncond_eps = torch.split(eps, len(eps) // 2, dim=0) 114 | half_eps = uncond_eps + cf_guidance_scales.view(-1, 1) * ( 115 | cond_eps - uncond_eps 116 | ) 117 | eps = torch.cat([half_eps, half_eps], dim=0) 118 | return torch.cat([eps, rest], dim=1) 119 | 120 | cond = { 121 | "text_emb": txt_feat, 122 | "text_enc": txt_feat_seq, 123 | "mask": mask, 124 | "causal_mask": self.causal_mask, 125 | } 126 | sample_fn = self.get_sample_fn(timestep_respacing) 127 | sample = sample_fn( 128 | guided_model_fn, 129 | (bsz_, self.model.clip_dim), 130 | noise=None, 131 | device=txt_feat.device, 132 | clip_denoised=False, 133 | denoised_fn=lambda x: torch.clamp(x, -10, 10), 134 | model_kwargs=cond, 135 | ) 136 | sample = (sample * self.clip_std) + self.clip_mean 137 | 138 | return sample[:bsz] 139 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/models/sr_256_1k.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Karlo-v1.0.alpha 3 | # Copyright (c) 2022 KakaoBrain. All Rights Reserved. 4 | # ------------------------------------------------------------------------------------ 5 | 6 | from ldm.modules.karlo.kakao.models.sr_64_256 import SupRes64to256Progressive 7 | 8 | 9 | class SupRes256to1kProgressive(SupRes64to256Progressive): 10 | pass # no difference currently 11 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/models/sr_64_256.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Karlo-v1.0.alpha 3 | # Copyright (c) 2022 KakaoBrain. All Rights Reserved. 4 | # ------------------------------------------------------------------------------------ 5 | 6 | import copy 7 | import torch 8 | 9 | from ldm.modules.karlo.kakao.modules.unet import SuperResUNetModel 10 | from ldm.modules.karlo.kakao.modules import create_gaussian_diffusion 11 | 12 | 13 | class ImprovedSupRes64to256ProgressiveModel(torch.nn.Module): 14 | """ 15 | ImprovedSR model fine-tunes the pretrained DDPM-based SR model by using adversarial and perceptual losses. 16 | In specific, the low-resolution sample is iteratively recovered by 6 steps with the frozen pretrained SR model. 17 | In the following additional one step, a seperate fine-tuned model recovers high-frequency details. 18 | This approach greatly improves the fidelity of images of 256x256px, even with small number of reverse steps. 
19 | """ 20 | 21 | def __init__(self, config): 22 | super().__init__() 23 | 24 | self._config = config 25 | self._diffusion_kwargs = dict( 26 | steps=config.diffusion.steps, 27 | learn_sigma=config.diffusion.learn_sigma, 28 | sigma_small=config.diffusion.sigma_small, 29 | noise_schedule=config.diffusion.noise_schedule, 30 | use_kl=config.diffusion.use_kl, 31 | predict_xstart=config.diffusion.predict_xstart, 32 | rescale_learned_sigmas=config.diffusion.rescale_learned_sigmas, 33 | ) 34 | 35 | self.model_first_steps = SuperResUNetModel( 36 | in_channels=3, # auto-changed to 6 inside the model 37 | model_channels=config.model.hparams.channels, 38 | out_channels=3, 39 | num_res_blocks=config.model.hparams.depth, 40 | attention_resolutions=(), # no attention 41 | dropout=config.model.hparams.dropout, 42 | channel_mult=config.model.hparams.channels_multiple, 43 | resblock_updown=True, 44 | use_middle_attention=False, 45 | ) 46 | self.model_last_step = SuperResUNetModel( 47 | in_channels=3, # auto-changed to 6 inside the model 48 | model_channels=config.model.hparams.channels, 49 | out_channels=3, 50 | num_res_blocks=config.model.hparams.depth, 51 | attention_resolutions=(), # no attention 52 | dropout=config.model.hparams.dropout, 53 | channel_mult=config.model.hparams.channels_multiple, 54 | resblock_updown=True, 55 | use_middle_attention=False, 56 | ) 57 | 58 | @classmethod 59 | def load_from_checkpoint(cls, config, ckpt_path, strict: bool = True): 60 | ckpt = torch.load(ckpt_path, map_location="cpu")["state_dict"] 61 | 62 | model = cls(config) 63 | model.load_state_dict(ckpt, strict=strict) 64 | return model 65 | 66 | def get_sample_fn(self, timestep_respacing): 67 | diffusion_kwargs = copy.deepcopy(self._diffusion_kwargs) 68 | diffusion_kwargs.update(timestep_respacing=timestep_respacing) 69 | diffusion = create_gaussian_diffusion(**diffusion_kwargs) 70 | return diffusion.p_sample_loop_progressive_for_improved_sr 71 | 72 | def forward(self, low_res, timestep_respacing="7", **kwargs): 73 | assert ( 74 | timestep_respacing == "7" 75 | ), "different respacing method may work, but no guaranteed" 76 | 77 | sample_fn = self.get_sample_fn(timestep_respacing) 78 | sample_outputs = sample_fn( 79 | self.model_first_steps, 80 | self.model_last_step, 81 | shape=low_res.shape, 82 | clip_denoised=True, 83 | model_kwargs=dict(low_res=low_res), 84 | **kwargs, 85 | ) 86 | for x in sample_outputs: 87 | sample = x["sample"] 88 | yield sample 89 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Adapted from Guided-Diffusion repo (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | 6 | from .diffusion import gaussian_diffusion as gd 7 | from .diffusion.respace import ( 8 | SpacedDiffusion, 9 | space_timesteps, 10 | ) 11 | 12 | 13 | def create_gaussian_diffusion( 14 | steps, 15 | learn_sigma, 16 | sigma_small, 17 | noise_schedule, 18 | use_kl, 19 | predict_xstart, 20 | rescale_learned_sigmas, 21 | timestep_respacing, 22 | ): 23 | betas = gd.get_named_beta_schedule(noise_schedule, steps) 24 | if use_kl: 25 | loss_type = gd.LossType.RESCALED_KL 26 | elif rescale_learned_sigmas: 27 | loss_type = gd.LossType.RESCALED_MSE 28 | else: 29 | 
loss_type = gd.LossType.MSE 30 | if not timestep_respacing: 31 | timestep_respacing = [steps] 32 | 33 | return SpacedDiffusion( 34 | use_timesteps=space_timesteps(steps, timestep_respacing), 35 | betas=betas, 36 | model_mean_type=( 37 | gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X 38 | ), 39 | model_var_type=( 40 | ( 41 | gd.ModelVarType.FIXED_LARGE 42 | if not sigma_small 43 | else gd.ModelVarType.FIXED_SMALL 44 | ) 45 | if not learn_sigma 46 | else gd.ModelVarType.LEARNED_RANGE 47 | ), 48 | loss_type=loss_type, 49 | ) 50 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/modules/diffusion/respace.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Adapted from Guided-Diffusion repo (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | 6 | import torch as th 7 | 8 | from .gaussian_diffusion import GaussianDiffusion 9 | 10 | 11 | def space_timesteps(num_timesteps, section_counts): 12 | """ 13 | Create a list of timesteps to use from an original diffusion process, 14 | given the number of timesteps we want to take from equally-sized portions 15 | of the original process. 16 | 17 | For example, if there's 300 timesteps and the section counts are [10,15,20] 18 | then the first 100 timesteps are strided to be 10 timesteps, the second 100 19 | are strided to be 15 timesteps, and the final 100 are strided to be 20. 20 | 21 | :param num_timesteps: the number of diffusion steps in the original 22 | process to divide up. 23 | :param section_counts: either a list of numbers, or a string containing 24 | comma-separated numbers, indicating the step count 25 | per section. As a special case, use "ddimN" where N 26 | is a number of steps to use the striding from the 27 | DDIM paper. 28 | :return: a set of diffusion steps from the original process to use. 29 | """ 30 | if isinstance(section_counts, str): 31 | if section_counts.startswith("ddim"): 32 | desired_count = int(section_counts[len("ddim") :]) 33 | for i in range(1, num_timesteps): 34 | if len(range(0, num_timesteps, i)) == desired_count: 35 | return set(range(0, num_timesteps, i)) 36 | raise ValueError( 37 | f"cannot create exactly {num_timesteps} steps with an integer stride" 38 | ) 39 | elif section_counts == "fast27": 40 | steps = space_timesteps(num_timesteps, "10,10,3,2,2") 41 | # Help reduce DDIM artifacts from noisiest timesteps. 
42 | steps.remove(num_timesteps - 1) 43 | steps.add(num_timesteps - 3) 44 | return steps 45 | section_counts = [int(x) for x in section_counts.split(",")] 46 | size_per = num_timesteps // len(section_counts) 47 | extra = num_timesteps % len(section_counts) 48 | start_idx = 0 49 | all_steps = [] 50 | for i, section_count in enumerate(section_counts): 51 | size = size_per + (1 if i < extra else 0) 52 | if size < section_count: 53 | raise ValueError( 54 | f"cannot divide section of {size} steps into {section_count}" 55 | ) 56 | if section_count <= 1: 57 | frac_stride = 1 58 | else: 59 | frac_stride = (size - 1) / (section_count - 1) 60 | cur_idx = 0.0 61 | taken_steps = [] 62 | for _ in range(section_count): 63 | taken_steps.append(start_idx + round(cur_idx)) 64 | cur_idx += frac_stride 65 | all_steps += taken_steps 66 | start_idx += size 67 | return set(all_steps) 68 | 69 | 70 | class SpacedDiffusion(GaussianDiffusion): 71 | """ 72 | A diffusion process which can skip steps in a base diffusion process. 73 | 74 | :param use_timesteps: a collection (sequence or set) of timesteps from the 75 | original diffusion process to retain. 76 | :param kwargs: the kwargs to create the base diffusion process. 77 | """ 78 | 79 | def __init__(self, use_timesteps, **kwargs): 80 | self.use_timesteps = set(use_timesteps) 81 | self.original_num_steps = len(kwargs["betas"]) 82 | 83 | base_diffusion = GaussianDiffusion(**kwargs) # pylint: disable=missing-kwoa 84 | last_alpha_cumprod = 1.0 85 | new_betas = [] 86 | timestep_map = [] 87 | for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod): 88 | if i in self.use_timesteps: 89 | new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) 90 | last_alpha_cumprod = alpha_cumprod 91 | timestep_map.append(i) 92 | kwargs["betas"] = th.tensor(new_betas).numpy() 93 | super().__init__(**kwargs) 94 | self.register_buffer("timestep_map", th.tensor(timestep_map), persistent=False) 95 | 96 | def p_mean_variance(self, model, *args, **kwargs): 97 | return super().p_mean_variance(self._wrap_model(model), *args, **kwargs) 98 | 99 | def condition_mean(self, cond_fn, *args, **kwargs): 100 | return super().condition_mean(self._wrap_model(cond_fn), *args, **kwargs) 101 | 102 | def condition_score(self, cond_fn, *args, **kwargs): 103 | return super().condition_score(self._wrap_model(cond_fn), *args, **kwargs) 104 | 105 | def _wrap_model(self, model): 106 | def wrapped(x, ts, **kwargs): 107 | ts_cpu = ts.detach().to("cpu") 108 | return model( 109 | x, self.timestep_map[ts_cpu].to(device=ts.device, dtype=ts.dtype), **kwargs 110 | ) 111 | 112 | return wrapped 113 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/modules/nn.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Adapted from Guided-Diffusion repo (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | import math 6 | 7 | import torch as th 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | class GroupNorm32(nn.GroupNorm): 13 | def __init__(self, num_groups, num_channels, swish, eps=1e-5): 14 | super().__init__(num_groups=num_groups, num_channels=num_channels, eps=eps) 15 | self.swish = swish 16 | 17 | def forward(self, x): 18 | y = super().forward(x.float()).to(x.dtype) 19 | if self.swish == 
1.0: 20 | y = F.silu(y) 21 | elif self.swish: 22 | y = y * F.sigmoid(y * float(self.swish)) 23 | return y 24 | 25 | 26 | def conv_nd(dims, *args, **kwargs): 27 | """ 28 | Create a 1D, 2D, or 3D convolution module. 29 | """ 30 | if dims == 1: 31 | return nn.Conv1d(*args, **kwargs) 32 | elif dims == 2: 33 | return nn.Conv2d(*args, **kwargs) 34 | elif dims == 3: 35 | return nn.Conv3d(*args, **kwargs) 36 | raise ValueError(f"unsupported dimensions: {dims}") 37 | 38 | 39 | def linear(*args, **kwargs): 40 | """ 41 | Create a linear module. 42 | """ 43 | return nn.Linear(*args, **kwargs) 44 | 45 | 46 | def avg_pool_nd(dims, *args, **kwargs): 47 | """ 48 | Create a 1D, 2D, or 3D average pooling module. 49 | """ 50 | if dims == 1: 51 | return nn.AvgPool1d(*args, **kwargs) 52 | elif dims == 2: 53 | return nn.AvgPool2d(*args, **kwargs) 54 | elif dims == 3: 55 | return nn.AvgPool3d(*args, **kwargs) 56 | raise ValueError(f"unsupported dimensions: {dims}") 57 | 58 | 59 | def zero_module(module): 60 | """ 61 | Zero out the parameters of a module and return it. 62 | """ 63 | for p in module.parameters(): 64 | p.detach().zero_() 65 | return module 66 | 67 | 68 | def scale_module(module, scale): 69 | """ 70 | Scale the parameters of a module and return it. 71 | """ 72 | for p in module.parameters(): 73 | p.detach().mul_(scale) 74 | return module 75 | 76 | 77 | def normalization(channels, swish=0.0): 78 | """ 79 | Make a standard normalization layer, with an optional swish activation. 80 | 81 | :param channels: number of input channels. 82 | :return: an nn.Module for normalization. 83 | """ 84 | return GroupNorm32(num_channels=channels, num_groups=32, swish=swish) 85 | 86 | 87 | def timestep_embedding(timesteps, dim, max_period=10000): 88 | """ 89 | Create sinusoidal timestep embeddings. 90 | 91 | :param timesteps: a 1-D Tensor of N indices, one per batch element. 92 | These may be fractional. 93 | :param dim: the dimension of the output. 94 | :param max_period: controls the minimum frequency of the embeddings. 95 | :return: an [N x dim] Tensor of positional embeddings. 96 | """ 97 | half = dim // 2 98 | freqs = th.exp( 99 | -math.log(max_period) 100 | * th.arange(start=0, end=half, dtype=th.float32, device=timesteps.device) 101 | / half 102 | ) 103 | args = timesteps[:, None].float() * freqs[None] 104 | embedding = th.cat([th.cos(args), th.sin(args)], dim=-1) 105 | if dim % 2: 106 | embedding = th.cat([embedding, th.zeros_like(embedding[:, :1])], dim=-1) 107 | return embedding 108 | 109 | 110 | def mean_flat(tensor): 111 | """ 112 | Take the mean over all non-batch dimensions. 113 | """ 114 | return tensor.mean(dim=list(range(1, len(tensor.shape)))) 115 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/modules/resample.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Modified from Guided-Diffusion (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | from abc import abstractmethod 6 | 7 | import torch as th 8 | 9 | 10 | def create_named_schedule_sampler(name, diffusion): 11 | """ 12 | Create a ScheduleSampler from a library of pre-defined samplers. 13 | 14 | :param name: the name of the sampler. 15 | :param diffusion: the diffusion object to sample for. 
16 | """ 17 | if name == "uniform": 18 | return UniformSampler(diffusion) 19 | else: 20 | raise NotImplementedError(f"unknown schedule sampler: {name}") 21 | 22 | 23 | class ScheduleSampler(th.nn.Module): 24 | """ 25 | A distribution over timesteps in the diffusion process, intended to reduce 26 | variance of the objective. 27 | 28 | By default, samplers perform unbiased importance sampling, in which the 29 | objective's mean is unchanged. 30 | However, subclasses may override sample() to change how the resampled 31 | terms are reweighted, allowing for actual changes in the objective. 32 | """ 33 | 34 | @abstractmethod 35 | def weights(self): 36 | """ 37 | Get a numpy array of weights, one per diffusion step. 38 | 39 | The weights needn't be normalized, but must be positive. 40 | """ 41 | 42 | def sample(self, batch_size, device): 43 | """ 44 | Importance-sample timesteps for a batch. 45 | 46 | :param batch_size: the number of timesteps. 47 | :param device: the torch device to save to. 48 | :return: a tuple (timesteps, weights): 49 | - timesteps: a tensor of timestep indices. 50 | - weights: a tensor of weights to scale the resulting losses. 51 | """ 52 | w = self.weights() 53 | p = w / th.sum(w) 54 | indices = p.multinomial(batch_size, replacement=True) 55 | weights = 1 / (len(p) * p[indices]) 56 | return indices, weights 57 | 58 | 59 | class UniformSampler(ScheduleSampler): 60 | def __init__(self, diffusion): 61 | super(UniformSampler, self).__init__() 62 | self.diffusion = diffusion 63 | self.register_buffer( 64 | "_weights", th.ones([diffusion.num_timesteps]), persistent=False 65 | ) 66 | 67 | def weights(self): 68 | return self._weights 69 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/template.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Karlo-v1.0.alpha 3 | # Copyright (c) 2022 KakaoBrain. All Rights Reserved. 
4 | # ------------------------------------------------------------------------------------ 5 | 6 | import os 7 | import logging 8 | import torch 9 | 10 | from omegaconf import OmegaConf 11 | 12 | from ldm.modules.karlo.kakao.models.clip import CustomizedCLIP, CustomizedTokenizer 13 | from ldm.modules.karlo.kakao.models.prior_model import PriorDiffusionModel 14 | from ldm.modules.karlo.kakao.models.decoder_model import Text2ImProgressiveModel 15 | from ldm.modules.karlo.kakao.models.sr_64_256 import ImprovedSupRes64to256ProgressiveModel 16 | 17 | 18 | SAMPLING_CONF = { 19 | "default": { 20 | "prior_sm": "25", 21 | "prior_n_samples": 1, 22 | "prior_cf_scale": 4.0, 23 | "decoder_sm": "50", 24 | "decoder_cf_scale": 8.0, 25 | "sr_sm": "7", 26 | }, 27 | "fast": { 28 | "prior_sm": "25", 29 | "prior_n_samples": 1, 30 | "prior_cf_scale": 4.0, 31 | "decoder_sm": "25", 32 | "decoder_cf_scale": 8.0, 33 | "sr_sm": "7", 34 | }, 35 | } 36 | 37 | CKPT_PATH = { 38 | "prior": "prior-ckpt-step=01000000-of-01000000.ckpt", 39 | "decoder": "decoder-ckpt-step=01000000-of-01000000.ckpt", 40 | "sr_256": "improved-sr-ckpt-step=1.2M.ckpt", 41 | } 42 | 43 | 44 | class BaseSampler: 45 | _PRIOR_CLASS = PriorDiffusionModel 46 | _DECODER_CLASS = Text2ImProgressiveModel 47 | _SR256_CLASS = ImprovedSupRes64to256ProgressiveModel 48 | 49 | def __init__( 50 | self, 51 | root_dir: str, 52 | sampling_type: str = "fast", 53 | ): 54 | self._root_dir = root_dir 55 | 56 | sampling_type = SAMPLING_CONF[sampling_type] 57 | self._prior_sm = sampling_type["prior_sm"] 58 | self._prior_n_samples = sampling_type["prior_n_samples"] 59 | self._prior_cf_scale = sampling_type["prior_cf_scale"] 60 | 61 | assert self._prior_n_samples == 1 62 | 63 | self._decoder_sm = sampling_type["decoder_sm"] 64 | self._decoder_cf_scale = sampling_type["decoder_cf_scale"] 65 | 66 | self._sr_sm = sampling_type["sr_sm"] 67 | 68 | def __repr__(self): 69 | line = "" 70 | line += f"Prior, sampling method: {self._prior_sm}, cf_scale: {self._prior_cf_scale}\n" 71 | line += f"Decoder, sampling method: {self._decoder_sm}, cf_scale: {self._decoder_cf_scale}\n" 72 | line += f"SR(64->256), sampling method: {self._sr_sm}" 73 | 74 | return line 75 | 76 | def load_clip(self, clip_path: str): 77 | clip = CustomizedCLIP.load_from_checkpoint( 78 | os.path.join(self._root_dir, clip_path) 79 | ) 80 | clip = torch.jit.script(clip) 81 | clip.cuda() 82 | clip.eval() 83 | 84 | self._clip = clip 85 | self._tokenizer = CustomizedTokenizer() 86 | 87 | def load_prior( 88 | self, 89 | ckpt_path: str, 90 | clip_stat_path: str, 91 | prior_config: str = "configs/prior_1B_vit_l.yaml" 92 | ): 93 | logging.info(f"Loading prior: {ckpt_path}") 94 | 95 | config = OmegaConf.load(prior_config) 96 | clip_mean, clip_std = torch.load( 97 | os.path.join(self._root_dir, clip_stat_path), map_location="cpu" 98 | ) 99 | 100 | prior = self._PRIOR_CLASS.load_from_checkpoint( 101 | config, 102 | self._tokenizer, 103 | clip_mean, 104 | clip_std, 105 | os.path.join(self._root_dir, ckpt_path), 106 | strict=True, 107 | ) 108 | prior.cuda() 109 | prior.eval() 110 | logging.info("done.") 111 | 112 | self._prior = prior 113 | 114 | def load_decoder(self, ckpt_path: str, decoder_config: str = "configs/decoder_900M_vit_l.yaml"): 115 | logging.info(f"Loading decoder: {ckpt_path}") 116 | 117 | config = OmegaConf.load(decoder_config) 118 | decoder = self._DECODER_CLASS.load_from_checkpoint( 119 | config, 120 | self._tokenizer, 121 | os.path.join(self._root_dir, ckpt_path), 122 | strict=True, 123 | ) 124 | decoder.cuda() 
125 | decoder.eval() 126 | logging.info("done.") 127 | 128 | self._decoder = decoder 129 | 130 | def load_sr_64_256(self, ckpt_path: str, sr_config: str = "configs/improved_sr_64_256_1.4B.yaml"): 131 | logging.info(f"Loading SR(64->256): {ckpt_path}") 132 | 133 | config = OmegaConf.load(sr_config) 134 | sr = self._SR256_CLASS.load_from_checkpoint( 135 | config, os.path.join(self._root_dir, ckpt_path), strict=True 136 | ) 137 | sr.cuda() 138 | sr.eval() 139 | logging.info("done.") 140 | 141 | self._sr_64_256 = sr -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/midas/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/midas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/midas/midas/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/midas/base_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BaseModel(torch.nn.Module): 5 | def load(self, path): 6 | """Load model from file. 7 | 8 | Args: 9 | path (str): file path 10 | """ 11 | parameters = torch.load(path, map_location=torch.device('cpu')) 12 | 13 | if "optimizer" in parameters: 14 | parameters = parameters["model"] 15 | 16 | self.load_state_dict(parameters) 17 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/midas/dpt_depth.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .base_model import BaseModel 6 | from .blocks import ( 7 | FeatureFusionBlock, 8 | FeatureFusionBlock_custom, 9 | Interpolate, 10 | _make_encoder, 11 | forward_vit, 12 | ) 13 | 14 | 15 | def _make_fusion_block(features, use_bn): 16 | return FeatureFusionBlock_custom( 17 | features, 18 | nn.ReLU(False), 19 | deconv=False, 20 | bn=use_bn, 21 | expand=False, 22 | align_corners=True, 23 | ) 24 | 25 | 26 | class DPT(BaseModel): 27 | def __init__( 28 | self, 29 | head, 30 | features=256, 31 | backbone="vitb_rn50_384", 32 | readout="project", 33 | channels_last=False, 34 | use_bn=False, 35 | ): 36 | 37 | super(DPT, self).__init__() 38 | 39 | self.channels_last = channels_last 40 | 41 | hooks = { 42 | "vitb_rn50_384": [0, 1, 8, 11], 43 | "vitb16_384": [2, 5, 8, 11], 44 | "vitl16_384": [5, 11, 17, 23], 45 | } 46 | 47 | # Instantiate backbone and reassemble blocks 48 | self.pretrained, self.scratch = _make_encoder( 49 | backbone, 50 | features, 51 | False, # Set to true of you want to train from scratch, uses ImageNet weights 52 | groups=1, 53 | expand=False, 54 | exportable=False, 55 | hooks=hooks[backbone], 56 | use_readout=readout, 57 | ) 58 | 59 | self.scratch.refinenet1 = _make_fusion_block(features, use_bn) 60 | self.scratch.refinenet2 = _make_fusion_block(features, use_bn) 61 | self.scratch.refinenet3 = 
_make_fusion_block(features, use_bn) 62 | self.scratch.refinenet4 = _make_fusion_block(features, use_bn) 63 | 64 | self.scratch.output_conv = head 65 | 66 | 67 | def forward(self, x): 68 | if self.channels_last == True: 69 | x.contiguous(memory_format=torch.channels_last) 70 | 71 | layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x) 72 | 73 | layer_1_rn = self.scratch.layer1_rn(layer_1) 74 | layer_2_rn = self.scratch.layer2_rn(layer_2) 75 | layer_3_rn = self.scratch.layer3_rn(layer_3) 76 | layer_4_rn = self.scratch.layer4_rn(layer_4) 77 | 78 | path_4 = self.scratch.refinenet4(layer_4_rn) 79 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn) 80 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn) 81 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn) 82 | 83 | out = self.scratch.output_conv(path_1) 84 | 85 | return out 86 | 87 | 88 | class DPTDepthModel(DPT): 89 | def __init__(self, path=None, non_negative=True, **kwargs): 90 | features = kwargs["features"] if "features" in kwargs else 256 91 | 92 | head = nn.Sequential( 93 | nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1), 94 | Interpolate(scale_factor=2, mode="bilinear", align_corners=True), 95 | nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1), 96 | nn.ReLU(True), 97 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), 98 | nn.ReLU(True) if non_negative else nn.Identity(), 99 | nn.Identity(), 100 | ) 101 | 102 | super().__init__(head, **kwargs) 103 | 104 | if path is not None: 105 | self.load(path) 106 | 107 | def forward(self, x): 108 | return super().forward(x).squeeze(dim=1) 109 | 110 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/midas/midas_net.py: -------------------------------------------------------------------------------- 1 | """MidashNet: Network for monocular depth estimation trained by mixing several datasets. 2 | This file contains code that is adapted from 3 | https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .base_model import BaseModel 9 | from .blocks import FeatureFusionBlock, Interpolate, _make_encoder 10 | 11 | 12 | class MidasNet(BaseModel): 13 | """Network for monocular depth estimation. 14 | """ 15 | 16 | def __init__(self, path=None, features=256, non_negative=True): 17 | """Init. 18 | 19 | Args: 20 | path (str, optional): Path to saved model. Defaults to None. 21 | features (int, optional): Number of features. Defaults to 256. 22 | backbone (str, optional): Backbone network for encoder. 
Defaults to resnet50 23 | """ 24 | print("Loading weights: ", path) 25 | 26 | super(MidasNet, self).__init__() 27 | 28 | use_pretrained = False if path is None else True 29 | 30 | self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) 31 | 32 | self.scratch.refinenet4 = FeatureFusionBlock(features) 33 | self.scratch.refinenet3 = FeatureFusionBlock(features) 34 | self.scratch.refinenet2 = FeatureFusionBlock(features) 35 | self.scratch.refinenet1 = FeatureFusionBlock(features) 36 | 37 | self.scratch.output_conv = nn.Sequential( 38 | nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), 39 | Interpolate(scale_factor=2, mode="bilinear"), 40 | nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), 41 | nn.ReLU(True), 42 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), 43 | nn.ReLU(True) if non_negative else nn.Identity(), 44 | ) 45 | 46 | if path: 47 | self.load(path) 48 | 49 | def forward(self, x): 50 | """Forward pass. 51 | 52 | Args: 53 | x (tensor): input data (image) 54 | 55 | Returns: 56 | tensor: depth 57 | """ 58 | 59 | layer_1 = self.pretrained.layer1(x) 60 | layer_2 = self.pretrained.layer2(layer_1) 61 | layer_3 = self.pretrained.layer3(layer_2) 62 | layer_4 = self.pretrained.layer4(layer_3) 63 | 64 | layer_1_rn = self.scratch.layer1_rn(layer_1) 65 | layer_2_rn = self.scratch.layer2_rn(layer_2) 66 | layer_3_rn = self.scratch.layer3_rn(layer_3) 67 | layer_4_rn = self.scratch.layer4_rn(layer_4) 68 | 69 | path_4 = self.scratch.refinenet4(layer_4_rn) 70 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn) 71 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn) 72 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn) 73 | 74 | out = self.scratch.output_conv(path_1) 75 | 76 | return torch.squeeze(out, dim=1) 77 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/utils.py: -------------------------------------------------------------------------------- 1 | """Utils for monoDepth.""" 2 | import sys 3 | import re 4 | import numpy as np 5 | import cv2 6 | import torch 7 | 8 | 9 | def read_pfm(path): 10 | """Read pfm file. 11 | 12 | Args: 13 | path (str): path to file 14 | 15 | Returns: 16 | tuple: (data, scale) 17 | """ 18 | with open(path, "rb") as file: 19 | 20 | color = None 21 | width = None 22 | height = None 23 | scale = None 24 | endian = None 25 | 26 | header = file.readline().rstrip() 27 | if header.decode("ascii") == "PF": 28 | color = True 29 | elif header.decode("ascii") == "Pf": 30 | color = False 31 | else: 32 | raise Exception("Not a PFM file: " + path) 33 | 34 | dim_match = re.match(r"^(\d+)\s(\d+)\s$", file.readline().decode("ascii")) 35 | if dim_match: 36 | width, height = list(map(int, dim_match.groups())) 37 | else: 38 | raise Exception("Malformed PFM header.") 39 | 40 | scale = float(file.readline().decode("ascii").rstrip()) 41 | if scale < 0: 42 | # little-endian 43 | endian = "<" 44 | scale = -scale 45 | else: 46 | # big-endian 47 | endian = ">" 48 | 49 | data = np.fromfile(file, endian + "f") 50 | shape = (height, width, 3) if color else (height, width) 51 | 52 | data = np.reshape(data, shape) 53 | data = np.flipud(data) 54 | 55 | return data, scale 56 | 57 | 58 | def write_pfm(path, image, scale=1): 59 | """Write pfm file. 60 | 61 | Args: 62 | path (str): pathto file 63 | image (array): data 64 | scale (int, optional): Scale. Defaults to 1. 
65 | """ 66 | 67 | with open(path, "wb") as file: 68 | color = None 69 | 70 | if image.dtype.name != "float32": 71 | raise Exception("Image dtype must be float32.") 72 | 73 | image = np.flipud(image) 74 | 75 | if len(image.shape) == 3 and image.shape[2] == 3: # color image 76 | color = True 77 | elif ( 78 | len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1 79 | ): # greyscale 80 | color = False 81 | else: 82 | raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.") 83 | 84 | file.write("PF\n" if color else "Pf\n".encode()) 85 | file.write("%d %d\n".encode() % (image.shape[1], image.shape[0])) 86 | 87 | endian = image.dtype.byteorder 88 | 89 | if endian == "<" or endian == "=" and sys.byteorder == "little": 90 | scale = -scale 91 | 92 | file.write("%f\n".encode() % scale) 93 | 94 | image.tofile(file) 95 | 96 | 97 | def read_image(path): 98 | """Read image and output RGB image (0-1). 99 | 100 | Args: 101 | path (str): path to file 102 | 103 | Returns: 104 | array: RGB image (0-1) 105 | """ 106 | img = cv2.imread(path) 107 | 108 | if img.ndim == 2: 109 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 110 | 111 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 112 | 113 | return img 114 | 115 | 116 | def resize_image(img): 117 | """Resize image and make it fit for network. 118 | 119 | Args: 120 | img (array): image 121 | 122 | Returns: 123 | tensor: data ready for network 124 | """ 125 | height_orig = img.shape[0] 126 | width_orig = img.shape[1] 127 | 128 | if width_orig > height_orig: 129 | scale = width_orig / 384 130 | else: 131 | scale = height_orig / 384 132 | 133 | height = (np.ceil(height_orig / scale / 32) * 32).astype(int) 134 | width = (np.ceil(width_orig / scale / 32) * 32).astype(int) 135 | 136 | img_resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA) 137 | 138 | img_resized = ( 139 | torch.from_numpy(np.transpose(img_resized, (2, 0, 1))).contiguous().float() 140 | ) 141 | img_resized = img_resized.unsqueeze(0) 142 | 143 | return img_resized 144 | 145 | 146 | def resize_depth(depth, width, height): 147 | """Resize depth map and bring to CPU (numpy). 148 | 149 | Args: 150 | depth (tensor): depth 151 | width (int): image width 152 | height (int): image height 153 | 154 | Returns: 155 | array: processed depth 156 | """ 157 | depth = torch.squeeze(depth[0, :, :, :]).to("cpu") 158 | 159 | depth_resized = cv2.resize( 160 | depth.numpy(), (width, height), interpolation=cv2.INTER_CUBIC 161 | ) 162 | 163 | return depth_resized 164 | 165 | def write_depth(path, depth, bits=1): 166 | """Write depth map to pfm and png file. 
167 | 168 | Args: 169 | path (str): filepath without extension 170 | depth (array): depth 171 | """ 172 | write_pfm(path + ".pfm", depth.astype(np.float32)) 173 | 174 | depth_min = depth.min() 175 | depth_max = depth.max() 176 | 177 | max_val = (2**(8*bits))-1 178 | 179 | if depth_max - depth_min > np.finfo("float").eps: 180 | out = max_val * (depth - depth_min) / (depth_max - depth_min) 181 | else: 182 | out = np.zeros(depth.shape, dtype=depth.type) 183 | 184 | if bits == 1: 185 | cv2.imwrite(path + ".png", out.astype("uint8")) 186 | elif bits == 2: 187 | cv2.imwrite(path + ".png", out.astype("uint16")) 188 | 189 | return 190 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/pd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/pd -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/requirements.txt: -------------------------------------------------------------------------------- 1 | albumentations==0.4.3 2 | opencv-python 3 | pudb==2019.2 4 | imageio==2.9.0 5 | imageio-ffmpeg==0.4.2 6 | pytorch-lightning==1.4.2 7 | torchmetrics==0.6 8 | omegaconf==2.1.1 9 | test-tube>=0.7.5 10 | streamlit>=0.73.1 11 | einops==0.3.0 12 | transformers==4.19.2 13 | webdataset==0.2.5 14 | open-clip-torch==2.7.0 15 | gradio==3.13.2 16 | kornia==0.6 17 | invisible-watermark>=0.1.5 18 | streamlit-drawable-canvas==0.8.0 19 | -e . 20 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/sample.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | #text_prompt = "a professional photograph of an astronaut riding a horse" 4 | text_prompt = '"A man dressed for the snowy mountain looks at the camera"' 5 | 6 | #ddim 7 | folder_DDIM = "outputs/txt2img-samples_DDIM" 8 | num_step = 10 9 | checkpoint = ".../v2-1_512-ema-pruned.ckpt" 10 | cmd = 'python txt2img.py --ddim --prompt ' + text_prompt + ' --ckpt ' + checkpoint +' --config "configs/stable-diffusion/v2-inference.yaml" --steps ' +str(num_step)+ ' --n_iter 1 --outdir ' +folder_DDIM+ ' --device "cuda" --seed 1 --n_samples 3 --precision full' 11 | os.system(cmd) 12 | 13 | 14 | #BDIAddim 15 | folder_BDIADDIM = "outputs/txt2img-samples_BDIADDIM" 16 | num_step = 10 17 | gamma = 0.5 # the parameter gamma is within the range [0, 1] 18 | checkpoint = ".../v2-1_512-ema-pruned.ckpt" 19 | cmd = 'python txt2img.py --bdiaddim --gamma ' +str(gamma) + ' --prompt ' + text_prompt + ' --ckpt ' + checkpoint +' --config "configs/stable-diffusion/v2-inference.yaml" --steps ' +str(num_step)+ ' --n_iter 1 --outdir ' + folder_BDIADDIM + ' --device "cuda" --seed 1 --n_samples 3 --precision full' 20 | os.system(cmd) 21 | 22 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/scripts/tests/test_watermark.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import fire 3 | from imwatermark import WatermarkDecoder 4 | 5 | 6 | def testit(img_path): 7 | bgr = cv2.imread(img_path) 8 | decoder = WatermarkDecoder('bytes', 136) 9 | watermark = decoder.decode(bgr, 'dwtDct') 10 | try: 11 | dec = watermark.decode('utf-8') 12 | except: 13 | dec = "null" 14 | print(dec) 15 | 16 | 17 | if __name__ == 
"__main__": 18 | fire.Fire(testit) -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='stable-diffusion', 5 | version='0.0.1', 6 | description='', 7 | packages=find_packages(), 8 | install_requires=[ 9 | 'torch', 10 | 'numpy', 11 | 'tqdm', 12 | ], 13 | ) -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/shutil: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/shutil -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/stable_diffusion.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: stable-diffusion 3 | Version: 0.0.1 4 | License-File: LICENSE 5 | License-File: LICENSE-MODEL 6 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/stable_diffusion.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | LICENSE-MODEL 3 | README.md 4 | setup.py 5 | stable_diffusion.egg-info/PKG-INFO 6 | stable_diffusion.egg-info/SOURCES.txt 7 | stable_diffusion.egg-info/dependency_links.txt 8 | stable_diffusion.egg-info/requires.txt 9 | stable_diffusion.egg-info/top_level.txt -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/stable_diffusion.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/stable_diffusion.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | torch 2 | numpy 3 | tqdm 4 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/stable_diffusion.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | 2 | --------------------------------------------------------------------------------