├── README.md ├── image-editing ├── BDIA_experiments.ipynb ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── README.md ├── bdia_edict_functions.py ├── bdia_edit.py ├── environment.yaml ├── experiment_images │ ├── catgrass_original.png │ ├── charlotte-2069642_1280.jpg │ ├── cow.jpg │ ├── imagenet_dog_1.jpg │ ├── imagenet_dog_2.jpg │ ├── man-67467_1280.jpg │ ├── pixabay_boy.jpg │ ├── rooster.JPEG │ ├── truebsee-5337646_1280.jpg │ └── woman-657753_512.jpg ├── hf_auth └── my_diffusers │ ├── __init__.py │ ├── commands │ ├── __init__.py │ ├── diffusers_cli.py │ └── env.py │ ├── configuration_utils.py │ ├── dependency_versions_check.py │ ├── dependency_versions_table.py │ ├── dynamic_modules_utils.py │ ├── hub_utils.py │ ├── modeling_utils.py │ ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── attention.cpython-310.pyc │ │ ├── embeddings.cpython-310.pyc │ │ ├── resnet.cpython-310.pyc │ │ ├── unet_2d.cpython-310.pyc │ │ ├── unet_2d_condition.cpython-310.pyc │ │ ├── unet_blocks.cpython-310.pyc │ │ └── vae.cpython-310.pyc │ ├── attention.py │ ├── embeddings.py │ ├── resnet.py │ ├── unet_2d.py │ ├── unet_2d_condition.py │ ├── unet_blocks.py │ └── vae.py │ ├── onnx_utils.py │ ├── optimization.py │ ├── pipeline_utils.py │ ├── pipelines │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── ddim │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_ddim.cpython-310.pyc │ │ └── pipeline_ddim.py │ ├── ddpm │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_ddpm.cpython-310.pyc │ │ └── pipeline_ddpm.py │ ├── latent_diffusion │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_latent_diffusion.cpython-310.pyc │ │ └── pipeline_latent_diffusion.py │ ├── latent_diffusion_uncond │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_latent_diffusion_uncond.cpython-310.pyc │ │ └── pipeline_latent_diffusion_uncond.py │ ├── pndm │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_pndm.cpython-310.pyc │ │ └── pipeline_pndm.py │ ├── score_sde_ve │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── pipeline_score_sde_ve.cpython-310.pyc │ │ └── pipeline_score_sde_ve.py │ ├── stable_diffusion │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── pipeline_stable_diffusion.cpython-310.pyc │ │ │ ├── pipeline_stable_diffusion_img2img.cpython-310.pyc │ │ │ ├── pipeline_stable_diffusion_inpaint.cpython-310.pyc │ │ │ └── safety_checker.cpython-310.pyc │ │ ├── pipeline_stable_diffusion.py │ │ ├── pipeline_stable_diffusion_img2img.py │ │ ├── pipeline_stable_diffusion_inpaint.py │ │ ├── pipeline_stable_diffusion_onnx.py │ │ └── safety_checker.py │ └── stochastic_karras_ve │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── pipeline_stochastic_karras_ve.cpython-310.pyc │ │ └── pipeline_stochastic_karras_ve.py │ ├── schedulers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── scheduling_ddim.cpython-310.pyc │ │ ├── scheduling_ddpm.cpython-310.pyc │ │ ├── scheduling_karras_ve.cpython-310.pyc │ │ ├── scheduling_lms_discrete.cpython-310.pyc │ │ ├── scheduling_pndm.cpython-310.pyc │ │ ├── scheduling_sde_ve.cpython-310.pyc │ │ ├── scheduling_sde_vp.cpython-310.pyc │ │ └── scheduling_utils.cpython-310.pyc │ ├── scheduling_ddim.py │ ├── scheduling_ddpm.py │ ├── 
scheduling_karras_ve.py │ ├── scheduling_lms_discrete.py │ ├── scheduling_pndm.py │ ├── scheduling_sde_ve.py │ ├── scheduling_sde_vp.py │ └── scheduling_utils.py │ ├── testing_utils.py │ ├── training_utils.py │ └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── dummy_transformers_and_onnx_objects.cpython-310.pyc │ ├── import_utils.cpython-310.pyc │ ├── logging.cpython-310.pyc │ └── outputs.cpython-310.pyc │ ├── dummy_scipy_objects.py │ ├── dummy_transformers_and_inflect_and_unidecode_objects.py │ ├── dummy_transformers_and_onnx_objects.py │ ├── dummy_transformers_objects.py │ ├── import_utils.py │ ├── logging.py │ ├── model_card_template.md │ └── outputs.py ├── image_examples ├── BDIADDIM_t2i_20pairs.png ├── controlnet_BDIA.png ├── controlnet_BDIA_2nd.png ├── controlnet_BDIA_pro.png ├── image_editing_cat_lion.png └── woman_editing_2nd.png └── text-to-image ├── Readme.md └── stablediffusionV2 ├── LICENSE ├── LICENSE-MODEL ├── __pycache__ └── image_resize.cpython-39.pyc ├── checkpoints └── checkpoints.txt ├── configs ├── karlo │ ├── decoder_900M_vit_l.yaml │ ├── improved_sr_64_256_1.4B.yaml │ └── prior_1B_vit_l.yaml └── stable-diffusion │ ├── intel │ ├── v2-inference-bf16.yaml │ ├── v2-inference-fp32.yaml │ ├── v2-inference-v-bf16.yaml │ └── v2-inference-v-fp32.yaml │ ├── v2-1-stable-unclip-h-inference.yaml │ ├── v2-1-stable-unclip-l-inference.yaml │ ├── v2-inference-v.yaml │ ├── v2-inference.yaml │ ├── v2-inpainting-inference.yaml │ ├── v2-midas-inference.yaml │ └── x4-upscaling.yaml ├── cv2 ├── diffusion_inversion.py ├── doc └── UNCLIP.MD ├── environment.yaml ├── fid_score.py ├── image_resize.py ├── inception.py ├── ldm ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-39.pyc │ └── util.cpython-39.pyc ├── data │ ├── __init__.py │ └── util.py ├── models │ ├── __pycache__ │ │ └── autoencoder.cpython-39.pyc │ ├── autoencoder.py │ └── diffusion │ │ ├── BDIAddim.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── ABDIAddim.cpython-39.pyc │ │ ├── BDIAddim.cpython-39.pyc │ │ ├── BDIAddim_backup_2nd.cpython-39.pyc │ │ ├── BDIAddimv2.cpython-39.pyc │ │ ├── HIBDIAddim.cpython-39.pyc │ │ ├── IIAddim.cpython-39.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── ddim.cpython-39.pyc │ │ ├── ddpm.cpython-39.pyc │ │ ├── plms.cpython-39.pyc │ │ └── sampling_util.cpython-39.pyc │ │ ├── ddim.py │ │ ├── ddpm.py │ │ ├── dpm_solver │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── dpm_solver.cpython-39.pyc │ │ │ └── sampler.cpython-39.pyc │ │ ├── dpm_solver.py │ │ └── sampler.py │ │ ├── plms.py │ │ └── sampling_util.py ├── modules │ ├── __pycache__ │ │ ├── attention.cpython-39.pyc │ │ └── ema.cpython-39.pyc │ ├── attention.py │ ├── diffusionmodules │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── model.cpython-39.pyc │ │ │ ├── openaimodel.cpython-39.pyc │ │ │ ├── upscaling.cpython-39.pyc │ │ │ └── util.cpython-39.pyc │ │ ├── model.py │ │ ├── openaimodel.py │ │ ├── upscaling.py │ │ └── util.py │ ├── distributions │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── distributions.cpython-39.pyc │ │ └── distributions.py │ ├── ema.py │ ├── encoders │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-39.pyc │ │ │ └── modules.cpython-39.pyc │ │ └── modules.py │ ├── image_degradation │ │ ├── __init__.py │ │ ├── bsrgan.py │ │ ├── bsrgan_light.py │ │ ├── utils │ │ │ └── test.png │ │ └── utils_image.py │ ├── karlo │ │ ├── __init__.py │ │ ├── diffusers_pipeline.py │ │ └── kakao │ 
│ │ ├── __init__.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── clip.py │ │ │ ├── decoder_model.py │ │ │ ├── prior_model.py │ │ │ ├── sr_256_1k.py │ │ │ └── sr_64_256.py │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── diffusion │ │ │ │ ├── gaussian_diffusion.py │ │ │ │ └── respace.py │ │ │ ├── nn.py │ │ │ ├── resample.py │ │ │ ├── unet.py │ │ │ └── xf.py │ │ │ ├── sampler.py │ │ │ └── template.py │ └── midas │ │ ├── __init__.py │ │ ├── api.py │ │ ├── midas │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── blocks.py │ │ ├── dpt_depth.py │ │ ├── midas_net.py │ │ ├── midas_net_custom.py │ │ ├── transforms.py │ │ └── vit.py │ │ └── utils.py └── util.py ├── modelcard.md ├── pd ├── requirements.txt ├── sample.py ├── scripts ├── gradio │ ├── depth2img.py │ ├── inpainting.py │ └── superresolution.py ├── img2img.py ├── streamlit │ ├── depth2img.py │ ├── inpainting.py │ ├── stableunclip.py │ └── superresolution.py ├── tests │ └── test_watermark.py └── txt2img.py ├── setup.py ├── shutil ├── stable_diffusion.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── requires.txt └── top_level.txt └── txt2img.py /image-editing/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Comment line immediately above ownership line is reserved for related other information. Please be careful while editing. 2 | #ECCN:Open Source 3 | -------------------------------------------------------------------------------- /image-editing/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Salesforce Open Source Community Code of Conduct 2 | 3 | ## About the Code of Conduct 4 | 5 | Equality is a core value at Salesforce. We believe a diverse and inclusive 6 | community fosters innovation and creativity, and are committed to building a 7 | culture where everyone feels included. 8 | 9 | Salesforce open-source projects are committed to providing a friendly, safe, and 10 | welcoming environment for all, regardless of gender identity and expression, 11 | sexual orientation, disability, physical appearance, body size, ethnicity, nationality, 12 | race, age, religion, level of experience, education, socioeconomic status, or 13 | other similar personal characteristics. 14 | 15 | The goal of this code of conduct is to specify a baseline standard of behavior so 16 | that people with different social values and communication styles can work 17 | together effectively, productively, and respectfully in our open source community. 18 | It also establishes a mechanism for reporting issues and resolving conflicts. 19 | 20 | All questions and reports of abusive, harassing, or otherwise unacceptable behavior 21 | in a Salesforce open-source project may be reported by contacting the Salesforce 22 | Open Source Conduct Committee at ossconduct@salesforce.com. 23 | 24 | ## Our Pledge 25 | 26 | In the interest of fostering an open and welcoming environment, we as 27 | contributors and maintainers pledge to making participation in our project and 28 | our community a harassment-free experience for everyone, regardless of gender 29 | identity and expression, sexual orientation, disability, physical appearance, 30 | body size, ethnicity, nationality, race, age, religion, level of experience, education, 31 | socioeconomic status, or other similar personal characteristics. 
32 | 33 | ## Our Standards 34 | 35 | Examples of behavior that contributes to creating a positive environment 36 | include: 37 | 38 | * Using welcoming and inclusive language 39 | * Being respectful of differing viewpoints and experiences 40 | * Gracefully accepting constructive criticism 41 | * Focusing on what is best for the community 42 | * Showing empathy toward other community members 43 | 44 | Examples of unacceptable behavior by participants include: 45 | 46 | * The use of sexualized language or imagery and unwelcome sexual attention or 47 | advances 48 | * Personal attacks, insulting/derogatory comments, or trolling 49 | * Public or private harassment 50 | * Publishing, or threatening to publish, others' private information—such as 51 | a physical or electronic address—without explicit permission 52 | * Other conduct which could reasonably be considered inappropriate in a 53 | professional setting 54 | * Advocating for or encouraging any of the above behaviors 55 | 56 | ## Our Responsibilities 57 | 58 | Project maintainers are responsible for clarifying the standards of acceptable 59 | behavior and are expected to take appropriate and fair corrective action in 60 | response to any instances of unacceptable behavior. 61 | 62 | Project maintainers have the right and responsibility to remove, edit, or 63 | reject comments, commits, code, wiki edits, issues, and other contributions 64 | that are not aligned with this Code of Conduct, or to ban temporarily or 65 | permanently any contributor for other behaviors that they deem inappropriate, 66 | threatening, offensive, or harmful. 67 | 68 | ## Scope 69 | 70 | This Code of Conduct applies both within project spaces and in public spaces 71 | when an individual is representing the project or its community. Examples of 72 | representing a project or community include using an official project email 73 | address, posting via an official social media account, or acting as an appointed 74 | representative at an online or offline event. Representation of a project may be 75 | further defined and clarified by project maintainers. 76 | 77 | ## Enforcement 78 | 79 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 80 | reported by contacting the Salesforce Open Source Conduct Committee 81 | at ossconduct@salesforce.com. All complaints will be reviewed and investigated 82 | and will result in a response that is deemed necessary and appropriate to the 83 | circumstances. The committee is obligated to maintain confidentiality with 84 | regard to the reporter of an incident. Further details of specific enforcement 85 | policies may be posted separately. 86 | 87 | Project maintainers who do not follow or enforce the Code of Conduct in good 88 | faith may face temporary or permanent repercussions as determined by other 89 | members of the project's leadership and the Salesforce Open Source Conduct 90 | Committee. 91 | 92 | ## Attribution 93 | 94 | This Code of Conduct is adapted from the [Contributor Covenant][contributor-covenant-home], 95 | version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html. 96 | It includes adaptions and additions from [Go Community Code of Conduct][golang-coc], 97 | [CNCF Code of Conduct][cncf-coc], and [Microsoft Open Source Code of Conduct][microsoft-coc]. 98 | 99 | This Code of Conduct is licensed under the [Creative Commons Attribution 3.0 License][cc-by-3-us]. 
100 | 101 | [contributor-covenant-home]: https://www.contributor-covenant.org (https://www.contributor-covenant.org/) 102 | [golang-coc]: https://golang.org/conduct 103 | [cncf-coc]: https://github.com/cncf/foundation/blob/master/code-of-conduct.md 104 | [microsoft-coc]: https://opensource.microsoft.com/codeofconduct/ 105 | [cc-by-3-us]: https://creativecommons.org/licenses/by/3.0/us/ 106 | -------------------------------------------------------------------------------- /image-editing/README.md: -------------------------------------------------------------------------------- 1 | 2 | ### Procedure for running the code for round-trip image editing 3 | 1. Download the source code and put it in a folder in your Google Drive. 4 | 2. Upload and run BDIA_experiments.ipynb on Google Colab. (Note: you may need to change the working directory in BDIA_experiments.ipynb for it to run properly). 5 | 6 | Note 1: The parameter $\gamma$ in BDIA has a significant impact on the resulting edited images. The recommended range for $\gamma$ is [0.92, 1.0]. 7 | 8 | Note 2: BDIA-DDIM runs at the same speed as DDIM; it is not 10 times slower than DDIM, as claimed in "Fixed-point Inversion for Text-to-image diffusion models". 9 | 10 | ### Acknowledgement 11 | The BDIA implementation for round-trip image editing depends heavily on the open-source code of EDICT. The main Python function implementing BDIA-DDIM for round-trip image editing is BDIA_stablediffusion in bdia_edict_functions.py (see the hypothetical usage sketch after the experiment_images listing below). 12 | -------------------------------------------------------------------------------- /image-editing/bdia_edit.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /image-editing/experiment_images/catgrass_original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/catgrass_original.png -------------------------------------------------------------------------------- /image-editing/experiment_images/charlotte-2069642_1280.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/charlotte-2069642_1280.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/cow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/cow.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/imagenet_dog_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/imagenet_dog_1.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/imagenet_dog_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/imagenet_dog_2.jpg
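The README above names BDIA_stablediffusion in bdia_edict_functions.py as the entry point for round-trip editing but does not show how it is invoked. The minimal sketch below illustrates one plausible call: the module and function names come from the repository itself, while the argument names (init_image, base_prompt, edit_prompt, steps, gamma) and the import path are assumptions made purely for illustration, so the actual signature in bdia_edict_functions.py or BDIA_experiments.ipynb should be checked before use.

```python
# Hypothetical usage sketch for round-trip image editing with BDIA-DDIM.
# BDIA_stablediffusion and bdia_edict_functions.py exist in this repository,
# but every argument name below is an assumption; consult the notebook
# BDIA_experiments.ipynb for the actual signature.
from PIL import Image

from bdia_edict_functions import BDIA_stablediffusion  # assumed import path

init_image = Image.open("experiment_images/catgrass_original.png")

edited = BDIA_stablediffusion(
    init_image,                                 # image to invert and then edit (assumed parameter)
    base_prompt="a cat sitting in the grass",   # prompt describing the source image (assumed)
    edit_prompt="a lion sitting in the grass",  # prompt describing the desired edit (assumed)
    steps=50,                                   # number of BDIA-DDIM steps (assumed)
    gamma=0.96,                                 # BDIA gamma; the README recommends values in [0.92, 1.0]
)
```

Because $\gamma$ strongly affects the edited result, it is worth sweeping a few values inside the recommended [0.92, 1.0] range rather than committing to a single setting.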
-------------------------------------------------------------------------------- /image-editing/experiment_images/man-67467_1280.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/man-67467_1280.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/pixabay_boy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/pixabay_boy.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/rooster.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/rooster.JPEG -------------------------------------------------------------------------------- /image-editing/experiment_images/truebsee-5337646_1280.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/truebsee-5337646_1280.jpg -------------------------------------------------------------------------------- /image-editing/experiment_images/woman-657753_512.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/experiment_images/woman-657753_512.jpg -------------------------------------------------------------------------------- /image-editing/hf_auth: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import ( 2 | is_inflect_available, 3 | is_onnx_available, 4 | is_scipy_available, 5 | is_transformers_available, 6 | is_unidecode_available, 7 | ) 8 | 9 | 10 | __version__ = "0.3.0" 11 | 12 | from .configuration_utils import ConfigMixin 13 | from .modeling_utils import ModelMixin 14 | from .models import AutoencoderKL, UNet2DConditionModel, UNet2DModel, VQModel 15 | from .onnx_utils import OnnxRuntimeModel 16 | from .optimization import ( 17 | get_constant_schedule, 18 | get_constant_schedule_with_warmup, 19 | get_cosine_schedule_with_warmup, 20 | get_cosine_with_hard_restarts_schedule_with_warmup, 21 | get_linear_schedule_with_warmup, 22 | get_polynomial_decay_schedule_with_warmup, 23 | get_scheduler, 24 | ) 25 | from .pipeline_utils import DiffusionPipeline 26 | from .pipelines import DDIMPipeline, DDPMPipeline, KarrasVePipeline, LDMPipeline, PNDMPipeline, ScoreSdeVePipeline 27 | from .schedulers import ( 28 | DDIMScheduler, 29 | DDPMScheduler, 30 | KarrasVeScheduler, 31 | PNDMScheduler, 32 | SchedulerMixin, 33 | ScoreSdeVeScheduler, 34 | ) 35 | from .utils import logging 36 | 37 | 38 | if is_scipy_available(): 39 | from .schedulers import LMSDiscreteScheduler 40 | else: 41 | from .utils.dummy_scipy_objects import * # noqa F403 42 | 43 | from .training_utils import EMAModel 44 | 45 | 46 | if 
is_transformers_available(): 47 | from .pipelines import ( 48 | LDMTextToImagePipeline, 49 | StableDiffusionImg2ImgPipeline, 50 | StableDiffusionInpaintPipeline, 51 | StableDiffusionPipeline, 52 | ) 53 | else: 54 | from .utils.dummy_transformers_objects import * # noqa F403 55 | 56 | 57 | if is_transformers_available() and is_onnx_available(): 58 | from .pipelines import StableDiffusionOnnxPipeline 59 | else: 60 | from .utils.dummy_transformers_and_onnx_objects import * # noqa F403 61 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from abc import ABC, abstractmethod 16 | from argparse import ArgumentParser 17 | 18 | 19 | class BaseDiffusersCLICommand(ABC): 20 | @staticmethod 21 | @abstractmethod 22 | def register_subcommand(parser: ArgumentParser): 23 | raise NotImplementedError() 24 | 25 | @abstractmethod 26 | def run(self): 27 | raise NotImplementedError() 28 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/commands/diffusers_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from argparse import ArgumentParser 17 | 18 | from .env import EnvironmentCommand 19 | 20 | 21 | def main(): 22 | parser = ArgumentParser("Diffusers CLI tool", usage="diffusers-cli []") 23 | commands_parser = parser.add_subparsers(help="diffusers-cli command helpers") 24 | 25 | # Register commands 26 | EnvironmentCommand.register_subcommand(commands_parser) 27 | 28 | # Let's go 29 | args = parser.parse_args() 30 | 31 | if not hasattr(args, "func"): 32 | parser.print_help() 33 | exit(1) 34 | 35 | # Run 36 | service = args.func(args) 37 | service.run() 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/commands/env.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import platform 16 | from argparse import ArgumentParser 17 | 18 | import huggingface_hub 19 | 20 | from .. import __version__ as version 21 | from ..utils import is_torch_available, is_transformers_available 22 | from . import BaseDiffusersCLICommand 23 | 24 | 25 | def info_command_factory(_): 26 | return EnvironmentCommand() 27 | 28 | 29 | class EnvironmentCommand(BaseDiffusersCLICommand): 30 | @staticmethod 31 | def register_subcommand(parser: ArgumentParser): 32 | download_parser = parser.add_parser("env") 33 | download_parser.set_defaults(func=info_command_factory) 34 | 35 | def run(self): 36 | hub_version = huggingface_hub.__version__ 37 | 38 | pt_version = "not installed" 39 | pt_cuda_available = "NA" 40 | if is_torch_available(): 41 | import torch 42 | 43 | pt_version = torch.__version__ 44 | pt_cuda_available = torch.cuda.is_available() 45 | 46 | transformers_version = "not installed" 47 | if is_transformers_available: 48 | import transformers 49 | 50 | transformers_version = transformers.__version__ 51 | 52 | info = { 53 | "`diffusers` version": version, 54 | "Platform": platform.platform(), 55 | "Python version": platform.python_version(), 56 | "PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})", 57 | "Huggingface_hub version": hub_version, 58 | "Transformers version": transformers_version, 59 | "Using GPU in script?": "", 60 | "Using distributed or parallel set-up in script?": "", 61 | } 62 | 63 | print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n") 64 | print(self.format_dict(info)) 65 | 66 | return info 67 | 68 | @staticmethod 69 | def format_dict(d): 70 | return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n" 71 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/dependency_versions_check.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import sys 15 | 16 | from .dependency_versions_table import deps 17 | from .utils.versions import require_version, require_version_core 18 | 19 | 20 | # define which module versions we always want to check at run time 21 | # (usually the ones defined in `install_requires` in setup.py) 22 | # 23 | # order specific notes: 24 | # - tqdm must be checked before tokenizers 25 | 26 | pkgs_to_check_at_runtime = "python tqdm regex requests packaging filelock numpy tokenizers".split() 27 | if sys.version_info < (3, 7): 28 | pkgs_to_check_at_runtime.append("dataclasses") 29 | if sys.version_info < (3, 8): 30 | pkgs_to_check_at_runtime.append("importlib_metadata") 31 | 32 | for pkg in pkgs_to_check_at_runtime: 33 | if pkg in deps: 34 | if pkg == "tokenizers": 35 | # must be loaded here, or else tqdm check may fail 36 | from .utils import is_tokenizers_available 37 | 38 | if not is_tokenizers_available(): 39 | continue # not required, check version only if installed 40 | 41 | require_version_core(deps[pkg]) 42 | else: 43 | raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") 44 | 45 | 46 | def dep_version_check(pkg, hint=None): 47 | require_version(deps[pkg], hint) 48 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/dependency_versions_table.py: -------------------------------------------------------------------------------- 1 | # THIS FILE HAS BEEN AUTOGENERATED. To update: 2 | # 1. modify the `_deps` dict in setup.py 3 | # 2. run `make deps_table_update`` 4 | deps = { 5 | "Pillow": "Pillow", 6 | "accelerate": "accelerate>=0.11.0", 7 | "black": "black==22.3", 8 | "datasets": "datasets", 9 | "filelock": "filelock", 10 | "flake8": "flake8>=3.8.3", 11 | "hf-doc-builder": "hf-doc-builder>=0.3.0", 12 | "huggingface-hub": "huggingface-hub>=0.8.1", 13 | "importlib_metadata": "importlib_metadata", 14 | "isort": "isort>=5.5.4", 15 | "modelcards": "modelcards==0.1.4", 16 | "numpy": "numpy", 17 | "pytest": "pytest", 18 | "pytest-timeout": "pytest-timeout", 19 | "pytest-xdist": "pytest-xdist", 20 | "scipy": "scipy", 21 | "regex": "regex!=2019.12.17", 22 | "requests": "requests", 23 | "tensorboard": "tensorboard", 24 | "torch": "torch>=1.4", 25 | "transformers": "transformers>=4.21.0", 26 | } 27 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .unet_2d import UNet2DModel 16 | from .unet_2d_condition import UNet2DConditionModel 17 | from .vae import AutoencoderKL, VQModel 18 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/attention.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/attention.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/embeddings.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/embeddings.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/resnet.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/resnet.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/unet_2d.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/unet_2d.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/unet_2d_condition.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/unet_2d_condition.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/unet_blocks.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/unet_blocks.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/__pycache__/vae.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/models/__pycache__/vae.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/models/embeddings.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import math 15 | 16 | import numpy as np 17 | import torch 18 | from torch import nn 19 | 20 | 21 | def get_timestep_embedding( 22 | timesteps: torch.Tensor, 23 | embedding_dim: int, 24 | flip_sin_to_cos: bool = False, 25 | downscale_freq_shift: float = 1, 26 | scale: float = 1, 27 | max_period: int = 10000, 28 | ): 29 | # print(timesteps) 30 | """ 31 | This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. 32 | 33 | :param timesteps: a 1-D Tensor of N indices, one per batch element. 34 | These may be fractional. 35 | :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the 36 | embeddings. :return: an [N x dim] Tensor of positional embeddings. 37 | """ 38 | assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array" 39 | 40 | half_dim = embedding_dim // 2 41 | exponent = -math.log(max_period) * torch.arange(start=0, end=half_dim, dtype=torch.float64) 42 | exponent = exponent / (half_dim - downscale_freq_shift) 43 | 44 | emb = torch.exp(exponent).to(device=timesteps.device) 45 | emb = timesteps[:, None].double() * emb[None, :] 46 | 47 | # scale embeddings 48 | emb = scale * emb 49 | 50 | # concat sine and cosine embeddings 51 | emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1) 52 | 53 | # flip sine and cosine embeddings 54 | if flip_sin_to_cos: 55 | emb = torch.cat([emb[:, half_dim:], emb[:, :half_dim]], dim=-1) 56 | 57 | # zero pad 58 | if embedding_dim % 2 == 1: 59 | emb = torch.nn.functional.pad(emb, (0, 1, 0, 0)) 60 | return emb 61 | 62 | 63 | class TimestepEmbedding(nn.Module): 64 | def __init__(self, channel: int, time_embed_dim: int, act_fn: str = "silu"): 65 | super().__init__() 66 | 67 | self.linear_1 = nn.Linear(channel, time_embed_dim) 68 | self.act = None 69 | if act_fn == "silu": 70 | self.act = nn.SiLU() 71 | self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim) 72 | 73 | def forward(self, sample): 74 | sample = self.linear_1(sample) 75 | 76 | if self.act is not None: 77 | sample = self.act(sample) 78 | 79 | sample = self.linear_2(sample) 80 | return sample 81 | 82 | 83 | class Timesteps(nn.Module): 84 | def __init__(self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float): 85 | super().__init__() 86 | self.num_channels = num_channels 87 | self.flip_sin_to_cos = flip_sin_to_cos 88 | self.downscale_freq_shift = downscale_freq_shift 89 | 90 | def forward(self, timesteps): 91 | t_emb = get_timestep_embedding( 92 | timesteps, 93 | self.num_channels, 94 | flip_sin_to_cos=self.flip_sin_to_cos, 95 | downscale_freq_shift=self.downscale_freq_shift, 96 | ) 97 | return t_emb 98 | 99 | 100 | class GaussianFourierProjection(nn.Module): 101 | """Gaussian Fourier embeddings for noise levels.""" 102 | 103 | def 
__init__(self, embedding_size: int = 256, scale: float = 1.0): 104 | super().__init__() 105 | self.weight = nn.Parameter(torch.randn(embedding_size) * scale, requires_grad=False) 106 | 107 | # to delete later 108 | self.W = nn.Parameter(torch.randn(embedding_size) * scale, requires_grad=False) 109 | 110 | self.weight = self.W 111 | 112 | def forward(self, x): 113 | x = torch.log(x) 114 | x_proj = x[:, None] * self.weight[None, :] * 2 * np.pi 115 | out = torch.cat([torch.sin(x_proj), torch.cos(x_proj)], dim=-1) 116 | return out 117 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from ..utils import is_onnx_available, is_transformers_available 2 | from .ddim import DDIMPipeline 3 | from .ddpm import DDPMPipeline 4 | from .latent_diffusion_uncond import LDMPipeline 5 | from .pndm import PNDMPipeline 6 | from .score_sde_ve import ScoreSdeVePipeline 7 | from .stochastic_karras_ve import KarrasVePipeline 8 | 9 | 10 | if is_transformers_available(): 11 | from .latent_diffusion import LDMTextToImagePipeline 12 | from .stable_diffusion import ( 13 | StableDiffusionImg2ImgPipeline, 14 | StableDiffusionInpaintPipeline, 15 | StableDiffusionPipeline, 16 | ) 17 | 18 | if is_transformers_available() and is_onnx_available(): 19 | from .stable_diffusion import StableDiffusionOnnxPipeline 20 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddim/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_ddim import DDIMPipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddim/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/ddim/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddim/__pycache__/pipeline_ddim.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/ddim/__pycache__/pipeline_ddim.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddim/pipeline_ddim.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | # limitations under the License. 15 | 16 | 17 | import warnings 18 | from typing import Optional, Tuple, Union 19 | 20 | import torch 21 | 22 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 23 | 24 | 25 | class DDIMPipeline(DiffusionPipeline): 26 | r""" 27 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 28 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 29 | 30 | Parameters: 31 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. 32 | scheduler ([`SchedulerMixin`]): 33 | A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of 34 | [`DDPMScheduler`], or [`DDIMScheduler`]. 35 | """ 36 | 37 | def __init__(self, unet, scheduler): 38 | super().__init__() 39 | scheduler = scheduler.set_format("pt") 40 | self.register_modules(unet=unet, scheduler=scheduler) 41 | 42 | @torch.no_grad() 43 | def __call__( 44 | self, 45 | batch_size: int = 1, 46 | generator: Optional[torch.Generator] = None, 47 | eta: float = 0.0, 48 | num_inference_steps: int = 50, 49 | output_type: Optional[str] = "pil", 50 | return_dict: bool = True, 51 | **kwargs, 52 | ) -> Union[ImagePipelineOutput, Tuple]: 53 | r""" 54 | Args: 55 | batch_size (`int`, *optional*, defaults to 1): 56 | The number of images to generate. 57 | generator (`torch.Generator`, *optional*): 58 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 59 | deterministic. 60 | eta (`float`, *optional*, defaults to 0.0): 61 | The eta parameter which controls the scale of the variance (0 is DDIM and 1 is one type of DDPM). 62 | num_inference_steps (`int`, *optional*, defaults to 50): 63 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 64 | expense of slower inference. 65 | output_type (`str`, *optional*, defaults to `"pil"`): 66 | The output format of the generate image. Choose between 67 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 68 | return_dict (`bool`, *optional*, defaults to `True`): 69 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 70 | 71 | Returns: 72 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 73 | `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the 74 | generated images. 75 | """ 76 | 77 | if "torch_device" in kwargs: 78 | device = kwargs.pop("torch_device") 79 | warnings.warn( 80 | "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." 81 | " Consider using `pipe.to(torch_device)` instead." 
82 | ) 83 | 84 | # Set device as before (to be removed in 0.3.0) 85 | if device is None: 86 | device = "cuda" if torch.cuda.is_available() else "cpu" 87 | self.to(device) 88 | 89 | # eta corresponds to η in paper and should be between [0, 1] 90 | 91 | # Sample gaussian noise to begin loop 92 | image = torch.randn( 93 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 94 | generator=generator, 95 | ) 96 | image = image.to(self.device) 97 | 98 | # set step values 99 | self.scheduler.set_timesteps(num_inference_steps) 100 | 101 | for t in self.progress_bar(self.scheduler.timesteps): 102 | # 1. predict noise model_output 103 | model_output = self.unet(image, t).sample 104 | 105 | # 2. predict previous mean of image x_t-1 and add variance depending on eta 106 | # do x_t -> x_t-1 107 | image = self.scheduler.step(model_output, t, image, eta).prev_sample 108 | 109 | image = (image / 2 + 0.5).clamp(0, 1) 110 | image = image.cpu().permute(0, 2, 3, 1).numpy() 111 | if output_type == "pil": 112 | image = self.numpy_to_pil(image) 113 | 114 | if not return_dict: 115 | return (image,) 116 | 117 | return ImagePipelineOutput(images=image) 118 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddpm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_ddpm import DDPMPipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddpm/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/ddpm/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddpm/__pycache__/pipeline_ddpm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/ddpm/__pycache__/pipeline_ddpm.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/ddpm/pipeline_ddpm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | # limitations under the License. 15 | 16 | 17 | import warnings 18 | from typing import Optional, Tuple, Union 19 | 20 | import torch 21 | 22 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 23 | 24 | 25 | class DDPMPipeline(DiffusionPipeline): 26 | r""" 27 | This model inherits from [`DiffusionPipeline`]. 
Check the superclass documentation for the generic methods the 28 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 29 | 30 | Parameters: 31 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. 32 | scheduler ([`SchedulerMixin`]): 33 | A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of 34 | [`DDPMScheduler`], or [`DDIMScheduler`]. 35 | """ 36 | 37 | def __init__(self, unet, scheduler): 38 | super().__init__() 39 | scheduler = scheduler.set_format("pt") 40 | self.register_modules(unet=unet, scheduler=scheduler) 41 | 42 | @torch.no_grad() 43 | def __call__( 44 | self, 45 | batch_size: int = 1, 46 | generator: Optional[torch.Generator] = None, 47 | output_type: Optional[str] = "pil", 48 | return_dict: bool = True, 49 | **kwargs, 50 | ) -> Union[ImagePipelineOutput, Tuple]: 51 | r""" 52 | Args: 53 | batch_size (`int`, *optional*, defaults to 1): 54 | The number of images to generate. 55 | generator (`torch.Generator`, *optional*): 56 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 57 | deterministic. 58 | output_type (`str`, *optional*, defaults to `"pil"`): 59 | The output format of the generate image. Choose between 60 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 61 | return_dict (`bool`, *optional*, defaults to `True`): 62 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 63 | 64 | Returns: 65 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 66 | `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the 67 | generated images. 68 | """ 69 | if "torch_device" in kwargs: 70 | device = kwargs.pop("torch_device") 71 | warnings.warn( 72 | "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." 73 | " Consider using `pipe.to(torch_device)` instead." 74 | ) 75 | 76 | # Set device as before (to be removed in 0.3.0) 77 | if device is None: 78 | device = "cuda" if torch.cuda.is_available() else "cpu" 79 | self.to(device) 80 | 81 | # Sample gaussian noise to begin loop 82 | image = torch.randn( 83 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 84 | generator=generator, 85 | ) 86 | image = image.to(self.device) 87 | 88 | # set step values 89 | self.scheduler.set_timesteps(1000) 90 | 91 | for t in self.progress_bar(self.scheduler.timesteps): 92 | # 1. predict noise model_output 93 | model_output = self.unet(image, t).sample 94 | 95 | # 2. 
compute previous image: x_t -> t_t-1 96 | image = self.scheduler.step(model_output, t, image, generator=generator).prev_sample 97 | 98 | image = (image / 2 + 0.5).clamp(0, 1) 99 | image = image.cpu().permute(0, 2, 3, 1).numpy() 100 | if output_type == "pil": 101 | image = self.numpy_to_pil(image) 102 | 103 | if not return_dict: 104 | return (image,) 105 | 106 | return ImagePipelineOutput(images=image) 107 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from ...utils import is_transformers_available 3 | 4 | 5 | if is_transformers_available(): 6 | from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline 7 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/latent_diffusion/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion/__pycache__/pipeline_latent_diffusion.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/latent_diffusion/__pycache__/pipeline_latent_diffusion.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion_uncond/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_latent_diffusion_uncond import LDMPipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion_uncond/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/latent_diffusion_uncond/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion_uncond/__pycache__/pipeline_latent_diffusion_uncond.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/latent_diffusion_uncond/__pycache__/pipeline_latent_diffusion_uncond.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import warnings 3 | from typing import Optional, Tuple, Union 4 | 5 | import torch 6 | 7 | from ...models import UNet2DModel, VQModel 8 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 9 | from 
...schedulers import DDIMScheduler 10 | 11 | 12 | class LDMPipeline(DiffusionPipeline): 13 | r""" 14 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 15 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 16 | 17 | Parameters: 18 | vqvae ([`VQModel`]): 19 | Vector-quantized (VQ) Model to encode and decode images to and from latent representations. 20 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image latents. 21 | scheduler ([`SchedulerMixin`]): 22 | [`DDIMScheduler`] is to be used in combination with `unet` to denoise the encoded image latens. 23 | """ 24 | 25 | def __init__(self, vqvae: VQModel, unet: UNet2DModel, scheduler: DDIMScheduler): 26 | super().__init__() 27 | scheduler = scheduler.set_format("pt") 28 | self.register_modules(vqvae=vqvae, unet=unet, scheduler=scheduler) 29 | 30 | @torch.no_grad() 31 | def __call__( 32 | self, 33 | batch_size: int = 1, 34 | generator: Optional[torch.Generator] = None, 35 | eta: float = 0.0, 36 | num_inference_steps: int = 50, 37 | output_type: Optional[str] = "pil", 38 | return_dict: bool = True, 39 | **kwargs, 40 | ) -> Union[Tuple, ImagePipelineOutput]: 41 | 42 | r""" 43 | Args: 44 | batch_size (`int`, *optional*, defaults to 1): 45 | Number of images to generate. 46 | generator (`torch.Generator`, *optional*): 47 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 48 | deterministic. 49 | num_inference_steps (`int`, *optional*, defaults to 50): 50 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 51 | expense of slower inference. 52 | output_type (`str`, *optional*, defaults to `"pil"`): 53 | The output format of the generate image. Choose between 54 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 55 | return_dict (`bool`, *optional*, defaults to `True`): 56 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 57 | 58 | Returns: 59 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 60 | `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the 61 | generated images. 62 | """ 63 | 64 | if "torch_device" in kwargs: 65 | device = kwargs.pop("torch_device") 66 | warnings.warn( 67 | "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." 68 | " Consider using `pipe.to(torch_device)` instead." 
69 | ) 70 | 71 | # Set device as before (to be removed in 0.3.0) 72 | if device is None: 73 | device = "cuda" if torch.cuda.is_available() else "cpu" 74 | self.to(device) 75 | 76 | latents = torch.randn( 77 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 78 | generator=generator, 79 | ) 80 | latents = latents.to(self.device) 81 | 82 | self.scheduler.set_timesteps(num_inference_steps) 83 | 84 | # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature 85 | accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) 86 | 87 | extra_kwargs = {} 88 | if accepts_eta: 89 | extra_kwargs["eta"] = eta 90 | 91 | for t in self.progress_bar(self.scheduler.timesteps): 92 | # predict the noise residual 93 | noise_prediction = self.unet(latents, t).sample 94 | # compute the previous noisy sample x_t -> x_t-1 95 | latents = self.scheduler.step(noise_prediction, t, latents, **extra_kwargs).prev_sample 96 | 97 | # decode the image latents with the VAE 98 | image = self.vqvae.decode(latents).sample 99 | 100 | image = (image / 2 + 0.5).clamp(0, 1) 101 | image = image.cpu().permute(0, 2, 3, 1).numpy() 102 | if output_type == "pil": 103 | image = self.numpy_to_pil(image) 104 | 105 | if not return_dict: 106 | return (image,) 107 | 108 | return ImagePipelineOutput(images=image) 109 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/pndm/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_pndm import PNDMPipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/pndm/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/pndm/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/pndm/__pycache__/pipeline_pndm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/pndm/__pycache__/pipeline_pndm.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/pndm/pipeline_pndm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | 14 | # limitations under the License. 
15 | 16 | 17 | import warnings 18 | from typing import Optional, Tuple, Union 19 | 20 | import torch 21 | 22 | from ...models import UNet2DModel 23 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 24 | from ...schedulers import PNDMScheduler 25 | 26 | 27 | class PNDMPipeline(DiffusionPipeline): 28 | r""" 29 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 30 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 31 | 32 | Parameters: 33 | unet (`UNet2DModel`): U-Net architecture to denoise the encoded image latents. 34 | scheduler ([`SchedulerMixin`]): 35 | The `PNDMScheduler` to be used in combination with `unet` to denoise the encoded image. 36 | """ 37 | 38 | unet: UNet2DModel 39 | scheduler: PNDMScheduler 40 | 41 | def __init__(self, unet: UNet2DModel, scheduler: PNDMScheduler): 42 | super().__init__() 43 | scheduler = scheduler.set_format("pt") 44 | self.register_modules(unet=unet, scheduler=scheduler) 45 | 46 | @torch.no_grad() 47 | def __call__( 48 | self, 49 | batch_size: int = 1, 50 | num_inference_steps: int = 50, 51 | generator: Optional[torch.Generator] = None, 52 | output_type: Optional[str] = "pil", 53 | return_dict: bool = True, 54 | **kwargs, 55 | ) -> Union[ImagePipelineOutput, Tuple]: 56 | r""" 57 | Args: 58 | batch_size (`int`, `optional`, defaults to 1): The number of images to generate. 59 | num_inference_steps (`int`, `optional`, defaults to 50): 60 | The number of denoising steps. More denoising steps usually lead to a higher quality image at the 61 | expense of slower inference. 62 | generator (`torch.Generator`, `optional`): A [torch 63 | generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 64 | deterministic. 65 | output_type (`str`, `optional`, defaults to `"pil"`): The output format of the generate image. Choose 66 | between [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 67 | return_dict (`bool`, `optional`, defaults to `True`): Whether or not to return a 68 | [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 69 | 70 | Returns: 71 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 72 | `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the 73 | generated images. 74 | """ 75 | # For more information on the sampling method you can take a look at Algorithm 2 of 76 | # the official paper: https://arxiv.org/pdf/2202.09778.pdf 77 | 78 | if "torch_device" in kwargs: 79 | device = kwargs.pop("torch_device") 80 | warnings.warn( 81 | "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." 82 | " Consider using `pipe.to(torch_device)` instead." 
83 | ) 84 | 85 | # Set device as before (to be removed in 0.3.0) 86 | if device is None: 87 | device = "cuda" if torch.cuda.is_available() else "cpu" 88 | self.to(device) 89 | 90 | # Sample gaussian noise to begin loop 91 | image = torch.randn( 92 | (batch_size, self.unet.in_channels, self.unet.sample_size, self.unet.sample_size), 93 | generator=generator, 94 | ) 95 | image = image.to(self.device) 96 | 97 | self.scheduler.set_timesteps(num_inference_steps) 98 | for t in self.progress_bar(self.scheduler.timesteps): 99 | model_output = self.unet(image, t).sample 100 | 101 | image = self.scheduler.step(model_output, t, image).prev_sample 102 | 103 | image = (image / 2 + 0.5).clamp(0, 1) 104 | image = image.cpu().permute(0, 2, 3, 1).numpy() 105 | if output_type == "pil": 106 | image = self.numpy_to_pil(image) 107 | 108 | if not return_dict: 109 | return (image,) 110 | 111 | return ImagePipelineOutput(images=image) 112 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/score_sde_ve/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_score_sde_ve import ScoreSdeVePipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/score_sde_ve/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/score_sde_ve/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/score_sde_ve/__pycache__/pipeline_score_sde_ve.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/score_sde_ve/__pycache__/pipeline_score_sde_ve.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/score_sde_ve/pipeline_score_sde_ve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import warnings 3 | from typing import Optional, Tuple, Union 4 | 5 | import torch 6 | 7 | from ...models import UNet2DModel 8 | from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput 9 | from ...schedulers import ScoreSdeVeScheduler 10 | 11 | 12 | class ScoreSdeVePipeline(DiffusionPipeline): 13 | r""" 14 | Parameters: 15 | This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the 16 | library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) 17 | unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image. scheduler ([`SchedulerMixin`]): 18 | The [`ScoreSdeVeScheduler`] scheduler to be used in combination with `unet` to denoise the encoded image. 
19 | """ 20 | unet: UNet2DModel 21 | scheduler: ScoreSdeVeScheduler 22 | 23 | def __init__(self, unet: UNet2DModel, scheduler: DiffusionPipeline): 24 | super().__init__() 25 | self.register_modules(unet=unet, scheduler=scheduler) 26 | 27 | @torch.no_grad() 28 | def __call__( 29 | self, 30 | batch_size: int = 1, 31 | num_inference_steps: int = 2000, 32 | generator: Optional[torch.Generator] = None, 33 | output_type: Optional[str] = "pil", 34 | return_dict: bool = True, 35 | **kwargs, 36 | ) -> Union[ImagePipelineOutput, Tuple]: 37 | r""" 38 | Args: 39 | batch_size (`int`, *optional*, defaults to 1): 40 | The number of images to generate. 41 | generator (`torch.Generator`, *optional*): 42 | A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation 43 | deterministic. 44 | output_type (`str`, *optional*, defaults to `"pil"`): 45 | The output format of the generate image. Choose between 46 | [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `nd.array`. 47 | return_dict (`bool`, *optional*, defaults to `True`): 48 | Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple. 49 | 50 | Returns: 51 | [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if 52 | `return_dict` is True, otherwise a `tuple. When returning a tuple, the first element is a list with the 53 | generated images. 54 | """ 55 | 56 | if "torch_device" in kwargs: 57 | device = kwargs.pop("torch_device") 58 | warnings.warn( 59 | "`torch_device` is deprecated as an input argument to `__call__` and will be removed in v0.3.0." 60 | " Consider using `pipe.to(torch_device)` instead." 61 | ) 62 | 63 | # Set device as before (to be removed in 0.3.0) 64 | if device is None: 65 | device = "cuda" if torch.cuda.is_available() else "cpu" 66 | self.to(device) 67 | 68 | img_size = self.unet.config.sample_size 69 | shape = (batch_size, 3, img_size, img_size) 70 | 71 | model = self.unet 72 | 73 | sample = torch.randn(*shape, generator=generator) * self.scheduler.config.sigma_max 74 | sample = sample.to(self.device) 75 | 76 | self.scheduler.set_timesteps(num_inference_steps) 77 | self.scheduler.set_sigmas(num_inference_steps) 78 | 79 | for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): 80 | sigma_t = self.scheduler.sigmas[i] * torch.ones(shape[0], device=self.device) 81 | 82 | # correction step 83 | for _ in range(self.scheduler.correct_steps): 84 | model_output = self.unet(sample, sigma_t).sample 85 | sample = self.scheduler.step_correct(model_output, sample, generator=generator).prev_sample 86 | 87 | # prediction step 88 | model_output = model(sample, sigma_t).sample 89 | output = self.scheduler.step_pred(model_output, t, sample, generator=generator) 90 | 91 | sample, sample_mean = output.prev_sample, output.prev_sample_mean 92 | 93 | sample = sample_mean.clamp(0, 1) 94 | sample = sample.cpu().permute(0, 2, 3, 1).numpy() 95 | if output_type == "pil": 96 | sample = self.numpy_to_pil(sample) 97 | 98 | if not return_dict: 99 | return (sample,) 100 | 101 | return ImagePipelineOutput(images=sample) 102 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Union 3 | 4 | import numpy as np 5 | 6 | import PIL 7 | from PIL import Image 8 | 9 | from ...utils 
import BaseOutput, is_onnx_available, is_transformers_available 10 | 11 | 12 | @dataclass 13 | class StableDiffusionPipelineOutput(BaseOutput): 14 | """ 15 | Output class for Stable Diffusion pipelines. 16 | 17 | Args: 18 | images (`List[PIL.Image.Image]` or `np.ndarray`) 19 | List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, 20 | num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline. 21 | nsfw_content_detected (`List[bool]`) 22 | List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work" 23 | (nsfw) content. 24 | """ 25 | 26 | images: Union[List[PIL.Image.Image], np.ndarray] 27 | nsfw_content_detected: List[bool] 28 | 29 | 30 | if is_transformers_available(): 31 | from .pipeline_stable_diffusion import StableDiffusionPipeline 32 | from .pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline 33 | from .pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline 34 | from .safety_checker import StableDiffusionSafetyChecker 35 | 36 | if is_transformers_available() and is_onnx_available(): 37 | from .pipeline_stable_diffusion_onnx import StableDiffusionOnnxPipeline 38 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_img2img.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_img2img.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_inpaint.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/pipeline_stable_diffusion_inpaint.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/safety_checker.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stable_diffusion/__pycache__/safety_checker.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stable_diffusion/safety_checker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel 6 | 7 | from ...utils import logging 8 | 9 | 10 | logger = logging.get_logger(__name__) 11 | 12 | 13 | def cosine_distance(image_embeds, text_embeds): 14 | normalized_image_embeds = nn.functional.normalize(image_embeds) 15 | normalized_text_embeds = nn.functional.normalize(text_embeds) 16 | return torch.mm(normalized_image_embeds, normalized_text_embeds.t()) 17 | 18 | 19 | class StableDiffusionSafetyChecker(PreTrainedModel): 20 | config_class = CLIPConfig 21 | 22 | def __init__(self, config: CLIPConfig): 23 | super().__init__(config) 24 | 25 | self.vision_model = CLIPVisionModel(config.vision_config) 26 | self.visual_projection = nn.Linear(config.vision_config.hidden_size, config.projection_dim, bias=False) 27 | 28 | self.concept_embeds = nn.Parameter(torch.ones(17, config.projection_dim), requires_grad=False) 29 | self.special_care_embeds = nn.Parameter(torch.ones(3, config.projection_dim), requires_grad=False) 30 | 31 | self.register_buffer("concept_embeds_weights", torch.ones(17)) 32 | self.register_buffer("special_care_embeds_weights", torch.ones(3)) 33 | 34 | @torch.no_grad() 35 | def forward(self, clip_input, images): 36 | pooled_output = self.vision_model(clip_input)[1] # pooled_output 37 | image_embeds = self.visual_projection(pooled_output) 38 | 39 | special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds).cpu().numpy() 40 | cos_dist = cosine_distance(image_embeds, self.concept_embeds).cpu().numpy() 41 | 42 | result = [] 43 | batch_size = image_embeds.shape[0] 44 | for i in range(batch_size): 45 | result_img = {"special_scores": {}, "special_care": [], "concept_scores": {}, "bad_concepts": []} 46 | 47 | # increase this value to create a stronger `nfsw` filter 48 | # at the cost of increasing the possibility of filtering benign images 49 | adjustment = 0.0 50 | 51 | for concet_idx in range(len(special_cos_dist[0])): 52 | concept_cos = special_cos_dist[i][concet_idx] 53 | concept_threshold = self.special_care_embeds_weights[concet_idx].item() 54 | result_img["special_scores"][concet_idx] = round(concept_cos - concept_threshold + adjustment, 3) 55 | if result_img["special_scores"][concet_idx] > 0: 56 | result_img["special_care"].append({concet_idx, result_img["special_scores"][concet_idx]}) 57 | adjustment = 0.01 58 | 59 | for concet_idx in range(len(cos_dist[0])): 60 | concept_cos = cos_dist[i][concet_idx] 61 | concept_threshold = self.concept_embeds_weights[concet_idx].item() 62 | result_img["concept_scores"][concet_idx] = round(concept_cos - concept_threshold + adjustment, 3) 63 | if result_img["concept_scores"][concet_idx] > 0: 64 | result_img["bad_concepts"].append(concet_idx) 65 | 66 | result.append(result_img) 67 | 68 | has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result] 69 | 70 | for idx, has_nsfw_concept in enumerate(has_nsfw_concepts): 71 | if has_nsfw_concept: 72 | images[idx] = np.zeros(images[idx].shape) # black image 73 | 74 | if any(has_nsfw_concepts): 75 | 
logger.warning( 76 | "Potential NSFW content was detected in one or more images. A black image will be returned instead." 77 | " Try again with a different prompt and/or seed." 78 | ) 79 | 80 | return images, has_nsfw_concepts 81 | 82 | @torch.inference_mode() 83 | def forward_onnx(self, clip_input: torch.FloatTensor, images: torch.FloatTensor): 84 | pooled_output = self.vision_model(clip_input)[1] # pooled_output 85 | image_embeds = self.visual_projection(pooled_output) 86 | 87 | special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds) 88 | cos_dist = cosine_distance(image_embeds, self.concept_embeds) 89 | 90 | # increase this value to create a stronger `nsfw` filter 91 | # at the cost of increasing the possibility of filtering benign images 92 | adjustment = 0.0 93 | 94 | special_scores = special_cos_dist - self.special_care_embeds_weights + adjustment 95 | # special_scores = special_scores.round(decimals=3) 96 | special_care = torch.any(special_scores > 0, dim=1) 97 | special_adjustment = special_care * 0.01 98 | special_adjustment = special_adjustment.unsqueeze(1).expand(-1, cos_dist.shape[1]) 99 | 100 | concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment 101 | # concept_scores = concept_scores.round(decimals=3) 102 | has_nsfw_concepts = torch.any(concept_scores > 0, dim=1) 103 | 104 | images[has_nsfw_concepts] = 0.0 # black image 105 | 106 | return images, has_nsfw_concepts 107 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stochastic_karras_ve/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pipeline_stochastic_karras_ve import KarrasVePipeline 3 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stochastic_karras_ve/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stochastic_karras_ve/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/pipelines/stochastic_karras_ve/__pycache__/pipeline_stochastic_karras_ve.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/pipelines/stochastic_karras_ve/__pycache__/pipeline_stochastic_karras_ve.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ..utils import is_scipy_available 16 | from .scheduling_ddim import DDIMScheduler 17 | from .scheduling_ddpm import DDPMScheduler 18 | from .scheduling_karras_ve import KarrasVeScheduler 19 | from .scheduling_pndm import PNDMScheduler 20 | from .scheduling_sde_ve import ScoreSdeVeScheduler 21 | from .scheduling_sde_vp import ScoreSdeVpScheduler 22 | from .scheduling_utils import SchedulerMixin 23 | 24 | 25 | if is_scipy_available(): 26 | from .scheduling_lms_discrete import LMSDiscreteScheduler 27 | else: 28 | from ..utils.dummy_scipy_objects import * # noqa F403 29 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_ddim.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_ddim.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_ddpm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_ddpm.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_karras_ve.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_karras_ve.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_lms_discrete.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_lms_discrete.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_pndm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_pndm.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_sde_ve.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_sde_ve.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_sde_vp.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_sde_vp.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/__pycache__/scheduling_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/schedulers/__pycache__/scheduling_utils.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/scheduling_sde_vp.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch 16 | 17 | # TODO(Patrick, Anton, Suraj) - make scheduler framework indepedent and clean-up a bit 18 | 19 | import numpy as np 20 | import torch 21 | 22 | from ..configuration_utils import ConfigMixin, register_to_config 23 | from .scheduling_utils import SchedulerMixin 24 | 25 | 26 | class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): 27 | """ 28 | The variance preserving stochastic differential equation (SDE) scheduler. 29 | 30 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 31 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 32 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 33 | [`~ConfigMixin.from_config`] functios. 
34 | 35 | For more information, see the original paper: https://arxiv.org/abs/2011.13456 36 | 37 | UNDER CONSTRUCTION 38 | 39 | """ 40 | 41 | @register_to_config 42 | def __init__(self, num_train_timesteps=2000, beta_min=0.1, beta_max=20, sampling_eps=1e-3, tensor_format="np"): 43 | 44 | self.sigmas = None 45 | self.discrete_sigmas = None 46 | self.timesteps = None 47 | 48 | def set_timesteps(self, num_inference_steps): 49 | self.timesteps = torch.linspace(1, self.config.sampling_eps, num_inference_steps) 50 | 51 | def step_pred(self, score, x, t): 52 | if self.timesteps is None: 53 | raise ValueError( 54 | "`self.timesteps` is not set, you need to run 'set_timesteps' after creating the scheduler" 55 | ) 56 | 57 | # TODO(Patrick) better comments + non-PyTorch 58 | # postprocess model score 59 | log_mean_coeff = ( 60 | -0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min 61 | ) 62 | std = torch.sqrt(1.0 - torch.exp(2.0 * log_mean_coeff)) 63 | score = -score / std[:, None, None, None] 64 | 65 | # compute 66 | dt = -1.0 / len(self.timesteps) 67 | 68 | beta_t = self.config.beta_min + t * (self.config.beta_max - self.config.beta_min) 69 | drift = -0.5 * beta_t[:, None, None, None] * x 70 | diffusion = torch.sqrt(beta_t) 71 | drift = drift - diffusion[:, None, None, None] ** 2 * score 72 | x_mean = x + drift * dt 73 | 74 | # add noise 75 | noise = torch.randn_like(x) 76 | x = x_mean + diffusion[:, None, None, None] * np.sqrt(-dt) * noise 77 | 78 | return x, x_mean 79 | 80 | def __len__(self): 81 | return self.config.num_train_timesteps 82 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/schedulers/scheduling_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass 15 | from typing import Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from ..utils import BaseOutput 21 | 22 | 23 | SCHEDULER_CONFIG_NAME = "scheduler_config.json" 24 | 25 | 26 | @dataclass 27 | class SchedulerOutput(BaseOutput): 28 | """ 29 | Base class for the scheduler's step function output. 30 | 31 | Args: 32 | prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): 33 | Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the 34 | denoising loop. 35 | """ 36 | 37 | prev_sample: torch.FloatTensor 38 | 39 | 40 | class SchedulerMixin: 41 | """ 42 | Mixin containing common functions for the schedulers. 
43 | """ 44 | 45 | config_name = SCHEDULER_CONFIG_NAME 46 | ignore_for_config = ["tensor_format"] 47 | 48 | def set_format(self, tensor_format="pt"): 49 | self.tensor_format = tensor_format 50 | if tensor_format == "pt": 51 | for key, value in vars(self).items(): 52 | if isinstance(value, np.ndarray): 53 | setattr(self, key, torch.from_numpy(value)) 54 | 55 | return self 56 | 57 | def clip(self, tensor, min_value=None, max_value=None): 58 | tensor_format = getattr(self, "tensor_format", "pt") 59 | 60 | if tensor_format == "np": 61 | return np.clip(tensor, min_value, max_value) 62 | elif tensor_format == "pt": 63 | return torch.clamp(tensor, min_value, max_value) 64 | 65 | raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") 66 | 67 | def log(self, tensor): 68 | tensor_format = getattr(self, "tensor_format", "pt") 69 | 70 | if tensor_format == "np": 71 | return np.log(tensor) 72 | elif tensor_format == "pt": 73 | return torch.log(tensor) 74 | 75 | raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") 76 | 77 | def match_shape(self, values: Union[np.ndarray, torch.Tensor], broadcast_array: Union[np.ndarray, torch.Tensor]): 78 | """ 79 | Turns a 1-D array into an array or tensor with len(broadcast_array.shape) dims. 80 | 81 | Args: 82 | values: an array or tensor of values to extract. 83 | broadcast_array: an array with a larger shape of K dimensions with the batch 84 | dimension equal to the length of timesteps. 85 | Returns: 86 | a tensor of shape [batch_size, 1, ...] where the shape has K dims. 87 | """ 88 | 89 | tensor_format = getattr(self, "tensor_format", "pt") 90 | values = values.flatten() 91 | 92 | while len(values.shape) < len(broadcast_array.shape): 93 | values = values[..., None] 94 | if tensor_format == "pt": 95 | values = values.to(broadcast_array.device) 96 | 97 | return values 98 | 99 | def norm(self, tensor): 100 | tensor_format = getattr(self, "tensor_format", "pt") 101 | if tensor_format == "np": 102 | return np.linalg.norm(tensor) 103 | elif tensor_format == "pt": 104 | return torch.norm(tensor.reshape(tensor.shape[0], -1), dim=-1).mean() 105 | 106 | raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") 107 | 108 | def randn_like(self, tensor, generator=None): 109 | tensor_format = getattr(self, "tensor_format", "pt") 110 | if tensor_format == "np": 111 | return np.random.randn(*np.shape(tensor)) 112 | elif tensor_format == "pt": 113 | # return torch.randn_like(tensor) 114 | return torch.randn(tensor.shape, layout=tensor.layout, generator=generator).to(tensor.device) 115 | 116 | raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") 117 | 118 | def zeros_like(self, tensor): 119 | tensor_format = getattr(self, "tensor_format", "pt") 120 | if tensor_format == "np": 121 | return np.zeros_like(tensor) 122 | elif tensor_format == "pt": 123 | return torch.zeros_like(tensor) 124 | 125 | raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") 126 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/testing_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import unittest 4 | from distutils.util import strtobool 5 | 6 | import torch 7 | 8 | from packaging import version 9 | 10 | 11 | global_rng = random.Random() 12 | torch_device = "cuda" if torch.cuda.is_available() else "cpu" 13 | is_torch_higher_equal_than_1_12 = 
version.parse(version.parse(torch.__version__).base_version) >= version.parse("1.12") 14 | 15 | if is_torch_higher_equal_than_1_12: 16 | torch_device = "mps" if torch.backends.mps.is_available() else torch_device 17 | 18 | 19 | def parse_flag_from_env(key, default=False): 20 | try: 21 | value = os.environ[key] 22 | except KeyError: 23 | # KEY isn't set, default to `default`. 24 | _value = default 25 | else: 26 | # KEY is set, convert it to True or False. 27 | try: 28 | _value = strtobool(value) 29 | except ValueError: 30 | # More values are supported, but let's keep the message simple. 31 | raise ValueError(f"If set, {key} must be yes or no.") 32 | return _value 33 | 34 | 35 | _run_slow_tests = parse_flag_from_env("RUN_SLOW", default=False) 36 | 37 | 38 | def floats_tensor(shape, scale=1.0, rng=None, name=None): 39 | """Creates a random float32 tensor""" 40 | if rng is None: 41 | rng = global_rng 42 | 43 | total_dims = 1 44 | for dim in shape: 45 | total_dims *= dim 46 | 47 | values = [] 48 | for _ in range(total_dims): 49 | values.append(rng.random() * scale) 50 | 51 | return torch.tensor(data=values, dtype=torch.float).view(shape).contiguous() 52 | 53 | 54 | def slow(test_case): 55 | """ 56 | Decorator marking a test as slow. 57 | 58 | Slow tests are skipped by default. Set the RUN_SLOW environment variable to a truthy value to run them. 59 | 60 | """ 61 | return unittest.skipUnless(_run_slow_tests, "test is slow")(test_case) 62 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/training_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import torch 7 | 8 | 9 | def enable_full_determinism(seed: int): 10 | """ 11 | Helper function for reproducible behavior during distributed training. See 12 | - https://pytorch.org/docs/stable/notes/randomness.html for pytorch 13 | """ 14 | # set seed first 15 | set_seed(seed) 16 | 17 | # Enable PyTorch deterministic mode. This potentially requires either the environment 18 | # variable 'CUDA_LAUNCH_BLOCKING' or 'CUBLAS_WORKSPACE_CONFIG' to be set, 19 | # depending on the CUDA version, so we set them both here 20 | os.environ["CUDA_LAUNCH_BLOCKING"] = "1" 21 | os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8" 22 | torch.use_deterministic_algorithms(True) 23 | 24 | # Enable CUDNN deterministic mode 25 | torch.backends.cudnn.deterministic = True 26 | torch.backends.cudnn.benchmark = False 27 | 28 | 29 | def set_seed(seed: int): 30 | """ 31 | Args: 32 | Helper function for reproducible behavior to set the seed in `random`, `numpy`, `torch`. 33 | seed (`int`): The seed to set. 34 | """ 35 | random.seed(seed) 36 | np.random.seed(seed) 37 | torch.manual_seed(seed) 38 | torch.cuda.manual_seed_all(seed) 39 | # ^^ safe to call this function even if cuda is not available 40 | 41 | 42 | class EMAModel: 43 | """ 44 | Exponential Moving Average of models weights 45 | """ 46 | 47 | def __init__( 48 | self, 49 | model, 50 | update_after_step=0, 51 | inv_gamma=1.0, 52 | power=2 / 3, 53 | min_value=0.0, 54 | max_value=0.9999, 55 | device=None, 56 | ): 57 | """ 58 | @crowsonkb's notes on EMA Warmup: 59 | If gamma=1 and power=1, implements a simple average. 
gamma=1, power=2/3 are good values for models you plan 60 | to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps), 61 | gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999 62 | at 215.4k steps). 63 | Args: 64 | inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1. 65 | power (float): Exponential factor of EMA warmup. Default: 2/3. 66 | min_value (float): The minimum EMA decay rate. Default: 0. 67 | """ 68 | 69 | self.averaged_model = copy.deepcopy(model).eval() 70 | self.averaged_model.requires_grad_(False) 71 | 72 | self.update_after_step = update_after_step 73 | self.inv_gamma = inv_gamma 74 | self.power = power 75 | self.min_value = min_value 76 | self.max_value = max_value 77 | 78 | if device is not None: 79 | self.averaged_model = self.averaged_model.to(device=device) 80 | 81 | self.decay = 0.0 82 | self.optimization_step = 0 83 | 84 | def get_decay(self, optimization_step): 85 | """ 86 | Compute the decay factor for the exponential moving average. 87 | """ 88 | step = max(0, optimization_step - self.update_after_step - 1) 89 | value = 1 - (1 + step / self.inv_gamma) ** -self.power 90 | 91 | if step <= 0: 92 | return 0.0 93 | 94 | return max(self.min_value, min(value, self.max_value)) 95 | 96 | @torch.no_grad() 97 | def step(self, new_model): 98 | ema_state_dict = {} 99 | ema_params = self.averaged_model.state_dict() 100 | 101 | self.decay = self.get_decay(self.optimization_step) 102 | 103 | for key, param in new_model.named_parameters(): 104 | if isinstance(param, dict): 105 | continue 106 | try: 107 | ema_param = ema_params[key] 108 | except KeyError: 109 | ema_param = param.float().clone() if param.ndim == 1 else copy.deepcopy(param) 110 | ema_params[key] = ema_param 111 | 112 | if not param.requires_grad: 113 | ema_params[key].copy_(param.to(dtype=ema_param.dtype).data) 114 | ema_param = ema_params[key] 115 | else: 116 | ema_param.mul_(self.decay) 117 | ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=1 - self.decay) 118 | 119 | ema_state_dict[key] = ema_param 120 | 121 | for key, param in new_model.named_buffers(): 122 | ema_state_dict[key] = param 123 | 124 | self.averaged_model.load_state_dict(ema_state_dict, strict=False) 125 | self.optimization_step += 1 126 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | import os 17 | 18 | from .import_utils import ( 19 | ENV_VARS_TRUE_AND_AUTO_VALUES, 20 | ENV_VARS_TRUE_VALUES, 21 | USE_JAX, 22 | USE_TF, 23 | USE_TORCH, 24 | DummyObject, 25 | is_flax_available, 26 | is_inflect_available, 27 | is_modelcards_available, 28 | is_onnx_available, 29 | is_scipy_available, 30 | is_tf_available, 31 | is_torch_available, 32 | is_transformers_available, 33 | is_unidecode_available, 34 | requires_backends, 35 | ) 36 | from .logging import get_logger 37 | from .outputs import BaseOutput 38 | 39 | 40 | logger = get_logger(__name__) 41 | 42 | 43 | hf_cache_home = os.path.expanduser( 44 | os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) 45 | ) 46 | default_cache_path = os.path.join(hf_cache_home, "diffusers") 47 | 48 | 49 | CONFIG_NAME = "config.json" 50 | HUGGINGFACE_CO_RESOLVE_ENDPOINT = "https://huggingface.co" 51 | DIFFUSERS_CACHE = default_cache_path 52 | DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules" 53 | HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) 54 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__pycache__/dummy_transformers_and_onnx_objects.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/utils/__pycache__/dummy_transformers_and_onnx_objects.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__pycache__/import_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/utils/__pycache__/import_utils.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__pycache__/logging.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/utils/__pycache__/logging.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/__pycache__/outputs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image-editing/my_diffusers/utils/__pycache__/outputs.cpython-310.pyc -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/dummy_scipy_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 
2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class LMSDiscreteScheduler(metaclass=DummyObject): 8 | _backends = ["scipy"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["scipy"]) 12 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/dummy_transformers_and_inflect_and_unidecode_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | from ..utils import DummyObject, requires_backends 4 | 5 | 6 | class GradTTSPipeline(metaclass=DummyObject): 7 | _backends = ["transformers", "inflect", "unidecode"] 8 | 9 | def __init__(self, *args, **kwargs): 10 | requires_backends(self, ["transformers", "inflect", "unidecode"]) 11 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/dummy_transformers_and_onnx_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class StableDiffusionOnnxPipeline(metaclass=DummyObject): 8 | _backends = ["transformers", "onnx"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["transformers", "onnx"]) 12 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/dummy_transformers_objects.py: -------------------------------------------------------------------------------- 1 | # This file is autogenerated by the command `make fix-copies`, do not edit. 2 | # flake8: noqa 3 | 4 | from ..utils import DummyObject, requires_backends 5 | 6 | 7 | class LDMTextToImagePipeline(metaclass=DummyObject): 8 | _backends = ["transformers"] 9 | 10 | def __init__(self, *args, **kwargs): 11 | requires_backends(self, ["transformers"]) 12 | 13 | 14 | class StableDiffusionImg2ImgPipeline(metaclass=DummyObject): 15 | _backends = ["transformers"] 16 | 17 | def __init__(self, *args, **kwargs): 18 | requires_backends(self, ["transformers"]) 19 | 20 | 21 | class StableDiffusionInpaintPipeline(metaclass=DummyObject): 22 | _backends = ["transformers"] 23 | 24 | def __init__(self, *args, **kwargs): 25 | requires_backends(self, ["transformers"]) 26 | 27 | 28 | class StableDiffusionPipeline(metaclass=DummyObject): 29 | _backends = ["transformers"] 30 | 31 | def __init__(self, *args, **kwargs): 32 | requires_backends(self, ["transformers"]) 33 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/model_card_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | {{ card_data }} 3 | --- 4 | 5 | 7 | 8 | # {{ model_name | default("Diffusion Model") }} 9 | 10 | ## Model description 11 | 12 | This diffusion model is trained with the [🤗 Diffusers](https://github.com/huggingface/diffusers) library 13 | on the `{{ dataset_name }}` dataset. 
14 | 15 | ## Intended uses & limitations 16 | 17 | #### How to use 18 | 19 | ```python 20 | # TODO: add an example code snippet for running this diffusion pipeline 21 | ``` 22 | 23 | #### Limitations and bias 24 | 25 | [TODO: provide examples of latent issues and potential remediations] 26 | 27 | ## Training data 28 | 29 | [TODO: describe the data used to train the model] 30 | 31 | ### Training hyperparameters 32 | 33 | The following hyperparameters were used during training: 34 | - learning_rate: {{ learning_rate }} 35 | - train_batch_size: {{ train_batch_size }} 36 | - eval_batch_size: {{ eval_batch_size }} 37 | - gradient_accumulation_steps: {{ gradient_accumulation_steps }} 38 | - optimizer: AdamW with betas=({{ adam_beta1 }}, {{ adam_beta2 }}), weight_decay={{ adam_weight_decay }} and epsilon={{ adam_epsilon }} 39 | - lr_scheduler: {{ lr_scheduler }} 40 | - lr_warmup_steps: {{ lr_warmup_steps }} 41 | - ema_inv_gamma: {{ ema_inv_gamma }} 42 | - ema_power: {{ ema_power }} 43 | - ema_max_decay: {{ ema_max_decay }} 44 | - mixed_precision: {{ mixed_precision }} 45 | 46 | ### Training results 47 | 48 | 📈 [TensorBoard logs](https://huggingface.co/{{ repo_name }}/tensorboard?#scalars) 49 | 50 | 51 | -------------------------------------------------------------------------------- /image-editing/my_diffusers/utils/outputs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Generic utilities 16 | """ 17 | 18 | import warnings 19 | from collections import OrderedDict 20 | from dataclasses import fields 21 | from typing import Any, Tuple 22 | 23 | import numpy as np 24 | 25 | from .import_utils import is_torch_available 26 | 27 | 28 | def is_tensor(x): 29 | """ 30 | Tests if `x` is a `torch.Tensor` or `np.ndarray`. 31 | """ 32 | if is_torch_available(): 33 | import torch 34 | 35 | if isinstance(x, torch.Tensor): 36 | return True 37 | 38 | return isinstance(x, np.ndarray) 39 | 40 | 41 | class BaseOutput(OrderedDict): 42 | """ 43 | Base class for all model outputs as dataclass. Has a `__getitem__` that allows indexing by integer or slice (like a 44 | tuple) or strings (like a dictionary) that will ignore the `None` attributes. Otherwise behaves like a regular 45 | python dictionary. 46 | 47 | 48 | 49 | You can't unpack a `BaseOutput` directly. Use the [`~utils.BaseOutput.to_tuple`] method to convert it to a tuple 50 | before.
51 | 52 | 53 | """ 54 | 55 | def __post_init__(self): 56 | class_fields = fields(self) 57 | 58 | # Safety and consistency checks 59 | if not len(class_fields): 60 | raise ValueError(f"{self.__class__.__name__} has no fields.") 61 | 62 | for field in class_fields: 63 | v = getattr(self, field.name) 64 | if v is not None: 65 | self[field.name] = v 66 | 67 | def __delitem__(self, *args, **kwargs): 68 | raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.") 69 | 70 | def setdefault(self, *args, **kwargs): 71 | raise Exception(f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance.") 72 | 73 | def pop(self, *args, **kwargs): 74 | raise Exception(f"You cannot use ``pop`` on a {self.__class__.__name__} instance.") 75 | 76 | def update(self, *args, **kwargs): 77 | raise Exception(f"You cannot use ``update`` on a {self.__class__.__name__} instance.") 78 | 79 | def __getitem__(self, k): 80 | if isinstance(k, str): 81 | inner_dict = {k: v for (k, v) in self.items()} 82 | if self.__class__.__name__ in ["StableDiffusionPipelineOutput", "ImagePipelineOutput"] and k == "sample": 83 | warnings.warn( 84 | "The keyword 'samples' is deprecated and will be removed in version 0.4.0. Please use `.images` or" 85 | " `'images'` instead.", 86 | DeprecationWarning, 87 | ) 88 | return inner_dict["images"] 89 | return inner_dict[k] 90 | else: 91 | return self.to_tuple()[k] 92 | 93 | def __setattr__(self, name, value): 94 | if name in self.keys() and value is not None: 95 | # Don't call self.__setitem__ to avoid recursion errors 96 | super().__setitem__(name, value) 97 | super().__setattr__(name, value) 98 | 99 | def __setitem__(self, key, value): 100 | # Will raise a KeyException if needed 101 | super().__setitem__(key, value) 102 | # Don't call self.__setattr__ to avoid recursion errors 103 | super().__setattr__(key, value) 104 | 105 | def to_tuple(self) -> Tuple[Any]: 106 | """ 107 | Convert self to a tuple containing all the attributes/keys that are not `None`. 
108 | """ 109 | return tuple(self[k] for k in self.keys()) 110 | -------------------------------------------------------------------------------- /image_examples/BDIADDIM_t2i_20pairs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/BDIADDIM_t2i_20pairs.png -------------------------------------------------------------------------------- /image_examples/controlnet_BDIA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/controlnet_BDIA.png -------------------------------------------------------------------------------- /image_examples/controlnet_BDIA_2nd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/controlnet_BDIA_2nd.png -------------------------------------------------------------------------------- /image_examples/controlnet_BDIA_pro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/controlnet_BDIA_pro.png -------------------------------------------------------------------------------- /image_examples/image_editing_cat_lion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/image_editing_cat_lion.png -------------------------------------------------------------------------------- /image_examples/woman_editing_2nd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/image_examples/woman_editing_2nd.png -------------------------------------------------------------------------------- /text-to-image/Readme.md: -------------------------------------------------------------------------------- 1 | We implemented BDIA-DDIM in Stable Diffusion V2. In particular, we introduced an additional file, "BDIAddim.py", in the folder "stablediffusionV2/ldm/models/diffusion" for BDIA-DDIM. 2 | 3 | Steps to run the code: 4 | 1. Download the pretrained model v2-1_512-ema-pruned.ckpt from the following link: https://huggingface.co/stabilityai/stable-diffusion-2-1-base/tree/main, and then put the model in the "checkpoints" folder. 5 | 2. Run the Python file "sample.py".
The generated images can be found in the "outputs" folder. 6 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Stability AI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/__pycache__/image_resize.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/__pycache__/image_resize.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/checkpoints/checkpoints.txt: -------------------------------------------------------------------------------- 1 | Put unCLIP checkpoints here.
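To make the run steps in text-to-image/Readme.md above more concrete, here is a minimal, hypothetical sketch of what text-to-image sampling with BDIA-DDIM could look like. It is not the repository's sample.py: the class name `BDIASampler` imported from BDIAddim.py and its `sample()` signature are assumptions, chosen to mirror the DDIMSampler interface in the ldm code base; the config path, checkpoint path, prompt, guidance scale, and step count are illustrative only.

```python
# Hypothetical sketch of BDIA-DDIM text-to-image sampling with Stable Diffusion V2.
# The sampler class name and its interface are ASSUMED to mirror ldm's DDIMSampler;
# check BDIAddim.py / sample.py in this repository for the actual API.
import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config
from ldm.models.diffusion.BDIAddim import BDIASampler  # assumed class name

# Load the SD V2.1-base model (checkpoint downloaded as described in the Readme).
config = OmegaConf.load("configs/stable-diffusion/v2-inference.yaml")
model = instantiate_from_config(config.model)
state_dict = torch.load("checkpoints/v2-1_512-ema-pruned.ckpt", map_location="cpu")["state_dict"]
model.load_state_dict(state_dict, strict=False)
model = model.cuda().eval()

sampler = BDIASampler(model)
prompt = "a professional photograph of an astronaut riding a horse"

with torch.no_grad(), torch.autocast("cuda"):
    cond = model.get_learned_conditioning([prompt])
    uncond = model.get_learned_conditioning([""])
    # 4 latent channels and 64x64 latents correspond to 512x512 output images.
    latents, _ = sampler.sample(
        S=50,
        batch_size=1,
        shape=(4, 64, 64),
        conditioning=cond,
        unconditional_conditioning=uncond,
        unconditional_guidance_scale=9.0,
        eta=0.0,
    )
    images = model.decode_first_stage(latents)        # values roughly in [-1, 1]
    images = ((images + 1.0) / 2.0).clamp(0.0, 1.0)   # map to [0, 1] before saving
```

sample.py presumably wraps a loop of this form and writes the resulting tensors to the "outputs" folder as image files.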
-------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/karlo/decoder_900M_vit_l.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: t2i-decoder 3 | diffusion_sampler: uniform 4 | hparams: 5 | image_size: 64 6 | num_channels: 320 7 | num_res_blocks: 3 8 | channel_mult: '' 9 | attention_resolutions: 32,16,8 10 | num_heads: -1 11 | num_head_channels: 64 12 | num_heads_upsample: -1 13 | use_scale_shift_norm: true 14 | dropout: 0.1 15 | clip_dim: 768 16 | clip_emb_mult: 4 17 | text_ctx: 77 18 | xf_width: 1536 19 | xf_layers: 0 20 | xf_heads: 0 21 | xf_final_ln: false 22 | resblock_updown: true 23 | learn_sigma: true 24 | text_drop: 0.3 25 | clip_emb_type: image 26 | clip_emb_drop: 0.1 27 | use_plm: true 28 | 29 | diffusion: 30 | steps: 1000 31 | learn_sigma: true 32 | sigma_small: false 33 | noise_schedule: squaredcos_cap_v2 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_learned_sigmas: true 37 | timestep_respacing: '' 38 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/karlo/improved_sr_64_256_1.4B.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: improved_sr_64_256 3 | diffusion_sampler: uniform 4 | hparams: 5 | channels: 320 6 | depth: 3 7 | channels_multiple: 8 | - 1 9 | - 2 10 | - 3 11 | - 4 12 | dropout: 0.0 13 | 14 | diffusion: 15 | steps: 1000 16 | learn_sigma: false 17 | sigma_small: true 18 | noise_schedule: squaredcos_cap_v2 19 | use_kl: false 20 | predict_xstart: false 21 | rescale_learned_sigmas: true 22 | timestep_respacing: '7' 23 | 24 | 25 | sampling: 26 | timestep_respacing: '7' # fix 27 | clip_denoise: true 28 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/karlo/prior_1B_vit_l.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | type: prior 3 | diffusion_sampler: uniform 4 | hparams: 5 | text_ctx: 77 6 | xf_width: 2048 7 | xf_layers: 20 8 | xf_heads: 32 9 | xf_final_ln: true 10 | text_drop: 0.2 11 | clip_dim: 768 12 | 13 | diffusion: 14 | steps: 1000 15 | learn_sigma: false 16 | sigma_small: true 17 | noise_schedule: squaredcos_cap_v2 18 | use_kl: false 19 | predict_xstart: true 20 | rescale_learned_sigmas: false 21 | timestep_respacing: '' 22 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/intel/v2-inference-bf16.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | linear_start: 0.00085 9 | linear_end: 0.0120 10 | num_timesteps_cond: 1 11 | log_every_t: 200 12 | timesteps: 1000 13 | first_stage_key: "jpg" 14 | cond_stage_key: "txt" 15 | image_size: 64 16 | channels: 4 17 | cond_stage_trainable: false 18 | conditioning_key: crossattn 19 | monitor: val/loss_simple_ema 20 | scale_factor: 0.18215 21 | use_ema: False # we set this to false because this is an inference only config 22 | 23 | unet_config: 24 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 25 | params: 26 | use_checkpoint: False 27 | use_fp16: False 28 | use_bf16: True 29 | image_size: 32 # unused 30 | 
in_channels: 4 31 | out_channels: 4 32 | model_channels: 320 33 | attention_resolutions: [ 4, 2, 1 ] 34 | num_res_blocks: 2 35 | channel_mult: [ 1, 2, 4, 4 ] 36 | num_head_channels: 64 # need to fix for flash-attn 37 | use_spatial_transformer: True 38 | use_linear_in_transformer: True 39 | transformer_depth: 1 40 | context_dim: 1024 41 | legacy: False 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: val/rec_loss 48 | ddconfig: 49 | #attn_type: "vanilla-xformers" 50 | double_z: true 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: 57 | - 1 58 | - 2 59 | - 4 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: [] 63 | dropout: 0.0 64 | lossconfig: 65 | target: torch.nn.Identity 66 | 67 | cond_stage_config: 68 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 69 | params: 70 | freeze: True 71 | layer: "penultimate" 72 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/intel/v2-inference-fp32.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | linear_start: 0.00085 9 | linear_end: 0.0120 10 | num_timesteps_cond: 1 11 | log_every_t: 200 12 | timesteps: 1000 13 | first_stage_key: "jpg" 14 | cond_stage_key: "txt" 15 | image_size: 64 16 | channels: 4 17 | cond_stage_trainable: false 18 | conditioning_key: crossattn 19 | monitor: val/loss_simple_ema 20 | scale_factor: 0.18215 21 | use_ema: False # we set this to false because this is an inference only config 22 | 23 | unet_config: 24 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 25 | params: 26 | use_checkpoint: False 27 | use_fp16: False 28 | image_size: 32 # unused 29 | in_channels: 4 30 | out_channels: 4 31 | model_channels: 320 32 | attention_resolutions: [ 4, 2, 1 ] 33 | num_res_blocks: 2 34 | channel_mult: [ 1, 2, 4, 4 ] 35 | num_head_channels: 64 # need to fix for flash-attn 36 | use_spatial_transformer: True 37 | use_linear_in_transformer: True 38 | transformer_depth: 1 39 | context_dim: 1024 40 | legacy: False 41 | 42 | first_stage_config: 43 | target: ldm.models.autoencoder.AutoencoderKL 44 | params: 45 | embed_dim: 4 46 | monitor: val/rec_loss 47 | ddconfig: 48 | #attn_type: "vanilla-xformers" 49 | double_z: true 50 | z_channels: 4 51 | resolution: 256 52 | in_channels: 3 53 | out_ch: 3 54 | ch: 128 55 | ch_mult: 56 | - 1 57 | - 2 58 | - 4 59 | - 4 60 | num_res_blocks: 2 61 | attn_resolutions: [] 62 | dropout: 0.0 63 | lossconfig: 64 | target: torch.nn.Identity 65 | 66 | cond_stage_config: 67 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 68 | params: 69 | freeze: True 70 | layer: "penultimate" 71 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/intel/v2-inference-v-bf16.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | parameterization: "v" 9 | linear_start: 0.00085 10 | linear_end: 0.0120 11 | num_timesteps_cond: 1 12 | log_every_t: 200 
13 | timesteps: 1000 14 | first_stage_key: "jpg" 15 | cond_stage_key: "txt" 16 | image_size: 64 17 | channels: 4 18 | cond_stage_trainable: false 19 | conditioning_key: crossattn 20 | monitor: val/loss_simple_ema 21 | scale_factor: 0.18215 22 | use_ema: False # we set this to false because this is an inference only config 23 | 24 | unet_config: 25 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 26 | params: 27 | use_checkpoint: False 28 | use_fp16: False 29 | use_bf16: True 30 | image_size: 32 # unused 31 | in_channels: 4 32 | out_channels: 4 33 | model_channels: 320 34 | attention_resolutions: [ 4, 2, 1 ] 35 | num_res_blocks: 2 36 | channel_mult: [ 1, 2, 4, 4 ] 37 | num_head_channels: 64 # need to fix for flash-attn 38 | use_spatial_transformer: True 39 | use_linear_in_transformer: True 40 | transformer_depth: 1 41 | context_dim: 1024 42 | legacy: False 43 | 44 | first_stage_config: 45 | target: ldm.models.autoencoder.AutoencoderKL 46 | params: 47 | embed_dim: 4 48 | monitor: val/rec_loss 49 | ddconfig: 50 | #attn_type: "vanilla-xformers" 51 | double_z: true 52 | z_channels: 4 53 | resolution: 256 54 | in_channels: 3 55 | out_ch: 3 56 | ch: 128 57 | ch_mult: 58 | - 1 59 | - 2 60 | - 4 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [] 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | 68 | cond_stage_config: 69 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 70 | params: 71 | freeze: True 72 | layer: "penultimate" 73 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/intel/v2-inference-v-fp32.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Intel Corporation 2 | # SPDX-License-Identifier: MIT 3 | 4 | model: 5 | base_learning_rate: 1.0e-4 6 | target: ldm.models.diffusion.ddpm.LatentDiffusion 7 | params: 8 | parameterization: "v" 9 | linear_start: 0.00085 10 | linear_end: 0.0120 11 | num_timesteps_cond: 1 12 | log_every_t: 200 13 | timesteps: 1000 14 | first_stage_key: "jpg" 15 | cond_stage_key: "txt" 16 | image_size: 64 17 | channels: 4 18 | cond_stage_trainable: false 19 | conditioning_key: crossattn 20 | monitor: val/loss_simple_ema 21 | scale_factor: 0.18215 22 | use_ema: False # we set this to false because this is an inference only config 23 | 24 | unet_config: 25 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 26 | params: 27 | use_checkpoint: False 28 | use_fp16: False 29 | image_size: 32 # unused 30 | in_channels: 4 31 | out_channels: 4 32 | model_channels: 320 33 | attention_resolutions: [ 4, 2, 1 ] 34 | num_res_blocks: 2 35 | channel_mult: [ 1, 2, 4, 4 ] 36 | num_head_channels: 64 # need to fix for flash-attn 37 | use_spatial_transformer: True 38 | use_linear_in_transformer: True 39 | transformer_depth: 1 40 | context_dim: 1024 41 | legacy: False 42 | 43 | first_stage_config: 44 | target: ldm.models.autoencoder.AutoencoderKL 45 | params: 46 | embed_dim: 4 47 | monitor: val/rec_loss 48 | ddconfig: 49 | #attn_type: "vanilla-xformers" 50 | double_z: true 51 | z_channels: 4 52 | resolution: 256 53 | in_channels: 3 54 | out_ch: 3 55 | ch: 128 56 | ch_mult: 57 | - 1 58 | - 2 59 | - 4 60 | - 4 61 | num_res_blocks: 2 62 | attn_resolutions: [] 63 | dropout: 0.0 64 | lossconfig: 65 | target: torch.nn.Identity 66 | 67 | cond_stage_config: 68 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 69 | params: 70 | freeze: True 71 | layer: "penultimate" 72 | 
-------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-1-stable-unclip-h-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion 4 | params: 5 | embedding_dropout: 0.25 6 | parameterization: "v" 7 | linear_start: 0.00085 8 | linear_end: 0.0120 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 96 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn-adm 17 | scale_factor: 0.18215 18 | monitor: val/loss_simple_ema 19 | use_ema: False 20 | 21 | embedder_config: 22 | target: ldm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder 23 | 24 | noise_aug_config: 25 | target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation 26 | params: 27 | timestep_dim: 1024 28 | noise_schedule_config: 29 | timesteps: 1000 30 | beta_schedule: squaredcos_cap_v2 31 | 32 | unet_config: 33 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 34 | params: 35 | num_classes: "sequential" 36 | adm_in_channels: 2048 37 | use_checkpoint: True 38 | image_size: 32 # unused 39 | in_channels: 4 40 | out_channels: 4 41 | model_channels: 320 42 | attention_resolutions: [ 4, 2, 1 ] 43 | num_res_blocks: 2 44 | channel_mult: [ 1, 2, 4, 4 ] 45 | num_head_channels: 64 # need to fix for flash-attn 46 | use_spatial_transformer: True 47 | use_linear_in_transformer: True 48 | transformer_depth: 1 49 | context_dim: 1024 50 | legacy: False 51 | 52 | first_stage_config: 53 | target: ldm.models.autoencoder.AutoencoderKL 54 | params: 55 | embed_dim: 4 56 | monitor: val/rec_loss 57 | ddconfig: 58 | attn_type: "vanilla-xformers" 59 | double_z: true 60 | z_channels: 4 61 | resolution: 256 62 | in_channels: 3 63 | out_ch: 3 64 | ch: 128 65 | ch_mult: 66 | - 1 67 | - 2 68 | - 4 69 | - 4 70 | num_res_blocks: 2 71 | attn_resolutions: [ ] 72 | dropout: 0.0 73 | lossconfig: 74 | target: torch.nn.Identity 75 | 76 | cond_stage_config: 77 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 78 | params: 79 | freeze: True 80 | layer: "penultimate" 81 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-1-stable-unclip-l-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion 4 | params: 5 | embedding_dropout: 0.25 6 | parameterization: "v" 7 | linear_start: 0.00085 8 | linear_end: 0.0120 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 96 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn-adm 17 | scale_factor: 0.18215 18 | monitor: val/loss_simple_ema 19 | use_ema: False 20 | 21 | embedder_config: 22 | target: ldm.modules.encoders.modules.ClipImageEmbedder 23 | params: 24 | model: "ViT-L/14" 25 | 26 | noise_aug_config: 27 | target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation 28 | params: 29 | clip_stats_path: "checkpoints/karlo_models/ViT-L-14_stats.th" 30 | timestep_dim: 768 31 | noise_schedule_config: 32 | timesteps: 1000 33 | beta_schedule: squaredcos_cap_v2 34 | 35 | unet_config: 36 | target: 
ldm.modules.diffusionmodules.openaimodel.UNetModel 37 | params: 38 | num_classes: "sequential" 39 | adm_in_channels: 1536 40 | use_checkpoint: True 41 | image_size: 32 # unused 42 | in_channels: 4 43 | out_channels: 4 44 | model_channels: 320 45 | attention_resolutions: [ 4, 2, 1 ] 46 | num_res_blocks: 2 47 | channel_mult: [ 1, 2, 4, 4 ] 48 | num_head_channels: 64 # need to fix for flash-attn 49 | use_spatial_transformer: True 50 | use_linear_in_transformer: True 51 | transformer_depth: 1 52 | context_dim: 1024 53 | legacy: False 54 | 55 | first_stage_config: 56 | target: ldm.models.autoencoder.AutoencoderKL 57 | params: 58 | embed_dim: 4 59 | monitor: val/rec_loss 60 | ddconfig: 61 | attn_type: "vanilla-xformers" 62 | double_z: true 63 | z_channels: 4 64 | resolution: 256 65 | in_channels: 3 66 | out_ch: 3 67 | ch: 128 68 | ch_mult: 69 | - 1 70 | - 2 71 | - 4 72 | - 4 73 | num_res_blocks: 2 74 | attn_resolutions: [ ] 75 | dropout: 0.0 76 | lossconfig: 77 | target: torch.nn.Identity 78 | 79 | cond_stage_config: 80 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 81 | params: 82 | freeze: True 83 | layer: "penultimate" -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-inference-v.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | parameterization: "v" 6 | linear_start: 0.00085 7 | linear_end: 0.0120 8 | num_timesteps_cond: 1 9 | log_every_t: 200 10 | timesteps: 1000 11 | first_stage_key: "jpg" 12 | cond_stage_key: "txt" 13 | image_size: 64 14 | channels: 4 15 | cond_stage_trainable: false 16 | conditioning_key: crossattn 17 | monitor: val/loss_simple_ema 18 | scale_factor: 0.18215 19 | use_ema: False # we set this to false because this is an inference only config 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | use_checkpoint: True 25 | use_fp16: False 26 | image_size: 32 # unused 27 | in_channels: 4 28 | out_channels: 4 29 | model_channels: 320 30 | attention_resolutions: [ 4, 2, 1 ] 31 | num_res_blocks: 2 32 | channel_mult: [ 1, 2, 4, 4 ] 33 | num_head_channels: 64 # need to fix for flash-attn 34 | use_spatial_transformer: True 35 | use_linear_in_transformer: True 36 | transformer_depth: 1 37 | context_dim: 1024 38 | legacy: False 39 | 40 | first_stage_config: 41 | target: ldm.models.autoencoder.AutoencoderKL 42 | params: 43 | embed_dim: 4 44 | monitor: val/rec_loss 45 | ddconfig: 46 | #attn_type: "vanilla-xformers" 47 | double_z: true 48 | z_channels: 4 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | - 4 58 | num_res_blocks: 2 59 | attn_resolutions: [] 60 | dropout: 0.0 61 | lossconfig: 62 | target: torch.nn.Identity 63 | 64 | cond_stage_config: 65 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 66 | params: 67 | freeze: True 68 | layer: "penultimate" 69 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-4 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 
1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False # we set this to false because this is an inference only config 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | use_checkpoint: True 24 | use_fp16: False 25 | image_size: 32 # unused 26 | in_channels: 4 27 | out_channels: 4 28 | model_channels: 320 29 | attention_resolutions: [ 4, 2, 1 ] 30 | num_res_blocks: 2 31 | channel_mult: [ 1, 2, 4, 4 ] 32 | num_head_channels: 64 # need to fix for flash-attn 33 | use_spatial_transformer: True 34 | use_linear_in_transformer: True 35 | transformer_depth: 1 36 | context_dim: 1024 37 | legacy: False 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | ddconfig: 45 | #attn_type: "vanilla-xformers" 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 65 | params: 66 | freeze: True 67 | layer: "penultimate" 68 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-inpainting-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-05 3 | target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: hybrid 16 | scale_factor: 0.18215 17 | monitor: val/loss_simple_ema 18 | finetune_keys: null 19 | use_ema: False 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | use_checkpoint: True 25 | image_size: 32 # unused 26 | in_channels: 9 27 | out_channels: 4 28 | model_channels: 320 29 | attention_resolutions: [ 4, 2, 1 ] 30 | num_res_blocks: 2 31 | channel_mult: [ 1, 2, 4, 4 ] 32 | num_head_channels: 64 # need to fix for flash-attn 33 | use_spatial_transformer: True 34 | use_linear_in_transformer: True 35 | transformer_depth: 1 36 | context_dim: 1024 37 | legacy: False 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | ddconfig: 45 | #attn_type: "vanilla-xformers" 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [ ] 59 | dropout: 0.0 60 | lossconfig: 61 | target: torch.nn.Identity 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 65 | params: 66 | freeze: True 67 | layer: "penultimate" 68 | 69 | 70 | data: 71 | target: ldm.data.laion.WebDataModuleFromConfig 72 | params: 73 | tar_base: null # for concat as in LAION-A 74 | p_unsafe_threshold: 0.1 75 | filter_word_list: "data/filters.yaml" 76 | 
max_pwatermark: 0.45 77 | batch_size: 8 78 | num_workers: 6 79 | multinode: True 80 | min_size: 512 81 | train: 82 | shards: 83 | - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -" 84 | - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -" 85 | - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -" 86 | - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -" 87 | - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar" 88 | shuffle: 10000 89 | image_key: jpg 90 | image_transforms: 91 | - target: torchvision.transforms.Resize 92 | params: 93 | size: 512 94 | interpolation: 3 95 | - target: torchvision.transforms.RandomCrop 96 | params: 97 | size: 512 98 | postprocess: 99 | target: ldm.data.laion.AddMask 100 | params: 101 | mode: "512train-large" 102 | p_drop: 0.25 103 | # NOTE use enough shards to avoid empty validation loops in workers 104 | validation: 105 | shards: 106 | - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - " 107 | shuffle: 0 108 | image_key: jpg 109 | image_transforms: 110 | - target: torchvision.transforms.Resize 111 | params: 112 | size: 512 113 | interpolation: 3 114 | - target: torchvision.transforms.CenterCrop 115 | params: 116 | size: 512 117 | postprocess: 118 | target: ldm.data.laion.AddMask 119 | params: 120 | mode: "512train-large" 121 | p_drop: 0.25 122 | 123 | lightning: 124 | find_unused_parameters: True 125 | modelcheckpoint: 126 | params: 127 | every_n_train_steps: 5000 128 | 129 | callbacks: 130 | metrics_over_trainsteps_checkpoint: 131 | params: 132 | every_n_train_steps: 10000 133 | 134 | image_logger: 135 | target: main.ImageLogger 136 | params: 137 | enable_autocast: False 138 | disabled: False 139 | batch_frequency: 1000 140 | max_images: 4 141 | increase_log_steps: False 142 | log_first_step: False 143 | log_images_kwargs: 144 | use_ema_scope: False 145 | inpaint: False 146 | plot_progressive_rows: False 147 | plot_diffusion_rows: False 148 | N: 4 149 | unconditional_guidance_scale: 5.0 150 | unconditional_guidance_label: [""] 151 | ddim_steps: 50 # todo check these out for depth2img, 152 | ddim_eta: 0.0 # todo check these out for depth2img, 153 | 154 | trainer: 155 | benchmark: True 156 | val_check_interval: 5000000 157 | num_sanity_val_steps: 0 158 | accumulate_grad_batches: 1 159 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/v2-midas-inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 5.0e-07 3 | target: ldm.models.diffusion.ddpm.LatentDepth2ImageDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false 15 | conditioning_key: hybrid 16 | scale_factor: 0.18215 17 | monitor: val/loss_simple_ema 18 | finetune_keys: null 19 | use_ema: False 20 | 21 | depth_stage_config: 22 | target: ldm.modules.midas.api.MiDaSInference 23 | params: 24 | model_type: "dpt_hybrid" 25 | 26 | unet_config: 27 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 28 | params: 29 | use_checkpoint: True 30 | image_size: 32 # unused 31 | in_channels: 5 32 | out_channels: 4 33 | model_channels: 320 34 | 
attention_resolutions: [ 4, 2, 1 ] 35 | num_res_blocks: 2 36 | channel_mult: [ 1, 2, 4, 4 ] 37 | num_head_channels: 64 # need to fix for flash-attn 38 | use_spatial_transformer: True 39 | use_linear_in_transformer: True 40 | transformer_depth: 1 41 | context_dim: 1024 42 | legacy: False 43 | 44 | first_stage_config: 45 | target: ldm.models.autoencoder.AutoencoderKL 46 | params: 47 | embed_dim: 4 48 | monitor: val/rec_loss 49 | ddconfig: 50 | #attn_type: "vanilla-xformers" 51 | double_z: true 52 | z_channels: 4 53 | resolution: 256 54 | in_channels: 3 55 | out_ch: 3 56 | ch: 128 57 | ch_mult: 58 | - 1 59 | - 2 60 | - 4 61 | - 4 62 | num_res_blocks: 2 63 | attn_resolutions: [ ] 64 | dropout: 0.0 65 | lossconfig: 66 | target: torch.nn.Identity 67 | 68 | cond_stage_config: 69 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 70 | params: 71 | freeze: True 72 | layer: "penultimate" 73 | 74 | 75 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/configs/stable-diffusion/x4-upscaling.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentUpscaleDiffusion 4 | params: 5 | parameterization: "v" 6 | low_scale_key: "lr" 7 | linear_start: 0.0001 8 | linear_end: 0.02 9 | num_timesteps_cond: 1 10 | log_every_t: 200 11 | timesteps: 1000 12 | first_stage_key: "jpg" 13 | cond_stage_key: "txt" 14 | image_size: 128 15 | channels: 4 16 | cond_stage_trainable: false 17 | conditioning_key: "hybrid-adm" 18 | monitor: val/loss_simple_ema 19 | scale_factor: 0.08333 20 | use_ema: False 21 | 22 | low_scale_config: 23 | target: ldm.modules.diffusionmodules.upscaling.ImageConcatWithNoiseAugmentation 24 | params: 25 | noise_schedule_config: # image space 26 | linear_start: 0.0001 27 | linear_end: 0.02 28 | max_noise_level: 350 29 | 30 | unet_config: 31 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 32 | params: 33 | use_checkpoint: True 34 | num_classes: 1000 # timesteps for noise conditioning (here constant, just need one) 35 | image_size: 128 36 | in_channels: 7 37 | out_channels: 4 38 | model_channels: 256 39 | attention_resolutions: [ 2,4,8] 40 | num_res_blocks: 2 41 | channel_mult: [ 1, 2, 2, 4] 42 | disable_self_attentions: [True, True, True, False] 43 | disable_middle_self_attn: False 44 | num_heads: 8 45 | use_spatial_transformer: True 46 | transformer_depth: 1 47 | context_dim: 1024 48 | legacy: False 49 | use_linear_in_transformer: True 50 | 51 | first_stage_config: 52 | target: ldm.models.autoencoder.AutoencoderKL 53 | params: 54 | embed_dim: 4 55 | ddconfig: 56 | # attn_type: "vanilla-xformers" this model needs efficient attention to be feasible on HR data, also the decoder seems to break in half precision (UNet is fine though) 57 | double_z: True 58 | z_channels: 4 59 | resolution: 256 60 | in_channels: 3 61 | out_ch: 3 62 | ch: 128 63 | ch_mult: [ 1,2,4 ] # num_down = len(ch_mult)-1 64 | num_res_blocks: 2 65 | attn_resolutions: [ ] 66 | dropout: 0.0 67 | 68 | lossconfig: 69 | target: torch.nn.Identity 70 | 71 | cond_stage_config: 72 | target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder 73 | params: 74 | freeze: True 75 | layer: "penultimate" 76 | 77 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/cv2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/cv2 -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/doc/UNCLIP.MD: -------------------------------------------------------------------------------- 1 | ### Stable unCLIP 2 | 3 | [unCLIP](https://openai.com/dall-e-2/) is the approach behind OpenAI's [DALL·E 2](https://openai.com/dall-e-2/), 4 | trained to invert CLIP image embeddings. 5 | We finetuned SD 2.1 to accept a CLIP ViT-L/14 image embedding in addition to the text encodings. 6 | This means that the model can be used to produce image variations, but can also be combined with a text-to-image 7 | embedding prior to yield a full text-to-image model at 768x768 resolution. 8 | 9 | If you would like to try a demo of this model on the web, please visit https://clipdrop.co/stable-diffusion-reimagine 10 | 11 | We provide two models, trained on OpenAI CLIP-L and OpenCLIP-H image embeddings, respectively, 12 | available from [https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip](https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip/tree/main). 13 | To use them, download the weights from Hugging Face and put them into the `checkpoints` folder. 14 | 15 | #### Image Variations 16 | ![image-variations-l-1](../assets/stable-samples/stable-unclip/unclip-variations.png) 17 | 18 | Diffusers integration 19 | Stable UnCLIP Image Variations is integrated with the [🧨 diffusers](https://github.com/huggingface/diffusers) library: 20 | ```python 21 | #pip install git+https://github.com/huggingface/diffusers.git transformers accelerate 22 | import requests 23 | import torch 24 | from PIL import Image 25 | from io import BytesIO 26 | 27 | from diffusers import StableUnCLIPImg2ImgPipeline 28 | 29 | #Start the StableUnCLIP Image variations pipeline 30 | pipe = StableUnCLIPImg2ImgPipeline.from_pretrained( 31 | "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=torch.float16, variant="fp16" 32 | ) 33 | pipe = pipe.to("cuda") 34 | 35 | #Get image from URL 36 | url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/tarsila_do_amaral.png" 37 | response = requests.get(url) 38 | init_image = Image.open(BytesIO(response.content)).convert("RGB") 39 | 40 | #Pipe to make the variation 41 | images = pipe(init_image).images 42 | images[0].save("tarsila_variation.png") 43 | ``` 44 | Check out the [Stable UnCLIP pipeline docs here](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_unclip) 45 | 46 | Streamlit UI demo 47 | 48 | ``` 49 | streamlit run scripts/streamlit/stableunclip.py 50 | ``` 51 | to launch a streamlit script that can be used to make image variations with both models (CLIP-L and OpenCLIP-H). 52 | These models can process a `noise_level`, which specifies an amount of Gaussian noise added to the CLIP embeddings. 53 | This can be used to increase output variance as in the following examples. 54 | 55 | ![image-variations-noise](../assets/stable-samples/stable-unclip/unclip-variations_noise.png) 56 | 57 | 58 | ### Stable Diffusion Meets Karlo 59 | ![panda](../assets/stable-samples/stable-unclip/panda.jpg) 60 | 61 | Recently, [KakaoBrain](https://kakaobrain.com/) openly released [Karlo](https://github.com/kakaobrain/karlo), a pretrained, large-scale replication of [unCLIP](https://arxiv.org/abs/2204.06125).
62 | We introduce _Stable Karlo_, a combination of the Karlo CLIP image embedding prior and Stable Diffusion v2.1-768. 63 | 64 | To run the model, first download the KARLO checkpoints 65 | ```shell 66 | mkdir -p checkpoints/karlo_models 67 | cd checkpoints/karlo_models 68 | wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/096db1af569b284eb76b3881534822d9/ViT-L-14.pt 69 | wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/0b62380a75e56f073e2844ab5199153d/ViT-L-14_stats.th 70 | wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/85626483eaca9f581e2a78d31ff905ca/prior-ckpt-step%3D01000000-of-01000000.ckpt 71 | cd ../../ 72 | ``` 73 | and the finetuned SD2.1 unCLIP-L checkpoint from [here](https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip/blob/main/sd21-unclip-l.ckpt), and put the ckpt into the `checkpoints` folder. 74 | 75 | Then, run 76 | 77 | ``` 78 | streamlit run scripts/streamlit/stableunclip.py 79 | ``` 80 | and pick the `use_karlo` option in the GUI. 81 | The script optionally supports sampling from the full Karlo model. To use it, download the 64x64 decoder and 64->256 upscaler 82 | via 83 | ```shell 84 | cd checkpoints/karlo_models 85 | wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/efdf6206d8ed593961593dc029a8affa/decoder-ckpt-step%3D01000000-of-01000000.ckpt 86 | wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/4226b831ae0279020d134281f3c31590/improved-sr-ckpt-step%3D1.2M.ckpt 87 | cd ../../ 88 | ``` 89 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/environment.yaml: -------------------------------------------------------------------------------- 1 | name: ldm 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - python=3.8.5 7 | - pip=20.3 8 | - cudatoolkit=11.3 9 | - pytorch=1.12.1 10 | - torchvision=0.13.1 11 | - numpy=1.23.1 12 | - pip: 13 | - albumentations==1.3.0 14 | - opencv-python==4.6.0.66 15 | - imageio==2.9.0 16 | - imageio-ffmpeg==0.4.2 17 | - pytorch-lightning==1.4.2 18 | - omegaconf==2.1.1 19 | - test-tube>=0.7.5 20 | - streamlit==1.12.1 21 | - einops==0.3.0 22 | - transformers==4.19.2 23 | - webdataset==0.2.5 24 | - kornia==0.6 25 | - open_clip_torch==2.0.2 26 | - invisible-watermark>=0.1.5 27 | - streamlit-drawable-canvas==0.8.0 28 | - torchmetrics==0.6.0 29 | - -e .
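One small addition to the `noise_level` remark in doc/UNCLIP.MD above: in the diffusers integration the same knob is exposed as an argument of the pipeline call, so the snippet from that document can be extended as below. This is a sketch that reuses the `pipe` and `init_image` objects defined there; the value 500 is illustrative, and the keyword arguments should be checked against the installed diffusers version.

```python
# Add Gaussian noise to the CLIP image embedding to increase output variance.
# noise_level=0 keeps the embedding unchanged; larger values give looser variations.
variations = pipe(init_image, noise_level=500, num_images_per_prompt=4).images
for i, image in enumerate(variations):
    image.save(f"tarsila_variation_noise500_{i}.png")
```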
30 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/image_resize.py: -------------------------------------------------------------------------------- 1 | # bash commands to download the data 2 | #wget http://images.cocodataset.org/zips/val2014.zip 3 | #wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip 4 | #unzip annotations_trainval2014.zip -d coco/ 5 | #unzip val2014.zip -d coco/ 6 | 7 | 8 | 9 | import cv2 10 | import os 11 | import glob 12 | 13 | def image_resize(path_source, path_des): 14 | 15 | if not os.path.exists(path_des): 16 | os.makedirs(path_des) 17 | 18 | fileList = glob.glob(os.path.join(path_source, "*.png")) 19 | 20 | for img_file in fileList: 21 | img = cv2.imread(img_file).astype(float)/255 22 | hei, width, _ = img.shape 23 | 24 | 25 | dim = min(hei, width) 26 | resized = cv2.resize(img, (int(width*256/dim),int(hei*256/dim)), interpolation = cv2.INTER_AREA) 27 | 28 | img_name = img_file.split('/')[-1].split('.')[0] 29 | cv2.imwrite(os.path.join(path_des,img_name+".jpg"),(resized*255).astype("uint8")) 30 | 31 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/__init__.py: -------------------------------------------------------------------------------- 1 | from .util import * -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/__pycache__/util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/__pycache__/util.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/data/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/data/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ldm.modules.midas.api import load_midas_transform 4 | 5 | 6 | class AddMiDaS(object): 7 | def __init__(self, model_type): 8 | super().__init__() 9 | self.transform = load_midas_transform(model_type) 10 | 11 | def pt2np(self, x): 12 | x = ((x + 1.0) * .5).detach().cpu().numpy() 13 | return x 14 | 15 | def np2pt(self, x): 16 | x = torch.from_numpy(x) * 2 - 1. 
17 | return x 18 | 19 | def __call__(self, sample): 20 | # sample['jpg'] is tensor hwc in [-1, 1] at this point 21 | x = self.pt2np(sample['jpg']) 22 | x = self.transform({"image": x})["image"] 23 | sample['midas_in'] = x 24 | return sample -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/__pycache__/autoencoder.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/__pycache__/autoencoder.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ABDIAddim.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ABDIAddim.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddim.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddim.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddim_backup_2nd.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddim_backup_2nd.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddimv2.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/BDIAddimv2.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/HIBDIAddim.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/HIBDIAddim.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/IIAddim.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/IIAddim.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ddim.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ddim.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ddpm.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/ddpm.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/plms.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/plms.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/sampling_util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/__pycache__/sampling_util.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/dpm_solver.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/dpm_solver.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/sampler.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/__pycache__/sampler.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/dpm_solver/sampler.py: -------------------------------------------------------------------------------- 1 | """SAMPLING ONLY.""" 2 | import torch 3 | 4 | from .dpm_solver import NoiseScheduleVP, model_wrapper, DPM_Solver 5 | 6 | MODEL_TYPES = { 7 | "eps": "noise", 8 | "v": "v" 9 | } 10 | 11 | 12 | class DPMSolverSampler(object): 13 | def __init__(self, model, device=torch.device("cuda"), **kwargs): 14 | super().__init__() 15 | self.model = model 16 | self.device = device 17 | to_torch = lambda x: x.clone().detach().to(torch.float32).to(model.device) 18 | self.register_buffer('alphas_cumprod', to_torch(model.alphas_cumprod)) 19 | 20 | def register_buffer(self, name, attr): 21 | if type(attr) == torch.Tensor: 22 | if attr.device != self.device: 23 | attr = attr.to(self.device) 24 | setattr(self, name, attr) 25 | 26 | @torch.no_grad() 27 | def sample(self, 28 | S, 29 | batch_size, 30 | shape, 31 | conditioning=None, 32 | callback=None, 33 | normals_sequence=None, 34 | img_callback=None, 35 | quantize_x0=False, 36 | eta=0., 37 | mask=None, 38 | x0=None, 39 | temperature=1., 40 | noise_dropout=0., 41 | score_corrector=None, 42 | corrector_kwargs=None, 43 | verbose=True, 44 | x_T=None, 45 | log_every_t=100, 46 | unconditional_guidance_scale=1., 47 | unconditional_conditioning=None, 48 | # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... 
49 | **kwargs 50 | ): 51 | 52 | if conditioning is not None: 53 | if isinstance(conditioning, dict): 54 | ctmp = conditioning[list(conditioning.keys())[0]] 55 | while isinstance(ctmp, list): ctmp = ctmp[0] 56 | if isinstance(ctmp, torch.Tensor): 57 | cbs = ctmp.shape[0] 58 | if cbs != batch_size: 59 | print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") 60 | elif isinstance(conditioning, list): 61 | for ctmp in conditioning: 62 | if ctmp.shape[0] != batch_size: 63 | print(f"Warning: Got {ctmp.shape[0]} conditionings but batch-size is {batch_size}") 64 | else: 65 | if isinstance(conditioning, torch.Tensor): 66 | if conditioning.shape[0] != batch_size: 67 | print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") 68 | 69 | # sampling 70 | C, H, W = shape 71 | size = (batch_size, C, H, W) 72 | 73 | print(f'Data shape for DPM-Solver sampling is {size}, sampling steps {S}') 74 | 75 | device = self.model.betas.device 76 | if x_T is None: 77 | img = torch.randn(size, device=device) 78 | else: 79 | img = x_T 80 | 81 | ns = NoiseScheduleVP('discrete', alphas_cumprod=self.alphas_cumprod) 82 | 83 | 84 | model_fn = model_wrapper( 85 | lambda x, t, c: self.model.apply_model(x, t, c), 86 | ns, 87 | model_type=MODEL_TYPES[self.model.parameterization], 88 | guidance_type="classifier-free", 89 | condition=conditioning, 90 | unconditional_condition=unconditional_conditioning, 91 | guidance_scale=unconditional_guidance_scale, 92 | ) 93 | 94 | dpm_solver = DPM_Solver(model_fn, ns, predict_x0=True, thresholding=False) 95 | x = dpm_solver.sample(img, steps=S, skip_type="time_uniform", method="multistep", order=2, 96 | lower_order_final=True) 97 | 98 | return x.to(device), None 99 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/models/diffusion/sampling_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def append_dims(x, target_dims): 6 | """Appends dimensions to the end of a tensor until it has target_dims dimensions. 
7 | From https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py""" 8 | dims_to_append = target_dims - x.ndim 9 | if dims_to_append < 0: 10 | raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less') 11 | return x[(...,) + (None,) * dims_to_append] 12 | 13 | 14 | def norm_thresholding(x0, value): 15 | s = append_dims(x0.pow(2).flatten(1).mean(1).sqrt().clamp(min=value), x0.ndim) 16 | return x0 * (value / s) 17 | 18 | 19 | def spatial_norm_thresholding(x0, value): 20 | # b c h w 21 | s = x0.pow(2).mean(1, keepdim=True).sqrt().clamp(min=value) 22 | return x0 * (value / s) -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/__pycache__/attention.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/__pycache__/attention.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/__pycache__/ema.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/__pycache__/ema.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/model.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/openaimodel.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/upscaling.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/upscaling.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/__pycache__/util.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/diffusionmodules/upscaling.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from functools import partial 5 | 6 | from ldm.modules.diffusionmodules.util import extract_into_tensor, make_beta_schedule 7 | from ldm.util import default 8 | 9 | 10 | class AbstractLowScaleModel(nn.Module): 11 | # for concatenating a downsampled image to the latent representation 12 | def __init__(self, noise_schedule_config=None): 13 | super(AbstractLowScaleModel, self).__init__() 14 | if noise_schedule_config is not None: 15 | self.register_schedule(**noise_schedule_config) 16 | 17 | def register_schedule(self, beta_schedule="linear", timesteps=1000, 18 | linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): 19 | betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, 20 | cosine_s=cosine_s) 21 | alphas = 1. - betas 22 | alphas_cumprod = np.cumprod(alphas, axis=0) 23 | alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) 24 | 25 | timesteps, = betas.shape 26 | self.num_timesteps = int(timesteps) 27 | self.linear_start = linear_start 28 | self.linear_end = linear_end 29 | assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep' 30 | 31 | to_torch = partial(torch.tensor, dtype=torch.float32) 32 | 33 | self.register_buffer('betas', to_torch(betas)) 34 | self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) 35 | self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev)) 36 | 37 | # calculations for diffusion q(x_t | x_{t-1}) and others 38 | self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod))) 39 | self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod))) 40 | self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod))) 41 | self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod))) 42 | self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. 
/ alphas_cumprod - 1))) 43 | 44 | def q_sample(self, x_start, t, noise=None): 45 | noise = default(noise, lambda: torch.randn_like(x_start)) 46 | return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + 47 | extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) 48 | 49 | def forward(self, x): 50 | return x, None 51 | 52 | def decode(self, x): 53 | return x 54 | 55 | 56 | class SimpleImageConcat(AbstractLowScaleModel): 57 | # no noise level conditioning 58 | def __init__(self): 59 | super(SimpleImageConcat, self).__init__(noise_schedule_config=None) 60 | self.max_noise_level = 0 61 | 62 | def forward(self, x): 63 | # fix to constant noise level 64 | return x, torch.zeros(x.shape[0], device=x.device).long() 65 | 66 | 67 | class ImageConcatWithNoiseAugmentation(AbstractLowScaleModel): 68 | def __init__(self, noise_schedule_config, max_noise_level=1000, to_cuda=False): 69 | super().__init__(noise_schedule_config=noise_schedule_config) 70 | self.max_noise_level = max_noise_level 71 | 72 | def forward(self, x, noise_level=None): 73 | if noise_level is None: 74 | noise_level = torch.randint(0, self.max_noise_level, (x.shape[0],), device=x.device).long() 75 | else: 76 | assert isinstance(noise_level, torch.Tensor) 77 | z = self.q_sample(x, noise_level) 78 | return z, noise_level 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/distributions/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/distributions/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/distributions/__pycache__/distributions.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/distributions/__pycache__/distributions.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class AbstractDistribution: 6 | def sample(self): 7 | raise NotImplementedError() 8 | 9 | def mode(self): 10 | raise NotImplementedError() 11 | 12 | 13 | class DiracDistribution(AbstractDistribution): 14 | def __init__(self, value): 15 | self.value = value 16 | 17 | def sample(self): 18 | return self.value 19 | 20 | def mode(self): 21 | return self.value 22 | 23 | 24 | class DiagonalGaussianDistribution(object): 25 | def __init__(self, parameters, deterministic=False): 26 | self.parameters = parameters 27 | self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) 28 | 
self.logvar = torch.clamp(self.logvar, -30.0, 20.0) 29 | self.deterministic = deterministic 30 | self.std = torch.exp(0.5 * self.logvar) 31 | self.var = torch.exp(self.logvar) 32 | if self.deterministic: 33 | self.var = self.std = torch.zeros_like(self.mean).to(device=self.parameters.device) 34 | 35 | def sample(self): 36 | x = self.mean + self.std * torch.randn(self.mean.shape).to(device=self.parameters.device) 37 | return x 38 | 39 | def kl(self, other=None): 40 | if self.deterministic: 41 | return torch.Tensor([0.]) 42 | else: 43 | if other is None: 44 | return 0.5 * torch.sum(torch.pow(self.mean, 2) 45 | + self.var - 1.0 - self.logvar, 46 | dim=[1, 2, 3]) 47 | else: 48 | return 0.5 * torch.sum( 49 | torch.pow(self.mean - other.mean, 2) / other.var 50 | + self.var / other.var - 1.0 - self.logvar + other.logvar, 51 | dim=[1, 2, 3]) 52 | 53 | def nll(self, sample, dims=[1,2,3]): 54 | if self.deterministic: 55 | return torch.Tensor([0.]) 56 | logtwopi = np.log(2.0 * np.pi) 57 | return 0.5 * torch.sum( 58 | logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var, 59 | dim=dims) 60 | 61 | def mode(self): 62 | return self.mean 63 | 64 | 65 | def normal_kl(mean1, logvar1, mean2, logvar2): 66 | """ 67 | source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12 68 | Compute the KL divergence between two gaussians. 69 | Shapes are automatically broadcasted, so batches can be compared to 70 | scalars, among other use cases. 71 | """ 72 | tensor = None 73 | for obj in (mean1, logvar1, mean2, logvar2): 74 | if isinstance(obj, torch.Tensor): 75 | tensor = obj 76 | break 77 | assert tensor is not None, "at least one argument must be a Tensor" 78 | 79 | # Force variances to be Tensors. Broadcasting helps convert scalars to 80 | # Tensors, but it does not work for torch.exp(). 
81 | logvar1, logvar2 = [ 82 | x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor) 83 | for x in (logvar1, logvar2) 84 | ] 85 | 86 | return 0.5 * ( 87 | -1.0 88 | + logvar2 89 | - logvar1 90 | + torch.exp(logvar1 - logvar2) 91 | + ((mean1 - mean2) ** 2) * torch.exp(-logvar2) 92 | ) 93 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class LitEma(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_upates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError('Decay must be between 0 and 1') 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates 14 | else torch.tensor(-1, dtype=torch.int)) 15 | 16 | for name, p in model.named_parameters(): 17 | if p.requires_grad: 18 | # remove as '.'-character is not allowed in buffers 19 | s_name = name.replace('.', '') 20 | self.m_name2s_name.update({name: s_name}) 21 | self.register_buffer(s_name, p.clone().detach().data) 22 | 23 | self.collected_params = [] 24 | 25 | def reset_num_updates(self): 26 | del self.num_updates 27 | self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int)) 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_(one_minus_decay * (shadow_params[sname] - m_param[key])) 47 | else: 48 | assert not key in self.m_name2s_name 49 | 50 | def copy_to(self, model): 51 | m_param = dict(model.named_parameters()) 52 | shadow_params = dict(self.named_buffers()) 53 | for key in m_param: 54 | if m_param[key].requires_grad: 55 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 56 | else: 57 | assert not key in self.m_name2s_name 58 | 59 | def store(self, parameters): 60 | """ 61 | Save the current parameters for restoring later. 62 | Args: 63 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 64 | temporarily stored. 65 | """ 66 | self.collected_params = [param.clone() for param in parameters] 67 | 68 | def restore(self, parameters): 69 | """ 70 | Restore the parameters stored with the `store` method. 71 | Useful to validate the model with EMA parameters without affecting the 72 | original optimization process. Store the parameters before the 73 | `copy_to` method. After validation (or model saving), use this to 74 | restore the former parameters. 75 | Args: 76 | parameters: Iterable of `torch.nn.Parameter`; the parameters to be 77 | updated with the stored parameters. 
78 | """ 79 | for c_param, param in zip(self.collected_params, parameters): 80 | param.data.copy_(c_param.data) 81 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/encoders/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/encoders/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/encoders/__pycache__/modules.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/encoders/__pycache__/modules.cpython-39.pyc -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/image_degradation/__init__.py: -------------------------------------------------------------------------------- 1 | from ldm.modules.image_degradation.bsrgan import degradation_bsrgan_variant as degradation_fn_bsr 2 | from ldm.modules.image_degradation.bsrgan_light import degradation_bsrgan_variant as degradation_fn_bsr_light 3 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/image_degradation/utils/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/image_degradation/utils/test.png -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/karlo/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/models/__init__.py -------------------------------------------------------------------------------- 
/text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/models/prior_model.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Karlo-v1.0.alpha 3 | # Copyright (c) 2022 KakaoBrain. All Rights Reserved. 4 | # ------------------------------------------------------------------------------------ 5 | 6 | import copy 7 | import torch 8 | 9 | from ldm.modules.karlo.kakao.modules import create_gaussian_diffusion 10 | from ldm.modules.karlo.kakao.modules.xf import PriorTransformer 11 | 12 | 13 | class PriorDiffusionModel(torch.nn.Module): 14 | """ 15 | A prior that generates clip image feature based on the text prompt. 16 | 17 | :param config: yaml config to define the decoder. 18 | :param tokenizer: tokenizer used in clip. 19 | :param clip_mean: mean to normalize the clip image feature (zero-mean, unit variance). 20 | :param clip_std: std to noramlize the clip image feature (zero-mean, unit variance). 21 | """ 22 | 23 | def __init__(self, config, tokenizer, clip_mean, clip_std): 24 | super().__init__() 25 | 26 | self._conf = config 27 | self._model_conf = config.model.hparams 28 | self._diffusion_kwargs = dict( 29 | steps=config.diffusion.steps, 30 | learn_sigma=config.diffusion.learn_sigma, 31 | sigma_small=config.diffusion.sigma_small, 32 | noise_schedule=config.diffusion.noise_schedule, 33 | use_kl=config.diffusion.use_kl, 34 | predict_xstart=config.diffusion.predict_xstart, 35 | rescale_learned_sigmas=config.diffusion.rescale_learned_sigmas, 36 | timestep_respacing=config.diffusion.timestep_respacing, 37 | ) 38 | self._tokenizer = tokenizer 39 | 40 | self.register_buffer("clip_mean", clip_mean[None, :], persistent=False) 41 | self.register_buffer("clip_std", clip_std[None, :], persistent=False) 42 | 43 | causal_mask = self.get_causal_mask() 44 | self.register_buffer("causal_mask", causal_mask, persistent=False) 45 | 46 | self.model = PriorTransformer( 47 | text_ctx=self._model_conf.text_ctx, 48 | xf_width=self._model_conf.xf_width, 49 | xf_layers=self._model_conf.xf_layers, 50 | xf_heads=self._model_conf.xf_heads, 51 | xf_final_ln=self._model_conf.xf_final_ln, 52 | clip_dim=self._model_conf.clip_dim, 53 | ) 54 | 55 | cf_token, cf_mask = self.set_cf_text_tensor() 56 | self.register_buffer("cf_token", cf_token, persistent=False) 57 | self.register_buffer("cf_mask", cf_mask, persistent=False) 58 | 59 | @classmethod 60 | def load_from_checkpoint( 61 | cls, config, tokenizer, clip_mean, clip_std, ckpt_path, strict: bool = True 62 | ): 63 | ckpt = torch.load(ckpt_path, map_location="cpu")["state_dict"] 64 | 65 | model = cls(config, tokenizer, clip_mean, clip_std) 66 | model.load_state_dict(ckpt, strict=strict) 67 | return model 68 | 69 | def set_cf_text_tensor(self): 70 | return self._tokenizer.padded_tokens_and_mask([""], self.model.text_ctx) 71 | 72 | def get_sample_fn(self, timestep_respacing): 73 | use_ddim = timestep_respacing.startswith(("ddim", "fast")) 74 | 75 | diffusion_kwargs = copy.deepcopy(self._diffusion_kwargs) 76 | diffusion_kwargs.update(timestep_respacing=timestep_respacing) 77 | diffusion = create_gaussian_diffusion(**diffusion_kwargs) 78 | sample_fn = diffusion.ddim_sample_loop if use_ddim else diffusion.p_sample_loop 79 | 80 | return sample_fn 81 | 82 | def get_causal_mask(self): 83 | seq_len = self._model_conf.text_ctx + 4 84 | mask = torch.empty(seq_len, seq_len) 85 | mask.fill_(float("-inf")) 86 | mask.triu_(1) 87 | mask = mask[None, ...] 
88 | return mask 89 | 90 | def forward( 91 | self, 92 | txt_feat, 93 | txt_feat_seq, 94 | mask, 95 | cf_guidance_scales=None, 96 | timestep_respacing=None, 97 | denoised_fn=True, 98 | ): 99 | # cfg should be enabled in inference 100 | assert cf_guidance_scales is not None and all(cf_guidance_scales > 0.0) 101 | 102 | bsz_ = txt_feat.shape[0] 103 | bsz = bsz_ // 2 104 | 105 | def guided_model_fn(x_t, ts, **kwargs): 106 | half = x_t[: len(x_t) // 2] 107 | combined = torch.cat([half, half], dim=0) 108 | model_out = self.model(combined, ts, **kwargs) 109 | eps, rest = ( 110 | model_out[:, : int(x_t.shape[1])], 111 | model_out[:, int(x_t.shape[1]) :], 112 | ) 113 | cond_eps, uncond_eps = torch.split(eps, len(eps) // 2, dim=0) 114 | half_eps = uncond_eps + cf_guidance_scales.view(-1, 1) * ( 115 | cond_eps - uncond_eps 116 | ) 117 | eps = torch.cat([half_eps, half_eps], dim=0) 118 | return torch.cat([eps, rest], dim=1) 119 | 120 | cond = { 121 | "text_emb": txt_feat, 122 | "text_enc": txt_feat_seq, 123 | "mask": mask, 124 | "causal_mask": self.causal_mask, 125 | } 126 | sample_fn = self.get_sample_fn(timestep_respacing) 127 | sample = sample_fn( 128 | guided_model_fn, 129 | (bsz_, self.model.clip_dim), 130 | noise=None, 131 | device=txt_feat.device, 132 | clip_denoised=False, 133 | denoised_fn=lambda x: torch.clamp(x, -10, 10), 134 | model_kwargs=cond, 135 | ) 136 | sample = (sample * self.clip_std) + self.clip_mean 137 | 138 | return sample[:bsz] 139 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/models/sr_256_1k.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Karlo-v1.0.alpha 3 | # Copyright (c) 2022 KakaoBrain. All Rights Reserved. 4 | # ------------------------------------------------------------------------------------ 5 | 6 | from ldm.modules.karlo.kakao.models.sr_64_256 import SupRes64to256Progressive 7 | 8 | 9 | class SupRes256to1kProgressive(SupRes64to256Progressive): 10 | pass # no difference currently 11 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/models/sr_64_256.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Karlo-v1.0.alpha 3 | # Copyright (c) 2022 KakaoBrain. All Rights Reserved. 4 | # ------------------------------------------------------------------------------------ 5 | 6 | import copy 7 | import torch 8 | 9 | from ldm.modules.karlo.kakao.modules.unet import SuperResUNetModel 10 | from ldm.modules.karlo.kakao.modules import create_gaussian_diffusion 11 | 12 | 13 | class ImprovedSupRes64to256ProgressiveModel(torch.nn.Module): 14 | """ 15 | ImprovedSR model fine-tunes the pretrained DDPM-based SR model by using adversarial and perceptual losses. 16 | In specific, the low-resolution sample is iteratively recovered by 6 steps with the frozen pretrained SR model. 17 | In the following additional one step, a seperate fine-tuned model recovers high-frequency details. 18 | This approach greatly improves the fidelity of images of 256x256px, even with small number of reverse steps. 
19 | """ 20 | 21 | def __init__(self, config): 22 | super().__init__() 23 | 24 | self._config = config 25 | self._diffusion_kwargs = dict( 26 | steps=config.diffusion.steps, 27 | learn_sigma=config.diffusion.learn_sigma, 28 | sigma_small=config.diffusion.sigma_small, 29 | noise_schedule=config.diffusion.noise_schedule, 30 | use_kl=config.diffusion.use_kl, 31 | predict_xstart=config.diffusion.predict_xstart, 32 | rescale_learned_sigmas=config.diffusion.rescale_learned_sigmas, 33 | ) 34 | 35 | self.model_first_steps = SuperResUNetModel( 36 | in_channels=3, # auto-changed to 6 inside the model 37 | model_channels=config.model.hparams.channels, 38 | out_channels=3, 39 | num_res_blocks=config.model.hparams.depth, 40 | attention_resolutions=(), # no attention 41 | dropout=config.model.hparams.dropout, 42 | channel_mult=config.model.hparams.channels_multiple, 43 | resblock_updown=True, 44 | use_middle_attention=False, 45 | ) 46 | self.model_last_step = SuperResUNetModel( 47 | in_channels=3, # auto-changed to 6 inside the model 48 | model_channels=config.model.hparams.channels, 49 | out_channels=3, 50 | num_res_blocks=config.model.hparams.depth, 51 | attention_resolutions=(), # no attention 52 | dropout=config.model.hparams.dropout, 53 | channel_mult=config.model.hparams.channels_multiple, 54 | resblock_updown=True, 55 | use_middle_attention=False, 56 | ) 57 | 58 | @classmethod 59 | def load_from_checkpoint(cls, config, ckpt_path, strict: bool = True): 60 | ckpt = torch.load(ckpt_path, map_location="cpu")["state_dict"] 61 | 62 | model = cls(config) 63 | model.load_state_dict(ckpt, strict=strict) 64 | return model 65 | 66 | def get_sample_fn(self, timestep_respacing): 67 | diffusion_kwargs = copy.deepcopy(self._diffusion_kwargs) 68 | diffusion_kwargs.update(timestep_respacing=timestep_respacing) 69 | diffusion = create_gaussian_diffusion(**diffusion_kwargs) 70 | return diffusion.p_sample_loop_progressive_for_improved_sr 71 | 72 | def forward(self, low_res, timestep_respacing="7", **kwargs): 73 | assert ( 74 | timestep_respacing == "7" 75 | ), "different respacing method may work, but no guaranteed" 76 | 77 | sample_fn = self.get_sample_fn(timestep_respacing) 78 | sample_outputs = sample_fn( 79 | self.model_first_steps, 80 | self.model_last_step, 81 | shape=low_res.shape, 82 | clip_denoised=True, 83 | model_kwargs=dict(low_res=low_res), 84 | **kwargs, 85 | ) 86 | for x in sample_outputs: 87 | sample = x["sample"] 88 | yield sample 89 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Adapted from Guided-Diffusion repo (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | 6 | from .diffusion import gaussian_diffusion as gd 7 | from .diffusion.respace import ( 8 | SpacedDiffusion, 9 | space_timesteps, 10 | ) 11 | 12 | 13 | def create_gaussian_diffusion( 14 | steps, 15 | learn_sigma, 16 | sigma_small, 17 | noise_schedule, 18 | use_kl, 19 | predict_xstart, 20 | rescale_learned_sigmas, 21 | timestep_respacing, 22 | ): 23 | betas = gd.get_named_beta_schedule(noise_schedule, steps) 24 | if use_kl: 25 | loss_type = gd.LossType.RESCALED_KL 26 | elif rescale_learned_sigmas: 27 | loss_type = gd.LossType.RESCALED_MSE 28 | else: 29 | 
loss_type = gd.LossType.MSE 30 | if not timestep_respacing: 31 | timestep_respacing = [steps] 32 | 33 | return SpacedDiffusion( 34 | use_timesteps=space_timesteps(steps, timestep_respacing), 35 | betas=betas, 36 | model_mean_type=( 37 | gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X 38 | ), 39 | model_var_type=( 40 | ( 41 | gd.ModelVarType.FIXED_LARGE 42 | if not sigma_small 43 | else gd.ModelVarType.FIXED_SMALL 44 | ) 45 | if not learn_sigma 46 | else gd.ModelVarType.LEARNED_RANGE 47 | ), 48 | loss_type=loss_type, 49 | ) 50 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/modules/diffusion/respace.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Adapted from Guided-Diffusion repo (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | 6 | import torch as th 7 | 8 | from .gaussian_diffusion import GaussianDiffusion 9 | 10 | 11 | def space_timesteps(num_timesteps, section_counts): 12 | """ 13 | Create a list of timesteps to use from an original diffusion process, 14 | given the number of timesteps we want to take from equally-sized portions 15 | of the original process. 16 | 17 | For example, if there's 300 timesteps and the section counts are [10,15,20] 18 | then the first 100 timesteps are strided to be 10 timesteps, the second 100 19 | are strided to be 15 timesteps, and the final 100 are strided to be 20. 20 | 21 | :param num_timesteps: the number of diffusion steps in the original 22 | process to divide up. 23 | :param section_counts: either a list of numbers, or a string containing 24 | comma-separated numbers, indicating the step count 25 | per section. As a special case, use "ddimN" where N 26 | is a number of steps to use the striding from the 27 | DDIM paper. 28 | :return: a set of diffusion steps from the original process to use. 29 | """ 30 | if isinstance(section_counts, str): 31 | if section_counts.startswith("ddim"): 32 | desired_count = int(section_counts[len("ddim") :]) 33 | for i in range(1, num_timesteps): 34 | if len(range(0, num_timesteps, i)) == desired_count: 35 | return set(range(0, num_timesteps, i)) 36 | raise ValueError( 37 | f"cannot create exactly {num_timesteps} steps with an integer stride" 38 | ) 39 | elif section_counts == "fast27": 40 | steps = space_timesteps(num_timesteps, "10,10,3,2,2") 41 | # Help reduce DDIM artifacts from noisiest timesteps. 
42 | steps.remove(num_timesteps - 1) 43 | steps.add(num_timesteps - 3) 44 | return steps 45 | section_counts = [int(x) for x in section_counts.split(",")] 46 | size_per = num_timesteps // len(section_counts) 47 | extra = num_timesteps % len(section_counts) 48 | start_idx = 0 49 | all_steps = [] 50 | for i, section_count in enumerate(section_counts): 51 | size = size_per + (1 if i < extra else 0) 52 | if size < section_count: 53 | raise ValueError( 54 | f"cannot divide section of {size} steps into {section_count}" 55 | ) 56 | if section_count <= 1: 57 | frac_stride = 1 58 | else: 59 | frac_stride = (size - 1) / (section_count - 1) 60 | cur_idx = 0.0 61 | taken_steps = [] 62 | for _ in range(section_count): 63 | taken_steps.append(start_idx + round(cur_idx)) 64 | cur_idx += frac_stride 65 | all_steps += taken_steps 66 | start_idx += size 67 | return set(all_steps) 68 | 69 | 70 | class SpacedDiffusion(GaussianDiffusion): 71 | """ 72 | A diffusion process which can skip steps in a base diffusion process. 73 | 74 | :param use_timesteps: a collection (sequence or set) of timesteps from the 75 | original diffusion process to retain. 76 | :param kwargs: the kwargs to create the base diffusion process. 77 | """ 78 | 79 | def __init__(self, use_timesteps, **kwargs): 80 | self.use_timesteps = set(use_timesteps) 81 | self.original_num_steps = len(kwargs["betas"]) 82 | 83 | base_diffusion = GaussianDiffusion(**kwargs) # pylint: disable=missing-kwoa 84 | last_alpha_cumprod = 1.0 85 | new_betas = [] 86 | timestep_map = [] 87 | for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod): 88 | if i in self.use_timesteps: 89 | new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) 90 | last_alpha_cumprod = alpha_cumprod 91 | timestep_map.append(i) 92 | kwargs["betas"] = th.tensor(new_betas).numpy() 93 | super().__init__(**kwargs) 94 | self.register_buffer("timestep_map", th.tensor(timestep_map), persistent=False) 95 | 96 | def p_mean_variance(self, model, *args, **kwargs): 97 | return super().p_mean_variance(self._wrap_model(model), *args, **kwargs) 98 | 99 | def condition_mean(self, cond_fn, *args, **kwargs): 100 | return super().condition_mean(self._wrap_model(cond_fn), *args, **kwargs) 101 | 102 | def condition_score(self, cond_fn, *args, **kwargs): 103 | return super().condition_score(self._wrap_model(cond_fn), *args, **kwargs) 104 | 105 | def _wrap_model(self, model): 106 | def wrapped(x, ts, **kwargs): 107 | ts_cpu = ts.detach().to("cpu") 108 | return model( 109 | x, self.timestep_map[ts_cpu].to(device=ts.device, dtype=ts.dtype), **kwargs 110 | ) 111 | 112 | return wrapped 113 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/modules/nn.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Adapted from Guided-Diffusion repo (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | import math 6 | 7 | import torch as th 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | class GroupNorm32(nn.GroupNorm): 13 | def __init__(self, num_groups, num_channels, swish, eps=1e-5): 14 | super().__init__(num_groups=num_groups, num_channels=num_channels, eps=eps) 15 | self.swish = swish 16 | 17 | def forward(self, x): 18 | y = super().forward(x.float()).to(x.dtype) 19 | if self.swish == 
1.0: 20 | y = F.silu(y) 21 | elif self.swish: 22 | y = y * F.sigmoid(y * float(self.swish)) 23 | return y 24 | 25 | 26 | def conv_nd(dims, *args, **kwargs): 27 | """ 28 | Create a 1D, 2D, or 3D convolution module. 29 | """ 30 | if dims == 1: 31 | return nn.Conv1d(*args, **kwargs) 32 | elif dims == 2: 33 | return nn.Conv2d(*args, **kwargs) 34 | elif dims == 3: 35 | return nn.Conv3d(*args, **kwargs) 36 | raise ValueError(f"unsupported dimensions: {dims}") 37 | 38 | 39 | def linear(*args, **kwargs): 40 | """ 41 | Create a linear module. 42 | """ 43 | return nn.Linear(*args, **kwargs) 44 | 45 | 46 | def avg_pool_nd(dims, *args, **kwargs): 47 | """ 48 | Create a 1D, 2D, or 3D average pooling module. 49 | """ 50 | if dims == 1: 51 | return nn.AvgPool1d(*args, **kwargs) 52 | elif dims == 2: 53 | return nn.AvgPool2d(*args, **kwargs) 54 | elif dims == 3: 55 | return nn.AvgPool3d(*args, **kwargs) 56 | raise ValueError(f"unsupported dimensions: {dims}") 57 | 58 | 59 | def zero_module(module): 60 | """ 61 | Zero out the parameters of a module and return it. 62 | """ 63 | for p in module.parameters(): 64 | p.detach().zero_() 65 | return module 66 | 67 | 68 | def scale_module(module, scale): 69 | """ 70 | Scale the parameters of a module and return it. 71 | """ 72 | for p in module.parameters(): 73 | p.detach().mul_(scale) 74 | return module 75 | 76 | 77 | def normalization(channels, swish=0.0): 78 | """ 79 | Make a standard normalization layer, with an optional swish activation. 80 | 81 | :param channels: number of input channels. 82 | :return: an nn.Module for normalization. 83 | """ 84 | return GroupNorm32(num_channels=channels, num_groups=32, swish=swish) 85 | 86 | 87 | def timestep_embedding(timesteps, dim, max_period=10000): 88 | """ 89 | Create sinusoidal timestep embeddings. 90 | 91 | :param timesteps: a 1-D Tensor of N indices, one per batch element. 92 | These may be fractional. 93 | :param dim: the dimension of the output. 94 | :param max_period: controls the minimum frequency of the embeddings. 95 | :return: an [N x dim] Tensor of positional embeddings. 96 | """ 97 | half = dim // 2 98 | freqs = th.exp( 99 | -math.log(max_period) 100 | * th.arange(start=0, end=half, dtype=th.float32, device=timesteps.device) 101 | / half 102 | ) 103 | args = timesteps[:, None].float() * freqs[None] 104 | embedding = th.cat([th.cos(args), th.sin(args)], dim=-1) 105 | if dim % 2: 106 | embedding = th.cat([embedding, th.zeros_like(embedding[:, :1])], dim=-1) 107 | return embedding 108 | 109 | 110 | def mean_flat(tensor): 111 | """ 112 | Take the mean over all non-batch dimensions. 113 | """ 114 | return tensor.mean(dim=list(range(1, len(tensor.shape)))) 115 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/modules/resample.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Modified from Guided-Diffusion (https://github.com/openai/guided-diffusion) 3 | # ------------------------------------------------------------------------------------ 4 | 5 | from abc import abstractmethod 6 | 7 | import torch as th 8 | 9 | 10 | def create_named_schedule_sampler(name, diffusion): 11 | """ 12 | Create a ScheduleSampler from a library of pre-defined samplers. 13 | 14 | :param name: the name of the sampler. 15 | :param diffusion: the diffusion object to sample for. 
16 | """ 17 | if name == "uniform": 18 | return UniformSampler(diffusion) 19 | else: 20 | raise NotImplementedError(f"unknown schedule sampler: {name}") 21 | 22 | 23 | class ScheduleSampler(th.nn.Module): 24 | """ 25 | A distribution over timesteps in the diffusion process, intended to reduce 26 | variance of the objective. 27 | 28 | By default, samplers perform unbiased importance sampling, in which the 29 | objective's mean is unchanged. 30 | However, subclasses may override sample() to change how the resampled 31 | terms are reweighted, allowing for actual changes in the objective. 32 | """ 33 | 34 | @abstractmethod 35 | def weights(self): 36 | """ 37 | Get a numpy array of weights, one per diffusion step. 38 | 39 | The weights needn't be normalized, but must be positive. 40 | """ 41 | 42 | def sample(self, batch_size, device): 43 | """ 44 | Importance-sample timesteps for a batch. 45 | 46 | :param batch_size: the number of timesteps. 47 | :param device: the torch device to save to. 48 | :return: a tuple (timesteps, weights): 49 | - timesteps: a tensor of timestep indices. 50 | - weights: a tensor of weights to scale the resulting losses. 51 | """ 52 | w = self.weights() 53 | p = w / th.sum(w) 54 | indices = p.multinomial(batch_size, replacement=True) 55 | weights = 1 / (len(p) * p[indices]) 56 | return indices, weights 57 | 58 | 59 | class UniformSampler(ScheduleSampler): 60 | def __init__(self, diffusion): 61 | super(UniformSampler, self).__init__() 62 | self.diffusion = diffusion 63 | self.register_buffer( 64 | "_weights", th.ones([diffusion.num_timesteps]), persistent=False 65 | ) 66 | 67 | def weights(self): 68 | return self._weights 69 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/karlo/kakao/template.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------------ 2 | # Karlo-v1.0.alpha 3 | # Copyright (c) 2022 KakaoBrain. All Rights Reserved. 
4 | # ------------------------------------------------------------------------------------ 5 | 6 | import os 7 | import logging 8 | import torch 9 | 10 | from omegaconf import OmegaConf 11 | 12 | from ldm.modules.karlo.kakao.models.clip import CustomizedCLIP, CustomizedTokenizer 13 | from ldm.modules.karlo.kakao.models.prior_model import PriorDiffusionModel 14 | from ldm.modules.karlo.kakao.models.decoder_model import Text2ImProgressiveModel 15 | from ldm.modules.karlo.kakao.models.sr_64_256 import ImprovedSupRes64to256ProgressiveModel 16 | 17 | 18 | SAMPLING_CONF = { 19 | "default": { 20 | "prior_sm": "25", 21 | "prior_n_samples": 1, 22 | "prior_cf_scale": 4.0, 23 | "decoder_sm": "50", 24 | "decoder_cf_scale": 8.0, 25 | "sr_sm": "7", 26 | }, 27 | "fast": { 28 | "prior_sm": "25", 29 | "prior_n_samples": 1, 30 | "prior_cf_scale": 4.0, 31 | "decoder_sm": "25", 32 | "decoder_cf_scale": 8.0, 33 | "sr_sm": "7", 34 | }, 35 | } 36 | 37 | CKPT_PATH = { 38 | "prior": "prior-ckpt-step=01000000-of-01000000.ckpt", 39 | "decoder": "decoder-ckpt-step=01000000-of-01000000.ckpt", 40 | "sr_256": "improved-sr-ckpt-step=1.2M.ckpt", 41 | } 42 | 43 | 44 | class BaseSampler: 45 | _PRIOR_CLASS = PriorDiffusionModel 46 | _DECODER_CLASS = Text2ImProgressiveModel 47 | _SR256_CLASS = ImprovedSupRes64to256ProgressiveModel 48 | 49 | def __init__( 50 | self, 51 | root_dir: str, 52 | sampling_type: str = "fast", 53 | ): 54 | self._root_dir = root_dir 55 | 56 | sampling_type = SAMPLING_CONF[sampling_type] 57 | self._prior_sm = sampling_type["prior_sm"] 58 | self._prior_n_samples = sampling_type["prior_n_samples"] 59 | self._prior_cf_scale = sampling_type["prior_cf_scale"] 60 | 61 | assert self._prior_n_samples == 1 62 | 63 | self._decoder_sm = sampling_type["decoder_sm"] 64 | self._decoder_cf_scale = sampling_type["decoder_cf_scale"] 65 | 66 | self._sr_sm = sampling_type["sr_sm"] 67 | 68 | def __repr__(self): 69 | line = "" 70 | line += f"Prior, sampling method: {self._prior_sm}, cf_scale: {self._prior_cf_scale}\n" 71 | line += f"Decoder, sampling method: {self._decoder_sm}, cf_scale: {self._decoder_cf_scale}\n" 72 | line += f"SR(64->256), sampling method: {self._sr_sm}" 73 | 74 | return line 75 | 76 | def load_clip(self, clip_path: str): 77 | clip = CustomizedCLIP.load_from_checkpoint( 78 | os.path.join(self._root_dir, clip_path) 79 | ) 80 | clip = torch.jit.script(clip) 81 | clip.cuda() 82 | clip.eval() 83 | 84 | self._clip = clip 85 | self._tokenizer = CustomizedTokenizer() 86 | 87 | def load_prior( 88 | self, 89 | ckpt_path: str, 90 | clip_stat_path: str, 91 | prior_config: str = "configs/prior_1B_vit_l.yaml" 92 | ): 93 | logging.info(f"Loading prior: {ckpt_path}") 94 | 95 | config = OmegaConf.load(prior_config) 96 | clip_mean, clip_std = torch.load( 97 | os.path.join(self._root_dir, clip_stat_path), map_location="cpu" 98 | ) 99 | 100 | prior = self._PRIOR_CLASS.load_from_checkpoint( 101 | config, 102 | self._tokenizer, 103 | clip_mean, 104 | clip_std, 105 | os.path.join(self._root_dir, ckpt_path), 106 | strict=True, 107 | ) 108 | prior.cuda() 109 | prior.eval() 110 | logging.info("done.") 111 | 112 | self._prior = prior 113 | 114 | def load_decoder(self, ckpt_path: str, decoder_config: str = "configs/decoder_900M_vit_l.yaml"): 115 | logging.info(f"Loading decoder: {ckpt_path}") 116 | 117 | config = OmegaConf.load(decoder_config) 118 | decoder = self._DECODER_CLASS.load_from_checkpoint( 119 | config, 120 | self._tokenizer, 121 | os.path.join(self._root_dir, ckpt_path), 122 | strict=True, 123 | ) 124 | decoder.cuda() 
125 | decoder.eval() 126 | logging.info("done.") 127 | 128 | self._decoder = decoder 129 | 130 | def load_sr_64_256(self, ckpt_path: str, sr_config: str = "configs/improved_sr_64_256_1.4B.yaml"): 131 | logging.info(f"Loading SR(64->256): {ckpt_path}") 132 | 133 | config = OmegaConf.load(sr_config) 134 | sr = self._SR256_CLASS.load_from_checkpoint( 135 | config, os.path.join(self._root_dir, ckpt_path), strict=True 136 | ) 137 | sr.cuda() 138 | sr.eval() 139 | logging.info("done.") 140 | 141 | self._sr_64_256 = sr -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/midas/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/midas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/ldm/modules/midas/midas/__init__.py -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/midas/base_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BaseModel(torch.nn.Module): 5 | def load(self, path): 6 | """Load model from file. 7 | 8 | Args: 9 | path (str): file path 10 | """ 11 | parameters = torch.load(path, map_location=torch.device('cpu')) 12 | 13 | if "optimizer" in parameters: 14 | parameters = parameters["model"] 15 | 16 | self.load_state_dict(parameters) 17 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/midas/dpt_depth.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .base_model import BaseModel 6 | from .blocks import ( 7 | FeatureFusionBlock, 8 | FeatureFusionBlock_custom, 9 | Interpolate, 10 | _make_encoder, 11 | forward_vit, 12 | ) 13 | 14 | 15 | def _make_fusion_block(features, use_bn): 16 | return FeatureFusionBlock_custom( 17 | features, 18 | nn.ReLU(False), 19 | deconv=False, 20 | bn=use_bn, 21 | expand=False, 22 | align_corners=True, 23 | ) 24 | 25 | 26 | class DPT(BaseModel): 27 | def __init__( 28 | self, 29 | head, 30 | features=256, 31 | backbone="vitb_rn50_384", 32 | readout="project", 33 | channels_last=False, 34 | use_bn=False, 35 | ): 36 | 37 | super(DPT, self).__init__() 38 | 39 | self.channels_last = channels_last 40 | 41 | hooks = { 42 | "vitb_rn50_384": [0, 1, 8, 11], 43 | "vitb16_384": [2, 5, 8, 11], 44 | "vitl16_384": [5, 11, 17, 23], 45 | } 46 | 47 | # Instantiate backbone and reassemble blocks 48 | self.pretrained, self.scratch = _make_encoder( 49 | backbone, 50 | features, 51 | False, # Set to true of you want to train from scratch, uses ImageNet weights 52 | groups=1, 53 | expand=False, 54 | exportable=False, 55 | hooks=hooks[backbone], 56 | use_readout=readout, 57 | ) 58 | 59 | self.scratch.refinenet1 = _make_fusion_block(features, use_bn) 60 | self.scratch.refinenet2 = _make_fusion_block(features, use_bn) 61 | self.scratch.refinenet3 = 
_make_fusion_block(features, use_bn) 62 | self.scratch.refinenet4 = _make_fusion_block(features, use_bn) 63 | 64 | self.scratch.output_conv = head 65 | 66 | 67 | def forward(self, x): 68 | if self.channels_last == True: 69 | x.contiguous(memory_format=torch.channels_last) 70 | 71 | layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x) 72 | 73 | layer_1_rn = self.scratch.layer1_rn(layer_1) 74 | layer_2_rn = self.scratch.layer2_rn(layer_2) 75 | layer_3_rn = self.scratch.layer3_rn(layer_3) 76 | layer_4_rn = self.scratch.layer4_rn(layer_4) 77 | 78 | path_4 = self.scratch.refinenet4(layer_4_rn) 79 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn) 80 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn) 81 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn) 82 | 83 | out = self.scratch.output_conv(path_1) 84 | 85 | return out 86 | 87 | 88 | class DPTDepthModel(DPT): 89 | def __init__(self, path=None, non_negative=True, **kwargs): 90 | features = kwargs["features"] if "features" in kwargs else 256 91 | 92 | head = nn.Sequential( 93 | nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1), 94 | Interpolate(scale_factor=2, mode="bilinear", align_corners=True), 95 | nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1), 96 | nn.ReLU(True), 97 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), 98 | nn.ReLU(True) if non_negative else nn.Identity(), 99 | nn.Identity(), 100 | ) 101 | 102 | super().__init__(head, **kwargs) 103 | 104 | if path is not None: 105 | self.load(path) 106 | 107 | def forward(self, x): 108 | return super().forward(x).squeeze(dim=1) 109 | 110 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/midas/midas_net.py: -------------------------------------------------------------------------------- 1 | """MidashNet: Network for monocular depth estimation trained by mixing several datasets. 2 | This file contains code that is adapted from 3 | https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .base_model import BaseModel 9 | from .blocks import FeatureFusionBlock, Interpolate, _make_encoder 10 | 11 | 12 | class MidasNet(BaseModel): 13 | """Network for monocular depth estimation. 14 | """ 15 | 16 | def __init__(self, path=None, features=256, non_negative=True): 17 | """Init. 18 | 19 | Args: 20 | path (str, optional): Path to saved model. Defaults to None. 21 | features (int, optional): Number of features. Defaults to 256. 22 | backbone (str, optional): Backbone network for encoder. 
Defaults to resnet50 23 | """ 24 | print("Loading weights: ", path) 25 | 26 | super(MidasNet, self).__init__() 27 | 28 | use_pretrained = False if path is None else True 29 | 30 | self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) 31 | 32 | self.scratch.refinenet4 = FeatureFusionBlock(features) 33 | self.scratch.refinenet3 = FeatureFusionBlock(features) 34 | self.scratch.refinenet2 = FeatureFusionBlock(features) 35 | self.scratch.refinenet1 = FeatureFusionBlock(features) 36 | 37 | self.scratch.output_conv = nn.Sequential( 38 | nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), 39 | Interpolate(scale_factor=2, mode="bilinear"), 40 | nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), 41 | nn.ReLU(True), 42 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), 43 | nn.ReLU(True) if non_negative else nn.Identity(), 44 | ) 45 | 46 | if path: 47 | self.load(path) 48 | 49 | def forward(self, x): 50 | """Forward pass. 51 | 52 | Args: 53 | x (tensor): input data (image) 54 | 55 | Returns: 56 | tensor: depth 57 | """ 58 | 59 | layer_1 = self.pretrained.layer1(x) 60 | layer_2 = self.pretrained.layer2(layer_1) 61 | layer_3 = self.pretrained.layer3(layer_2) 62 | layer_4 = self.pretrained.layer4(layer_3) 63 | 64 | layer_1_rn = self.scratch.layer1_rn(layer_1) 65 | layer_2_rn = self.scratch.layer2_rn(layer_2) 66 | layer_3_rn = self.scratch.layer3_rn(layer_3) 67 | layer_4_rn = self.scratch.layer4_rn(layer_4) 68 | 69 | path_4 = self.scratch.refinenet4(layer_4_rn) 70 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn) 71 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn) 72 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn) 73 | 74 | out = self.scratch.output_conv(path_1) 75 | 76 | return torch.squeeze(out, dim=1) 77 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/ldm/modules/midas/utils.py: -------------------------------------------------------------------------------- 1 | """Utils for monoDepth.""" 2 | import sys 3 | import re 4 | import numpy as np 5 | import cv2 6 | import torch 7 | 8 | 9 | def read_pfm(path): 10 | """Read pfm file. 11 | 12 | Args: 13 | path (str): path to file 14 | 15 | Returns: 16 | tuple: (data, scale) 17 | """ 18 | with open(path, "rb") as file: 19 | 20 | color = None 21 | width = None 22 | height = None 23 | scale = None 24 | endian = None 25 | 26 | header = file.readline().rstrip() 27 | if header.decode("ascii") == "PF": 28 | color = True 29 | elif header.decode("ascii") == "Pf": 30 | color = False 31 | else: 32 | raise Exception("Not a PFM file: " + path) 33 | 34 | dim_match = re.match(r"^(\d+)\s(\d+)\s$", file.readline().decode("ascii")) 35 | if dim_match: 36 | width, height = list(map(int, dim_match.groups())) 37 | else: 38 | raise Exception("Malformed PFM header.") 39 | 40 | scale = float(file.readline().decode("ascii").rstrip()) 41 | if scale < 0: 42 | # little-endian 43 | endian = "<" 44 | scale = -scale 45 | else: 46 | # big-endian 47 | endian = ">" 48 | 49 | data = np.fromfile(file, endian + "f") 50 | shape = (height, width, 3) if color else (height, width) 51 | 52 | data = np.reshape(data, shape) 53 | data = np.flipud(data) 54 | 55 | return data, scale 56 | 57 | 58 | def write_pfm(path, image, scale=1): 59 | """Write pfm file. 60 | 61 | Args: 62 | path (str): pathto file 63 | image (array): data 64 | scale (int, optional): Scale. Defaults to 1. 
65 | """ 66 | 67 | with open(path, "wb") as file: 68 | color = None 69 | 70 | if image.dtype.name != "float32": 71 | raise Exception("Image dtype must be float32.") 72 | 73 | image = np.flipud(image) 74 | 75 | if len(image.shape) == 3 and image.shape[2] == 3: # color image 76 | color = True 77 | elif ( 78 | len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1 79 | ): # greyscale 80 | color = False 81 | else: 82 | raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.") 83 | 84 | file.write("PF\n" if color else "Pf\n".encode()) 85 | file.write("%d %d\n".encode() % (image.shape[1], image.shape[0])) 86 | 87 | endian = image.dtype.byteorder 88 | 89 | if endian == "<" or endian == "=" and sys.byteorder == "little": 90 | scale = -scale 91 | 92 | file.write("%f\n".encode() % scale) 93 | 94 | image.tofile(file) 95 | 96 | 97 | def read_image(path): 98 | """Read image and output RGB image (0-1). 99 | 100 | Args: 101 | path (str): path to file 102 | 103 | Returns: 104 | array: RGB image (0-1) 105 | """ 106 | img = cv2.imread(path) 107 | 108 | if img.ndim == 2: 109 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 110 | 111 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 112 | 113 | return img 114 | 115 | 116 | def resize_image(img): 117 | """Resize image and make it fit for network. 118 | 119 | Args: 120 | img (array): image 121 | 122 | Returns: 123 | tensor: data ready for network 124 | """ 125 | height_orig = img.shape[0] 126 | width_orig = img.shape[1] 127 | 128 | if width_orig > height_orig: 129 | scale = width_orig / 384 130 | else: 131 | scale = height_orig / 384 132 | 133 | height = (np.ceil(height_orig / scale / 32) * 32).astype(int) 134 | width = (np.ceil(width_orig / scale / 32) * 32).astype(int) 135 | 136 | img_resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA) 137 | 138 | img_resized = ( 139 | torch.from_numpy(np.transpose(img_resized, (2, 0, 1))).contiguous().float() 140 | ) 141 | img_resized = img_resized.unsqueeze(0) 142 | 143 | return img_resized 144 | 145 | 146 | def resize_depth(depth, width, height): 147 | """Resize depth map and bring to CPU (numpy). 148 | 149 | Args: 150 | depth (tensor): depth 151 | width (int): image width 152 | height (int): image height 153 | 154 | Returns: 155 | array: processed depth 156 | """ 157 | depth = torch.squeeze(depth[0, :, :, :]).to("cpu") 158 | 159 | depth_resized = cv2.resize( 160 | depth.numpy(), (width, height), interpolation=cv2.INTER_CUBIC 161 | ) 162 | 163 | return depth_resized 164 | 165 | def write_depth(path, depth, bits=1): 166 | """Write depth map to pfm and png file. 
167 | 168 | Args: 169 | path (str): filepath without extension 170 | depth (array): depth 171 | """ 172 | write_pfm(path + ".pfm", depth.astype(np.float32)) 173 | 174 | depth_min = depth.min() 175 | depth_max = depth.max() 176 | 177 | max_val = (2**(8*bits))-1 178 | 179 | if depth_max - depth_min > np.finfo("float").eps: 180 | out = max_val * (depth - depth_min) / (depth_max - depth_min) 181 | else: 182 | out = np.zeros(depth.shape, dtype=depth.type) 183 | 184 | if bits == 1: 185 | cv2.imwrite(path + ".png", out.astype("uint8")) 186 | elif bits == 2: 187 | cv2.imwrite(path + ".png", out.astype("uint16")) 188 | 189 | return 190 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/pd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/pd -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/requirements.txt: -------------------------------------------------------------------------------- 1 | albumentations==0.4.3 2 | opencv-python 3 | pudb==2019.2 4 | imageio==2.9.0 5 | imageio-ffmpeg==0.4.2 6 | pytorch-lightning==1.4.2 7 | torchmetrics==0.6 8 | omegaconf==2.1.1 9 | test-tube>=0.7.5 10 | streamlit>=0.73.1 11 | einops==0.3.0 12 | transformers==4.19.2 13 | webdataset==0.2.5 14 | open-clip-torch==2.7.0 15 | gradio==3.13.2 16 | kornia==0.6 17 | invisible-watermark>=0.1.5 18 | streamlit-drawable-canvas==0.8.0 19 | -e . 20 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/sample.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | #text_prompt = "a professional photograph of an astronaut riding a horse" 4 | text_prompt = '"A man dressed for the snowy mountain looks at the camera"' 5 | 6 | #ddim 7 | folder_DDIM = "outputs/txt2img-samples_DDIM" 8 | num_step = 10 9 | checkpoint = ".../v2-1_512-ema-pruned.ckpt" 10 | cmd = 'python txt2img.py --ddim --prompt ' + text_prompt + ' --ckpt ' + checkpoint +' --config "configs/stable-diffusion/v2-inference.yaml" --steps ' +str(num_step)+ ' --n_iter 1 --outdir ' +folder_DDIM+ ' --device "cuda" --seed 1 --n_samples 3 --precision full' 11 | os.system(cmd) 12 | 13 | 14 | #BDIAddim 15 | folder_BDIADDIM = "outputs/txt2img-samples_BDIADDIM" 16 | num_step = 10 17 | gamma = 0.5 # the parameter gamma is within the range [0, 1] 18 | checkpoint = ".../v2-1_512-ema-pruned.ckpt" 19 | cmd = 'python txt2img.py --bdiaddim --gamma ' +str(gamma) + ' --prompt ' + text_prompt + ' --ckpt ' + checkpoint +' --config "configs/stable-diffusion/v2-inference.yaml" --steps ' +str(num_step)+ ' --n_iter 1 --outdir ' + folder_BDIADDIM + ' --device "cuda" --seed 1 --n_samples 3 --precision full' 20 | os.system(cmd) 21 | 22 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/scripts/tests/test_watermark.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import fire 3 | from imwatermark import WatermarkDecoder 4 | 5 | 6 | def testit(img_path): 7 | bgr = cv2.imread(img_path) 8 | decoder = WatermarkDecoder('bytes', 136) 9 | watermark = decoder.decode(bgr, 'dwtDct') 10 | try: 11 | dec = watermark.decode('utf-8') 12 | except: 13 | dec = "null" 14 | print(dec) 15 | 16 | 17 | if __name__ == 
"__main__": 18 | fire.Fire(testit) -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='stable-diffusion', 5 | version='0.0.1', 6 | description='', 7 | packages=find_packages(), 8 | install_requires=[ 9 | 'torch', 10 | 'numpy', 11 | 'tqdm', 12 | ], 13 | ) -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/shutil: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guoqiang-zhang-x/BDIA/d4c951875a6b22343a8f3ffa77e38c1a1d3dcfe3/text-to-image/stablediffusionV2/shutil -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/stable_diffusion.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: stable-diffusion 3 | Version: 0.0.1 4 | License-File: LICENSE 5 | License-File: LICENSE-MODEL 6 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/stable_diffusion.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | LICENSE-MODEL 3 | README.md 4 | setup.py 5 | stable_diffusion.egg-info/PKG-INFO 6 | stable_diffusion.egg-info/SOURCES.txt 7 | stable_diffusion.egg-info/dependency_links.txt 8 | stable_diffusion.egg-info/requires.txt 9 | stable_diffusion.egg-info/top_level.txt -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/stable_diffusion.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/stable_diffusion.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | torch 2 | numpy 3 | tqdm 4 | -------------------------------------------------------------------------------- /text-to-image/stablediffusionV2/stable_diffusion.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | 2 | --------------------------------------------------------------------------------