├── .github └── ISSUE_TEMPLATE │ ├── bug-report.md │ └── feature_request.md ├── .gitignore ├── LICENSE ├── README.md └── vision ├── Taichu-GLIDE ├── README.md ├── data │ └── prompts.txt ├── model │ ├── glide_text2im │ │ ├── __init__.py │ │ ├── custom_types.py │ │ ├── default_options.py │ │ ├── diffusion_creator.py │ │ ├── gaussian_computation.py │ │ ├── losses.py │ │ ├── main_funcs.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── gaussian_diffusion.py │ │ │ ├── guider.py │ │ │ ├── simple_nn.py │ │ │ ├── srgan.py │ │ │ ├── srgan_util.py │ │ │ ├── text2im_model.py │ │ │ ├── train_model.py │ │ │ ├── unet.py │ │ │ └── xf.py │ │ ├── model_creation.py │ │ ├── model_creator.py │ │ ├── tokenizer │ │ │ ├── __init__.py │ │ │ ├── bpe.py │ │ │ ├── caption_to_tokens.py │ │ │ └── chinese_tokenizer.py │ │ └── train │ │ │ ├── Loader.py │ │ │ ├── __init__.py │ │ │ ├── build_optimizer.py │ │ │ ├── cell_wrapper.py │ │ │ ├── config.py │ │ │ ├── config.yml │ │ │ ├── data_loader.py │ │ │ ├── generator.py │ │ │ ├── image_datasets.py │ │ │ ├── logger.py │ │ │ ├── parallel_transformer.py │ │ │ ├── resample.py │ │ │ ├── sampler.py │ │ │ ├── t2ids.py │ │ │ └── train_util.py │ └── glide_utils │ │ ├── __init__.py │ │ ├── callbackConfig.py │ │ ├── img_utils.py │ │ ├── learn_utils.py │ │ ├── moxing_adapter.py │ │ ├── parallelConfig.py │ │ └── parallel_utils.py ├── model_configs │ ├── model_config.json │ └── supres_model_config.json ├── requirements.txt ├── scripts │ ├── run_gen_finetune_dist.sh │ ├── run_infer.sh │ └── run_super_res_finetune_dist.sh └── src │ ├── train_txt2img.py │ └── txt2img.py ├── stablediffusionv2 ├── README.md ├── __init__.py ├── configs │ ├── train_config.json │ ├── v2-inference.yaml │ └── v2-train.yaml ├── demo │ ├── city1.png │ ├── city2.png │ ├── horse1.png │ ├── horse2.png │ ├── sunflower1.png │ ├── sunflower2.png │ ├── tree1.png │ └── tree2.png ├── ldm │ ├── data │ │ ├── __init__.py │ │ ├── dataset.py │ │ └── t2i_collate.py │ ├── models │ │ ├── autoencoder.py │ │ ├── clip_zh │ │ │ ├── __init__.py │ │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ │ └── simple_tokenizer.py │ │ └── diffusion │ │ │ ├── __init__.py │ │ │ ├── ddpm.py │ │ │ ├── dpm_solver │ │ │ ├── __init__.py │ │ │ ├── dpm_solver.py │ │ │ └── sampler.py │ │ │ └── plms.py │ ├── modules │ │ ├── attention.py │ │ ├── diffusionmodules │ │ │ ├── __init__.py │ │ │ ├── model.py │ │ │ ├── openaimodel.py │ │ │ ├── upscaling.py │ │ │ └── util.py │ │ ├── distributions │ │ │ ├── __init__.py │ │ │ └── distributions.py │ │ ├── encoders │ │ │ ├── __init__.py │ │ │ ├── modules.py │ │ │ └── text_encoder.py │ │ └── train │ │ │ ├── callback.py │ │ │ ├── cell_wrapper.py │ │ │ ├── learningrate.py │ │ │ ├── optim.py │ │ │ ├── parallel_config.py │ │ │ ├── tools.py │ │ │ └── utils.py │ └── util.py ├── requirements.txt ├── run_train.py ├── scripts │ └── infer.sh └── txt2img.py └── wukong-huahua ├── README.md ├── README_EN.md ├── configs ├── clip-vit-l-14-zh │ └── config.json ├── train_config.json ├── train_db_config.json ├── v1-inference-chinese-lora.yaml ├── v1-inference-chinese.yaml ├── v1-train-chinese-lora.yaml ├── v1-train-chinese.yaml ├── v1-train-db-chinese.yaml └── wukong-huahua_inpaint_inference.yaml ├── demo ├── inpaint │ ├── overture-creations-5sI6fQgYIuo.png │ ├── overture-creations-5sI6fQgYIuo_mask.png │ └── 一只红色的狐狸坐在长椅上.png ├── 个性化生成效果-猫.jpg ├── 个性化训练数据-猫.jpg ├── 乡村 田野 屏保.png ├── 城市夜景 赛博朋克 格雷格·鲁特科夫斯基.png ├── 效果图合集.jpg ├── 时空 黑洞 辐射.png ├── 来自深渊 风景 绘画 写实风格.png ├── 海上日出时候的奔跑者.png ├── 莫奈 撑阳伞的女人 月亮 梦幻.png └── 诺亚方舟在世界末日起航 科幻插画.png ├── inpaint.py ├── ldm 
├── data │ ├── dataset.py │ ├── dataset_db.py │ └── t2i_collate.py ├── models │ ├── autoencoder.py │ ├── clip_zh │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── simple_tokenizer.py │ │ ├── utils.py │ │ └── vocab_zh.txt │ └── diffusion │ │ ├── ddpm.py │ │ ├── dpm_solver │ │ ├── __init__.py │ │ ├── dpm_solver.py │ │ └── sampler.py │ │ └── plms.py ├── modules │ ├── attention.py │ ├── diffusionmodules │ │ ├── model.py │ │ ├── openaimodel.py │ │ └── util.py │ ├── distributions │ │ └── distributions.py │ ├── encoders │ │ ├── modules.py │ │ └── text_encoder.py │ └── train │ │ ├── callback.py │ │ ├── cell_wrapper.py │ │ ├── learningrate.py │ │ ├── optim.py │ │ ├── parallel_config.py │ │ ├── tools.py │ │ └── utils.py └── util.py ├── requirements.txt ├── run_db_train.py ├── run_train.py ├── scripts ├── run_db_train.sh ├── run_inpaint.sh ├── run_train.sh ├── run_train_lora.sh ├── run_train_parallel.sh ├── run_train_parallel_lora.sh ├── run_txt2img.sh └── run_txt2img_lora.sh └── txt2img.py /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: BUG report 3 | about: BUG反馈 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | If this is your first time, please read our contributor guidelines: 11 | https://github.com/mindspore-lab/mindcv/blob/main/CONTRIBUTING.md 12 | 13 | **Describe the bug/ 问题描述 (Mandatory / 必填)** 14 | A clear and concise description of what the bug is. 15 | 16 | - **Hardware Environment(`Ascend`/`GPU`/`CPU`) / 硬件环境**: 17 | > Please delete the backend not involved / 请删除不涉及的后端: 18 | > /device ascend/GPU/CPU/kirin/等其他芯片 19 | 20 | - **Software Environment / 软件环境 (Mandatory / 必填)**: 21 | -- MindSpore version (e.g., 1.7.0.Bxxx) : 22 | -- Python version (e.g., Python 3.7.5) : 23 | -- OS platform and distribution (e.g., Linux Ubuntu 16.04): 24 | -- GCC/Compiler version (if compiled from source): 25 | 26 | - **Excute Mode / 执行模式 (Mandatory / 必填)(`PyNative`/`Graph`)**: 27 | > Please delete the mode not involved / 请删除不涉及的模式: 28 | > /mode pynative 29 | > /mode graph 30 | 31 | **To Reproduce / 重现步骤 (Mandatory / 必填)** 32 | Steps to reproduce the behavior: 33 | 1. Go to '...' 34 | 2. Click on '....' 35 | 3. Scroll down to '....' 36 | 4. See error 37 | 38 | **Expected behavior / 预期结果 (Mandatory / 必填)** 39 | A clear and concise description of what you expected to happen. 40 | 41 | **Screenshots/ 日志 / 截图 (Mandatory / 必填)** 42 | If applicable, add screenshots to help explain your problem. 43 | 44 | **Additional context / 备注 (Optional / 选填)** 45 | Add any other context about the problem here. 46 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: 需求特性反馈 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | If this is your first time, please read our contributor guidelines: https://gitee.com/mindspore/mindspore/blob/master/CONTRIBUTING.md 11 | 12 | **Is your feature request related to a problem? Please describe.** 13 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 14 | 15 | **Describe the solution you'd like** 16 | A clear and concise description of what you want to happen. 17 | 18 | **Describe alternatives you've considered** 19 | A clear and concise description of any alternative solutions or features you've considered. 
20 | 21 | **Additional context** 22 | Add any other context or screenshots about the feature request here. 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # obsidian 132 | .obsidian/ 133 | 134 | *.ckpt 135 | rank_? 136 | output 137 | .DS_Store 138 | 139 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## NEWS 3 | 4 | minddiffusion is no longer updated. 
5 | All diffusion models and generative models will be provided in the new repo mindone
6 | https://github.com/mindspore-lab/mindone
7 |
--------------------------------------------------------------------------------
/vision/Taichu-GLIDE/README.md:
--------------------------------------------------------------------------------
1 | # Taichu-GLIDE
2 | ## Model Introduction
3 | Taichu-GLIDE is a Chinese text-to-image model (part of the Zidong.Taichu (紫东.太初) series) developed by **Huawei Ascend Computing** together with the **Wuhan Artificial Intelligence Research Institute** and the **Institute of Automation, Chinese Academy of Sciences**, built on the full Ascend + MindSpore stack. The model uses the diffusion-model technique currently popular in AIGC; both the code and the pretrained weights are open source, and developers can use this repository to try out text-to-image generation.
4 |
5 |
6 | ![一幅画着柯基的油画](https://user-images.githubusercontent.com/17930313/206085057-e079d90a-3313-4b9a-9e1c-f67a0594245d.png)
7 | **An oil painting of a corgi (prompt: 一幅画着柯基的油画)**
8 |
9 | ## Requirements
10 |
11 | 1. **Install CANN (version 5.1.RC2) together with the matching driver and firmware** \
12 | Download the packages from the Ascend community: \
13 | \
14 | The example assumes an ARM + EulerOS configuration (choose the x86 packages for x86 systems)
15 |
16 | 2. **Install MindSpore 1.8.1** \
17 | Go to the MindSpore website and install the matching version following the tutorial, linked below: \
18 |
19 |
20 | 3. **Install the requirements** \
21 | pip install -r requirements.txt
22 |
23 | ## Quick Start
24 |
25 | ### Inference
26 | - First [click here](https://download.mindspore.cn/toolkits/minddiffusion/Taichu-GLIDE/) to download the ckpt files
27 | - Add the prompts you want to generate to data/prompts.txt
28 | - Adjust the relevant paths and settings in scripts/run_infer.sh
29 | ```bash
30 | bash scripts/run_infer.sh
31 | ```
32 | ### Training
33 |
34 | ```bash
35 | # distributed training, generation stage
36 | bash scripts/run_gen_finetune_dist.sh /path/hccl_xp_xxxx.json [DEVICE_NUM] [DEVICE_START]
37 | ```
38 |
39 | ```bash
40 | # distributed training, super-resolution stage
41 | bash scripts/run_super_res_finetune_dist.sh /path/hccl_xp_xxxx.json [DEVICE_NUM] [DEVICE_START]
42 | ```
43 |
--------------------------------------------------------------------------------
/vision/Taichu-GLIDE/data/prompts.txt:
--------------------------------------------------------------------------------
1 | 一张画着柯基的油画
2 | 一只可爱的猫坐在草地上
--------------------------------------------------------------------------------
/vision/Taichu-GLIDE/model/glide_text2im/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | A codebase for performing model inference with a text-conditional diffusion model.
3 | """
4 |
--------------------------------------------------------------------------------
/vision/Taichu-GLIDE/model/glide_text2im/custom_types.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Huawei Technologies Co., Ltd
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 |
16 | import enum
17 |
18 |
19 | class ModelMeanType(enum.Enum):
20 | """
21 | Which type of output the model predicts.
22 | """
23 |
24 | PREVIOUS_X = enum.auto()  # the model predicts x_{t-1}
25 | START_X = enum.auto()  # the model predicts x_0
26 | EPSILON = enum.auto()  # the model predicts epsilon
27 |
28 |
29 | class ModelVarType(enum.Enum):
30 | """
31 | What is used as the model's output variance.
32 | 33 | The LEARNED_RANGE option has been added to allow the model to predict 34 | values between FIXED_SMALL and FIXED_LARGE, making its job easier. 35 | """ 36 | 37 | LEARNED = enum.auto() 38 | FIXED_SMALL = enum.auto() 39 | FIXED_LARGE = enum.auto() 40 | LEARNED_RANGE = enum.auto() 41 | 42 | 43 | class LossType(enum.Enum): 44 | MSE = enum.auto() # use raw MSE loss (and KL when learning variances) 45 | BALANCED_MSE = enum.auto() 46 | RESCALED_MSE = ( 47 | enum.auto() 48 | ) # use raw MSE loss (with RESCALED_KL when learning variances) 49 | KL = enum.auto() # use the variational lower-bound 50 | RESCALED_KL = enum.auto() # like KL, but rescale to estimate the full VLB 51 | 52 | def is_vb(self): 53 | return self == LossType.KL or self == LossType.RESCALED_KL 54 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/default_options.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | import mindspore 17 | 18 | 19 | def model_and_diffusion_defaults( 20 | image_size=64, 21 | num_channels=192, 22 | num_res_blocks=3, 23 | channel_mult=(1, 2, 3, 4), 24 | num_heads=1, 25 | num_head_channels=64, 26 | num_heads_upsample=-1, 27 | attention_resolutions=tuple([2, 4, 8]), 28 | dropout=0.9, 29 | text_ctx=128, 30 | xf_width=512, 31 | xf_layers=16, 32 | xf_heads=8, 33 | xf_final_ln=True, 34 | n_vocab=50001, 35 | xf_padding=True, 36 | diffusion_steps=1000, 37 | noise_schedule="squaredcos_cap_v2", 38 | timestep_respacing="60", 39 | use_scale_shift_norm=True, 40 | resblock_updown=True, 41 | use_fp16=True, 42 | cache_text_emb=False, 43 | inpaint=False, 44 | super_res=False, 45 | chinese=True, 46 | sketch=False, 47 | class_balanced=False, 48 | sketch_classes=0, 49 | dtype=mindspore.float32 50 | ): 51 | return dict( 52 | image_size=image_size, 53 | num_channels=num_channels, 54 | num_res_blocks=num_res_blocks, 55 | channel_mult=channel_mult, 56 | num_heads=num_heads, 57 | num_head_channels=num_head_channels, 58 | num_heads_upsample=num_heads_upsample, 59 | attention_resolutions=attention_resolutions, 60 | dropout=dropout, 61 | text_ctx=text_ctx, 62 | xf_width=xf_width, 63 | xf_layers=xf_layers, 64 | xf_heads=xf_heads, 65 | xf_final_ln=xf_final_ln, 66 | n_vocab=n_vocab, 67 | xf_padding=xf_padding, 68 | diffusion_steps=diffusion_steps, 69 | noise_schedule=noise_schedule, 70 | timestep_respacing=timestep_respacing, 71 | use_scale_shift_norm=use_scale_shift_norm, 72 | resblock_updown=resblock_updown, 73 | use_fp16=use_fp16, 74 | cache_text_emb=cache_text_emb, 75 | inpaint=inpaint, 76 | super_res=super_res, 77 | chinese=chinese, 78 | sketch=sketch, 79 | class_balanced=class_balanced, 80 | sketch_classes=sketch_classes, 81 | dtype=dtype 82 | ) 83 | 84 | 85 | def model_and_diffusion_upsample( 
86 | image_size=256, 87 | num_channels=192, 88 | num_res_blocks=2, 89 | channel_mult=(1,1,2,2,4,4), 90 | num_heads=1, 91 | num_head_channels=64, 92 | num_heads_upsample=-1, 93 | attention_resolutions=tuple([32, 16, 8]), 94 | dropout=0.0, 95 | text_ctx=128, 96 | xf_width=512, 97 | xf_layers=16, 98 | xf_heads=8, 99 | xf_final_ln=True, 100 | n_vocab=50257, 101 | xf_padding=True, 102 | diffusion_steps=1000, 103 | noise_schedule="linear", 104 | timestep_respacing="fast27", 105 | use_scale_shift_norm=True, 106 | resblock_updown=True, 107 | use_fp16=True, 108 | cache_text_emb=False, 109 | inpaint=False, 110 | super_res=False, 111 | chinese=False, 112 | sketch=False, 113 | class_balanced=False, 114 | sketch_classes=0, 115 | dtype=mindspore.float32 116 | ): 117 | return dict( 118 | image_size=image_size, 119 | num_channels=num_channels, 120 | num_res_blocks=num_res_blocks, 121 | channel_mult=channel_mult, 122 | num_heads=num_heads, 123 | num_head_channels=num_head_channels, 124 | num_heads_upsample=num_heads_upsample, 125 | attention_resolutions=attention_resolutions, 126 | dropout=dropout, 127 | text_ctx=text_ctx, 128 | xf_width=xf_width, 129 | xf_layers=xf_layers, 130 | xf_heads=xf_heads, 131 | xf_final_ln=xf_final_ln, 132 | n_vocab=n_vocab, 133 | xf_padding=xf_padding, 134 | diffusion_steps=diffusion_steps, 135 | noise_schedule=noise_schedule, 136 | timestep_respacing=timestep_respacing, 137 | use_scale_shift_norm=use_scale_shift_norm, 138 | resblock_updown=resblock_updown, 139 | use_fp16=use_fp16, 140 | cache_text_emb=cache_text_emb, 141 | inpaint=inpaint, 142 | super_res=super_res, 143 | chinese=chinese, 144 | sketch=sketch, 145 | class_balanced=class_balanced, 146 | sketch_classes=sketch_classes, 147 | dtype=dtype 148 | ) 149 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/diffusion_creator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
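# Illustrative sketch (not from the original sources): the two factories above
# return plain dicts so callers can override individual hyper-parameters before
# expanding them into create_model(**options). The override values below are
# hypothetical examples, not settings shipped with this repo.
import mindspore
from model.glide_text2im.default_options import model_and_diffusion_defaults

inference_options = model_and_diffusion_defaults(dtype=mindspore.float16)
inference_options["timestep_respacing"] = "100"  # sample with 100 instead of 1000 denoising steps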
14 | # ============================================================================ 15 | 16 | import mindspore 17 | from mindspore import load_checkpoint 18 | 19 | from model.glide_text2im.gaussian_computation import * 20 | from model.glide_text2im.model.gaussian_diffusion import GenerativePSampleDiffusionModel, DDimSampleDiffusionModel, PMeanVariance 21 | from model.glide_text2im.custom_types import LossType, ModelMeanType 22 | from model.glide_text2im.model.guider import SamplingWithGuidance 23 | from model.glide_text2im.model_creator import create_model, create_upsample_model 24 | 25 | 26 | 27 | def init_diffusion_model(options, guidance_scale, shape, ckpt_path=None): 28 | # init model 29 | model = create_model(**options) 30 | 31 | # init guidance 32 | pics_generated = int(shape[0] / 2) 33 | sampling_with_guidance = SamplingWithGuidance(model, guidance_scale, pics_generated) 34 | 35 | # init diffusion 36 | base_diffusion, _ = create_gaussian_diffusion( 37 | diffusion_steps=options["diffusion_steps"], noise_schedule=options["noise_schedule"], 38 | timestep_respacing=options["timestep_respacing"], class_balanced=options["class_balanced"], 39 | sketch_classes=options["sketch_classes"], guider_net=sampling_with_guidance, 40 | clip_denoised=True, denoised_net=None, dtype=options["dtype"], shape=shape 41 | ) 42 | diffusion_with_p_sample = GenerativePSampleDiffusionModel(base_diffusion, shape=shape, dtype=options["dtype"]) 43 | return diffusion_with_p_sample 44 | 45 | 46 | def init_super_res_model(options, shape, ckpt_path=None): 47 | # init model 48 | up_sample_model = create_upsample_model(**options) 49 | 50 | if ckpt_path: 51 | load_checkpoint(ckpt_path, up_sample_model) 52 | 53 | # init diffusion 54 | base_diffusion, _ = create_gaussian_diffusion( 55 | diffusion_steps=options["diffusion_steps"], noise_schedule=options["noise_schedule"], 56 | timestep_respacing=options["timestep_respacing"], class_balanced=options["class_balanced"], 57 | sketch_classes=options["sketch_classes"], guider_net=up_sample_model, 58 | clip_denoised=True, denoised_net=None, dtype=options["dtype"], shape=shape 59 | ) 60 | diffusion_with_ddim_sample = DDimSampleDiffusionModel(base_diffusion, shape=shape, dtype=options["dtype"]) 61 | return diffusion_with_ddim_sample 62 | 63 | 64 | def create_gaussian_diffusion( 65 | diffusion_steps, # 1000 66 | noise_schedule, 67 | timestep_respacing, # 200 68 | class_balanced, 69 | sketch_classes, 70 | guider_net=None, 71 | clip_denoised=True, 72 | denoised_net=None, 73 | shape=None, 74 | dtype=mindspore.float32 75 | ): 76 | betas = get_named_beta_schedule(noise_schedule, diffusion_steps) # 0-1之间,1000个数 77 | if not timestep_respacing: 78 | timestep_respacing = [diffusion_steps] 79 | if class_balanced: 80 | loss_type = LossType.BALANCED_MSE 81 | else: 82 | loss_type = LossType.MSE 83 | 84 | use_timesteps = space_timesteps(diffusion_steps, timestep_respacing) 85 | alphas_cumprod = alpha_calculator(betas) 86 | timestep_map, new_betas = space_diffusion_from_base(use_timesteps, alphas_cumprod) 87 | 88 | diffusion = PMeanVariance( 89 | guider_net=guider_net, clip_denoised=clip_denoised, denoised_net=denoised_net, timestep_map=timestep_map, 90 | betas=new_betas, model_mean_type=ModelMeanType.EPSILON, loss_type=loss_type, sketch_classes=sketch_classes, 91 | shape=shape, dtype=dtype 92 | ) 93 | return diffusion, betas 94 | 95 | 96 | def space_diffusion_from_base(use_timesteps, alphas_cumprod): 97 | timestep_map = [] 98 | 99 | last_alpha_cumprod = 1.0 100 | new_betas = [] 101 | for i, 
alpha_cumprod in enumerate(alphas_cumprod): 102 | if i in use_timesteps: 103 | new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) 104 | last_alpha_cumprod = alpha_cumprod 105 | timestep_map.append(i) 106 | return timestep_map, np.array(new_betas) 107 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/losses.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | import mindspore as ms 17 | from mindspore import Tensor 18 | from mindspore import ops 19 | from mindspore import context 20 | import mindspore.numpy as np 21 | import numpy 22 | 23 | 24 | def normal_kl(mean1, logvar1, mean2, logvar2): 25 | """ 26 | Compute the KL divergence between two gaussians. 27 | 28 | Shapes are automatically broadcasted, so batches can be compared to 29 | scalars, among other use cases. 30 | """ 31 | 32 | exp = ops.Exp() 33 | prints = ops.Print() 34 | pow = ops.Pow() 35 | return 0.5 * ( 36 | -1.0 37 | + logvar2 38 | - logvar1 39 | + exp(logvar1 - logvar2) 40 | + (pow((mean1 - mean2), 2) * exp(-logvar2)) 41 | ) 42 | 43 | 44 | def approx_standard_normal_cdf(x): 45 | """ 46 | A fast approximation of the cumulative distribution function of the 47 | standard normal. 48 | """ 49 | tanh = ops.Tanh() 50 | pow = ops.Pow() 51 | return 0.5 * (1.0 + tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * pow(x, 3)))) 52 | 53 | 54 | def discretized_gaussian_log_likelihood(x, *, means, log_scales): 55 | """ 56 | Compute the log-likelihood of a Gaussian distribution discretizing to a 57 | given image. 58 | 59 | :param x: the target images. It is assumed that this was uint8 values, 60 | rescaled to the range [-1, 1]. 61 | :param means: the Gaussian mean Tensor. 62 | :param log_scales: the Gaussian log stddev Tensor. 63 | :return: a tensor like x of log probabilities (in nats). 
64 | """ 65 | exp = ops.Exp() 66 | log = ops.Log() 67 | assert x.shape == means.shape == log_scales.shape 68 | centered_x = x - means 69 | inv_stdv = exp(-log_scales) 70 | plus_in = inv_stdv * (centered_x + 1.0 / 255.0) 71 | cdf_plus = approx_standard_normal_cdf(plus_in) 72 | min_in = inv_stdv * (centered_x - 1.0 / 255.0) 73 | cdf_min = approx_standard_normal_cdf(min_in) 74 | log_cdf_plus = log(ops.clip_by_value(cdf_plus, clip_value_min=1e-12, clip_value_max=1e10)) 75 | log_one_minus_cdf_min = log(ops.clip_by_value((1.0 - cdf_min), clip_value_min=1e-12, 76 | clip_value_max=1e10)) 77 | cdf_delta = cdf_plus - cdf_min 78 | log_probs = np.where( 79 | x < -0.999, 80 | log_cdf_plus, 81 | np.where(x > 0.999, log_one_minus_cdf_min, log(ops.clip_by_value( 82 | cdf_delta, clip_value_min=1e-12, clip_value_max=1e10))), 83 | ) 84 | assert log_probs.shape == x.shape 85 | return log_probs 86 | 87 | if __name__ == "__main__": 88 | x = Tensor(numpy.random.standard_normal(10,).astype(numpy.float32)) 89 | y = Tensor(numpy.random.standard_normal(10,).astype(numpy.float32)) 90 | m1 = Tensor(numpy.random.standard_normal(10,).astype(numpy.float32)) 91 | m2 = Tensor(numpy.random.standard_normal(10,).astype(numpy.float32)) -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/main_funcs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
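# Illustrative smoke test (not from the original sources; assumes a working
# MindSpore install with this repo on the import path). normal_kl() above
# computes the elementwise KL(N(mean1, e^logvar1) || N(mean2, e^logvar2)), so
# for two identical Gaussians every element must be zero:
import numpy
from mindspore import Tensor
from model.glide_text2im.losses import normal_kl

mean = Tensor(numpy.zeros(4, dtype=numpy.float32))
logvar = Tensor(numpy.zeros(4, dtype=numpy.float32))
kl = normal_kl(mean, logvar, mean, logvar)
assert float(kl.sum().asnumpy()) < 1e-6  # KL of a distribution with itself is 0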
14 | # ============================================================================ 15 | 16 | import mindspore 17 | from tqdm.auto import tqdm 18 | from random import choice 19 | 20 | 21 | def gaussian_p_sample_loop(diffusion_model, token, mask, shape, num_timesteps, tokenizer, text_ctx, 22 | noise=None, progress=False, dtype=mindspore.float32, vocab_len=50001): 23 | # init original image(pure noise) 24 | if noise is not None: 25 | img = noise 26 | else: 27 | img = mindspore.ops.StandardNormal()(shape) 28 | img = mindspore.ops.Cast()(img, dtype) 29 | indices = list(range(num_timesteps))[::-1] 30 | 31 | # visualized progress bar 32 | if progress: 33 | indices = tqdm(indices) 34 | 35 | # recursively de-noising on img 36 | for i in indices: 37 | random_token_tensor = mindspore.numpy.randint(1, vocab_len-1, (text_ctx,), dtype=mindspore.int32) 38 | random_mask_tensor = mindspore.numpy.ones((text_ctx,), mindspore.int32) 39 | i_tensor = mindspore.Tensor([i], dtype=mindspore.int32) 40 | sample, _ = diffusion_model(x=img, timesteps=i_tensor, token=token, mask=mask, 41 | random_token=random_token_tensor, random_mask=random_mask_tensor) 42 | img = sample 43 | 44 | return img 45 | 46 | 47 | def ddim_sample_loop(super_res_model, up_shape, samples, token, mask, num_timesteps, noise=None, progress=False, 48 | dtype=mindspore.float32): 49 | # init original image(pure noise) 50 | if noise is not None: 51 | img = noise 52 | else: 53 | upsample_temp = 0.997 54 | img = mindspore.ops.StandardNormal()(up_shape) 55 | img = mindspore.ops.Mul()(img, upsample_temp) 56 | img = mindspore.ops.Cast()(img, dtype) 57 | 58 | indices = list(range(num_timesteps))[::-1] 59 | 60 | # visualized progress bar 61 | if progress: 62 | indices = tqdm(indices) 63 | 64 | for i in indices: 65 | i_tensor = mindspore.Tensor(input_data=[i], dtype=mindspore.int32) 66 | sample, _ = super_res_model(x=img, timesteps=i_tensor, token=token, mask=mask, samples=samples) 67 | img = sample 68 | 69 | return img 70 | 71 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/Taichu-GLIDE/model/glide_text2im/model/__init__.py -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/model/guider.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================ 15 | 16 | import mindspore.nn as nn 17 | import mindspore 18 | 19 | 20 | class CombinePrompt(nn.Cell): 21 | def __init__(self, pics_generated): 22 | super(CombinePrompt, self).__init__() 23 | # model attributes 24 | self.pics_generated = pics_generated 25 | 26 | # operations 27 | self.slice = mindspore.ops.Slice() 28 | self.concat = mindspore.ops.Concat(axis=0) 29 | self.broadcast_to = mindspore.ops.BroadcastTo((pics_generated, 128)) 30 | self.cast = mindspore.ops.Cast() 31 | 32 | ''' 33 | x_t: tensor 34 | kwargs: dict, {tokens : num_of_pics*2 x 128 tensor, mask : num_of_pics*2 x 128 tensor} 35 | ''' 36 | def construct(self, x_t, in_token, in_mask, random_token, random_mask): 37 | # computes 38 | first_half_tokens = self.slice(in_token, (0, 0), (self.pics_generated, in_token.shape[1])) 39 | first_half_mask = self.slice(in_mask, (0, 0), (self.pics_generated, in_mask.shape[1])) 40 | 41 | _, channels, img_h, img_w = x_t.shape 42 | half = self.slice(x_t, (0, 0, 0, 0), (self.pics_generated, channels, img_h, img_w)) 43 | combined = self.concat((half, half)) 44 | 45 | last_half_tokens = self.broadcast_to(random_token) 46 | last_half_mask = self.broadcast_to(random_mask) 47 | tokens = self.concat((first_half_tokens, last_half_tokens)) 48 | mask = self.concat((first_half_mask, last_half_mask)) 49 | 50 | return combined, tokens, mask 51 | 52 | 53 | class Guider(nn.Cell): 54 | def __init__(self, guidance_scale): 55 | super(Guider, self).__init__() 56 | # model attributes 57 | self.guidance_scale = guidance_scale 58 | 59 | # operations 60 | self.slice = mindspore.ops.Slice() 61 | self.concat = mindspore.ops.Concat(axis=0) 62 | self.concat_at_1 = mindspore.ops.Concat(axis=1) 63 | self.split = mindspore.ops.Split(axis=0, output_num=2) 64 | self.add = mindspore.ops.Add() 65 | self.mul = mindspore.ops.Mul() 66 | self.neg = mindspore.ops.Neg() 67 | 68 | ''' 69 | x_t: tensor 70 | ts: tensor 71 | kwargs: dict, {tokens : num_of_pics*2 x 128 tensor, mask : num_of_pics*2 x 128 tensor} 72 | ''' 73 | def construct(self, model_out): 74 | modelout_shape = model_out.shape 75 | eps = self.slice(model_out, (0, 0, 0, 0), (modelout_shape[0], 3, modelout_shape[2], modelout_shape[3])) 76 | rest = self.slice(model_out, (0, 3, 0, 0), (modelout_shape[0], 3, modelout_shape[2], modelout_shape[3])) 77 | 78 | cond_eps, uncond_eps = self.split(eps) 79 | 80 | diff_eps = self.add(cond_eps, self.neg(uncond_eps)) 81 | scaled_diff_epq = self.mul(self.guidance_scale, diff_eps) 82 | half_eps = self.add(uncond_eps, scaled_diff_epq) 83 | eps = self.concat((half_eps, half_eps)) 84 | out = self.concat_at_1((eps, rest)) 85 | 86 | return out 87 | 88 | 89 | class SamplingWithGuidance(nn.Cell): 90 | def __init__(self, model, guidance_scale, num_of_pics_generated): 91 | super(SamplingWithGuidance, self).__init__() 92 | self.combine_prompt = CombinePrompt(num_of_pics_generated) 93 | self.model = model 94 | self.guider = Guider(guidance_scale) 95 | self.broadcast_to = mindspore.ops.BroadcastTo((num_of_pics_generated * 2,)) 96 | self.concat = mindspore.ops.Concat(axis=1) 97 | 98 | def construct(self, x_t, timesteps, in_token, in_mask, random_token, random_mask): 99 | combined, tokens, mask = self.combine_prompt(x_t, in_token, in_mask, random_token, random_mask) 100 | timesteps = self.broadcast_to(timesteps) 101 | model_out = self.model(combined, timesteps, tokens, mask) 102 | out = self.guider(model_out) 103 | return out 104 | 
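# Illustrative check of the guidance arithmetic (not from the original sources).
# Guider above implements classifier-free guidance on the epsilon half of the
# model output: eps = uncond_eps + scale * (cond_eps - uncond_eps), after which
# the guided eps is duplicated across both halves of the batch. A NumPy mirror
# of that update with toy values:
import numpy as np

def guided_eps(cond_eps, uncond_eps, scale):
    return uncond_eps + scale * (cond_eps - uncond_eps)

cond = np.array([1.0, 2.0])    # epsilon predicted with the text condition
uncond = np.array([0.0, 0.0])  # epsilon predicted with random/empty tokens
print(guided_eps(cond, uncond, 3.0))  # [3. 6.] -- scale > 1 pushes samples toward the condition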
-------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/model/srgan.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | import math 17 | import mindspore.nn as nn 18 | import mindspore.ops as ops 19 | from mindspore.common import initializer as init 20 | 21 | 22 | def init_weights(net, init_type='normal', init_gain=0.1): 23 | """ 24 | Initialize network weights 25 | """ 26 | for _, cell in net.cells_and_names(): 27 | if isinstance(cell, (nn.Conv2d, nn.Conv2dTranspose)): 28 | if init_type == 'normal': 29 | cell.weight.set_data(init.initializer(init.Normal(init_gain), cell.weight.shape)) 30 | elif init_type == 'xavier': 31 | cell.weight.set_data(init.initializer(init.XavierUniform(init_gain), cell.weight.shape)) 32 | elif init_type == 'constant': 33 | cell.weight.set_data(init.initializer(0.001, cell.weight.shape)) 34 | else: 35 | raise NotImplementedError('initialization method [%s] is not implemented' % init_type) 36 | elif isinstance(cell, nn.BatchNorm2d): 37 | cell.gamma.set_data(init.initializer('ones', cell.gamma.shape)) 38 | cell.beta.set_data(init.initializer('zeros', cell.beta.shape)) 39 | 40 | 41 | class ResidualBlock(nn.Cell): 42 | """Structure of ResidualBlock""" 43 | def __init__(self, channels): 44 | super(ResidualBlock, self).__init__() 45 | self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, has_bias=True, pad_mode='pad') 46 | self.bn1 = nn.BatchNorm2d(channels) 47 | self.prelu = nn.PReLU(channels) 48 | self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, has_bias=True, pad_mode='pad') 49 | self.bn2 = nn.BatchNorm2d(channels) 50 | 51 | def construct(self, x): 52 | out = self.conv1(x) 53 | out = self.bn1(out) 54 | out = self.prelu(out) 55 | out = self.conv2(out) 56 | out = self.bn2(out) 57 | return out + x 58 | 59 | 60 | class SubpixelConvolutionLayer(nn.Cell): 61 | """Structure of SubpixelConvolutionLayer""" 62 | def __init__(self, channels): 63 | super(SubpixelConvolutionLayer, self).__init__() 64 | self.conv = nn.Conv2d(channels, channels*4, kernel_size=3, stride=1, padding=1, has_bias=True, pad_mode='pad') 65 | self.pixel_shuffle = ops.DepthToSpace(2) 66 | self.prelu = nn.PReLU(channels) 67 | 68 | def construct(self, x): 69 | out = self.conv(x) 70 | out = self.pixel_shuffle(out) 71 | out = self.prelu(out) 72 | return out 73 | 74 | 75 | class Generator(nn.Cell): 76 | """Structure of Generator""" 77 | def __init__(self, upscale_factor): 78 | 79 | super(Generator, self).__init__() 80 | # Calculating the number of subpixel convolution layers. 81 | num_subpixel_convolution_layers = int(math.log(upscale_factor, 2)) 82 | # First layer. 
83 | self.conv1 = nn.SequentialCell( 84 | nn.Conv2d(3, 64, kernel_size=9, stride=1, padding=4, has_bias=True, pad_mode='pad'), 85 | nn.PReLU(channel=64)) 86 | 87 | # 16 Residual blocks 88 | trunk = [] 89 | for _ in range(16): 90 | trunk.append(ResidualBlock(64)) 91 | self.trunk = nn.SequentialCell(*trunk) 92 | 93 | # Second conv layer post residual blocks. 94 | self.conv2 = nn.SequentialCell( 95 | nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, has_bias=True, pad_mode='pad'), 96 | nn.PReLU(channel=64) 97 | ) 98 | 99 | # 2 Sub-pixel convolution layers. 100 | subpixel_conv_layers = [] 101 | for _ in range(num_subpixel_convolution_layers): 102 | subpixel_conv_layers.append(SubpixelConvolutionLayer(64)) 103 | self.subpixel_conv = nn.SequentialCell(*subpixel_conv_layers) 104 | 105 | # Final output layer. 106 | self.conv3 = nn.Conv2d(64, 3, kernel_size=9, stride=1, padding=4, has_bias=True, pad_mode='pad') 107 | self.tanh = nn.Tanh() 108 | 109 | def construct(self, x): 110 | conv1 = self.conv1(x) 111 | trunk = self.trunk(conv1) 112 | conv2 = self.conv2(trunk) 113 | out = conv1+conv2 114 | out = self.subpixel_conv(out) 115 | out = self.conv3(out) 116 | out = self.tanh(out) 117 | return out 118 | 119 | 120 | def get_generator(upscale_factor, init_gain): 121 | net = Generator(upscale_factor) 122 | init_weights(net, 'normal', init_gain) 123 | return net 124 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/model/srgan_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
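# Illustrative shape check (not from the original sources; assumes a working
# MindSpore install). Generator above stacks int(log2(upscale_factor))
# SubpixelConvolutionLayer blocks, and each DepthToSpace(2) doubles height and
# width, so upscale_factor=4 turns a 64x64 input into 256x256:
import numpy as np
import mindspore
from model.glide_text2im.model.srgan import get_generator

srgan = get_generator(upscale_factor=4, init_gain=0.02)
low_res = mindspore.Tensor(np.random.rand(1, 3, 64, 64).astype(np.float32))
print(srgan(low_res).shape)  # (1, 3, 256, 256)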
14 | # ============================================================================ 15 | 16 | from mindspore import ops 17 | from PIL import Image 18 | import numpy as np 19 | import mindspore 20 | from mindspore import load_checkpoint, load_param_into_net, Tensor 21 | 22 | from .srgan import Generator 23 | 24 | 25 | def get_img(batch: mindspore.Tensor): 26 | batch_plus = mindspore.ops.Add()(batch, 1) 27 | scaled = mindspore.ops.Mul()(batch_plus, 127.5) 28 | rounded_scaled = mindspore.ops.Rint()(scaled) 29 | clipped_scaled = mindspore.ops.clip_by_value(rounded_scaled, mindspore.Tensor(0), mindspore.Tensor(255)) 30 | clipped_scaled = clipped_scaled.transpose((2, 0, 3, 1)) 31 | clipped_scaled = mindspore.ops.Cast()(clipped_scaled, mindspore.uint8) 32 | reshaped = clipped_scaled.reshape(([batch.shape[2], -1, 3])) 33 | return reshaped 34 | 35 | 36 | class SRGAN(): 37 | def __init__(self, upscale_factor, ckpt_path): 38 | self.net = Generator(upscale_factor) 39 | params = load_checkpoint(ckpt_path) 40 | load_param_into_net(self.net, params) 41 | self.reduce_dims = ops.ReduceSum(keep_dims=False) 42 | self.expand_dims = ops.ExpandDims() 43 | 44 | # SR from Tensor 45 | def sr_handle(self, lr): 46 | output = self.net(lr) 47 | return output 48 | 49 | # SR from image 50 | def sr_image(self, lr_image, hr_image): 51 | lr = np.array(Image.open(lr_image).convert("RGB")) 52 | lr = (lr / 127.5) - 1.0 53 | lr = lr.transpose(2, 0, 1).astype(np.float32) 54 | lr = np.expand_dims(lr, axis=0) 55 | output = self.sr_handle(Tensor(lr)) 56 | output = output.asnumpy() 57 | output = np.squeeze(output, axis=0) 58 | output = np.clip(output, -1.0, 1.0) 59 | output = (output + 1.0) / 2.0 60 | output = output.transpose(1, 2, 0) 61 | Image.fromarray((output * 255.0).astype(np.uint8)).save(hr_image, quality=100) 62 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/model/xf.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | import math 17 | 18 | import mindspore as ms 19 | import mindspore.nn as nn 20 | import mindspore.ops 21 | import mindspore.ops as ops 22 | 23 | from model.glide_text2im.model.simple_nn import Linear 24 | 25 | 26 | class LayerNorm(nn.LayerNorm): 27 | """ 28 | Implementation that supports fp16 inputs but fp32 gains/biases. 
29 | """ 30 | def construct(self, x: ms.Tensor): 31 | y = super().construct(ops.Cast()(x, ms.float32)) 32 | y = ops.Cast()(y, x.dtype) 33 | return y 34 | 35 | 36 | class MultiheadAttention(nn.Cell): 37 | def __init__(self, n_ctx, width, heads, dtype): 38 | super().__init__() 39 | self.n_ctx = n_ctx 40 | self.width = width 41 | self.heads = heads 42 | self.c_qkv = Linear(width, width * 3, dtype=dtype) 43 | self.c_proj = Linear(width, width, dtype=dtype) 44 | self.attention = QKVMultiheadAttention(width, heads, n_ctx, dtype) 45 | 46 | def construct(self, x): 47 | x = self.c_qkv(x) 48 | x = self.attention(x) 49 | x = self.c_proj(x) 50 | return x 51 | 52 | 53 | class MLP(nn.Cell): 54 | def __init__(self, width, dtype): 55 | super().__init__() 56 | self.width = width 57 | self.c_fc = Linear(width, width * 4, dtype=dtype) 58 | self.c_proj = Linear(width * 4, width, dtype=dtype) 59 | self.gelu = nn.GELU() 60 | 61 | def construct(self, x): 62 | return self.c_proj(self.gelu(self.c_fc(x))) 63 | 64 | 65 | class QKVMultiheadAttention(nn.Cell): 66 | def __init__(self, width: int, n_heads: int, n_ctx: int, dtype: mindspore.dtype): 67 | super().__init__() 68 | self.n_heads = n_heads 69 | self.n_ctx = n_ctx 70 | self.dtype = dtype 71 | 72 | self.concat = ops.Concat() 73 | self.sqrt = ops.Sqrt() 74 | self.softmax = nn.Softmax() 75 | self.print = ops.Print() 76 | self.split = ops.Split(axis=-1, output_num=3) 77 | self.cast = ops.Cast() 78 | self.transpose = ops.Transpose() 79 | 80 | self.scale = 1 / math.sqrt(math.sqrt(width * 3 // self.n_heads // 3)) 81 | 82 | def construct(self, qkv): 83 | bs, _, _ = qkv.shape 84 | qkv = qkv.view(bs, self.n_ctx, self.n_heads, -1) 85 | q, k, v = self.split(qkv) 86 | q = q * self.scale 87 | k = k * self.scale 88 | q = self.transpose(q, (0, 2, 1, 3)) 89 | k = self.transpose(k, (0, 2, 3, 1)) 90 | weight = ops.matmul(q, k) 91 | wdtype = weight.dtype 92 | weight = self.cast(self.softmax(self.cast(weight, ms.float32)), wdtype) 93 | weight = self.transpose(weight, (0, 1, 2, 3)) 94 | v = self.transpose(v, (0, 2, 1, 3)) 95 | a = ops.matmul(weight, v) 96 | a = self.transpose(a, (0, 2, 1, 3)) 97 | return a.reshape(bs, self.n_ctx, -1) 98 | 99 | 100 | class ResidualAttentionBlock(nn.Cell): 101 | def __init__( 102 | self, 103 | n_ctx: int, 104 | width: int, 105 | heads: int, 106 | dtype: mindspore.dtype 107 | ): 108 | super().__init__() 109 | 110 | self.attn = MultiheadAttention( 111 | n_ctx, 112 | width, 113 | heads, 114 | dtype 115 | ) 116 | self.ln_1 = LayerNorm([width]) 117 | self.mlp = MLP(width, dtype) 118 | self.ln_2 = LayerNorm([width]) 119 | 120 | def construct(self, x: ms.Tensor): 121 | x = x + self.attn(self.ln_1(x)) 122 | x = x + self.mlp(self.ln_2(x)) 123 | return x 124 | 125 | 126 | class Transformer(nn.Cell): 127 | def __init__( 128 | self, 129 | n_ctx: int, 130 | width: int, 131 | layers: int, 132 | heads: int, 133 | dtype: mindspore.dtype, 134 | ): 135 | super().__init__() 136 | self.n_ctx = n_ctx 137 | self.width = width 138 | self.layers = layers 139 | self.resblocks = nn.CellList( 140 | [ 141 | ResidualAttentionBlock( 142 | n_ctx, 143 | width, 144 | heads, 145 | dtype 146 | ) 147 | for _ in range(layers) 148 | ] 149 | ) 150 | 151 | def construct(self, x: ms.Tensor): 152 | for block in self.resblocks: 153 | x = block(x) 154 | return x 155 | 156 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/model_creation.py: -------------------------------------------------------------------------------- 1 | 
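# Illustrative shape walk-through for xf.py above (not from the original
# sources). MultiheadAttention maps (batch, n_ctx, width) -> (batch, n_ctx,
# width): c_qkv triples the channel dim, QKVMultiheadAttention folds it to
# (batch, n_ctx, heads, 3 * width // heads), splits Q/K/V on the last axis,
# and re-merges the heads at the end. Toy sizes:
import numpy as np
import mindspore as ms
from model.glide_text2im.model.xf import MultiheadAttention

attn = MultiheadAttention(n_ctx=8, width=32, heads=4, dtype=ms.float32)
x = ms.Tensor(np.random.rand(2, 8, 32).astype(np.float32))
print(attn(x).shape)  # (2, 8, 32)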
# Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | from model.glide_text2im.model.text2im_model import Text2ImUNet, SuperResText2ImUNet 17 | from model.glide_text2im.tokenizer.chinese_tokenizer import from_pretrained 18 | 19 | def model_and_diffusion_defaults(): 20 | return dict( 21 | image_size=64, 22 | num_channels=192, 23 | num_res_blocks=3, 24 | channel_mult="", 25 | num_heads=1, 26 | num_head_channels=64, 27 | num_heads_upsample=-1, 28 | attention_resolutions="32,16,8", 29 | dropout=0.0, 30 | text_ctx=128, 31 | xf_width=512, 32 | xf_layers=16, 33 | xf_heads=8, 34 | xf_final_ln=True, 35 | xf_padding=True, 36 | diffusion_steps=1000, 37 | noise_schedule="squaredcos_cap_v2", 38 | timestep_respacing="", 39 | use_scale_shift_norm=True, 40 | resblock_updown=True, 41 | use_fp16=True, 42 | cache_text_emb=False, 43 | inpaint=False, 44 | super_res=False, 45 | chinese=False, 46 | sketch=False, 47 | class_balanced=False, 48 | sketch_classes=0 49 | ) 50 | 51 | def create_model_and_diffusion(options): 52 | #print(options) 53 | return create_model(**options) 54 | 55 | def create_model( 56 | image_size, 57 | num_channels, 58 | num_res_blocks, 59 | channel_mult, 60 | num_heads, 61 | num_head_channels, 62 | num_heads_upsample, 63 | attention_resolutions, 64 | dropout, 65 | text_ctx, 66 | xf_width, 67 | xf_layers, 68 | xf_heads, 69 | xf_final_ln, 70 | n_vocab, 71 | xf_padding, 72 | diffusion_steps, 73 | noise_schedule, 74 | timestep_respacing, 75 | use_scale_shift_norm, 76 | resblock_updown, 77 | use_fp16, 78 | cache_text_emb, 79 | inpaint, 80 | super_res, 81 | chinese, 82 | sketch, 83 | class_balanced, 84 | sketch_classes, 85 | dtype): 86 | net = Text2ImUNet( 87 | text_ctx=text_ctx, 88 | xf_width=xf_width, 89 | xf_layers=xf_layers, 90 | xf_heads=xf_heads, 91 | xf_final_ln=xf_final_ln, 92 | n_vocab=n_vocab, 93 | xf_padding=xf_padding, 94 | in_channels=3, 95 | model_channels=num_channels, 96 | out_channels=6, 97 | num_res_blocks=num_res_blocks, 98 | attention_resolutions=attention_resolutions, 99 | dropout=dropout, 100 | channel_mult=channel_mult, 101 | use_fp16=use_fp16, 102 | num_heads=num_heads, 103 | num_head_channels=num_head_channels, 104 | num_heads_upsample=num_heads_upsample, 105 | use_scale_shift_norm=use_scale_shift_norm, 106 | resblock_updown=resblock_updown, 107 | cache_text_emb=cache_text_emb, 108 | dtype=dtype 109 | ) 110 | return net 111 | 112 | 113 | def create_upsample_model( 114 | image_size, 115 | num_channels, 116 | num_res_blocks, 117 | channel_mult, 118 | num_heads, 119 | num_head_channels, 120 | num_heads_upsample, 121 | attention_resolutions, 122 | dropout, 123 | text_ctx, 124 | xf_width, 125 | xf_layers, 126 | xf_heads, 127 | xf_final_ln, 128 | n_vocab, 129 | xf_padding, 130 | diffusion_steps, 131 | noise_schedule, 132 | timestep_respacing, 133 | use_scale_shift_norm, 134 | resblock_updown, 135 | use_fp16, 136 
| cache_text_emb, 137 | inpaint, 138 | super_res, 139 | chinese, 140 | sketch, 141 | class_balanced, 142 | sketch_classes, 143 | dtype): 144 | net = SuperResText2ImUNet( 145 | image_size = image_size, 146 | text_ctx=text_ctx, 147 | xf_width=xf_width, 148 | xf_layers=xf_layers, 149 | xf_heads=xf_heads, 150 | xf_final_ln=xf_final_ln, 151 | n_vocab=n_vocab, 152 | xf_padding=xf_padding, 153 | in_channels=6, 154 | model_channels=num_channels, 155 | out_channels=6, 156 | num_res_blocks=num_res_blocks, 157 | attention_resolutions=attention_resolutions, 158 | dropout=dropout, 159 | channel_mult=channel_mult, 160 | use_fp16=use_fp16, 161 | num_heads=num_heads, 162 | num_head_channels=num_head_channels, 163 | num_heads_upsample=num_heads_upsample, 164 | use_scale_shift_norm=use_scale_shift_norm, 165 | resblock_updown=resblock_updown, 166 | cache_text_emb=cache_text_emb, 167 | dtype=dtype 168 | ) 169 | return net 170 | 171 | def add_dict_to_argparser(parser, default_dict): 172 | for k, v in default_dict.items(): 173 | v_type = type(v) 174 | if v is None: 175 | v_type = str 176 | elif isinstance(v, bool): 177 | v_type = str2bool 178 | parser.add_argument(f"--{k}", default=v, type=v_type) 179 | 180 | 181 | def args_to_dict(args, keys): 182 | return {k: getattr(args, k) for k in keys} 183 | 184 | def str2bool(v): 185 | if isinstance(v, bool): 186 | return v 187 | if v.lower() in ("yes", "true", "t", "y", "1"): 188 | return True 189 | elif v.lower() in ("no", "false", "f", "n", "0"): 190 | return False 191 | else: 192 | raise argparse.ArgumentTypeError("boolean value expected") 193 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/model_creator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
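# Illustrative CLI sketch (not from the original sources). add_dict_to_argparser()
# and str2bool() above turn every key of the defaults dict into a command-line
# flag; booleans go through str2bool so "--use_fp16 false" parses as False rather
# than bool("false") == True. The flag values below are hypothetical:
import argparse
from model.glide_text2im.model_creation import (
    add_dict_to_argparser, args_to_dict, model_and_diffusion_defaults)

defaults = model_and_diffusion_defaults()
parser = argparse.ArgumentParser()
add_dict_to_argparser(parser, defaults)
args = parser.parse_args(["--use_fp16", "false", "--image_size", "64"])
options = args_to_dict(args, defaults.keys())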
14 | # ============================================================================ 15 | 16 | from model.glide_text2im.model.text2im_model import Text2ImUNet, SuperResText2ImUNet 17 | 18 | 19 | def create_model( 20 | image_size, 21 | num_channels, 22 | num_res_blocks, 23 | channel_mult, 24 | num_heads, 25 | num_head_channels, 26 | num_heads_upsample, 27 | attention_resolutions, 28 | dropout, 29 | text_ctx, 30 | xf_width, 31 | xf_layers, 32 | xf_heads, 33 | xf_final_ln, 34 | n_vocab, 35 | xf_padding, 36 | diffusion_steps, 37 | noise_schedule, 38 | timestep_respacing, 39 | use_scale_shift_norm, 40 | resblock_updown, 41 | use_fp16, 42 | cache_text_emb, 43 | inpaint, 44 | super_res, 45 | chinese, 46 | sketch, 47 | class_balanced, 48 | sketch_classes, 49 | dtype): 50 | print("origin t2i net") 51 | net = Text2ImUNet( 52 | text_ctx=text_ctx, 53 | xf_width=xf_width, 54 | xf_layers=xf_layers, 55 | xf_heads=xf_heads, 56 | xf_final_ln=xf_final_ln, 57 | n_vocab=n_vocab, 58 | xf_padding=xf_padding, 59 | in_channels=3, 60 | model_channels=num_channels, 61 | out_channels=6, 62 | num_res_blocks=num_res_blocks, 63 | attention_resolutions=attention_resolutions, 64 | dropout=dropout, 65 | channel_mult=channel_mult, 66 | use_fp16=use_fp16, 67 | num_heads=num_heads, 68 | num_head_channels=num_head_channels, 69 | num_heads_upsample=num_heads_upsample, 70 | use_scale_shift_norm=use_scale_shift_norm, 71 | resblock_updown=resblock_updown, 72 | cache_text_emb=cache_text_emb, 73 | dtype=dtype 74 | ) 75 | return net 76 | 77 | 78 | def create_upsample_model( 79 | image_size, 80 | num_channels, 81 | num_res_blocks, 82 | channel_mult, 83 | num_heads, 84 | num_head_channels, 85 | num_heads_upsample, 86 | attention_resolutions, 87 | dropout, 88 | text_ctx, 89 | xf_width, 90 | xf_layers, 91 | xf_heads, 92 | xf_final_ln, 93 | n_vocab, 94 | xf_padding, 95 | diffusion_steps, 96 | noise_schedule, 97 | timestep_respacing, 98 | use_scale_shift_norm, 99 | resblock_updown, 100 | use_fp16, 101 | cache_text_emb, 102 | inpaint, 103 | super_res, 104 | chinese, 105 | sketch, 106 | class_balanced, 107 | sketch_classes, 108 | dtype): 109 | print("super res net") 110 | net = SuperResText2ImUNet( 111 | image_size=image_size, 112 | text_ctx=text_ctx, 113 | xf_width=xf_width, 114 | xf_layers=xf_layers, 115 | xf_heads=xf_heads, 116 | xf_final_ln=xf_final_ln, 117 | n_vocab=n_vocab, 118 | xf_padding=xf_padding, 119 | in_channels=6, 120 | model_channels=num_channels, 121 | out_channels=6, 122 | num_res_blocks=num_res_blocks, 123 | attention_resolutions=attention_resolutions, 124 | dropout=dropout, 125 | channel_mult=channel_mult, 126 | use_fp16=use_fp16, 127 | num_heads=num_heads, 128 | num_head_channels=num_head_channels, 129 | num_heads_upsample=num_heads_upsample, 130 | use_scale_shift_norm=use_scale_shift_norm, 131 | resblock_updown=resblock_updown, 132 | cache_text_emb=cache_text_emb, 133 | dtype=dtype 134 | ) 135 | return net 136 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/tokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/Taichu-GLIDE/model/glide_text2im/tokenizer/__init__.py -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/tokenizer/caption_to_tokens.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | import mindspore 17 | 18 | import model.glide_text2im.train.image_datasets as data_reader 19 | 20 | 21 | def convert_input_to_token_gen(input_line, pics_generated, text_ctx, tokenizer): 22 | tokens, mask = encode_and_pad(input_line, text_ctx, tokenizer) 23 | uncond_tokens, uncond_mask = tokenizer.padded_tokens_and_mask([], text_ctx) 24 | return ( 25 | mindspore.Tensor([tokens] * pics_generated + [uncond_tokens] * pics_generated, dtype=mindspore.int32), 26 | mindspore.Tensor([mask] * pics_generated + [uncond_mask] * pics_generated, dtype=mindspore.int32) 27 | ) 28 | 29 | 30 | def convert_input_to_token_super_res(input_line, pics_generated, text_ctx, tokenizer): 31 | tokens, mask = encode_and_pad(input_line, text_ctx, tokenizer) 32 | tokens = mindspore.Tensor([tokens] * pics_generated, dtype=mindspore.int32) 33 | mask = mindspore.Tensor([mask] * pics_generated, dtype=mindspore.int32) 34 | return tokens, mask 35 | 36 | 37 | def encode_and_pad(input_line, text_ctx, tokenizer): 38 | # Pack the tokens together into model kwargs. 39 | tokens = tokenizer.encode(input_line) 40 | tokens, mask = tokenizer.padded_tokens_and_mask(tokens, text_ctx) # text_ctx 128 41 | return tokens, mask 42 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/tokenizer/chinese_tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
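# Illustrative padding example (not from the original sources). encode_and_pad()
# above right-pads the token ids to text_ctx entries and builds a boolean mask
# over the real tokens. A minimal stand-in mirroring the logic of
# Encoder_SP.padded_tokens_and_mask below (end_token=0 is a toy value; the real
# tokenizer uses n_vocab - 1):
def padded_tokens_and_mask(tokens, text_ctx, end_token=0):
    tokens = tokens[:text_ctx]
    padding = text_ctx - len(tokens)
    return tokens + [end_token] * padding, [True] * len(tokens) + [False] * padding

print(padded_tokens_and_mask([5, 9], 6))
# ([5, 9, 0, 0, 0, 0], [True, True, False, False, False, False])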
14 | # ============================================================================ 15 | 16 | import json 17 | import os 18 | import sentencepiece as spm 19 | from typing import List, Tuple 20 | 21 | 22 | def get_pairs(word): 23 | pairs = set() 24 | prev_char = word[0] 25 | for char in word[1:]: 26 | pairs.add((prev_char, char)) 27 | prev_char = char 28 | return pairs 29 | 30 | 31 | class Encoder: 32 | def __init__(self, encoder, bpe_merges): 33 | self.encoder = encoder 34 | self.decoder = {v: k for k, v in self.encoder.items()} 35 | self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges)))) 36 | self.cache = {} 37 | self.max_len = 0 38 | 39 | def bpe(self, token): 40 | if token in self.cache: 41 | return self.cache[token] 42 | word = tuple(token) 43 | pairs = get_pairs(word) 44 | if not pairs: 45 | return token 46 | 47 | while True: 48 | bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) 49 | if bigram not in self.bpe_ranks: 50 | break 51 | first, second = bigram 52 | new_word = [] 53 | i = 0 54 | while i < len(word): 55 | try: 56 | j = word.index(first, i) 57 | new_word.extend(word[i:j]) 58 | i = j 59 | except: 60 | new_word.extend(word[i:]) 61 | break 62 | 63 | if word[i] == first and i < len(word) - 1 and word[i + 1] == second: 64 | new_word.append(first + second) 65 | i += 2 66 | else: 67 | new_word.append(word[i]) 68 | i += 1 69 | new_word = tuple(new_word) 70 | word = new_word 71 | if len(word) == 1: 72 | break 73 | else: 74 | pairs = get_pairs(word) 75 | word = ' '.join(word) 76 | self.cache[token] = word 77 | return word 78 | 79 | def encode(self, text): 80 | return [self.encoder.get(token, 1) for token in self.tokenize(text)] 81 | 82 | def decode(self, tokens): 83 | text = ''.join([self.decoder[token] for token in tokens]) 84 | return text 85 | 86 | def tokenize(self, text): 87 | bpe_tokens = [] 88 | bpe_tokens.extend(bpe_token for bpe_token in self.bpe(text).split(' ')) 89 | return bpe_tokens 90 | 91 | def convert_tokens_to_ids(self, tokens): 92 | return [self.encoder.get(token, 1) for token in tokens] 93 | 94 | 95 | class Encoder_SP: 96 | def __init__(self, model_path): 97 | self.sp = spm.SentencePieceProcessor() 98 | self.sp.Load(model_path) 99 | self.num_tokens = self.sp.vocab_size() 100 | 101 | @property 102 | def n_vocab(self) -> int: 103 | # +1 for end token 104 | return self.num_tokens + 1 105 | 106 | @property 107 | def end_token(self) -> int: 108 | return self.n_vocab - 1 109 | 110 | def padded_tokens_and_mask( 111 | self, tokens: List[int], text_ctx: int 112 | ) -> Tuple[List[int], List[bool]]: 113 | tokens = tokens[:text_ctx] 114 | padding = text_ctx - len(tokens) 115 | padded_tokens = tokens + [self.end_token] * padding 116 | mask = [True] * len(tokens) + [False] * padding 117 | return padded_tokens, mask 118 | 119 | def encode(self, text): 120 | """ 121 | text="...." 122 | """ 123 | return self.sp.EncodeAsIds(text) 124 | 125 | def decode(self, tokens): 126 | """ 127 | tokens=[x1,x2,...] 
128 | """ 129 | text = [int(token) for token in tokens if int(token) != self.end_token] 130 | return self.sp.DecodeIds(text) 131 | 132 | def tokenize(self, text): 133 | return self.sp.EncodeAsPieces(text) 134 | 135 | def convert_tokens_to_ids(self, tokens): 136 | return [self.sp.PieceToId(token) for token in tokens] 137 | 138 | def convert_token_to_id(self, token): 139 | return self.sp.PieceToId(token) 140 | 141 | def convert_id_to_token(self, idx): 142 | return self.sp.IdToPiece(idx) 143 | 144 | 145 | def get_encoder(encoder_file, bpe_file): 146 | filepath, filename = os.path.split(encoder_file) 147 | shotname, extension = os.path.splitext(filename) 148 | 149 | if (".model" == extension) and (bpe_file == ""): 150 | return Encoder_SP(encoder_file) 151 | else: 152 | with open(encoder_file, 'r', encoding="utf-8") as f: 153 | encoder = json.load(f) 154 | with open(bpe_file, 'r', encoding="utf-8") as f: 155 | bpe_data = f.read() 156 | bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split('\n')[1:-1]] 157 | return Encoder( 158 | encoder=encoder, 159 | bpe_merges=bpe_merges, 160 | ) 161 | 162 | 163 | def from_pretrained(file): 164 | return get_encoder(file, "") -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/Taichu-GLIDE/model/glide_text2im/train/__init__.py -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/train/build_optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | build optimizer for ms 17 | for params containing the words 'layernorm' and not containing 'bias', we choose the adam. 18 | for the other params, they are optimized by adam. 
19 | """ 20 | from mindspore.nn.optim.adam import Adam, AdamWeightDecay 21 | 22 | 23 | def build_optimizer(model, optim, betas, lr): 24 | """ 25 | 26 | :param model: 27 | :param opts: 28 | :param lr: 29 | :return: optimizer 30 | """ 31 | 32 | decay_filter = lambda x: 'layernorm' not in x.name.lower() and "bias" not in x.name.lower() 33 | param_optimizer = model.trainable_params() 34 | decay_params = list(filter(decay_filter, param_optimizer)) 35 | other_params = list(filter(lambda x: not decay_filter(x), param_optimizer)) 36 | group_params = [{ 37 | 'params': decay_params, 38 | 'weight_decay': 1e-2 39 | }, { 40 | 'params': other_params, 41 | 'weight_decay': 0.0 42 | }, { 43 | 'order_params': param_optimizer 44 | }] 45 | # currently Adam only 46 | if optim == 'adam': 47 | OptimCls = Adam 48 | elif optim == 'adamw': 49 | OptimCls = AdamWeightDecay 50 | else: 51 | raise ValueError('invalid optimizer') 52 | optimizer = OptimCls(group_params, 53 | learning_rate=lr, beta1=betas[0], beta2=betas[1]) 54 | return optimizer 55 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/train/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================ 15 | 16 | MAX_TEXT_LEN = 48 17 | MAX_FULL_TEXT_LEN = 50 18 | MAX_IMG_LEN = 197 #448 19 | MAX_AUDIO_LEN = 50 20 | MAX_FULL_LEN = 297 #448 21 | MAX_DEFAULT_LEN = 50 22 | 23 | MAX_IMG_TEXT_LEN = 247 #448 24 | 25 | IMG_TOKEN_SIZE = 8192 26 | IMG_TOKEN_LEN = 64 27 | 28 | MAX_TEXT_GTS_LEN = 29 29 | MAX_IMG_GTS_LEN = 63 30 | 31 | MAX_MEL_LEN = 1289 32 | MAX_SRC_LEN = 89 33 | 34 | IMG_DIM = 768 35 | AUDIO_DIM = 1024 36 | 37 | IMG_LABEL_DIM = 1601 38 | AUDIO_LABEL_DIM = 1600 39 | 40 | MASK_SIZE=2 41 | N_NEGATIVES=10 42 | IMG_PATCH_SIZE=448 43 | 44 | MAX_TIME_STEPS=128 -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/train/config.yml: -------------------------------------------------------------------------------- 1 | model: 2 | name: 3 | num_hiddens: 256 4 | num_residual_layers: 4 5 | num_residual_hiddens: 256 6 | 7 | downsample: 8 8 | embedding_dim: 256 9 | num_embeddings: 8192 10 | commitment_cost: 0.25 11 | decay: 0.99 12 | 13 | dataset: 14 | name: Custom 15 | img_size: 256 16 | batchsize: 16 17 | buffersize: 5 18 | repeatsize: 1 19 | num_workers: 2 20 | data_dir: ./datasets/txt2img/mscoco/images 21 | train_ids_path: ./datasets/txt2img/mscoco/cocodata_zh/COCO_trainids_vqvae.json 22 | valid_ids_path: ./datasets/txt2img/mscoco/cocodata_zh/COCO_validids_vqvae.json 23 | 24 | loss: 25 | name: nMSE 26 | 27 | train: 28 | pretrain: ~ 29 | ckpt: ~ 30 | std_out: True 31 | 32 | lr: 0.0012 33 | num_epochs: 10 34 | num_workers: 2 35 | sink_size: 1000 36 | loss_scale: 4096 37 | optimize: ADAM 38 | keep_batchnorm_fp32: True 39 | exp_name: VQVAEwBN_MSCOCO 40 | exp_path: experiments/ 41 | 42 | num_log_steps: 10 43 | num_save_steps: 20000 44 | num_test_steps: 5000 45 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/train/data_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
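config.yml above is a plain YAML document; a minimal sketch of reading it follows. PyYAML is an assumption here (it is not pinned in requirements.txt, and the training code may go through its own config helper instead):

```python
# Minimal sketch: load train/config.yml into nested dicts with PyYAML.
import yaml  # assumed dependency

with open("model/glide_text2im/train/config.yml", "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

print(cfg["dataset"]["batchsize"])  # 16
print(cfg["train"]["lr"])           # 0.0012
print(cfg["train"]["pretrain"])     # None ('~' parses to null)
```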
14 | # ============================================================================ 15 | """ dataloader """ 16 | 17 | import os 18 | 19 | 20 | class DataLoader: 21 | """ DataLoader """ 22 | 23 | def __init__(self, dataset, batch_sampler, collate_fn, is_train=True, device_num=256, drop_last=True): 24 | self.dataset = dataset 25 | self.batch_sampler = batch_sampler 26 | self.collat_fn = collate_fn 27 | self.device_num = device_num 28 | rank_id_str = os.getenv('RANK_ID', '0') 29 | self.rank_id = int(rank_id_str[rank_id_str.rfind('-') + 1:]) # 'RANK_ID': 'job24535502-job-facereidtome-hn-0/1' 30 | self.is_train = is_train 31 | self.drop_last = drop_last 32 | self.batch_size = len(next(iter(self.batch_sampler))) 33 | 34 | def __iter__(self): 35 | self.step_index = 0 36 | self.batch_indices = iter(self.batch_sampler) 37 | 38 | return self 39 | 40 | def __next__(self): 41 | 42 | if self.is_train: 43 | try: 44 | indices = next(self.batch_indices) 45 | if len(indices) != self.batch_size and self.drop_last: 46 | return self.__next__() 47 | except StopIteration: 48 | self.batch_indices = iter(self.batch_sampler) 49 | indices = next(self.batch_indices) 50 | data = [] 51 | per_batch = len(indices) // self.device_num 52 | index = indices[self.rank_id * per_batch:(self.rank_id + 1) * per_batch] 53 | for idx in index: 54 | data.append(self.dataset[idx]) 55 | 56 | data = self.collat_fn(data) 57 | return data 58 | else: 59 | indices = next(self.batch_indices) 60 | data = [] 61 | per_batch = len(indices) // self.device_num 62 | index = indices[self.rank_id * per_batch:(self.rank_id + 1) * per_batch] 63 | for idx in index: 64 | data.append(self.dataset[idx]) 65 | 66 | data = self.collat_fn(data) 67 | 68 | return data 69 | 70 | 71 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/train/generator.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
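The key behavior of DataLoader.__next__ above is that every rank sees the same global batch of indices from the sampler and keeps only its own contiguous slice; a small arithmetic sketch with illustrative numbers:

```python
# Minimal sketch of the per-rank slicing inside DataLoader.__next__.
indices = list(range(16))  # one global batch of sample indices
device_num = 4             # number of ranks

per_batch = len(indices) // device_num
for rank_id in range(device_num):
    shard = indices[rank_id * per_batch:(rank_id + 1) * per_batch]
    print(rank_id, shard)
# 0 [0, 1, 2, 3]
# 1 [4, 5, 6, 7]
# 2 [8, 9, 10, 11]
# 3 [12, 13, 14, 15]
```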
14 | # ============================================================================ 15 | """ generator """ 16 | from collections import defaultdict 17 | import numpy as np 18 | from mindspore import Tensor 19 | 20 | data_column = [ 21 | 'input_ids', 22 | 'input_mask', 23 | 'img', 24 | 't', 25 | 'weights' 26 | ] 27 | 28 | data_column_supres = [ 29 | 'input_ids', 30 | 'input_mask', 31 | 'img', 32 | 't', 33 | 'weights', 34 | 'low_res' 35 | ] 36 | 37 | data_column_audio = [ 38 | 'input_ids', 39 | 'position_ids', 40 | 'attention_mask', 41 | 'mel_targets', 42 | 'duration_targets', 43 | 'speakers', 44 | 'texts', 45 | 'src_lens', 46 | 'mel_lens', 47 | 'audio_max_text_len', 48 | 'audio_max_mel_len', 49 | 'pitch_targets', 50 | 'energy_targets' 51 | ] 52 | 53 | task2id = { 54 | 'mlmThree': 0, 55 | 'mrcThree': 1, 56 | 'mrfrThree': 2, 57 | 'mafrThree': 3, 58 | 'macThree': 4, 59 | "itmThree": 5, 60 | 'mrctThree': 6, 61 | "tdThree": 7, 62 | "idThree": 8, 63 | "adThree": 9, 64 | "ret": 10, 65 | "ftRet": 11 66 | } 67 | 68 | 69 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/train/resample.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | from abc import ABC, abstractmethod 17 | import numpy as np 18 | 19 | 20 | def create_named_schedule_sampler(name,timesteps): 21 | """ 22 | Create a ScheduleSampler from a library of pre-defined samplers. 23 | 24 | :param name: the name of the sampler. 25 | :param diffusion: the diffusion object to sample for. 26 | """ 27 | 28 | if name == "uniform": 29 | return UniformSampler(timesteps) 30 | else: 31 | raise NotImplementedError(f"unknown schedule sampler: {name}") 32 | 33 | 34 | class ScheduleSampler(ABC): 35 | """ 36 | A distribution over timesteps in the diffusion process, intended to reduce 37 | variance of the objective. 38 | 39 | By default, samplers perform unbiased importance sampling, in which the 40 | objective's mean is unchanged. 41 | However, subclasses may override sample() to change how the resampled 42 | terms are reweighted, allowing for actual changes in the objective. 43 | """ 44 | 45 | @abstractmethod 46 | def weights(self): 47 | """ 48 | Get a numpy array of weights, one per diffusion step. 49 | 50 | The weights needn't be normalized, but must be positive. 51 | """ 52 | 53 | def sample(self, batch_size): 54 | """ 55 | Importance-sample timesteps for a batch. 56 | 57 | :param batch_size: the number of timesteps. 58 | :param device: the torch device to save to. 59 | :return: a tuple (timesteps, weights): 60 | - timesteps: a tensor of timestep indices. 61 | - weights: a tensor of weights to scale the resulting losses. 
62 | """ 63 | w = self.weights() 64 | p = w / np.sum(w) 65 | indices = np.random.choice(len(p), size=(batch_size,), p=p) 66 | weights = 1 / (len(p) * p[indices]) 67 | return indices, weights 68 | 69 | 70 | class UniformSampler(ScheduleSampler): 71 | def __init__(self, timesteps): 72 | #self.diffusion = diffusion 73 | self._weights = np.ones([timesteps]) 74 | 75 | def weights(self): 76 | return self._weights 77 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/train/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | sampler for length bucketing (batch by tokens) 17 | """ 18 | import random 19 | import gc 20 | 21 | class EasySampler: 22 | """ 23 | Sampler for token bucket path 24 | """ 25 | 26 | def __init__(self, dataset, batch_size, device_num=1): 27 | self.dataset = dataset 28 | self.per_batch = batch_size * device_num 29 | 30 | def _create_ids(self): 31 | return list(range(len(self.dataset))) 32 | 33 | def __iter__(self): 34 | ids = self._create_ids() 35 | random.shuffle(ids) 36 | batches = [ids[i:i + self.per_batch] for i in range(0, len(ids) - self.per_batch, self.per_batch)] 37 | return iter(batches) 38 | 39 | def __len__(self): 40 | raise ValueError("NOT supported. " 41 | "This has some randomness across epochs") 42 | 43 | 44 | class BatchSampler: 45 | """ 46 | Batch Sampler 47 | """ 48 | 49 | def __init__(self, lens, batch_size, device_num): 50 | self._lens = lens 51 | self._batch_size = batch_size * device_num 52 | 53 | def _create_ids(self): 54 | return list(range(self._lens)) 55 | 56 | def __iter__(self): 57 | ids = self._create_ids() 58 | batches = [ids[i:i + self._batch_size] for i in range(0, len(ids), self._batch_size)] 59 | gc.collect() 60 | return iter(batches) 61 | 62 | def __len__(self): 63 | raise ValueError("NOT supported. " 64 | "This has some randomness across epochs") 65 | 66 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/train/t2ids.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================ 15 | """ 16 | TextToImage Datasets 17 | """ 18 | from toolz.sandbox import unzip 19 | import os 20 | import json 21 | import numpy as np 22 | import mindspore.dataset.vision.c_transforms as C 23 | from mindspore.dataset.vision.utils import Inter 24 | from PIL import Image 25 | 26 | 27 | def pad_tensors(tensors, lens=None, pad=0, max_len=50): 28 | """B x [T, ...]""" 29 | if lens is None: 30 | lens = [t.shape[0] for t in tensors] 31 | if max_len == -1: 32 | max_len = max(lens) 33 | bs = len(tensors) 34 | hid = tensors[0].shape[-1] 35 | dtype = tensors[0].dtype 36 | output = np.zeros((bs, max_len, hid), dtype=dtype) 37 | if pad: 38 | output.fill(pad) 39 | for i, (t, l) in enumerate(zip(tensors, lens)): 40 | output[i, :l, ...] = t 41 | return output 42 | 43 | def pad_tensors_pos(tensors, lens, feat, max_len=50): 44 | """ pad_tensors_pos """ 45 | if tensors is None or tensors[0] is None: 46 | return np.expand_dims(np.arange(0, feat.shape[1], dtype=np.int64), 0) 47 | return pad_tensors(tensors, lens, max_len=max_len) 48 | 49 | def get_ids_three(ids_path): 50 | ids = json.load(open(ids_path)) 51 | size, rank = get_size_rank() 52 | return ids[rank::size] 53 | 54 | def get_size_rank(): 55 | size, rank = 1, 0 56 | return size, rank 57 | 58 | def pad_sequence(sequences, batch_first=True, padding_value=0.0, max_lens=50): 59 | """pad_sequence""" 60 | lens = [len(x) for x in sequences] 61 | if max_lens == -1: 62 | max_lens = max(lens) 63 | 64 | padded_seq = [] 65 | for x in sequences: 66 | pad_width = [(0, max_lens - len(x))] 67 | padded_seq.append(np.pad(x, pad_width, constant_values=(padding_value, padding_value))) 68 | 69 | sequences = np.stack(padded_seq, axis=0 if batch_first else 1) 70 | return sequences 71 | 72 | 73 | def pad_sequence_(sequences, batch_first=False, padding_value=0.0, max_lens=50): 74 | """pad_sequence""" 75 | if sequences[0] is None: 76 | return None 77 | return pad_sequence(sequences, batch_first, padding_value, max_lens) 78 | 79 | def t2i_collate(inputs): 80 | """ 81 | Return: 82 | :input_ids (n, max_L) padded with 0 83 | :position_ids (n, max_L) padded with 0 84 | :txt_lens list of [txt_len] 85 | :img_feat (n, max_num_bb, feat_dim) 86 | :img_pos_feat (n, max_num_bb, 7) 87 | :num_bbs list of [num_bb] 88 | :attn_masks (n, max_{L + num_bb}) padded with 0 89 | :txt_labels (n, max_L) padded with -1 90 | :audio_feat (n, audio_size, audio_dim) 91 | """ 92 | img_feat, input_ids, input_mask, t, weights = map(list, unzip(inputs)) 93 | 94 | 95 | batch = { 96 | 'input_ids': input_ids, 97 | #'position_ids': position_ids, 98 | 'input_mask': input_mask, 99 | 'img_feat': img_feat, 100 | 't': t, 101 | 'weights': weights 102 | } 103 | return batch 104 | 105 | def t2i_collate_supres(inputs): 106 | """ 107 | Return: 108 | datas 109 | """ 110 | img_feat, input_ids, input_mask, t, weights, low_res = map(list, unzip(inputs)) 111 | 112 | 113 | 114 | batch = { 115 | 'input_ids': input_ids, 116 | 'input_mask': input_mask, 117 | 'img_feat': img_feat, 118 | 't': t, 119 | 'weights': weights, 120 | 'low_res': low_res, 121 | } 122 | return batch -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_text2im/train/train_util.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import functools 3 | 4 | import blobfile as bf 5 | from PIL import Image 6 | 7 | 8 | def save_images(batch: th.Tensor, path: str): 9 | """ 
Display a batch of images inline. """ 10 | scaled = ((batch + 1) * 127.5).round().clamp(0, 255).to(th.uint8).cpu() 11 | reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3]) 12 | Image.fromarray(reshaped.numpy()).save(path, quality=100) 13 | 14 | # For ImageNet experiments, this was a good default value. 15 | # We found that the lg_loss_scale quickly climbed to 16 | # 20-21 within the first ~1K steps of training. 17 | INITIAL_LOG_LOSS_SCALE = 20.0 18 | 19 | class TrainLoop: 20 | def __init__( 21 | self, 22 | *, 23 | model, 24 | diffusion, 25 | data, 26 | batch_size, 27 | microbatch, 28 | lr, 29 | ema_rate, 30 | log_interval, 31 | save_interval, 32 | resume_checkpoint, 33 | use_fp16=False, 34 | fp16_scale_growth=1e-3, 35 | schedule_sampler=None, 36 | weight_decay=0.0, 37 | lr_anneal_steps=0, 38 | ): 39 | self.model = model 40 | self.diffusion = diffusion 41 | self.data = data 42 | self.batch_size = batch_size 43 | 44 | self.microbatch = microbatch if microbatch > 0 else batch_size 45 | self.lr = lr 46 | 47 | self.ema_rate = ( 48 | [ema_rate] 49 | if isinstance(ema_rate, float) 50 | else [float(x) for x in ema_rate.split(",")] 51 | ) 52 | 53 | self.log_interval = log_interval 54 | self.save_interval = save_interval 55 | self.resume_checkpoint = resume_checkpoint 56 | 57 | self.use_fp16 = use_fp16 58 | self.fp16_scale_growth = fp16_scale_growth 59 | self.schedule_sampler = schedule_sampler or UniformSampler(diffusion) 60 | 61 | self.weight_decay = weight_decay 62 | self.lr_anneal_steps = lr_anneal_steps 63 | 64 | self.step = 0 65 | self.resume_step = 0 66 | self.global_batch = self.batch_size 67 | 68 | 69 | self.sync_cuda = th.cuda.is_available() 70 | 71 | self._load_and_sync_parameters() 72 | self.mp_trainer = MixedPrecisionTrainer( 73 | model=self.model, 74 | use_fp16=self.use_fp16, 75 | fp16_scale_growth=fp16_scale_growth, 76 | ) 77 | 78 | self.opt = AdamW( 79 | self.mp_trainer.master_params, lr=self.lr, weight_decay=self.weight_decay 80 | ) 81 | 82 | if self.resume_step: 83 | self._load_optimizer_state() 84 | # Model was resumed, either due to a restart or a checkpoint 85 | # being specified at the command line. 86 | self.ema_params = [ 87 | self._load_ema_parameters(rate) for rate in self.ema_rate 88 | ] 89 | else: 90 | self.ema_params = [ 91 | copy.deepcopy(self.mp_trainer.master_params) 92 | for _ in range(len(self.ema_rate)) 93 | ] 94 | 95 | if th.cuda.is_available(): 96 | self.use_ddp = True 97 | self.ddp_model = DDP( 98 | self.model, 99 | device_ids=[dist_util.dev()], 100 | output_device=dist_util.dev(), 101 | broadcast_buffers=False, 102 | bucket_cap_mb=128, 103 | find_unused_parameters=False, 104 | ) 105 | else: 106 | self.use_ddp = False 107 | self.ddp_model = self.model 108 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_utils/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | curPath = os.path.abspath(os.path.dirname(__file__)) 4 | sys.path.append(curPath) 5 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_utils/callbackConfig.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
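Note that train_util.py above is PyTorch-era code carried over from OpenAI's GLIDE/guided-diffusion training loop: it references th, AdamW, DDP, UniformSampler, MixedPrecisionTrainer and dist_util, none of which its import block provides, and it is not exercised by the MindSpore entry points in this repo. A sketch of the import header it appears to assume (the guided-diffusion module paths are assumptions):

```python
# Sketch of the imports TrainLoop appears to assume (PyTorch + OpenAI's
# guided-diffusion layout). All paths below are assumptions for illustration.
import torch as th
from torch.nn.parallel.distributed import DistributedDataParallel as DDP
from torch.optim import AdamW

# from guided_diffusion import dist_util                        # assumed
# from guided_diffusion.fp16_util import MixedPrecisionTrainer  # assumed
# from guided_diffusion.resample import UniformSampler          # assumed
```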
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ============================================================================
15 |
16 | import os
17 |
18 | import mindspore as ms
19 |
20 | class StopAtStep(ms.Callback):
21 |     def __init__(self, start_step, stop_step, profiler):
22 |         super(StopAtStep, self).__init__()
23 |         self.start_step = start_step
24 |         self.stop_step = stop_step
25 |         self.profiler = profiler
26 |     def step_begin(self, run_context):
27 |         cb_params = run_context.original_args()
28 |         step_num = cb_params.cur_step_num
29 |         if step_num == self.start_step:
30 |             self.profiler.start()
31 |     def step_end(self, run_context):
32 |         cb_params = run_context.original_args()
33 |         step_num = cb_params.cur_step_num
34 |         if step_num == self.stop_step:
35 |             self.profiler.stop()
36 |     def end(self, run_context):
37 |         self.profiler.analyse()
38 |
39 | class StopAtEpoch(ms.Callback):
40 |     def __init__(self, start_epoch, stop_epoch, profiler):
41 |         super(StopAtEpoch, self).__init__()
42 |         self.start_epoch = start_epoch
43 |         self.stop_epoch = stop_epoch
44 |         self.profiler = profiler
45 |     def epoch_begin(self, run_context):
46 |         cb_params = run_context.original_args()
47 |         epoch_num = cb_params.cur_epoch_num
48 |         if epoch_num == self.start_epoch:
49 |             self.profiler.start()
50 |     def epoch_end(self, run_context):
51 |         cb_params = run_context.original_args()
52 |         epoch_num = cb_params.cur_epoch_num
53 |         if epoch_num == self.stop_epoch:
54 |             self.profiler.stop()
55 |     def end(self, run_context):
56 |         self.profiler.analyse()
57 |
58 |
59 | # moxing callback
60 | class UploadObs(ms.Callback):
61 |     def __init__(self, ckpt_dir, upload_url, ckpt_prefix="") -> None:
62 |         super(UploadObs, self).__init__()
63 |         self.ckpt_dir = ckpt_dir
64 |         self.upload_url = upload_url
65 |         self.ckpt_prefix = ckpt_prefix
66 |
67 |     def epoch_end(self, run_context):
68 |         import moxing  # only available inside ModelArts environments
69 |         cb_params = run_context.original_args()
70 |         print("cb_params", cb_params)
71 |         cur_epoch_num = cb_params.get("cur_epoch_num", 1)
72 |         cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
73 |         ckpt_name = self.ckpt_prefix + "-" + str(cur_epoch_num) + "_" + str(cur_step_in_epoch) + ".ckpt"
74 |         ckpt_path = os.path.join(self.ckpt_dir, ckpt_name)
75 |         moxing.file.copy(ckpt_path, self.upload_url)
76 |
77 |
78 | class GetParametersEpoch(ms.Callback):
79 |     def __init__(self) -> None:
80 |         super(GetParametersEpoch, self).__init__()
81 |
82 |     def epoch_end(self, run_context):
83 |         cb_params = run_context.original_args()
84 |         train_net = cb_params.get("train_net")
85 |
86 | class OverflowMonitor(ms.Callback):
87 |     def step_end(self, run_context):
88 |         cb_params = run_context.original_args()
89 |         cur_epoch_num = cb_params.get("cur_epoch_num", 1)
90 |         cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
91 |         overflow = cb_params.net_outputs[1]
92 |         if overflow:
93 |             print(f"overflow detected in epoch {cur_epoch_num} step {cur_step_in_epoch}")
94 |         return super().step_end(run_context)
--------------------------------------------------------------------------------
/vision/Taichu-GLIDE/model/glide_utils/img_utils.py:
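The profiling callbacks above are meant to bracket a window of steps or epochs; a minimal sketch of wiring StopAtStep into model.train (network and dataset are placeholders, and the start_profile flag is an assumption about the MindSpore Profiler version in use):

```python
# Minimal sketch: profile steps 10-20 of a training run.
import mindspore as ms
from mindspore.profiler import Profiler

from model.glide_utils.callbackConfig import StopAtStep, OverflowMonitor  # assumed path

profiler = Profiler(start_profile=False)  # defer; StopAtStep starts/stops it
callbacks = [StopAtStep(start_step=10, stop_step=20, profiler=profiler),
             OverflowMonitor()]

# model = ms.train.Model(net, loss_fn, optimizer)  # placeholders
# model.train(epoch=1, train_dataset=dataset, callbacks=callbacks)
```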
-------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | import mindspore 17 | from typing import Tuple 18 | import numpy as np 19 | from PIL import Image 20 | from IPython.display import display 21 | 22 | 23 | def read_image(path: str, size: int = 256) -> Tuple[mindspore.Tensor, mindspore.Tensor]: 24 | pil_img = Image.open(path).convert('RGB') 25 | pil_img = pil_img.resize((size, size), resample=Image.BICUBIC) 26 | img = np.array(pil_img) 27 | # print("img.shape", img.shape) [64, 64, 3] 28 | dimmed = mindspore.Tensor(img)[None] 29 | reshaped = dimmed.transpose((0, 3, 1, 2)) 30 | reshaped = mindspore.ops.Cast()(reshaped, mindspore.float32) 31 | scaled = mindspore.ops.Add()(mindspore.ops.Div()(reshaped, 127.5), -1) 32 | return scaled 33 | 34 | 35 | def get_img(batch: mindspore.Tensor): 36 | batch_plus = mindspore.ops.Add()(batch, 1) 37 | scaled = mindspore.ops.Mul()(batch_plus, 127.5) 38 | rounded_scaled = mindspore.ops.Rint()(scaled) 39 | clipped_scaled = mindspore.ops.clip_by_value(rounded_scaled, mindspore.Tensor(0), mindspore.Tensor(255)) 40 | clipped_scaled = clipped_scaled.transpose((2, 0, 3, 1)) 41 | clipped_scaled = mindspore.ops.Cast()(clipped_scaled, mindspore.uint8) 42 | reshaped = clipped_scaled.reshape(([batch.shape[2], -1, 3])) 43 | return reshaped 44 | 45 | 46 | def show_images(batch: mindspore.Tensor): 47 | """ Display a batch of images inline. """ 48 | display(Image.fromarray(get_img(batch).asnumpy())) 49 | 50 | 51 | def save_images(batch: mindspore.Tensor, path: str): 52 | """ Display a batch of images inline. """ 53 | batch_32 = mindspore.ops.Cast()(batch, mindspore.float32) 54 | Image.fromarray(get_img(batch_32).asnumpy()).save(path, quality=100, subsampling=0) 55 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_utils/moxing_adapter.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
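read_image and get_img above implement a simple [-1, 1] pixel-range round trip, with get_img additionally tiling the batch into one horizontal strip ((B, C, H, W) -> (H, B*W, 3)) before saving; the arithmetic in NumPy terms:

```python
# Minimal sketch of the value scaling in read_image()/get_img().
import numpy as np

pixels = np.array([0.0, 127.5, 255.0])
scaled = pixels / 127.5 - 1      # read_image: uint8 range -> [-1, 1]
restored = (scaled + 1) * 127.5  # get_img: back to [0, 255]
assert np.allclose(pixels, restored)

# get_img's transpose((2, 0, 3, 1)) turns (B, C, H, W) into (H, B, W, C);
# reshaping to (H, -1, 3) then lays the B images side by side.
```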
14 | # ============================================================================ 15 | 16 | """Moxing adapter for ModelArts""" 17 | 18 | import os 19 | import functools 20 | from mindspore import context 21 | from mindspore.profiler import Profiler 22 | from src.model_utils.config import config 23 | 24 | _global_sync_count = 0 25 | 26 | def get_device_id(): 27 | device_id = os.getenv('DEVICE_ID', '0') 28 | return int(device_id) 29 | 30 | 31 | def get_device_num(): 32 | device_num = os.getenv('RANK_SIZE', '1') 33 | return int(device_num) 34 | 35 | 36 | def get_rank_id(): 37 | global_rank_id = os.getenv('RANK_ID', '0') 38 | return int(global_rank_id) 39 | 40 | 41 | def get_job_id(): 42 | job_id = os.getenv('JOB_ID') 43 | job_id = job_id if job_id != "" else "default" 44 | return job_id 45 | 46 | def sync_data(from_path, to_path): 47 | """ 48 | Download data from remote obs to local directory if the first url is remote url and the second one is local path 49 | Upload data from local directory to remote obs in contrast. 50 | """ 51 | import moxing as mox 52 | import time 53 | global _global_sync_count 54 | sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count) 55 | _global_sync_count += 1 56 | 57 | # Each server contains 8 devices as most. 58 | if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock): 59 | print("from path: ", from_path) 60 | print("to path: ", to_path) 61 | mox.file.copy_parallel(from_path, to_path) 62 | print("===finish data synchronization===") 63 | try: 64 | os.mknod(sync_lock) 65 | # print("os.mknod({}) success".format(sync_lock)) 66 | except IOError: 67 | pass 68 | print("===save flag===") 69 | 70 | while True: 71 | if os.path.exists(sync_lock): 72 | break 73 | time.sleep(1) 74 | 75 | print("Finish sync data from {} to {}.".format(from_path, to_path)) 76 | 77 | 78 | def moxing_wrapper(pre_process=None, post_process=None): 79 | """ 80 | Moxing wrapper to download dataset and upload outputs. 
81 | """ 82 | def wrapper(run_func): 83 | @functools.wraps(run_func) 84 | def wrapped_func(*args, **kwargs): 85 | # Download data from data_url 86 | if config.enable_modelarts: 87 | if config.data_url: 88 | sync_data(config.data_url, config.data_path) 89 | print("Dataset downloaded: ", os.listdir(config.data_path)) 90 | if config.checkpoint_url: 91 | sync_data(config.checkpoint_url, config.load_path) 92 | print("Preload downloaded: ", os.listdir(config.load_path)) 93 | if config.train_url: 94 | sync_data(config.train_url, config.output_path) 95 | print("Workspace downloaded: ", os.listdir(config.output_path)) 96 | 97 | context.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id()))) 98 | config.device_num = get_device_num() 99 | config.device_id = get_device_id() 100 | if not os.path.exists(config.output_path): 101 | os.makedirs(config.output_path) 102 | 103 | if pre_process: 104 | pre_process() 105 | 106 | if config.enable_profiling: 107 | profiler = Profiler() 108 | 109 | run_func(*args, **kwargs) 110 | 111 | if config.enable_profiling: 112 | profiler.analyse() 113 | 114 | # Upload data to train_url 115 | if config.enable_modelarts: 116 | if post_process: 117 | post_process() 118 | 119 | if config.train_url: 120 | print("Start to copy output directory") 121 | sync_data(config.output_path, config.train_url) 122 | return wrapped_func 123 | return wrapper 124 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model/glide_utils/parallelConfig.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | import numpy as np 17 | import mindspore.common.dtype as mstype 18 | from mindspore import context 19 | from mindspore import nn 20 | from mindspore._checkparam import Validator 21 | from mindspore._extends import cell_attr_register 22 | from mindspore.common.initializer import initializer 23 | from mindspore.common.parameter import Parameter 24 | from mindspore.common.seed import _get_graph_seed 25 | from mindspore.common.tensor import Tensor 26 | from mindspore.context import ParallelMode 27 | 28 | 29 | class ParallelConfig: 30 | r""" 31 | ParallelConfig for the setting the global data parallel, model parallel and fusion group. 
32 | """ 33 | dp = 8 34 | mp = 1 35 | pipeline_stage = 1 36 | recompute = False 37 | optimizer_shard = False 38 | fusion_group = 1 39 | parallel_mode = ParallelMode.SEMI_AUTO_PARALLEL 40 | vocab_emb_dp = False 41 | ep = dp 42 | capacity_factor = 1.5 43 | expert_num = 32 44 | aux_loss_factor = 0.01 45 | 46 | @staticmethod 47 | def set_global_parallel_config(dp=1, 48 | mp=1, 49 | recompute=True, 50 | stages=1, 51 | optimizer_shard=True, 52 | fusion_group=4, 53 | parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, 54 | vocab_emb_dp=True): 55 | r""" 56 | The parallel configure setting 57 | 58 | Args: 59 | dp (int): The data parallel way. Default: 1 60 | mp (int): The model parallel way. Default: 1 61 | stages (int): The number of the pipeline stage. Should be a positive value. Default: 1. 62 | optimizer_shard (bool): Enable optimizer state sharding or not. Default: True. 63 | fusion_group (int): The fusion group size of the optimizer state sharding. Default: 4. 64 | recompute (bool): Enable recomputation of the transformer block or not. Default: False. 65 | parallel_mode (ParallelMode): Can be SEMI_AUTO_PARALLEL, DATA_AUTO_PARALLEL or AUTO_PARALLEL. 66 | vocab_emb_dp (bool): Shard embedding in model parallel or data parallel. Default: True 67 | 68 | Supported Platforms: 69 | ``Ascend`` ``GPU`` 70 | 71 | Examples: 72 | >>> ParallelConfig(dp=1, mp=1) 73 | >>> ParallelConfig(stages=4) 74 | """ 75 | ParallelConfig.dp = dp 76 | ParallelConfig.mp = mp 77 | ParallelConfig.pipeline_stage = stages 78 | ParallelConfig.optimizer_shard = optimizer_shard 79 | ParallelConfig.fusion_group = fusion_group 80 | ParallelConfig.recompute = recompute 81 | ParallelConfig.parallel_mode = parallel_mode 82 | ParallelConfig.vocab_emb_dp = vocab_emb_dp -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model_configs/model_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "image_size":64, 3 | "num_channels":192, 4 | "num_res_blocks":3, 5 | "channel_mult":[1, 2, 3, 4], 6 | "num_heads":1, 7 | "num_head_channels":64, 8 | "num_heads_upsample":-1, 9 | "attention_resolutions":[2, 4, 8], 10 | "dropout":0.9, 11 | "text_ctx":128, 12 | "xf_width":512, 13 | "xf_layers":16, 14 | "xf_heads":8, 15 | "xf_final_ln":true, 16 | "n_vocab":50001, 17 | "xf_padding":true, 18 | "diffusion_steps":1000, 19 | "noise_schedule":"squaredcos_cap_v2", 20 | "timestep_respacing":"60", 21 | "use_scale_shift_norm":true, 22 | "resblock_updown":true, 23 | "use_fp16":true, 24 | "cache_text_emb":false, 25 | "inpaint":false, 26 | "super_res":false, 27 | "chinese":true, 28 | "sketch":false, 29 | "class_balanced":false, 30 | "sketch_classes":0 31 | } -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/model_configs/supres_model_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "image_size":256, 3 | "num_channels":192, 4 | "num_res_blocks":2, 5 | "channel_mult":[1,1,2,2,4,4], 6 | "num_heads":1, 7 | "num_head_channels":64, 8 | "num_heads_upsample":-1, 9 | "attention_resolutions":[32, 16, 8], 10 | "dropout":0.0, 11 | "text_ctx":128, 12 | "xf_width":512, 13 | "xf_layers":16, 14 | "xf_heads":8, 15 | "xf_final_ln":true, 16 | "n_vocab":50257, 17 | "xf_padding":true, 18 | "diffusion_steps":1000, 19 | "noise_schedule":"linear", 20 | "timestep_respacing":"fast27", 21 | "use_scale_shift_norm":true, 22 | "resblock_updown":true, 23 | "use_fp16":true, 24 | 
"cache_text_emb":false, 25 | "inpaint":false, 26 | "super_res":false, 27 | "chinese":false, 28 | "sketch":false, 29 | "class_balanced":false, 30 | "sketch_classes":0 31 | } -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/requirements.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | regex 3 | sentencepiece 4 | blobfile 5 | toolz 6 | tqdm 7 | pathlib2 -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/scripts/run_gen_finetune_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | output_dir="output/" 4 | task_name="text2image_parallel" 5 | 6 | if [ $# != 3 ] 7 | then 8 | echo "Usage: 9 | bash scripts/train_caption_parallel.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [RANK_TABLE_FILE]" 10 | exit 1 11 | fi 12 | 13 | if [ $1 -lt 1 ] || [ $1 -gt 8 ] 14 | then 15 | echo "error: DEVICE_NUM=$1 is not in [1,8]" 16 | exit 1 17 | fi 18 | 19 | VISIABLE_DEVICES=$2 20 | IFS="," read -r -a CANDIDATE_DEVICE <<< "$VISIABLE_DEVICES" 21 | if [ ${#CANDIDATE_DEVICE[@]} -ne $1 ] 22 | then 23 | echo "error: DEVICE_NUM=$1 is not matched with VISIABLE_DEVICES=$2" 24 | exit 1 25 | fi 26 | 27 | if [ ! -f $3 ] 28 | then 29 | echo "error: RANK_TABLE_FILE=$3 is not a file" 30 | exit 1 31 | fi 32 | 33 | export GLOG_v=3 34 | export ASCEND_GLOBAL_LOG_LEVEL=3 35 | export ASCEND_GLOBAL_EVENT_ENABLE=0 36 | export ASCEND_SLOG_PRINT_TO_STDOUT=1 37 | export HCCL_CONNECT_TIMEOUT=600 38 | 39 | #ulimit -u unlimited 40 | ulimit -SHn 65535 41 | export DEVICE_NUM=$1 42 | export RANK_SIZE=$1 43 | RANK_TABLE_FILE=$(realpath $3) 44 | export RANK_TABLE_FILE 45 | echo "RANK_TABLE_FILE=${RANK_TABLE_FILE}" 46 | 47 | rm -rf ${output_dir:?}/${task_name:?} 48 | mkdir -p ${output_dir:?}/${task_name:?} 49 | export MS_COMPILER_CACHE_PATH=${output_dir:?}/${task_name:?} 50 | export SERVER_ID=0 51 | rank_start=$((DEVICE_NUM * SERVER_ID)) 52 | 53 | for((i=0; i<${RANK_SIZE}; i++)) 54 | do 55 | export RANK_ID=$((rank_start + i)) 56 | export DEVICE_ID=${CANDIDATE_DEVICE[i]} 57 | mkdir -p ${output_dir:?}/${task_name:?}/rank_$i 58 | echo "start training for rank $RANK_ID, device $DEVICE_ID" 59 | nohup python -u src/train_txt2img.py \ 60 | --data_path=/glide/dataset/ \ 61 | --output_path=/glide/output/ \ 62 | --pretrained_model_path=/glide/pretraind_models/ \ 63 | --is_chinese=True \ 64 | --use_parallel=True \ 65 | --pretrained_model="glide_gen.ckpt" \ 66 | --cog_model="cog-pretrain.model" \ 67 | --model_config=./model_configs/model_config.json \ 68 | --image_caption_path_file="image_caption_path_file.txt" \ 69 | --save_checkpoint_steps=1000 \ 70 | --batch_size=2 \ 71 | --epochs=10 \ 72 | --start_learning_rate=1e-4 \ 73 | --end_learning_rate=1e-9 \ 74 | > $output_dir/$task_name/rank_$i/log_train 2>&1 & 75 | done 76 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/scripts/run_infer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # -*- coding: UTF-8 -*- 3 | # Copyright 2022 Huawei Technologies Co., Ltd 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # ============================================================================ 17 | 18 | output_path=/glide/glide/output/ 19 | ckpt_path=/glide/pretraind_models/ 20 | model_config_path=/glide/configs/infer_model_config_glide.yaml 21 | is_chinese=True 22 | denoise_steps=60 23 | super_res_step=27 24 | pics_generated=4 25 | tokenizer_model="cog-pretrain.model" 26 | gen_ckpt="glide_gen.ckpt" 27 | super_ckpt="glide_super_res.ckpt" 28 | srgan_ckpt="srgan.ckpt" 29 | prompts_file=./data/prompts.txt 30 | 31 | python src/txt2img.py \ 32 | --output_path=$output_path \ 33 | --ckpt_path=$ckpt_path \ 34 | --model_config_path=$model_config_path \ 35 | --is_chinese=$is_chinese \ 36 | --denoise_steps=$denoise_steps \ 37 | --super_res_step=$super_res_step \ 38 | --pics_generated=$pics_generated \ 39 | --tokenizer_model=$tokenizer_model \ 40 | --gen_ckpt=$gen_ckpt \ 41 | --super_ckpt=$super_ckpt \ 42 | --srgan_ckpt=$srgan_ckpt \ 43 | --prompts_file=$prompts_file \ 44 | 45 | 46 | -------------------------------------------------------------------------------- /vision/Taichu-GLIDE/scripts/run_super_res_finetune_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | output_dir="output/" 4 | task_name="text2image_parallel" 5 | 6 | if [ $# != 3 ] 7 | then 8 | echo "Usage: 9 | bash scripts/train_caption_parallel.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [RANK_TABLE_FILE]" 10 | exit 1 11 | fi 12 | 13 | if [ $1 -lt 1 ] || [ $1 -gt 8 ] 14 | then 15 | echo "error: DEVICE_NUM=$1 is not in [1,8]" 16 | exit 1 17 | fi 18 | 19 | VISIABLE_DEVICES=$2 20 | IFS="," read -r -a CANDIDATE_DEVICE <<< "$VISIABLE_DEVICES" 21 | if [ ${#CANDIDATE_DEVICE[@]} -ne $1 ] 22 | then 23 | echo "error: DEVICE_NUM=$1 is not matched with VISIABLE_DEVICES=$2" 24 | exit 1 25 | fi 26 | 27 | if [ ! 
-f $3 ]
28 | then
29 |     echo "error: RANK_TABLE_FILE=$3 is not a file"
30 |     exit 1
31 | fi
32 |
33 | export GLOG_v=3
34 | export ASCEND_GLOBAL_LOG_LEVEL=3
35 | export ASCEND_GLOBAL_EVENT_ENABLE=0
36 | export ASCEND_SLOG_PRINT_TO_STDOUT=1
37 | export HCCL_CONNECT_TIMEOUT=600
38 |
39 | #ulimit -u unlimited
40 | ulimit -SHn 65535
41 | export DEVICE_NUM=$1
42 | export RANK_SIZE=$1
43 | RANK_TABLE_FILE=$(realpath $3)
44 | export RANK_TABLE_FILE
45 | echo "RANK_TABLE_FILE=${RANK_TABLE_FILE}"
46 |
47 | rm -rf ${output_dir:?}/${task_name:?}
48 | mkdir -p ${output_dir:?}/${task_name:?}
49 | export MS_COMPILER_CACHE_PATH=${output_dir:?}/${task_name:?}
50 | export SERVER_ID=0
51 | rank_start=$((DEVICE_NUM * SERVER_ID))
52 |
53 | for((i=0; i<${RANK_SIZE}; i++))
54 | do
55 |     export RANK_ID=$((rank_start + i))
56 |     export DEVICE_ID=${CANDIDATE_DEVICE[i]}
57 |     mkdir -p ${output_dir:?}/${task_name:?}/rank_$i
58 |     echo "start training for rank $RANK_ID, device $DEVICE_ID"
59 |     nohup python -u src/train_txt2img.py \
60 |         --data_path=/glide/dataset/ \
61 |         --output_path=/glide/output/ \
62 |         --pretrained_model_path=/glide/pretraind_models/ \
63 |         --is_super_res=True \
64 |         --is_chinese=True \
65 |         --use_parallel=True \
66 |         --pretrained_model="glide_super_res.ckpt" \
67 |         --cog_model="cog-pretrain.model" \
68 |         --model_config=./model_configs/supres_model_config.json \
69 |         --image_caption_path_file="image_caption_path_file.txt" \
70 |         --save_checkpoint_steps=1000 \
71 |         --batch_size=2 \
72 |         --epochs=2 \
73 |         --start_learning_rate=1e-4 \
74 |         --end_learning_rate=1e-9 \
75 |         > $output_dir/$task_name/rank_$i/log_train 2>&1 &
76 | done
77 |
--------------------------------------------------------------------------------
/vision/stablediffusionv2/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Stablediffusionv2
3 |
4 | ## Contents
5 |
6 | - [The Stablediffusionv2 Model](#the-stablediffusionv2-model)
7 | - [Requirements](#requirements)
8 | - [Quick Start](#quick-start)
9 | - [Prepare the checkpoint](#prepare-the-checkpoint)
10 | - [Text-to-image generation](#text-to-image-generation)
11 | - [Generated samples](#generated-samples)
12 |
13 | ## The Stablediffusionv2 Model
14 |
15 | The Stablediffusionv2 model was developed by the **Stability-AI** team and adapted to the MindSpore + Ascend environment by the **Huawei Ascend** team.
16 |
17 | ## Requirements
18 |
19 | 1. **Ascend hardware and software stack (driver + firmware + CANN)**
20 |
21 |    Visit the [Ascend community](), then download and install it as instructed.
22 |
23 | 2. AI framework - **MindSpore** == 1.9
24 |
25 |    Visit the [MindSpore website](), then download and install it as instructed.
26 |
27 |    For more help, see the following resources:
28 |
29 |    - [MindSpore tutorials](https://www.mindspore.cn/tutorials/zh-CN/master/index.html)
30 |    - [MindSpore Python API](https://www.mindspore.cn/docs/zh-CN/master/index.html)
31 |
32 | 3. **Third-party dependencies**
33 |
34 |    ```shell
35 |    pip install -r requirements.txt
36 |    ```
37 |
38 | ## Quick Start
39 |
40 | ### Prepare the checkpoint
41 |
42 | Download the pretrained stablediffusionv2 weights [stablediffusionv2_512.ckpt](https://download.mindspore.cn/toolkits/minddiffusion/stablediffusion/stablediffusionv2_512.ckpt) to the stablediffusionv2/models/ directory.
43 |
44 | ### Text-to-image generation
45 |
46 | To generate images from text, run txt2img.py, or run infer.sh directly with the default arguments.
47 |
48 | ```shell
49 | python txt2img.py --prompt [input text] --ckpt_path [ckpt_path] --ckpt_name [ckpt_name] \
50 |     --H [image_height] --W [image_width] --output_path [image save folder] \
51 |     --n_samples [number of images to generate]
52 | ```
53 | or
54 | ```shell
55 | bash scripts/infer.sh
56 | ```
57 |
58 | Higher resolutions require more device memory.
On an Ascend 910 chip, we can generate eight 512x512 images at the same time.
59 |
60 |
61 | ### Generated samples
62 |
63 | Below are some samples generated by our Stablediffusionv2 model, together with the corresponding `[input text]`.
64 |
65 | ```
66 | A Van Gogh style oil painting of sunflower
67 | ```
68 |
69 | ![A Van Gogh style oil painting of sunflower](demo/sunflower1.png)
70 |
71 | ```
72 | A Van Gogh style oil painting of sunflower
73 | ```
74 |
75 | ![A Van Gogh style oil painting of sunflower](demo/sunflower2.png)
76 |
77 | ```
78 | a professional photograph of an astronaut riding a horse
79 | ```
80 |
81 | ![a professional photograph of an astronaut riding a horse](demo/horse1.png)
82 |
83 | ```
84 | a professional photograph of an astronaut riding a horse
85 | ```
86 |
87 | ![a professional photograph of an astronaut riding a horse](demo/horse2.png)
88 |
89 | ```
90 | The beautiful night view of the city has various buildings, traffic flow, and lights.
91 | ```
92 |
93 | ![The beautiful night view of the city has various buildings, traffic flow, and lights.](demo/city1.png)
94 |
95 | ```
96 | The beautiful night view of the city has various buildings, traffic flow, and lights.
97 | ```
98 |
99 | ![The beautiful night view of the city has various buildings, traffic flow, and lights.](demo/city2.png)
100 |
101 | ```
102 | Modernist style, sunset, withered vines, old trees, and mountains
103 | ```
104 |
105 | ![Modernist style, sunset, withered vines, old trees, and mountains](demo/tree1.png)
106 |
107 | ```
108 | Modernist style, sunset, withered vines, old trees, and mountains
109 | ```
110 |
111 | ![Modernist style, sunset, withered vines, old trees, and mountains](demo/tree2.png)
112 |
113 |
114 |
115 |
116 |
117 |
--------------------------------------------------------------------------------
/vision/stablediffusionv2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/__init__.py
--------------------------------------------------------------------------------
/vision/stablediffusionv2/configs/train_config.json:
--------------------------------------------------------------------------------
1 | {
2 |     "model_config": "configs/v1-train-chinese.yaml",
3 |     "pretrained_model_path": "models/",
4 |     "pretrained_model_file":"wukong-huahua-ms.ckpt",
5 |     "data_path": "/secHome/FFHQ",
6 |     "train_batch_size": 3,
7 |     "gradient_accumulation_steps": 1,
8 |     "optim": "adamw",
9 |     "patch_size":32,
10 |     "epochs": 20,
11 |     "betas": [
12 |         0.9,
13 |         0.98
14 |     ],
15 |     "dropout": 0.1,
16 |     "weight_decay": 0.01,
17 |     "warmup_steps": 1000,
18 |     "seed": 3407,
19 |     "image_size": 512,
20 |     "image_filter_size": 256,
21 |     "random_crop": false,
22 |     "filter_small_size": true,
23 |     "start_learning_rate": 1e-5,
24 |     "end_learning_rate": 1e-7,
25 |     "decay_steps": 0,
26 |     "save_checkpoint_steps": 10000
27 | }
28 |
--------------------------------------------------------------------------------
/vision/stablediffusionv2/configs/v2-inference.yaml:
--------------------------------------------------------------------------------
1 | model:
2 |   base_learning_rate: 1.0e-04
3 |   target: ldm.models.diffusion.ddpm.LatentDiffusion
4 |   params:
5 |     linear_start: 0.00085
6 |     linear_end: 0.0120
7 |     num_timesteps_cond: 1
8 |     log_every_t: 200
9 |     timesteps: 1000
10 |     first_stage_key: "jpg"
11 |     cond_stage_key: "txt"
12 |     image_size: 64
13 |     channels: 4
14 |     cond_stage_trainable: false # Note: different from the one we trained before
15 |     conditioning_key: crossattn
16 |     monitor: val/loss_simple_ema
17 |     scale_factor: 0.18215
18 |     use_ema: False
19 |     use_fp16: True
20 |
21 |     unet_config:
22 |       target:
ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | image_size: 32 # unused 25 | in_channels: 4 26 | out_channels: 4 27 | model_channels: 320 28 | attention_resolutions: [ 4, 2, 1 ] 29 | num_res_blocks: 2 30 | channel_mult: [ 1, 2, 4, 4 ] 31 | num_head_channels: 64 32 | use_spatial_transformer: True 33 | use_linear_in_transformer: True 34 | transformer_depth: 1 35 | context_dim: 1024 36 | use_checkpoint: True 37 | legacy: False 38 | use_fp16: True 39 | 40 | first_stage_config: 41 | target: ldm.models.autoencoder.AutoencoderKL 42 | params: 43 | embed_dim: 4 44 | monitor: val/rec_loss 45 | use_fp16: True 46 | ddconfig: 47 | double_z: true 48 | z_channels: 4 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | - 4 58 | num_res_blocks: 2 59 | attn_resolutions: [] 60 | dropout: 0.0 61 | 62 | cond_stage_config: 63 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder_ZH 64 | params: 65 | use_fp16: True 66 | -------------------------------------------------------------------------------- /vision/stablediffusionv2/configs/v2-train.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-05 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "image" 11 | cond_stage_key: "caption" 12 | image_size: 64 13 | channels: 4 14 | conditioning_key: crossattn 15 | monitor: val/loss_simple_ema 16 | scale_factor: 0.18215 17 | use_ema: False 18 | use_fp16: True 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | image_size: 32 # unused 24 | in_channels: 4 25 | out_channels: 4 26 | model_channels: 320 27 | attention_resolutions: [ 4, 2, 1 ] 28 | num_res_blocks: 2 29 | channel_mult: [ 1, 2, 4, 4 ] 30 | num_heads: 8 31 | use_spatial_transformer: True 32 | transformer_depth: 1 33 | context_dim: 768 34 | use_checkpoint: True 35 | legacy: False 36 | use_fp16: True 37 | dropout: 0.1 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | use_fp16: True 45 | ddconfig: 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | 60 | cond_stage_config: 61 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder_ZH 62 | params: 63 | use_fp16: True 64 | -------------------------------------------------------------------------------- /vision/stablediffusionv2/demo/city1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/demo/city1.png -------------------------------------------------------------------------------- /vision/stablediffusionv2/demo/city2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/demo/city2.png -------------------------------------------------------------------------------- /vision/stablediffusionv2/demo/horse1.png: -------------------------------------------------------------------------------- 
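Both YAML files above use the latent-diffusion `target:` + `params:` convention, where target names a class by dotted path and params are its constructor kwargs. Codebases in this family resolve that with a small importlib helper; this repo's ldm/util.py very likely ships an equivalent instantiate_from_config, so the sketch below is a generic reconstruction, not its verbatim API:

```python
# Generic sketch of the "target + params" resolution used by the YAML configs.
import importlib


def get_obj_from_str(string):
    module, cls = string.rsplit(".", 1)
    return getattr(importlib.import_module(module), cls)


def instantiate_from_config(config):
    return get_obj_from_str(config["target"])(**config.get("params", dict()))


cond_cfg = {
    "target": "ldm.modules.encoders.modules.FrozenCLIPEmbedder_ZH",
    "params": {"use_fp16": True},
}
# encoder = instantiate_from_config(cond_cfg)  # resolves and constructs the class
```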
https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/demo/horse1.png -------------------------------------------------------------------------------- /vision/stablediffusionv2/demo/horse2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/demo/horse2.png -------------------------------------------------------------------------------- /vision/stablediffusionv2/demo/sunflower1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/demo/sunflower1.png -------------------------------------------------------------------------------- /vision/stablediffusionv2/demo/sunflower2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/demo/sunflower2.png -------------------------------------------------------------------------------- /vision/stablediffusionv2/demo/tree1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/demo/tree1.png -------------------------------------------------------------------------------- /vision/stablediffusionv2/demo/tree2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/demo/tree2.png -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/ldm/data/__init__.py -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/data/t2i_collate.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================ 15 | 16 | 17 | from toolz.sandbox import unzip 18 | 19 | 20 | data_column = [ 21 | 'img_feat', 22 | 'txt_tokens' 23 | ] 24 | 25 | 26 | def t2i_collate(inputs): 27 | """ 28 | Return: 29 | :img_feat (batch_size, height, width, 3) 30 | :txt_tokens (n, max_txt_len) 31 | """ 32 | img_feat, txt_tokens = map(list, unzip(inputs)) 33 | batch = { 34 | 'img_feat': img_feat, 35 | 'txt_tokens': txt_tokens, 36 | } 37 | return batch -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/models/autoencoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | import mindspore as ms 16 | import mindspore.nn as nn 17 | import mindspore.ops as P 18 | 19 | from ldm.modules.diffusionmodules.model import Encoder, Decoder 20 | from ldm.modules.distributions.distributions import DiagonalGaussianDistribution 21 | 22 | class AutoencoderKL(nn.Cell): 23 | def __init__(self, 24 | ddconfig, 25 | embed_dim, 26 | ckpt_path=None, 27 | ignore_keys=[], 28 | image_key="image", 29 | colorize_nlabels=None, 30 | monitor=None, 31 | use_fp16=False 32 | ): 33 | super().__init__() 34 | self.dtype = ms.float16 if use_fp16 else ms.float32 35 | self.image_key = image_key 36 | self.encoder = Encoder(dtype=self.dtype, **ddconfig) 37 | self.decoder = Decoder(dtype=self.dtype, **ddconfig) 38 | assert ddconfig["double_z"] 39 | self.quant_conv = nn.Conv2d(2*ddconfig["z_channels"], 2*embed_dim, 1, pad_mode="valid", has_bias=True).to_float(self.dtype) 40 | self.post_quant_conv = nn.Conv2d(embed_dim, ddconfig["z_channels"], 1, pad_mode="valid", has_bias=True).to_float(self.dtype) 41 | self.embed_dim = embed_dim 42 | if colorize_nlabels is not None: 43 | assert type(colorize_nlabels)==int 44 | self.colorize = ms.Parameter(ms.ops.standard_normal((3, colorize_nlabels, 1, 1)), name="colorize", requires_grad=False)  # nn.Cell has no register_buffer; keep as a frozen Parameter 45 | if monitor is not None: 46 | self.monitor = monitor 47 | if ckpt_path is not None: 48 | self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) 49 | 50 | self.split = P.Split(axis=1, output_num=2) 51 | self.exp = P.Exp() 52 | self.stdnormal = P.StandardNormal() 53 | 54 | def init_from_ckpt(self, path, ignore_keys=list()): 55 | sd = ms.load_checkpoint(path)["state_dict"] 56 | keys = list(sd.keys()) 57 | for k in keys: 58 | for ik in ignore_keys: 59 | if k.startswith(ik): 60 | print("Deleting key {} from state_dict.".format(k)) 61 | del sd[k] 62 | ms.load_param_into_net(self, sd, strict_load=False) 63 | print(f"Restored from {path}") 64 | 65 | def decode(self, z): 66 | z = self.post_quant_conv(z) 67 | dec = self.decoder(z) 68 | return dec 69 | 70 | def encode(self, x): 71 | h = self.encoder(x) 72 | moments = self.quant_conv(h) 73 | mean, logvar = self.split(moments) 74 | logvar =
P.clip_by_value(logvar, -30.0, 20.0) 75 | std = self.exp(0.5 * logvar) 76 | x = mean + std * self.stdnormal(mean.shape) 77 | return x 78 | -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/models/clip_zh/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/ldm/models/clip_zh/__init__.py -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/models/clip_zh/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/ldm/models/clip_zh/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/models/diffusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/ldm/models/diffusion/__init__.py -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/models/diffusion/dpm_solver/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """SAMPLING ONLY.""" 16 | 17 | import mindspore as ms 18 | from mindspore import ops 19 | from .dpm_solver import NoiseScheduleVP, model_wrapper, DPM_Solver 20 | 21 | 22 | class DPMSolverSampler(object): 23 | def __init__(self, model, **kwargs): 24 | super().__init__() 25 | self.model = model 26 | self.register_buffer('alphas_cumprod', model.alphas_cumprod) 27 | 28 | def register_buffer(self, name, attr): 29 | setattr(self, name, attr) 30 | 31 | def sample(self, 32 | S, 33 | batch_size, 34 | shape, 35 | conditioning=None, 36 | callback=None, 37 | normals_sequence=None, 38 | img_callback=None, 39 | quantize_x0=False, 40 | eta=0., 41 | mask=None, 42 | x0=None, 43 | temperature=1., 44 | noise_dropout=0., 45 | score_corrector=None, 46 | corrector_kwargs=None, 47 | verbose=True, 48 | x_T=None, 49 | log_every_t=100, 50 | unconditional_guidance_scale=1., 51 | unconditional_conditioning=None, 52 | # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... 
53 | **kwargs 54 | ): 55 | if conditioning is not None: 56 | if isinstance(conditioning, dict): 57 | cbs = conditioning[list(conditioning.keys())[0]].shape[0] 58 | if cbs != batch_size: 59 | print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") 60 | else: 61 | if conditioning.shape[0] != batch_size: 62 | print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") 63 | 64 | # sampling 65 | C, H, W = shape 66 | size = (batch_size, C, H, W) 67 | 68 | # print(f'Data shape for DPM-Solver sampling is {size}, sampling steps {S}') 69 | 70 | if x_T is None: 71 | img = ops.standard_normal(size) 72 | else: 73 | img = x_T 74 | 75 | ns = NoiseScheduleVP('discrete', alphas_cumprod=self.alphas_cumprod) 76 | 77 | model_fn = model_wrapper( 78 | lambda x, t, c: self.model.apply_model(x, t, c), 79 | ns, 80 | model_type="noise", 81 | guidance_type="classifier-free", 82 | condition=conditioning, 83 | unconditional_condition=unconditional_conditioning, 84 | guidance_scale=unconditional_guidance_scale, 85 | ) 86 | 87 | dpm_solver = DPM_Solver(model_fn, ns, predict_x0=True, thresholding=False) 88 | 89 | x = dpm_solver.sample(ops.Cast()(img, ms.float16), steps=S, skip_type="time_uniform", 90 | method="multistep", order=2, lower_order_final=True) 91 | 92 | return x, None -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/diffusionmodules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/ldm/modules/diffusionmodules/__init__.py -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/diffusionmodules/upscaling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/ldm/modules/diffusionmodules/upscaling.py -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/ldm/modules/distributions/__init__.py -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================ 15 | import mindspore as ms 16 | import mindspore.ops as ops 17 | 18 | class DiagonalGaussianDistribution(object): 19 | def __init__(self, parameters, deterministic=False): 20 | 21 | self.mean, self.logvar = ops.Split(axis=1, output_num=2)(parameters) 22 | self.logvar = ops.clip_by_value(self.logvar, -30.0, 20.0) 23 | self.deterministic = deterministic 24 | self.std = ops.exp(0.5 * self.logvar) 25 | self.stdnormal = ops.StandardNormal() 26 | 27 | def sample(self): 28 | x = self.mean + self.std * self.stdnormal(self.mean.shape) 29 | return x -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/encoders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/stablediffusionv2/ldm/modules/encoders/__init__.py -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/encoders/modules.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | import mindspore as ms 16 | import mindspore.nn as nn 17 | import mindspore.ops as ops 18 | from mindspore import Tensor 19 | from ldm.models.clip_zh.simple_tokenizer import tokenize 20 | from .text_encoder import TextEncoder 21 | 22 | 23 | class FrozenCLIPEmbedder_ZH(nn.Cell): 24 | def __init__(self, max_length=77, use_fp16=False): 25 | super(FrozenCLIPEmbedder_ZH, self).__init__() 26 | self.dtype = ms.float16 if use_fp16 else ms.float32 27 | self.max_length = max_length 28 | self.tokenizer = tokenize 29 | self.transformer = TextEncoder(context_length=77, vocab_size=49408, output_dim=1024, width=1024, layers=23, heads=16, dtype=self.dtype) 30 | 31 | def tokenize(self, texts): 32 | return self.tokenizer(texts) 33 | 34 | def encode(self, text): 35 | batch_encoding = self.tokenize(text) 36 | outputs = self.transformer(batch_encoding) 37 | return outputs 38 | 39 | def construct(self, c): 40 | outputs = self.transformer(c) 41 | return outputs 42 | -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/train/callback.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | 16 | import mindspore as ms 17 | 18 | 19 | class OverflowMonitor(ms.Callback): 20 | def step_end(self, run_context): 21 | cb_params = run_context.original_args() 22 | cur_epoch_num = cb_params.get("cur_epoch_num", 1) 23 | cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 24 | overflow = cb_params.net_outputs[1] 25 | if overflow: 26 | print(f"overflow detected in epoch {cur_epoch_num} step {cur_step_in_epoch}") 27 | return super().step_end(run_context) -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/train/learningrate.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | Learning rate schedules for training. 17 | This is an experimental interface that is subject to change and/or deletion.
18 | """ 19 | 20 | from mindspore.ops import operations as P 21 | import mindspore.common.dtype as mstype 22 | from mindspore.common.tensor import Tensor 23 | from mindspore.nn.learning_rate_schedule import LearningRateSchedule, PolynomialDecayLR, WarmUpLR, CosineDecayLR 24 | import numpy as np 25 | 26 | 27 | class LearningRate(LearningRateSchedule): 28 | """ 29 | Learning rate scheduler 30 | """ 31 | 32 | def __init__(self, 33 | start_learning_rate, 34 | end_learning_rate, 35 | warmup_steps, 36 | decay_steps, 37 | power=1.0, 38 | use_cosine=True): 39 | super(LearningRate, self).__init__() 40 | self.warmup_flag = False 41 | if warmup_steps > 0: 42 | self.warmup_flag = True 43 | self.warmup_lr = WarmUpLR(start_learning_rate, warmup_steps) 44 | self.decay_lr = PolynomialDecayLR(start_learning_rate, end_learning_rate, decay_steps, power) 45 | self.cosine_decay_lr = CosineDecayLR(end_learning_rate, start_learning_rate, decay_steps) 46 | self.warmup_steps = Tensor(np.array([warmup_steps]).astype(np.float32)) 47 | self.greater = P.Greater() 48 | self.one = Tensor(np.array([1.0]).astype(np.float32)) 49 | self.cast = P.Cast() 50 | self.use_cosine = use_cosine 51 | 52 | def construct(self, global_step): 53 | """Learning rate scheduler construct""" 54 | if not self.use_cosine: 55 | decay_lr = self.decay_lr(global_step) 56 | else: 57 | decay_lr = self.cosine_decay_lr(global_step) 58 | if self.warmup_flag: 59 | is_warmup = self.cast(self.greater(self.warmup_steps, global_step), mstype.float32) 60 | warmup_lr = self.warmup_lr(global_step) 61 | lr = (self.one - is_warmup) * decay_lr + is_warmup * warmup_lr 62 | else: 63 | lr = decay_lr 64 | return lr 65 | 66 | -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/train/optim.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================ 15 | """ 16 | Build the optimizer for MindSpore. 17 | """ 18 | from mindspore.nn.optim.adam import Adam, AdamWeightDecay 19 | 20 | 21 | def build_optimizer(model, opts, lr): 22 | """ 23 | 24 | :param model: network whose trainable parameters will be optimized 25 | :param opts: options object carrying `optim` ('adam' or 'adamw') and `betas` 26 | :param lr: learning rate value or schedule 27 | :return: optimizer 28 | """ 29 | 30 | decay_filter = lambda x: 'layernorm' not in x.name.lower() and "bias" not in x.name.lower() 31 | param_optimizer = model.trainable_params() 32 | decay_params = list(filter(decay_filter, param_optimizer)) 33 | other_params = list(filter(lambda x: not decay_filter(x), param_optimizer)) 34 | group_params = [{ 35 | 'params': decay_params, 36 | 'weight_decay': 1e-6 37 | }, { 38 | 'params': other_params, 39 | 'weight_decay': 0.0 40 | }, { 41 | 'order_params': param_optimizer 42 | }] 43 | if opts.optim == 'adam': 44 | OptimCls = Adam 45 | elif opts.optim == 'adamw': 46 | OptimCls = AdamWeightDecay 47 | else: 48 | raise ValueError('invalid optimizer') 49 | optimizer = OptimCls(group_params, 50 | learning_rate=lr, beta1=opts.betas[0], beta2=opts.betas[1]) 51 | return optimizer 52 | -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/train/parallel_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Parallel configuration.""" 16 | 17 | import math 18 | 19 | import numpy as np 20 | import mindspore.common.dtype as mstype 21 | from mindspore.context import ParallelMode 22 | 23 | class ParallelConfig: 24 | r""" 25 | ParallelConfig for setting the global data parallel, model parallel and fusion group. 26 | """ 27 | dp = 8 28 | mp = 1 29 | pipeline_stage = 1 30 | recompute = False 31 | optimizer_shard = False 32 | fusion_group = 1 33 | parallel_mode = ParallelMode.SEMI_AUTO_PARALLEL 34 | vocab_emb_dp = False 35 | ep = dp 36 | capacity_factor = 1.5 37 | expert_num = 32 38 | aux_loss_factor = 0.01 39 | 40 | @staticmethod 41 | def set_global_parallel_config(dp=1, 42 | mp=1, 43 | recompute=True, 44 | stages=1, 45 | optimizer_shard=True, 46 | fusion_group=4, 47 | parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, 48 | vocab_emb_dp=True): 49 | r""" 50 | The parallel configuration setting 51 | 52 | Args: 53 | dp (int): The data parallel way. Default: 1 54 | mp (int): The model parallel way. Default: 1 55 | stages (int): The number of the pipeline stage. Should be a positive value. Default: 1. 56 | optimizer_shard (bool): Enable optimizer state sharding or not. Default: True. 57 | fusion_group (int): The fusion group size of the optimizer state sharding. Default: 4. 58 | recompute (bool): Enable recomputation of the transformer block or not. Default: True.
59 | parallel_mode (ParallelMode): Can be SEMI_AUTO_PARALLEL, DATA_PARALLEL or AUTO_PARALLEL. 60 | vocab_emb_dp (bool): Shard embedding in model parallel or data parallel. Default: True 61 | 62 | Supported Platforms: 63 | ``Ascend`` ``GPU`` 64 | 65 | Examples: 66 | >>> ParallelConfig.set_global_parallel_config(dp=1, mp=1) 67 | >>> ParallelConfig.set_global_parallel_config(stages=4) 68 | """ 69 | ParallelConfig.dp = dp 70 | ParallelConfig.mp = mp 71 | ParallelConfig.pipeline_stage = stages 72 | ParallelConfig.optimizer_shard = optimizer_shard 73 | ParallelConfig.fusion_group = fusion_group 74 | ParallelConfig.recompute = recompute 75 | ParallelConfig.parallel_mode = parallel_mode 76 | ParallelConfig.vocab_emb_dp = vocab_emb_dp 77 | -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/modules/train/tools.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | Copyright (c) Microsoft Corporation. 17 | Licensed under the MIT license. 18 | 19 | Misc utilities 20 | """ 21 | import json 22 | import os 23 | import sys 24 | import random 25 | import numpy as np 26 | import mindspore as ms 27 | 28 | class NoOp: 29 | """ useful for distributed training No-Ops """ 30 | 31 | def __getattr__(self, name): 32 | return self.noop 33 | 34 | def noop(self, *args, **kwargs): 35 | return 36 | 37 | 38 | def parse_with_config(args): 39 | """Parse With Config""" 40 | if args.train_config is not None: 41 | abs_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../..")) 42 | args.train_config = os.path.join(abs_path, args.train_config) 43 | config_args = json.load(open(args.train_config)) 44 | override_keys = {arg[2:].split('=')[0] for arg in sys.argv[1:] 45 | if arg.startswith('--')} 46 | for k, v in config_args.items(): 47 | if k not in override_keys: 48 | setattr(args, k, v) 49 | return args 50 | 51 | 52 | def set_random_seed(seed): 53 | """Set Random Seed""" 54 | print("random seed: ", seed) 55 | random.seed(seed) 56 | np.random.seed(seed) 57 | ms.set_seed(seed) 58 | 59 | class Struct: 60 | def __init__(self, dict_): 61 | self.__dict__.update(dict_) 62 | -------------------------------------------------------------------------------- /vision/stablediffusionv2/ldm/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | import importlib 16 | from inspect import isfunction 17 | import mindspore.ops as ops 18 | 19 | 20 | def exists(x): 21 | return x is not None 22 | 23 | 24 | def default(val, d): 25 | if exists(val): 26 | return val 27 | return d() if isfunction(d) else d 28 | 29 | 30 | def count_params(model, verbose=False): 31 | total_params = sum(p.size for p in model.get_parameters())  # MindSpore Cells expose get_parameters(); Tensor.size is the element count 32 | if verbose: 33 | print(f"{model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.") 34 | return total_params 35 | 36 | 37 | def instantiate_from_config(config): 38 | if "target" not in config: 39 | if config == '__is_first_stage__': 40 | return None 41 | elif config == "__is_unconditional__": 42 | return None 43 | raise KeyError("Expected key `target` to instantiate.") 44 | return get_obj_from_str(config["target"])(**config.get("params", dict())) 45 | 46 | 47 | def get_obj_from_str(string, reload=False): 48 | module, cls = string.rsplit(".", 1) 49 | if reload: 50 | module_imp = importlib.import_module(module) 51 | importlib.reload(module_imp) 52 | return getattr(importlib.import_module(module, package=None), cls) 53 | 54 | def extract_into_tensor(a, t, x_shape): 55 | b = t.shape[0] 56 | out = ops.GatherD()(a, -1, t) 57 | return out.reshape(b, *((1,) * (len(x_shape) - 1))) 58 | -------------------------------------------------------------------------------- /vision/stablediffusionv2/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | omegaconf 3 | einops 4 | ftfy 5 | regex 6 | albumentations 7 | pandas 8 | imagesize 9 | toolz 10 | pillow 11 | -------------------------------------------------------------------------------- /vision/stablediffusionv2/scripts/infer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2022 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # --prompt "A Van Gogh style oil painting of sunflower" \ 16 | # --prompt "a professional photograph of an astronaut riding a horse" \ 17 | # --prompt "The beautiful night view of the city has various buildings, traffic flow, and lights."
\ 18 | # ============================================================================ 19 | 20 | export GLOG_v=3 21 | export ASCEND_GLOBAL_LOG_LEVEL=3 22 | export ASCEND_SLOG_PRINT_TO_STDOUT=0 23 | export DEVICE_ID=0 24 | 25 | python txt2img.py \ 26 | --prompt "Modernist style, sunset, withered vines, old trees, and mountains" \ 27 | --config configs/v2-inference.yaml \ 28 | --output_path ./output/ \ 29 | --seed 42 \ 30 | --n_iter 4 \ 31 | --n_samples 8 \ 32 | --W 512 \ 33 | --H 512 \ 34 | --ddim_steps 50 \ 35 | -------------------------------------------------------------------------------- /vision/wukong-huahua/README_EN.md: -------------------------------------------------------------------------------- 1 | # Wukong-Huahua 2 | ## Contents 3 | 4 | [View in Chinese](./README.md) 5 | 6 | - [Wukong-Huahua Model](#wukong-huahua-model) 7 | - [Environment Requirements](#environment-requirements) 8 | - [Quick Start](#quick-start) 9 | - [Prepare Checkpoint](#prepare-checkpoint) 10 | - [Text to Image Generation](#text-to-image-generation) 11 | - [Fine-tuning](#fine-tuning) 12 | - [Demos](#demos) 13 | 14 | ## Wukong-Huahua Model 15 | 16 | Wukong-Huahua is a diffusion-based model that performs the text-to-image task in Chinese. It was developed by the **Huawei Noah's Ark Lab** in cooperation with the **Distributed & Parallel Software Lab** and the **Ascend Product Development Unit**, trained on the [Wukong dataset](https://wukong-dataset.github.io/wukong-dataset/), and implemented with [MindSpore](https://www.mindspore.cn/en) + Ascend, a combined software and hardware solution. You are welcome to try Wukong-Huahua on [Our Online Platform](https://xihe.mindspore.cn/modelzoo/wukong). 17 | 18 | ## Environment Requirements 19 | 20 | 1. **Ascend** Software + Hardware Solution (Driver + Firmware + CANN) 21 | 22 | Go to the [Ascend website](). Follow the instructions to download and install. 23 | 2. AI Framework - **MindSpore** == 1.9 24 | 25 | Go to the [MindSpore website](https://www.mindspore.cn/en "MindSpore") and install version 1.9 following the instructions. 26 | 27 | If you need more help with MindSpore, please check: 28 | - [MindSpore Tutorial](https://www.mindspore.cn/tutorials/en/master/index.html) 29 | - [MindSpore Python API](https://www.mindspore.cn/docs/en/master/index.html) 30 | 31 | 3. Third-party dependencies 32 | ```shell 33 | pip install -r requirements.txt 34 | ``` 35 | 36 | 37 | ## Quick Start 38 | 39 | ### Prepare Checkpoint 40 | 41 | Download the Wukong-Huahua pretrained checkpoint [wukong-huahua-ms.ckpt](https://download.mindspore.cn/toolkits/minddiffusion/wukong-huahua/wukong-huahua-ms.ckpt) and place it under the wukong-huahua/models/ folder. 42 | 43 | For the fine-tuning task, we provide example datasets to show the expected format; please download them [here](https://opt-release.obs.cn-central-221.ovaijisuan.com/wukonghuahua/dataset.tar.gz). 44 | 45 | ### Text to Image Generation 46 | 47 | To generate images according to input text, run txt2img.py or simply run infer.sh with default arguments. 48 | 49 | ```shell 50 | python txt2img.py --prompt [input text] --ckpt_path [ckpt_path] --ckpt_name [ckpt_name] \ 51 | --H [image_height] --W [image_width] --output_path [image save folder] \ 52 | --n_samples [number of images to generate] 53 | ``` 54 | or 55 | ```shell 56 | bash scripts/infer.sh 57 | ``` 58 | 59 | Generating higher-resolution images requires more memory. On the Ascend 910 chip, we can generate 2 1024x768 images or 16 512x512 images at the same time.
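In other words, resolution can be traded against batch size on a single chip. A minimal usage sketch built from the flags documented above (the checkpoint location, output folder, and prompt are assumptions based on the defaults and demos in this README):

```shell
# 2 images at 1024x768 (higher resolution, smaller batch)
python txt2img.py --prompt "海上日出时候的奔跑者" \
    --ckpt_path models/ --ckpt_name wukong-huahua-ms.ckpt \
    --H 768 --W 1024 --output_path ./output/ --n_samples 2

# 16 images at 512x512 (lower resolution, larger batch)
python txt2img.py --prompt "海上日出时候的奔跑者" \
    --ckpt_path models/ --ckpt_name wukong-huahua-ms.ckpt \
    --H 512 --W 512 --output_path ./output/ --n_samples 16
```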
60 | 61 | ### Fine-tuning 62 | 63 | - Single-card fine-tuning: 64 | 65 | Modify the related configs in scripts/run_train.sh 66 | 67 | ``` 68 | bash scripts/run_train.sh 69 | ``` 70 | 71 | - Multi-card fine-tuning: 72 | 73 | Modify the related configs in scripts/run_train_parallel.sh 74 | 75 | ``` 76 | bash scripts/run_train_parallel.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [RANK_TABLE_FILE] 77 | ``` 78 | 79 | ### Demos 80 | 81 | Below are some of the images generated by our Wukong-Huahua model and the corresponding `[input text]`: 82 | 83 | ``` 84 | 城市夜景 赛博朋克 格雷格·鲁特科夫斯基 85 | ``` 86 | 87 | ![城市夜景 赛博朋克 格雷格·鲁特科夫斯基](demo/城市夜景%20赛博朋克%20格雷格·鲁特科夫斯基.png) 88 | 89 | ``` 90 | 莫奈 撑阳伞的女人 月亮 梦幻 91 | ``` 92 | 93 | ![莫奈 撑阳伞的女人 月亮 梦幻](demo/莫奈%20撑阳伞的女人%20月亮%20梦幻.png) 94 | 95 | ``` 96 | 海上日出时候的奔跑者 97 | ``` 98 | 99 | ![海上日出时候的奔跑者](demo/海上日出时候的奔跑者.png) 100 | 101 | ``` 102 | 诺亚方舟在世界末日起航 科幻插画 103 | ``` 104 | 105 | ![诺亚方舟在世界末日起航 科幻插画](demo/诺亚方舟在世界末日起航%20科幻插画.png) 106 | 107 | ``` 108 | 时空 黑洞 辐射 109 | ``` 110 | 111 | ![时空 黑洞 辐射](demo/时空%20黑洞%20辐射.png) 112 | 113 | ``` 114 | 乡村 田野 屏保 115 | ``` 116 | 117 | ![乡村 田野 屏保](demo/乡村%20田野%20屏保.png) 118 | 119 | ``` 120 | 来自深渊 风景 绘画 写实风格 121 | ``` 122 | 123 | ![来自深渊 风景 绘画 写实风格](demo/来自深渊%20风景%20绘画%20写实风格.png) 124 | -------------------------------------------------------------------------------- /vision/wukong-huahua/configs/clip-vit-l-14-zh/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "clip-vit-large-patch14/", 3 | "architectures": [ 4 | "CLIPModel" 5 | ], 6 | "initializer_factor": 1.0, 7 | "logit_scale_init_value": 2.6592, 8 | "model_type": "clip", 9 | "projection_dim": 768, 10 | "text_config": { 11 | "_name_or_path": "", 12 | "add_cross_attention": false, 13 | "architectures": null, 14 | "attention_dropout": 0.0, 15 | "bad_words_ids": null, 16 | "bos_token_id": 0, 17 | "chunk_size_feed_forward": 0, 18 | "cross_attention_hidden_size": null, 19 | "decoder_start_token_id": null, 20 | "diversity_penalty": 0.0, 21 | "do_sample": false, 22 | "dropout": 0.0, 23 | "early_stopping": false, 24 | "encoder_no_repeat_ngram_size": 0, 25 | "eos_token_id": 2, 26 | "finetuning_task": null, 27 | "forced_bos_token_id": null, 28 | "forced_eos_token_id": null, 29 | "hidden_act": "quick_gelu", 30 | "hidden_size": 768, 31 | "id2label": { 32 | "0": "LABEL_0", 33 | "1": "LABEL_1" 34 | }, 35 | "initializer_factor": 1.0, 36 | "initializer_range": 0.02, 37 | "intermediate_size": 3072, 38 | "is_decoder": false, 39 | "is_encoder_decoder": false, 40 | "label2id": { 41 | "LABEL_0": 0, 42 | "LABEL_1": 1 43 | }, 44 | "layer_norm_eps": 1e-05, 45 | "length_penalty": 1.0, 46 | "max_length": 20, 47 | "max_position_embeddings": 77, 48 | "min_length": 0, 49 | "model_type": "clip_text_model", 50 | "no_repeat_ngram_size": 0, 51 | "num_attention_heads": 12, 52 | "num_beam_groups": 1, 53 | "num_beams": 1, 54 | "num_hidden_layers": 12, 55 | "num_return_sequences": 1, 56 | "output_attentions": false, 57 | "output_hidden_states": false, 58 | "output_scores": false, 59 | "pad_token_id": 1, 60 | "prefix": null, 61 | "problem_type": null, 62 | "projection_dim" : 768, 63 | "pruned_heads": {}, 64 | "remove_invalid_values": false, 65 | "repetition_penalty": 1.0, 66 | "return_dict": true, 67 | "return_dict_in_generate": false, 68 | "sep_token_id": null, 69 | "task_specific_params": null, 70 | "temperature": 1.0, 71 | "tie_encoder_decoder": false, 72 | "tie_word_embeddings": true, 73 | "tokenizer_class": null, 74 | "top_k": 50, 75 | "top_p": 1.0, 76 |
"torch_dtype": null, 77 | "torchscript": false, 78 | "transformers_version": "4.16.0.dev0", 79 | "use_bfloat16": false, 80 | "vocab_size": 49408 81 | }, 82 | "text_config_dict": { 83 | "hidden_size": 768, 84 | "intermediate_size": 3072, 85 | "num_attention_heads": 12, 86 | "num_hidden_layers": 12, 87 | "projection_dim": 768 88 | }, 89 | "torch_dtype": "float32", 90 | "transformers_version": null, 91 | "vision_config": { 92 | "_name_or_path": "", 93 | "add_cross_attention": false, 94 | "architectures": null, 95 | "attention_dropout": 0.0, 96 | "bad_words_ids": null, 97 | "bos_token_id": null, 98 | "chunk_size_feed_forward": 0, 99 | "cross_attention_hidden_size": null, 100 | "decoder_start_token_id": null, 101 | "diversity_penalty": 0.0, 102 | "do_sample": false, 103 | "dropout": 0.0, 104 | "early_stopping": false, 105 | "encoder_no_repeat_ngram_size": 0, 106 | "eos_token_id": null, 107 | "finetuning_task": null, 108 | "forced_bos_token_id": null, 109 | "forced_eos_token_id": null, 110 | "hidden_act": "quick_gelu", 111 | "hidden_size": 1024, 112 | "id2label": { 113 | "0": "LABEL_0", 114 | "1": "LABEL_1" 115 | }, 116 | "image_size": 224, 117 | "initializer_factor": 1.0, 118 | "initializer_range": 0.02, 119 | "intermediate_size": 4096, 120 | "is_decoder": false, 121 | "is_encoder_decoder": false, 122 | "label2id": { 123 | "LABEL_0": 0, 124 | "LABEL_1": 1 125 | }, 126 | "layer_norm_eps": 1e-05, 127 | "length_penalty": 1.0, 128 | "max_length": 20, 129 | "min_length": 0, 130 | "model_type": "clip_vision_model", 131 | "no_repeat_ngram_size": 0, 132 | "num_attention_heads": 16, 133 | "num_beam_groups": 1, 134 | "num_beams": 1, 135 | "num_hidden_layers": 24, 136 | "num_return_sequences": 1, 137 | "output_attentions": false, 138 | "output_hidden_states": false, 139 | "output_scores": false, 140 | "pad_token_id": null, 141 | "patch_size": 14, 142 | "prefix": null, 143 | "problem_type": null, 144 | "projection_dim" : 768, 145 | "pruned_heads": {}, 146 | "remove_invalid_values": false, 147 | "repetition_penalty": 1.0, 148 | "return_dict": true, 149 | "return_dict_in_generate": false, 150 | "sep_token_id": null, 151 | "task_specific_params": null, 152 | "temperature": 1.0, 153 | "tie_encoder_decoder": false, 154 | "tie_word_embeddings": true, 155 | "tokenizer_class": null, 156 | "top_k": 50, 157 | "top_p": 1.0, 158 | "torch_dtype": null, 159 | "torchscript": false, 160 | "transformers_version": "4.16.0.dev0", 161 | "use_bfloat16": false 162 | }, 163 | "vision_config_dict": { 164 | "hidden_size": 1024, 165 | "intermediate_size": 4096, 166 | "num_attention_heads": 16, 167 | "num_hidden_layers": 24, 168 | "patch_size": 14, 169 | "projection_dim": 768 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /vision/wukong-huahua/configs/train_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_config": "configs/v1-train-chinese.yaml", 3 | "pretrained_model_path": "models/", 4 | "pretrained_model_file":"wukong-huahua-ms.ckpt", 5 | "data_path": "/secHome/FFHQ", 6 | "train_batch_size": 3, 7 | "gradient_accumulation_steps": 1, 8 | "optim": "adamw", 9 | "patch_size":32, 10 | "epochs": 20, 11 | "betas": [ 12 | 0.9, 13 | 0.98 14 | ], 15 | "dropout": 0.1, 16 | "weight_decay": 0.01, 17 | "warmup_steps": 1000, 18 | "seed": 3407, 19 | "image_size": 512, 20 | "image_filter_size": 256, 21 | "random_crop": false, 22 | "filter_small_size": true, 23 | "start_learning_rate": 1e-5, 24 | "end_learning_rate": 1e-7, 25 | 
"decay_steps": 0, 26 | "save_checkpoint_steps": 10000 27 | } 28 | -------------------------------------------------------------------------------- /vision/wukong-huahua/configs/train_db_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_config": "configs/v1-train-db-chinese.yaml", 3 | "pretrained_model_path": "models/", 4 | "pretrained_model_file":"wukong-huahua-ms.ckpt", 5 | "data_path": "/secHome/FFHQ", 6 | "train_data_path": "dataset/train_cat", 7 | "reg_data_path": "dataset/reg_cat", 8 | "train_data_repeats": 100, 9 | "class_word": "猫", 10 | "token": "α", 11 | "train_batch_size": 1, 12 | "gradient_accumulation_steps": 1, 13 | "optim": "adamw", 14 | "patch_size":32, 15 | "epochs": 5, 16 | "betas": [ 17 | 0.9, 18 | 0.98 19 | ], 20 | "dropout": 0.1, 21 | "weight_decay": 0.01, 22 | "warmup_steps": 100, 23 | "seed": 3407, 24 | "image_size": 512, 25 | "image_filter_size": 256, 26 | "random_crop": false, 27 | "filter_small_size": true, 28 | "start_learning_rate": 1e-6, 29 | "end_learning_rate": 1e-7, 30 | "decay_steps": 0, 31 | "save_checkpoint_steps": 1000 32 | } 33 | -------------------------------------------------------------------------------- /vision/wukong-huahua/configs/v1-inference-chinese-lora.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | use_fp16: True 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | image_size: 32 # unused 25 | in_channels: 4 26 | out_channels: 4 27 | model_channels: 320 28 | attention_resolutions: [ 4, 2, 1 ] 29 | num_res_blocks: 2 30 | channel_mult: [ 1, 2, 4, 4 ] 31 | num_heads: 8 32 | use_spatial_transformer: True 33 | transformer_depth: 1 34 | context_dim: 768 35 | use_checkpoint: True 36 | legacy: False 37 | use_fp16: True 38 | enable_lora: True 39 | lora_rank: 4 40 | lora_alpha: 4 41 | 42 | first_stage_config: 43 | target: ldm.models.autoencoder.AutoencoderKL 44 | params: 45 | embed_dim: 4 46 | monitor: val/rec_loss 47 | use_fp16: True 48 | ddconfig: 49 | double_z: true 50 | z_channels: 4 51 | resolution: 256 52 | in_channels: 3 53 | out_ch: 3 54 | ch: 128 55 | ch_mult: 56 | - 1 57 | - 2 58 | - 4 59 | - 4 60 | num_res_blocks: 2 61 | attn_resolutions: [] 62 | dropout: 0.0 63 | 64 | cond_stage_config: 65 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder_ZH 66 | params: 67 | use_fp16: True 68 | -------------------------------------------------------------------------------- /vision/wukong-huahua/configs/v1-inference-chinese.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-04 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "jpg" 11 | cond_stage_key: "txt" 12 | image_size: 64 13 | channels: 4 14 | cond_stage_trainable: false # Note: different from the one we 
trained before 15 | conditioning_key: crossattn 16 | monitor: val/loss_simple_ema 17 | scale_factor: 0.18215 18 | use_ema: False 19 | use_fp16: True 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | image_size: 32 # unused 25 | in_channels: 4 26 | out_channels: 4 27 | model_channels: 320 28 | attention_resolutions: [ 4, 2, 1 ] 29 | num_res_blocks: 2 30 | channel_mult: [ 1, 2, 4, 4 ] 31 | num_heads: 8 32 | use_spatial_transformer: True 33 | transformer_depth: 1 34 | context_dim: 768 35 | use_checkpoint: True 36 | legacy: False 37 | use_fp16: True 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | use_fp16: True 45 | ddconfig: 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | dropout: 0.0 60 | 61 | cond_stage_config: 62 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder_ZH 63 | params: 64 | use_fp16: True 65 | -------------------------------------------------------------------------------- /vision/wukong-huahua/configs/v1-train-chinese-lora.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-05 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "image" 11 | cond_stage_key: "caption" 12 | image_size: 64 13 | channels: 4 14 | conditioning_key: crossattn 15 | monitor: val/loss_simple_ema 16 | scale_factor: 0.18215 17 | use_ema: False 18 | use_fp16: True 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | image_size: 32 # unused 24 | in_channels: 4 25 | out_channels: 4 26 | model_channels: 320 27 | attention_resolutions: [ 4, 2, 1 ] 28 | num_res_blocks: 2 29 | channel_mult: [ 1, 2, 4, 4 ] 30 | num_heads: 8 31 | use_spatial_transformer: True 32 | transformer_depth: 1 33 | context_dim: 768 34 | use_checkpoint: True 35 | legacy: False 36 | use_fp16: True 37 | dropout: 0.1 38 | enable_lora: True 39 | lora_rank: 4 40 | lora_alpha: 4 41 | 42 | first_stage_config: 43 | target: ldm.models.autoencoder.AutoencoderKL 44 | params: 45 | embed_dim: 4 46 | monitor: val/rec_loss 47 | use_fp16: True 48 | ddconfig: 49 | double_z: true 50 | z_channels: 4 51 | resolution: 256 52 | in_channels: 3 53 | out_ch: 3 54 | ch: 128 55 | ch_mult: 56 | - 1 57 | - 2 58 | - 4 59 | - 4 60 | num_res_blocks: 2 61 | attn_resolutions: [] 62 | 63 | cond_stage_config: 64 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder_ZH 65 | params: 66 | use_fp16: True 67 | -------------------------------------------------------------------------------- /vision/wukong-huahua/configs/v1-train-chinese.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-05 3 | target: ldm.models.diffusion.ddpm.LatentDiffusion 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "image" 11 | cond_stage_key: "caption" 12 | image_size: 64 13 | channels: 4 14 | conditioning_key: crossattn 15 | monitor: val/loss_simple_ema 16 | scale_factor: 0.18215 17 | use_ema: False 18 | use_fp16: True 19 | 20 | unet_config: 
21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | image_size: 32 # unused 24 | in_channels: 4 25 | out_channels: 4 26 | model_channels: 320 27 | attention_resolutions: [ 4, 2, 1 ] 28 | num_res_blocks: 2 29 | channel_mult: [ 1, 2, 4, 4 ] 30 | num_heads: 8 31 | use_spatial_transformer: True 32 | transformer_depth: 1 33 | context_dim: 768 34 | use_checkpoint: True 35 | legacy: False 36 | use_fp16: True 37 | dropout: 0.1 38 | 39 | first_stage_config: 40 | target: ldm.models.autoencoder.AutoencoderKL 41 | params: 42 | embed_dim: 4 43 | monitor: val/rec_loss 44 | use_fp16: True 45 | ddconfig: 46 | double_z: true 47 | z_channels: 4 48 | resolution: 256 49 | in_channels: 3 50 | out_ch: 3 51 | ch: 128 52 | ch_mult: 53 | - 1 54 | - 2 55 | - 4 56 | - 4 57 | num_res_blocks: 2 58 | attn_resolutions: [] 59 | 60 | cond_stage_config: 61 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder_ZH 62 | params: 63 | use_fp16: True 64 | -------------------------------------------------------------------------------- /vision/wukong-huahua/configs/v1-train-db-chinese.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | base_learning_rate: 1.0e-05 3 | target: ldm.models.diffusion.ddpm.LatentDiffusionDB 4 | params: 5 | linear_start: 0.00085 6 | linear_end: 0.0120 7 | num_timesteps_cond: 1 8 | log_every_t: 200 9 | timesteps: 1000 10 | first_stage_key: "image" 11 | cond_stage_key: "caption" 12 | image_size: 64 13 | channels: 4 14 | conditioning_key: crossattn 15 | monitor: val/loss_simple_ema 16 | scale_factor: 0.18215 17 | use_ema: False 18 | use_fp16: True 19 | reg_weight: 0.5 20 | 21 | unet_config: 22 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 23 | params: 24 | image_size: 32 # unused 25 | in_channels: 4 26 | out_channels: 4 27 | model_channels: 320 28 | attention_resolutions: [ 4, 2, 1 ] 29 | num_res_blocks: 2 30 | channel_mult: [ 1, 2, 4, 4 ] 31 | num_heads: 8 32 | use_spatial_transformer: True 33 | transformer_depth: 1 34 | context_dim: 768 35 | use_checkpoint: True 36 | legacy: False 37 | use_fp16: True 38 | dropout: 0.1 39 | 40 | first_stage_config: 41 | target: ldm.models.autoencoder.AutoencoderKL 42 | params: 43 | embed_dim: 4 44 | monitor: val/rec_loss 45 | use_fp16: True 46 | ddconfig: 47 | double_z: true 48 | z_channels: 4 49 | resolution: 256 50 | in_channels: 3 51 | out_ch: 3 52 | ch: 128 53 | ch_mult: 54 | - 1 55 | - 2 56 | - 4 57 | - 4 58 | num_res_blocks: 2 59 | attn_resolutions: [] 60 | 61 | cond_stage_config: 62 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder_ZH 63 | params: 64 | use_fp16: True 65 | -------------------------------------------------------------------------------- /vision/wukong-huahua/configs/wukong-huahua_inpaint_inference.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion 3 | params: 4 | linear_start: 0.00085 5 | linear_end: 0.0120 6 | num_timesteps_cond: 1 7 | log_every_t: 200 8 | timesteps: 1000 9 | first_stage_key: "image" 10 | cond_stage_key: "caption" 11 | image_size: 64 12 | channels: 4 13 | cond_stage_trainable: false # Note: different from the one we trained before 14 | conditioning_key: hybrid # important 15 | monitor: val/loss_simple_ema 16 | scale_factor: 0.18215 17 | finetune_keys: null 18 | use_ema: false 19 | 20 | unet_config: 21 | target: ldm.modules.diffusionmodules.openaimodel.UNetModel 22 | params: 23 | image_size: 64 # unused 24 | in_channels: 9 
# 4 data + 4 downscaled image + 1 mask 25 | out_channels: 4 26 | model_channels: 320 27 | attention_resolutions: [ 4, 2, 1 ] 28 | num_res_blocks: 2 29 | channel_mult: [ 1, 2, 4, 4 ] 30 | num_heads: 8 31 | use_spatial_transformer: true 32 | transformer_depth: 1 33 | context_dim: 768 34 | use_checkpoint: true 35 | legacy: false 36 | use_fp16: True 37 | 38 | first_stage_config: 39 | target: ldm.models.autoencoder.AutoencoderKL 40 | params: 41 | embed_dim: 4 42 | monitor: val/rec_loss 43 | use_fp16: True 44 | ddconfig: 45 | double_z: true 46 | z_channels: 4 47 | resolution: 512 48 | in_channels: 3 49 | out_ch: 3 50 | ch: 128 51 | ch_mult: 52 | - 1 53 | - 2 54 | - 4 55 | - 4 56 | num_res_blocks: 2 57 | attn_resolutions: [] 58 | dropout: 0.0 59 | 60 | cond_stage_config: 61 | target: ldm.modules.encoders.modules.FrozenCLIPEmbedder_ZH 62 | params: 63 | use_fp16: True 64 | -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/inpaint/overture-creations-5sI6fQgYIuo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/inpaint/overture-creations-5sI6fQgYIuo.png -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/inpaint/overture-creations-5sI6fQgYIuo_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/inpaint/overture-creations-5sI6fQgYIuo_mask.png -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/inpaint/一只红色的狐狸坐在长椅上.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/inpaint/一只红色的狐狸坐在长椅上.png -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/个性化生成效果-猫.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/个性化生成效果-猫.jpg -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/个性化训练数据-猫.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/个性化训练数据-猫.jpg -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/乡村 田野 屏保.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/乡村 田野 屏保.png -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/城市夜景 赛博朋克 格雷格·鲁特科夫斯基.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/城市夜景 赛博朋克 格雷格·鲁特科夫斯基.png -------------------------------------------------------------------------------- 
/vision/wukong-huahua/demo/效果图合集.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/效果图合集.jpg -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/时空 黑洞 辐射.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/时空 黑洞 辐射.png -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/来自深渊 风景 绘画 写实风格.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/来自深渊 风景 绘画 写实风格.png -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/海上日出时候的奔跑者.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/海上日出时候的奔跑者.png -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/莫奈 撑阳伞的女人 月亮 梦幻.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/莫奈 撑阳伞的女人 月亮 梦幻.png -------------------------------------------------------------------------------- /vision/wukong-huahua/demo/诺亚方舟在世界末日起航 科幻插画.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/demo/诺亚方舟在世界末日起航 科幻插画.png -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/data/t2i_collate.py: -------------------------------------------------------------------------------- 1 | from toolz.sandbox import unzip 2 | 3 | 4 | data_column = [ 5 | 'img_feat', 6 | 'txt_tokens' 7 | ] 8 | 9 | 10 | def t2i_collate(inputs): 11 | """ 12 | Return: 13 | :img_feat (batch_size, height, width, 3) 14 | :txt_tokens (n, max_txt_len) 15 | """ 16 | img_feat, txt_tokens = map(list, unzip(inputs)) 17 | batch = { 18 | 'img_feat': img_feat, 19 | 'txt_tokens': txt_tokens, 20 | } 21 | return batch 22 | 23 | 24 | data_column_db = [ 25 | 'train_img_feat', 26 | 'train_txt_tokens', 27 | 'reg_img_feat', 28 | 'reg_txt_tokens' 29 | ] 30 | 31 | 32 | def t2i_collate_db(inputs): 33 | """ 34 | Return: 35 | :train_img_feat (batch_size, height, width, 3) 36 | :train_txt_tokens (n, max_txt_len) 37 | :reg_img_feat (batch_size, height, width, 3) 38 | :reg_txt_tokens (n, max_txt_len) 39 | """ 40 | train_img_feat, train_txt_tokens, reg_img_feat, reg_txt_tokens = map(list, unzip(inputs)) 41 | batch = { 42 | 'train_img_feat': train_img_feat, 43 | 'train_txt_tokens': train_txt_tokens, 44 | 'reg_img_feat': reg_img_feat, 45 | 'reg_txt_tokens': reg_txt_tokens, 46 | } 47 | return batch -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/models/autoencoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022
Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | import mindspore as ms 16 | import mindspore.nn as nn 17 | import mindspore.ops as P 18 | 19 | from ldm.modules.diffusionmodules.model import Encoder, Decoder 20 | from ldm.modules.distributions.distributions import DiagonalGaussianDistribution 21 | 22 | class AutoencoderKL(nn.Cell): 23 | def __init__(self, 24 | ddconfig, 25 | embed_dim, 26 | ckpt_path=None, 27 | ignore_keys=[], 28 | image_key="image", 29 | colorize_nlabels=None, 30 | monitor=None, 31 | use_fp16=False 32 | ): 33 | super().__init__() 34 | self.dtype = ms.float16 if use_fp16 else ms.float32 35 | self.image_key = image_key 36 | self.encoder = Encoder(dtype=self.dtype, **ddconfig) 37 | self.decoder = Decoder(dtype=self.dtype, **ddconfig) 38 | assert ddconfig["double_z"] 39 | self.quant_conv = nn.Conv2d(2*ddconfig["z_channels"], 2*embed_dim, 1, pad_mode="valid", has_bias=True).to_float(self.dtype) 40 | self.post_quant_conv = nn.Conv2d(embed_dim, ddconfig["z_channels"], 1, pad_mode="valid", has_bias=True).to_float(self.dtype) 41 | self.embed_dim = embed_dim 42 | if colorize_nlabels is not None: 43 | assert type(colorize_nlabels)==int 44 | self.register_buffer("colorize", ms.ops.standard_normal((3, colorize_nlabels, 1, 1))) 45 | if monitor is not None: 46 | self.monitor = monitor 47 | if ckpt_path is not None: 48 | self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) 49 | 50 | self.split = P.Split(axis=1, output_num=2) 51 | self.exp = P.Exp() 52 | self.stdnormal = P.StandardNormal() 53 | 54 | def init_from_ckpt(self, path, ignore_keys=list()): 55 | sd = ms.load_checkpoint(path)["state_dict"] 56 | keys = list(sd.keys()) 57 | for k in keys: 58 | for ik in ignore_keys: 59 | if k.startswith(ik): 60 | print("Deleting key {} from state_dict.".format(k)) 61 | del sd[k] 62 | ms.load_param_into_net(self, sd, strict_load=False) 63 | print(f"Restored from {path}") 64 | 65 | def decode(self, z): 66 | z = self.post_quant_conv(z) 67 | dec = self.decoder(z) 68 | return dec 69 | 70 | def encode(self, x): 71 | h = self.encoder(x) 72 | moments = self.quant_conv(h) 73 | mean, logvar = self.split(moments) 74 | logvar = P.clip_by_value(logvar, -30.0, 20.0) 75 | std = self.exp(0.5 * logvar) 76 | x = mean + std * self.stdnormal(mean.shape) 77 | return x 78 | -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/models/clip_zh/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-lab/minddiffusion/5779fd04c17f60f277fa88e635287fcc1dd4ecc5/vision/wukong-huahua/ldm/models/clip_zh/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/models/clip_zh/utils.py:
-------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | import os 16 | import unicodedata 17 | 18 | def abs_root_dir(cfg, data_root=None): 19 | def get_abs_path(data_dir, data_root): 20 | if os.path.isabs(data_dir): 21 | return data_dir 22 | return os.path.join(data_root, data_dir) 23 | 24 | if isinstance(cfg, dict): 25 | for key, value in cfg.items(): 26 | if key == 'root_dir': 27 | cfg[key] = get_abs_path(value, data_root) 28 | break 29 | abs_root_dir(value, data_root=data_root) 30 | elif isinstance(cfg, list): 31 | for item in cfg: 32 | abs_root_dir(item, data_root=data_root) 33 | else: 34 | return 35 | 36 | 37 | def is_control(char): 38 | """Checks whether `char` is a control character.""" 39 | # These are technically control characters but we count them as whitespace 40 | # characters. 41 | if char == "\t" or char == "\n" or char == "\r": 42 | return False 43 | cat = unicodedata.category(char) 44 | if cat.startswith("C"): 45 | return True 46 | return False 47 | 48 | 49 | def is_whitespace(char): 50 | """Checks whether `char` is a whitespace character.""" 51 | # \t, \n, and \r are technically control characters but we treat them 52 | # as whitespace since they are generally considered as such. 53 | if char == " " or char == "\t" or char == "\n" or char == "\r": 54 | return True 55 | cat = unicodedata.category(char) 56 | if cat == "Zs": 57 | return True 58 | return False 59 | 60 | 61 | def is_chinese_char(cp): 62 | """Checks whether `cp` is the codepoint of a CJK character.""" 63 | # This defines a "chinese character" as anything in the CJK Unicode block: 64 | # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) 65 | # 66 | # Note that the CJK Unicode block is NOT all Japanese and Korean characters, 67 | # despite its name. The modern Korean Hangul alphabet is a different block, 68 | # as is Japanese Hiragana and Katakana. Those alphabets are used to write 69 | # space-separated words, so they are not treated specially and are handled 70 | # like all of the other languages. 71 | if ( 72 | (0x4E00 <= cp <= 0x9FFF) 73 | or (0x3400 <= cp <= 0x4DBF) 74 | or (0x20000 <= cp <= 0x2A6DF) 75 | or (0x2A700 <= cp <= 0x2B73F) 76 | or (0x2B740 <= cp <= 0x2B81F) 77 | or (0x2B820 <= cp <= 0x2CEAF) 78 | or (0xF900 <= cp <= 0xFAFF) 79 | or (0x2F800 <= cp <= 0x2FA1F) 80 | ): 81 | return True 82 | 83 | return False 84 | 85 | 86 | def is_punctuation(char): 87 | """Checks whether `char` is a punctuation character.""" 88 | cp = ord(char) 89 | # We treat all non-letter/number ASCII as punctuation. 90 | # Characters such as "^", "$", and "`" are not in the Unicode 91 | # Punctuation class but we treat them as punctuation anyways, for 92 | # consistency.
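# For example, ord("^") is 94, which falls in the 91-96 range below, so "^" is treated as punctuation here even though its Unicode category is Sk rather than a P* class.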
93 | if (33 <= cp <= 47) or (58 <= cp <= 64) \ 94 | or (91 <= cp <= 96) or (123 <= cp <= 126): 95 | return True 96 | cat = unicodedata.category(char) 97 | if cat.startswith("P"): 98 | return True 99 | return False 100 | 101 | 102 | def strip_accents(text): 103 | """Strips accents from a piece of text.""" 104 | text = unicodedata.normalize("NFD", text) 105 | output = [] 106 | for char in text: 107 | cat = unicodedata.category(char) 108 | if cat == "Mn": 109 | continue 110 | output.append(char) 111 | return "".join(output) 112 | -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/models/diffusion/dpm_solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import DPMSolverSampler -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/models/diffusion/dpm_solver/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """SAMPLING ONLY.""" 16 | 17 | import mindspore as ms 18 | from mindspore import ops 19 | from .dpm_solver import NoiseScheduleVP, model_wrapper, DPM_Solver 20 | 21 | 22 | class DPMSolverSampler(object): 23 | def __init__(self, model, **kwargs): 24 | super().__init__() 25 | self.model = model 26 | self.register_buffer('alphas_cumprod', model.alphas_cumprod) 27 | 28 | def register_buffer(self, name, attr): 29 | setattr(self, name, attr) 30 | 31 | def sample(self, 32 | S, 33 | batch_size, 34 | shape, 35 | conditioning=None, 36 | callback=None, 37 | normals_sequence=None, 38 | img_callback=None, 39 | quantize_x0=False, 40 | eta=0., 41 | mask=None, 42 | x0=None, 43 | temperature=1., 44 | noise_dropout=0., 45 | score_corrector=None, 46 | corrector_kwargs=None, 47 | verbose=True, 48 | x_T=None, 49 | log_every_t=100, 50 | unconditional_guidance_scale=1., 51 | unconditional_conditioning=None, 52 | # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... 
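# (for classifier-free guidance this is typically the text encoding of an empty prompt)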
53 | **kwargs 54 | ): 55 | if conditioning is not None: 56 | if isinstance(conditioning, dict): 57 | cbs = conditioning[list(conditioning.keys())[0]].shape[0] 58 | if cbs != batch_size: 59 | print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") 60 | else: 61 | if conditioning.shape[0] != batch_size: 62 | print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") 63 | 64 | # sampling 65 | C, H, W = shape 66 | size = (batch_size, C, H, W) 67 | 68 | # print(f'Data shape for DPM-Solver sampling is {size}, sampling steps {S}') 69 | 70 | if x_T is None: 71 | img = ops.standard_normal(size) 72 | else: 73 | img = x_T 74 | 75 | ns = NoiseScheduleVP('discrete', alphas_cumprod=self.alphas_cumprod) 76 | model_fn = model_wrapper( 77 | lambda x, t, c: self.model.apply_model(x, t, 78 | c_concat=c if self.model.model.conditioning_key == 'concat' else None, 79 | c_crossattn=c if self.model.model.conditioning_key == 'crossattn' else None), 80 | ns, 81 | model_type="noise", 82 | guidance_type="classifier-free", 83 | condition=conditioning, 84 | unconditional_condition=unconditional_conditioning, 85 | guidance_scale=unconditional_guidance_scale, 86 | ) 87 | 88 | dpm_solver = DPM_Solver(model_fn, ns, predict_x0=True, thresholding=False) 89 | 90 | x = dpm_solver.sample(ops.Cast()(img, ms.float16), steps=S, skip_type="time_uniform", 91 | method="multistep", order=2, lower_order_final=True) 92 | 93 | return x, None -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/modules/distributions/distributions.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | import mindspore as ms 16 | import mindspore.ops as ops 17 | 18 | class DiagonalGaussianDistribution(object): 19 | def __init__(self, parameters, deterministic=False): 20 | 21 | self.mean, self.logvar = ops.Split(axis=1, output_num=2)(parameters) 22 | self.logvar = ops.clip_by_value(self.logvar, -30.0, 20.0) 23 | self.deterministic = deterministic 24 | self.std = ops.exp(0.5 * self.logvar) 25 | if self.deterministic: 26 | self.std = ops.zeros_like(self.mean) # deterministic mode: sample() returns the mean 27 | self.stdnormal = ops.StandardNormal() 28 | 29 | def sample(self): 30 | x = self.mean + self.std * self.stdnormal(self.mean.shape) 31 | return x -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/modules/encoders/modules.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | import mindspore as ms 16 | import mindspore.nn as nn 17 | import mindspore.ops as ops 18 | from mindspore import Tensor 19 | from ldm.models.clip_zh.simple_tokenizer import WordpieceTokenizer 20 | from .text_encoder import TextEncoder 21 | 22 | 23 | class FrozenCLIPEmbedder_ZH(nn.Cell): 24 | def __init__(self, max_length=77, use_fp16=False): 25 | super(FrozenCLIPEmbedder_ZH, self).__init__() 26 | self.dtype = ms.float16 if use_fp16 else ms.float32 27 | self.max_length = max_length 28 | self.tokenizer = WordpieceTokenizer() 29 | self.transformer = TextEncoder(context_length=77, vocab_size=49408, output_dim=768, width=768, layers=12, heads=12, dtype=self.dtype) 30 | 31 | def tokenize(self, texts): 32 | SOT_TEXT = "[CLS]" 33 | EOT_TEXT = "[SEP]" 34 | CONTEXT_LEN = 77 35 | 36 | if isinstance(texts, str): 37 | texts = [texts] 38 | 39 | sot_token = self.tokenizer.encoder[SOT_TEXT] 40 | eot_token = self.tokenizer.encoder[EOT_TEXT] 41 | all_tokens = [[sot_token] + self.tokenizer.encode(text) + [eot_token] for text in texts] 42 | result = ops.Zeros()((len(all_tokens), CONTEXT_LEN), ms.int64) 43 | 44 | for i, tokens in enumerate(all_tokens): 45 | if len(tokens) > CONTEXT_LEN: 46 | tokens = tokens[:CONTEXT_LEN - 1] + [eot_token] 47 | 48 | result[i, : len(tokens)] = Tensor(tokens) 49 | 50 | return result 51 | 52 | def encode(self, text): 53 | batch_encoding = self.tokenize(text) 54 | outputs = self.transformer(batch_encoding) 55 | return outputs 56 | 57 | def construct(self, c): 58 | outputs = self.transformer(c) 59 | return outputs 60 | -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/modules/train/callback.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================ 15 | 16 | import mindspore as ms 17 | 18 | 19 | class OverflowMonitor(ms.Callback): 20 | def step_end(self, run_context): 21 | cb_params = run_context.original_args() 22 | cur_epoch_num = cb_params.get("cur_epoch_num", 1) 23 | cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 24 | overflow = cb_params.net_outputs[1] 25 | if overflow: 26 | print(f"overflow detected in epoch {cur_epoch_num} step {cur_step_in_epoch}") 27 | return super().step_end(run_context) -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/modules/train/learningrate.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | Utility functions for parallel training. 17 | This is an experimental interface that is subject to change and/or deletion. 18 | """ 19 | 20 | from mindspore.ops import operations as P 21 | import mindspore.common.dtype as mstype 22 | from mindspore.common.tensor import Tensor 23 | from mindspore.nn.learning_rate_schedule import LearningRateSchedule, PolynomialDecayLR, WarmUpLR, CosineDecayLR 24 | import numpy as np 25 | 26 | 27 | class LearningRate(LearningRateSchedule): 28 | """ 29 | Learning rate scheduler 30 | """ 31 | 32 | def __init__(self, 33 | start_learning_rate, 34 | end_learning_rate, 35 | warmup_steps, 36 | decay_steps, 37 | power=1.0, 38 | use_cosine=True): 39 | super(LearningRate, self).__init__() 40 | self.warmup_flag = False 41 | if warmup_steps > 0: 42 | self.warmup_flag = True 43 | self.warmup_lr = WarmUpLR(start_learning_rate, warmup_steps) 44 | self.decay_lr = PolynomialDecayLR(start_learning_rate, end_learning_rate, decay_steps, power) 45 | self.cosine_decay_lr = CosineDecayLR(end_learning_rate, start_learning_rate, decay_steps) 46 | self.warmup_steps = Tensor(np.array([warmup_steps]).astype(np.float32)) 47 | self.greater = P.Greater() 48 | self.one = Tensor(np.array([1.0]).astype(np.float32)) 49 | self.cast = P.Cast() 50 | self.use_cosine = use_cosine 51 | 52 | def construct(self, global_step): 53 | """Learning rate scheduler construct""" 54 | if not self.use_cosine: 55 | decay_lr = self.decay_lr(global_step) 56 | else: 57 | decay_lr = self.cosine_decay_lr(global_step) 58 | if self.warmup_flag: 59 | is_warmup = self.cast(self.greater(self.warmup_steps, global_step), mstype.float32) 60 | warmup_lr = self.warmup_lr(global_step) 61 | lr = (self.one - is_warmup) * decay_lr + is_warmup * warmup_lr 62 | else: 63 | lr = decay_lr 64 | return lr 65 | 66 | -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/modules/train/optim.py: -------------------------------------------------------------------------------- 1 | #
Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | Build the optimizer for MindSpore. 17 | """ 18 | from mindspore.nn.optim.adam import Adam, AdamWeightDecay 19 | 20 | 21 | def build_optimizer(model, opts, lr, enable_lora=False): 22 | """ 23 | Build the training optimizer with weight-decay parameter groups. 24 | 25 | :param model: network whose trainable parameters are optimized 26 | :param opts: options providing `optim` ('adam' or 'adamw') and `betas` 27 | :param lr: learning rate value or schedule 28 | :param enable_lora: when True, only LoRA parameters are trainable 29 | :return: optimizer 30 | """ 31 | 32 | decay_filter = lambda x: 'layernorm' not in x.name.lower() and "bias" not in x.name.lower() 33 | param_optimizer = model.trainable_params() 34 | decay_params = list(filter(decay_filter, param_optimizer)) 35 | other_params = list(filter(lambda x: not decay_filter(x), param_optimizer)) 36 | group_params = [{ 37 | 'params': decay_params, 38 | 'weight_decay': 1e-6 39 | }, { 40 | 'order_params': param_optimizer 41 | }] 42 | 43 | # With LoRA enabled, other_params is empty, so it does not need to be added to group_params 44 | if not enable_lora: 45 | group_params.append({ 46 | 'params': other_params, 47 | 'weight_decay': 0.0 48 | }) 49 | 50 | if opts.optim == 'adam': 51 | OptimCls = Adam 52 | elif opts.optim == 'adamw': 53 | OptimCls = AdamWeightDecay 54 | else: 55 | raise ValueError('invalid optimizer') 56 | optimizer = OptimCls(group_params, 57 | learning_rate=lr, beta1=opts.betas[0], beta2=opts.betas[1]) 58 | return optimizer 59 | -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/modules/train/parallel_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Parallel configuration for training.""" 16 | 17 | import math 18 | 19 | import numpy as np 20 | import mindspore.common.dtype as mstype 21 | from mindspore.context import ParallelMode 22 | 23 | class ParallelConfig: 24 | r""" 25 | ParallelConfig for setting the global data parallel, model parallel and fusion group.
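For example, an 8-device data-parallel job could call ParallelConfig.set_global_parallel_config(dp=8, mp=1, stages=1) before constructing the network (illustrative values, not a repository default).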
26 | """ 27 | dp = 8 28 | mp = 1 29 | pipeline_stage = 1 30 | recompute = False 31 | optimizer_shard = False 32 | fusion_group = 1 33 | parallel_mode = ParallelMode.SEMI_AUTO_PARALLEL 34 | vocab_emb_dp = False 35 | ep = dp 36 | capacity_factor = 1.5 37 | expert_num = 32 38 | aux_loss_factor = 0.01 39 | 40 | @staticmethod 41 | def set_global_parallel_config(dp=1, 42 | mp=1, 43 | recompute=True, 44 | stages=1, 45 | optimizer_shard=True, 46 | fusion_group=4, 47 | parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, 48 | vocab_emb_dp=True): 49 | r""" 50 | Set the global parallel configuration. 51 | 52 | Args: 53 | dp (int): The data parallel way. Default: 1 54 | mp (int): The model parallel way. Default: 1 55 | stages (int): The number of the pipeline stage. Should be a positive value. Default: 1. 56 | optimizer_shard (bool): Enable optimizer state sharding or not. Default: True. 57 | fusion_group (int): The fusion group size of the optimizer state sharding. Default: 4. 58 | recompute (bool): Enable recomputation of the transformer block or not. Default: True. 59 | parallel_mode (ParallelMode): Can be SEMI_AUTO_PARALLEL, DATA_PARALLEL or AUTO_PARALLEL. 60 | vocab_emb_dp (bool): Shard embedding in model parallel or data parallel. Default: True 61 | 62 | Supported Platforms: 63 | ``Ascend`` ``GPU`` 64 | 65 | Examples: 66 | >>> ParallelConfig.set_global_parallel_config(dp=1, mp=1) 67 | >>> ParallelConfig.set_global_parallel_config(stages=4) 68 | """ 69 | ParallelConfig.dp = dp 70 | ParallelConfig.mp = mp 71 | ParallelConfig.pipeline_stage = stages 72 | ParallelConfig.optimizer_shard = optimizer_shard 73 | ParallelConfig.fusion_group = fusion_group 74 | ParallelConfig.recompute = recompute 75 | ParallelConfig.parallel_mode = parallel_mode 76 | ParallelConfig.vocab_emb_dp = vocab_emb_dp 77 | -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/modules/train/tools.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """ 16 | Copyright (c) Microsoft Corporation. 17 | Licensed under the MIT license.
18 | 19 | Misc utilities 20 | """ 21 | import json 22 | import os 23 | import sys 24 | import random 25 | import numpy as np 26 | import mindspore as ms 27 | 28 | class NoOp: 29 | """ useful for distributed training No-Ops """ 30 | 31 | def __getattr__(self, name): 32 | return self.noop 33 | 34 | def noop(self, *args, **kwargs): 35 | return 36 | 37 | 38 | def parse_with_config(args): 39 | """Parse With Config""" 40 | if args.train_config is not None: 41 | abs_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../..")) 42 | args.train_config = os.path.join(abs_path, args.train_config) 43 | config_args = json.load(open(args.train_config)) 44 | override_keys = {arg[2:].split('=')[0] for arg in sys.argv[1:] 45 | if arg.startswith('--')} 46 | for k, v in config_args.items(): 47 | if k not in override_keys: 48 | setattr(args, k, v) 49 | return args 50 | 51 | 52 | def set_random_seed(seed): 53 | """Set Random Seed""" 54 | print("random seed: ", seed) 55 | random.seed(seed) 56 | np.random.seed(seed) 57 | ms.set_seed(seed) 58 | 59 | class Struct: 60 | def __init__(self, dict_): 61 | self.__dict__.update(dict_) 62 | -------------------------------------------------------------------------------- /vision/wukong-huahua/ldm/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Huawei Technologies Co., Ltd 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================ 15 | import importlib 16 | from inspect import isfunction 17 | import mindspore.ops as ops 18 | 19 | 20 | def exists(x): 21 | return x is not None 22 | 23 | 24 | def default(val, d): 25 | if exists(val): 26 | return val 27 | return d() if isfunction(d) else d 28 | 29 | 30 | def count_params(model, verbose=False): 31 | total_params = sum(p.size for p in model.get_parameters()) # nn.Cell exposes get_parameters(); Parameter.size is the element count 32 | if verbose: 33 | print(f"{model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.") 34 | return total_params 35 | 36 | 37 | def instantiate_from_config(config): 38 | if "target" not in config: 39 | if config == '__is_first_stage__': 40 | return None 41 | elif config == "__is_unconditional__": 42 | return None 43 | raise KeyError("Expected key `target` to instantiate.") 44 | return get_obj_from_str(config["target"])(**config.get("params", dict())) 45 | 46 | 47 | def get_obj_from_str(string, reload=False): 48 | module, cls = string.rsplit(".", 1) 49 | if reload: 50 | module_imp = importlib.import_module(module) 51 | importlib.reload(module_imp) 52 | return getattr(importlib.import_module(module, package=None), cls) 53 | 54 | def extract_into_tensor(a, t, x_shape): 55 | b = t.shape[0] 56 | out = ops.GatherD()(a, -1, t) 57 | return out.reshape(b, *((1,) * (len(x_shape) - 1))) 58 | -------------------------------------------------------------------------------- /vision/wukong-huahua/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | omegaconf 3 | einops 4 | ftfy 5 | regex 6 | albumentations 7 | pandas 8 | imagesize 9 | toolz 10 | pillow 11 | -------------------------------------------------------------------------------- /vision/wukong-huahua/scripts/run_db_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2023 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # ============================================================================ 16 | 17 | export GLOG_v=3 18 | export HCCL_CONNECT_TIMEOUT=600 19 | export ASCEND_GLOBAL_LOG_LEVEL=3 20 | export ASCEND_SLOG_PRINT_TO_STDOUT=0 21 | device_id=3 22 | 23 | output_path=output 24 | task_name=α猫 25 | train_data_path=dataset/train_cat 26 | reg_data_path=dataset/reg_cat 27 | class_word=猫 28 | pretrained_model_path=models 29 | train_config_file=configs/train_db_config.json 30 | token=α 31 | 32 | rm -rf ${output_path:?}/${task_name:?} 33 | mkdir -p ${output_path:?}/${task_name:?} 34 | export RANK_SIZE=1;export DEVICE_ID=$device_id;export MS_COMPILER_CACHE_PATH=${output_path:?}/${task_name:?}; \ 35 | nohup python -u run_db_train.py \ 36 | --train_data_path=$train_data_path \ 37 | --reg_data_path=$reg_data_path \ 38 | --class_word=$class_word \ 39 | --token=$token \ 40 | --train_config=$train_config_file \ 41 | --output_path=$output_path/$task_name \ 42 | --use_parallel=False \ 43 | --pretrained_model_path=$pretrained_model_path \ 44 | > $output_path/$task_name/log_train 2>&1 & -------------------------------------------------------------------------------- /vision/wukong-huahua/scripts/run_inpaint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2022 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | export GLOG_v=3 18 | export ASCEND_GLOBAL_LOG_LEVEL=3 19 | export ASCEND_SLOG_PRINT_TO_STDOUT=0 20 | 21 | export DEVICE_ID=0; \ 22 | python inpaint.py \ 23 | --prompt "一只红色的狐狸坐在长椅上" \ 24 | --img demo/inpaint/overture-creations-5sI6fQgYIuo.png \ 25 | --mask demo/inpaint/overture-creations-5sI6fQgYIuo_mask.png \ 26 | --config configs/wukong-huahua_inpaint_inference.yaml \ 27 | --ckpt_name wukong-huahua-inpaint-ms.ckpt -------------------------------------------------------------------------------- /vision/wukong-huahua/scripts/run_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2022 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================ 16 | 17 | export GLOG_v=3 18 | export HCCL_CONNECT_TIMEOUT=600 19 | export ASCEND_GLOBAL_LOG_LEVEL=3 20 | export ASCEND_SLOG_PRINT_TO_STDOUT=0 21 | device_id=2 22 | 23 | output_path=output/ 24 | task_name=txt2img 25 | data_path=dataset/ 26 | pretrained_model_path=models/ 27 | train_config_file=configs/train_config.json 28 | 29 | rm -rf ${output_path:?}/${task_name:?} 30 | mkdir -p ${output_path:?}/${task_name:?} 31 | export RANK_SIZE=1;export DEVICE_ID=$device_id;export MS_COMPILER_CACHE_PATH=${output_path:?}/${task_name:?}; \ 32 | nohup python -u run_train.py \ 33 | --data_path=$data_path \ 34 | --train_config=$train_config_file \ 35 | --output_path=$output_path/$task_name \ 36 | --use_parallel=False \ 37 | --pretrained_model_path=$pretrained_model_path \ 38 | > $output_path/$task_name/log_train 2>&1 & -------------------------------------------------------------------------------- /vision/wukong-huahua/scripts/run_train_lora.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2022 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | export GLOG_v=3 18 | export HCCL_CONNECT_TIMEOUT=600 19 | export ASCEND_GLOBAL_LOG_LEVEL=3 20 | export ASCEND_SLOG_PRINT_TO_STDOUT=0 21 | device_id=6 22 | 23 | output_path=output/ 24 | task_name=txt2img 25 | data_path=dataset 26 | pretrained_model_path=models/ 27 | train_config_file=configs/train_config.json 28 | 29 | rm -rf ${output_path:?}/${task_name:?} 30 | mkdir -p ${output_path:?}/${task_name:?} 31 | export RANK_SIZE=1;export DEVICE_ID=$device_id;export MS_COMPILER_CACHE_PATH=${output_path:?}/${task_name:?}; \ 32 | nohup python -u run_train.py \ 33 | --data_path=$data_path \ 34 | --train_config=$train_config_file \ 35 | --output_path=$output_path/$task_name \ 36 | --use_parallel=False \ 37 | --pretrained_model_path=$pretrained_model_path \ 38 | --model_config=configs/v1-train-chinese-lora.yaml \ 39 | --start_learning_rate=1e-4 \ 40 | --end_learning_rate=1e-6 \ 41 | --enable_lora=True \ 42 | > $output_path/$task_name/train_1p_lora.log 2>&1 & -------------------------------------------------------------------------------- /vision/wukong-huahua/scripts/run_train_parallel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # -*- coding: UTF-8 -*- 3 | # Copyright 2022 Huawei Technologies Co., Ltd 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # ============================================================================ 17 | 18 | output_path=output/ 19 | task_name=txt2img 20 | data_path=dataset/ 21 | pretrained_model_path=models/ 22 | train_config_file=configs/train_config.json 23 | 24 | if [ $# != 3 ] 25 | then 26 | echo "Usage: 27 | bash scripts/run_train_parallel.sh [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [RANK_TABLE_FILE]" 28 | exit 1 29 | fi 30 | 31 | if [ $1 -lt 1 ] || [ $1 -gt 8 ] 32 | then 33 | echo "error: DEVICE_NUM=$1 is not in [1,8]" 34 | exit 1 35 | fi 36 | 37 | VISIBLE_DEVICES=$2 38 | IFS="," read -r -a CANDIDATE_DEVICE <<< "$VISIBLE_DEVICES" 39 | if [ ${#CANDIDATE_DEVICE[@]} -ne $1 ] 40 | then 41 | echo "error: DEVICE_NUM=$1 is not matched with VISIBLE_DEVICES=$2" 42 | exit 1 43 | fi 44 | 45 | if [ ! -f $3 ] 46 | then 47 | echo "error: RANK_TABLE_FILE=$3 is not a file" 48 | exit 1 49 | fi 50 | 51 | export GLOG_v=3 52 | export ASCEND_GLOBAL_LOG_LEVEL=3 53 | export ASCEND_GLOBAL_EVENT_ENABLE=0 54 | export ASCEND_SLOG_PRINT_TO_STDOUT=1 55 | export HCCL_CONNECT_TIMEOUT=600 56 | 57 | ulimit -u unlimited 58 | ulimit -SHn 65535 59 | export DEVICE_NUM=$1 60 | export RANK_SIZE=$1 61 | RANK_TABLE_FILE=$(realpath $3) 62 | export RANK_TABLE_FILE 63 | echo "RANK_TABLE_FILE=${RANK_TABLE_FILE}" 64 | 65 | rm -rf ${output_path:?}/${task_name:?} 66 | mkdir -p ${output_path:?}/${task_name:?} 67 | export MS_COMPILER_CACHE_PATH=${output_path:?}/${task_name:?} 68 | export SERVER_ID=0 69 | rank_start=$((DEVICE_NUM * SERVER_ID)) 70 | for((i=0; i<${RANK_SIZE}; i++)) 71 | do 72 | export RANK_ID=$((rank_start + i)) 73 | export DEVICE_ID=${CANDIDATE_DEVICE[i]} 74 | mkdir -p ${output_path:?}/${task_name:?}/rank_$i 75 | echo "start training for rank $RANK_ID, device $DEVICE_ID" 76 | nohup python -u run_train.py \ 77 | --use_parallel=True \ 78 | --data_path=$data_path \ 79 | --train_config=$train_config_file \ 80 | --output_path=$output_path/$task_name \ 81 | --pretrained_model_path=$pretrained_model_path \ 82 | > $output_path/$task_name/rank_$i/log_train 2>&1 & 83 | done -------------------------------------------------------------------------------- /vision/wukong-huahua/scripts/run_train_parallel_lora.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # -*- coding: UTF-8 -*- 3 | # Copyright 2022 Huawei Technologies Co., Ltd 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
16 | # ============================================================================ 17 | output_path=output/ 18 | task_name=txt2img 19 | data_path=dataset 20 | pretrained_model_path=models/ 21 | train_config_file=configs/train_config.json 22 | 23 | if [ $# != 3 ] 24 | then 25 | echo "Usage: 26 | bash scripts/run_train_parallel_lora.sh [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [RANK_TABLE_FILE]" 27 | exit 1 28 | fi 29 | 30 | if [ $1 -lt 1 ] || [ $1 -gt 8 ] 31 | then 32 | echo "error: DEVICE_NUM=$1 is not in [1,8]" 33 | exit 1 34 | fi 35 | 36 | VISIBLE_DEVICES=$2 37 | IFS="," read -r -a CANDIDATE_DEVICE <<< "$VISIBLE_DEVICES" 38 | if [ ${#CANDIDATE_DEVICE[@]} -ne $1 ] 39 | then 40 | echo "error: DEVICE_NUM=$1 is not matched with VISIBLE_DEVICES=$2" 41 | exit 1 42 | fi 43 | 44 | if [ ! -f $3 ] 45 | then 46 | echo "error: RANK_TABLE_FILE=$3 is not a file" 47 | exit 1 48 | fi 49 | 50 | export GLOG_v=3 51 | export ASCEND_GLOBAL_LOG_LEVEL=3 52 | export ASCEND_GLOBAL_EVENT_ENABLE=0 53 | export ASCEND_SLOG_PRINT_TO_STDOUT=1 54 | export HCCL_CONNECT_TIMEOUT=600 55 | 56 | ulimit -u unlimited 57 | ulimit -SHn 65535 58 | export DEVICE_NUM=$1 59 | export RANK_SIZE=$1 60 | RANK_TABLE_FILE=$(realpath $3) 61 | export RANK_TABLE_FILE 62 | echo "RANK_TABLE_FILE=${RANK_TABLE_FILE}" 63 | 64 | rm -rf ${output_path:?}/${task_name:?} 65 | mkdir -p ${output_path:?}/${task_name:?} 66 | export MS_COMPILER_CACHE_PATH=${output_path:?}/${task_name:?} 67 | export SERVER_ID=0 68 | rank_start=$((DEVICE_NUM * SERVER_ID)) 69 | for((i=0; i<${RANK_SIZE}; i++)) 70 | do 71 | export RANK_ID=$((rank_start + i)) 72 | export DEVICE_ID=${CANDIDATE_DEVICE[i]} 73 | mkdir -p ${output_path:?}/${task_name:?}/rank_$i 74 | echo "start training for rank $RANK_ID, device $DEVICE_ID" 75 | nohup python -u run_train.py \ 76 | --use_parallel=True \ 77 | --data_path=$data_path \ 78 | --train_config=$train_config_file \ 79 | --output_path=$output_path/$task_name \ 80 | --pretrained_model_path=$pretrained_model_path \ 81 | --model_config=configs/v1-train-chinese-lora.yaml \ 82 | --start_learning_rate=1e-4 \ 83 | --end_learning_rate=1e-6 \ 84 | --enable_lora=True \ 85 | > $output_path/$task_name/rank_$i/train_lora_8p.log 2>&1 & 86 | done -------------------------------------------------------------------------------- /vision/wukong-huahua/scripts/run_txt2img.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2022 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | export GLOG_v=3 18 | export ASCEND_GLOBAL_LOG_LEVEL=3 19 | export ASCEND_SLOG_PRINT_TO_STDOUT=0 20 | 21 | export DEVICE_ID=0; \ 22 | python txt2img.py \ 23 | --prompt "来自深渊 风景 绘画 写实风格" \ 24 | --config configs/v1-inference-chinese.yaml \ 25 | --output_path ./output/ \ 26 | --seed 42 \ 27 | --dpm_solver \ 28 | --n_iter 4 \ 29 | --n_samples 4 \ 30 | --W 512 \ 31 | --H 512 \ 32 | --ddim_steps 15 33 | -------------------------------------------------------------------------------- /vision/wukong-huahua/scripts/run_txt2img_lora.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2022 Huawei Technologies Co., Ltd 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================ 16 | 17 | export GLOG_v=3 18 | export ASCEND_GLOBAL_LOG_LEVEL=3 19 | export ASCEND_SLOG_PRINT_TO_STDOUT=0 20 | 21 | export DEVICE_ID=7; \ 22 | python txt2img.py \ 23 | --prompt "一个带着红色蝴蝶结的小女孩" \ 24 | --config configs/v1-inference-chinese-lora.yaml \ 25 | --output_path ./output/ \ 26 | --enable_lora True \ 27 | --lora_ckpt_filepath ./output/txt2img_lora/ckpt/rank_0/wkhh_txt2img_lora-12_1224.ckpt \ 28 | --seed 42 \ 29 | --n_iter 4 \ 30 | --n_samples 4 \ 31 | --W 512 \ 32 | --H 512 \ 33 | --ddim_steps 30 > test_lora.log 2>&1 & --------------------------------------------------------------------------------
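A minimal usage sketch for the LearningRate schedule defined in ldm/modules/train/learningrate.py above — assuming only that MindSpore is installed and the wukong-huahua directory is on PYTHONPATH; the step values and hyperparameters are illustrative, not repository defaults:

    import mindspore as ms
    from ldm.modules.train.learningrate import LearningRate
    from ldm.modules.train.tools import set_random_seed

    set_random_seed(3407)  # seeds python, numpy and mindspore in one call

    # Linear warmup for 1000 steps, then cosine decay from 1e-4 down to 1e-6.
    lr = LearningRate(start_learning_rate=1e-4, end_learning_rate=1e-6,
                      warmup_steps=1000, decay_steps=100000)

    for step in (0, 500, 1000, 50000, 100000):
        print(step, lr(ms.Tensor(step, ms.int32)))  # LearningRateSchedule cells are callable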