├── LICENSES
│   ├── README.md
│   ├── LICENSE
│   └── LICENSE_guided_diffusion
├── download.sh
├── conf_mgt
│   ├── __init__.py
│   └── conf_base.py
├── guided_diffusion
│   ├── __init__.py
│   ├── dist_util.py
│   ├── nn.py
│   ├── scheduler.py
│   ├── respace.py
│   ├── image_datasets.py
│   ├── fp16_util.py
│   ├── script_util.py
│   ├── gaussian_diffusion.py
│   └── unet.py
├── utils
│   └── __init__.py
├── confs
│   ├── face_example.yml
│   ├── test_p256_nn2.yml
│   ├── test_p256_ex64.yml
│   ├── test_p256_thin.yml
│   ├── test_c256_ev2li.yml
│   ├── test_p256_ev2li.yml
│   ├── test_p256_thick.yml
│   ├── test_p256_genhalf.yml
│   ├── test_c256_nn2.yml
│   ├── test_c256_ex64.yml
│   ├── test_c256_thin.yml
│   ├── test_c256_thick.yml
│   ├── test_inet256_nn2.yml
│   ├── test_c256_genhalf.yml
│   ├── test_inet256_ex64.yml
│   ├── test_inet256_thin.yml
│   ├── test_inet256_ev2li.yml
│   ├── test_inet256_thick.yml
│   └── test_inet256_genhalf.yml
├── test.py
└── README.md

/LICENSES/README.md:
--------------------------------------------------------------------------------
 1 | # License and Acknowledgement
 2 | 
 3 | A big thanks to the following contributors who open-sourced their code and thereby helped us a lot in developing RePaint!
 4 | 
 5 | This repository was forked from:
 6 | https://github.com/openai/guided-diffusion
 7 | 
 8 | It contains code from:
 9 | https://github.com/hojonathanho/diffusion
10 | 
11 | If we missed a contribution, please contact us.
--------------------------------------------------------------------------------
/download.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Requires gdown for the Google Drive downloads: pip install gdown
 3 | 
 4 | # Pretrained models
 5 | (
 6 | mkdir -p data/pretrained
 7 | cd data/pretrained
 8 | 
 9 | wget https://openaipublic.blob.core.windows.net/diffusion/jul-2021/256x256_classifier.pt # Trained by OpenAI
10 | wget https://openaipublic.blob.core.windows.net/diffusion/jul-2021/256x256_diffusion.pt # Trained by OpenAI
11 | 
12 | gdown https://drive.google.com/uc?id=1norNWWGYP3EZ_o05DmoW1ryKuKMmhlCX
13 | gdown https://drive.google.com/uc?id=1QEl-btGbzQz6IwkXiFGd49uQNTUtTHsk
14 | )
15 | 
16 | # data
17 | (
18 | gdown https://drive.google.com/uc?id=1Q_dxuyI41AAmSv9ti3780BwaJQqwvwMv
19 | unzip data.zip
20 | rm data.zip
21 | )
--------------------------------------------------------------------------------
/LICENSES/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2022 Huawei Technologies Co., Ltd.
 2 | Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
 3 | you may not use this file except in compliance with the License.
 4 | You may obtain a copy of the License at
 5 | 
 6 | https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
 7 | 
 8 | The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
--------------------------------------------------------------------------------
/conf_mgt/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
 3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | 18 | from conf_mgt.conf_base import Default_Conf 19 | -------------------------------------------------------------------------------- /guided_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | """ 18 | Based on "Improved Denoising Diffusion Probabilistic Models". 19 | """ 20 | -------------------------------------------------------------------------------- /LICENSES/LICENSE_guided_diffusion: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
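
The conf_mgt package above only re-exports `Default_Conf`; its implementation (conf_mgt/conf_base.py) and the test.py entry point appear in the tree but are not reproduced in this listing. Assuming `Default_Conf` is a dict-style config object that can be populated from parsed YAML (an assumption — the class body is not shown here), wiring it to the `yamlread` helper from utils/__init__.py below might look like this minimal sketch:

```python
# Hypothetical usage sketch: Default_Conf's real interface lives in
# conf_mgt/conf_base.py, which is not included in this listing.
from conf_mgt import Default_Conf
from utils import yamlread  # defined in utils/__init__.py below

conf = Default_Conf()                            # assumed dict-like config object
conf.update(yamlread("confs/face_example.yml"))  # merge the YAML key/values
print(conf.get("name"))                          # -> 'face_example'
```
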
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
 3 | # you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at
 5 | #
 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
 7 | #
 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 | 
17 | import yaml
18 | import os
19 | from PIL import Image
20 | 
21 | 
22 | def txtread(path):
23 |     path = os.path.expanduser(path)
24 |     with open(path, 'r') as f:
25 |         return f.read()
26 | 
27 | 
28 | def yamlread(path):
29 |     return yaml.safe_load(txtread(path=path))
30 | 
31 | 
32 | def imwrite(path=None, img=None):
33 |     Image.fromarray(img).save(path)
--------------------------------------------------------------------------------
/guided_diffusion/dist_util.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
 3 | # you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at
 5 | #
 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
 7 | #
 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 | 
17 | """
18 | Helpers for distributed training.
19 | """
20 | 
21 | import io
22 | 
23 | import blobfile as bf
24 | import torch as th
25 | 
26 | 
27 | def dev(device):
28 |     """
29 |     Get the torch device to use (CUDA if available, otherwise CPU).
30 |     """
31 |     if device is None:
32 |         if th.cuda.is_available():
33 |             return th.device("cuda")
34 |         return th.device("cpu")
35 |     return th.device(device)
36 | 
37 | 
38 | def load_state_dict(path, backend=None, **kwargs):
39 |     # `backend` is unused here; presumably kept for compatibility with the
40 |     # original guided-diffusion call sites.
41 |     with bf.BlobFile(path, "rb") as f:
42 |         data = f.read()
43 |     return th.load(io.BytesIO(data), **kwargs)
--------------------------------------------------------------------------------
/confs/face_example.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: false 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: false 28 | use_scale_shift_norm: true 29 | classifier_scale: 4.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | latex_name: RePaint 53 | method_name: Repaint 54 | image_size: 256 55 | model_path: ./data/pretrained/celeba256_250000.pt 56 | name: face_example 57 | inpa_inj_sched_prev: true 58 | n_jobs: 1 59 | print_estimated_vars: true 60 | inpa_inj_sched_prev_cumnoise: false 61 | schedule_jump_params: 62 | t_T: 250 63 | n_sample: 1 64 | jump_length: 10 65 | jump_n_sample: 10 66 | data: 67 | eval: 68 | paper_face_mask: 69 | mask_loader: true 70 | gt_path: ./data/datasets/gts/face 71 | mask_path: ./data/datasets/gt_keep_masks/face 72 | image_size: 256 73 | class_cond: false 74 | deterministic: true 75 | random_crop: false 76 | random_flip: false 77 | return_dict: true 78 | drop_last: false 79 | batch_size: 1 80 | return_dataloader: true 81 | offset: 0 82 | max_len: 8 83 | paths: 84 | srs: ./log/face_example/inpainted 85 | lrs: ./log/face_example/gt_masked 86 | gts: ./log/face_example/gt 87 | gt_keep_masks: ./log/face_example/gt_keep_mask 88 | -------------------------------------------------------------------------------- /confs/test_p256_nn2.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 
9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: false 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: false 28 | use_scale_shift_norm: true 29 | classifier_scale: 4.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | model_path: ./data/pretrained/places256_300000.pt 54 | name: test_p256_nn2 55 | inpa_inj_sched_prev: true 56 | n_jobs: 25 57 | print_estimated_vars: true 58 | inpa_inj_sched_prev_cumnoise: false 59 | schedule_jump_params: 60 | t_T: 250 61 | n_sample: 1 62 | jump_length: 10 63 | jump_n_sample: 10 64 | data: 65 | eval: 66 | lama_p256_nn2_n100_test: 67 | mask_loader: true 68 | gt_path: ./data/datasets/gts/p256 69 | mask_path: ./data/datasets/gt_keep_masks/nn2 70 | image_size: 256 71 | class_cond: false 72 | deterministic: true 73 | random_crop: false 74 | random_flip: false 75 | return_dict: true 76 | drop_last: false 77 | batch_size: 4 78 | return_dataloader: true 79 | ds_conf: 80 | name: random_nn2_256 81 | max_len: 100 82 | paths: 83 | srs: ./log/test_p256_nn2/inpainted 84 | lrs: ./log/test_p256_nn2/gt_masked 85 | gts: ./log/test_p256_nn2/gt 86 | gt_keep_masks: ./log/test_p256_nn2/gt_keep_mask 87 | -------------------------------------------------------------------------------- /confs/test_p256_ex64.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: false 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: false 28 | use_scale_shift_norm: true 29 | classifier_scale: 4.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | model_path: ./data/pretrained/places256_300000.pt 54 | name: test_p256_ex64 55 | inpa_inj_sched_prev: true 56 | n_jobs: 25 57 | print_estimated_vars: true 58 | inpa_inj_sched_prev_cumnoise: false 59 | schedule_jump_params: 60 | t_T: 250 61 | n_sample: 1 62 | jump_length: 10 63 | jump_n_sample: 10 64 | data: 65 | eval: 66 | lama_p256_ex64_n100_test: 67 | mask_loader: true 68 | gt_path: ./data/datasets/gts/p256 69 | mask_path: ./data/datasets/gt_keep_masks/ex64 70 | image_size: 256 71 | class_cond: false 72 | deterministic: true 73 | random_crop: false 74 | random_flip: false 75 | return_dict: true 76 | drop_last: false 77 | batch_size: 4 78 | return_dataloader: true 79 | ds_conf: 80 | name: random_ex64_256 81 | max_len: 100 82 | paths: 83 | srs: ./log/test_p256_ex64/inpainted 84 | lrs: ./log/test_p256_ex64/gt_masked 85 | gts: ./log/test_p256_ex64/gt 86 | gt_keep_masks: ./log/test_p256_ex64/gt_keep_mask 87 | -------------------------------------------------------------------------------- /confs/test_p256_thin.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: false 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: false 28 | use_scale_shift_norm: true 29 | classifier_scale: 4.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | model_path: ./data/pretrained/places256_300000.pt 54 | name: test_p256_thin 55 | inpa_inj_sched_prev: true 56 | n_jobs: 25 57 | print_estimated_vars: true 58 | inpa_inj_sched_prev_cumnoise: false 59 | schedule_jump_params: 60 | t_T: 250 61 | n_sample: 1 62 | jump_length: 10 63 | jump_n_sample: 10 64 | data: 65 | eval: 66 | lama_p256_thin_n100_test: 67 | mask_loader: true 68 | gt_path: ./data/datasets/gts/p256 69 | mask_path: ./data/datasets/gt_keep_masks/thin 70 | image_size: 256 71 | class_cond: false 72 | deterministic: true 73 | random_crop: false 74 | random_flip: false 75 | return_dict: true 76 | drop_last: false 77 | batch_size: 4 78 | return_dataloader: true 79 | ds_conf: 80 | name: random_thin_256 81 | max_len: 100 82 | paths: 83 | srs: ./log/test_p256_thin/inpainted 84 | lrs: ./log/test_p256_thin/gt_masked 85 | gts: ./log/test_p256_thin/gt 86 | gt_keep_masks: ./log/test_p256_thin/gt_keep_mask 87 | -------------------------------------------------------------------------------- /confs/test_c256_ev2li.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: false 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: false 28 | use_scale_shift_norm: true 29 | classifier_scale: 4.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | model_path: ./data/pretrained/celeba256_250000.pt 54 | name: test_c256_ev2li 55 | inpa_inj_sched_prev: true 56 | n_jobs: 25 57 | print_estimated_vars: true 58 | inpa_inj_sched_prev_cumnoise: false 59 | schedule_jump_params: 60 | t_T: 250 61 | n_sample: 1 62 | jump_length: 10 63 | jump_n_sample: 10 64 | data: 65 | eval: 66 | lama_c256_ev2li_n100_test: 67 | mask_loader: true 68 | gt_path: ./data/datasets/gts/c256 69 | mask_path: ./data/datasets/gt_keep_masks/ev2li 70 | image_size: 256 71 | class_cond: false 72 | deterministic: true 73 | random_crop: false 74 | random_flip: false 75 | return_dict: true 76 | drop_last: false 77 | batch_size: 4 78 | return_dataloader: true 79 | ds_conf: 80 | name: fix_ev2li_256 81 | max_len: 100 82 | paths: 83 | srs: ./log/test_c256_ev2li/inpainted 84 | lrs: ./log/test_c256_ev2li/gt_masked 85 | gts: ./log/test_c256_ev2li/gt 86 | gt_keep_masks: ./log/test_c256_ev2li/gt_keep_mask 87 | -------------------------------------------------------------------------------- /confs/test_p256_ev2li.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: false 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: false 28 | use_scale_shift_norm: true 29 | classifier_scale: 4.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | model_path: ./data/pretrained/places256_300000.pt 54 | name: test_p256_ev2li 55 | inpa_inj_sched_prev: true 56 | n_jobs: 25 57 | print_estimated_vars: true 58 | inpa_inj_sched_prev_cumnoise: false 59 | schedule_jump_params: 60 | t_T: 250 61 | n_sample: 1 62 | jump_length: 10 63 | jump_n_sample: 10 64 | data: 65 | eval: 66 | lama_p256_ev2li_n100_test: 67 | mask_loader: true 68 | gt_path: ./data/datasets/gts/p256 69 | mask_path: ./data/datasets/gt_keep_masks/ev2li 70 | image_size: 256 71 | class_cond: false 72 | deterministic: true 73 | random_crop: false 74 | random_flip: false 75 | return_dict: true 76 | drop_last: false 77 | batch_size: 4 78 | return_dataloader: true 79 | ds_conf: 80 | name: random_ev2li_256 81 | max_len: 100 82 | paths: 83 | srs: ./log/test_p256_ev2li/inpainted 84 | lrs: ./log/test_p256_ev2li/gt_masked 85 | gts: ./log/test_p256_ev2li/gt 86 | gt_keep_masks: ./log/test_p256_ev2li/gt_keep_mask 87 | -------------------------------------------------------------------------------- /confs/test_p256_thick.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: false 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: false 28 | use_scale_shift_norm: true 29 | classifier_scale: 4.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | model_path: ./data/pretrained/places256_300000.pt 54 | name: test_p256_thick 55 | inpa_inj_sched_prev: true 56 | n_jobs: 25 57 | print_estimated_vars: true 58 | inpa_inj_sched_prev_cumnoise: false 59 | schedule_jump_params: 60 | t_T: 250 61 | n_sample: 1 62 | jump_length: 10 63 | jump_n_sample: 10 64 | data: 65 | eval: 66 | lama_p256_thick_n100_test: 67 | mask_loader: true 68 | gt_path: ./data/datasets/gts/p256 69 | mask_path: ./data/datasets/gt_keep_masks/thick 70 | image_size: 256 71 | class_cond: false 72 | deterministic: true 73 | random_crop: false 74 | random_flip: false 75 | return_dict: true 76 | drop_last: false 77 | batch_size: 4 78 | return_dataloader: true 79 | ds_conf: 80 | name: random_thick_256 81 | max_len: 100 82 | paths: 83 | srs: ./log/test_p256_thick/inpainted 84 | lrs: ./log/test_p256_thick/gt_masked 85 | gts: ./log/test_p256_thick/gt 86 | gt_keep_masks: ./log/test_p256_thick/gt_keep_mask 87 | -------------------------------------------------------------------------------- /confs/test_p256_genhalf.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: false 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: false 28 | use_scale_shift_norm: true 29 | classifier_scale: 4.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | model_path: ./data/pretrained/places256_300000.pt 54 | name: test_p256_genhalf 55 | inpa_inj_sched_prev: true 56 | n_jobs: 25 57 | print_estimated_vars: true 58 | inpa_inj_sched_prev_cumnoise: false 59 | schedule_jump_params: 60 | t_T: 250 61 | n_sample: 1 62 | jump_length: 10 63 | jump_n_sample: 10 64 | data: 65 | eval: 66 | lama_p256_genhalf_n100_test: 67 | mask_loader: true 68 | gt_path: ./data/datasets/gts/p256 69 | mask_path: ./data/datasets/gt_keep_masks/genhalf 70 | image_size: 256 71 | class_cond: false 72 | deterministic: true 73 | random_crop: false 74 | random_flip: false 75 | return_dict: true 76 | drop_last: false 77 | batch_size: 4 78 | return_dataloader: true 79 | ds_conf: 80 | name: random_genhalf_256 81 | max_len: 100 82 | paths: 83 | srs: ./log/test_p256_genhalf/inpainted 84 | lrs: ./log/test_p256_genhalf/gt_masked 85 | gts: ./log/test_p256_genhalf/gt 86 | gt_keep_masks: ./log/test_p256_genhalf/gt_keep_mask 87 | -------------------------------------------------------------------------------- /confs/test_c256_nn2.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 | 
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_nn2
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 |   t_T: 250
61 |   n_sample: 1
62 |   jump_length: 10
63 |   jump_n_sample: 10
64 | data:
65 |   eval:
66 |     lama_c256_nn2_n100_test:
67 |       mask_loader: true
68 |       gt_path: ./data/datasets/gts/c256
69 |       mask_path: ./data/datasets/gt_keep_masks/nn2
70 |       image_size: 256
71 |       class_cond: false
72 |       deterministic: true
73 |       random_crop: false
74 |       random_flip: false
75 |       return_dict: true
76 |       drop_last: false
77 |       batch_size: 4
78 |       return_dataloader: true
79 |       ds_conf:
80 |         name: fix_nn2_256
81 |       max_len: 100
82 |       paths:
83 |         srs: ./log/test_c256_nn2/inpainted
84 |         lrs: ./log/test_c256_nn2/gt_masked
85 |         gts: ./log/test_c256_nn2/gt
86 |         gt_keep_masks: ./log/test_c256_nn2/gt_keep_mask
--------------------------------------------------------------------------------
/confs/test_c256_ex64.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
 3 | # you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at
 5 | #
 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
 7 | #
 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 | 
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_ex64
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 |   t_T: 250
61 |   n_sample: 1
62 |   jump_length: 10
63 |   jump_n_sample: 10
64 | data:
65 |   eval:
66 |     lama_c256_ex64_n100_test:
67 |       mask_loader: true
68 |       gt_path: ./data/datasets/gts/c256
69 |       mask_path: ./data/datasets/gt_keep_masks/ex64
70 |       image_size: 256
71 |       class_cond: false
72 |       deterministic: true
73 |       random_crop: false
74 |       random_flip: false
75 |       return_dict: true
76 |       drop_last: false
77 |       batch_size: 4
78 |       return_dataloader: true
79 |       ds_conf:
80 |         name: fix_ex64_256
81 |       max_len: 100
82 |       paths:
83 |         srs: ./log/test_c256_ex64/inpainted
84 |         lrs: ./log/test_c256_ex64/gt_masked
85 |         gts: ./log/test_c256_ex64/gt
86 |         gt_keep_masks: ./log/test_c256_ex64/gt_keep_mask
--------------------------------------------------------------------------------
/confs/test_c256_thin.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
 3 | # you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at
 5 | #
 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
 7 | #
 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 | 
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_thin
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 |   t_T: 250
61 |   n_sample: 1
62 |   jump_length: 10
63 |   jump_n_sample: 10
64 | data:
65 |   eval:
66 |     lama_c256_thin_n100_test:
67 |       mask_loader: true
68 |       gt_path: ./data/datasets/gts/c256
69 |       mask_path: ./data/datasets/gt_keep_masks/thin
70 |       image_size: 256
71 |       class_cond: false
72 |       deterministic: true
73 |       random_crop: false
74 |       random_flip: false
75 |       return_dict: true
76 |       drop_last: false
77 |       batch_size: 4
78 |       return_dataloader: true
79 |       ds_conf:
80 |         name: random_thin_256
81 |       max_len: 100
82 |       paths:
83 |         srs: ./log/test_c256_thin/inpainted
84 |         lrs: ./log/test_c256_thin/gt_masked
85 |         gts: ./log/test_c256_thin/gt
86 |         gt_keep_masks: ./log/test_c256_thin/gt_keep_mask
--------------------------------------------------------------------------------
/confs/test_c256_thick.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
 3 | # you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at
 5 | #
 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
 7 | #
 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 | 
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_thick
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 |   t_T: 250
61 |   n_sample: 1
62 |   jump_length: 10
63 |   jump_n_sample: 10
64 | data:
65 |   eval:
66 |     lama_c256_thick_n100_test:
67 |       mask_loader: true
68 |       gt_path: ./data/datasets/gts/c256
69 |       mask_path: ./data/datasets/gt_keep_masks/thick
70 |       image_size: 256
71 |       class_cond: false
72 |       deterministic: true
73 |       random_crop: false
74 |       random_flip: false
75 |       return_dict: true
76 |       drop_last: false
77 |       batch_size: 4
78 |       return_dataloader: true
79 |       ds_conf:
80 |         name: random_thick_256
81 |       max_len: 100
82 |       paths:
83 |         srs: ./log/test_c256_thick/inpainted
84 |         lrs: ./log/test_c256_thick/gt_masked
85 |         gts: ./log/test_c256_thick/gt
86 |         gt_keep_masks: ./log/test_c256_thick/gt_keep_mask
--------------------------------------------------------------------------------
/confs/test_inet256_nn2.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
 3 | # you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at
 5 | #
 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
 7 | #
 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: true 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: true 28 | use_scale_shift_norm: true 29 | classifier_scale: 1.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | classifier_path: ./data/pretrained/256x256_classifier.pt 54 | model_path: ./data/pretrained/256x256_diffusion.pt 55 | name: test_inet256_nn2 56 | inpa_inj_sched_prev: true 57 | n_jobs: 25 58 | print_estimated_vars: true 59 | inpa_inj_sched_prev_cumnoise: false 60 | schedule_jump_params: 61 | t_T: 250 62 | n_sample: 1 63 | jump_length: 10 64 | jump_n_sample: 10 65 | data: 66 | eval: 67 | lama_inet256_nn2_n100_test: 68 | mask_loader: true 69 | gt_path: ./data/datasets/gts/inet256 70 | mask_path: ./data/datasets/gt_keep_masks/nn2 71 | image_size: 256 72 | class_cond: false 73 | deterministic: true 74 | random_crop: false 75 | random_flip: false 76 | return_dict: true 77 | drop_last: false 78 | batch_size: 4 79 | return_dataloader: true 80 | ds_conf: 81 | name: random_nn2_256 82 | max_len: 100 83 | paths: 84 | srs: ./log/test_inet256_nn2/inpainted 85 | lrs: ./log/test_inet256_nn2/gt_masked 86 | gts: ./log/test_inet256_nn2/gt 87 | gt_keep_masks: ./log/test_inet256_nn2/gt_keep_mask 88 | -------------------------------------------------------------------------------- /confs/test_c256_genhalf.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 | 
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_genhalf
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 |   t_T: 250
61 |   n_sample: 1
62 |   jump_length: 10
63 |   jump_n_sample: 10
64 | data:
65 |   eval:
66 |     lama_c256_genhalf_n100_test:
67 |       mask_loader: true
68 |       gt_path: ./data/datasets/gts/c256
69 |       mask_path: ./data/datasets/gt_keep_masks/genhalf
70 |       image_size: 256
71 |       class_cond: false
72 |       deterministic: true
73 |       random_crop: false
74 |       random_flip: false
75 |       return_dict: true
76 |       drop_last: false
77 |       batch_size: 4
78 |       return_dataloader: true
79 |       ds_conf:
80 |         name: fix_genhalf_256
81 |       max_len: 100
82 |       paths:
83 |         srs: ./log/test_c256_genhalf/inpainted
84 |         lrs: ./log/test_c256_genhalf/gt_masked
85 |         gts: ./log/test_c256_genhalf/gt
86 |         gt_keep_masks: ./log/test_c256_genhalf/gt_keep_mask
--------------------------------------------------------------------------------
/confs/test_inet256_ex64.yml:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
 3 | # you may not use this file except in compliance with the License.
 4 | # You may obtain a copy of the License at
 5 | #
 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
 7 | #
 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: true 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: true 28 | use_scale_shift_norm: true 29 | classifier_scale: 1.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | classifier_path: ./data/pretrained/256x256_classifier.pt 54 | model_path: ./data/pretrained/256x256_diffusion.pt 55 | name: test_inet256_ex64 56 | inpa_inj_sched_prev: true 57 | n_jobs: 25 58 | print_estimated_vars: true 59 | inpa_inj_sched_prev_cumnoise: false 60 | schedule_jump_params: 61 | t_T: 250 62 | n_sample: 1 63 | jump_length: 10 64 | jump_n_sample: 10 65 | data: 66 | eval: 67 | lama_inet256_ex64_n100_test: 68 | mask_loader: true 69 | gt_path: ./data/datasets/gts/inet256 70 | mask_path: ./data/datasets/gt_keep_masks/ex64 71 | image_size: 256 72 | class_cond: false 73 | deterministic: true 74 | random_crop: false 75 | random_flip: false 76 | return_dict: true 77 | drop_last: false 78 | batch_size: 4 79 | return_dataloader: true 80 | ds_conf: 81 | name: random_ex64_256 82 | max_len: 100 83 | paths: 84 | srs: ./log/test_inet256_ex64/inpainted 85 | lrs: ./log/test_inet256_ex64/gt_masked 86 | gts: ./log/test_inet256_ex64/gt 87 | gt_keep_masks: ./log/test_inet256_ex64/gt_keep_mask 88 | -------------------------------------------------------------------------------- /confs/test_inet256_thin.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: true 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: true 28 | use_scale_shift_norm: true 29 | classifier_scale: 1.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | classifier_path: ./data/pretrained/256x256_classifier.pt 54 | model_path: ./data/pretrained/256x256_diffusion.pt 55 | name: test_inet256_thin 56 | inpa_inj_sched_prev: true 57 | n_jobs: 25 58 | print_estimated_vars: true 59 | inpa_inj_sched_prev_cumnoise: false 60 | schedule_jump_params: 61 | t_T: 250 62 | n_sample: 1 63 | jump_length: 10 64 | jump_n_sample: 10 65 | data: 66 | eval: 67 | lama_inet256_thin_n100_test: 68 | mask_loader: true 69 | gt_path: ./data/datasets/gts/inet256 70 | mask_path: ./data/datasets/gt_keep_masks/thin 71 | image_size: 256 72 | class_cond: false 73 | deterministic: true 74 | random_crop: false 75 | random_flip: false 76 | return_dict: true 77 | drop_last: false 78 | batch_size: 4 79 | return_dataloader: true 80 | ds_conf: 81 | name: random_thin_256 82 | max_len: 100 83 | paths: 84 | srs: ./log/test_inet256_thin/inpainted 85 | lrs: ./log/test_inet256_thin/gt_masked 86 | gts: ./log/test_inet256_thin/gt 87 | gt_keep_masks: ./log/test_inet256_thin/gt_keep_mask 88 | -------------------------------------------------------------------------------- /confs/test_inet256_ev2li.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: true 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: true 28 | use_scale_shift_norm: true 29 | classifier_scale: 1.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | classifier_path: ./data/pretrained/256x256_classifier.pt 54 | model_path: ./data/pretrained/256x256_diffusion.pt 55 | name: test_inet256_ev2li 56 | inpa_inj_sched_prev: true 57 | n_jobs: 25 58 | print_estimated_vars: true 59 | inpa_inj_sched_prev_cumnoise: false 60 | schedule_jump_params: 61 | t_T: 250 62 | n_sample: 1 63 | jump_length: 10 64 | jump_n_sample: 10 65 | data: 66 | eval: 67 | lama_inet256_ev2li_n100_test: 68 | mask_loader: true 69 | gt_path: ./data/datasets/gts/inet256 70 | mask_path: ./data/datasets/gt_keep_masks/ev2li 71 | image_size: 256 72 | class_cond: false 73 | deterministic: true 74 | random_crop: false 75 | random_flip: false 76 | return_dict: true 77 | drop_last: false 78 | batch_size: 4 79 | return_dataloader: true 80 | ds_conf: 81 | name: random_ev2li_256 82 | max_len: 100 83 | paths: 84 | srs: ./log/test_inet256_ev2li/inpainted 85 | lrs: ./log/test_inet256_ev2li/gt_masked 86 | gts: ./log/test_inet256_ev2li/gt 87 | gt_keep_masks: ./log/test_inet256_ev2li/gt_keep_mask 88 | -------------------------------------------------------------------------------- /confs/test_inet256_thick.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: true 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: true 28 | use_scale_shift_norm: true 29 | classifier_scale: 1.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | classifier_path: ./data/pretrained/256x256_classifier.pt 54 | model_path: ./data/pretrained/256x256_diffusion.pt 55 | name: test_inet256_thick 56 | inpa_inj_sched_prev: true 57 | n_jobs: 25 58 | print_estimated_vars: true 59 | inpa_inj_sched_prev_cumnoise: false 60 | schedule_jump_params: 61 | t_T: 250 62 | n_sample: 1 63 | jump_length: 10 64 | jump_n_sample: 10 65 | data: 66 | eval: 67 | lama_inet256_thick_n100_test: 68 | mask_loader: true 69 | gt_path: ./data/datasets/gts/inet256 70 | mask_path: ./data/datasets/gt_keep_masks/thick 71 | image_size: 256 72 | class_cond: false 73 | deterministic: true 74 | random_crop: false 75 | random_flip: false 76 | return_dict: true 77 | drop_last: false 78 | batch_size: 4 79 | return_dataloader: true 80 | ds_conf: 81 | name: random_thick_256 82 | max_len: 100 83 | paths: 84 | srs: ./log/test_inet256_thick/inpainted 85 | lrs: ./log/test_inet256_thick/gt_masked 86 | gts: ./log/test_inet256_thick/gt 87 | gt_keep_masks: ./log/test_inet256_thick/gt_keep_mask 88 | -------------------------------------------------------------------------------- /confs/test_inet256_genhalf.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | attention_resolutions: 32,16,8 18 | class_cond: true 19 | diffusion_steps: 1000 20 | learn_sigma: true 21 | noise_schedule: linear 22 | num_channels: 256 23 | num_head_channels: 64 24 | num_heads: 4 25 | num_res_blocks: 2 26 | resblock_updown: true 27 | use_fp16: true 28 | use_scale_shift_norm: true 29 | classifier_scale: 1.0 30 | lr_kernel_n_std: 2 31 | num_samples: 100 32 | show_progress: true 33 | timestep_respacing: '250' 34 | use_kl: false 35 | predict_xstart: false 36 | rescale_timesteps: false 37 | rescale_learned_sigmas: false 38 | classifier_use_fp16: false 39 | classifier_width: 128 40 | classifier_depth: 2 41 | classifier_attention_resolutions: 32,16,8 42 | classifier_use_scale_shift_norm: true 43 | classifier_resblock_updown: true 44 | classifier_pool: attention 45 | num_heads_upsample: -1 46 | channel_mult: '' 47 | dropout: 0.0 48 | use_checkpoint: false 49 | use_new_attention_order: false 50 | clip_denoised: true 51 | use_ddim: false 52 | image_size: 256 53 | classifier_path: ./data/pretrained/256x256_classifier.pt 54 | model_path: ./data/pretrained/256x256_diffusion.pt 55 | name: test_inet256_genhalf 56 | inpa_inj_sched_prev: true 57 | n_jobs: 25 58 | print_estimated_vars: true 59 | inpa_inj_sched_prev_cumnoise: false 60 | schedule_jump_params: 61 | t_T: 250 62 | n_sample: 1 63 | jump_length: 10 64 | jump_n_sample: 10 65 | data: 66 | eval: 67 | lama_inet256_genhalf_n100_test: 68 | mask_loader: true 69 | gt_path: ./data/datasets/gts/inet256 70 | mask_path: ./data/datasets/gt_keep_masks/genhalf 71 | image_size: 256 72 | class_cond: false 73 | deterministic: true 74 | random_crop: false 75 | random_flip: false 76 | return_dict: true 77 | drop_last: false 78 | batch_size: 4 79 | return_dataloader: true 80 | ds_conf: 81 | name: random_genhalf_256 82 | max_len: 100 83 | paths: 84 | srs: ./log/test_inet256_genhalf/inpainted 85 | lrs: ./log/test_inet256_genhalf/gt_masked 86 | gts: ./log/test_inet256_genhalf/gt 87 | gt_keep_masks: ./log/test_inet256_genhalf/gt_keep_mask 88 | -------------------------------------------------------------------------------- /conf_mgt/conf_base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
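#
# Editor's sketch (not part of the original file): Default_Conf below is a
# dict-like config whose missing keys resolve to None, with dotted lookups
# provided by pget. For example:
#
#   conf = Default_Conf()
#   conf.update({'schedule_jump_params': {'t_T': 250}})
#   conf.pget('schedule_jump_params.t_T')  # -> 250
#   conf.batch_size                        # -> None (NoneDict fallback)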
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | from functools import lru_cache 18 | import os 19 | import torch 20 | from utils import imwrite 21 | 22 | from collections import defaultdict 23 | from os.path import isfile, expanduser 24 | 25 | def to_file_ext(img_names, ext): 26 | img_names_out = [] 27 | for img_name in img_names: 28 | splits = img_name.split('.') 29 | if not len(splits) == 2: 30 | raise RuntimeError("File name needs exactly one '.':", img_name) 31 | img_names_out.append(splits[0] + '.' + ext) 32 | 33 | return img_names_out 34 | 35 | def write_images(imgs, img_names, dir_path): 36 | os.makedirs(dir_path, exist_ok=True) 37 | 38 | for image_name, image in zip(img_names, imgs): 39 | out_path = os.path.join(dir_path, image_name) 40 | imwrite(img=image, path=out_path) 41 | 42 | 43 | 44 | class NoneDict(defaultdict): 45 | def __init__(self): 46 | super().__init__(self.return_None) 47 | 48 | @staticmethod 49 | def return_None(): 50 | return None 51 | 52 | def __getattr__(self, attr): 53 | return self.get(attr) 54 | 55 | 56 | class Default_Conf(NoneDict): 57 | def __init__(self): 58 | pass 59 | 60 | def get_dataloader(self, dset='train', dsName=None, batch_size=None, return_dataset=False): 61 | 62 | if batch_size is None: 63 | batch_size = self.batch_size 64 | 65 | candidates = self['data'][dset] 66 | ds_conf = candidates[dsName].copy() 67 | 68 | if ds_conf.get('mask_loader', False): 69 | from guided_diffusion.image_datasets import load_data_inpa 70 | return load_data_inpa(**ds_conf, conf=self) 71 | else: 72 | raise NotImplementedError() 73 | 74 | def get_debug_variance_path(self): 75 | return os.path.expanduser(os.path.join(self.get_default_eval_conf()['paths']['root'], 'debug/debug_variance')) 76 | 77 | @ staticmethod 78 | def device(): 79 | return 'cuda' if torch.cuda.is_available() else 'cpu' 80 | 81 | def eval_imswrite(self, srs=None, img_names=None, dset=None, name=None, ext='png', lrs=None, gts=None, gt_keep_masks=None, verify_same=True): 82 | img_names = to_file_ext(img_names, ext) 83 | 84 | if dset is None: 85 | dset = self.get_default_eval_name() 86 | 87 | max_len = self['data'][dset][name].get('max_len') 88 | 89 | if srs is not None: 90 | sr_dir_path = expanduser(self['data'][dset][name]['paths']['srs']) 91 | write_images(srs, img_names, sr_dir_path) 92 | 93 | if gt_keep_masks is not None: 94 | mask_dir_path = expanduser( 95 | self['data'][dset][name]['paths']['gt_keep_masks']) 96 | write_images(gt_keep_masks, img_names, mask_dir_path) 97 | 98 | gts_path = self['data'][dset][name]['paths'].get('gts') 99 | if gts is not None and gts_path: 100 | gt_dir_path = expanduser(gts_path) 101 | write_images(gts, img_names, gt_dir_path) 102 | 103 | if lrs is not None: 104 | lrs_dir_path = expanduser( 105 | self['data'][dset][name]['paths']['lrs']) 106 | write_images(lrs, img_names, lrs_dir_path) 107 | 108 | def get_default_eval_name(self): 109 | candidates = self['data']['eval'].keys() 110 | if len(candidates) != 1: 111 | raise RuntimeError( 112 | f"Need exactly one candidate for {self.name}: {candidates}") 113 | return list(candidates)[0] 114 | 115 | def pget(self, name, default=None): 116 | if '.' 
in name: 117 | names = name.split('.') 118 | else: 119 | names = [name] 120 | 121 | sub_dict = self 122 | for name in names: 123 | sub_dict = sub_dict.get(name, default) 124 | 125 | if sub_dict == None: 126 | return default 127 | 128 | return sub_dict 129 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | """ 18 | Like image_sample.py, but use a noisy image classifier to guide the sampling 19 | process towards more realistic images. 20 | """ 21 | 22 | import os 23 | import argparse 24 | import torch as th 25 | import torch.nn.functional as F 26 | import time 27 | import conf_mgt 28 | from utils import yamlread 29 | from guided_diffusion import dist_util 30 | 31 | # Workaround 32 | try: 33 | import ctypes 34 | libgcc_s = ctypes.CDLL('libgcc_s.so.1') 35 | except: 36 | pass 37 | 38 | 39 | from guided_diffusion.script_util import ( 40 | NUM_CLASSES, 41 | model_and_diffusion_defaults, 42 | classifier_defaults, 43 | create_model_and_diffusion, 44 | create_classifier, 45 | select_args, 46 | ) # noqa: E402 47 | 48 | def toU8(sample): 49 | if sample is None: 50 | return sample 51 | 52 | sample = ((sample + 1) * 127.5).clamp(0, 255).to(th.uint8) 53 | sample = sample.permute(0, 2, 3, 1) 54 | sample = sample.contiguous() 55 | sample = sample.detach().cpu().numpy() 56 | return sample 57 | 58 | 59 | def main(conf: conf_mgt.Default_Conf): 60 | 61 | print("Start", conf['name']) 62 | 63 | device = dist_util.dev(conf.get('device')) 64 | 65 | 66 | model, diffusion = create_model_and_diffusion( 67 | **select_args(conf, model_and_diffusion_defaults().keys()), conf=conf 68 | ) 69 | model.load_state_dict( 70 | dist_util.load_state_dict(os.path.expanduser( 71 | conf.model_path), map_location="cpu") 72 | ) 73 | model.to(device) 74 | if conf.use_fp16: 75 | model.convert_to_fp16() 76 | model.eval() 77 | 78 | show_progress = conf.show_progress 79 | 80 | if conf.classifier_scale > 0 and conf.classifier_path: 81 | print("loading classifier...") 82 | classifier = create_classifier( 83 | **select_args(conf, classifier_defaults().keys())) 84 | classifier.load_state_dict( 85 | dist_util.load_state_dict(os.path.expanduser( 86 | conf.classifier_path), map_location="cpu") 87 | ) 88 | 89 | classifier.to(device) 90 | if conf.classifier_use_fp16: 91 | classifier.convert_to_fp16() 92 | classifier.eval() 93 | 94 | def cond_fn(x, t, y=None, gt=None, **kwargs): 95 | assert y is not None 96 | with th.enable_grad(): 97 | x_in = x.detach().requires_grad_(True) 98 | logits = 
classifier(x_in, t) 99 | log_probs = F.log_softmax(logits, dim=-1) 100 | selected = log_probs[range(len(logits)), y.view(-1)] 101 | return th.autograd.grad(selected.sum(), x_in)[0] * conf.classifier_scale 102 | else: 103 | cond_fn = None 104 | 105 | def model_fn(x, t, y=None, gt=None, **kwargs): 106 | assert y is not None 107 | return model(x, t, y if conf.class_cond else None, gt=gt) 108 | 109 | print("sampling...") 110 | all_images = [] 111 | 112 | dset = 'eval' 113 | 114 | eval_name = conf.get_default_eval_name() 115 | 116 | dl = conf.get_dataloader(dset=dset, dsName=eval_name) 117 | 118 | for batch in iter(dl): 119 | 120 | for k in batch.keys(): 121 | if isinstance(batch[k], th.Tensor): 122 | batch[k] = batch[k].to(device) 123 | 124 | model_kwargs = {} 125 | 126 | model_kwargs["gt"] = batch['GT'] 127 | 128 | gt_keep_mask = batch.get('gt_keep_mask') 129 | if gt_keep_mask is not None: 130 | model_kwargs['gt_keep_mask'] = gt_keep_mask 131 | 132 | batch_size = model_kwargs["gt"].shape[0] 133 | 134 | if conf.cond_y is not None: 135 | classes = th.ones(batch_size, dtype=th.long, device=device) 136 | model_kwargs["y"] = classes * conf.cond_y 137 | else: 138 | classes = th.randint( 139 | low=0, high=NUM_CLASSES, size=(batch_size,), device=device 140 | ) 141 | model_kwargs["y"] = classes 142 | 143 | sample_fn = ( 144 | diffusion.p_sample_loop if not conf.use_ddim else diffusion.ddim_sample_loop 145 | ) 146 | 147 | 148 | result = sample_fn( 149 | model_fn, 150 | (batch_size, 3, conf.image_size, conf.image_size), 151 | clip_denoised=conf.clip_denoised, 152 | model_kwargs=model_kwargs, 153 | cond_fn=cond_fn, 154 | device=device, 155 | progress=show_progress, 156 | return_all=True, 157 | conf=conf 158 | ) 159 | srs = toU8(result['sample']) 160 | gts = toU8(result['gt']) 161 | lrs = toU8(result.get('gt') * model_kwargs.get('gt_keep_mask') + (-1) * 162 | th.ones_like(result.get('gt')) * (1 - model_kwargs.get('gt_keep_mask'))) 163 | 164 | gt_keep_masks = toU8((model_kwargs.get('gt_keep_mask') * 2 - 1)) 165 | 166 | conf.eval_imswrite( 167 | srs=srs, gts=gts, lrs=lrs, gt_keep_masks=gt_keep_masks, 168 | img_names=batch['GT_name'], dset=dset, name=eval_name, verify_same=False) 169 | 170 | print("sampling complete") 171 | 172 | 173 | if __name__ == "__main__": 174 | parser = argparse.ArgumentParser() 175 | parser.add_argument('--conf_path', type=str, required=False, default=None) 176 | args = vars(parser.parse_args()) 177 | 178 | conf_arg = conf_mgt.conf_base.Default_Conf() 179 | conf_arg.update(yamlread(args.get('conf_path'))) 180 | main(conf_arg) 181 | -------------------------------------------------------------------------------- /guided_diffusion/nn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | """ 18 | Various utilities for neural networks. 19 | """ 20 | 21 | import math 22 | 23 | import torch as th 24 | import torch.nn as nn 25 | 26 | 27 | # PyTorch 1.7 has SiLU, but we support PyTorch 1.5. 28 | class SiLU(nn.Module): 29 | def forward(self, x): 30 | return x * th.sigmoid(x) 31 | 32 | 33 | class GroupNorm32(nn.GroupNorm): 34 | def forward(self, x): 35 | return super().forward(x.float()).type(x.dtype) 36 | 37 | 38 | def conv_nd(dims, *args, **kwargs): 39 | """ 40 | Create a 1D, 2D, or 3D convolution module. 41 | """ 42 | if dims == 1: 43 | return nn.Conv1d(*args, **kwargs) 44 | elif dims == 2: 45 | return nn.Conv2d(*args, **kwargs) 46 | elif dims == 3: 47 | return nn.Conv3d(*args, **kwargs) 48 | raise ValueError(f"unsupported dimensions: {dims}") 49 | 50 | 51 | def linear(*args, **kwargs): 52 | """ 53 | Create a linear module. 54 | """ 55 | return nn.Linear(*args, **kwargs) 56 | 57 | 58 | def avg_pool_nd(dims, *args, **kwargs): 59 | """ 60 | Create a 1D, 2D, or 3D average pooling module. 61 | """ 62 | if dims == 1: 63 | return nn.AvgPool1d(*args, **kwargs) 64 | elif dims == 2: 65 | return nn.AvgPool2d(*args, **kwargs) 66 | elif dims == 3: 67 | return nn.AvgPool3d(*args, **kwargs) 68 | raise ValueError(f"unsupported dimensions: {dims}") 69 | 70 | 71 | def update_ema(target_params, source_params, rate=0.99): 72 | """ 73 | Update target parameters to be closer to those of source parameters using 74 | an exponential moving average. 75 | 76 | :param target_params: the target parameter sequence. 77 | :param source_params: the source parameter sequence. 78 | :param rate: the EMA rate (closer to 1 means slower). 79 | """ 80 | for targ, src in zip(target_params, source_params): 81 | targ.detach().mul_(rate).add_(src, alpha=1 - rate) 82 | 83 | 84 | def zero_module(module): 85 | """ 86 | Zero out the parameters of a module and return it. 87 | """ 88 | for p in module.parameters(): 89 | p.detach().zero_() 90 | return module 91 | 92 | 93 | def scale_module(module, scale): 94 | """ 95 | Scale the parameters of a module and return it. 96 | """ 97 | for p in module.parameters(): 98 | p.detach().mul_(scale) 99 | return module 100 | 101 | 102 | def mean_flat(tensor): 103 | """ 104 | Take the mean over all non-batch dimensions. 105 | """ 106 | return tensor.mean(dim=list(range(1, len(tensor.shape)))) 107 | 108 | 109 | def normalization(channels): 110 | """ 111 | Make a standard normalization layer. 112 | 113 | :param channels: number of input channels. 114 | :return: an nn.Module for normalization. 115 | """ 116 | return GroupNorm32(32, channels) 117 | 118 | 119 | def timestep_embedding(timesteps, dim, max_period=10000): 120 | """ 121 | Create sinusoidal timestep embeddings. 122 | 123 | :param timesteps: a 1-D Tensor of N indices, one per batch element. 124 | These may be fractional. 125 | :param dim: the dimension of the output. 126 | :param max_period: controls the minimum frequency of the embeddings. 127 | :return: an [N x dim] Tensor of positional embeddings. 
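
    Example (editor's sketch): for a batch of two timesteps,
    timestep_embedding(th.tensor([0, 10]), dim=128) returns a (2, 128) tensor
    whose rows concatenate cos(args) and sin(args) over 64 log-spaced
    frequencies.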
128 | """ 129 | half = dim // 2 130 | freqs = th.exp( 131 | -math.log(max_period) * th.arange(start=0, end=half, dtype=th.float32) / half 132 | ).to(device=timesteps.device) 133 | args = timesteps[:, None].float() * freqs[None] 134 | embedding = th.cat([th.cos(args), th.sin(args)], dim=-1) 135 | if dim % 2: 136 | embedding = th.cat([embedding, th.zeros_like(embedding[:, :1])], dim=-1) 137 | return embedding 138 | 139 | 140 | def checkpoint(func, inputs, params, flag): 141 | """ 142 | Evaluate a function without caching intermediate activations, allowing for 143 | reduced memory at the expense of extra compute in the backward pass. 144 | 145 | :param func: the function to evaluate. 146 | :param inputs: the argument sequence to pass to `func`. 147 | :param params: a sequence of parameters `func` depends on but does not 148 | explicitly take as arguments. 149 | :param flag: if False, disable gradient checkpointing. 150 | """ 151 | if flag: 152 | args = tuple(inputs) + tuple(params) 153 | return CheckpointFunction.apply(func, len(inputs), *args) 154 | else: 155 | return func(*inputs) 156 | 157 | 158 | class CheckpointFunction(th.autograd.Function): 159 | @staticmethod 160 | def forward(ctx, run_function, length, *args): 161 | ctx.run_function = run_function 162 | ctx.input_tensors = list(args[:length]) 163 | ctx.input_params = list(args[length:]) 164 | with th.no_grad(): 165 | output_tensors = ctx.run_function(*ctx.input_tensors) 166 | return output_tensors 167 | 168 | @staticmethod 169 | def backward(ctx, *output_grads): 170 | ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] 171 | with th.enable_grad(): 172 | # Fixes a bug where the first op in run_function modifies the 173 | # Tensor storage in place, which is not allowed for detach()'d 174 | # Tensors. 175 | shallow_copies = [x.view_as(x) for x in ctx.input_tensors] 176 | output_tensors = ctx.run_function(*shallow_copies) 177 | input_grads = th.autograd.grad( 178 | output_tensors, 179 | ctx.input_tensors + ctx.input_params, 180 | output_grads, 181 | allow_unused=True, 182 | ) 183 | del ctx.input_tensors 184 | del ctx.input_params 185 | del output_tensors 186 | return (None, None) + input_grads 187 | -------------------------------------------------------------------------------- /guided_diffusion/scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
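#
# Editor's sketch (not part of the original file): get_schedule_jump below
# builds the RePaint resampling schedule. With the values used by the confs
# in this repository,
#
#   ts = get_schedule_jump(t_T=250, n_sample=1, jump_length=10, jump_n_sample=10)
#
# walks from t=249 down to -1, and at every multiple of jump_length below
# t_T - jump_length it re-ascends jump_length steps and re-descends
# jump_n_sample - 1 extra times; get_schedule_jump_test plots this schedule.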
14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | def get_schedule(t_T, t_0, n_sample, n_steplength, debug=0): 18 | if n_steplength > 1: 19 | if not n_sample > 1: 20 | raise RuntimeError('n_steplength has no effect if n_sample=1') 21 | 22 | t = t_T 23 | times = [t] 24 | while t >= 0: 25 | t = t - 1 26 | times.append(t) 27 | n_steplength_cur = min(n_steplength, t_T - t) 28 | 29 | for _ in range(n_sample - 1): 30 | 31 | for _ in range(n_steplength_cur): 32 | t = t + 1 33 | times.append(t) 34 | for _ in range(n_steplength_cur): 35 | t = t - 1 36 | times.append(t) 37 | 38 | _check_times(times, t_0, t_T) 39 | 40 | if debug == 2: 41 | for x in [list(range(0, 50)), list(range(-1, -50, -1))]: 42 | _plot_times(x=x, times=[times[i] for i in x]) 43 | 44 | return times 45 | 46 | 47 | def _check_times(times, t_0, t_T): 48 | # Check end 49 | assert times[0] > times[1], (times[0], times[1]) 50 | 51 | # Check beginning 52 | assert times[-1] == -1, times[-1] 53 | 54 | # Steplength = 1 55 | for t_last, t_cur in zip(times[:-1], times[1:]): 56 | assert abs(t_last - t_cur) == 1, (t_last, t_cur) 57 | 58 | # Value range 59 | for t in times: 60 | assert t >= t_0, (t, t_0) 61 | assert t <= t_T, (t, t_T) 62 | 63 | 64 | def _plot_times(x, times): 65 | import matplotlib.pyplot as plt 66 | plt.plot(x, times) 67 | plt.show() 68 | 69 | 70 | def get_schedule_jump(t_T, n_sample, jump_length, jump_n_sample, 71 | jump2_length=1, jump2_n_sample=1, 72 | jump3_length=1, jump3_n_sample=1, 73 | start_resampling=100000000): 74 | 75 | jumps = {} 76 | for j in range(0, t_T - jump_length, jump_length): 77 | jumps[j] = jump_n_sample - 1 78 | 79 | jumps2 = {} 80 | for j in range(0, t_T - jump2_length, jump2_length): 81 | jumps2[j] = jump2_n_sample - 1 82 | 83 | jumps3 = {} 84 | for j in range(0, t_T - jump3_length, jump3_length): 85 | jumps3[j] = jump3_n_sample - 1 86 | 87 | t = t_T 88 | ts = [] 89 | 90 | while t >= 1: 91 | t = t-1 92 | ts.append(t) 93 | 94 | if ( 95 | t + 1 < t_T - 1 and 96 | t <= start_resampling 97 | ): 98 | for _ in range(n_sample - 1): 99 | t = t + 1 100 | ts.append(t) 101 | 102 | if t >= 0: 103 | t = t - 1 104 | ts.append(t) 105 | 106 | if ( 107 | jumps3.get(t, 0) > 0 and 108 | t <= start_resampling - jump3_length 109 | ): 110 | jumps3[t] = jumps3[t] - 1 111 | for _ in range(jump3_length): 112 | t = t + 1 113 | ts.append(t) 114 | 115 | if ( 116 | jumps2.get(t, 0) > 0 and 117 | t <= start_resampling - jump2_length 118 | ): 119 | jumps2[t] = jumps2[t] - 1 120 | for _ in range(jump2_length): 121 | t = t + 1 122 | ts.append(t) 123 | jumps3 = {} 124 | for j in range(0, t_T - jump3_length, jump3_length): 125 | jumps3[j] = jump3_n_sample - 1 126 | 127 | if ( 128 | jumps.get(t, 0) > 0 and 129 | t <= start_resampling - jump_length 130 | ): 131 | jumps[t] = jumps[t] - 1 132 | for _ in range(jump_length): 133 | t = t + 1 134 | ts.append(t) 135 | jumps2 = {} 136 | for j in range(0, t_T - jump2_length, jump2_length): 137 | jumps2[j] = jump2_n_sample - 1 138 | 139 | jumps3 = {} 140 | for j in range(0, t_T - jump3_length, jump3_length): 141 | jumps3[j] = jump3_n_sample - 1 142 | 143 | ts.append(-1) 144 | 145 | _check_times(ts, -1, t_T) 146 | 147 | return ts 148 | 149 | 150 | def get_schedule_jump_paper(): 151 | t_T = 250 152 | jump_length = 10 153 | jump_n_sample = 10 154 | 155 | jumps = {} 156 | for j in range(0, t_T - jump_length, jump_length): 157 | jumps[j] = jump_n_sample - 1 158 | 159 | t = t_T 160 | ts = [] 161 | 162 | while t >= 1: 163 | t 
= t-1 164 | ts.append(t) 165 | 166 | if jumps.get(t, 0) > 0: 167 | jumps[t] = jumps[t] - 1 168 | for _ in range(jump_length): 169 | t = t + 1 170 | ts.append(t) 171 | 172 | ts.append(-1) 173 | 174 | _check_times(ts, -1, t_T) 175 | 176 | return ts 177 | 178 | 179 | def get_schedule_jump_test(to_supplement=False): 180 | ts = get_schedule_jump(t_T=250, n_sample=1, 181 | jump_length=10, jump_n_sample=10, 182 | jump2_length=1, jump2_n_sample=1, 183 | jump3_length=1, jump3_n_sample=1, 184 | start_resampling=250) 185 | 186 | import matplotlib.pyplot as plt 187 | SMALL_SIZE = 8*3 188 | MEDIUM_SIZE = 10*3 189 | BIGGER_SIZE = 12*3 190 | 191 | plt.rc('font', size=SMALL_SIZE) # controls default text sizes 192 | plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title 193 | plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels 194 | plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels 195 | plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels 196 | plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize 197 | plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title 198 | 199 | plt.plot(ts) 200 | 201 | fig = plt.gcf() 202 | fig.set_size_inches(20, 10) 203 | 204 | ax = plt.gca() 205 | ax.set_xlabel('Number of Transitions') 206 | ax.set_ylabel('Diffusion time $t$') 207 | 208 | fig.tight_layout() 209 | 210 | if to_supplement: 211 | out_path = "/cluster/home/alugmayr/gdiff/paper/supplement/figures/jump_sched.pdf" 212 | plt.savefig(out_path) 213 | 214 | out_path = "./schedule.png" 215 | plt.savefig(out_path) 216 | print(out_path) 217 | 218 | 219 | def main(): 220 | get_schedule_jump_test() 221 | 222 | 223 | if __name__ == "__main__": 224 | main() 225 | -------------------------------------------------------------------------------- /guided_diffusion/respace.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | import numpy as np 18 | import torch as th 19 | 20 | from .gaussian_diffusion import GaussianDiffusion 21 | 22 | 23 | def space_timesteps(num_timesteps, section_counts): 24 | """ 25 | Create a list of timesteps to use from an original diffusion process, 26 | given the number of timesteps we want to take from equally-sized portions 27 | of the original process. 28 | 29 | For example, if there's 300 timesteps and the section counts are [10,15,20] 30 | then the first 100 timesteps are strided to be 10 timesteps, the second 100 31 | are strided to be 15 timesteps, and the final 100 are strided to be 20. 
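    In that example, space_timesteps(300, [10, 15, 20]) returns a set of
    10 + 15 + 20 = 45 of the original 300 timesteps.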
32 | 33 | If the stride is a string starting with "ddim", then the fixed striding 34 | from the DDIM paper is used, and only one section is allowed. 35 | 36 | :param num_timesteps: the number of diffusion steps in the original 37 | process to divide up. 38 | :param section_counts: either a list of numbers, or a string containing 39 | comma-separated numbers, indicating the step count 40 | per section. As a special case, use "ddimN" where N 41 | is a number of steps to use the striding from the 42 | DDIM paper. 43 | :return: a set of diffusion steps from the original process to use. 44 | """ 45 | if isinstance(section_counts, str): 46 | if section_counts.startswith("ddim"): 47 | desired_count = int(section_counts[len("ddim"):]) 48 | for i in range(1, num_timesteps): 49 | if len(range(0, num_timesteps, i)) == desired_count: 50 | return set(range(0, num_timesteps, i)) 51 | section_counts = [int(x) for x in section_counts.split(",")] 52 | if isinstance(section_counts, int): 53 | section_counts = [section_counts] 54 | size_per = num_timesteps // len(section_counts) 55 | extra = num_timesteps % len(section_counts) 56 | start_idx = 0 57 | all_steps = [] 58 | 59 | if len(section_counts) == 1 and section_counts[0] > num_timesteps: 60 | return set(np.linspace(start=0, stop=num_timesteps, num=section_counts[0])) 61 | 62 | for i, section_count in enumerate(section_counts): 63 | size = size_per + (1 if i < extra else 0) 64 | if size < section_count: 65 | raise ValueError( 66 | f"cannot divide section of {size} steps into {section_count}" 67 | ) 68 | if section_count <= 1: 69 | frac_stride = 1 70 | else: 71 | frac_stride = (size - 1) / (section_count - 1) 72 | cur_idx = 0.0 73 | taken_steps = [] 74 | for _ in range(section_count): 75 | taken_steps.append(start_idx + round(cur_idx)) 76 | cur_idx += frac_stride 77 | all_steps += taken_steps 78 | start_idx += size 79 | return set(all_steps) 80 | 81 | 82 | class SpacedDiffusion(GaussianDiffusion): 83 | """ 84 | A diffusion process which can skip steps in a base diffusion process. 85 | 86 | :param use_timesteps: a collection (sequence or set) of timesteps from the 87 | original diffusion process to retain. 88 | :param kwargs: the kwargs to create the base diffusion process. 
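
    Example (editor's sketch): to sample with 250 of 1000 steps, construct
    SpacedDiffusion(use_timesteps=space_timesteps(1000, '250'), conf=conf,
    **gd_kwargs), where gd_kwargs (a hypothetical name) holds the original
    1000-step "betas" and the remaining GaussianDiffusion arguments.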
89 | """ 90 | 91 | def __init__(self, use_timesteps, conf=None, **kwargs): 92 | self.use_timesteps = set(use_timesteps) 93 | self.original_num_steps = len(kwargs["betas"]) 94 | self.conf = conf 95 | 96 | base_diffusion = GaussianDiffusion(conf=conf, 97 | **kwargs) # pylint: disable=missing-kwoa 98 | 99 | if conf.respace_interpolate: 100 | new_betas = resample_betas( 101 | kwargs["betas"], int(conf.timestep_respacing)) 102 | self.timestep_map = list(range(len(new_betas))) 103 | else: 104 | self.timestep_map = [] 105 | new_betas = [] 106 | last_alpha_cumprod = 1.0 107 | for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod): 108 | if i in self.use_timesteps: 109 | new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) 110 | last_alpha_cumprod = alpha_cumprod 111 | self.timestep_map.append(i) 112 | 113 | kwargs["betas"] = np.array(new_betas) 114 | 115 | if conf.use_value_logger: 116 | conf.value_logger.add_value( 117 | new_betas, 'new_betas SpacedDiffusion') 118 | 119 | super().__init__(conf=conf, **kwargs) 120 | 121 | def p_mean_variance( 122 | self, model, *args, **kwargs 123 | ): # pylint: disable=signature-differs 124 | return super().p_mean_variance(self._wrap_model(model), *args, **kwargs) 125 | 126 | def training_losses( 127 | self, model, *args, **kwargs 128 | ): # pylint: disable=signature-differs 129 | return super().training_losses(self._wrap_model(model), *args, **kwargs) 130 | 131 | def condition_mean(self, cond_fn, *args, **kwargs): 132 | return super().condition_mean(self._wrap_model(cond_fn), *args, **kwargs) 133 | 134 | def condition_score(self, cond_fn, *args, **kwargs): 135 | return super().condition_score(self._wrap_model(cond_fn), *args, **kwargs) 136 | 137 | def _wrap_model(self, model): 138 | if isinstance(model, _WrappedModel): 139 | return model 140 | return _WrappedModel( 141 | model, self.timestep_map, self.rescale_timesteps, 142 | self.original_num_steps, self.conf 143 | ) 144 | 145 | def _scale_timesteps(self, t): 146 | # Scaling is done by the wrapped model. 147 | return t 148 | 149 | 150 | class _WrappedModel: 151 | def __init__(self, model, timestep_map, rescale_timesteps, original_num_steps, conf): 152 | self.model = model 153 | self.timestep_map = timestep_map 154 | self.rescale_timesteps = rescale_timesteps 155 | self.original_num_steps = original_num_steps 156 | self.conf = conf 157 | 158 | def __call__(self, x, ts, **kwargs): 159 | map_tensor = th.tensor( # pylint: disable=not-callable 160 | self.timestep_map, device=ts.device, dtype=ts.dtype) 161 | new_ts = map_tensor[ts] 162 | if self.rescale_timesteps: 163 | raise NotImplementedError() 164 | #new_ts = self.do_rescale_timesteps(new_ts) 165 | 166 | if self.conf.respace_interpolate: 167 | new_ts = new_ts.float() * ( 168 | (self.conf.diffusion_steps - 1) / (float(self.conf.timestep_respacing) - 1.0)) 169 | 170 | return self.model(x, new_ts, **kwargs) 171 | 172 | def do_rescale_timesteps(self, new_ts): 173 | new_ts = new_ts.float() * (1000.0 / self.original_num_steps) 174 | return new_ts 175 | -------------------------------------------------------------------------------- /guided_diffusion/image_datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 
4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | import random 18 | import os 19 | 20 | from PIL import Image 21 | import blobfile as bf 22 | import numpy as np 23 | from torch.utils.data import DataLoader, Dataset 24 | 25 | def load_data_yield(loader): 26 | while True: 27 | yield from loader 28 | 29 | def load_data_inpa( 30 | *, 31 | gt_path=None, 32 | mask_path=None, 33 | batch_size, 34 | image_size, 35 | class_cond=False, 36 | deterministic=False, 37 | random_crop=False, 38 | random_flip=True, 39 | return_dataloader=False, 40 | return_dict=False, 41 | max_len=None, 42 | drop_last=True, 43 | conf=None, 44 | offset=0, 45 | ** kwargs 46 | ): 47 | """ 48 | For a dataset, create a generator over (images, kwargs) pairs. 49 | 50 | Each images is an NCHW float tensor, and the kwargs dict contains zero or 51 | more keys, each of which map to a batched Tensor of their own. 52 | The kwargs dict can be used for class labels, in which case the key is "y" 53 | and the values are integer tensors of class labels. 54 | 55 | :param data_dir: a dataset directory. 56 | :param batch_size: the batch size of each returned pair. 57 | :param image_size: the size to which images are resized. 58 | :param class_cond: if True, include a "y" key in returned dicts for class 59 | label. If classes are not available and this is true, an 60 | exception will be raised. 61 | :param deterministic: if True, yield results in a deterministic order. 62 | :param random_crop: if True, randomly crop the images for augmentation. 63 | :param random_flip: if True, randomly flip the images for augmentation. 64 | """ 65 | 66 | gt_dir = os.path.expanduser(gt_path) 67 | mask_dir = os.path.expanduser(mask_path) 68 | 69 | gt_paths = _list_image_files_recursively(gt_dir) 70 | mask_paths = _list_image_files_recursively(mask_dir) 71 | 72 | assert len(gt_paths) == len(mask_paths) 73 | 74 | classes = None 75 | if class_cond: 76 | raise NotImplementedError() 77 | 78 | dataset = ImageDatasetInpa( 79 | image_size, 80 | gt_paths=gt_paths, 81 | mask_paths=mask_paths, 82 | classes=classes, 83 | shard=0, 84 | num_shards=1, 85 | random_crop=random_crop, 86 | random_flip=random_flip, 87 | return_dict=return_dict, 88 | max_len=max_len, 89 | conf=conf, 90 | offset=offset 91 | ) 92 | 93 | if deterministic: 94 | loader = DataLoader( 95 | dataset, batch_size=batch_size, shuffle=False, num_workers=1, drop_last=drop_last 96 | ) 97 | 98 | else: 99 | loader = DataLoader( 100 | dataset, batch_size=batch_size, shuffle=True, num_workers=1, drop_last=drop_last 101 | ) 102 | 103 | if return_dataloader: 104 | return loader 105 | else: 106 | return load_data_yield(loader) 107 | 108 | 109 | def _list_image_files_recursively(data_dir): 110 | results = [] 111 | for entry in sorted(bf.listdir(data_dir)): 112 | full_path = bf.join(data_dir, entry) 113 | ext = entry.split(".")[-1] 114 | if "." 
in entry and ext.lower() in ["jpg", "jpeg", "png", "gif"]: 115 | results.append(full_path) 116 | elif bf.isdir(full_path): 117 | results.extend(_list_image_files_recursively(full_path)) 118 | return results 119 | 120 | 121 | class ImageDatasetInpa(Dataset): 122 | def __init__( 123 | self, 124 | resolution, 125 | gt_paths, 126 | mask_paths, 127 | classes=None, 128 | shard=0, 129 | num_shards=1, 130 | random_crop=False, 131 | random_flip=True, 132 | return_dict=False, 133 | max_len=None, 134 | conf=None, 135 | offset=0 136 | ): 137 | super().__init__() 138 | self.resolution = resolution 139 | 140 | gt_paths = sorted(gt_paths)[offset:] 141 | mask_paths = sorted(mask_paths)[offset:] 142 | 143 | self.local_gts = gt_paths[shard:][::num_shards] 144 | self.local_masks = mask_paths[shard:][::num_shards] 145 | 146 | self.local_classes = None if classes is None else classes[shard:][::num_shards] 147 | 148 | self.random_crop = random_crop 149 | self.random_flip = random_flip 150 | self.return_dict = return_dict 151 | self.max_len = max_len 152 | 153 | def __len__(self): 154 | if self.max_len is not None: 155 | return self.max_len 156 | 157 | return len(self.local_gts) 158 | 159 | def __getitem__(self, idx): 160 | gt_path = self.local_gts[idx] 161 | pil_gt = self.imread(gt_path) 162 | 163 | mask_path = self.local_masks[idx] 164 | pil_mask = self.imread(mask_path) 165 | 166 | if self.random_crop: 167 | raise NotImplementedError() 168 | else: 169 | arr_gt = center_crop_arr(pil_gt, self.resolution) 170 | arr_mask = center_crop_arr(pil_mask, self.resolution) 171 | 172 | if self.random_flip and random.random() < 0.5: 173 | arr_gt = arr_gt[:, ::-1] 174 | arr_mask = arr_mask[:, ::-1] 175 | 176 | arr_gt = arr_gt.astype(np.float32) / 127.5 - 1 177 | arr_mask = arr_mask.astype(np.float32) / 255.0 178 | 179 | out_dict = {} 180 | if self.local_classes is not None: 181 | out_dict["y"] = np.array(self.local_classes[idx], dtype=np.int64) 182 | 183 | if self.return_dict: 184 | name = os.path.basename(gt_path) 185 | return { 186 | 'GT': np.transpose(arr_gt, [2, 0, 1]), 187 | 'GT_name': name, 188 | 'gt_keep_mask': np.transpose(arr_mask, [2, 0, 1]), 189 | } 190 | else: 191 | raise NotImplementedError() 192 | 193 | def imread(self, path): 194 | with bf.BlobFile(path, "rb") as f: 195 | pil_image = Image.open(f) 196 | pil_image.load() 197 | pil_image = pil_image.convert("RGB") 198 | return pil_image 199 | 200 | 201 | def center_crop_arr(pil_image, image_size): 202 | # We are not on a new enough PIL to support the `reducing_gap` 203 | # argument, which uses BOX downsampling at powers of two first. 204 | # Thus, we do it by hand to improve downsample quality. 205 | while min(*pil_image.size) >= 2 * image_size: 206 | pil_image = pil_image.resize( 207 | tuple(x // 2 for x in pil_image.size), resample=Image.BOX 208 | ) 209 | 210 | scale = image_size / min(*pil_image.size) 211 | pil_image = pil_image.resize( 212 | tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC 213 | ) 214 | 215 | arr = np.array(pil_image) 216 | crop_y = (arr.shape[0] - image_size) // 2 217 | crop_x = (arr.shape[1] - image_size) // 2 218 | return arr[crop_y: crop_y + image_size, crop_x: crop_x + image_size] 219 | -------------------------------------------------------------------------------- /guided_diffusion/fp16_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | """ 18 | Helpers to train with 16-bit precision. 19 | """ 20 | 21 | import numpy as np 22 | import torch as th 23 | import torch.nn as nn 24 | from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors 25 | 26 | 27 | INITIAL_LOG_LOSS_SCALE = 20.0 28 | 29 | 30 | def convert_module_to_f16(l): 31 | """ 32 | Convert primitive modules to float16. 33 | """ 34 | if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): 35 | l.weight.data = l.weight.data.half() 36 | if l.bias is not None: 37 | l.bias.data = l.bias.data.half() 38 | 39 | 40 | def convert_module_to_f32(l): 41 | """ 42 | Convert primitive modules to float32, undoing convert_module_to_f16(). 43 | """ 44 | if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): 45 | l.weight.data = l.weight.data.float() 46 | if l.bias is not None: 47 | l.bias.data = l.bias.data.float() 48 | 49 | 50 | def make_master_params(param_groups_and_shapes): 51 | """ 52 | Copy model parameters into a (differently-shaped) list of full-precision 53 | parameters. 54 | """ 55 | master_params = [] 56 | for param_group, shape in param_groups_and_shapes: 57 | master_param = nn.Parameter( 58 | _flatten_dense_tensors( 59 | [param.detach().float() for (_, param) in param_group] 60 | ).view(shape) 61 | ) 62 | master_param.requires_grad = True 63 | master_params.append(master_param) 64 | return master_params 65 | 66 | 67 | def model_grads_to_master_grads(param_groups_and_shapes, master_params): 68 | """ 69 | Copy the gradients from the model parameters into the master parameters 70 | from make_master_params(). 71 | """ 72 | for master_param, (param_group, shape) in zip( 73 | master_params, param_groups_and_shapes 74 | ): 75 | master_param.grad = _flatten_dense_tensors( 76 | [param_grad_or_zeros(param) for (_, param) in param_group] 77 | ).view(shape) 78 | 79 | 80 | def master_params_to_model_params(param_groups_and_shapes, master_params): 81 | """ 82 | Copy the master parameter data back into the model parameters. 83 | """ 84 | # Without copying to a list, if a generator is passed, this will 85 | # silently not copy any parameters. 
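    # Editor's note: each master_param is one flat full-precision tensor per
    # group; unflatten_master_params splits it back into the original
    # per-parameter shapes so the model weights can be overwritten in place.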
86 | for master_param, (param_group, _) in zip(master_params, param_groups_and_shapes): 87 | for (_, param), unflat_master_param in zip( 88 | param_group, unflatten_master_params(param_group, master_param.view(-1)) 89 | ): 90 | param.detach().copy_(unflat_master_param) 91 | 92 | 93 | def unflatten_master_params(param_group, master_param): 94 | return _unflatten_dense_tensors(master_param, [param for (_, param) in param_group]) 95 | 96 | 97 | def get_param_groups_and_shapes(named_model_params): 98 | named_model_params = list(named_model_params) 99 | scalar_vector_named_params = ( 100 | [(n, p) for (n, p) in named_model_params if p.ndim <= 1], 101 | (-1), 102 | ) 103 | matrix_named_params = ( 104 | [(n, p) for (n, p) in named_model_params if p.ndim > 1], 105 | (1, -1), 106 | ) 107 | return [scalar_vector_named_params, matrix_named_params] 108 | 109 | 110 | def master_params_to_state_dict( 111 | model, param_groups_and_shapes, master_params, use_fp16 112 | ): 113 | if use_fp16: 114 | state_dict = model.state_dict() 115 | for master_param, (param_group, _) in zip( 116 | master_params, param_groups_and_shapes 117 | ): 118 | for (name, _), unflat_master_param in zip( 119 | param_group, unflatten_master_params(param_group, master_param.view(-1)) 120 | ): 121 | assert name in state_dict 122 | state_dict[name] = unflat_master_param 123 | else: 124 | state_dict = model.state_dict() 125 | for i, (name, _value) in enumerate(model.named_parameters()): 126 | assert name in state_dict 127 | state_dict[name] = master_params[i] 128 | return state_dict 129 | 130 | 131 | def state_dict_to_master_params(model, state_dict, use_fp16): 132 | if use_fp16: 133 | named_model_params = [ 134 | (name, state_dict[name]) for name, _ in model.named_parameters() 135 | ] 136 | param_groups_and_shapes = get_param_groups_and_shapes(named_model_params) 137 | master_params = make_master_params(param_groups_and_shapes) 138 | else: 139 | master_params = [state_dict[name] for name, _ in model.named_parameters()] 140 | return master_params 141 | 142 | 143 | def zero_master_grads(master_params): 144 | for param in master_params: 145 | param.grad = None 146 | 147 | 148 | def zero_grad(model_params): 149 | for param in model_params: 150 | # Taken from https://pytorch.org/docs/stable/_modules/torch/optim/optimizer.html#Optimizer.add_param_group 151 | if param.grad is not None: 152 | param.grad.detach_() 153 | param.grad.zero_() 154 | 155 | 156 | def param_grad_or_zeros(param): 157 | if param.grad is not None: 158 | return param.grad.data.detach() 159 | else: 160 | return th.zeros_like(param) 161 | 162 | 163 | class MixedPrecisionTrainer: 164 | def __init__( 165 | self, 166 | *, 167 | model, 168 | use_fp16=False, 169 | fp16_scale_growth=1e-3, 170 | initial_lg_loss_scale=INITIAL_LOG_LOSS_SCALE, 171 | ): 172 | self.model = model 173 | self.use_fp16 = use_fp16 174 | self.fp16_scale_growth = fp16_scale_growth 175 | 176 | self.model_params = list(self.model.parameters()) 177 | self.master_params = self.model_params 178 | self.param_groups_and_shapes = None 179 | self.lg_loss_scale = initial_lg_loss_scale 180 | 181 | if self.use_fp16: 182 | self.param_groups_and_shapes = get_param_groups_and_shapes( 183 | self.model.named_parameters() 184 | ) 185 | self.master_params = make_master_params(self.param_groups_and_shapes) 186 | self.model.convert_to_fp16() 187 | 188 | def zero_grad(self): 189 | zero_grad(self.model_params) 190 | 191 | def backward(self, loss: th.Tensor): 192 | if self.use_fp16: 193 | loss_scale = 2 ** self.lg_loss_scale 194 | 
(loss * loss_scale).backward() 195 | else: 196 | loss.backward() 197 | 198 | def optimize(self, opt: th.optim.Optimizer): 199 | if self.use_fp16: 200 | return self._optimize_fp16(opt) 201 | else: 202 | return self._optimize_normal(opt) 203 | 204 | def _optimize_fp16(self, opt: th.optim.Optimizer): 205 | model_grads_to_master_grads(self.param_groups_and_shapes, self.master_params) 206 | grad_norm, param_norm = self._compute_norms(grad_scale=2 ** self.lg_loss_scale) 207 | if check_overflow(grad_norm): 208 | self.lg_loss_scale -= 1 209 | zero_master_grads(self.master_params) 210 | return False 211 | 212 | for p in self.master_params: 213 | p.grad.mul_(1.0 / (2 ** self.lg_loss_scale)) 214 | opt.step() 215 | zero_master_grads(self.master_params) 216 | master_params_to_model_params(self.param_groups_and_shapes, self.master_params) 217 | self.lg_loss_scale += self.fp16_scale_growth 218 | return True 219 | 220 | def _optimize_normal(self, opt: th.optim.Optimizer): 221 | grad_norm, param_norm = self._compute_norms() 222 | opt.step() 223 | return True 224 | 225 | def _compute_norms(self, grad_scale=1.0): 226 | grad_norm = 0.0 227 | param_norm = 0.0 228 | for p in self.master_params: 229 | with th.no_grad(): 230 | param_norm += th.norm(p, p=2, dtype=th.float32).item() ** 2 231 | if p.grad is not None: 232 | grad_norm += th.norm(p.grad, p=2, dtype=th.float32).item() ** 2 233 | return np.sqrt(grad_norm) / grad_scale, np.sqrt(param_norm) 234 | 235 | def master_params_to_state_dict(self, master_params): 236 | return master_params_to_state_dict( 237 | self.model, self.param_groups_and_shapes, master_params, self.use_fp16 238 | ) 239 | 240 | def state_dict_to_master_params(self, state_dict): 241 | return state_dict_to_master_params(self.model, state_dict, self.use_fp16) 242 | 243 | 244 | def check_overflow(value): 245 | return (value == float("inf")) or (value == -float("inf")) or (value != value) -------------------------------------------------------------------------------- /guided_diffusion/script_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | import argparse 18 | import inspect 19 | 20 | from . import gaussian_diffusion as gd 21 | from .respace import SpacedDiffusion, space_timesteps 22 | from .unet import SuperResModel, UNetModel, EncoderUNetModel 23 | 24 | NUM_CLASSES = 1000 25 | 26 | 27 | def diffusion_defaults(): 28 | """ 29 | Defaults for image and classifier training. 
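
    The confs in this repository override several of these defaults, e.g.
    learn_sigma: true and timestep_respacing: '250' on top of
    diffusion_steps: 1000.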
30 | """ 31 | return dict( 32 | learn_sigma=False, 33 | diffusion_steps=1000, 34 | noise_schedule="linear", 35 | timestep_respacing="", 36 | use_kl=False, 37 | predict_xstart=False, 38 | rescale_timesteps=False, 39 | rescale_learned_sigmas=False, 40 | ) 41 | 42 | 43 | def classifier_defaults(): 44 | """ 45 | Defaults for classifier models. 46 | """ 47 | return dict( 48 | image_size=64, 49 | classifier_use_fp16=False, 50 | classifier_width=128, 51 | classifier_depth=2, 52 | classifier_attention_resolutions="32,16,8", 53 | classifier_use_scale_shift_norm=True, 54 | classifier_resblock_updown=True, 55 | classifier_pool="attention", 56 | ) 57 | 58 | 59 | def model_and_diffusion_defaults(): 60 | """ 61 | Defaults for image training. 62 | """ 63 | res = dict( 64 | image_size=64, 65 | num_channels=128, 66 | num_res_blocks=2, 67 | num_heads=4, 68 | num_heads_upsample=-1, 69 | num_head_channels=-1, 70 | attention_resolutions="16,8", 71 | channel_mult="", 72 | dropout=0.0, 73 | class_cond=False, 74 | use_checkpoint=False, 75 | use_scale_shift_norm=True, 76 | resblock_updown=False, 77 | use_fp16=False, 78 | use_new_attention_order=False, 79 | ) 80 | res.update(diffusion_defaults()) 81 | return res 82 | 83 | 84 | def classifier_and_diffusion_defaults(): 85 | res = classifier_defaults() 86 | res.update(diffusion_defaults()) 87 | return res 88 | 89 | 90 | def create_model_and_diffusion( 91 | image_size, 92 | class_cond, 93 | learn_sigma, 94 | num_channels, 95 | num_res_blocks, 96 | channel_mult, 97 | num_heads, 98 | num_head_channels, 99 | num_heads_upsample, 100 | attention_resolutions, 101 | dropout, 102 | diffusion_steps, 103 | noise_schedule, 104 | timestep_respacing, 105 | use_kl, 106 | predict_xstart, 107 | rescale_timesteps, 108 | rescale_learned_sigmas, 109 | use_checkpoint, 110 | use_scale_shift_norm, 111 | resblock_updown, 112 | use_fp16, 113 | use_new_attention_order, 114 | conf=None 115 | ): 116 | model = create_model( 117 | image_size, 118 | num_channels, 119 | num_res_blocks, 120 | channel_mult=channel_mult, 121 | learn_sigma=learn_sigma, 122 | class_cond=class_cond, 123 | use_checkpoint=use_checkpoint, 124 | attention_resolutions=attention_resolutions, 125 | num_heads=num_heads, 126 | num_head_channels=num_head_channels, 127 | num_heads_upsample=num_heads_upsample, 128 | use_scale_shift_norm=use_scale_shift_norm, 129 | dropout=dropout, 130 | resblock_updown=resblock_updown, 131 | use_fp16=use_fp16, 132 | use_new_attention_order=use_new_attention_order, 133 | conf=conf 134 | ) 135 | diffusion = create_gaussian_diffusion( 136 | steps=diffusion_steps, 137 | learn_sigma=learn_sigma, 138 | noise_schedule=noise_schedule, 139 | use_kl=use_kl, 140 | predict_xstart=predict_xstart, 141 | rescale_timesteps=rescale_timesteps, 142 | rescale_learned_sigmas=rescale_learned_sigmas, 143 | timestep_respacing=timestep_respacing, 144 | conf=conf 145 | ) 146 | return model, diffusion 147 | 148 | 149 | def create_model( 150 | image_size, 151 | num_channels, 152 | num_res_blocks, 153 | channel_mult="", 154 | learn_sigma=False, 155 | class_cond=False, 156 | use_checkpoint=False, 157 | attention_resolutions="16", 158 | num_heads=1, 159 | num_head_channels=-1, 160 | num_heads_upsample=-1, 161 | use_scale_shift_norm=False, 162 | dropout=0, 163 | resblock_updown=False, 164 | use_fp16=False, 165 | use_new_attention_order=False, 166 | image_size_inference=None, 167 | conf=None 168 | ): 169 | if channel_mult == "": 170 | if image_size == 512: 171 | channel_mult = (0.5, 1, 1, 2, 2, 4, 4) 172 | elif image_size == 256: 
173 | channel_mult = (1, 1, 2, 2, 4, 4) 174 | elif image_size == 128: 175 | channel_mult = (1, 1, 2, 3, 4) 176 | elif image_size == 64: 177 | channel_mult = (1, 2, 3, 4) 178 | else: 179 | raise ValueError(f"unsupported image size: {image_size}") 180 | elif isinstance(channel_mult, tuple): 181 | pass 182 | else: 183 | channel_mult = tuple(int(ch_mult) 184 | for ch_mult in channel_mult.split(",")) 185 | 186 | attention_ds = [] 187 | for res in attention_resolutions.split(","): 188 | attention_ds.append(image_size // int(res)) 189 | 190 | image_size_inference = image_size_inference or image_size 191 | 192 | return UNetModel( 193 | image_size=image_size, 194 | in_channels=3, 195 | model_channels=num_channels, 196 | out_channels=(3 if not learn_sigma else 6), 197 | num_res_blocks=num_res_blocks, 198 | attention_resolutions=tuple(attention_ds), 199 | dropout=dropout, 200 | channel_mult=channel_mult, 201 | num_classes=(NUM_CLASSES if class_cond else None), 202 | use_checkpoint=use_checkpoint, 203 | use_fp16=use_fp16, 204 | num_heads=num_heads, 205 | num_head_channels=num_head_channels, 206 | num_heads_upsample=num_heads_upsample, 207 | use_scale_shift_norm=use_scale_shift_norm, 208 | resblock_updown=resblock_updown, 209 | use_new_attention_order=use_new_attention_order, 210 | conf=conf 211 | ) 212 | 213 | 214 | def create_classifier( 215 | image_size, 216 | classifier_use_fp16, 217 | classifier_width, 218 | classifier_depth, 219 | classifier_attention_resolutions, 220 | classifier_use_scale_shift_norm, 221 | classifier_resblock_updown, 222 | classifier_pool, 223 | image_size_inference=None 224 | ): 225 | if image_size == 512: 226 | channel_mult = (0.5, 1, 1, 2, 2, 4, 4) 227 | elif image_size == 256: 228 | channel_mult = (1, 1, 2, 2, 4, 4) 229 | elif image_size == 128: 230 | channel_mult = (1, 1, 2, 3, 4) 231 | elif image_size == 64: 232 | channel_mult = (1, 2, 3, 4) 233 | else: 234 | raise ValueError(f"unsupported image size: {image_size}") 235 | 236 | attention_ds = [] 237 | for res in classifier_attention_resolutions.split(","): 238 | attention_ds.append(image_size // int(res)) 239 | 240 | image_size_inference = image_size_inference or image_size 241 | 242 | return EncoderUNetModel( 243 | image_size=image_size_inference, 244 | in_channels=3, 245 | model_channels=classifier_width, 246 | out_channels=1000, 247 | num_res_blocks=classifier_depth, 248 | attention_resolutions=tuple(attention_ds), 249 | channel_mult=channel_mult, 250 | use_fp16=classifier_use_fp16, 251 | num_head_channels=64, 252 | use_scale_shift_norm=classifier_use_scale_shift_norm, 253 | resblock_updown=classifier_resblock_updown, 254 | pool=classifier_pool, 255 | ) 256 | 257 | 258 | def create_gaussian_diffusion( 259 | *, 260 | steps=1000, 261 | learn_sigma=False, 262 | sigma_small=False, 263 | noise_schedule="linear", 264 | use_kl=False, 265 | predict_xstart=False, 266 | rescale_timesteps=False, 267 | rescale_learned_sigmas=False, 268 | timestep_respacing="", 269 | conf=None 270 | ): 271 | 272 | betas = gd.get_named_beta_schedule(noise_schedule, steps, use_scale=True) 273 | 274 | if conf.use_value_logger: 275 | conf.value_logger.add_value( 276 | betas, 'betas create_gaussian_diffusion') 277 | 278 | if use_kl: 279 | loss_type = gd.LossType.RESCALED_KL 280 | elif rescale_learned_sigmas: 281 | loss_type = gd.LossType.RESCALED_MSE 282 | else: 283 | loss_type = gd.LossType.MSE 284 | 285 | if not timestep_respacing: 286 | timestep_respacing = [steps] 287 | 288 | return SpacedDiffusion( 289 | use_timesteps=space_timesteps(steps, 
timestep_respacing), 290 | betas=betas, 291 | model_mean_type=( 292 | gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X 293 | ), 294 | model_var_type=( 295 | ( 296 | gd.ModelVarType.FIXED_LARGE 297 | if not sigma_small 298 | else gd.ModelVarType.FIXED_SMALL 299 | ) 300 | if not learn_sigma 301 | else gd.ModelVarType.LEARNED_RANGE 302 | ), 303 | loss_type=loss_type, 304 | rescale_timesteps=rescale_timesteps, 305 | conf=conf 306 | ) 307 | 308 | def select_args(args_dict, keys): 309 | return {k: args_dict[k] for k in keys} 310 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RePaint 2 | **Inpainting using Denoising Diffusion Probabilistic Models** 3 | 4 | 5 | CVPR 2022 [[Paper]](https://bit.ly/3b1ABEb) 6 | 7 | [![Denoising_Diffusion_Inpainting_Animation](https://user-images.githubusercontent.com/11280511/150849757-5cd762cb-07a3-46aa-a906-0fe4606eba3b.gif)](#) 8 | 9 | ## Setup 10 | 11 | ### 1. Code 12 | 13 | ```bash 14 | git clone https://github.com/andreas128/RePaint.git 15 | ``` 16 | 17 | ### 2. Environment 18 | ```bash 19 | pip install numpy torch blobfile tqdm pyYaml pillow # e.g. torch 1.7.1+cu110. 20 | ``` 21 | 22 | ### 3. Download models and data 23 | 24 | ```bash 25 | pip install --upgrade gdown && bash ./download.sh 26 | ``` 27 | 28 | That downloads the models for ImageNet, CelebA-HQ, and Places2, as well as the face example and example masks. 29 | 30 | 31 | ### 4. Run example 32 | ```bash 33 | python test.py --conf_path confs/face_example.yml 34 | ``` 35 | Find the output in `./log/face_example/inpainted` 36 | 37 | *Note: After refactoring the code, we did not reevaluate all experiments.* 38 | 39 |
40 | 41 | # RePaint fills a missing image part using diffusion models 42 | 43 | 44 | 45 | 46 | 47 | 48 |
*(Demo animations: "RePaint Inpainting using Denoising Diffusion Probabilistic Models", Demo 1 and Demo 2)*
49 | 50 | **What are the blue parts?**
51 | Those parts are missing and therefore have to be filled by RePaint.
RePaint generates the missing parts inspired by the known parts. 52 | 53 | **How does it work?**
54 | RePaint starts from pure noise. Then the image is denoised step-by-step.
It uses the known part to fill the unknown part in each step. 55 | 56 | **Why does the noise level fluctuate during generation?**
57 | Our noise schedule improves the harmony between the generated and
the known part [[4.2 Resampling]](https://bit.ly/3b1ABEb). 58 | 59 |
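The fluctuation comes from resampling: instead of walking straight from t = T to t = 0, the sampler repeatedly jumps a few steps back up the noise ladder and denoises again, which harmonizes the generated content with the known part. Below is a toy sketch of such a down-and-up time schedule. It only illustrates the idea: `t_T` and `jump_n_sample` mirror the `schedule_jump_params` config keys, `jump_length` is an assumed name, and the actual implementation is `get_schedule_jump` in `guided_diffusion/scheduler.py`.

```python
# Toy illustration of a jump schedule -- not the repo's implementation.
def toy_schedule(t_T=250, jump_length=10, jump_n_sample=10):
    # Allow jump_n_sample - 1 extra resamplings at regularly spaced times.
    resamples = {t: jump_n_sample - 1 for t in range(0, t_T - jump_length, jump_length)}
    t, times = t_T, []
    while t >= 1:
        t -= 1
        times.append(t)             # one reverse (denoising) step
        if resamples.get(t, 0) > 0:
            resamples[t] -= 1
            for _ in range(jump_length):
                t += 1
                times.append(t)     # forward (re-noising) steps
    return times

print(toy_schedule()[:25])  # descends, jumps back up, then descends again
```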
60 | 61 | ## Details on data 62 | 63 | **Which datasets and masks have a ready-to-use config file?** 64 | 65 | We provide config files for ImageNet (inet256), CelebA-HQ (c256) and Places2 (p256) for the masks "thin", "thick", "every second line", "super-resolution", "expand" and "half" in [`./confs`](https://github.com/andreas128/RePaint/tree/main/confs). You can use them as shown in the example above. 66 | 67 | **How to prepare the test data?** 68 | 69 | We use [LaMa](https://github.com/saic-mdal/lama) for validation and testing. Follow their instructions and add the images as specified in the config files. When you download the data using `download.sh`, you can see examples of the masks we used. 70 | 71 | **How to apply it to other images?** 72 | 73 | Copy the config file for the dataset that matches your data best (for faces aligned like CelebA-HQ `_c256`, for diverse images `_inet256`). Then set the [`gt_path`](https://github.com/andreas128/RePaint/blob/0fea066b52346c331cdf1bf7aed616c8c8896714/confs/face_example.yml#L70) and [`mask_path`](https://github.com/andreas128/RePaint/blob/0fea066b52346c331cdf1bf7aed616c8c8896714/confs/face_example.yml#L71) to where your input is. The masks have the value 255 for known regions and 0 for unknown areas (the ones that get generated). 74 | 75 | **How to apply it to other datasets?** 76 | 77 | If you work with data other than faces, places, or general images, train a model using the [guided-diffusion](https://github.com/openai/guided-diffusion) repository. Note that RePaint is an inference scheme: we do not train or finetune the diffusion model but condition pre-trained models. 78 | 79 | ## Adapt the code 80 | 81 | **How to design a new schedule?** 82 | 83 | Fill in your own parameters in this [line](https://github.com/andreas128/RePaint/blob/0fea066b52346c331cdf1bf7aed616c8c8896714/guided_diffusion/scheduler.py#L180) to visualize the schedule using `python guided_diffusion/scheduler.py`. Then copy a config file, set your parameters in these [lines](https://github.com/andreas128/RePaint/blob/0fea066b52346c331cdf1bf7aed616c8c8896714/confs/face_example.yml#L61-L65), and run the inference using `python test.py --conf_path confs/my_schedule.yml`. 84 | 85 | **How to speed up the inference?** 86 | 87 | The following settings are in the [schedule_jump_params](https://github.com/andreas128/RePaint/blob/0fea066b52346c331cdf1bf7aed616c8c8896714/confs/face_example.yml#L61) key in the config files. You can visualize them as described above. 88 | 89 | - Reduce `t_T`, the total number of steps (without resampling). The lower it is, the more noise gets removed per step. 90 | - Reduce `jump_n_sample` to resample fewer times. 91 | - Apply resampling not from the beginning but only after a specific time by setting `start_resampling`. 92 | 93 | ## Code overview 94 | 95 | - **Schedule:** The list of diffusion times t that will be traversed is obtained in this [line](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L503), e.g. times = [249, 248, 249, 248, 247, 248, 247, 248, 247, 246, ...]. 96 | - **Denoise:** Reverse diffusion steps from xt (more noisy) to xt-1 (less noisy) are done below this [line](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L515). 
97 | - **Predict:** The model is called [here](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L237): it receives xt and the time t and predicts a tensor with 6 channels containing information about the mean and variance of xt-1. Then the value range of the variance is adjusted [here](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L252). The mean of xt-1 is obtained as a weighted sum of the estimated [x0](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L270) and xt [here](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L189). The obtained mean and variance are used [here](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L402) to sample xt-1. (This is the original reverse step from [guided-diffusion](https://github.com/openai/guided-diffusion.git).) 98 | - **Condition:** The known part of the input image needs to have the same amount of noise as the part that the diffusion model generates before the two can be joined. The required amount of noise is calculated [here](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L368) and added to the known part [here](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L371). The generated part and the noised known part get joined using a mask [here](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L373). 99 | - **Undo:** The forward diffusion step from xt-1 to xt is done after this [line](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L536). The noise gets added to xt-1 [here](https://github.com/andreas128/RePaint/blob/76cb5b49d3f28715980f6e809c6859b148be9867/guided_diffusion/gaussian_diffusion.py#L176). (A stripped-down sketch of this step is shown below.) 100 | 101 | ## Issues 102 | 103 | **Do you have further questions?** 104 | 105 | Please open an [issue](https://github.com/andreas128/RePaint/issues), and we will try to help you. 106 | 107 | **Did you find a mistake?** 108 | 109 | Please create a pull request, for example by clicking the pencil button at the top right of the GitHub page. 110 | 111 | <br>
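Returning to the **Undo** step from the code overview above: re-noising is a single forward diffusion step. A stripped-down sketch of the `_undo` operation in `guided_diffusion/gaussian_diffusion.py`, where `beta` is the schedule value for the current timestep:

```python
import torch as th

def undo_one_step(x_prev, beta):
    # One forward diffusion step: shrink the image and add fresh noise,
    # x_t = sqrt(1 - beta) * x_{t-1} + sqrt(beta) * eps with eps ~ N(0, I).
    return (1 - beta) ** 0.5 * x_prev + beta ** 0.5 * th.randn_like(x_prev)
```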
112 | 113 | # RePaint on diverse content and shapes of missing regions 114 | 115 | The blue region is unknown and filled by RePaint: 116 | 117 | ![Denoising Diffusion Probabilistic Models Inpainting](https://user-images.githubusercontent.com/11280511/150803812-a4729ef8-6ad4-46aa-ae99-8c27fbb2ea2e.png) 118 | 119 | 120 | **Note: RePaint creates many meaningful fillings.**
121 | 1) **Face:** Expressions and features like an earring or a mole.
122 | 2) **Computer:** The computer screen shows different images, text, and even a logo.
123 | 3) **Greens:** RePaint makes sense of the tiny known part and incorporates it in a beetle, spaghetti, and plants.
124 | 4) **Garden:** From simple filling like a curtain to complex filling like a human.
125 | 126 | 127 |
128 | 129 | # Extreme Case 1: Generate every second line 130 | ![Denoising_Diffusion_Probabilistic_Models_Inpainting_Every_Second_Line](https://user-images.githubusercontent.com/11280511/150818064-29789cbe-73c7-45de-a955-9fad5fb24c0e.png) 131 | 132 | - Every second line of the input image is unknown. 133 | - Most inpainting methods fail on such masks. 134 | 135 | 136 | <br>
137 | 138 | # Extreme Case 2: Upscale an image 139 | ![Denoising_Diffusion_Probabilistic_Models_Inpainting_Super_Resolution](https://user-images.githubusercontent.com/11280511/150818741-5ed19a0b-1cf8-4f28-9e57-2e4c12303c3e.png) 140 | 141 | - The inpainting only knows pixels sampled with a stride of 2. 142 | - A ratio of 3/4 of the image has to be filled. 143 | - This is equivalent to Super-Resolution with the Nearest Neighbor kernel. 144 | 145 | <br>
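Masks like these two extreme cases are easy to build yourself, using the convention from the data section above (value 255 for known pixels, 0 for pixels to generate). A minimal sketch; the 256x256 size and the file name are placeholders, and the exact size and format should match the `mask_path` entry of your config:

```python
import numpy as np
from PIL import Image

# "Upscale an image": only every second pixel per dimension is known,
# so 3/4 of the pixels (value 0) have to be generated.
mask = np.zeros((256, 256), dtype=np.uint8)
mask[::2, ::2] = 255
Image.fromarray(mask).save("sr2_mask.png")

# "Generate every second line" instead: keep every second row known.
# mask[:] = 0; mask[::2, :] = 255
```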
146 | 147 | # RePaint conditions the diffusion model on the known part 148 | 149 | - RePaint uses unconditionally trained Denoising Diffusion Probabilistic Models. 150 | - We condition during inference on the given image content. 151 | 152 | ![Denoising Diffusion Probabilistic Models Inpainting Method](https://user-images.githubusercontent.com/11280511/180631151-59b6674b-bf2c-4501-8307-03c9f5f593ae.gif) 153 | 154 | **Intuition of one conditioned denoising step:** 155 | 1) **Sample the known part:** Add Gaussian noise to the known regions of the image.<br>
We obtain a noisy image that follows the denoising process exactly. 156 | 2) **Denoise one step:** Denoise the previous image for one step. This generates
content for the unknown region conditioned on the known region. 157 | 3) **Join:** Merge the images from both steps. 158 | 159 | Details are in Algorithm 1 on Page 5. [[Paper]](https://bit.ly/3b1ABEb) 160 | 161 | 162 |
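In tensor form, one conditioned step closely mirrors `p_sample` in `guided_diffusion/gaussian_diffusion.py`. A condensed sketch with the same variable names; `denoise_step` is a hypothetical stand-in for the model's reverse step, and in the actual code the join happens right before the model call:

```python
import torch as th

def conditioned_step(x, gt, gt_keep_mask, alpha_cumprod, denoise_step):
    # 1) Sample the known part: scale the ground truth and add exactly
    #    enough noise to match the noise level of the current sample x.
    weighed_gt = alpha_cumprod ** 0.5 * gt + (1 - alpha_cumprod) ** 0.5 * th.randn_like(x)
    # 3) Join: known regions (mask = 1) come from the noised ground truth,
    #    unknown regions (mask = 0) from the current sample.
    x = gt_keep_mask * weighed_gt + (1 - gt_keep_mask) * x
    # 2) Denoise one step with the unconditionally trained model.
    return denoise_step(x)
```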
163 | 164 | # How to harmonize the generated part with the known part? 165 | 166 | - **Fail:** When using only the algorithm above, the filling is not well harmonized with the known part (n=1). 167 | - **Fix:** When applying the [[4.2 Resampling]](https://bit.ly/3b1ABEb) technique, the images are better harmonized (n>1). 168 | 169 | *(Figure: Diffusion Model Resampling)* 170 | 171 | <br>
172 | 173 | # RePaint Fails 174 | - The ImageNet model is biased towards inpainting dogs. 175 | - This is due to the high ratio of dog images in ImageNet. 176 | 177 | *(Figure: RePaint fails)* 178 | 179 | <br>
180 | 181 | # User Study State-of-the-Art Comparison 182 | 183 | - Outperforms autoregression-based and GAN-based SOTA methods,
with 95% significance for all masks except for two inconclusive cases. 184 | - The user study was done for six different masks on three datasets. 185 | - RePaint outperformed SOTA methods in 42 of 44 cases. [[Paper]](https://bit.ly/3b1ABEb) 186 | 187 |
188 | 189 | # Explore the Visual Examples 190 | - Datasets: CelebA-HQ, ImageNet, Places2 191 | - Masks: Random strokes, half image, huge, sparse 192 | - Explore more examples like this in the [[Appendix]](https://bit.ly/3b1ABEb). 193 | 194 | 195 | *(Figure: Denoising Diffusion Inpainting Examples)* 196 | 197 | 198 | <br>
199 | 200 | 201 | # Acknowledgement 202 | 203 | This work was supported by the ETH Zürich Fund (OK), a Huawei Technologies Oy (Finland) project, and an Nvidia GPU grant. 204 | 205 | This repository is based on [guided-diffusion](https://github.com/openai/guided-diffusion.git) from OpenAI. 206 | -------------------------------------------------------------------------------- /guided_diffusion/gaussian_diffusion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | """ 18 | This code started out as a PyTorch port of Ho et al's diffusion models: 19 | https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py 20 | 21 | Docstrings have been added, as well as DDIM sampling and a new collection of beta schedules. 22 | """ 23 | 24 | import enum 25 | 26 | import numpy as np 27 | import torch as th 28 | 29 | from collections import defaultdict 30 | 31 | from guided_diffusion.scheduler import get_schedule_jump 32 | 33 | def get_named_beta_schedule(schedule_name, num_diffusion_timesteps, use_scale): 34 | """ 35 | Get a pre-defined beta schedule for the given name. 36 | 37 | The beta schedule library consists of beta schedules which remain similar 38 | in the limit of num_diffusion_timesteps. 39 | Beta schedules may be added, but should not be removed or changed once 40 | they are committed to maintain backwards compatibility. 41 | """ 42 | if schedule_name == "linear": 43 | # Linear schedule from Ho et al, extended to work for any number of 44 | # diffusion steps. 45 | 46 | if use_scale: 47 | scale = 1000 / num_diffusion_timesteps 48 | else: 49 | scale = 1 50 | 51 | beta_start = scale * 0.0001 52 | beta_end = scale * 0.02 53 | return np.linspace( 54 | beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64 55 | ) 56 | raise NotImplementedError(f"unknown beta schedule: {schedule_name}") 57 | class ModelMeanType(enum.Enum): 58 | """ 59 | Which type of output the model predicts. 60 | """ 61 | 62 | PREVIOUS_X = enum.auto() # the model predicts x_{t-1} 63 | START_X = enum.auto() # the model predicts x_0 64 | EPSILON = enum.auto() # the model predicts epsilon 65 | 66 | 67 | class ModelVarType(enum.Enum): 68 | """ 69 | What is used as the model's output variance. 70 | 71 | The LEARNED_RANGE option has been added to allow the model to predict 72 | values between FIXED_SMALL and FIXED_LARGE, making its job easier. 
73 | """ 74 | 75 | LEARNED = enum.auto() 76 | FIXED_SMALL = enum.auto() 77 | FIXED_LARGE = enum.auto() 78 | LEARNED_RANGE = enum.auto() 79 | 80 | 81 | class LossType(enum.Enum): 82 | MSE = enum.auto() # use raw MSE loss (and KL when learning variances) 83 | RESCALED_MSE = ( 84 | enum.auto() 85 | ) # use raw MSE loss (with RESCALED_KL when learning variances) 86 | KL = enum.auto() # use the variational lower-bound 87 | RESCALED_KL = enum.auto() # like KL, but rescale to estimate the full VLB 88 | 89 | def is_vb(self): 90 | return self == LossType.KL or self == LossType.RESCALED_KL 91 | 92 | 93 | class GaussianDiffusion: 94 | """ 95 | Utilities for training and sampling diffusion models. 96 | 97 | Ported directly from here, and then adapted over time to further experimentation. 98 | https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py#L42 99 | 100 | :param betas: a 1-D numpy array of betas for each diffusion timestep, 101 | starting at T and going to 1. 102 | :param model_mean_type: a ModelMeanType determining what the model outputs. 103 | :param model_var_type: a ModelVarType determining how variance is output. 104 | :param loss_type: a LossType determining the loss function to use. 105 | :param rescale_timesteps: if True, pass floating point timesteps into the 106 | model so that they are always scaled like in the 107 | original paper (0 to 1000). 108 | """ 109 | 110 | def __init__( 111 | self, 112 | *, 113 | betas, 114 | model_mean_type, 115 | model_var_type, 116 | loss_type, 117 | rescale_timesteps=False, 118 | conf=None 119 | ): 120 | self.model_mean_type = model_mean_type 121 | self.model_var_type = model_var_type 122 | self.loss_type = loss_type 123 | self.rescale_timesteps = rescale_timesteps 124 | 125 | self.conf = conf 126 | 127 | # Use float64 for accuracy. 
128 | betas = np.array(betas, dtype=np.float64) 129 | self.betas = betas 130 | assert len(betas.shape) == 1, "betas must be 1-D" 131 | assert (betas > 0).all() and (betas <= 1).all() 132 | 133 | self.num_timesteps = int(betas.shape[0]) 134 | 135 | alphas = 1.0 - betas 136 | self.alphas_cumprod = np.cumprod(alphas, axis=0) 137 | self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1]) 138 | self.alphas_cumprod_prev_prev = np.append( 139 | 1.0, self.alphas_cumprod_prev[:-1]) 140 | 141 | self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0) 142 | 143 | assert self.alphas_cumprod_prev.shape == (self.num_timesteps,) 144 | 145 | self.sqrt_alphas_cumprod = np.sqrt(self.alphas_cumprod) 146 | self.sqrt_alphas_cumprod_prev = np.sqrt(self.alphas_cumprod_prev) 147 | self.sqrt_one_minus_alphas_cumprod = np.sqrt(1.0 - self.alphas_cumprod) 148 | self.log_one_minus_alphas_cumprod = np.log(1.0 - self.alphas_cumprod) 149 | self.sqrt_recip_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod) 150 | self.sqrt_recipm1_alphas_cumprod = np.sqrt( 151 | 1.0 / self.alphas_cumprod - 1) 152 | 153 | self.posterior_variance = ( 154 | betas * (1.0 - self.alphas_cumprod_prev) / 155 | (1.0 - self.alphas_cumprod) 156 | ) 157 | self.posterior_log_variance_clipped = np.log( 158 | np.append(self.posterior_variance[1], self.posterior_variance[1:]) 159 | ) 160 | self.posterior_mean_coef1 = ( 161 | betas * np.sqrt(self.alphas_cumprod_prev) / 162 | (1.0 - self.alphas_cumprod) 163 | ) 164 | self.posterior_mean_coef2 = ( 165 | (1.0 - self.alphas_cumprod_prev) 166 | * np.sqrt(alphas) 167 | / (1.0 - self.alphas_cumprod) 168 | ) 169 | 170 | def undo(self, image_before_step, img_after_model, est_x_0, t, debug=False): 171 | return self._undo(img_after_model, t) 172 | 173 | def _undo(self, img_out, t): 174 | beta = _extract_into_tensor(self.betas, t, img_out.shape) 175 | 176 | img_in_est = th.sqrt(1 - beta) * img_out + \ 177 | th.sqrt(beta) * th.randn_like(img_out) 178 | 179 | return img_in_est 180 | 181 | def q_posterior_mean_variance(self, x_start, x_t, t): 182 | """ 183 | Compute the mean and variance of the diffusion posterior: 184 | 185 | q(x_{t-1} | x_t, x_0) 186 | 187 | """ 188 | assert x_start.shape == x_t.shape 189 | posterior_mean = ( 190 | _extract_into_tensor(self.posterior_mean_coef1, 191 | t, x_t.shape) * x_start 192 | + _extract_into_tensor(self.posterior_mean_coef2, 193 | t, x_t.shape) * x_t 194 | ) 195 | posterior_variance = _extract_into_tensor( 196 | self.posterior_variance, t, x_t.shape) 197 | posterior_log_variance_clipped = _extract_into_tensor( 198 | self.posterior_log_variance_clipped, t, x_t.shape 199 | ) 200 | assert ( 201 | posterior_mean.shape[0] 202 | == posterior_variance.shape[0] 203 | == posterior_log_variance_clipped.shape[0] 204 | == x_start.shape[0] 205 | ) 206 | return posterior_mean, posterior_variance, posterior_log_variance_clipped 207 | 208 | def p_mean_variance( 209 | self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None 210 | ): 211 | """ 212 | Apply the model to get p(x_{t-1} | x_t), as well as a prediction of 213 | the initial x, x_0. 214 | 215 | :param model: the model, which takes a signal and a batch of timesteps 216 | as input. 217 | :param x: the [N x C x ...] tensor at time t. 218 | :param t: a 1-D Tensor of timesteps. 219 | :param clip_denoised: if True, clip the denoised signal into [-1, 1]. 220 | :param denoised_fn: if not None, a function which applies to the 221 | x_start prediction before it is used to sample. 
Applies before 222 | clip_denoised. 223 | :param model_kwargs: if not None, a dict of extra keyword arguments to 224 | pass to the model. This can be used for conditioning. 225 | :return: a dict with the following keys: 226 | - 'mean': the model mean output. 227 | - 'variance': the model variance output. 228 | - 'log_variance': the log of 'variance'. 229 | - 'pred_xstart': the prediction for x_0. 230 | """ 231 | if model_kwargs is None: 232 | model_kwargs = {} 233 | 234 | B, C = x.shape[:2] 235 | assert t.shape == (B,) 236 | 237 | model_output = model(x, self._scale_timesteps(t), **model_kwargs) 238 | 239 | assert model_output.shape == (B, C * 2, *x.shape[2:]) 240 | model_output, model_var_values = th.split(model_output, C, dim=1) 241 | 242 | if self.model_var_type == ModelVarType.LEARNED: 243 | model_log_variance = model_var_values 244 | model_variance = th.exp(model_log_variance) 245 | else: 246 | min_log = _extract_into_tensor( 247 | self.posterior_log_variance_clipped, t, x.shape 248 | ) 249 | max_log = _extract_into_tensor(np.log(self.betas), t, x.shape) 250 | frac = (model_var_values + 1) / 2 251 | model_log_variance = frac * max_log + (1 - frac) * min_log 252 | model_variance = th.exp(model_log_variance) 253 | 254 | def process_xstart(x): 255 | if denoised_fn is not None: 256 | x = denoised_fn(x) 257 | if clip_denoised: 258 | return x.clamp(-1, 1) 259 | return x 260 | 261 | if self.model_mean_type == ModelMeanType.PREVIOUS_X: 262 | pred_xstart = process_xstart( 263 | self._predict_xstart_from_xprev(x_t=x, t=t, xprev=model_output) 264 | ) 265 | model_mean = model_output 266 | elif self.model_mean_type in [ModelMeanType.START_X, ModelMeanType.EPSILON]: 267 | if self.model_mean_type == ModelMeanType.START_X: 268 | pred_xstart = process_xstart(model_output) 269 | else: 270 | pred_xstart = process_xstart( 271 | self._predict_xstart_from_eps(x_t=x, t=t, eps=model_output) 272 | ) 273 | model_mean, _, _ = self.q_posterior_mean_variance( 274 | x_start=pred_xstart, x_t=x, t=t 275 | ) 276 | else: 277 | raise NotImplementedError(self.model_mean_type) 278 | 279 | assert ( 280 | model_mean.shape == model_log_variance.shape == pred_xstart.shape == x.shape 281 | ) 282 | 283 | return { 284 | "mean": model_mean, 285 | "variance": model_variance, 286 | "log_variance": model_log_variance, 287 | "pred_xstart": pred_xstart, 288 | } 289 | 290 | def _predict_xstart_from_eps(self, x_t, t, eps): 291 | assert x_t.shape == eps.shape 292 | return ( 293 | _extract_into_tensor( 294 | self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t 295 | - _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * eps 296 | ) 297 | 298 | def condition_mean(self, cond_fn, p_mean_var, x, t, model_kwargs=None): 299 | """ 300 | Compute the mean for the previous step, given a function cond_fn that 301 | computes the gradient of a conditional log probability with respect to 302 | x. In particular, cond_fn computes grad(log(p(y|x))), and we want to 303 | condition on y. 304 | 305 | This uses the conditioning strategy from Sohl-Dickstein et al. (2015). 
306 | """ 307 | 308 | gradient = cond_fn(x, self._scale_timesteps(t), **model_kwargs) 309 | 310 | 311 | new_mean = ( 312 | p_mean_var["mean"].float() + p_mean_var["variance"] * 313 | gradient.float() 314 | ) 315 | return new_mean 316 | 317 | def p_sample( 318 | self, 319 | model, 320 | x, 321 | t, 322 | clip_denoised=True, 323 | denoised_fn=None, 324 | cond_fn=None, 325 | model_kwargs=None, 326 | conf=None, 327 | meas_fn=None, 328 | pred_xstart=None, 329 | idx_wall=-1 330 | ): 331 | """ 332 | Sample x_{t-1} from the model at the given timestep. 333 | 334 | :param model: the model to sample from. 335 | :param x: the current tensor at x_{t-1}. 336 | :param t: the value of t, starting at 0 for the first diffusion step. 337 | :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. 338 | :param denoised_fn: if not None, a function which applies to the 339 | x_start prediction before it is used to sample. 340 | :param cond_fn: if not None, this is a gradient function that acts 341 | similarly to the model. 342 | :param model_kwargs: if not None, a dict of extra keyword arguments to 343 | pass to the model. This can be used for conditioning. 344 | :return: a dict containing the following keys: 345 | - 'sample': a random sample from the model. 346 | - 'pred_xstart': a prediction of x_0. 347 | """ 348 | noise = th.randn_like(x) 349 | 350 | if conf.inpa_inj_sched_prev: 351 | 352 | if pred_xstart is not None: 353 | gt_keep_mask = model_kwargs.get('gt_keep_mask') 354 | if gt_keep_mask is None: 355 | gt_keep_mask = conf.get_inpa_mask(x) 356 | 357 | gt = model_kwargs['gt'] 358 | 359 | alpha_cumprod = _extract_into_tensor( 360 | self.alphas_cumprod, t, x.shape) 361 | 362 | if conf.inpa_inj_sched_prev_cumnoise: 363 | weighed_gt = self.get_gt_noised(gt, int(t[0].item())) 364 | else: 365 | gt_weight = th.sqrt(alpha_cumprod) 366 | gt_part = gt_weight * gt 367 | 368 | noise_weight = th.sqrt((1 - alpha_cumprod)) 369 | noise_part = noise_weight * th.randn_like(x) 370 | 371 | weighed_gt = gt_part + noise_part 372 | 373 | x = ( 374 | gt_keep_mask * ( 375 | weighed_gt 376 | ) 377 | + 378 | (1 - gt_keep_mask) * ( 379 | x 380 | ) 381 | ) 382 | 383 | 384 | out = self.p_mean_variance( 385 | model, 386 | x, 387 | t, 388 | clip_denoised=clip_denoised, 389 | denoised_fn=denoised_fn, 390 | model_kwargs=model_kwargs, 391 | ) 392 | 393 | nonzero_mask = ( 394 | (t != 0).float().view(-1, *([1] * (len(x.shape) - 1))) 395 | ) 396 | 397 | if cond_fn is not None: 398 | out["mean"] = self.condition_mean( 399 | cond_fn, out, x, t, model_kwargs=model_kwargs 400 | ) 401 | 402 | sample = out["mean"] + nonzero_mask * \ 403 | th.exp(0.5 * out["log_variance"]) * noise 404 | 405 | result = {"sample": sample, 406 | "pred_xstart": out["pred_xstart"], 'gt': model_kwargs.get('gt')} 407 | 408 | return result 409 | 410 | def p_sample_loop( 411 | self, 412 | model, 413 | shape, 414 | noise=None, 415 | clip_denoised=True, 416 | denoised_fn=None, 417 | cond_fn=None, 418 | model_kwargs=None, 419 | device=None, 420 | progress=True, 421 | return_all=False, 422 | conf=None 423 | ): 424 | """ 425 | Generate samples from the model. 426 | 427 | :param model: the model module. 428 | :param shape: the shape of the samples, (N, C, H, W). 429 | :param noise: if specified, the noise from the encoder to sample. 430 | Should be of the same shape as `shape`. 431 | :param clip_denoised: if True, clip x_start predictions to [-1, 1]. 
432 | :param denoised_fn: if not None, a function which applies to the 433 | x_start prediction before it is used to sample. 434 | :param cond_fn: if not None, this is a gradient function that acts 435 | similarly to the model. 436 | :param model_kwargs: if not None, a dict of extra keyword arguments to 437 | pass to the model. This can be used for conditioning. 438 | :param device: if specified, the device to create the samples on. 439 | If not specified, use a model parameter's device. 440 | :param progress: if True, show a tqdm progress bar. 441 | :return: a non-differentiable batch of samples. 442 | """ 443 | final = None 444 | for sample in self.p_sample_loop_progressive( 445 | model, 446 | shape, 447 | noise=noise, 448 | clip_denoised=clip_denoised, 449 | denoised_fn=denoised_fn, 450 | cond_fn=cond_fn, 451 | model_kwargs=model_kwargs, 452 | device=device, 453 | progress=progress, 454 | conf=conf 455 | ): 456 | final = sample 457 | 458 | if return_all: 459 | return final 460 | else: 461 | return final["sample"] 462 | 463 | def p_sample_loop_progressive( 464 | self, 465 | model, 466 | shape, 467 | noise=None, 468 | clip_denoised=True, 469 | denoised_fn=None, 470 | cond_fn=None, 471 | model_kwargs=None, 472 | device=None, 473 | progress=False, 474 | conf=None 475 | ): 476 | """ 477 | Generate samples from the model and yield intermediate samples from 478 | each timestep of diffusion. 479 | 480 | Arguments are the same as p_sample_loop(). 481 | Returns a generator over dicts, where each dict is the return value of 482 | p_sample(). 483 | """ 484 | if device is None: 485 | device = next(model.parameters()).device 486 | assert isinstance(shape, (tuple, list)) 487 | if noise is not None: 488 | image_after_step = noise 489 | else: 490 | image_after_step = th.randn(*shape, device=device) 491 | 492 | debug_steps = conf.pget('debug.num_timesteps') 493 | 494 | self.gt_noises = None # reset for next image 495 | 496 | 497 | pred_xstart = None 498 | 499 | idx_wall = -1 500 | sample_idxs = defaultdict(lambda: 0) 501 | 502 | if conf.schedule_jump_params: 503 | times = get_schedule_jump(**conf.schedule_jump_params) 504 | 505 | time_pairs = list(zip(times[:-1], times[1:])) 506 | if progress: 507 | from tqdm.auto import tqdm 508 | time_pairs = tqdm(time_pairs) 509 | 510 | for t_last, t_cur in time_pairs: 511 | idx_wall += 1 512 | t_last_t = th.tensor([t_last] * shape[0], # pylint: disable=not-callable 513 | device=device) 514 | 515 | if t_cur < t_last: # reverse 516 | with th.no_grad(): 517 | image_before_step = image_after_step.clone() 518 | out = self.p_sample( 519 | model, 520 | image_after_step, 521 | t_last_t, 522 | clip_denoised=clip_denoised, 523 | denoised_fn=denoised_fn, 524 | cond_fn=cond_fn, 525 | model_kwargs=model_kwargs, 526 | conf=conf, 527 | pred_xstart=pred_xstart 528 | ) 529 | image_after_step = out["sample"] 530 | pred_xstart = out["pred_xstart"] 531 | 532 | sample_idxs[t_cur] += 1 533 | 534 | yield out 535 | 536 | else: 537 | t_shift = conf.get('inpa_inj_time_shift', 1) 538 | 539 | image_before_step = image_after_step.clone() 540 | image_after_step = self.undo( 541 | image_before_step, image_after_step, 542 | est_x_0=out['pred_xstart'], t=t_last_t+t_shift, debug=False) 543 | pred_xstart = out["pred_xstart"] 544 | 545 | def _extract_into_tensor(arr, timesteps, broadcast_shape): 546 | """ 547 | Extract values from a 1-D numpy array for a batch of indices. 548 | 549 | :param arr: the 1-D numpy array. 550 | :param timesteps: a tensor of indices into the array to extract. 
551 | :param broadcast_shape: a larger shape of K dimensions with the batch 552 | dimension equal to the length of timesteps. 553 | :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. 554 | """ 555 | res = th.from_numpy(arr).to(device=timesteps.device)[timesteps].float() 556 | while len(res.shape) < len(broadcast_shape): 557 | res = res[..., None] 558 | return res.expand(broadcast_shape) 559 | -------------------------------------------------------------------------------- /guided_diffusion/unet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd. 2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 7 | # 8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd. 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license 16 | 17 | from abc import abstractmethod 18 | 19 | import math 20 | 21 | from .fp16_util import convert_module_to_f16, convert_module_to_f32 22 | import torch as th 23 | import torch.nn as nn 24 | import torch.nn.functional as F 25 | 26 | from .nn import ( 27 | checkpoint, 28 | conv_nd, 29 | linear, 30 | avg_pool_nd, 31 | zero_module, 32 | normalization, 33 | timestep_embedding, 34 | ) 35 | 36 | 37 | class AttentionPool2d(nn.Module): 38 | """ 39 | Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py 40 | """ 41 | 42 | def __init__( 43 | self, 44 | spacial_dim: int, 45 | embed_dim: int, 46 | num_heads_channels: int, 47 | output_dim: int = None, 48 | ): 49 | super().__init__() 50 | self.positional_embedding = nn.Parameter( 51 | th.randn(embed_dim, spacial_dim ** 2 + 1) / embed_dim ** 0.5 52 | ) 53 | self.qkv_proj = conv_nd(1, embed_dim, 3 * embed_dim, 1) 54 | self.c_proj = conv_nd(1, embed_dim, output_dim or embed_dim, 1) 55 | self.num_heads = embed_dim // num_heads_channels 56 | self.attention = QKVAttention(self.num_heads) 57 | 58 | def forward(self, x, **kwargs): 59 | b, c, *_spatial = x.shape 60 | x = x.reshape(b, c, -1) # NC(HW) 61 | x = th.cat([x.mean(dim=-1, keepdim=True), x], dim=-1) # NC(HW+1) 62 | x = x + self.positional_embedding[None, :, :].to(x.dtype) # NC(HW+1) 63 | x = self.qkv_proj(x) 64 | x = self.attention(x) 65 | x = self.c_proj(x) 66 | return x[:, :, 0] 67 | 68 | 69 | class TimestepBlock(nn.Module): 70 | """ 71 | Any module where forward() takes timestep embeddings as a second argument. 72 | """ 73 | 74 | @abstractmethod 75 | def forward(self, x, emb): 76 | """ 77 | Apply the module to `x` given `emb` timestep embeddings. 78 | """ 79 | 80 | 81 | class TimestepEmbedSequential(nn.Sequential, TimestepBlock): 82 | """ 83 | A sequential module that passes timestep embeddings to the children that 84 | support it as an extra input. 
85 | """ 86 | 87 | def forward(self, x, emb): 88 | for layer in self: 89 | if isinstance(layer, TimestepBlock): 90 | x = layer(x, emb) 91 | else: 92 | x = layer(x) 93 | return x 94 | 95 | 96 | class Upsample(nn.Module): 97 | """ 98 | An upsampling layer with an optional convolution. 99 | 100 | :param channels: channels in the inputs and outputs. 101 | :param use_conv: a bool determining if a convolution is applied. 102 | :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then 103 | upsampling occurs in the inner-two dimensions. 104 | """ 105 | 106 | def __init__(self, channels, use_conv, dims=2, out_channels=None): 107 | super().__init__() 108 | self.channels = channels 109 | self.out_channels = out_channels or channels 110 | self.use_conv = use_conv 111 | self.dims = dims 112 | if use_conv: 113 | self.conv = conv_nd(dims, self.channels, 114 | self.out_channels, 3, padding=1) 115 | 116 | def forward(self, x): 117 | assert x.shape[1] == self.channels 118 | if self.dims == 3: 119 | x = F.interpolate( 120 | x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest" 121 | ) 122 | else: 123 | x = F.interpolate(x, scale_factor=2, mode="nearest") 124 | if self.use_conv: 125 | x = self.conv(x) 126 | return x 127 | 128 | 129 | class Downsample(nn.Module): 130 | """ 131 | A downsampling layer with an optional convolution. 132 | 133 | :param channels: channels in the inputs and outputs. 134 | :param use_conv: a bool determining if a convolution is applied. 135 | :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then 136 | downsampling occurs in the inner-two dimensions. 137 | """ 138 | 139 | def __init__(self, channels, use_conv, dims=2, out_channels=None): 140 | super().__init__() 141 | self.channels = channels 142 | self.out_channels = out_channels or channels 143 | self.use_conv = use_conv 144 | self.dims = dims 145 | stride = 2 if dims != 3 else (1, 2, 2) 146 | if use_conv: 147 | self.op = conv_nd( 148 | dims, self.channels, self.out_channels, 3, stride=stride, padding=1 149 | ) 150 | else: 151 | assert self.channels == self.out_channels 152 | self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) 153 | 154 | def forward(self, x): 155 | assert x.shape[1] == self.channels 156 | return self.op(x) 157 | 158 | 159 | class ResBlock(TimestepBlock): 160 | """ 161 | A residual block that can optionally change the number of channels. 162 | 163 | :param channels: the number of input channels. 164 | :param emb_channels: the number of timestep embedding channels. 165 | :param dropout: the rate of dropout. 166 | :param out_channels: if specified, the number of out channels. 167 | :param use_conv: if True and out_channels is specified, use a spatial 168 | convolution instead of a smaller 1x1 convolution to change the 169 | channels in the skip connection. 170 | :param dims: determines if the signal is 1D, 2D, or 3D. 171 | :param use_checkpoint: if True, use gradient checkpointing on this module. 172 | :param up: if True, use this block for upsampling. 173 | :param down: if True, use this block for downsampling. 
174 | """ 175 | 176 | def __init__( 177 | self, 178 | channels, 179 | emb_channels, 180 | dropout, 181 | out_channels=None, 182 | use_conv=False, 183 | use_scale_shift_norm=False, 184 | dims=2, 185 | use_checkpoint=False, 186 | up=False, 187 | down=False, 188 | ): 189 | super().__init__() 190 | self.channels = channels 191 | self.emb_channels = emb_channels 192 | self.dropout = dropout 193 | self.out_channels = out_channels or channels 194 | self.use_conv = use_conv 195 | self.use_checkpoint = use_checkpoint 196 | self.use_scale_shift_norm = use_scale_shift_norm 197 | 198 | self.in_layers = nn.Sequential( 199 | normalization(channels), 200 | nn.SiLU(), 201 | conv_nd(dims, channels, self.out_channels, 3, padding=1), 202 | ) 203 | 204 | self.updown = up or down 205 | 206 | if up: 207 | self.h_upd = Upsample(channels, False, dims) 208 | self.x_upd = Upsample(channels, False, dims) 209 | elif down: 210 | self.h_upd = Downsample(channels, False, dims) 211 | self.x_upd = Downsample(channels, False, dims) 212 | else: 213 | self.h_upd = self.x_upd = nn.Identity() 214 | 215 | self.emb_layers = nn.Sequential( 216 | nn.SiLU(), 217 | linear( 218 | emb_channels, 219 | 2 * self.out_channels if use_scale_shift_norm else self.out_channels, 220 | ), 221 | ) 222 | self.out_layers = nn.Sequential( 223 | normalization(self.out_channels), 224 | nn.SiLU(), 225 | nn.Dropout(p=dropout), 226 | zero_module( 227 | conv_nd(dims, self.out_channels, 228 | self.out_channels, 3, padding=1) 229 | ), 230 | ) 231 | 232 | if self.out_channels == channels: 233 | self.skip_connection = nn.Identity() 234 | elif use_conv: 235 | self.skip_connection = conv_nd( 236 | dims, channels, self.out_channels, 3, padding=1 237 | ) 238 | else: 239 | self.skip_connection = conv_nd( 240 | dims, channels, self.out_channels, 1) 241 | 242 | def forward(self, x, emb): 243 | """ 244 | Apply the block to a Tensor, conditioned on a timestep embedding. 245 | 246 | :param x: an [N x C x ...] Tensor of features. 247 | :param emb: an [N x emb_channels] Tensor of timestep embeddings. 248 | :return: an [N x C x ...] Tensor of outputs. 249 | """ 250 | return checkpoint( 251 | self._forward, (x, emb), self.parameters(), self.use_checkpoint 252 | ) 253 | 254 | def _forward(self, x, emb): 255 | if self.updown: 256 | in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] 257 | h = in_rest(x) 258 | h = self.h_upd(h) 259 | x = self.x_upd(x) 260 | h = in_conv(h) 261 | else: 262 | h = self.in_layers(x) 263 | emb_out = self.emb_layers(emb).type(h.dtype) 264 | while len(emb_out.shape) < len(h.shape): 265 | emb_out = emb_out[..., None] 266 | if self.use_scale_shift_norm: 267 | out_norm, out_rest = self.out_layers[0], self.out_layers[1:] 268 | scale, shift = th.chunk(emb_out, 2, dim=1) 269 | h = out_norm(h) * (1 + scale) + shift 270 | h = out_rest(h) 271 | else: 272 | h = h + emb_out 273 | h = self.out_layers(h) 274 | return self.skip_connection(x) + h 275 | 276 | 277 | class AttentionBlock(nn.Module): 278 | """ 279 | An attention block that allows spatial positions to attend to each other. 280 | 281 | Originally ported from here, but adapted to the N-d case. 282 | https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
283 | """ 284 | 285 | def __init__( 286 | self, 287 | channels, 288 | num_heads=1, 289 | num_head_channels=-1, 290 | use_checkpoint=False, 291 | use_new_attention_order=False, 292 | ): 293 | super().__init__() 294 | self.channels = channels 295 | if num_head_channels == -1: 296 | self.num_heads = num_heads 297 | else: 298 | assert ( 299 | channels % num_head_channels == 0 300 | ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" 301 | self.num_heads = channels // num_head_channels 302 | self.use_checkpoint = use_checkpoint 303 | self.norm = normalization(channels) 304 | self.qkv = conv_nd(1, channels, channels * 3, 1) 305 | if use_new_attention_order: 306 | # split qkv before split heads 307 | self.attention = QKVAttention(self.num_heads) 308 | else: 309 | # split heads before split qkv 310 | self.attention = QKVAttentionLegacy(self.num_heads) 311 | 312 | self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) 313 | 314 | def forward(self, x): 315 | return checkpoint(self._forward, (x,), self.parameters(), True) 316 | 317 | def _forward(self, x): 318 | b, c, *spatial = x.shape 319 | 320 | # Flatten both spatial dimensions into a single token dimension 321 | x = x.reshape(b, c, -1) 322 | 323 | # Compute queries, keys, and values with a 1x1 convolution (c -> 3*c channels) 324 | qkv = self.qkv(self.norm(x)) 325 | 326 | h = self.attention(qkv) 327 | 328 | h = self.proj_out(h) 329 | return (x + h).reshape(b, c, *spatial) 330 | 331 | 332 | class QKVAttentionLegacy(nn.Module): 333 | """ 334 | A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping 335 | """ 336 | 337 | def __init__(self, n_heads): 338 | super().__init__() 339 | self.n_heads = n_heads 340 | 341 | def forward(self, qkv): 342 | """ 343 | Apply QKV attention. 344 | 345 | :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. 346 | :return: an [N x (H * C) x T] tensor after attention. 347 | """ 348 | bs, width, length = qkv.shape 349 | assert width % (3 * self.n_heads) == 0 350 | ch = width // (3 * self.n_heads) 351 | q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, 352 | length).split(ch, dim=1) 353 | scale = 1 / math.sqrt(math.sqrt(ch)) 354 | weight = th.einsum( 355 | "bct,bcs->bts", q * scale, k * scale 356 | ) # More stable with f16 than dividing afterwards 357 | weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) 358 | a = th.einsum("bts,bcs->bct", weight, v) 359 | return a.reshape(bs, -1, length) 360 | 361 | @staticmethod 362 | def count_flops(model, _x, y): 363 | return count_flops_attn(model, _x, y) 364 | 365 | 366 | class QKVAttention(nn.Module): 367 | """ 368 | A module which performs QKV attention and splits in a different order. 369 | """ 370 | 371 | def __init__(self, n_heads): 372 | super().__init__() 373 | self.n_heads = n_heads 374 | 375 | def forward(self, qkv): 376 | """ 377 | Apply QKV attention. 378 | 379 | :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. 380 | :return: an [N x (H * C) x T] tensor after attention. 
381 | """ 382 | bs, width, length = qkv.shape 383 | assert width % (3 * self.n_heads) == 0 384 | ch = width // (3 * self.n_heads) 385 | q, k, v = qkv.chunk(3, dim=1) 386 | scale = 1 / math.sqrt(math.sqrt(ch)) 387 | weight = th.einsum( 388 | "bct,bcs->bts", 389 | (q * scale).view(bs * self.n_heads, ch, length), 390 | (k * scale).view(bs * self.n_heads, ch, length), 391 | ) # More stable with f16 than dividing afterwards 392 | weight = th.softmax(weight.float(), dim=-1).type(weight.dtype) 393 | a = th.einsum("bts,bcs->bct", weight, 394 | v.reshape(bs * self.n_heads, ch, length)) 395 | return a.reshape(bs, -1, length) 396 | 397 | @staticmethod 398 | def count_flops(model, _x, y): 399 | return count_flops_attn(model, _x, y) 400 | 401 | 402 | class UNetModel(nn.Module): 403 | """ 404 | The full UNet model with attention and timestep embedding. 405 | 406 | :param in_channels: channels in the input Tensor. 407 | :param model_channels: base channel count for the model. 408 | :param out_channels: channels in the output Tensor. 409 | :param num_res_blocks: number of residual blocks per downsample. 410 | :param attention_resolutions: a collection of downsample rates at which 411 | attention will take place. May be a set, list, or tuple. 412 | For example, if this contains 4, then at 4x downsampling, attention 413 | will be used. 414 | :param dropout: the dropout probability. 415 | :param channel_mult: channel multiplier for each level of the UNet. 416 | :param conv_resample: if True, use learned convolutions for upsampling and 417 | downsampling. 418 | :param dims: determines if the signal is 1D, 2D, or 3D. 419 | :param num_classes: if specified (as an int), then this model will be 420 | class-conditional with `num_classes` classes. 421 | :param use_checkpoint: use gradient checkpointing to reduce memory usage. 422 | :param num_heads: the number of attention heads in each attention layer. 423 | :param num_heads_channels: if specified, ignore num_heads and instead use 424 | a fixed channel width per attention head. 425 | :param num_heads_upsample: works with num_heads to set a different number 426 | of heads for upsampling. Deprecated. 427 | :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. 428 | :param resblock_updown: use residual blocks for up/downsampling. 429 | :param use_new_attention_order: use a different attention pattern for potentially 430 | increased efficiency. 
431 | """ 432 | 433 | def __init__( 434 | self, 435 | image_size, 436 | in_channels, 437 | model_channels, 438 | out_channels, 439 | num_res_blocks, 440 | attention_resolutions, 441 | dropout=0, 442 | channel_mult=(1, 2, 4, 8), 443 | conv_resample=True, 444 | dims=2, 445 | num_classes=None, 446 | use_checkpoint=False, 447 | use_fp16=False, 448 | num_heads=1, 449 | num_head_channels=-1, 450 | num_heads_upsample=-1, 451 | use_scale_shift_norm=False, 452 | resblock_updown=False, 453 | use_new_attention_order=False, 454 | conf=None 455 | ): 456 | super().__init__() 457 | 458 | if num_heads_upsample == -1: 459 | num_heads_upsample = num_heads 460 | 461 | self.image_size = image_size 462 | self.in_channels = in_channels 463 | self.model_channels = model_channels 464 | self.out_channels = out_channels 465 | self.num_res_blocks = num_res_blocks 466 | self.attention_resolutions = attention_resolutions 467 | self.dropout = dropout 468 | self.channel_mult = channel_mult 469 | self.conv_resample = conv_resample 470 | self.num_classes = num_classes 471 | self.use_checkpoint = use_checkpoint 472 | self.dtype = th.float16 if use_fp16 else th.float32 473 | self.num_heads = num_heads 474 | self.num_head_channels = num_head_channels 475 | self.num_heads_upsample = num_heads_upsample 476 | self.conf = conf 477 | 478 | time_embed_dim = model_channels * 4 479 | self.time_embed = nn.Sequential( 480 | linear(model_channels, time_embed_dim), 481 | nn.SiLU(), 482 | linear(time_embed_dim, time_embed_dim), 483 | ) 484 | 485 | if self.num_classes is not None: 486 | self.label_emb = nn.Embedding(num_classes, time_embed_dim) 487 | 488 | ch = input_ch = int(channel_mult[0] * model_channels) 489 | self.input_blocks = nn.ModuleList( 490 | [TimestepEmbedSequential( 491 | conv_nd(dims, in_channels, ch, 3, padding=1))] 492 | ) 493 | self._feature_size = ch 494 | input_block_chans = [ch] 495 | ds = 1 496 | for level, mult in enumerate(channel_mult): 497 | for _ in range(num_res_blocks): 498 | layers = [ 499 | ResBlock( 500 | ch, 501 | time_embed_dim, 502 | dropout, 503 | out_channels=int(mult * model_channels), 504 | dims=dims, 505 | use_checkpoint=use_checkpoint, 506 | use_scale_shift_norm=use_scale_shift_norm, 507 | ) 508 | ] 509 | ch = int(mult * model_channels) 510 | if ds in attention_resolutions: 511 | layers.append( 512 | AttentionBlock( 513 | ch, 514 | use_checkpoint=use_checkpoint, 515 | num_heads=num_heads, 516 | num_head_channels=num_head_channels, 517 | use_new_attention_order=use_new_attention_order, 518 | ) 519 | ) 520 | self.input_blocks.append(TimestepEmbedSequential(*layers)) 521 | self._feature_size += ch 522 | input_block_chans.append(ch) 523 | if level != len(channel_mult) - 1: 524 | out_ch = ch 525 | self.input_blocks.append( 526 | TimestepEmbedSequential( 527 | ResBlock( 528 | ch, 529 | time_embed_dim, 530 | dropout, 531 | out_channels=out_ch, 532 | dims=dims, 533 | use_checkpoint=use_checkpoint, 534 | use_scale_shift_norm=use_scale_shift_norm, 535 | down=True, 536 | ) 537 | if resblock_updown 538 | else Downsample( 539 | ch, conv_resample, dims=dims, out_channels=out_ch 540 | ) 541 | ) 542 | ) 543 | ch = out_ch 544 | input_block_chans.append(ch) 545 | ds *= 2 546 | self._feature_size += ch 547 | 548 | self.middle_block = TimestepEmbedSequential( 549 | ResBlock( 550 | ch, 551 | time_embed_dim, 552 | dropout, 553 | dims=dims, 554 | use_checkpoint=use_checkpoint, 555 | use_scale_shift_norm=use_scale_shift_norm, 556 | ), 557 | AttentionBlock( 558 | ch, 559 | use_checkpoint=use_checkpoint, 560 | 
561 |                 num_head_channels=num_head_channels,
562 |                 use_new_attention_order=use_new_attention_order,
563 |             ),
564 |             ResBlock(
565 |                 ch,
566 |                 time_embed_dim,
567 |                 dropout,
568 |                 dims=dims,
569 |                 use_checkpoint=use_checkpoint,
570 |                 use_scale_shift_norm=use_scale_shift_norm,
571 |             ),
572 |         )
573 |         self._feature_size += ch
574 | 
575 |         self.output_blocks = nn.ModuleList([])
576 |         for level, mult in list(enumerate(channel_mult))[::-1]:
577 |             for i in range(num_res_blocks + 1):
578 |                 ich = input_block_chans.pop()
579 |                 layers = [
580 |                     ResBlock(
581 |                         ch + ich,
582 |                         time_embed_dim,
583 |                         dropout,
584 |                         out_channels=int(model_channels * mult),
585 |                         dims=dims,
586 |                         use_checkpoint=use_checkpoint,
587 |                         use_scale_shift_norm=use_scale_shift_norm,
588 |                     )
589 |                 ]
590 |                 ch = int(model_channels * mult)
591 |                 if ds in attention_resolutions:
592 |                     layers.append(
593 |                         AttentionBlock(
594 |                             ch,
595 |                             use_checkpoint=use_checkpoint,
596 |                             num_heads=num_heads_upsample,
597 |                             num_head_channels=num_head_channels,
598 |                             use_new_attention_order=use_new_attention_order,
599 |                         )
600 |                     )
601 |                 if level and i == num_res_blocks:
602 |                     out_ch = ch
603 |                     layers.append(
604 |                         ResBlock(
605 |                             ch,
606 |                             time_embed_dim,
607 |                             dropout,
608 |                             out_channels=out_ch,
609 |                             dims=dims,
610 |                             use_checkpoint=use_checkpoint,
611 |                             use_scale_shift_norm=use_scale_shift_norm,
612 |                             up=True,
613 |                         )
614 |                         if resblock_updown
615 |                         else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch)
616 |                     )
617 |                     ds //= 2
618 |                 self.output_blocks.append(TimestepEmbedSequential(*layers))
619 |                 self._feature_size += ch
620 | 
621 |         self.out = nn.Sequential(
622 |             normalization(ch),
623 |             nn.SiLU(),
624 |             zero_module(conv_nd(dims, input_ch, out_channels, 3, padding=1)),
625 |         )
626 | 
627 |     def convert_to_fp16(self):
628 |         """
629 |         Convert the torso of the model to float16.
630 |         """
631 |         self.input_blocks.apply(convert_module_to_f16)
632 |         self.middle_block.apply(convert_module_to_f16)
633 |         self.output_blocks.apply(convert_module_to_f16)
634 | 
635 |     def convert_to_fp32(self):
636 |         """
637 |         Convert the torso of the model to float32.
638 |         """
639 |         self.input_blocks.apply(convert_module_to_f32)
640 |         self.middle_block.apply(convert_module_to_f32)
641 |         self.output_blocks.apply(convert_module_to_f32)
642 | 
643 |     def forward(self, x, timesteps, y=None, gt=None, **kwargs):
644 |         """
645 |         Apply the model to an input batch.
646 | 
647 |         :param x: an [N x C x ...] Tensor of inputs.
648 |         :param timesteps: a 1-D batch of timesteps.
649 |         :param y: an [N] Tensor of labels, if class-conditional.
650 |         :return: an [N x C x ...] Tensor of outputs.
651 |         """
652 | 
653 |         if timesteps[0].item() > self.conf.diffusion_steps:
654 |             raise RuntimeError("timesteps larger than diffusion steps.",
655 |                                timesteps[0].item(), self.conf.diffusion_steps)
656 | 
657 |         if self.conf.use_value_logger:
658 |             self.conf.value_logger.add_to_list(
659 |                 'model_time', timesteps[0].item())
660 | 
661 |         hs = []
662 |         emb = self.time_embed(timestep_embedding(
663 |             timesteps, self.model_channels))
664 | 
665 |         if self.num_classes is not None:
666 |             assert y.shape == (x.shape[0],)
667 |             emb = emb + self.label_emb(y)
668 | 
669 |         h = x.type(self.dtype)
670 |         for module in self.input_blocks:
671 |             h = module(h, emb)
672 |             hs.append(h)
673 |         h = self.middle_block(h, emb)
674 |         for module in self.output_blocks:
675 |             h = th.cat([h, hs.pop()], dim=1)
676 |             h = module(h, emb)
677 |         h = h.type(x.dtype)
678 |         return self.out(h)
679 | 
680 | 
681 | class SuperResModel(UNetModel):
682 |     """
683 |     A UNetModel that performs super-resolution.
684 | 
685 |     Expects an extra kwarg `low_res` to condition on a low-resolution image.
686 |     """
687 | 
688 |     def __init__(self, image_size, in_channels, *args, **kwargs):
689 |         super().__init__(image_size, in_channels * 2, *args, **kwargs)
690 | 
691 |     def forward(self, x, timesteps, low_res=None, **kwargs):
692 |         _, _, new_height, new_width = x.shape
693 |         upsampled = F.interpolate(
694 |             low_res, (new_height, new_width), mode="bilinear")
695 |         x = th.cat([x, upsampled], dim=1)
696 |         return super().forward(x, timesteps, **kwargs)
697 | 
698 | 
699 | class EncoderUNetModel(nn.Module):
700 |     """
701 |     The half UNet model with attention and timestep embedding.
702 | 
703 |     For usage, see UNet.
704 |     """
705 | 
706 |     def __init__(
707 |         self,
708 |         image_size,
709 |         in_channels,
710 |         model_channels,
711 |         out_channels,
712 |         num_res_blocks,
713 |         attention_resolutions,
714 |         dropout=0,
715 |         channel_mult=(1, 2, 4, 8),
716 |         conv_resample=True,
717 |         dims=2,
718 |         use_checkpoint=False,
719 |         use_fp16=False,
720 |         num_heads=1,
721 |         num_head_channels=-1,
722 |         num_heads_upsample=-1,
723 |         use_scale_shift_norm=False,
724 |         resblock_updown=False,
725 |         use_new_attention_order=False,
726 |         pool="adaptive",
727 |     ):
728 |         super().__init__()
729 | 
730 |         if num_heads_upsample == -1:
731 |             num_heads_upsample = num_heads
732 | 
733 |         self.in_channels = in_channels
734 |         self.model_channels = model_channels
735 |         self.out_channels = out_channels
736 |         self.num_res_blocks = num_res_blocks
737 |         self.attention_resolutions = attention_resolutions
738 |         self.dropout = dropout
739 |         self.channel_mult = channel_mult
740 |         self.conv_resample = conv_resample
741 |         self.use_checkpoint = use_checkpoint
742 |         self.dtype = th.float16 if use_fp16 else th.float32
743 |         self.num_heads = num_heads
744 |         self.num_head_channels = num_head_channels
745 |         self.num_heads_upsample = num_heads_upsample
746 | 
747 |         time_embed_dim = model_channels * 4
748 |         self.time_embed = nn.Sequential(
749 |             linear(model_channels, time_embed_dim),
750 |             nn.SiLU(),
751 |             linear(time_embed_dim, time_embed_dim),
752 |         )
753 | 
754 |         ch = int(channel_mult[0] * model_channels)
755 |         self.input_blocks = nn.ModuleList(
756 |             [TimestepEmbedSequential(
757 |                 conv_nd(dims, in_channels, ch, 3, padding=1))]
758 |         )
759 |         self._feature_size = ch
760 |         input_block_chans = [ch]
761 |         ds = 1
762 |         for level, mult in enumerate(channel_mult):
763 |             for _ in range(num_res_blocks):
764 |                 layers = [
765 |                     ResBlock(
766 |                         ch,
767 |                         time_embed_dim,
768 |                         dropout,
769 |                         out_channels=int(mult * model_channels),
770 |                         dims=dims,
771 |                         use_checkpoint=use_checkpoint,
772 |                         use_scale_shift_norm=use_scale_shift_norm,
773 |                     )
774 |                 ]
775 |                 ch = int(mult * model_channels)
776 |                 if ds in attention_resolutions:
777 |                     layers.append(
778 |                         AttentionBlock(
779 |                             ch,
780 |                             use_checkpoint=use_checkpoint,
781 |                             num_heads=num_heads,
782 |                             num_head_channels=num_head_channels,
783 |                             use_new_attention_order=use_new_attention_order,
784 |                         )
785 |                     )
786 |                 self.input_blocks.append(TimestepEmbedSequential(*layers))
787 |                 self._feature_size += ch
788 |                 input_block_chans.append(ch)
789 |             if level != len(channel_mult) - 1:
790 |                 out_ch = ch
791 |                 self.input_blocks.append(
792 |                     TimestepEmbedSequential(
793 |                         ResBlock(
794 |                             ch,
795 |                             time_embed_dim,
796 |                             dropout,
797 |                             out_channels=out_ch,
798 |                             dims=dims,
799 |                             use_checkpoint=use_checkpoint,
800 |                             use_scale_shift_norm=use_scale_shift_norm,
801 |                             down=True,
802 |                         )
803 |                         if resblock_updown
804 |                         else Downsample(
805 |                             ch, conv_resample, dims=dims, out_channels=out_ch
806 |                         )
807 |                     )
808 |                 )
809 |                 ch = out_ch
810 |                 input_block_chans.append(ch)
811 |                 ds *= 2
812 |                 self._feature_size += ch
813 | 
814 |         self.middle_block = TimestepEmbedSequential(
815 |             ResBlock(
816 |                 ch,
817 |                 time_embed_dim,
818 |                 dropout,
819 |                 dims=dims,
820 |                 use_checkpoint=use_checkpoint,
821 |                 use_scale_shift_norm=use_scale_shift_norm,
822 |             ),
823 |             AttentionBlock(
824 |                 ch,
825 |                 use_checkpoint=use_checkpoint,
826 |                 num_heads=num_heads,
827 |                 num_head_channels=num_head_channels,
828 |                 use_new_attention_order=use_new_attention_order,
829 |             ),
830 |             ResBlock(
831 |                 ch,
832 |                 time_embed_dim,
833 |                 dropout,
834 |                 dims=dims,
835 |                 use_checkpoint=use_checkpoint,
836 |                 use_scale_shift_norm=use_scale_shift_norm,
837 |             ),
838 |         )
839 |         self._feature_size += ch
840 |         self.pool = pool
841 |         if pool == "adaptive":
842 |             self.out = nn.Sequential(
843 |                 normalization(ch),
844 |                 nn.SiLU(),
845 |                 nn.AdaptiveAvgPool2d((1, 1)),
846 |                 zero_module(conv_nd(dims, ch, out_channels, 1)),
847 |                 nn.Flatten(),
848 |             )
849 |         elif pool == "attention":
850 |             assert num_head_channels != -1
851 |             self.out = nn.Sequential(
852 |                 normalization(ch),
853 |                 nn.SiLU(),
854 |                 AttentionPool2d(
855 |                     (image_size // ds), ch, num_head_channels, out_channels
856 |                 ),
857 |             )
858 |         elif pool == "spatial":
859 |             self.out = nn.Sequential(
860 |                 nn.Linear(self._feature_size, 2048),
861 |                 nn.ReLU(),
862 |                 nn.Linear(2048, self.out_channels),
863 |             )
864 |         elif pool == "spatial_v2":
865 |             self.out = nn.Sequential(
866 |                 nn.Linear(self._feature_size, 2048),
867 |                 normalization(2048),
868 |                 nn.SiLU(),
869 |                 nn.Linear(2048, self.out_channels),
870 |             )
871 |         else:
872 |             raise NotImplementedError(f"Unexpected {pool} pooling")
873 | 
874 |     def convert_to_fp16(self):
875 |         """
876 |         Convert the torso of the model to float16.
877 |         """
878 |         self.input_blocks.apply(convert_module_to_f16)
879 |         self.middle_block.apply(convert_module_to_f16)
880 | 
881 |     def convert_to_fp32(self):
882 |         """
883 |         Convert the torso of the model to float32.
884 |         """
885 |         self.input_blocks.apply(convert_module_to_f32)
886 |         self.middle_block.apply(convert_module_to_f32)
887 | 
888 |     def forward(self, x, timesteps):
889 |         """
890 |         Apply the model to an input batch.
891 | 
892 |         :param x: an [N x C x ...] Tensor of inputs.
893 |         :param timesteps: a 1-D batch of timesteps.
894 |         :return: an [N x K] Tensor of outputs.
895 |         """
896 |         emb = self.time_embed(timestep_embedding(
897 |             timesteps, self.model_channels))
898 | 
899 |         results = []
900 |         h = x.type(self.dtype)
901 |         for module in self.input_blocks:
902 |             h = module(h, emb)
903 |             if self.pool.startswith("spatial"):
904 |                 results.append(h.type(x.dtype).mean(dim=(2, 3)))
905 |         h = self.middle_block(h, emb)
906 |         if self.pool.startswith("spatial"):
907 |             results.append(h.type(x.dtype).mean(dim=(2, 3)))
908 |             h = th.cat(results, axis=-1)
909 |             return self.out(h)
910 |         else:
911 |             h = h.type(x.dtype)
912 |             return self.out(h)
913 | 
--------------------------------------------------------------------------------
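
A minimal usage sketch for the two models defined above. Everything here is illustrative rather than taken from the repository's shipped configs: the hyperparameters are small placeholder values, and the `SimpleNamespace` conf merely stubs the two attributes that `UNetModel.forward` reads (`diffusion_steps` and `use_value_logger`); the real code passes a full config object built by conf_mgt.

import torch as th
from types import SimpleNamespace

from guided_diffusion.unet import UNetModel, EncoderUNetModel

# UNetModel.forward only touches conf.diffusion_steps and
# conf.use_value_logger, so a stub is enough for a smoke test.
conf = SimpleNamespace(diffusion_steps=1000, use_value_logger=False)

unet = UNetModel(
    image_size=64,
    in_channels=3,
    model_channels=64,             # illustrative; real configs are wider
    out_channels=6,                # 3 mean + 3 variance channels in the learn_sigma setting
    num_res_blocks=2,
    attention_resolutions=(4, 8),  # attention where the downsample rate ds is 4 or 8
    channel_mult=(1, 2, 4, 8),
    num_head_channels=64,
    conf=conf,
)

x = th.randn(2, 3, 64, 64)                     # a batch of noisy images
t = th.randint(0, conf.diffusion_steps, (2,))  # one timestep per sample
out = unet(x, t)                               # -> [2, 6, 64, 64]

# The encoder half can serve as a noisy classifier for guidance;
# "attention" pooling requires num_head_channels != -1 (asserted above).
clf = EncoderUNetModel(
    image_size=64,
    in_channels=3,
    model_channels=64,
    out_channels=1000,             # e.g. one logit per ImageNet class
    num_res_blocks=2,
    attention_resolutions=(4, 8),
    channel_mult=(1, 2, 4, 8),
    num_head_channels=64,
    pool="attention",
)
logits = clf(x, t)                 # -> [2, 1000]

Note that `UNetModel.forward` raises a RuntimeError if the first timestep in the batch exceeds `conf.diffusion_steps`, so the stub's value must be at least as large as the sampler's largest timestep.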