├── LICENSES
│   ├── README.md
│   ├── LICENSE
│   └── LICENSE_guided_diffusion
├── download.sh
├── conf_mgt
│   ├── __init__.py
│   └── conf_base.py
├── guided_diffusion
│   ├── __init__.py
│   ├── dist_util.py
│   ├── nn.py
│   ├── scheduler.py
│   ├── respace.py
│   ├── image_datasets.py
│   ├── fp16_util.py
│   ├── script_util.py
│   ├── gaussian_diffusion.py
│   └── unet.py
├── utils
│   └── __init__.py
├── confs
│   ├── face_example.yml
│   ├── test_p256_nn2.yml
│   ├── test_p256_ex64.yml
│   ├── test_p256_thin.yml
│   ├── test_c256_ev2li.yml
│   ├── test_p256_ev2li.yml
│   ├── test_p256_thick.yml
│   ├── test_p256_genhalf.yml
│   ├── test_c256_nn2.yml
│   ├── test_c256_ex64.yml
│   ├── test_c256_thin.yml
│   ├── test_c256_thick.yml
│   ├── test_inet256_nn2.yml
│   ├── test_c256_genhalf.yml
│   ├── test_inet256_ex64.yml
│   ├── test_inet256_thin.yml
│   ├── test_inet256_ev2li.yml
│   ├── test_inet256_thick.yml
│   └── test_inet256_genhalf.yml
├── test.py
└── README.md
/LICENSES/README.md:
--------------------------------------------------------------------------------
1 | # License and Acknowledgement
2 |
3 | A big thanks to the following contributors who open-sourced their code and thereby helped us a lot in developing RePaint!
4 |
5 | This repository was forked from:
6 | https://github.com/openai/guided-diffusion
7 |
8 | It contains code from:
9 | https://github.com/hojonathanho/diffusion
10 |
11 | If we missed a contribution, please contact us.
--------------------------------------------------------------------------------
/download.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | (
4 | mkdir -p data/pretrained
5 | cd data/pretrained
6 |
7 | wget https://openaipublic.blob.core.windows.net/diffusion/jul-2021/256x256_classifier.pt # Trained by OpenAI
8 | wget https://openaipublic.blob.core.windows.net/diffusion/jul-2021/256x256_diffusion.pt # Trained by OpenAI
9 |
10 | gdown https://drive.google.com/uc?id=1norNWWGYP3EZ_o05DmoW1ryKuKMmhlCX # RePaint-trained checkpoints (this and the next download; the confs expect celeba256_250000.pt and places256_300000.pt here)
11 | gdown https://drive.google.com/uc?id=1QEl-btGbzQz6IwkXiFGd49uQNTUtTHsk
12 | )
13 |
14 | # data
15 | (
16 | gdown https://drive.google.com/uc?id=1Q_dxuyI41AAmSv9ti3780BwaJQqwvwMv
17 | unzip data.zip
18 | rm data.zip
19 | )
--------------------------------------------------------------------------------
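Note: download.sh assumes `wget`, `gdown` (installable via `pip install gdown`), and `unzip` are available on the PATH. The first subshell places the checkpoints in `data/pretrained/`; the second unpacks the evaluation data archive in whatever directory the script is run from.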
/LICENSES/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | you may not use this file except in compliance with the License.
4 | You may obtain a copy of the License at
5 |
6 | https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 |
8 | The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
--------------------------------------------------------------------------------
/conf_mgt/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 |
18 | from conf_mgt.conf_base import Default_Conf
19 |
--------------------------------------------------------------------------------
/guided_diffusion/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | """
18 | Based on "Improved Denoising Diffusion Probabilistic Models".
19 | """
20 |
--------------------------------------------------------------------------------
/LICENSES/LICENSE_guided_diffusion:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 OpenAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | import yaml
18 | import os
19 | from PIL import Image
20 |
21 |
22 | def txtread(path):
23 | path = os.path.expanduser(path)
24 | with open(path, 'r') as f:
25 | return f.read()
26 |
27 |
28 | def yamlread(path):
29 | return yaml.safe_load(txtread(path=path))
30 |
31 | def imwrite(path=None, img=None):
32 | Image.fromarray(img).save(path)
33 |
--------------------------------------------------------------------------------
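A hypothetical usage sketch combining the helpers above with `Default_Conf` from `conf_mgt` (assumptions: `Default_Conf` is dict-like and supports `update`, which `conf_base.py` is not shown here to confirm; the snippet is illustrative, not part of the repository):

```python
# Hypothetical sketch (not in the repository) combining utils and conf_mgt.
import os

import numpy as np

from conf_mgt import Default_Conf
from utils import yamlread, imwrite

conf = Default_Conf()
conf.update(yamlread('./confs/face_example.yml'))  # assumes dict-like update()

# imwrite hands the array to PIL.Image.fromarray, so it expects uint8 data.
os.makedirs('./log', exist_ok=True)
img = np.full((256, 256, 3), 128, dtype=np.uint8)  # flat gray test image
imwrite(path='./log/gray.png', img=img)
```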
/guided_diffusion/dist_util.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | """
18 | Helpers for distributed training.
19 | """
20 |
21 | import io
22 |
23 | import blobfile as bf
24 | import torch as th
25 |
26 |
27 | def dev(device):
28 | """
29 | Get the device to use for torch.distributed.
30 | """
31 | if device is None:
32 | if th.cuda.is_available():
33 |             return th.device("cuda")
34 | return th.device("cpu")
35 | return th.device(device)
36 |
37 |
38 | def load_state_dict(path, backend=None, **kwargs):
39 | with bf.BlobFile(path, "rb") as f:
40 | data = f.read()
41 | return th.load(io.BytesIO(data), **kwargs)
42 |
43 |
44 |
--------------------------------------------------------------------------------
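A brief usage sketch for the two helpers (assumed usage, not code from the repository): `dev` selects the device and `load_state_dict` forwards its keyword arguments to `th.load`, so `map_location` behaves as usual.

```python
# Usage sketch (assumed): device selection plus checkpoint loading.
from guided_diffusion import dist_util

device = dist_util.dev(None)  # cuda if available, else cpu

# kwargs are forwarded to th.load, so map_location works as expected.
state_dict = dist_util.load_state_dict(
    "./data/pretrained/celeba256_250000.pt", map_location="cpu"
)
# A built model would then consume it:
# model.load_state_dict(state_dict); model.to(device)
```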
/confs/face_example.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | latex_name: RePaint
53 | method_name: Repaint
54 | image_size: 256
55 | model_path: ./data/pretrained/celeba256_250000.pt
56 | name: face_example
57 | inpa_inj_sched_prev: true
58 | n_jobs: 1
59 | print_estimated_vars: true
60 | inpa_inj_sched_prev_cumnoise: false
61 | schedule_jump_params:
62 | t_T: 250
63 | n_sample: 1
64 | jump_length: 10
65 | jump_n_sample: 10
66 | data:
67 | eval:
68 | paper_face_mask:
69 | mask_loader: true
70 | gt_path: ./data/datasets/gts/face
71 | mask_path: ./data/datasets/gt_keep_masks/face
72 | image_size: 256
73 | class_cond: false
74 | deterministic: true
75 | random_crop: false
76 | random_flip: false
77 | return_dict: true
78 | drop_last: false
79 | batch_size: 1
80 | return_dataloader: true
81 | offset: 0
82 | max_len: 8
83 | paths:
84 | srs: ./log/face_example/inpainted
85 | lrs: ./log/face_example/gt_masked
86 | gts: ./log/face_example/gt
87 | gt_keep_masks: ./log/face_example/gt_keep_mask
88 |
--------------------------------------------------------------------------------
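Two parts of this conf deserve a note. First, the `data.eval` block points `gt_path` at ground-truth images and `mask_path` at keep-masks; as the names `gt_keep_masks`/`gt_keep_mask` suggest, masked-in pixels are kept from the ground truth and the remainder is inpainted. Second, `schedule_jump_params` drives RePaint's resampling schedule: the sampler walks t from `t_T` down to 0 and repeatedly jumps back up `jump_length` steps to re-harmonize the inpainted region with the known pixels. Below is a minimal sketch of such a schedule builder, consistent with these parameters but not necessarily identical to `guided_diffusion/scheduler.py`:

```python
# Sketch of a RePaint-style jump schedule (illustrative; see scheduler.py
# in this repository for the authoritative version).
def get_schedule_jump(t_T, n_sample, jump_length, jump_n_sample):
    # n_sample is unused in this simplified sketch.
    jumps = {t: jump_n_sample - 1 for t in range(0, t_T - jump_length, jump_length)}
    t, ts = t_T, []
    while t >= 1:
        t -= 1
        ts.append(t)
        if jumps.get(t, 0) > 0:
            jumps[t] -= 1
            for _ in range(jump_length):  # jump back up and re-descend
                t += 1
                ts.append(t)
    ts.append(-1)
    return ts

# With t_T=250, jump_length=10, jump_n_sample=10 (as in this conf), every
# 10-step window below t_T is revisited ten times before the walk moves on.
ts = get_schedule_jump(250, 1, 10, 10)
```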
/confs/test_p256_nn2.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/places256_300000.pt
54 | name: test_p256_nn2
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_p256_nn2_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/p256
69 | mask_path: ./data/datasets/gt_keep_masks/nn2
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: random_nn2_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_p256_nn2/inpainted
84 | lrs: ./log/test_p256_nn2/gt_masked
85 | gts: ./log/test_p256_nn2/gt
86 | gt_keep_masks: ./log/test_p256_nn2/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_p256_ex64.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/places256_300000.pt
54 | name: test_p256_ex64
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_p256_ex64_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/p256
69 | mask_path: ./data/datasets/gt_keep_masks/ex64
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: random_ex64_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_p256_ex64/inpainted
84 | lrs: ./log/test_p256_ex64/gt_masked
85 | gts: ./log/test_p256_ex64/gt
86 | gt_keep_masks: ./log/test_p256_ex64/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_p256_thin.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/places256_300000.pt
54 | name: test_p256_thin
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_p256_thin_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/p256
69 | mask_path: ./data/datasets/gt_keep_masks/thin
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: random_thin_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_p256_thin/inpainted
84 | lrs: ./log/test_p256_thin/gt_masked
85 | gts: ./log/test_p256_thin/gt
86 | gt_keep_masks: ./log/test_p256_thin/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_c256_ev2li.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_ev2li
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_c256_ev2li_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/c256
69 | mask_path: ./data/datasets/gt_keep_masks/ev2li
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: fix_ev2li_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_c256_ev2li/inpainted
84 | lrs: ./log/test_c256_ev2li/gt_masked
85 | gts: ./log/test_c256_ev2li/gt
86 | gt_keep_masks: ./log/test_c256_ev2li/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_p256_ev2li.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/places256_300000.pt
54 | name: test_p256_ev2li
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_p256_ev2li_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/p256
69 | mask_path: ./data/datasets/gt_keep_masks/ev2li
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: random_ev2li_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_p256_ev2li/inpainted
84 | lrs: ./log/test_p256_ev2li/gt_masked
85 | gts: ./log/test_p256_ev2li/gt
86 | gt_keep_masks: ./log/test_p256_ev2li/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_p256_thick.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/places256_300000.pt
54 | name: test_p256_thick
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_p256_thick_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/p256
69 | mask_path: ./data/datasets/gt_keep_masks/thick
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: random_thick_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_p256_thick/inpainted
84 | lrs: ./log/test_p256_thick/gt_masked
85 | gts: ./log/test_p256_thick/gt
86 | gt_keep_masks: ./log/test_p256_thick/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_p256_genhalf.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/places256_300000.pt
54 | name: test_p256_genhalf
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_p256_genhalf_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/p256
69 | mask_path: ./data/datasets/gt_keep_masks/genhalf
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: random_genhalf_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_p256_genhalf/inpainted
84 | lrs: ./log/test_p256_genhalf/gt_masked
85 | gts: ./log/test_p256_genhalf/gt
86 | gt_keep_masks: ./log/test_p256_genhalf/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_c256_nn2.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_nn2
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_c256_nn2_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/c256
69 | mask_path: ./data/datasets/gt_keep_masks/nn2
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: fix_nn2_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_c256_nn2/inpainted
84 | lrs: ./log/test_c256_nn2/gt_masked
85 | gts: ./log/test_c256_nn2/gt
86 | gt_keep_masks: ./log/test_c256_nn2/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_c256_ex64.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_ex64
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_c256_ex64_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/c256
69 | mask_path: ./data/datasets/gt_keep_masks/ex64
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: fix_ex64_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_c256_ex64/inpainted
84 | lrs: ./log/test_c256_ex64/gt_masked
85 | gts: ./log/test_c256_ex64/gt
86 | gt_keep_masks: ./log/test_c256_ex64/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_c256_thin.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_thin
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_c256_thin_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/c256
69 | mask_path: ./data/datasets/gt_keep_masks/thin
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: random_thin_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_c256_thin/inpainted
84 | lrs: ./log/test_c256_thin/gt_masked
85 | gts: ./log/test_c256_thin/gt
86 | gt_keep_masks: ./log/test_c256_thin/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_c256_thick.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_thick
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_c256_thick_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/c256
69 | mask_path: ./data/datasets/gt_keep_masks/thick
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: random_thick_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_c256_thick/inpainted
84 | lrs: ./log/test_c256_thick/gt_masked
85 | gts: ./log/test_c256_thick/gt
86 | gt_keep_masks: ./log/test_c256_thick/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_inet256_nn2.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: true
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: true
28 | use_scale_shift_norm: true
29 | classifier_scale: 1.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | classifier_path: ./data/pretrained/256x256_classifier.pt
54 | model_path: ./data/pretrained/256x256_diffusion.pt
55 | name: test_inet256_nn2
56 | inpa_inj_sched_prev: true
57 | n_jobs: 25
58 | print_estimated_vars: true
59 | inpa_inj_sched_prev_cumnoise: false
60 | schedule_jump_params:
61 | t_T: 250
62 | n_sample: 1
63 | jump_length: 10
64 | jump_n_sample: 10
65 | data:
66 | eval:
67 | lama_inet256_nn2_n100_test:
68 | mask_loader: true
69 | gt_path: ./data/datasets/gts/inet256
70 | mask_path: ./data/datasets/gt_keep_masks/nn2
71 | image_size: 256
72 | class_cond: false
73 | deterministic: true
74 | random_crop: false
75 | random_flip: false
76 | return_dict: true
77 | drop_last: false
78 | batch_size: 4
79 | return_dataloader: true
80 | ds_conf:
81 | name: random_nn2_256
82 | max_len: 100
83 | paths:
84 | srs: ./log/test_inet256_nn2/inpainted
85 | lrs: ./log/test_inet256_nn2/gt_masked
86 | gts: ./log/test_inet256_nn2/gt
87 | gt_keep_masks: ./log/test_inet256_nn2/gt_keep_mask
88 |
--------------------------------------------------------------------------------
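Unlike the CelebA and Places confs, this one is class-conditional (`class_cond: true`) and sets both `classifier_path` and `classifier_scale: 1.0`, so the ImageNet model samples with classifier guidance. For orientation, the standard guided-diffusion conditioning function adds the scaled gradient of the classifier's log-probability to the sampler; a sketch of that pattern follows (the usual guided-diffusion recipe, assumed here rather than copied from this repository):

```python
# Sketch of a guided-diffusion style cond_fn (assumed, not from this repo):
# returns classifier_scale * grad_x log p(y | x_t), which steers sampling
# toward the target class y at noise level t.
import torch as th
import torch.nn.functional as F

def cond_fn(classifier, x, t, y, classifier_scale=1.0):
    with th.enable_grad():
        x_in = x.detach().requires_grad_(True)
        logits = classifier(x_in, t)
        log_probs = F.log_softmax(logits, dim=-1)
        selected = log_probs[range(len(logits)), y.view(-1)]
        return th.autograd.grad(selected.sum(), x_in)[0] * classifier_scale
```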
/confs/test_c256_genhalf.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: false
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: false
28 | use_scale_shift_norm: true
29 | classifier_scale: 4.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | model_path: ./data/pretrained/celeba256_250000.pt
54 | name: test_c256_genhalf
55 | inpa_inj_sched_prev: true
56 | n_jobs: 25
57 | print_estimated_vars: true
58 | inpa_inj_sched_prev_cumnoise: false
59 | schedule_jump_params:
60 | t_T: 250
61 | n_sample: 1
62 | jump_length: 10
63 | jump_n_sample: 10
64 | data:
65 | eval:
66 | lama_c256_genhalf_n100_test:
67 | mask_loader: true
68 | gt_path: ./data/datasets/gts/c256
69 | mask_path: ./data/datasets/gt_keep_masks/genhalf
70 | image_size: 256
71 | class_cond: false
72 | deterministic: true
73 | random_crop: false
74 | random_flip: false
75 | return_dict: true
76 | drop_last: false
77 | batch_size: 4
78 | return_dataloader: true
79 | ds_conf:
80 | name: fix_genhalf_256
81 | max_len: 100
82 | paths:
83 | srs: ./log/test_c256_genhalf/inpainted
84 | lrs: ./log/test_c256_genhalf/gt_masked
85 | gts: ./log/test_c256_genhalf/gt
86 | gt_keep_masks: ./log/test_c256_genhalf/gt_keep_mask
87 |
--------------------------------------------------------------------------------
/confs/test_inet256_ex64.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: true
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: true
28 | use_scale_shift_norm: true
29 | classifier_scale: 1.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | classifier_path: ./data/pretrained/256x256_classifier.pt
54 | model_path: ./data/pretrained/256x256_diffusion.pt
55 | name: test_inet256_ex64
56 | inpa_inj_sched_prev: true
57 | n_jobs: 25
58 | print_estimated_vars: true
59 | inpa_inj_sched_prev_cumnoise: false
60 | schedule_jump_params:
61 | t_T: 250
62 | n_sample: 1
63 | jump_length: 10
64 | jump_n_sample: 10
65 | data:
66 | eval:
67 | lama_inet256_ex64_n100_test:
68 | mask_loader: true
69 | gt_path: ./data/datasets/gts/inet256
70 | mask_path: ./data/datasets/gt_keep_masks/ex64
71 | image_size: 256
72 | class_cond: false
73 | deterministic: true
74 | random_crop: false
75 | random_flip: false
76 | return_dict: true
77 | drop_last: false
78 | batch_size: 4
79 | return_dataloader: true
80 | ds_conf:
81 | name: random_ex64_256
82 | max_len: 100
83 | paths:
84 | srs: ./log/test_inet256_ex64/inpainted
85 | lrs: ./log/test_inet256_ex64/gt_masked
86 | gts: ./log/test_inet256_ex64/gt
87 | gt_keep_masks: ./log/test_inet256_ex64/gt_keep_mask
88 |
--------------------------------------------------------------------------------
/confs/test_inet256_thin.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: true
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: true
28 | use_scale_shift_norm: true
29 | classifier_scale: 1.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | classifier_path: ./data/pretrained/256x256_classifier.pt
54 | model_path: ./data/pretrained/256x256_diffusion.pt
55 | name: test_inet256_thin
56 | inpa_inj_sched_prev: true
57 | n_jobs: 25
58 | print_estimated_vars: true
59 | inpa_inj_sched_prev_cumnoise: false
60 | schedule_jump_params:
61 | t_T: 250
62 | n_sample: 1
63 | jump_length: 10
64 | jump_n_sample: 10
65 | data:
66 | eval:
67 | lama_inet256_thin_n100_test:
68 | mask_loader: true
69 | gt_path: ./data/datasets/gts/inet256
70 | mask_path: ./data/datasets/gt_keep_masks/thin
71 | image_size: 256
72 | class_cond: false
73 | deterministic: true
74 | random_crop: false
75 | random_flip: false
76 | return_dict: true
77 | drop_last: false
78 | batch_size: 4
79 | return_dataloader: true
80 | ds_conf:
81 | name: random_thin_256
82 | max_len: 100
83 | paths:
84 | srs: ./log/test_inet256_thin/inpainted
85 | lrs: ./log/test_inet256_thin/gt_masked
86 | gts: ./log/test_inet256_thin/gt
87 | gt_keep_masks: ./log/test_inet256_thin/gt_keep_mask
88 |
--------------------------------------------------------------------------------
/confs/test_inet256_ev2li.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: true
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: true
28 | use_scale_shift_norm: true
29 | classifier_scale: 1.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | classifier_path: ./data/pretrained/256x256_classifier.pt
54 | model_path: ./data/pretrained/256x256_diffusion.pt
55 | name: test_inet256_ev2li
56 | inpa_inj_sched_prev: true
57 | n_jobs: 25
58 | print_estimated_vars: true
59 | inpa_inj_sched_prev_cumnoise: false
60 | schedule_jump_params:
61 | t_T: 250
62 | n_sample: 1
63 | jump_length: 10
64 | jump_n_sample: 10
65 | data:
66 | eval:
67 | lama_inet256_ev2li_n100_test:
68 | mask_loader: true
69 | gt_path: ./data/datasets/gts/inet256
70 | mask_path: ./data/datasets/gt_keep_masks/ev2li
71 | image_size: 256
72 | class_cond: false
73 | deterministic: true
74 | random_crop: false
75 | random_flip: false
76 | return_dict: true
77 | drop_last: false
78 | batch_size: 4
79 | return_dataloader: true
80 | ds_conf:
81 | name: random_ev2li_256
82 | max_len: 100
83 | paths:
84 | srs: ./log/test_inet256_ev2li/inpainted
85 | lrs: ./log/test_inet256_ev2li/gt_masked
86 | gts: ./log/test_inet256_ev2li/gt
87 | gt_keep_masks: ./log/test_inet256_ev2li/gt_keep_mask
88 |
--------------------------------------------------------------------------------
/confs/test_inet256_thick.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: true
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: true
28 | use_scale_shift_norm: true
29 | classifier_scale: 1.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | classifier_path: ./data/pretrained/256x256_classifier.pt
54 | model_path: ./data/pretrained/256x256_diffusion.pt
55 | name: test_inet256_thick
56 | inpa_inj_sched_prev: true
57 | n_jobs: 25
58 | print_estimated_vars: true
59 | inpa_inj_sched_prev_cumnoise: false
60 | schedule_jump_params:
61 | t_T: 250
62 | n_sample: 1
63 | jump_length: 10
64 | jump_n_sample: 10
65 | data:
66 | eval:
67 | lama_inet256_thick_n100_test:
68 | mask_loader: true
69 | gt_path: ./data/datasets/gts/inet256
70 | mask_path: ./data/datasets/gt_keep_masks/thick
71 | image_size: 256
72 | class_cond: false
73 | deterministic: true
74 | random_crop: false
75 | random_flip: false
76 | return_dict: true
77 | drop_last: false
78 | batch_size: 4
79 | return_dataloader: true
80 | ds_conf:
81 | name: random_thick_256
82 | max_len: 100
83 | paths:
84 | srs: ./log/test_inet256_thick/inpainted
85 | lrs: ./log/test_inet256_thick/gt_masked
86 | gts: ./log/test_inet256_thick/gt
87 | gt_keep_masks: ./log/test_inet256_thick/gt_keep_mask
88 |
--------------------------------------------------------------------------------
/confs/test_inet256_genhalf.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | attention_resolutions: 32,16,8
18 | class_cond: true
19 | diffusion_steps: 1000
20 | learn_sigma: true
21 | noise_schedule: linear
22 | num_channels: 256
23 | num_head_channels: 64
24 | num_heads: 4
25 | num_res_blocks: 2
26 | resblock_updown: true
27 | use_fp16: true
28 | use_scale_shift_norm: true
29 | classifier_scale: 1.0
30 | lr_kernel_n_std: 2
31 | num_samples: 100
32 | show_progress: true
33 | timestep_respacing: '250'
34 | use_kl: false
35 | predict_xstart: false
36 | rescale_timesteps: false
37 | rescale_learned_sigmas: false
38 | classifier_use_fp16: false
39 | classifier_width: 128
40 | classifier_depth: 2
41 | classifier_attention_resolutions: 32,16,8
42 | classifier_use_scale_shift_norm: true
43 | classifier_resblock_updown: true
44 | classifier_pool: attention
45 | num_heads_upsample: -1
46 | channel_mult: ''
47 | dropout: 0.0
48 | use_checkpoint: false
49 | use_new_attention_order: false
50 | clip_denoised: true
51 | use_ddim: false
52 | image_size: 256
53 | classifier_path: ./data/pretrained/256x256_classifier.pt
54 | model_path: ./data/pretrained/256x256_diffusion.pt
55 | name: test_inet256_genhalf
56 | inpa_inj_sched_prev: true
57 | n_jobs: 25
58 | print_estimated_vars: true
59 | inpa_inj_sched_prev_cumnoise: false
60 | schedule_jump_params:
61 | t_T: 250
62 | n_sample: 1
63 | jump_length: 10
64 | jump_n_sample: 10
65 | data:
66 | eval:
67 | lama_inet256_genhalf_n100_test:
68 | mask_loader: true
69 | gt_path: ./data/datasets/gts/inet256
70 | mask_path: ./data/datasets/gt_keep_masks/genhalf
71 | image_size: 256
72 | class_cond: false
73 | deterministic: true
74 | random_crop: false
75 | random_flip: false
76 | return_dict: true
77 | drop_last: false
78 | batch_size: 4
79 | return_dataloader: true
80 | ds_conf:
81 | name: random_genhalf_256
82 | max_len: 100
83 | paths:
84 | srs: ./log/test_inet256_genhalf/inpainted
85 | lrs: ./log/test_inet256_genhalf/gt_masked
86 | gts: ./log/test_inet256_genhalf/gt
87 | gt_keep_masks: ./log/test_inet256_genhalf/gt_keep_mask
88 |
--------------------------------------------------------------------------------
/conf_mgt/conf_base.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | from functools import lru_cache
18 | import os
19 | import torch
20 | from utils import imwrite
21 |
22 | from collections import defaultdict
23 | from os.path import isfile, expanduser
24 |
25 | def to_file_ext(img_names, ext):
26 | img_names_out = []
27 | for img_name in img_names:
28 | splits = img_name.split('.')
29 |         if len(splits) != 2:
30 | raise RuntimeError("File name needs exactly one '.':", img_name)
31 | img_names_out.append(splits[0] + '.' + ext)
32 |
33 | return img_names_out
34 |
35 | def write_images(imgs, img_names, dir_path):
36 | os.makedirs(dir_path, exist_ok=True)
37 |
38 | for image_name, image in zip(img_names, imgs):
39 | out_path = os.path.join(dir_path, image_name)
40 | imwrite(img=image, path=out_path)
41 |
42 |
43 |
44 | class NoneDict(defaultdict):
45 | def __init__(self):
46 | super().__init__(self.return_None)
47 |
48 | @staticmethod
49 | def return_None():
50 | return None
51 |
52 | def __getattr__(self, attr):
53 | return self.get(attr)
54 |
55 |
56 | class Default_Conf(NoneDict):
57 | def __init__(self):
58 | pass
59 |
60 | def get_dataloader(self, dset='train', dsName=None, batch_size=None, return_dataset=False):
61 |
62 | if batch_size is None:
63 | batch_size = self.batch_size
64 |
65 | candidates = self['data'][dset]
66 | ds_conf = candidates[dsName].copy()
67 |
68 | if ds_conf.get('mask_loader', False):
69 | from guided_diffusion.image_datasets import load_data_inpa
70 | return load_data_inpa(**ds_conf, conf=self)
71 | else:
72 | raise NotImplementedError()
73 |
74 | def get_debug_variance_path(self):
75 | return os.path.expanduser(os.path.join(self.get_default_eval_conf()['paths']['root'], 'debug/debug_variance'))
76 |
77 |     @staticmethod
78 | def device():
79 | return 'cuda' if torch.cuda.is_available() else 'cpu'
80 |
81 | def eval_imswrite(self, srs=None, img_names=None, dset=None, name=None, ext='png', lrs=None, gts=None, gt_keep_masks=None, verify_same=True):
82 | img_names = to_file_ext(img_names, ext)
83 |
84 | if dset is None:
85 | dset = self.get_default_eval_name()
86 |
87 | max_len = self['data'][dset][name].get('max_len')
88 |
89 | if srs is not None:
90 | sr_dir_path = expanduser(self['data'][dset][name]['paths']['srs'])
91 | write_images(srs, img_names, sr_dir_path)
92 |
93 | if gt_keep_masks is not None:
94 | mask_dir_path = expanduser(
95 | self['data'][dset][name]['paths']['gt_keep_masks'])
96 | write_images(gt_keep_masks, img_names, mask_dir_path)
97 |
98 | gts_path = self['data'][dset][name]['paths'].get('gts')
99 | if gts is not None and gts_path:
100 | gt_dir_path = expanduser(gts_path)
101 | write_images(gts, img_names, gt_dir_path)
102 |
103 | if lrs is not None:
104 | lrs_dir_path = expanduser(
105 | self['data'][dset][name]['paths']['lrs'])
106 | write_images(lrs, img_names, lrs_dir_path)
107 |
108 | def get_default_eval_name(self):
109 | candidates = self['data']['eval'].keys()
110 | if len(candidates) != 1:
111 | raise RuntimeError(
112 | f"Need exactly one candidate for {self.name}: {candidates}")
113 | return list(candidates)[0]
114 |
115 | def pget(self, name, default=None):
116 | if '.' in name:
117 | names = name.split('.')
118 | else:
119 | names = [name]
120 |
121 | sub_dict = self
122 |         for part in names:
123 |             sub_dict = sub_dict.get(part, default)
124 |
125 |         if sub_dict is None:
126 | return default
127 |
128 | return sub_dict
129 |
--------------------------------------------------------------------------------
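
A short usage sketch of the pieces above, with illustrative values: NoneDict makes missing keys read as None, and get_default_eval_name insists on exactly one dataset under data.eval, which is why every conf in confs/ defines a single entry there.

from conf_mgt.conf_base import Default_Conf

conf = Default_Conf()
conf.update({'data': {'eval': {'only_ds': {'max_len': 100}}}})

print(conf.not_set)                            # None -- no AttributeError raised
print(conf.get_default_eval_name())            # 'only_ds' (exactly one candidate)
print(conf.pget('data.eval.only_ds.max_len'))  # 100
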
/test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | """
18 | Like image_sample.py, but use a noisy image classifier to guide the sampling
19 | process towards more realistic images.
20 | """
21 |
22 | import os
23 | import argparse
24 | import torch as th
25 | import torch.nn.functional as F
26 | import time
27 | import conf_mgt
28 | from utils import yamlread
29 | from guided_diffusion import dist_util
30 |
31 | # Workaround: preload libgcc_s.so.1 (avoids pthread_cancel failures); skip if unavailable.
32 | try:
33 |     import ctypes
34 |     libgcc_s = ctypes.CDLL('libgcc_s.so.1')
35 | except OSError:
36 |     pass
37 |
38 |
39 | from guided_diffusion.script_util import (
40 | NUM_CLASSES,
41 | model_and_diffusion_defaults,
42 | classifier_defaults,
43 | create_model_and_diffusion,
44 | create_classifier,
45 | select_args,
46 | ) # noqa: E402
47 |
48 | def toU8(sample):
49 | if sample is None:
50 | return sample
51 |
52 | sample = ((sample + 1) * 127.5).clamp(0, 255).to(th.uint8)
53 | sample = sample.permute(0, 2, 3, 1)
54 | sample = sample.contiguous()
55 | sample = sample.detach().cpu().numpy()
56 | return sample
57 |
58 |
59 | def main(conf: conf_mgt.Default_Conf):
60 |
61 | print("Start", conf['name'])
62 |
63 | device = dist_util.dev(conf.get('device'))
64 |
65 |
66 | model, diffusion = create_model_and_diffusion(
67 | **select_args(conf, model_and_diffusion_defaults().keys()), conf=conf
68 | )
69 | model.load_state_dict(
70 | dist_util.load_state_dict(os.path.expanduser(
71 | conf.model_path), map_location="cpu")
72 | )
73 | model.to(device)
74 | if conf.use_fp16:
75 | model.convert_to_fp16()
76 | model.eval()
77 |
78 | show_progress = conf.show_progress
79 |
80 | if conf.classifier_scale > 0 and conf.classifier_path:
81 | print("loading classifier...")
82 | classifier = create_classifier(
83 | **select_args(conf, classifier_defaults().keys()))
84 | classifier.load_state_dict(
85 | dist_util.load_state_dict(os.path.expanduser(
86 | conf.classifier_path), map_location="cpu")
87 | )
88 |
89 | classifier.to(device)
90 | if conf.classifier_use_fp16:
91 | classifier.convert_to_fp16()
92 | classifier.eval()
93 |
94 | def cond_fn(x, t, y=None, gt=None, **kwargs):
95 | assert y is not None
96 | with th.enable_grad():
97 | x_in = x.detach().requires_grad_(True)
98 | logits = classifier(x_in, t)
99 | log_probs = F.log_softmax(logits, dim=-1)
100 | selected = log_probs[range(len(logits)), y.view(-1)]
101 | return th.autograd.grad(selected.sum(), x_in)[0] * conf.classifier_scale
102 | else:
103 | cond_fn = None
104 |
105 | def model_fn(x, t, y=None, gt=None, **kwargs):
106 | assert y is not None
107 | return model(x, t, y if conf.class_cond else None, gt=gt)
108 |
109 | print("sampling...")
110 | all_images = []
111 |
112 | dset = 'eval'
113 |
114 | eval_name = conf.get_default_eval_name()
115 |
116 | dl = conf.get_dataloader(dset=dset, dsName=eval_name)
117 |
118 | for batch in iter(dl):
119 |
120 | for k in batch.keys():
121 | if isinstance(batch[k], th.Tensor):
122 | batch[k] = batch[k].to(device)
123 |
124 | model_kwargs = {}
125 |
126 | model_kwargs["gt"] = batch['GT']
127 |
128 | gt_keep_mask = batch.get('gt_keep_mask')
129 | if gt_keep_mask is not None:
130 | model_kwargs['gt_keep_mask'] = gt_keep_mask
131 |
132 | batch_size = model_kwargs["gt"].shape[0]
133 |
134 | if conf.cond_y is not None:
135 | classes = th.ones(batch_size, dtype=th.long, device=device)
136 | model_kwargs["y"] = classes * conf.cond_y
137 | else:
138 | classes = th.randint(
139 | low=0, high=NUM_CLASSES, size=(batch_size,), device=device
140 | )
141 | model_kwargs["y"] = classes
142 |
143 | sample_fn = (
144 | diffusion.p_sample_loop if not conf.use_ddim else diffusion.ddim_sample_loop
145 | )
146 |
147 |
148 | result = sample_fn(
149 | model_fn,
150 | (batch_size, 3, conf.image_size, conf.image_size),
151 | clip_denoised=conf.clip_denoised,
152 | model_kwargs=model_kwargs,
153 | cond_fn=cond_fn,
154 | device=device,
155 | progress=show_progress,
156 | return_all=True,
157 | conf=conf
158 | )
159 | srs = toU8(result['sample'])
160 | gts = toU8(result['gt'])
161 | lrs = toU8(result.get('gt') * model_kwargs.get('gt_keep_mask') + (-1) *
162 | th.ones_like(result.get('gt')) * (1 - model_kwargs.get('gt_keep_mask')))
163 |
164 | gt_keep_masks = toU8((model_kwargs.get('gt_keep_mask') * 2 - 1))
165 |
166 | conf.eval_imswrite(
167 | srs=srs, gts=gts, lrs=lrs, gt_keep_masks=gt_keep_masks,
168 | img_names=batch['GT_name'], dset=dset, name=eval_name, verify_same=False)
169 |
170 | print("sampling complete")
171 |
172 |
173 | if __name__ == "__main__":
174 | parser = argparse.ArgumentParser()
175 | parser.add_argument('--conf_path', type=str, required=False, default=None)
176 | args = vars(parser.parse_args())
177 |
178 | conf_arg = conf_mgt.conf_base.Default_Conf()
179 | conf_arg.update(yamlread(args.get('conf_path')))
180 | main(conf_arg)
181 |
--------------------------------------------------------------------------------
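
For context, cond_fn above is the standard classifier guidance of the guided-diffusion codebase: it returns s * \nabla_{x_t} \log p_\phi(y \mid x_t) with s = classifier_scale, and the sampler uses that gradient to shift the predicted mean by a variance-scaled amount,

    \hat{\mu}_\theta(x_t, t) = \mu_\theta(x_t, t) + s \, \Sigma_\theta(x_t, t) \, \nabla_{x_t} \log p_\phi(y \mid x_t).

The script is launched with a single flag, e.g. python test.py --conf_path confs/test_inet256_thick.yml.
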
/guided_diffusion/nn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | """
18 | Various utilities for neural networks.
19 | """
20 |
21 | import math
22 |
23 | import torch as th
24 | import torch.nn as nn
25 |
26 |
27 | # PyTorch 1.7 has SiLU, but we support PyTorch 1.5.
28 | class SiLU(nn.Module):
29 | def forward(self, x):
30 | return x * th.sigmoid(x)
31 |
32 |
33 | class GroupNorm32(nn.GroupNorm):
34 | def forward(self, x):
35 | return super().forward(x.float()).type(x.dtype)
36 |
37 |
38 | def conv_nd(dims, *args, **kwargs):
39 | """
40 | Create a 1D, 2D, or 3D convolution module.
41 | """
42 | if dims == 1:
43 | return nn.Conv1d(*args, **kwargs)
44 | elif dims == 2:
45 | return nn.Conv2d(*args, **kwargs)
46 | elif dims == 3:
47 | return nn.Conv3d(*args, **kwargs)
48 | raise ValueError(f"unsupported dimensions: {dims}")
49 |
50 |
51 | def linear(*args, **kwargs):
52 | """
53 | Create a linear module.
54 | """
55 | return nn.Linear(*args, **kwargs)
56 |
57 |
58 | def avg_pool_nd(dims, *args, **kwargs):
59 | """
60 | Create a 1D, 2D, or 3D average pooling module.
61 | """
62 | if dims == 1:
63 | return nn.AvgPool1d(*args, **kwargs)
64 | elif dims == 2:
65 | return nn.AvgPool2d(*args, **kwargs)
66 | elif dims == 3:
67 | return nn.AvgPool3d(*args, **kwargs)
68 | raise ValueError(f"unsupported dimensions: {dims}")
69 |
70 |
71 | def update_ema(target_params, source_params, rate=0.99):
72 | """
73 | Update target parameters to be closer to those of source parameters using
74 | an exponential moving average.
75 |
76 | :param target_params: the target parameter sequence.
77 | :param source_params: the source parameter sequence.
78 | :param rate: the EMA rate (closer to 1 means slower).
79 | """
80 | for targ, src in zip(target_params, source_params):
81 | targ.detach().mul_(rate).add_(src, alpha=1 - rate)
82 |
83 |
84 | def zero_module(module):
85 | """
86 | Zero out the parameters of a module and return it.
87 | """
88 | for p in module.parameters():
89 | p.detach().zero_()
90 | return module
91 |
92 |
93 | def scale_module(module, scale):
94 | """
95 | Scale the parameters of a module and return it.
96 | """
97 | for p in module.parameters():
98 | p.detach().mul_(scale)
99 | return module
100 |
101 |
102 | def mean_flat(tensor):
103 | """
104 | Take the mean over all non-batch dimensions.
105 | """
106 | return tensor.mean(dim=list(range(1, len(tensor.shape))))
107 |
108 |
109 | def normalization(channels):
110 | """
111 | Make a standard normalization layer.
112 |
113 | :param channels: number of input channels.
114 | :return: an nn.Module for normalization.
115 | """
116 | return GroupNorm32(32, channels)
117 |
118 |
119 | def timestep_embedding(timesteps, dim, max_period=10000):
120 | """
121 | Create sinusoidal timestep embeddings.
122 |
123 | :param timesteps: a 1-D Tensor of N indices, one per batch element.
124 | These may be fractional.
125 | :param dim: the dimension of the output.
126 | :param max_period: controls the minimum frequency of the embeddings.
127 | :return: an [N x dim] Tensor of positional embeddings.
128 | """
129 | half = dim // 2
130 | freqs = th.exp(
131 | -math.log(max_period) * th.arange(start=0, end=half, dtype=th.float32) / half
132 | ).to(device=timesteps.device)
133 | args = timesteps[:, None].float() * freqs[None]
134 | embedding = th.cat([th.cos(args), th.sin(args)], dim=-1)
135 | if dim % 2:
136 | embedding = th.cat([embedding, th.zeros_like(embedding[:, :1])], dim=-1)
137 | return embedding
138 |
139 |
140 | def checkpoint(func, inputs, params, flag):
141 | """
142 | Evaluate a function without caching intermediate activations, allowing for
143 | reduced memory at the expense of extra compute in the backward pass.
144 |
145 | :param func: the function to evaluate.
146 | :param inputs: the argument sequence to pass to `func`.
147 | :param params: a sequence of parameters `func` depends on but does not
148 | explicitly take as arguments.
149 | :param flag: if False, disable gradient checkpointing.
150 | """
151 | if flag:
152 | args = tuple(inputs) + tuple(params)
153 | return CheckpointFunction.apply(func, len(inputs), *args)
154 | else:
155 | return func(*inputs)
156 |
157 |
158 | class CheckpointFunction(th.autograd.Function):
159 | @staticmethod
160 | def forward(ctx, run_function, length, *args):
161 | ctx.run_function = run_function
162 | ctx.input_tensors = list(args[:length])
163 | ctx.input_params = list(args[length:])
164 | with th.no_grad():
165 | output_tensors = ctx.run_function(*ctx.input_tensors)
166 | return output_tensors
167 |
168 | @staticmethod
169 | def backward(ctx, *output_grads):
170 | ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
171 | with th.enable_grad():
172 | # Fixes a bug where the first op in run_function modifies the
173 | # Tensor storage in place, which is not allowed for detach()'d
174 | # Tensors.
175 | shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
176 | output_tensors = ctx.run_function(*shallow_copies)
177 | input_grads = th.autograd.grad(
178 | output_tensors,
179 | ctx.input_tensors + ctx.input_params,
180 | output_grads,
181 | allow_unused=True,
182 | )
183 | del ctx.input_tensors
184 | del ctx.input_params
185 | del output_tensors
186 | return (None, None) + input_grads
187 |
--------------------------------------------------------------------------------
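
A quick shape check for timestep_embedding above (a minimal sketch): the first half of the last dimension holds cosines, the second half sines, so t = 0 maps to ones followed by zeros.

import torch as th
from guided_diffusion.nn import timestep_embedding

t = th.tensor([0, 125, 249])
emb = timestep_embedding(t, dim=128)
print(emb.shape)             # torch.Size([3, 128])
print(emb[0, :64].unique())  # tensor([1.]) -- cos(0) everywhere
print(emb[0, 64:].unique())  # tensor([0.]) -- sin(0) everywhere
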
/guided_diffusion/scheduler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | def get_schedule(t_T, t_0, n_sample, n_steplength, debug=0):
18 | if n_steplength > 1:
19 | if not n_sample > 1:
20 | raise RuntimeError('n_steplength has no effect if n_sample=1')
21 |
22 | t = t_T
23 | times = [t]
24 | while t >= 0:
25 | t = t - 1
26 | times.append(t)
27 | n_steplength_cur = min(n_steplength, t_T - t)
28 |
29 | for _ in range(n_sample - 1):
30 |
31 | for _ in range(n_steplength_cur):
32 | t = t + 1
33 | times.append(t)
34 | for _ in range(n_steplength_cur):
35 | t = t - 1
36 | times.append(t)
37 |
38 | _check_times(times, t_0, t_T)
39 |
40 | if debug == 2:
41 | for x in [list(range(0, 50)), list(range(-1, -50, -1))]:
42 | _plot_times(x=x, times=[times[i] for i in x])
43 |
44 | return times
45 |
46 |
47 | def _check_times(times, t_0, t_T):
48 |     # The schedule must start at the high-noise end and step down first
49 | assert times[0] > times[1], (times[0], times[1])
50 |
51 |     # The schedule must terminate at t = -1
52 | assert times[-1] == -1, times[-1]
53 |
54 | # Steplength = 1
55 | for t_last, t_cur in zip(times[:-1], times[1:]):
56 | assert abs(t_last - t_cur) == 1, (t_last, t_cur)
57 |
58 | # Value range
59 | for t in times:
60 | assert t >= t_0, (t, t_0)
61 | assert t <= t_T, (t, t_T)
62 |
63 |
64 | def _plot_times(x, times):
65 | import matplotlib.pyplot as plt
66 | plt.plot(x, times)
67 | plt.show()
68 |
69 |
70 | def get_schedule_jump(t_T, n_sample, jump_length, jump_n_sample,
71 | jump2_length=1, jump2_n_sample=1,
72 | jump3_length=1, jump3_n_sample=1,
73 | start_resampling=100000000):
74 |
75 | jumps = {}
76 | for j in range(0, t_T - jump_length, jump_length):
77 | jumps[j] = jump_n_sample - 1
78 |
79 | jumps2 = {}
80 | for j in range(0, t_T - jump2_length, jump2_length):
81 | jumps2[j] = jump2_n_sample - 1
82 |
83 | jumps3 = {}
84 | for j in range(0, t_T - jump3_length, jump3_length):
85 | jumps3[j] = jump3_n_sample - 1
86 |
87 | t = t_T
88 | ts = []
89 |
90 | while t >= 1:
91 | t = t-1
92 | ts.append(t)
93 |
94 | if (
95 | t + 1 < t_T - 1 and
96 | t <= start_resampling
97 | ):
98 | for _ in range(n_sample - 1):
99 | t = t + 1
100 | ts.append(t)
101 |
102 | if t >= 0:
103 | t = t - 1
104 | ts.append(t)
105 |
106 | if (
107 | jumps3.get(t, 0) > 0 and
108 | t <= start_resampling - jump3_length
109 | ):
110 | jumps3[t] = jumps3[t] - 1
111 | for _ in range(jump3_length):
112 | t = t + 1
113 | ts.append(t)
114 |
115 | if (
116 | jumps2.get(t, 0) > 0 and
117 | t <= start_resampling - jump2_length
118 | ):
119 | jumps2[t] = jumps2[t] - 1
120 | for _ in range(jump2_length):
121 | t = t + 1
122 | ts.append(t)
123 | jumps3 = {}
124 | for j in range(0, t_T - jump3_length, jump3_length):
125 | jumps3[j] = jump3_n_sample - 1
126 |
127 | if (
128 | jumps.get(t, 0) > 0 and
129 | t <= start_resampling - jump_length
130 | ):
131 | jumps[t] = jumps[t] - 1
132 | for _ in range(jump_length):
133 | t = t + 1
134 | ts.append(t)
135 | jumps2 = {}
136 | for j in range(0, t_T - jump2_length, jump2_length):
137 | jumps2[j] = jump2_n_sample - 1
138 |
139 | jumps3 = {}
140 | for j in range(0, t_T - jump3_length, jump3_length):
141 | jumps3[j] = jump3_n_sample - 1
142 |
143 | ts.append(-1)
144 |
145 | _check_times(ts, -1, t_T)
146 |
147 | return ts
148 |
149 |
150 | def get_schedule_jump_paper():
151 | t_T = 250
152 | jump_length = 10
153 | jump_n_sample = 10
154 |
155 | jumps = {}
156 | for j in range(0, t_T - jump_length, jump_length):
157 | jumps[j] = jump_n_sample - 1
158 |
159 | t = t_T
160 | ts = []
161 |
162 | while t >= 1:
163 | t = t-1
164 | ts.append(t)
165 |
166 | if jumps.get(t, 0) > 0:
167 | jumps[t] = jumps[t] - 1
168 | for _ in range(jump_length):
169 | t = t + 1
170 | ts.append(t)
171 |
172 | ts.append(-1)
173 |
174 | _check_times(ts, -1, t_T)
175 |
176 | return ts
177 |
178 |
179 | def get_schedule_jump_test(to_supplement=False):
180 | ts = get_schedule_jump(t_T=250, n_sample=1,
181 | jump_length=10, jump_n_sample=10,
182 | jump2_length=1, jump2_n_sample=1,
183 | jump3_length=1, jump3_n_sample=1,
184 | start_resampling=250)
185 |
186 | import matplotlib.pyplot as plt
187 | SMALL_SIZE = 8*3
188 | MEDIUM_SIZE = 10*3
189 | BIGGER_SIZE = 12*3
190 |
191 | plt.rc('font', size=SMALL_SIZE) # controls default text sizes
192 | plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title
193 | plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels
194 | plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels
195 | plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
196 | plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize
197 | plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title
198 |
199 | plt.plot(ts)
200 |
201 | fig = plt.gcf()
202 | fig.set_size_inches(20, 10)
203 |
204 | ax = plt.gca()
205 | ax.set_xlabel('Number of Transitions')
206 | ax.set_ylabel('Diffusion time $t$')
207 |
208 | fig.tight_layout()
209 |
210 | if to_supplement:
211 | out_path = "/cluster/home/alugmayr/gdiff/paper/supplement/figures/jump_sched.pdf"
212 | plt.savefig(out_path)
213 |
214 | out_path = "./schedule.png"
215 | plt.savefig(out_path)
216 | print(out_path)
217 |
218 |
219 | def main():
220 | get_schedule_jump_test()
221 |
222 |
223 | if __name__ == "__main__":
224 | main()
225 |
--------------------------------------------------------------------------------
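
The schedule_jump_params block in the confs (t_T: 250, jump_length: 10, jump_n_sample: 10) maps one-to-one onto get_schedule_jump; a minimal probe of the resulting schedule:

from guided_diffusion.scheduler import get_schedule_jump

ts = get_schedule_jump(t_T=250, n_sample=1, jump_length=10, jump_n_sample=10)
print(ts[0], ts[-1])  # 249 -1: the walk starts just below t_T and must end at -1
print(len(ts))        # total number of transitions, including resampling detours
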
/guided_diffusion/respace.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | import numpy as np
18 | import torch as th
19 |
20 | from .gaussian_diffusion import GaussianDiffusion
21 |
22 |
23 | def space_timesteps(num_timesteps, section_counts):
24 | """
25 | Create a list of timesteps to use from an original diffusion process,
26 | given the number of timesteps we want to take from equally-sized portions
27 | of the original process.
28 |
29 |     For example, if there are 300 timesteps and the section counts are [10, 15, 20],
30 |     then the first 100 timesteps are strided down to 10 timesteps, the second 100
31 |     to 15 timesteps, and the final 100 to 20.
32 |
33 | If the stride is a string starting with "ddim", then the fixed striding
34 | from the DDIM paper is used, and only one section is allowed.
35 |
36 | :param num_timesteps: the number of diffusion steps in the original
37 | process to divide up.
38 | :param section_counts: either a list of numbers, or a string containing
39 | comma-separated numbers, indicating the step count
40 | per section. As a special case, use "ddimN" where N
41 | is a number of steps to use the striding from the
42 | DDIM paper.
43 | :return: a set of diffusion steps from the original process to use.
44 | """
45 | if isinstance(section_counts, str):
46 | if section_counts.startswith("ddim"):
47 | desired_count = int(section_counts[len("ddim"):])
48 | for i in range(1, num_timesteps):
49 | if len(range(0, num_timesteps, i)) == desired_count:
50 | return set(range(0, num_timesteps, i))
51 | section_counts = [int(x) for x in section_counts.split(",")]
52 | if isinstance(section_counts, int):
53 | section_counts = [section_counts]
54 | size_per = num_timesteps // len(section_counts)
55 | extra = num_timesteps % len(section_counts)
56 | start_idx = 0
57 | all_steps = []
58 |
59 | if len(section_counts) == 1 and section_counts[0] > num_timesteps:
60 | return set(np.linspace(start=0, stop=num_timesteps, num=section_counts[0]))
61 |
62 | for i, section_count in enumerate(section_counts):
63 | size = size_per + (1 if i < extra else 0)
64 | if size < section_count:
65 | raise ValueError(
66 | f"cannot divide section of {size} steps into {section_count}"
67 | )
68 | if section_count <= 1:
69 | frac_stride = 1
70 | else:
71 | frac_stride = (size - 1) / (section_count - 1)
72 | cur_idx = 0.0
73 | taken_steps = []
74 | for _ in range(section_count):
75 | taken_steps.append(start_idx + round(cur_idx))
76 | cur_idx += frac_stride
77 | all_steps += taken_steps
78 | start_idx += size
79 | return set(all_steps)
80 |
81 |
82 | class SpacedDiffusion(GaussianDiffusion):
83 | """
84 | A diffusion process which can skip steps in a base diffusion process.
85 |
86 | :param use_timesteps: a collection (sequence or set) of timesteps from the
87 | original diffusion process to retain.
88 | :param kwargs: the kwargs to create the base diffusion process.
89 | """
90 |
91 | def __init__(self, use_timesteps, conf=None, **kwargs):
92 | self.use_timesteps = set(use_timesteps)
93 | self.original_num_steps = len(kwargs["betas"])
94 | self.conf = conf
95 |
96 | base_diffusion = GaussianDiffusion(conf=conf,
97 | **kwargs) # pylint: disable=missing-kwoa
98 |
99 | if conf.respace_interpolate:
100 | new_betas = resample_betas(
101 | kwargs["betas"], int(conf.timestep_respacing))
102 | self.timestep_map = list(range(len(new_betas)))
103 | else:
104 | self.timestep_map = []
105 | new_betas = []
106 | last_alpha_cumprod = 1.0
107 | for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod):
108 | if i in self.use_timesteps:
109 | new_betas.append(1 - alpha_cumprod / last_alpha_cumprod)
110 | last_alpha_cumprod = alpha_cumprod
111 | self.timestep_map.append(i)
112 |
113 | kwargs["betas"] = np.array(new_betas)
114 |
115 | if conf.use_value_logger:
116 | conf.value_logger.add_value(
117 | new_betas, 'new_betas SpacedDiffusion')
118 |
119 | super().__init__(conf=conf, **kwargs)
120 |
121 | def p_mean_variance(
122 | self, model, *args, **kwargs
123 | ): # pylint: disable=signature-differs
124 | return super().p_mean_variance(self._wrap_model(model), *args, **kwargs)
125 |
126 | def training_losses(
127 | self, model, *args, **kwargs
128 | ): # pylint: disable=signature-differs
129 | return super().training_losses(self._wrap_model(model), *args, **kwargs)
130 |
131 | def condition_mean(self, cond_fn, *args, **kwargs):
132 | return super().condition_mean(self._wrap_model(cond_fn), *args, **kwargs)
133 |
134 | def condition_score(self, cond_fn, *args, **kwargs):
135 | return super().condition_score(self._wrap_model(cond_fn), *args, **kwargs)
136 |
137 | def _wrap_model(self, model):
138 | if isinstance(model, _WrappedModel):
139 | return model
140 | return _WrappedModel(
141 | model, self.timestep_map, self.rescale_timesteps,
142 | self.original_num_steps, self.conf
143 | )
144 |
145 | def _scale_timesteps(self, t):
146 | # Scaling is done by the wrapped model.
147 | return t
148 |
149 |
150 | class _WrappedModel:
151 | def __init__(self, model, timestep_map, rescale_timesteps, original_num_steps, conf):
152 | self.model = model
153 | self.timestep_map = timestep_map
154 | self.rescale_timesteps = rescale_timesteps
155 | self.original_num_steps = original_num_steps
156 | self.conf = conf
157 |
158 | def __call__(self, x, ts, **kwargs):
159 | map_tensor = th.tensor( # pylint: disable=not-callable
160 | self.timestep_map, device=ts.device, dtype=ts.dtype)
161 | new_ts = map_tensor[ts]
162 | if self.rescale_timesteps:
163 | raise NotImplementedError()
164 | #new_ts = self.do_rescale_timesteps(new_ts)
165 |
166 | if self.conf.respace_interpolate:
167 | new_ts = new_ts.float() * (
168 | (self.conf.diffusion_steps - 1) / (float(self.conf.timestep_respacing) - 1.0))
169 |
170 | return self.model(x, new_ts, **kwargs)
171 |
172 | def do_rescale_timesteps(self, new_ts):
173 | new_ts = new_ts.float() * (1000.0 / self.original_num_steps)
174 | return new_ts
175 |
--------------------------------------------------------------------------------
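
With the timestep_respacing: '250' used throughout the confs, space_timesteps keeps 250 of the 1000 training steps, roughly every fourth one; a quick check:

from guided_diffusion.respace import space_timesteps

steps = space_timesteps(num_timesteps=1000, section_counts="250")
print(len(steps))         # 250
print(sorted(steps)[:5])  # [0, 4, 8, 12, 16]
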
/guided_diffusion/image_datasets.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | import random
18 | import os
19 |
20 | from PIL import Image
21 | import blobfile as bf
22 | import numpy as np
23 | from torch.utils.data import DataLoader, Dataset
24 |
25 | def load_data_yield(loader):
26 | while True:
27 | yield from loader
28 |
29 | def load_data_inpa(
30 | *,
31 | gt_path=None,
32 | mask_path=None,
33 | batch_size,
34 | image_size,
35 | class_cond=False,
36 | deterministic=False,
37 | random_crop=False,
38 | random_flip=True,
39 | return_dataloader=False,
40 | return_dict=False,
41 | max_len=None,
42 | drop_last=True,
43 | conf=None,
44 | offset=0,
45 |     **kwargs
46 | ):
47 | """
48 | For a dataset, create a generator over (images, kwargs) pairs.
49 |
50 |     Each image is an NCHW float tensor, and the kwargs dict contains zero or
51 |     more keys, each of which maps to a batched Tensor of its own.
52 | The kwargs dict can be used for class labels, in which case the key is "y"
53 | and the values are integer tensors of class labels.
54 |
55 |     :param gt_path: a directory of ground-truth images; mask_path likewise for keep-masks.
56 | :param batch_size: the batch size of each returned pair.
57 | :param image_size: the size to which images are resized.
58 | :param class_cond: if True, include a "y" key in returned dicts for class
59 | label. If classes are not available and this is true, an
60 | exception will be raised.
61 | :param deterministic: if True, yield results in a deterministic order.
62 | :param random_crop: if True, randomly crop the images for augmentation.
63 | :param random_flip: if True, randomly flip the images for augmentation.
64 | """
65 |
66 | gt_dir = os.path.expanduser(gt_path)
67 | mask_dir = os.path.expanduser(mask_path)
68 |
69 | gt_paths = _list_image_files_recursively(gt_dir)
70 | mask_paths = _list_image_files_recursively(mask_dir)
71 |
72 | assert len(gt_paths) == len(mask_paths)
73 |
74 | classes = None
75 | if class_cond:
76 | raise NotImplementedError()
77 |
78 | dataset = ImageDatasetInpa(
79 | image_size,
80 | gt_paths=gt_paths,
81 | mask_paths=mask_paths,
82 | classes=classes,
83 | shard=0,
84 | num_shards=1,
85 | random_crop=random_crop,
86 | random_flip=random_flip,
87 | return_dict=return_dict,
88 | max_len=max_len,
89 | conf=conf,
90 | offset=offset
91 | )
92 |
93 | if deterministic:
94 | loader = DataLoader(
95 | dataset, batch_size=batch_size, shuffle=False, num_workers=1, drop_last=drop_last
96 | )
97 |
98 | else:
99 | loader = DataLoader(
100 | dataset, batch_size=batch_size, shuffle=True, num_workers=1, drop_last=drop_last
101 | )
102 |
103 | if return_dataloader:
104 | return loader
105 | else:
106 | return load_data_yield(loader)
107 |
108 |
109 | def _list_image_files_recursively(data_dir):
110 | results = []
111 | for entry in sorted(bf.listdir(data_dir)):
112 | full_path = bf.join(data_dir, entry)
113 | ext = entry.split(".")[-1]
114 | if "." in entry and ext.lower() in ["jpg", "jpeg", "png", "gif"]:
115 | results.append(full_path)
116 | elif bf.isdir(full_path):
117 | results.extend(_list_image_files_recursively(full_path))
118 | return results
119 |
120 |
121 | class ImageDatasetInpa(Dataset):
122 | def __init__(
123 | self,
124 | resolution,
125 | gt_paths,
126 | mask_paths,
127 | classes=None,
128 | shard=0,
129 | num_shards=1,
130 | random_crop=False,
131 | random_flip=True,
132 | return_dict=False,
133 | max_len=None,
134 | conf=None,
135 | offset=0
136 | ):
137 | super().__init__()
138 | self.resolution = resolution
139 |
140 | gt_paths = sorted(gt_paths)[offset:]
141 | mask_paths = sorted(mask_paths)[offset:]
142 |
143 | self.local_gts = gt_paths[shard:][::num_shards]
144 | self.local_masks = mask_paths[shard:][::num_shards]
145 |
146 | self.local_classes = None if classes is None else classes[shard:][::num_shards]
147 |
148 | self.random_crop = random_crop
149 | self.random_flip = random_flip
150 | self.return_dict = return_dict
151 | self.max_len = max_len
152 |
153 | def __len__(self):
154 | if self.max_len is not None:
155 | return self.max_len
156 |
157 | return len(self.local_gts)
158 |
159 | def __getitem__(self, idx):
160 | gt_path = self.local_gts[idx]
161 | pil_gt = self.imread(gt_path)
162 |
163 | mask_path = self.local_masks[idx]
164 | pil_mask = self.imread(mask_path)
165 |
166 | if self.random_crop:
167 | raise NotImplementedError()
168 | else:
169 | arr_gt = center_crop_arr(pil_gt, self.resolution)
170 | arr_mask = center_crop_arr(pil_mask, self.resolution)
171 |
172 | if self.random_flip and random.random() < 0.5:
173 |             arr_gt = arr_gt[:, ::-1].copy()    # copy: torch rejects negative strides
174 |             arr_mask = arr_mask[:, ::-1].copy()
175 |
176 | arr_gt = arr_gt.astype(np.float32) / 127.5 - 1
177 | arr_mask = arr_mask.astype(np.float32) / 255.0
178 |
179 | out_dict = {}
180 | if self.local_classes is not None:
181 | out_dict["y"] = np.array(self.local_classes[idx], dtype=np.int64)
182 |
183 | if self.return_dict:
184 | name = os.path.basename(gt_path)
185 | return {
186 | 'GT': np.transpose(arr_gt, [2, 0, 1]),
187 | 'GT_name': name,
188 | 'gt_keep_mask': np.transpose(arr_mask, [2, 0, 1]),
189 | }
190 | else:
191 | raise NotImplementedError()
192 |
193 | def imread(self, path):
194 | with bf.BlobFile(path, "rb") as f:
195 | pil_image = Image.open(f)
196 | pil_image.load()
197 | pil_image = pil_image.convert("RGB")
198 | return pil_image
199 |
200 |
201 | def center_crop_arr(pil_image, image_size):
202 | # We are not on a new enough PIL to support the `reducing_gap`
203 | # argument, which uses BOX downsampling at powers of two first.
204 | # Thus, we do it by hand to improve downsample quality.
205 | while min(*pil_image.size) >= 2 * image_size:
206 | pil_image = pil_image.resize(
207 | tuple(x // 2 for x in pil_image.size), resample=Image.BOX
208 | )
209 |
210 | scale = image_size / min(*pil_image.size)
211 | pil_image = pil_image.resize(
212 | tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC
213 | )
214 |
215 | arr = np.array(pil_image)
216 | crop_y = (arr.shape[0] - image_size) // 2
217 | crop_x = (arr.shape[1] - image_size) // 2
218 | return arr[crop_y: crop_y + image_size, crop_x: crop_x + image_size]
219 |
--------------------------------------------------------------------------------
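
The value conventions in ImageDatasetInpa matter downstream: ground truth is scaled to [-1, 1] and the keep-mask to [0, 1], where 1 means the pixel is kept (test.py paints the masked-out region to -1 when building gt_masked). A tiny numeric illustration:

import numpy as np

px = np.array([0.0, 127.5, 255.0], dtype=np.float32)
gt = px / 127.5 - 1                                       # [-1.,  0.,  1.]
mask = np.array([0.0, 255.0], dtype=np.float32) / 255.0   # [0., 1.]; 1 = keep GT
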
/guided_diffusion/fp16_util.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | """
18 | Helpers to train with 16-bit precision.
19 | """
20 |
21 | import numpy as np
22 | import torch as th
23 | import torch.nn as nn
24 | from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
25 |
26 |
27 | INITIAL_LOG_LOSS_SCALE = 20.0
28 |
29 |
30 | def convert_module_to_f16(l):
31 | """
32 | Convert primitive modules to float16.
33 | """
34 | if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
35 | l.weight.data = l.weight.data.half()
36 | if l.bias is not None:
37 | l.bias.data = l.bias.data.half()
38 |
39 |
40 | def convert_module_to_f32(l):
41 | """
42 | Convert primitive modules to float32, undoing convert_module_to_f16().
43 | """
44 | if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
45 | l.weight.data = l.weight.data.float()
46 | if l.bias is not None:
47 | l.bias.data = l.bias.data.float()
48 |
49 |
50 | def make_master_params(param_groups_and_shapes):
51 | """
52 | Copy model parameters into a (differently-shaped) list of full-precision
53 | parameters.
54 | """
55 | master_params = []
56 | for param_group, shape in param_groups_and_shapes:
57 | master_param = nn.Parameter(
58 | _flatten_dense_tensors(
59 | [param.detach().float() for (_, param) in param_group]
60 | ).view(shape)
61 | )
62 | master_param.requires_grad = True
63 | master_params.append(master_param)
64 | return master_params
65 |
66 |
67 | def model_grads_to_master_grads(param_groups_and_shapes, master_params):
68 | """
69 | Copy the gradients from the model parameters into the master parameters
70 | from make_master_params().
71 | """
72 | for master_param, (param_group, shape) in zip(
73 | master_params, param_groups_and_shapes
74 | ):
75 | master_param.grad = _flatten_dense_tensors(
76 | [param_grad_or_zeros(param) for (_, param) in param_group]
77 | ).view(shape)
78 |
79 |
80 | def master_params_to_model_params(param_groups_and_shapes, master_params):
81 | """
82 | Copy the master parameter data back into the model parameters.
83 | """
84 | # Without copying to a list, if a generator is passed, this will
85 | # silently not copy any parameters.
86 | for master_param, (param_group, _) in zip(master_params, param_groups_and_shapes):
87 | for (_, param), unflat_master_param in zip(
88 | param_group, unflatten_master_params(param_group, master_param.view(-1))
89 | ):
90 | param.detach().copy_(unflat_master_param)
91 |
92 |
93 | def unflatten_master_params(param_group, master_param):
94 | return _unflatten_dense_tensors(master_param, [param for (_, param) in param_group])
95 |
96 |
97 | def get_param_groups_and_shapes(named_model_params):
98 | named_model_params = list(named_model_params)
99 | scalar_vector_named_params = (
100 | [(n, p) for (n, p) in named_model_params if p.ndim <= 1],
101 | (-1),
102 | )
103 | matrix_named_params = (
104 | [(n, p) for (n, p) in named_model_params if p.ndim > 1],
105 | (1, -1),
106 | )
107 | return [scalar_vector_named_params, matrix_named_params]
108 |
109 |
110 | def master_params_to_state_dict(
111 | model, param_groups_and_shapes, master_params, use_fp16
112 | ):
113 | if use_fp16:
114 | state_dict = model.state_dict()
115 | for master_param, (param_group, _) in zip(
116 | master_params, param_groups_and_shapes
117 | ):
118 | for (name, _), unflat_master_param in zip(
119 | param_group, unflatten_master_params(param_group, master_param.view(-1))
120 | ):
121 | assert name in state_dict
122 | state_dict[name] = unflat_master_param
123 | else:
124 | state_dict = model.state_dict()
125 | for i, (name, _value) in enumerate(model.named_parameters()):
126 | assert name in state_dict
127 | state_dict[name] = master_params[i]
128 | return state_dict
129 |
130 |
131 | def state_dict_to_master_params(model, state_dict, use_fp16):
132 | if use_fp16:
133 | named_model_params = [
134 | (name, state_dict[name]) for name, _ in model.named_parameters()
135 | ]
136 | param_groups_and_shapes = get_param_groups_and_shapes(named_model_params)
137 | master_params = make_master_params(param_groups_and_shapes)
138 | else:
139 | master_params = [state_dict[name] for name, _ in model.named_parameters()]
140 | return master_params
141 |
142 |
143 | def zero_master_grads(master_params):
144 | for param in master_params:
145 | param.grad = None
146 |
147 |
148 | def zero_grad(model_params):
149 | for param in model_params:
150 | # Taken from https://pytorch.org/docs/stable/_modules/torch/optim/optimizer.html#Optimizer.add_param_group
151 | if param.grad is not None:
152 | param.grad.detach_()
153 | param.grad.zero_()
154 |
155 |
156 | def param_grad_or_zeros(param):
157 | if param.grad is not None:
158 | return param.grad.data.detach()
159 | else:
160 | return th.zeros_like(param)
161 |
162 |
163 | class MixedPrecisionTrainer:
164 | def __init__(
165 | self,
166 | *,
167 | model,
168 | use_fp16=False,
169 | fp16_scale_growth=1e-3,
170 | initial_lg_loss_scale=INITIAL_LOG_LOSS_SCALE,
171 | ):
172 | self.model = model
173 | self.use_fp16 = use_fp16
174 | self.fp16_scale_growth = fp16_scale_growth
175 |
176 | self.model_params = list(self.model.parameters())
177 | self.master_params = self.model_params
178 | self.param_groups_and_shapes = None
179 | self.lg_loss_scale = initial_lg_loss_scale
180 |
181 | if self.use_fp16:
182 | self.param_groups_and_shapes = get_param_groups_and_shapes(
183 | self.model.named_parameters()
184 | )
185 | self.master_params = make_master_params(self.param_groups_and_shapes)
186 | self.model.convert_to_fp16()
187 |
188 | def zero_grad(self):
189 | zero_grad(self.model_params)
190 |
191 | def backward(self, loss: th.Tensor):
192 | if self.use_fp16:
193 | loss_scale = 2 ** self.lg_loss_scale
194 | (loss * loss_scale).backward()
195 | else:
196 | loss.backward()
197 |
198 | def optimize(self, opt: th.optim.Optimizer):
199 | if self.use_fp16:
200 | return self._optimize_fp16(opt)
201 | else:
202 | return self._optimize_normal(opt)
203 |
204 |     def _optimize_fp16(self, opt: th.optim.Optimizer):
205 |         model_grads_to_master_grads(self.param_groups_and_shapes, self.master_params)
206 |         grad_norm, param_norm = self._compute_norms(grad_scale=2 ** self.lg_loss_scale)
207 |         if check_overflow(grad_norm):
208 |             self.lg_loss_scale -= 1  # overflow: halve the loss scale and skip this step
209 |             zero_master_grads(self.master_params)
210 |             return False
211 |
212 |         for p in self.master_params:
213 |             p.grad.mul_(1.0 / (2 ** self.lg_loss_scale))  # undo the loss scaling
214 |         opt.step()
215 |         zero_master_grads(self.master_params)
216 |         master_params_to_model_params(self.param_groups_and_shapes, self.master_params)
217 |         self.lg_loss_scale += self.fp16_scale_growth  # slowly grow the scale back
218 |         return True
219 |
220 |     def _optimize_normal(self, opt: th.optim.Optimizer):
221 |         grad_norm, param_norm = self._compute_norms()  # computed for monitoring only
222 |         opt.step()
223 |         return True
224 |
225 | def _compute_norms(self, grad_scale=1.0):
226 | grad_norm = 0.0
227 | param_norm = 0.0
228 | for p in self.master_params:
229 | with th.no_grad():
230 | param_norm += th.norm(p, p=2, dtype=th.float32).item() ** 2
231 | if p.grad is not None:
232 | grad_norm += th.norm(p.grad, p=2, dtype=th.float32).item() ** 2
233 | return np.sqrt(grad_norm) / grad_scale, np.sqrt(param_norm)
234 |
235 | def master_params_to_state_dict(self, master_params):
236 | return master_params_to_state_dict(
237 | self.model, self.param_groups_and_shapes, master_params, self.use_fp16
238 | )
239 |
240 | def state_dict_to_master_params(self, state_dict):
241 | return state_dict_to_master_params(self.model, state_dict, self.use_fp16)
242 |
243 |
244 | def check_overflow(value):
245 |     return (value == float("inf")) or (value == -float("inf")) or (value != value)  # value != value catches NaN
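

# Illustrative usage sketch (not part of the original file): a minimal fp16
# training step with MixedPrecisionTrainer. `model`, `loader`, and
# `compute_loss` are hypothetical stand-ins for the caller's own objects.
#
#   trainer = MixedPrecisionTrainer(model=model, use_fp16=True)
#   opt = th.optim.AdamW(trainer.master_params, lr=1e-4)
#   for batch in loader:
#       trainer.zero_grad()
#       loss = compute_loss(model, batch)
#       trainer.backward(loss)   # scales the loss by 2**lg_loss_scale
#       trainer.optimize(opt)    # returns False if the step was skipped due to overflow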
--------------------------------------------------------------------------------
/guided_diffusion/script_util.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 Huawei Technologies Co., Ltd.
2 | # Licensed under CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike 4.0 International) (the "License");
3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at
5 | #
6 | # https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode
7 | #
8 | # The code is released for academic research use only. For commercial use, please contact Huawei Technologies Co., Ltd.
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # This repository was forked from https://github.com/openai/guided-diffusion, which is under the MIT license
16 |
17 | import argparse
18 | import inspect
19 |
20 | from . import gaussian_diffusion as gd
21 | from .respace import SpacedDiffusion, space_timesteps
22 | from .unet import SuperResModel, UNetModel, EncoderUNetModel
23 |
24 | NUM_CLASSES = 1000
25 |
26 |
27 | def diffusion_defaults():
28 | """
29 | Defaults for image and classifier training.
30 | """
31 | return dict(
32 | learn_sigma=False,
33 | diffusion_steps=1000,
34 | noise_schedule="linear",
35 | timestep_respacing="",
36 | use_kl=False,
37 | predict_xstart=False,
38 | rescale_timesteps=False,
39 | rescale_learned_sigmas=False,
40 | )
41 |
42 |
43 | def classifier_defaults():
44 | """
45 | Defaults for classifier models.
46 | """
47 | return dict(
48 | image_size=64,
49 | classifier_use_fp16=False,
50 | classifier_width=128,
51 | classifier_depth=2,
52 | classifier_attention_resolutions="32,16,8",
53 | classifier_use_scale_shift_norm=True,
54 | classifier_resblock_updown=True,
55 | classifier_pool="attention",
56 | )
57 |
58 |
59 | def model_and_diffusion_defaults():
60 | """
61 | Defaults for image training.
62 | """
63 | res = dict(
64 | image_size=64,
65 | num_channels=128,
66 | num_res_blocks=2,
67 | num_heads=4,
68 | num_heads_upsample=-1,
69 | num_head_channels=-1,
70 | attention_resolutions="16,8",
71 | channel_mult="",
72 | dropout=0.0,
73 | class_cond=False,
74 | use_checkpoint=False,
75 | use_scale_shift_norm=True,
76 | resblock_updown=False,
77 | use_fp16=False,
78 | use_new_attention_order=False,
79 | )
80 | res.update(diffusion_defaults())
81 | return res
82 |
83 |
84 | def classifier_and_diffusion_defaults():
85 | res = classifier_defaults()
86 | res.update(diffusion_defaults())
87 | return res
88 |
89 |
90 | def create_model_and_diffusion(
91 | image_size,
92 | class_cond,
93 | learn_sigma,
94 | num_channels,
95 | num_res_blocks,
96 | channel_mult,
97 | num_heads,
98 | num_head_channels,
99 | num_heads_upsample,
100 | attention_resolutions,
101 | dropout,
102 | diffusion_steps,
103 | noise_schedule,
104 | timestep_respacing,
105 | use_kl,
106 | predict_xstart,
107 | rescale_timesteps,
108 | rescale_learned_sigmas,
109 | use_checkpoint,
110 | use_scale_shift_norm,
111 | resblock_updown,
112 | use_fp16,
113 | use_new_attention_order,
114 | conf=None
115 | ):
116 | model = create_model(
117 | image_size,
118 | num_channels,
119 | num_res_blocks,
120 | channel_mult=channel_mult,
121 | learn_sigma=learn_sigma,
122 | class_cond=class_cond,
123 | use_checkpoint=use_checkpoint,
124 | attention_resolutions=attention_resolutions,
125 | num_heads=num_heads,
126 | num_head_channels=num_head_channels,
127 | num_heads_upsample=num_heads_upsample,
128 | use_scale_shift_norm=use_scale_shift_norm,
129 | dropout=dropout,
130 | resblock_updown=resblock_updown,
131 | use_fp16=use_fp16,
132 | use_new_attention_order=use_new_attention_order,
133 | conf=conf
134 | )
135 | diffusion = create_gaussian_diffusion(
136 | steps=diffusion_steps,
137 | learn_sigma=learn_sigma,
138 | noise_schedule=noise_schedule,
139 | use_kl=use_kl,
140 | predict_xstart=predict_xstart,
141 | rescale_timesteps=rescale_timesteps,
142 | rescale_learned_sigmas=rescale_learned_sigmas,
143 | timestep_respacing=timestep_respacing,
144 | conf=conf
145 | )
146 | return model, diffusion
147 |
148 |
149 | def create_model(
150 | image_size,
151 | num_channels,
152 | num_res_blocks,
153 | channel_mult="",
154 | learn_sigma=False,
155 | class_cond=False,
156 | use_checkpoint=False,
157 | attention_resolutions="16",
158 | num_heads=1,
159 | num_head_channels=-1,
160 | num_heads_upsample=-1,
161 | use_scale_shift_norm=False,
162 | dropout=0,
163 | resblock_updown=False,
164 | use_fp16=False,
165 | use_new_attention_order=False,
166 | image_size_inference=None,
167 | conf=None
168 | ):
169 | if channel_mult == "":
170 | if image_size == 512:
171 | channel_mult = (0.5, 1, 1, 2, 2, 4, 4)
172 | elif image_size == 256:
173 | channel_mult = (1, 1, 2, 2, 4, 4)
174 | elif image_size == 128:
175 | channel_mult = (1, 1, 2, 3, 4)
176 | elif image_size == 64:
177 | channel_mult = (1, 2, 3, 4)
178 | else:
179 | raise ValueError(f"unsupported image size: {image_size}")
180 |     elif not isinstance(channel_mult, tuple):
181 |         # assume a comma-separated string of multipliers, e.g. "1,2,4"
182 |         channel_mult = tuple(int(ch_mult)
183 |                              for ch_mult in channel_mult.split(","))
185 |
186 | attention_ds = []
187 | for res in attention_resolutions.split(","):
188 | attention_ds.append(image_size // int(res))
189 |
190 | image_size_inference = image_size_inference or image_size
191 |
192 | return UNetModel(
193 |         image_size=image_size_inference,
194 | in_channels=3,
195 | model_channels=num_channels,
196 | out_channels=(3 if not learn_sigma else 6),
197 | num_res_blocks=num_res_blocks,
198 | attention_resolutions=tuple(attention_ds),
199 | dropout=dropout,
200 | channel_mult=channel_mult,
201 | num_classes=(NUM_CLASSES if class_cond else None),
202 | use_checkpoint=use_checkpoint,
203 | use_fp16=use_fp16,
204 | num_heads=num_heads,
205 | num_head_channels=num_head_channels,
206 | num_heads_upsample=num_heads_upsample,
207 | use_scale_shift_norm=use_scale_shift_norm,
208 | resblock_updown=resblock_updown,
209 | use_new_attention_order=use_new_attention_order,
210 | conf=conf
211 | )
212 |
213 |
214 | def create_classifier(
215 | image_size,
216 | classifier_use_fp16,
217 | classifier_width,
218 | classifier_depth,
219 | classifier_attention_resolutions,
220 | classifier_use_scale_shift_norm,
221 | classifier_resblock_updown,
222 | classifier_pool,
223 | image_size_inference=None
224 | ):
225 | if image_size == 512:
226 | channel_mult = (0.5, 1, 1, 2, 2, 4, 4)
227 | elif image_size == 256:
228 | channel_mult = (1, 1, 2, 2, 4, 4)
229 | elif image_size == 128:
230 | channel_mult = (1, 1, 2, 3, 4)
231 | elif image_size == 64:
232 | channel_mult = (1, 2, 3, 4)
233 | else:
234 | raise ValueError(f"unsupported image size: {image_size}")
235 |
236 | attention_ds = []
237 | for res in classifier_attention_resolutions.split(","):
238 | attention_ds.append(image_size // int(res))
239 |
240 | image_size_inference = image_size_inference or image_size
241 |
242 | return EncoderUNetModel(
243 | image_size=image_size_inference,
244 | in_channels=3,
245 | model_channels=classifier_width,
246 |         out_channels=NUM_CLASSES,
247 | num_res_blocks=classifier_depth,
248 | attention_resolutions=tuple(attention_ds),
249 | channel_mult=channel_mult,
250 | use_fp16=classifier_use_fp16,
251 | num_head_channels=64,
252 | use_scale_shift_norm=classifier_use_scale_shift_norm,
253 | resblock_updown=classifier_resblock_updown,
254 | pool=classifier_pool,
255 | )
256 |
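# Illustrative sketch (not part of the original file): `classifier_defaults()`
# returns exactly the required arguments of `create_classifier`, so a
# classifier can be built directly from the defaults:
#
#   classifier = create_classifier(**classifier_defaults())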
257 |
258 | def create_gaussian_diffusion(
259 | *,
260 | steps=1000,
261 | learn_sigma=False,
262 | sigma_small=False,
263 | noise_schedule="linear",
264 | use_kl=False,
265 | predict_xstart=False,
266 | rescale_timesteps=False,
267 | rescale_learned_sigmas=False,
268 | timestep_respacing="",
269 | conf=None
270 | ):
271 |
272 | betas = gd.get_named_beta_schedule(noise_schedule, steps, use_scale=True)
273 |
274 |     if conf is not None and conf.use_value_logger:
275 | conf.value_logger.add_value(
276 | betas, 'betas create_gaussian_diffusion')
277 |
278 | if use_kl:
279 | loss_type = gd.LossType.RESCALED_KL
280 | elif rescale_learned_sigmas:
281 | loss_type = gd.LossType.RESCALED_MSE
282 | else:
283 | loss_type = gd.LossType.MSE
284 |
285 | if not timestep_respacing:
286 | timestep_respacing = [steps]
287 |
288 | return SpacedDiffusion(
289 | use_timesteps=space_timesteps(steps, timestep_respacing),
290 | betas=betas,
291 | model_mean_type=(
292 | gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X
293 | ),
294 | model_var_type=(
295 | (
296 | gd.ModelVarType.FIXED_LARGE
297 | if not sigma_small
298 | else gd.ModelVarType.FIXED_SMALL
299 | )
300 | if not learn_sigma
301 | else gd.ModelVarType.LEARNED_RANGE
302 | ),
303 | loss_type=loss_type,
304 | rescale_timesteps=rescale_timesteps,
305 | conf=conf
306 | )
307 |
308 | def select_args(args_dict, keys):
309 | return {k: args_dict[k] for k in keys}
310 |
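
# Illustrative usage sketch (not part of the original file): the defaults dict
# lines up with create_model_and_diffusion's signature, so a model/diffusion
# pair can be built directly from it. `conf` is assumed to be a config object
# providing the attributes used above (e.g. `use_value_logger`):
#
#   opts = model_and_diffusion_defaults()
#   opts["image_size"] = 256
#   model, diffusion = create_model_and_diffusion(**opts, conf=conf)
#
# `select_args` is a helper for filtering a larger argument dict (e.g. parsed
# argparse flags) down to the keys a given factory expects.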
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RePaint
2 | **Inpainting using Denoising Diffusion Probabilistic Models**
3 |
4 |
5 | CVPR 2022 [[Paper]](https://bit.ly/3b1ABEb)
6 |
8 |
9 | ## Setup
10 |
11 | ### 1. Code
12 |
13 | ```bash
14 | git clone https://github.com/andreas128/RePaint.git
15 | ```
16 |
17 | ### 2. Environment
18 | ```bash
19 | pip install numpy torch blobfile tqdm pyyaml pillow  # e.g., torch 1.7.1+cu110
20 | ```
21 |
22 | ### 3. Download models and data
23 |
24 | ```bash
25 | pip install --upgrade gdown && bash ./download.sh
26 | ```
27 |
28 | This downloads the pretrained models for ImageNet, CelebA-HQ, and Places2, as well as the face example and its masks.
29 |
30 |
31 | ### 4. Run example
32 | ```bash
33 | python test.py --conf_path confs/face_example.yml
34 | ```
35 | Find the output in `./log/face_example/inpainted`.
36 |
37 | *Note: After refactoring the code, we did not reevaluate all experiments.*
38 |
39 |
40 |
41 | # RePaint fills a missing image part using diffusion models
42 |