├── diffusion-insgen
│   ├── torch_utils
│   │   ├── __init__.py
│   │   ├── ops
│   │   │   ├── __init__.py
│   │   │   ├── bias_act.h
│   │   │   ├── fma.py
│   │   │   ├── upfirdn2d.h
│   │   │   ├── grid_sample_gradfix.py
│   │   │   ├── bias_act.cpp
│   │   │   └── upfirdn2d.cpp
│   │   └── custom_ops.py
│   ├── docs
│   │   ├── assets
│   │   │   ├── contrad.jpeg
│   │   │   ├── diffaug.jpeg
│   │   │   ├── framework.jpg
│   │   │   ├── genforce.png
│   │   │   ├── limitations.jpg
│   │   │   ├── main_results.jpg
│   │   │   ├── stylegan2-ada-teaser-1024x252.png
│   │   │   ├── font.css
│   │   │   └── style.css
│   │   └── index.html
│   ├── environment.yml
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── inception_score.py
│   │   ├── frechet_inception_distance.py
│   │   ├── kernel_inception_distance.py
│   │   ├── precision_recall.py
│   │   ├── perceptual_path_length.py
│   │   └── metric_main.py
│   ├── training
│   │   ├── __init__.py
│   │   └── diffaug.py
│   ├── dnnlib
│   │   └── __init__.py
│   ├── style_mixing.py
│   └── generate.py
├── diffusion-stylegan2
│   ├── torch_utils
│   │   ├── __init__.py
│   │   ├── ops
│   │   │   ├── __init__.py
│   │   │   ├── bias_act.h
│   │   │   ├── fma.py
│   │   │   ├── upfirdn2d.h
│   │   │   ├── grid_sample_gradfix.py
│   │   │   ├── bias_act.cpp
│   │   │   └── upfirdn2d.cpp
│   │   └── custom_ops.py
│   ├── environment.yml
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── inception_score.py
│   │   ├── frechet_inception_distance.py
│   │   ├── kernel_inception_distance.py
│   │   ├── precision_recall.py
│   │   ├── perceptual_path_length.py
│   │   └── metric_main.py
│   ├── training
│   │   ├── __init__.py
│   │   └── diffaug.py
│   ├── dnnlib
│   │   └── __init__.py
│   ├── style_mixing.py
│   └── generate.py
├── docs
│   └── diffusion-gan.png
├── diffusion-projected-gan
│   ├── torch_utils
│   │   ├── __init__.py
│   │   ├── ops
│   │   │   ├── __init__.py
│   │   │   ├── bias_act.h
│   │   │   ├── filtered_lrelu_rd.cu
│   │   │   ├── filtered_lrelu_wr.cu
│   │   │   ├── filtered_lrelu_ns.cu
│   │   │   ├── upfirdn2d.h
│   │   │   ├── fma.py
│   │   │   ├── grid_sample_gradfix.py
│   │   │   ├── filtered_lrelu.h
│   │   │   ├── bias_act.cpp
│   │   │   └── upfirdn2d.cpp
│   │   └── utils_spectrum.py
│   ├── dnnlib
│   │   └── __init__.py
│   ├── metrics
│   │   ├── inception_score.py
│   │   ├── frechet_inception_distance.py
│   │   ├── kernel_inception_distance.py
│   │   ├── precision_recall.py
│   │   ├── perceptual_path_length.py
│   │   └── metric_main.py
│   ├── pg_modules
│   │   ├── diffaug.py
│   │   └── diffusion.py
│   ├── training
│   │   └── loss.py
│   └── environment.yml
├── LICENSE
└── .gitignore
/diffusion-insgen/torch_utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | # empty 3 | -------------------------------------------------------------------------------- /diffusion-insgen/torch_utils/ops/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | # empty 3 | -------------------------------------------------------------------------------- /diffusion-stylegan2/torch_utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | # empty 3 | -------------------------------------------------------------------------------- /diffusion-stylegan2/torch_utils/ops/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | # empty 3 | -------------------------------------------------------------------------------- /docs/diffusion-gan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhendong-Wang/Diffusion-GAN/HEAD/docs/diffusion-gan.png -------------------------------------------------------------------------------- /diffusion-insgen/docs/assets/contrad.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhendong-Wang/Diffusion-GAN/HEAD/diffusion-insgen/docs/assets/contrad.jpeg 
-------------------------------------------------------------------------------- /diffusion-insgen/docs/assets/diffaug.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhendong-Wang/Diffusion-GAN/HEAD/diffusion-insgen/docs/assets/diffaug.jpeg -------------------------------------------------------------------------------- /diffusion-insgen/docs/assets/framework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhendong-Wang/Diffusion-GAN/HEAD/diffusion-insgen/docs/assets/framework.jpg -------------------------------------------------------------------------------- /diffusion-insgen/docs/assets/genforce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhendong-Wang/Diffusion-GAN/HEAD/diffusion-insgen/docs/assets/genforce.png -------------------------------------------------------------------------------- /diffusion-insgen/docs/assets/limitations.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhendong-Wang/Diffusion-GAN/HEAD/diffusion-insgen/docs/assets/limitations.jpg -------------------------------------------------------------------------------- /diffusion-insgen/docs/assets/main_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhendong-Wang/Diffusion-GAN/HEAD/diffusion-insgen/docs/assets/main_results.jpg -------------------------------------------------------------------------------- /diffusion-insgen/docs/assets/stylegan2-ada-teaser-1024x252.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhendong-Wang/Diffusion-GAN/HEAD/diffusion-insgen/docs/assets/stylegan2-ada-teaser-1024x252.png -------------------------------------------------------------------------------- /diffusion-insgen/environment.yml: -------------------------------------------------------------------------------- 1 | name: difgan 2 | channels: 3 | - pytorch 4 | - nvidia 5 | dependencies: 6 | - python==3.8 # package build failures on 3.10 7 | - pip 8 | - numpy>=1.20 9 | - click>=8.0 10 | - pillow>=8.3.1 11 | - scipy>=1.7.1 12 | - pytorch=1.8.1 13 | - psutil 14 | - requests 15 | - tqdm 16 | - imageio 17 | - ninja 18 | - pip: 19 | - imageio-ffmpeg>=0.4.3 20 | - pyspng 21 | 22 | 23 | -------------------------------------------------------------------------------- /diffusion-stylegan2/environment.yml: -------------------------------------------------------------------------------- 1 | name: difgan 2 | channels: 3 | - pytorch 4 | - nvidia 5 | dependencies: 6 | - python>=3.8, < 3.10 # package build failures on 3.10 7 | - pip 8 | - numpy>=1.20 9 | - click>=8.0 10 | - pillow>=8.3.1 11 | - scipy>=1.7.1 12 | - pytorch=1.12.1 13 | - psutil 14 | - requests 15 | - tqdm 16 | - imageio 17 | - ninja 18 | - pip: 19 | - imageio-ffmpeg>=0.4.3 20 | - pyspng 21 | 22 | 23 | -------------------------------------------------------------------------------- /diffusion-insgen/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
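# (Note on the two environment.yml files above: assuming a working conda install, `conda env create -f environment.yml` followed by `conda activate difgan` builds the `difgan` environment; each sub-project pins its own PyTorch version, 1.8.1 for diffusion-insgen and 1.12.1 for diffusion-stylegan2.)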
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # empty 10 | -------------------------------------------------------------------------------- /diffusion-insgen/training/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # empty 10 | -------------------------------------------------------------------------------- /diffusion-stylegan2/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # empty 10 | -------------------------------------------------------------------------------- /diffusion-stylegan2/training/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # empty 10 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # empty 10 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # empty 10 | -------------------------------------------------------------------------------- /diffusion-insgen/dnnlib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | from .util import EasyDict, make_cache_dir_path 10 | -------------------------------------------------------------------------------- /diffusion-stylegan2/dnnlib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | from .util import EasyDict, make_cache_dir_path 10 | -------------------------------------------------------------------------------- /diffusion-projected-gan/dnnlib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | from .util import EasyDict, make_cache_dir_path 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Zhendong Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /diffusion-insgen/torch_utils/ops/bias_act.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | //------------------------------------------------------------------------ 10 | // CUDA kernel parameters. 11 | 12 | struct bias_act_kernel_params 13 | { 14 | const void* x; // [sizeX] 15 | const void* b; // [sizeB] or NULL 16 | const void* xref; // [sizeX] or NULL 17 | const void* yref; // [sizeX] or NULL 18 | const void* dy; // [sizeX] or NULL 19 | void* y; // [sizeX] 20 | 21 | int grad; 22 | int act; 23 | float alpha; 24 | float gain; 25 | float clamp; 26 | 27 | int sizeX; 28 | int sizeB; 29 | int stepB; 30 | int loopX; 31 | }; 32 | 33 | //------------------------------------------------------------------------ 34 | // CUDA kernel selection. 35 | 36 | template <class T> void* choose_bias_act_kernel(const bias_act_kernel_params& p); 37 | 38 | //------------------------------------------------------------------------ 39 | -------------------------------------------------------------------------------- /diffusion-stylegan2/torch_utils/ops/bias_act.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | //------------------------------------------------------------------------ 10 | // CUDA kernel parameters. 11 | 12 | struct bias_act_kernel_params 13 | { 14 | const void* x; // [sizeX] 15 | const void* b; // [sizeB] or NULL 16 | const void* xref; // [sizeX] or NULL 17 | const void* yref; // [sizeX] or NULL 18 | const void* dy; // [sizeX] or NULL 19 | void* y; // [sizeX] 20 | 21 | int grad; 22 | int act; 23 | float alpha; 24 | float gain; 25 | float clamp; 26 | 27 | int sizeX; 28 | int sizeB; 29 | int stepB; 30 | int loopX; 31 | }; 32 | 33 | //------------------------------------------------------------------------ 34 | // CUDA kernel selection. 
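// (choose_bias_act_kernel<T>() below returns a pointer to the CUDA kernel matching the parameters in `p`; its per-scalar-type specializations are expected to live in a companion bias_act.cu that this dump does not list, while bias_act.cpp in the tree above provides the C++/PyTorch binding.)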
35 | 36 | template <class T> void* choose_bias_act_kernel(const bias_act_kernel_params& p); 37 | 38 | //------------------------------------------------------------------------ 39 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/bias_act.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | //------------------------------------------------------------------------ 10 | // CUDA kernel parameters. 11 | 12 | struct bias_act_kernel_params 13 | { 14 | const void* x; // [sizeX] 15 | const void* b; // [sizeB] or NULL 16 | const void* xref; // [sizeX] or NULL 17 | const void* yref; // [sizeX] or NULL 18 | const void* dy; // [sizeX] or NULL 19 | void* y; // [sizeX] 20 | 21 | int grad; 22 | int act; 23 | float alpha; 24 | float gain; 25 | float clamp; 26 | 27 | int sizeX; 28 | int sizeB; 29 | int stepB; 30 | int loopX; 31 | }; 32 | 33 | //------------------------------------------------------------------------ 34 | // CUDA kernel selection. 35 | 36 | template <class T> void* choose_bias_act_kernel(const bias_act_kernel_params& p); 37 | 38 | //------------------------------------------------------------------------ 39 | -------------------------------------------------------------------------------- /diffusion-insgen/docs/assets/font.css: -------------------------------------------------------------------------------- 1 | /* Homepage Font */ 2 | 3 | /* latin-ext */ 4 | @font-face { 5 | font-family: 'Lato'; 6 | font-style: normal; 7 | font-weight: 400; 8 | src: local('Lato Regular'), local('Lato-Regular'), url(https://fonts.gstatic.com/s/lato/v16/S6uyw4BMUTPHjxAwXjeu.woff2) format('woff2'); 9 | unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; 10 | } 11 | 12 | /* latin */ 13 | @font-face { 14 | font-family: 'Lato'; 15 | font-style: normal; 16 | font-weight: 400; 17 | src: local('Lato Regular'), local('Lato-Regular'), url(https://fonts.gstatic.com/s/lato/v16/S6uyw4BMUTPHjx4wXg.woff2) format('woff2'); 18 | unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; 19 | } 20 | 21 | /* latin-ext */ 22 | @font-face { 23 | font-family: 'Lato'; 24 | font-style: normal; 25 | font-weight: 700; 26 | src: local('Lato Bold'), local('Lato-Bold'), url(https://fonts.gstatic.com/s/lato/v16/S6u9w4BMUTPHh6UVSwaPGR_p.woff2) format('woff2'); 27 | unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF; 28 | } 29 | 30 | /* latin */ 31 | @font-face { 32 | font-family: 'Lato'; 33 | font-style: normal; 34 | font-weight: 700; 35 | src: local('Lato Bold'), local('Lato-Bold'), url(https://fonts.gstatic.com/s/lato/v16/S6u9w4BMUTPHh6UVSwiPGQ.woff2) format('woff2'); 36 | unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD; 37
| } 38 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/filtered_lrelu_rd.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "filtered_lrelu.cu" 10 | 11 | // Template/kernel specializations for sign read mode. 12 | 13 | // Full op, 32-bit indexing. 14 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int32_t, false, true>(const filtered_lrelu_kernel_params& p, int sharedKB); 15 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float, int32_t, false, true>(const filtered_lrelu_kernel_params& p, int sharedKB); 16 | 17 | // Full op, 64-bit indexing. 18 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int64_t, false, true>(const filtered_lrelu_kernel_params& p, int sharedKB); 19 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float, int64_t, false, true>(const filtered_lrelu_kernel_params& p, int sharedKB); 20 | 21 | // Activation/signs only for generic variant. 64-bit indexing. 22 | template void* choose_filtered_lrelu_act_kernel<c10::Half, false, true>(void); 23 | template void* choose_filtered_lrelu_act_kernel<float, false, true>(void); 24 | template void* choose_filtered_lrelu_act_kernel<double, false, true>(void); 25 | 26 | // Copy filters to constant memory. 27 | template cudaError_t copy_filters<false, true>(cudaStream_t stream); 28 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/filtered_lrelu_wr.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "filtered_lrelu.cu" 10 | 11 | // Template/kernel specializations for sign write mode. 12 | 13 | // Full op, 32-bit indexing. 14 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int32_t, true, false>(const filtered_lrelu_kernel_params& p, int sharedKB); 15 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float, int32_t, true, false>(const filtered_lrelu_kernel_params& p, int sharedKB); 16 | 17 | // Full op, 64-bit indexing. 18 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int64_t, true, false>(const filtered_lrelu_kernel_params& p, int sharedKB); 19 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float, int64_t, true, false>(const filtered_lrelu_kernel_params& p, int sharedKB); 20 | 21 | // Activation/signs only for generic variant. 64-bit indexing. 22 | template void* choose_filtered_lrelu_act_kernel<c10::Half, true, false>(void); 23 | template void* choose_filtered_lrelu_act_kernel<float, true, false>(void); 24 | template void* choose_filtered_lrelu_act_kernel<double, true, false>(void); 25 | 26 | // Copy filters to constant memory. 
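// (The _rd/_wr/_ns files in this directory instantiate the same templates from filtered_lrelu.cu for the three sign-buffer modes: read, write, and none. Splitting them into separate translation units presumably keeps per-file compile times manageable; copy_filters below stages the FIR filter taps in constant memory before launch.)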
27 | template cudaError_t copy_filters<true, false>(cudaStream_t stream); 28 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/filtered_lrelu_ns.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include "filtered_lrelu.cu" 10 | 11 | // Template/kernel specializations for no signs mode (no gradients required). 12 | 13 | // Full op, 32-bit indexing. 14 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int32_t, false, false>(const filtered_lrelu_kernel_params& p, int sharedKB); 15 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float, int32_t, false, false>(const filtered_lrelu_kernel_params& p, int sharedKB); 16 | 17 | // Full op, 64-bit indexing. 18 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<c10::Half, int64_t, false, false>(const filtered_lrelu_kernel_params& p, int sharedKB); 19 | template filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel<float, int64_t, false, false>(const filtered_lrelu_kernel_params& p, int sharedKB); 20 | 21 | // Activation/signs only for generic variant. 64-bit indexing. 22 | template void* choose_filtered_lrelu_act_kernel<c10::Half, false, false>(void); 23 | template void* choose_filtered_lrelu_act_kernel<float, false, false>(void); 24 | template void* choose_filtered_lrelu_act_kernel<double, false, false>(void); 25 | 26 | // Copy filters to constant memory. 27 | template cudaError_t copy_filters<false, false>(cudaStream_t stream); 28 | -------------------------------------------------------------------------------- /diffusion-insgen/torch_utils/ops/fma.py: -------------------------------------------------------------------------------- 1 | 2 | """Fused multiply-add, with slightly faster gradients than `torch.addcmul()`.""" 3 | 4 | import torch 5 | 6 | #---------------------------------------------------------------------------- 7 | 8 | def fma(a, b, c): # => a * b + c 9 | return _FusedMultiplyAdd.apply(a, b, c) 10 | 11 | #---------------------------------------------------------------------------- 12 | 13 | class _FusedMultiplyAdd(torch.autograd.Function): # a * b + c 14 | @staticmethod 15 | def forward(ctx, a, b, c): # pylint: disable=arguments-differ 16 | out = torch.addcmul(c, a, b) 17 | ctx.save_for_backward(a, b) 18 | ctx.c_shape = c.shape 19 | return out 20 | 21 | @staticmethod 22 | def backward(ctx, dout): # pylint: disable=arguments-differ 23 | a, b = ctx.saved_tensors 24 | c_shape = ctx.c_shape 25 | da = None 26 | db = None 27 | dc = None 28 | 29 | if ctx.needs_input_grad[0]: 30 | da = _unbroadcast(dout * b, a.shape) 31 | 32 | if ctx.needs_input_grad[1]: 33 | db = _unbroadcast(dout * a, b.shape) 34 | 35 | if ctx.needs_input_grad[2]: 36 | dc = _unbroadcast(dout, c_shape) 37 | 38 | return da, db, dc 39 | 40 | #---------------------------------------------------------------------------- 41 | 42 | def _unbroadcast(x, shape): 43 | extra_dims = x.ndim - len(shape) 44 | assert extra_dims >= 0 45 | dim = [i for i in range(x.ndim) if x.shape[i] > 1 and (i < extra_dims or shape[i - extra_dims] == 1)] 46 | if len(dim): 47 | x = x.sum(dim=dim, keepdim=True) 48 | if extra_dims: 49 | x = x.reshape(-1, *x.shape[extra_dims+1:]) 50 | 
assert x.shape == shape 51 | return x 52 | 53 | #---------------------------------------------------------------------------- 54 | -------------------------------------------------------------------------------- /diffusion-stylegan2/torch_utils/ops/fma.py: -------------------------------------------------------------------------------- 1 | 2 | """Fused multiply-add, with slightly faster gradients than `torch.addcmul()`.""" 3 | 4 | import torch 5 | 6 | #---------------------------------------------------------------------------- 7 | 8 | def fma(a, b, c): # => a * b + c 9 | return _FusedMultiplyAdd.apply(a, b, c) 10 | 11 | #---------------------------------------------------------------------------- 12 | 13 | class _FusedMultiplyAdd(torch.autograd.Function): # a * b + c 14 | @staticmethod 15 | def forward(ctx, a, b, c): # pylint: disable=arguments-differ 16 | out = torch.addcmul(c, a, b) 17 | ctx.save_for_backward(a, b) 18 | ctx.c_shape = c.shape 19 | return out 20 | 21 | @staticmethod 22 | def backward(ctx, dout): # pylint: disable=arguments-differ 23 | a, b = ctx.saved_tensors 24 | c_shape = ctx.c_shape 25 | da = None 26 | db = None 27 | dc = None 28 | 29 | if ctx.needs_input_grad[0]: 30 | da = _unbroadcast(dout * b, a.shape) 31 | 32 | if ctx.needs_input_grad[1]: 33 | db = _unbroadcast(dout * a, b.shape) 34 | 35 | if ctx.needs_input_grad[2]: 36 | dc = _unbroadcast(dout, c_shape) 37 | 38 | return da, db, dc 39 | 40 | #---------------------------------------------------------------------------- 41 | 42 | def _unbroadcast(x, shape): 43 | extra_dims = x.ndim - len(shape) 44 | assert extra_dims >= 0 45 | dim = [i for i in range(x.ndim) if x.shape[i] > 1 and (i < extra_dims or shape[i - extra_dims] == 1)] 46 | if len(dim): 47 | x = x.sum(dim=dim, keepdim=True) 48 | if extra_dims: 49 | x = x.reshape(-1, *x.shape[extra_dims+1:]) 50 | assert x.shape == shape 51 | return x 52 | 53 | #---------------------------------------------------------------------------- 54 | -------------------------------------------------------------------------------- /diffusion-insgen/metrics/inception_score.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Inception Score (IS) from the paper "Improved techniques for training 10 | GANs". Matches the original implementation by Salimans et al. at 11 | https://github.com/openai/improved-gan/blob/master/inception_score/model.py""" 12 | 13 | import numpy as np 14 | from . import metric_utils 15 | 16 | #---------------------------------------------------------------------------- 17 | 18 | def compute_is(opts, num_gen, num_splits): 19 | # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 20 | detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt' 21 | detector_kwargs = dict(no_output_bias=True) # Match the original implementation by not applying bias in the softmax layer. 
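    # The split loop below implements the Inception Score,
    #   IS = exp( E_x[ KL( p(y|x) || p(y) ) ] ),
    # where p(y|x) are the per-image class probabilities in gen_probs and p(y)
    # is their mean within each of the num_splits chunks; the mean and std of
    # the per-chunk scores are returned.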
22 | 23 | gen_probs = metric_utils.compute_feature_stats_for_generator( 24 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 25 | capture_all=True, max_items=num_gen).get_all() 26 | 27 | if opts.rank != 0: 28 | return float('nan'), float('nan') 29 | 30 | scores = [] 31 | for i in range(num_splits): 32 | part = gen_probs[i * num_gen // num_splits : (i + 1) * num_gen // num_splits] 33 | kl = part * (np.log(part) - np.log(np.mean(part, axis=0, keepdims=True))) 34 | kl = np.mean(np.sum(kl, axis=1)) 35 | scores.append(np.exp(kl)) 36 | return float(np.mean(scores)), float(np.std(scores)) 37 | 38 | #---------------------------------------------------------------------------- 39 | -------------------------------------------------------------------------------- /diffusion-stylegan2/metrics/inception_score.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Inception Score (IS) from the paper "Improved techniques for training 10 | GANs". Matches the original implementation by Salimans et al. at 11 | https://github.com/openai/improved-gan/blob/master/inception_score/model.py""" 12 | 13 | import numpy as np 14 | from . import metric_utils 15 | 16 | #---------------------------------------------------------------------------- 17 | 18 | def compute_is(opts, num_gen, num_splits): 19 | # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 20 | detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt' 21 | detector_kwargs = dict(no_output_bias=True) # Match the original implementation by not applying bias in the softmax layer. 22 | 23 | gen_probs = metric_utils.compute_feature_stats_for_generator( 24 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 25 | capture_all=True, max_items=num_gen).get_all() 26 | 27 | if opts.rank != 0: 28 | return float('nan'), float('nan') 29 | 30 | scores = [] 31 | for i in range(num_splits): 32 | part = gen_probs[i * num_gen // num_splits : (i + 1) * num_gen // num_splits] 33 | kl = part * (np.log(part) - np.log(np.mean(part, axis=0, keepdims=True))) 34 | kl = np.mean(np.sum(kl, axis=1)) 35 | scores.append(np.exp(kl)) 36 | return float(np.mean(scores)), float(np.std(scores)) 37 | 38 | #---------------------------------------------------------------------------- 39 | -------------------------------------------------------------------------------- /diffusion-projected-gan/metrics/inception_score.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. 
Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Inception Score (IS) from the paper "Improved techniques for training 10 | GANs". Matches the original implementation by Salimans et al. at 11 | https://github.com/openai/improved-gan/blob/master/inception_score/model.py""" 12 | 13 | import numpy as np 14 | from . import metric_utils 15 | 16 | #---------------------------------------------------------------------------- 17 | 18 | def compute_is(opts, num_gen, num_splits): 19 | # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 20 | detector_url = 'https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/inception-2015-12-05.pkl' 21 | detector_kwargs = dict(no_output_bias=True) # Match the original implementation by not applying bias in the softmax layer. 22 | 23 | gen_probs = metric_utils.compute_feature_stats_for_generator( 24 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 25 | capture_all=True, max_items=num_gen).get_all() 26 | 27 | if opts.rank != 0: 28 | return float('nan'), float('nan') 29 | 30 | scores = [] 31 | for i in range(num_splits): 32 | part = gen_probs[i * num_gen // num_splits : (i + 1) * num_gen // num_splits] 33 | kl = part * (np.log(part) - np.log(np.mean(part, axis=0, keepdims=True))) 34 | kl = np.mean(np.sum(kl, axis=1)) 35 | scores.append(np.exp(kl)) 36 | return float(np.mean(scores)), float(np.std(scores)) 37 | 38 | #---------------------------------------------------------------------------- 39 | -------------------------------------------------------------------------------- /diffusion-insgen/torch_utils/ops/upfirdn2d.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include <cuda_runtime.h> 10 | 11 | //------------------------------------------------------------------------ 12 | // CUDA kernel parameters. 13 | 14 | struct upfirdn2d_kernel_params 15 | { 16 | const void* x; 17 | const float* f; 18 | void* y; 19 | 20 | int2 up; 21 | int2 down; 22 | int2 pad0; 23 | int flip; 24 | float gain; 25 | 26 | int4 inSize; // [width, height, channel, batch] 27 | int4 inStride; 28 | int2 filterSize; // [width, height] 29 | int2 filterStride; 30 | int4 outSize; // [width, height, channel, batch] 31 | int4 outStride; 32 | int sizeMinor; 33 | int sizeMajor; 34 | 35 | int loopMinor; 36 | int loopMajor; 37 | int loopX; 38 | int launchMinor; 39 | int launchMajor; 40 | }; 41 | 42 | //------------------------------------------------------------------------ 43 | // CUDA kernel specialization. 44 | 45 | struct upfirdn2d_kernel_spec 46 | { 47 | void* kernel; 48 | int tileOutW; 49 | int tileOutH; 50 | int loopMinor; 51 | int loopX; 52 | }; 53 | 54 | //------------------------------------------------------------------------ 55 | // CUDA kernel selection. 
56 | 57 | template <class T> upfirdn2d_kernel_spec choose_upfirdn2d_kernel(const upfirdn2d_kernel_params& p); 58 | 59 | //------------------------------------------------------------------------ 60 | -------------------------------------------------------------------------------- /diffusion-stylegan2/torch_utils/ops/upfirdn2d.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include <cuda_runtime.h> 10 | 11 | //------------------------------------------------------------------------ 12 | // CUDA kernel parameters. 13 | 14 | struct upfirdn2d_kernel_params 15 | { 16 | const void* x; 17 | const float* f; 18 | void* y; 19 | 20 | int2 up; 21 | int2 down; 22 | int2 pad0; 23 | int flip; 24 | float gain; 25 | 26 | int4 inSize; // [width, height, channel, batch] 27 | int4 inStride; 28 | int2 filterSize; // [width, height] 29 | int2 filterStride; 30 | int4 outSize; // [width, height, channel, batch] 31 | int4 outStride; 32 | int sizeMinor; 33 | int sizeMajor; 34 | 35 | int loopMinor; 36 | int loopMajor; 37 | int loopX; 38 | int launchMinor; 39 | int launchMajor; 40 | }; 41 | 42 | //------------------------------------------------------------------------ 43 | // CUDA kernel specialization. 44 | 45 | struct upfirdn2d_kernel_spec 46 | { 47 | void* kernel; 48 | int tileOutW; 49 | int tileOutH; 50 | int loopMinor; 51 | int loopX; 52 | }; 53 | 54 | //------------------------------------------------------------------------ 55 | // CUDA kernel selection. 56 | 57 | template <class T> upfirdn2d_kernel_spec choose_upfirdn2d_kernel(const upfirdn2d_kernel_params& p); 58 | 59 | //------------------------------------------------------------------------ 60 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/upfirdn2d.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include <cuda_runtime.h> 10 | 11 | //------------------------------------------------------------------------ 12 | // CUDA kernel parameters. 
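// (upfirdn2d performs UPsample, FIR filter, DowNsample in one pass: the input is upsampled by `up`, padded by `pad0`, convolved with the filter `f`, scaled by `gain`, and downsampled by `down`. The struct below carries one launch's worth of these settings.)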
13 | 14 | struct upfirdn2d_kernel_params 15 | { 16 | const void* x; 17 | const float* f; 18 | void* y; 19 | 20 | int2 up; 21 | int2 down; 22 | int2 pad0; 23 | int flip; 24 | float gain; 25 | 26 | int4 inSize; // [width, height, channel, batch] 27 | int4 inStride; 28 | int2 filterSize; // [width, height] 29 | int2 filterStride; 30 | int4 outSize; // [width, height, channel, batch] 31 | int4 outStride; 32 | int sizeMinor; 33 | int sizeMajor; 34 | 35 | int loopMinor; 36 | int loopMajor; 37 | int loopX; 38 | int launchMinor; 39 | int launchMajor; 40 | }; 41 | 42 | //------------------------------------------------------------------------ 43 | // CUDA kernel specialization. 44 | 45 | struct upfirdn2d_kernel_spec 46 | { 47 | void* kernel; 48 | int tileOutW; 49 | int tileOutH; 50 | int loopMinor; 51 | int loopX; 52 | }; 53 | 54 | //------------------------------------------------------------------------ 55 | // CUDA kernel selection. 56 | 57 | template <class T> upfirdn2d_kernel_spec choose_upfirdn2d_kernel(const upfirdn2d_kernel_params& p); 58 | 59 | //------------------------------------------------------------------------ 60 | -------------------------------------------------------------------------------- /diffusion-insgen/metrics/frechet_inception_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Frechet Inception Distance (FID) from the paper 10 | "GANs trained by a two time-scale update rule converge to a local Nash 11 | equilibrium". Matches the original implementation by Heusel et al. at 12 | https://github.com/bioinf-jku/TTUR/blob/master/fid.py""" 13 | 14 | import numpy as np 15 | import scipy.linalg 16 | from . import metric_utils 17 | 18 | #---------------------------------------------------------------------------- 19 | 20 | def compute_fid(opts, max_real, num_gen): 21 | # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 22 | detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt' 23 | detector_kwargs = dict(return_features=True) # Return raw features before the softmax layer. 
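    # The two calls below fit Gaussians to the real and generated features; the
    # closed-form Frechet distance between them is then
    #   FID = ||mu_r - mu_g||^2 + Tr( Sigma_r + Sigma_g - 2 (Sigma_r Sigma_g)^(1/2) ),
    # with scipy.linalg.sqrtm supplying the matrix square root.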
24 | 25 | mu_real, sigma_real = metric_utils.compute_feature_stats_for_dataset( 26 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 27 | rel_lo=0, rel_hi=0, capture_mean_cov=True, max_items=max_real).get_mean_cov() 28 | 29 | mu_gen, sigma_gen = metric_utils.compute_feature_stats_for_generator( 30 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 31 | rel_lo=0, rel_hi=1, capture_mean_cov=True, max_items=num_gen).get_mean_cov() 32 | 33 | if opts.rank != 0: 34 | return float('nan') 35 | 36 | m = np.square(mu_gen - mu_real).sum() 37 | s, _ = scipy.linalg.sqrtm(np.dot(sigma_gen, sigma_real), disp=False) # pylint: disable=no-member 38 | fid = np.real(m + np.trace(sigma_gen + sigma_real - s * 2)) 39 | return float(fid) 40 | 41 | #---------------------------------------------------------------------------- 42 | -------------------------------------------------------------------------------- /diffusion-stylegan2/metrics/frechet_inception_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Frechet Inception Distance (FID) from the paper 10 | "GANs trained by a two time-scale update rule converge to a local Nash 11 | equilibrium". Matches the original implementation by Heusel et al. at 12 | https://github.com/bioinf-jku/TTUR/blob/master/fid.py""" 13 | 14 | import numpy as np 15 | import scipy.linalg 16 | from . import metric_utils 17 | 18 | #---------------------------------------------------------------------------- 19 | 20 | def compute_fid(opts, max_real, num_gen): 21 | # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 22 | detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt' 23 | detector_kwargs = dict(return_features=True) # Return raw features before the softmax layer. 24 | 25 | mu_real, sigma_real = metric_utils.compute_feature_stats_for_dataset( 26 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 27 | rel_lo=0, rel_hi=0, capture_mean_cov=True, max_items=max_real).get_mean_cov() 28 | 29 | mu_gen, sigma_gen = metric_utils.compute_feature_stats_for_generator( 30 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 31 | rel_lo=0, rel_hi=1, capture_mean_cov=True, max_items=num_gen).get_mean_cov() 32 | 33 | if opts.rank != 0: 34 | return float('nan') 35 | 36 | m = np.square(mu_gen - mu_real).sum() 37 | s, _ = scipy.linalg.sqrtm(np.dot(sigma_gen, sigma_real), disp=False) # pylint: disable=no-member 38 | fid = np.real(m + np.trace(sigma_gen + sigma_real - s * 2)) 39 | return float(fid) 40 | 41 | #---------------------------------------------------------------------------- 42 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/fma.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
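# A usage sketch for the fma() helper defined below (shapes chosen to
# exercise the broadcasting handled by _unbroadcast):
#   import torch
#   a = torch.randn(4, 1, requires_grad=True)
#   b = torch.randn(1, 3, requires_grad=True)
#   c = torch.randn(4, 3, requires_grad=True)
#   fma(a, b, c).sum().backward()   # same values and grads as (a * b + c)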
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Fused multiply-add, with slightly faster gradients than `torch.addcmul()`.""" 10 | 11 | import torch 12 | 13 | #---------------------------------------------------------------------------- 14 | 15 | def fma(a, b, c): # => a * b + c 16 | return _FusedMultiplyAdd.apply(a, b, c) 17 | 18 | #---------------------------------------------------------------------------- 19 | 20 | class _FusedMultiplyAdd(torch.autograd.Function): # a * b + c 21 | @staticmethod 22 | def forward(ctx, a, b, c): # pylint: disable=arguments-differ 23 | out = torch.addcmul(c, a, b) 24 | ctx.save_for_backward(a, b) 25 | ctx.c_shape = c.shape 26 | return out 27 | 28 | @staticmethod 29 | def backward(ctx, dout): # pylint: disable=arguments-differ 30 | a, b = ctx.saved_tensors 31 | c_shape = ctx.c_shape 32 | da = None 33 | db = None 34 | dc = None 35 | 36 | if ctx.needs_input_grad[0]: 37 | da = _unbroadcast(dout * b, a.shape) 38 | 39 | if ctx.needs_input_grad[1]: 40 | db = _unbroadcast(dout * a, b.shape) 41 | 42 | if ctx.needs_input_grad[2]: 43 | dc = _unbroadcast(dout, c_shape) 44 | 45 | return da, db, dc 46 | 47 | #---------------------------------------------------------------------------- 48 | 49 | def _unbroadcast(x, shape): 50 | extra_dims = x.ndim - len(shape) 51 | assert extra_dims >= 0 52 | dim = [i for i in range(x.ndim) if x.shape[i] > 1 and (i < extra_dims or shape[i - extra_dims] == 1)] 53 | if len(dim): 54 | x = x.sum(dim=dim, keepdim=True) 55 | if extra_dims: 56 | x = x.reshape(-1, *x.shape[extra_dims+1:]) 57 | assert x.shape == shape 58 | return x 59 | 60 | #---------------------------------------------------------------------------- 61 | -------------------------------------------------------------------------------- /diffusion-projected-gan/metrics/frechet_inception_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Frechet Inception Distance (FID) from the paper 10 | "GANs trained by a two time-scale update rule converge to a local Nash 11 | equilibrium". Matches the original implementation by Heusel et al. at 12 | https://github.com/bioinf-jku/TTUR/blob/master/fid.py""" 13 | 14 | import numpy as np 15 | import scipy.linalg 16 | from . 
import metric_utils 17 | 18 | #---------------------------------------------------------------------------- 19 | 20 | def compute_fid(opts, max_real, num_gen, swav=False, sfid=False): 21 | # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 22 | detector_url = 'https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/inception-2015-12-05.pkl' 23 | detector_kwargs = dict(return_features=True) # Return raw features before the softmax layer. 24 | 25 | mu_real, sigma_real = metric_utils.compute_feature_stats_for_dataset( 26 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 27 | rel_lo=0, rel_hi=0, capture_mean_cov=True, max_items=max_real, swav=swav, sfid=sfid).get_mean_cov() 28 | 29 | mu_gen, sigma_gen = metric_utils.compute_feature_stats_for_generator( 30 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 31 | rel_lo=0, rel_hi=1, capture_mean_cov=True, max_items=num_gen, swav=swav, sfid=sfid).get_mean_cov() 32 | 33 | if opts.rank != 0: 34 | return float('nan') 35 | 36 | m = np.square(mu_gen - mu_real).sum() 37 | s, _ = scipy.linalg.sqrtm(np.dot(sigma_gen, sigma_real), disp=False) # pylint: disable=no-member 38 | fid = np.real(m + np.trace(sigma_gen + sigma_real - s * 2)) 39 | return float(fid) 40 | 41 | #---------------------------------------------------------------------------- 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /diffusion-insgen/metrics/kernel_inception_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Kernel Inception Distance (KID) from the paper "Demystifying MMD 10 | GANs". Matches the original implementation by Binkowski et al. at 11 | https://github.com/mbinkowski/MMD-GAN/blob/master/gan/compute_scores.py""" 12 | 13 | import numpy as np 14 | from . import metric_utils 15 | 16 | #---------------------------------------------------------------------------- 17 | 18 | def compute_kid(opts, max_real, num_gen, num_subsets, max_subset_size): 19 | # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 20 | detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt' 21 | detector_kwargs = dict(return_features=True) # Return raw features before the softmax layer. 22 | 23 | real_features = metric_utils.compute_feature_stats_for_dataset( 24 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 25 | rel_lo=0, rel_hi=0, capture_all=True, max_items=max_real).get_all() 26 | 27 | gen_features = metric_utils.compute_feature_stats_for_generator( 28 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 29 | rel_lo=0, rel_hi=1, capture_all=True, max_items=num_gen).get_all() 30 | 31 | if opts.rank != 0: 32 | return float('nan') 33 | 34 | n = real_features.shape[1] 35 | m = min(min(real_features.shape[0], gen_features.shape[0]), max_subset_size) 36 | t = 0 37 | for _subset_idx in range(num_subsets): 38 | x = gen_features[np.random.choice(gen_features.shape[0], m, replace=False)] 39 | y = real_features[np.random.choice(real_features.shape[0], m, replace=False)] 40 | a = (x @ x.T / n + 1) ** 3 + (y @ y.T / n + 1) ** 3 41 | b = (x @ y.T / n + 1) ** 3 42 | t += (a.sum() - np.diag(a).sum()) / (m - 1) - b.sum() * 2 / m 43 | kid = t / num_subsets / m 44 | return float(kid) 45 | 46 | #---------------------------------------------------------------------------- 47 | -------------------------------------------------------------------------------- /diffusion-stylegan2/metrics/kernel_inception_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
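# compute_kid() in this file (identical to the diffusion-insgen copy above)
# estimates KID as the squared MMD under the polynomial kernel
#   k(u, v) = (u . v / n + 1)^3,   n = feature dimension,
# averaging an unbiased per-subset MMD^2 estimate over num_subsets random
# subsets of size m.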
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Kernel Inception Distance (KID) from the paper "Demystifying MMD 10 | GANs". Matches the original implementation by Binkowski et al. at 11 | https://github.com/mbinkowski/MMD-GAN/blob/master/gan/compute_scores.py""" 12 | 13 | import numpy as np 14 | from . import metric_utils 15 | 16 | #---------------------------------------------------------------------------- 17 | 18 | def compute_kid(opts, max_real, num_gen, num_subsets, max_subset_size): 19 | # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 20 | detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt' 21 | detector_kwargs = dict(return_features=True) # Return raw features before the softmax layer. 22 | 23 | real_features = metric_utils.compute_feature_stats_for_dataset( 24 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 25 | rel_lo=0, rel_hi=0, capture_all=True, max_items=max_real).get_all() 26 | 27 | gen_features = metric_utils.compute_feature_stats_for_generator( 28 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 29 | rel_lo=0, rel_hi=1, capture_all=True, max_items=num_gen).get_all() 30 | 31 | if opts.rank != 0: 32 | return float('nan') 33 | 34 | n = real_features.shape[1] 35 | m = min(min(real_features.shape[0], gen_features.shape[0]), max_subset_size) 36 | t = 0 37 | for _subset_idx in range(num_subsets): 38 | x = gen_features[np.random.choice(gen_features.shape[0], m, replace=False)] 39 | y = real_features[np.random.choice(real_features.shape[0], m, replace=False)] 40 | a = (x @ x.T / n + 1) ** 3 + (y @ y.T / n + 1) ** 3 41 | b = (x @ y.T / n + 1) ** 3 42 | t += (a.sum() - np.diag(a).sum()) / (m - 1) - b.sum() * 2 / m 43 | kid = t / num_subsets / m 44 | return float(kid) 45 | 46 | #---------------------------------------------------------------------------- 47 | -------------------------------------------------------------------------------- /diffusion-projected-gan/metrics/kernel_inception_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Kernel Inception Distance (KID) from the paper "Demystifying MMD 10 | GANs". Matches the original implementation by Binkowski et al. at 11 | https://github.com/mbinkowski/MMD-GAN/blob/master/gan/compute_scores.py""" 12 | 13 | import numpy as np 14 | from . 
import metric_utils 15 | 16 | #---------------------------------------------------------------------------- 17 | 18 | def compute_kid(opts, max_real, num_gen, num_subsets, max_subset_size): 19 | # Direct TorchScript translation of http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 20 | detector_url = 'https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/inception-2015-12-05.pkl' 21 | detector_kwargs = dict(return_features=True) # Return raw features before the softmax layer. 22 | 23 | real_features = metric_utils.compute_feature_stats_for_dataset( 24 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 25 | rel_lo=0, rel_hi=0, capture_all=True, max_items=max_real).get_all() 26 | 27 | gen_features = metric_utils.compute_feature_stats_for_generator( 28 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 29 | rel_lo=0, rel_hi=1, capture_all=True, max_items=num_gen).get_all() 30 | 31 | if opts.rank != 0: 32 | return float('nan') 33 | 34 | n = real_features.shape[1] 35 | m = min(min(real_features.shape[0], gen_features.shape[0]), max_subset_size) 36 | t = 0 37 | for _subset_idx in range(num_subsets): 38 | x = gen_features[np.random.choice(gen_features.shape[0], m, replace=False)] 39 | y = real_features[np.random.choice(real_features.shape[0], m, replace=False)] 40 | a = (x @ x.T / n + 1) ** 3 + (y @ y.T / n + 1) ** 3 41 | b = (x @ y.T / n + 1) ** 3 42 | t += (a.sum() - np.diag(a).sum()) / (m - 1) - b.sum() * 2 / m 43 | kid = t / num_subsets / m 44 | return float(kid) 45 | 46 | #---------------------------------------------------------------------------- 47 | -------------------------------------------------------------------------------- /diffusion-insgen/docs/assets/style.css: -------------------------------------------------------------------------------- 1 | /* Body */ 2 | body { 3 | background: #e3e5e8; 4 | color: #ffffff; 5 | font-family: 'Lato', Verdana, Helvetica, sans-serif; 6 | font-weight: 300; 7 | font-size: 14pt; 8 | } 9 | 10 | /* Hyperlinks */ 11 | a {text-decoration: none;} 12 | a:link {color: #1772d0;} 13 | a:visited {color: #1772d0;} 14 | a:active {color: red;} 15 | a:hover {color: #f09228;} 16 | 17 | /* Pre-formatted Text */ 18 | pre { 19 | margin: 5pt 0; 20 | border: 0; 21 | font-size: 12pt; 22 | background: #fcfcfc; 23 | } 24 | 25 | /* Project Page Style */ 26 | /* Section */ 27 | .section { 28 | width: 768pt; 29 | min-height: 100pt; 30 | margin: 15pt auto; 31 | padding: 20pt 30pt; 32 | border: 1pt hidden #000; 33 | text-align: justify; 34 | color: #000000; 35 | background: #ffffff; 36 | } 37 | 38 | /* Header (Title and Logo) */ 39 | .section .header { 40 | min-height: 80pt; 41 | margin-top: 30pt; 42 | } 43 | .section .header .logo { 44 | width: 80pt; 45 | margin-left: 10pt; 46 | float: left; 47 | } 48 | .section .header .logo img { 49 | width: 80pt; 50 | object-fit: cover; 51 | } 52 | .section .header .title { 53 | margin: 0 120pt; 54 | text-align: center; 55 | font-size: 22pt; 56 | } 57 | 58 | /* Author */ 59 | .section .author { 60 | margin: 5pt 0; 61 | text-align: center; 62 | font-size: 16pt; 63 | } 64 | 65 | /* Institution */ 66 | .section .institution { 67 | margin: 5pt 0; 68 | text-align: center; 69 | font-size: 16pt; 70 | } 71 | 72 | /* Hyperlink (such as Paper and Code) */ 73 | .section .link { 74 | margin: 5pt 0; 75 | text-align: center; 76 | font-size: 16pt; 77 | } 78 | 79 | /* Teaser */ 80 | .section .teaser { 81 | margin: 20pt 0; 82 | text-align: center; 83 | } 
84 | .section .teaser img { 85 | width: 95%; 86 | } 87 | 88 | /* Section Title */ 89 | .section .title { 90 | text-align: center; 91 | font-size: 22pt; 92 | margin: 5pt 0 15pt 0; /* top right bottom left */ 93 | } 94 | 95 | /* Section Body */ 96 | .section .body { 97 | margin-bottom: 15pt; 98 | text-align: justify; 99 | font-size: 14pt; 100 | } 101 | 102 | /* BibTeX */ 103 | .section .bibtex { 104 | margin: 5pt 0; 105 | text-align: left; 106 | font-size: 22pt; 107 | } 108 | 109 | /* Related Work */ 110 | .section .ref { 111 | margin: 20pt 0 10pt 0; /* top right bottom left */ 112 | text-align: left; 113 | font-size: 18pt; 114 | font-weight: bold; 115 | } 116 | 117 | /* Citation */ 118 | .section .citation { 119 | min-height: 60pt; 120 | margin: 10pt 0; 121 | } 122 | .section .citation .image { 123 | width: 120pt; 124 | float: left; 125 | } 126 | .section .citation .image img { 127 | max-height: 60pt; 128 | width: 120pt; 129 | object-fit: cover; 130 | } 131 | .section .citation .comment{ 132 | margin-left: 130pt; 133 | text-align: left; 134 | font-size: 14pt; 135 | } 136 | -------------------------------------------------------------------------------- /diffusion-insgen/torch_utils/ops/grid_sample_gradfix.py: -------------------------------------------------------------------------------- 1 | 2 | """Custom replacement for `torch.nn.functional.grid_sample` that 3 | supports arbitrarily high order gradients between the input and output. 4 | Only works on 2D images and assumes 5 | `mode='bilinear'`, `padding_mode='zeros'`, `align_corners=False`.""" 6 | 7 | import warnings 8 | import torch 9 | from distutils.version import LooseVersion 10 | 11 | # pylint: disable=redefined-builtin 12 | # pylint: disable=arguments-differ 13 | # pylint: disable=protected-access 14 | 15 | #---------------------------------------------------------------------------- 16 | 17 | enabled = False # Enable the custom op by setting this to true. 18 | 19 | #---------------------------------------------------------------------------- 20 | 21 | def grid_sample(input, grid): 22 | if _should_use_custom_op(): 23 | return _GridSample2dForward.apply(input, grid) 24 | return torch.nn.functional.grid_sample(input=input, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False) 25 | 26 | #---------------------------------------------------------------------------- 27 | 28 | def _should_use_custom_op(): 29 | if not enabled: 30 | return False 31 | if LooseVersion(torch.__version__) >= LooseVersion('1.7.0'): 32 | return True 33 | warnings.warn(f'grid_sample_gradfix not supported on PyTorch {torch.__version__}. 
Falling back to torch.nn.functional.grid_sample().') 34 | return False 35 | 36 | #---------------------------------------------------------------------------- 37 | 38 | class _GridSample2dForward(torch.autograd.Function): 39 | @staticmethod 40 | def forward(ctx, input, grid): 41 | assert input.ndim == 4 42 | assert grid.ndim == 4 43 | output = torch.nn.functional.grid_sample(input=input, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False) 44 | ctx.save_for_backward(input, grid) 45 | return output 46 | 47 | @staticmethod 48 | def backward(ctx, grad_output): 49 | input, grid = ctx.saved_tensors 50 | grad_input, grad_grid = _GridSample2dBackward.apply(grad_output, input, grid) 51 | return grad_input, grad_grid 52 | 53 | #---------------------------------------------------------------------------- 54 | 55 | class _GridSample2dBackward(torch.autograd.Function): 56 | @staticmethod 57 | def forward(ctx, grad_output, input, grid): 58 | op = torch._C._jit_get_operation('aten::grid_sampler_2d_backward') 59 | grad_input, grad_grid = op(grad_output, input, grid, 0, 0, False) 60 | ctx.save_for_backward(grid) 61 | return grad_input, grad_grid 62 | 63 | @staticmethod 64 | def backward(ctx, grad2_grad_input, grad2_grad_grid): 65 | _ = grad2_grad_grid # unused 66 | grid, = ctx.saved_tensors 67 | grad2_grad_output = None 68 | grad2_input = None 69 | grad2_grid = None 70 | 71 | if ctx.needs_input_grad[0]: 72 | grad2_grad_output = _GridSample2dForward.apply(grad2_grad_input, grid) 73 | 74 | assert not ctx.needs_input_grad[2] 75 | return grad2_grad_output, grad2_input, grad2_grid 76 | 77 | #---------------------------------------------------------------------------- 78 | -------------------------------------------------------------------------------- /diffusion-stylegan2/torch_utils/ops/grid_sample_gradfix.py: -------------------------------------------------------------------------------- 1 | 2 | """Custom replacement for `torch.nn.functional.grid_sample` that 3 | supports arbitrarily high order gradients between the input and output. 4 | Only works on 2D images and assumes 5 | `mode='bilinear'`, `padding_mode='zeros'`, `align_corners=False`.""" 6 | 7 | import warnings 8 | import torch 9 | from distutils.version import LooseVersion 10 | 11 | # pylint: disable=redefined-builtin 12 | # pylint: disable=arguments-differ 13 | # pylint: disable=protected-access 14 | 15 | #---------------------------------------------------------------------------- 16 | 17 | enabled = False # Enable the custom op by setting this to true. 18 | 19 | #---------------------------------------------------------------------------- 20 | 21 | def grid_sample(input, grid): 22 | if _should_use_custom_op(): 23 | return _GridSample2dForward.apply(input, grid) 24 | return torch.nn.functional.grid_sample(input=input, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False) 25 | 26 | #---------------------------------------------------------------------------- 27 | 28 | def _should_use_custom_op(): 29 | if not enabled: 30 | return False 31 | if LooseVersion(torch.__version__) >= LooseVersion('1.7.0'): 32 | return True 33 | warnings.warn(f'grid_sample_gradfix not supported on PyTorch {torch.__version__}. 
Falling back to torch.nn.functional.grid_sample().') 34 | return False 35 | 36 | #---------------------------------------------------------------------------- 37 | 38 | class _GridSample2dForward(torch.autograd.Function): 39 | @staticmethod 40 | def forward(ctx, input, grid): 41 | assert input.ndim == 4 42 | assert grid.ndim == 4 43 | output = torch.nn.functional.grid_sample(input=input, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False) 44 | ctx.save_for_backward(input, grid) 45 | return output 46 | 47 | @staticmethod 48 | def backward(ctx, grad_output): 49 | input, grid = ctx.saved_tensors 50 | grad_input, grad_grid = _GridSample2dBackward.apply(grad_output, input, grid) 51 | return grad_input, grad_grid 52 | 53 | #---------------------------------------------------------------------------- 54 | 55 | class _GridSample2dBackward(torch.autograd.Function): 56 | @staticmethod 57 | def forward(ctx, grad_output, input, grid): 58 | op = torch._C._jit_get_operation('aten::grid_sampler_2d_backward') 59 | grad_input, grad_grid = op(grad_output, input, grid, 0, 0, False) 60 | ctx.save_for_backward(grid) 61 | return grad_input, grad_grid 62 | 63 | @staticmethod 64 | def backward(ctx, grad2_grad_input, grad2_grad_grid): 65 | _ = grad2_grad_grid # unused 66 | grid, = ctx.saved_tensors 67 | grad2_grad_output = None 68 | grad2_input = None 69 | grad2_grid = None 70 | 71 | if ctx.needs_input_grad[0]: 72 | grad2_grad_output = _GridSample2dForward.apply(grad2_grad_input, grid) 73 | 74 | assert not ctx.needs_input_grad[2] 75 | return grad2_grad_output, grad2_input, grad2_grid 76 | 77 | #---------------------------------------------------------------------------- 78 | -------------------------------------------------------------------------------- /diffusion-projected-gan/pg_modules/diffaug.py: -------------------------------------------------------------------------------- 1 | # Differentiable Augmentation for Data-Efficient GAN Training 2 | # Shengyu Zhao, Zhijian Liu, Ji Lin, Jun-Yan Zhu, and Song Han 3 | # https://arxiv.org/pdf/2006.10738 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | 9 | def DiffAugment(x, policy='', channels_first=True): 10 | if policy: 11 | if not channels_first: 12 | x = x.permute(0, 3, 1, 2) 13 | for p in policy.split(','): 14 | for f in AUGMENT_FNS[p]: 15 | x = f(x) 16 | if not channels_first: 17 | x = x.permute(0, 2, 3, 1) 18 | x = x.contiguous() 19 | return x 20 | 21 | 22 | def rand_brightness(x): 23 | x = x + (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) - 0.5) 24 | return x 25 | 26 | 27 | def rand_saturation(x): 28 | x_mean = x.mean(dim=1, keepdim=True) 29 | x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) * 2) + x_mean 30 | return x 31 | 32 | 33 | def rand_contrast(x): 34 | x_mean = x.mean(dim=[1, 2, 3], keepdim=True) 35 | x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) + 0.5) + x_mean 36 | return x 37 | 38 | 39 | def rand_translation(x, ratio=0.125): 40 | shift_x, shift_y = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) 41 | translation_x = torch.randint(-shift_x, shift_x + 1, size=[x.size(0), 1, 1], device=x.device) 42 | translation_y = torch.randint(-shift_y, shift_y + 1, size=[x.size(0), 1, 1], device=x.device) 43 | grid_batch, grid_x, grid_y = torch.meshgrid( 44 | torch.arange(x.size(0), dtype=torch.long, device=x.device), 45 | torch.arange(x.size(2), dtype=torch.long, device=x.device), 46 | torch.arange(x.size(3), dtype=torch.long, 
device=x.device), 47 | ) 48 | grid_x = torch.clamp(grid_x + translation_x + 1, 0, x.size(2) + 1) 49 | grid_y = torch.clamp(grid_y + translation_y + 1, 0, x.size(3) + 1) 50 | x_pad = F.pad(x, [1, 1, 1, 1, 0, 0, 0, 0]) 51 | x = x_pad.permute(0, 2, 3, 1).contiguous()[grid_batch, grid_x, grid_y].permute(0, 3, 1, 2) 52 | return x 53 | 54 | 55 | def rand_cutout(x, ratio=0.2): 56 | cutout_size = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) 57 | offset_x = torch.randint(0, x.size(2) + (1 - cutout_size[0] % 2), size=[x.size(0), 1, 1], device=x.device) 58 | offset_y = torch.randint(0, x.size(3) + (1 - cutout_size[1] % 2), size=[x.size(0), 1, 1], device=x.device) 59 | grid_batch, grid_x, grid_y = torch.meshgrid( 60 | torch.arange(x.size(0), dtype=torch.long, device=x.device), 61 | torch.arange(cutout_size[0], dtype=torch.long, device=x.device), 62 | torch.arange(cutout_size[1], dtype=torch.long, device=x.device), 63 | ) 64 | grid_x = torch.clamp(grid_x + offset_x - cutout_size[0] // 2, min=0, max=x.size(2) - 1) 65 | grid_y = torch.clamp(grid_y + offset_y - cutout_size[1] // 2, min=0, max=x.size(3) - 1) 66 | mask = torch.ones(x.size(0), x.size(2), x.size(3), dtype=x.dtype, device=x.device) 67 | mask[grid_batch, grid_x, grid_y] = 0 68 | x = x * mask.unsqueeze(1) 69 | return x 70 | 71 | 72 | AUGMENT_FNS = { 73 | 'color': [rand_brightness, rand_saturation, rand_contrast], 74 | 'translation': [rand_translation], 75 | 'cutout': [rand_cutout], 76 | } 77 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/grid_sample_gradfix.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Custom replacement for `torch.nn.functional.grid_sample` that 10 | supports arbitrarily high order gradients between the input and output. 11 | Only works on 2D images and assumes 12 | `mode='bilinear'`, `padding_mode='zeros'`, `align_corners=False`.""" 13 | 14 | import torch 15 | 16 | # pylint: disable=redefined-builtin 17 | # pylint: disable=arguments-differ 18 | # pylint: disable=protected-access 19 | 20 | #---------------------------------------------------------------------------- 21 | 22 | enabled = False # Enable the custom op by setting this to true. 
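# --- Editor's illustrative sketch (not part of the original file). It shows
# how the flag above is meant to be used: once `enabled` is flipped on,
# grid_sample() routes through the custom op and second-order gradients
# (e.g. for an R1-style penalty) can flow through the sampler. Tensor shapes
# follow the asserts below: input [N, C, H, W], grid [N, H, W, 2].
#
#   import torch
#   from torch_utils.ops import grid_sample_gradfix
#   grid_sample_gradfix.enabled = True                    # opt in to the custom op
#   x = torch.randn(4, 3, 32, 32, requires_grad=True)
#   grid = torch.rand(4, 32, 32, 2) * 2 - 1               # normalized coords in [-1, 1]
#   y = grid_sample_gradfix.grid_sample(x, grid)
#   g, = torch.autograd.grad(y.sum(), x, create_graph=True)
#   g.square().sum().backward()                           # double backward works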
23 | 24 | #---------------------------------------------------------------------------- 25 | 26 | def grid_sample(input, grid): 27 | if _should_use_custom_op(): 28 | return _GridSample2dForward.apply(input, grid) 29 | return torch.nn.functional.grid_sample(input=input, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False) 30 | 31 | #---------------------------------------------------------------------------- 32 | 33 | def _should_use_custom_op(): 34 | return enabled 35 | 36 | #---------------------------------------------------------------------------- 37 | 38 | class _GridSample2dForward(torch.autograd.Function): 39 | @staticmethod 40 | def forward(ctx, input, grid): 41 | assert input.ndim == 4 42 | assert grid.ndim == 4 43 | output = torch.nn.functional.grid_sample(input=input, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False) 44 | ctx.save_for_backward(input, grid) 45 | return output 46 | 47 | @staticmethod 48 | def backward(ctx, grad_output): 49 | input, grid = ctx.saved_tensors 50 | grad_input, grad_grid = _GridSample2dBackward.apply(grad_output, input, grid) 51 | return grad_input, grad_grid 52 | 53 | #---------------------------------------------------------------------------- 54 | 55 | class _GridSample2dBackward(torch.autograd.Function): 56 | @staticmethod 57 | def forward(ctx, grad_output, input, grid): 58 | op = torch._C._jit_get_operation('aten::grid_sampler_2d_backward') 59 | grad_input, grad_grid = op(grad_output, input, grid, 0, 0, False) 60 | ctx.save_for_backward(grid) 61 | return grad_input, grad_grid 62 | 63 | @staticmethod 64 | def backward(ctx, grad2_grad_input, grad2_grad_grid): 65 | _ = grad2_grad_grid # unused 66 | grid, = ctx.saved_tensors 67 | grad2_grad_output = None 68 | grad2_input = None 69 | grad2_grid = None 70 | 71 | if ctx.needs_input_grad[0]: 72 | grad2_grad_output = _GridSample2dForward.apply(grad2_grad_input, grid) 73 | 74 | assert not ctx.needs_input_grad[2] 75 | return grad2_grad_output, grad2_input, grad2_grid 76 | 77 | #---------------------------------------------------------------------------- 78 | -------------------------------------------------------------------------------- /diffusion-insgen/metrics/precision_recall.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Precision/Recall (PR) from the paper "Improved Precision and Recall 10 | Metric for Assessing Generative Models". Matches the original implementation 11 | by Kynkaanniemi et al. at 12 | https://github.com/kynkaat/improved-precision-and-recall-metric/blob/master/precision_recall.py""" 13 | 14 | import torch 15 | from . 
import metric_utils 16 | 17 | #---------------------------------------------------------------------------- 18 | 19 | def compute_distances(row_features, col_features, num_gpus, rank, col_batch_size): 20 | assert 0 <= rank < num_gpus 21 | num_cols = col_features.shape[0] 22 | num_batches = ((num_cols - 1) // col_batch_size // num_gpus + 1) * num_gpus 23 | col_batches = torch.nn.functional.pad(col_features, [0, 0, 0, -num_cols % num_batches]).chunk(num_batches) 24 | dist_batches = [] 25 | for col_batch in col_batches[rank :: num_gpus]: 26 | dist_batch = torch.cdist(row_features.unsqueeze(0), col_batch.unsqueeze(0))[0] 27 | for src in range(num_gpus): 28 | dist_broadcast = dist_batch.clone() 29 | if num_gpus > 1: 30 | torch.distributed.broadcast(dist_broadcast, src=src) 31 | dist_batches.append(dist_broadcast.cpu() if rank == 0 else None) 32 | return torch.cat(dist_batches, dim=1)[:, :num_cols] if rank == 0 else None 33 | 34 | #---------------------------------------------------------------------------- 35 | 36 | def compute_pr(opts, max_real, num_gen, nhood_size, row_batch_size, col_batch_size): 37 | detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt' 38 | detector_kwargs = dict(return_features=True) 39 | 40 | real_features = metric_utils.compute_feature_stats_for_dataset( 41 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 42 | rel_lo=0, rel_hi=0, capture_all=True, max_items=max_real).get_all_torch().to(torch.float16).to(opts.device) 43 | 44 | gen_features = metric_utils.compute_feature_stats_for_generator( 45 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 46 | rel_lo=0, rel_hi=1, capture_all=True, max_items=num_gen).get_all_torch().to(torch.float16).to(opts.device) 47 | 48 | results = dict() 49 | for name, manifold, probes in [('precision', real_features, gen_features), ('recall', gen_features, real_features)]: 50 | kth = [] 51 | for manifold_batch in manifold.split(row_batch_size): 52 | dist = compute_distances(row_features=manifold_batch, col_features=manifold, num_gpus=opts.num_gpus, rank=opts.rank, col_batch_size=col_batch_size) 53 | kth.append(dist.to(torch.float32).kthvalue(nhood_size + 1).values.to(torch.float16) if opts.rank == 0 else None) 54 | kth = torch.cat(kth) if opts.rank == 0 else None 55 | pred = [] 56 | for probes_batch in probes.split(row_batch_size): 57 | dist = compute_distances(row_features=probes_batch, col_features=manifold, num_gpus=opts.num_gpus, rank=opts.rank, col_batch_size=col_batch_size) 58 | pred.append((dist <= kth).any(dim=1) if opts.rank == 0 else None) 59 | results[name] = float(torch.cat(pred).to(torch.float32).mean() if opts.rank == 0 else 'nan') 60 | return results['precision'], results['recall'] 61 | 62 | #---------------------------------------------------------------------------- 63 | -------------------------------------------------------------------------------- /diffusion-stylegan2/metrics/precision_recall.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | """Precision/Recall (PR) from the paper "Improved Precision and Recall 10 | Metric for Assessing Generative Models". Matches the original implementation 11 | by Kynkaanniemi et al. at 12 | https://github.com/kynkaat/improved-precision-and-recall-metric/blob/master/precision_recall.py""" 13 | 14 | import torch 15 | from . import metric_utils 16 | 17 | #---------------------------------------------------------------------------- 18 | 19 | def compute_distances(row_features, col_features, num_gpus, rank, col_batch_size): 20 | assert 0 <= rank < num_gpus 21 | num_cols = col_features.shape[0] 22 | num_batches = ((num_cols - 1) // col_batch_size // num_gpus + 1) * num_gpus 23 | col_batches = torch.nn.functional.pad(col_features, [0, 0, 0, -num_cols % num_batches]).chunk(num_batches) 24 | dist_batches = [] 25 | for col_batch in col_batches[rank :: num_gpus]: 26 | dist_batch = torch.cdist(row_features.unsqueeze(0), col_batch.unsqueeze(0))[0] 27 | for src in range(num_gpus): 28 | dist_broadcast = dist_batch.clone() 29 | if num_gpus > 1: 30 | torch.distributed.broadcast(dist_broadcast, src=src) 31 | dist_batches.append(dist_broadcast.cpu() if rank == 0 else None) 32 | return torch.cat(dist_batches, dim=1)[:, :num_cols] if rank == 0 else None 33 | 34 | #---------------------------------------------------------------------------- 35 | 36 | def compute_pr(opts, max_real, num_gen, nhood_size, row_batch_size, col_batch_size): 37 | detector_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt' 38 | detector_kwargs = dict(return_features=True) 39 | 40 | real_features = metric_utils.compute_feature_stats_for_dataset( 41 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 42 | rel_lo=0, rel_hi=0, capture_all=True, max_items=max_real).get_all_torch().to(torch.float16).to(opts.device) 43 | 44 | gen_features = metric_utils.compute_feature_stats_for_generator( 45 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 46 | rel_lo=0, rel_hi=1, capture_all=True, max_items=num_gen).get_all_torch().to(torch.float16).to(opts.device) 47 | 48 | results = dict() 49 | for name, manifold, probes in [('precision', real_features, gen_features), ('recall', gen_features, real_features)]: 50 | kth = [] 51 | for manifold_batch in manifold.split(row_batch_size): 52 | dist = compute_distances(row_features=manifold_batch, col_features=manifold, num_gpus=opts.num_gpus, rank=opts.rank, col_batch_size=col_batch_size) 53 | kth.append(dist.to(torch.float32).kthvalue(nhood_size + 1).values.to(torch.float16) if opts.rank == 0 else None) 54 | kth = torch.cat(kth) if opts.rank == 0 else None 55 | pred = [] 56 | for probes_batch in probes.split(row_batch_size): 57 | dist = compute_distances(row_features=probes_batch, col_features=manifold, num_gpus=opts.num_gpus, rank=opts.rank, col_batch_size=col_batch_size) 58 | pred.append((dist <= kth).any(dim=1) if opts.rank == 0 else None) 59 | results[name] = float(torch.cat(pred).to(torch.float32).mean() if opts.rank == 0 else 'nan') 60 | return results['precision'], results['recall'] 61 | 62 | #---------------------------------------------------------------------------- 63 | -------------------------------------------------------------------------------- /diffusion-projected-gan/metrics/precision_recall.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Precision/Recall (PR) from the paper "Improved Precision and Recall 10 | Metric for Assessing Generative Models". Matches the original implementation 11 | by Kynkaanniemi et al. at 12 | https://github.com/kynkaat/improved-precision-and-recall-metric/blob/master/precision_recall.py""" 13 | 14 | import torch 15 | from . import metric_utils 16 | 17 | #---------------------------------------------------------------------------- 18 | 19 | def compute_distances(row_features, col_features, num_gpus, rank, col_batch_size): 20 | assert 0 <= rank < num_gpus 21 | num_cols = col_features.shape[0] 22 | num_batches = ((num_cols - 1) // col_batch_size // num_gpus + 1) * num_gpus 23 | col_batches = torch.nn.functional.pad(col_features, [0, 0, 0, -num_cols % num_batches]).chunk(num_batches) 24 | dist_batches = [] 25 | for col_batch in col_batches[rank :: num_gpus]: 26 | dist_batch = torch.cdist(row_features.unsqueeze(0), col_batch.unsqueeze(0))[0] 27 | for src in range(num_gpus): 28 | dist_broadcast = dist_batch.clone() 29 | if num_gpus > 1: 30 | torch.distributed.broadcast(dist_broadcast, src=src) 31 | dist_batches.append(dist_broadcast.cpu() if rank == 0 else None) 32 | return torch.cat(dist_batches, dim=1)[:, :num_cols] if rank == 0 else None 33 | 34 | #---------------------------------------------------------------------------- 35 | 36 | def compute_pr(opts, max_real, num_gen, nhood_size, row_batch_size, col_batch_size): 37 | detector_url = 'https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/vgg16.pkl' 38 | detector_kwargs = dict(return_features=True) 39 | 40 | real_features = metric_utils.compute_feature_stats_for_dataset( 41 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 42 | rel_lo=0, rel_hi=0, capture_all=True, max_items=max_real).get_all_torch().to(torch.float16).to(opts.device) 43 | 44 | gen_features = metric_utils.compute_feature_stats_for_generator( 45 | opts=opts, detector_url=detector_url, detector_kwargs=detector_kwargs, 46 | rel_lo=0, rel_hi=1, capture_all=True, max_items=num_gen).get_all_torch().to(torch.float16).to(opts.device) 47 | 48 | results = dict() 49 | for name, manifold, probes in [('precision', real_features, gen_features), ('recall', gen_features, real_features)]: 50 | kth = [] 51 | for manifold_batch in manifold.split(row_batch_size): 52 | dist = compute_distances(row_features=manifold_batch, col_features=manifold, num_gpus=opts.num_gpus, rank=opts.rank, col_batch_size=col_batch_size) 53 | kth.append(dist.to(torch.float32).kthvalue(nhood_size + 1).values.to(torch.float16) if opts.rank == 0 else None) 54 | kth = torch.cat(kth) if opts.rank == 0 else None 55 | pred = [] 56 | for probes_batch in probes.split(row_batch_size): 57 | dist = compute_distances(row_features=probes_batch, col_features=manifold, num_gpus=opts.num_gpus, rank=opts.rank, col_batch_size=col_batch_size) 58 | pred.append((dist <= kth).any(dim=1) if opts.rank == 0 else None) 59 | results[name] = float(torch.cat(pred).to(torch.float32).mean() if opts.rank == 0 else 'nan') 60 | return results['precision'], results['recall'] 61 | 62 | 
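# --- Editor's illustrative sketch (not part of the original file). The loop
# above amounts to a k-NN radius test: a probe counts as covered if it falls
# inside the (nhood_size+1)-th-nearest-neighbour ball of any manifold point
# (the +1 accounts for the zero self-distance). A NumPy-only toy version with
# random stand-in features instead of VGG16 embeddings:
#
#   import numpy as np
#   rng = np.random.default_rng(0)
#   real = rng.standard_normal((200, 16)); fake = rng.standard_normal((100, 16))
#   d_rr = np.linalg.norm(real[:, None] - real[None], axis=-1)   # (200, 200)
#   kth = np.sort(d_rr, axis=1)[:, 3]            # nhood_size=3 -> 4th value incl. self
#   d_fr = np.linalg.norm(fake[:, None] - real[None], axis=-1)   # (100, 200)
#   precision = (d_fr <= kth[None]).any(axis=1).mean()
#   # recall: swap the roles of `real` and `fake`.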
#---------------------------------------------------------------------------- 63 | -------------------------------------------------------------------------------- /diffusion-insgen/training/diffaug.py: -------------------------------------------------------------------------------- 1 | # Differentiable Augmentation for Data-Efficient GAN Training 2 | # Shengyu Zhao, Zhijian Liu, Ji Lin, Jun-Yan Zhu, and Song Han 3 | # https://arxiv.org/pdf/2006.10738 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | class DiffAugment(torch.nn.Module): 9 | def __init__(self, policy='color,translation,cutout', channels_first=True): 10 | super().__init__() 11 | self.policy = policy 12 | self.channels_first = channels_first 13 | 14 | def forward(self, x): 15 | if not self.channels_first: 16 | x = x.permute(0, 3, 1, 2) 17 | for p in self.policy.split(','): 18 | for f in AUGMENT_FNS[p]: 19 | x = f(x) 20 | if not self.channels_first: 21 | x = x.permute(0, 2, 3, 1) 22 | x = x.contiguous() 23 | return x 24 | 25 | # def DiffAugment(x, policy='', channels_first=True): 26 | # if policy: 27 | # if not channels_first: 28 | # x = x.permute(0, 3, 1, 2) 29 | # for p in policy.split(','): 30 | # for f in AUGMENT_FNS[p]: 31 | # x = f(x) 32 | # if not channels_first: 33 | # x = x.permute(0, 2, 3, 1) 34 | # x = x.contiguous() 35 | # return x 36 | 37 | 38 | def rand_brightness(x): 39 | x = x + (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) - 0.5) 40 | return x 41 | 42 | 43 | def rand_saturation(x): 44 | x_mean = x.mean(dim=1, keepdim=True) 45 | x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) * 2) + x_mean 46 | return x 47 | 48 | 49 | def rand_contrast(x): 50 | x_mean = x.mean(dim=[1, 2, 3], keepdim=True) 51 | x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) + 0.5) + x_mean 52 | return x 53 | 54 | 55 | def rand_translation(x, ratio=0.125): 56 | shift_x, shift_y = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) 57 | translation_x = torch.randint(-shift_x, shift_x + 1, size=[x.size(0), 1, 1], device=x.device) 58 | translation_y = torch.randint(-shift_y, shift_y + 1, size=[x.size(0), 1, 1], device=x.device) 59 | grid_batch, grid_x, grid_y = torch.meshgrid( 60 | torch.arange(x.size(0), dtype=torch.long, device=x.device), 61 | torch.arange(x.size(2), dtype=torch.long, device=x.device), 62 | torch.arange(x.size(3), dtype=torch.long, device=x.device), 63 | ) 64 | grid_x = torch.clamp(grid_x + translation_x + 1, 0, x.size(2) + 1) 65 | grid_y = torch.clamp(grid_y + translation_y + 1, 0, x.size(3) + 1) 66 | x_pad = F.pad(x, [1, 1, 1, 1, 0, 0, 0, 0]) 67 | x = x_pad.permute(0, 2, 3, 1).contiguous()[grid_batch, grid_x, grid_y].permute(0, 3, 1, 2) 68 | return x 69 | 70 | 71 | def rand_cutout(x, ratio=0.2): 72 | cutout_size = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) 73 | offset_x = torch.randint(0, x.size(2) + (1 - cutout_size[0] % 2), size=[x.size(0), 1, 1], device=x.device) 74 | offset_y = torch.randint(0, x.size(3) + (1 - cutout_size[1] % 2), size=[x.size(0), 1, 1], device=x.device) 75 | grid_batch, grid_x, grid_y = torch.meshgrid( 76 | torch.arange(x.size(0), dtype=torch.long, device=x.device), 77 | torch.arange(cutout_size[0], dtype=torch.long, device=x.device), 78 | torch.arange(cutout_size[1], dtype=torch.long, device=x.device), 79 | ) 80 | grid_x = torch.clamp(grid_x + offset_x - cutout_size[0] // 2, min=0, max=x.size(2) - 1) 81 | grid_y = torch.clamp(grid_y + offset_y - cutout_size[1] // 2, min=0, 
max=x.size(3) - 1) 82 | mask = torch.ones(x.size(0), x.size(2), x.size(3), dtype=x.dtype, device=x.device) 83 | mask[grid_batch, grid_x, grid_y] = 0 84 | x = x * mask.unsqueeze(1) 85 | return x 86 | 87 | 88 | AUGMENT_FNS = { 89 | 'color': [rand_brightness, rand_saturation, rand_contrast], 90 | 'translation': [rand_translation], 91 | 'cutout': [rand_cutout], 92 | } 93 | -------------------------------------------------------------------------------- /diffusion-stylegan2/training/diffaug.py: -------------------------------------------------------------------------------- 1 | # Differentiable Augmentation for Data-Efficient GAN Training 2 | # Shengyu Zhao, Zhijian Liu, Ji Lin, Jun-Yan Zhu, and Song Han 3 | # https://arxiv.org/pdf/2006.10738 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | class DiffAugment(torch.nn.Module): 9 | def __init__(self, policy='color,translation,cutout', channels_first=True): 10 | super().__init__() 11 | self.policy = policy 12 | self.channels_first = channels_first 13 | 14 | def forward(self, x): 15 | if not self.channels_first: 16 | x = x.permute(0, 3, 1, 2) 17 | for p in self.policy.split(','): 18 | for f in AUGMENT_FNS[p]: 19 | x = f(x) 20 | if not self.channels_first: 21 | x = x.permute(0, 2, 3, 1) 22 | x = x.contiguous() 23 | return x 24 | 25 | # def DiffAugment(x, policy='', channels_first=True): 26 | # if policy: 27 | # if not channels_first: 28 | # x = x.permute(0, 3, 1, 2) 29 | # for p in policy.split(','): 30 | # for f in AUGMENT_FNS[p]: 31 | # x = f(x) 32 | # if not channels_first: 33 | # x = x.permute(0, 2, 3, 1) 34 | # x = x.contiguous() 35 | # return x 36 | 37 | 38 | def rand_brightness(x): 39 | x = x + (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) - 0.5) 40 | return x 41 | 42 | 43 | def rand_saturation(x): 44 | x_mean = x.mean(dim=1, keepdim=True) 45 | x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) * 2) + x_mean 46 | return x 47 | 48 | 49 | def rand_contrast(x): 50 | x_mean = x.mean(dim=[1, 2, 3], keepdim=True) 51 | x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) + 0.5) + x_mean 52 | return x 53 | 54 | 55 | def rand_translation(x, ratio=0.125): 56 | shift_x, shift_y = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) 57 | translation_x = torch.randint(-shift_x, shift_x + 1, size=[x.size(0), 1, 1], device=x.device) 58 | translation_y = torch.randint(-shift_y, shift_y + 1, size=[x.size(0), 1, 1], device=x.device) 59 | grid_batch, grid_x, grid_y = torch.meshgrid( 60 | torch.arange(x.size(0), dtype=torch.long, device=x.device), 61 | torch.arange(x.size(2), dtype=torch.long, device=x.device), 62 | torch.arange(x.size(3), dtype=torch.long, device=x.device), 63 | ) 64 | grid_x = torch.clamp(grid_x + translation_x + 1, 0, x.size(2) + 1) 65 | grid_y = torch.clamp(grid_y + translation_y + 1, 0, x.size(3) + 1) 66 | x_pad = F.pad(x, [1, 1, 1, 1, 0, 0, 0, 0]) 67 | x = x_pad.permute(0, 2, 3, 1).contiguous()[grid_batch, grid_x, grid_y].permute(0, 3, 1, 2) 68 | return x 69 | 70 | 71 | def rand_cutout(x, ratio=0.2): 72 | cutout_size = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5) 73 | offset_x = torch.randint(0, x.size(2) + (1 - cutout_size[0] % 2), size=[x.size(0), 1, 1], device=x.device) 74 | offset_y = torch.randint(0, x.size(3) + (1 - cutout_size[1] % 2), size=[x.size(0), 1, 1], device=x.device) 75 | grid_batch, grid_x, grid_y = torch.meshgrid( 76 | torch.arange(x.size(0), dtype=torch.long, device=x.device), 77 | 
torch.arange(cutout_size[0], dtype=torch.long, device=x.device), 78 | torch.arange(cutout_size[1], dtype=torch.long, device=x.device), 79 | ) 80 | grid_x = torch.clamp(grid_x + offset_x - cutout_size[0] // 2, min=0, max=x.size(2) - 1) 81 | grid_y = torch.clamp(grid_y + offset_y - cutout_size[1] // 2, min=0, max=x.size(3) - 1) 82 | mask = torch.ones(x.size(0), x.size(2), x.size(3), dtype=x.dtype, device=x.device) 83 | mask[grid_batch, grid_x, grid_y] = 0 84 | x = x * mask.unsqueeze(1) 85 | return x 86 | 87 | 88 | AUGMENT_FNS = { 89 | 'color': [rand_brightness, rand_saturation, rand_contrast], 90 | 'translation': [rand_translation], 91 | 'cutout': [rand_cutout], 92 | } 93 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/filtered_lrelu.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include <cuda_runtime.h> 10 | 11 | //------------------------------------------------------------------------ 12 | // CUDA kernel parameters. 13 | 14 | struct filtered_lrelu_kernel_params 15 | { 16 | // These parameters decide which kernel to use. 17 | int up; // upsampling ratio (1, 2, 4) 18 | int down; // downsampling ratio (1, 2, 4) 19 | int2 fuShape; // [size, 1] | [size, size] 20 | int2 fdShape; // [size, 1] | [size, size] 21 | 22 | int _dummy; // Alignment. 23 | 24 | // Rest of the parameters. 25 | const void* x; // Input tensor. 26 | void* y; // Output tensor. 27 | const void* b; // Bias tensor. 28 | unsigned char* s; // Sign tensor in/out. NULL if unused. 29 | const float* fu; // Upsampling filter. 30 | const float* fd; // Downsampling filter. 31 | 32 | int2 pad0; // Left/top padding. 33 | float gain; // Additional gain factor. 34 | float slope; // Leaky ReLU slope on negative side. 35 | float clamp; // Clamp after nonlinearity. 36 | int flip; // Filter kernel flip for gradient computation. 37 | 38 | int tilesXdim; // Original number of horizontal output tiles. 39 | int tilesXrep; // Number of horizontal tiles per CTA. 40 | int blockZofs; // Block z offset to support large minibatch, channel dimensions. 41 | 42 | int4 xShape; // [width, height, channel, batch] 43 | int4 yShape; // [width, height, channel, batch] 44 | int2 sShape; // [width, height] - width is in bytes. Contiguous. Zeros if unused. 45 | int2 sOfs; // [ofs_x, ofs_y] - offset between upsampled data and sign tensor. 46 | int swLimit; // Active width of sign tensor in bytes. 47 | 48 | longlong4 xStride; // Strides of all tensors except signs, same component order as shapes. 49 | longlong4 yStride; // 50 | int64_t bStride; // 51 | longlong3 fuStride; // 52 | longlong3 fdStride; // 53 | }; 54 | 55 | struct filtered_lrelu_act_kernel_params 56 | { 57 | void* x; // Input/output, modified in-place. 58 | unsigned char* s; // Sign tensor in/out. NULL if unused. 59 | 60 | float gain; // Additional gain factor. 61 | float slope; // Leaky ReLU slope on negative side. 62 | float clamp; // Clamp after nonlinearity.
63 | 64 | int4 xShape; // [width, height, channel, batch] 65 | longlong4 xStride; // Input/output tensor strides, same order as in shape. 66 | int2 sShape; // [width, height] - width is in elements. Contiguous. Zeros if unused. 67 | int2 sOfs; // [ofs_x, ofs_y] - offset between upsampled data and sign tensor. 68 | }; 69 | 70 | //------------------------------------------------------------------------ 71 | // CUDA kernel specialization. 72 | 73 | struct filtered_lrelu_kernel_spec 74 | { 75 | void* setup; // Function for filter kernel setup. 76 | void* exec; // Function for main operation. 77 | int2 tileOut; // Width/height of launch tile. 78 | int numWarps; // Number of warps per thread block, determines launch block size. 79 | int xrep; // For processing multiple horizontal tiles per thread block. 80 | int dynamicSharedKB; // How much dynamic shared memory the exec kernel wants. 81 | }; 82 | 83 | //------------------------------------------------------------------------ 84 | // CUDA kernel selection. 85 | 86 | template <class T, class index_t, bool signWrite, bool signRead> filtered_lrelu_kernel_spec choose_filtered_lrelu_kernel(const filtered_lrelu_kernel_params& p, int sharedKB); 87 | template <class T, bool signWrite, bool signRead> void* choose_filtered_lrelu_act_kernel(void); 88 | template <bool signT> cudaError_t copy_filters(cudaStream_t stream); 89 | 90 | //------------------------------------------------------------------------ 91 | -------------------------------------------------------------------------------- /diffusion-insgen/torch_utils/ops/bias_act.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include <torch/extension.h> 10 | #include <ATen/cuda/CUDAContext.h> 11 | #include <c10/cuda/CUDAGuard.h> 12 | #include "bias_act.h" 13 | 14 | //------------------------------------------------------------------------ 15 | 16 | static bool has_same_layout(torch::Tensor x, torch::Tensor y) 17 | { 18 | if (x.dim() != y.dim()) 19 | return false; 20 | for (int64_t i = 0; i < x.dim(); i++) 21 | { 22 | if (x.size(i) != y.size(i)) 23 | return false; 24 | if (x.size(i) >= 2 && x.stride(i) != y.stride(i)) 25 | return false; 26 | } 27 | return true; 28 | } 29 | 30 | //------------------------------------------------------------------------ 31 | 32 | static torch::Tensor bias_act(torch::Tensor x, torch::Tensor b, torch::Tensor xref, torch::Tensor yref, torch::Tensor dy, int grad, int dim, int act, float alpha, float gain, float clamp) 33 | { 34 | // Validate arguments.
35 | TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device"); 36 | TORCH_CHECK(b.numel() == 0 || (b.dtype() == x.dtype() && b.device() == x.device()), "b must have the same dtype and device as x"); 37 | TORCH_CHECK(xref.numel() == 0 || (xref.sizes() == x.sizes() && xref.dtype() == x.dtype() && xref.device() == x.device()), "xref must have the same shape, dtype, and device as x"); 38 | TORCH_CHECK(yref.numel() == 0 || (yref.sizes() == x.sizes() && yref.dtype() == x.dtype() && yref.device() == x.device()), "yref must have the same shape, dtype, and device as x"); 39 | TORCH_CHECK(dy.numel() == 0 || (dy.sizes() == x.sizes() && dy.dtype() == x.dtype() && dy.device() == x.device()), "dy must have the same dtype and device as x"); 40 | TORCH_CHECK(x.numel() <= INT_MAX, "x is too large"); 41 | TORCH_CHECK(b.dim() == 1, "b must have rank 1"); 42 | TORCH_CHECK(b.numel() == 0 || (dim >= 0 && dim < x.dim()), "dim is out of bounds"); 43 | TORCH_CHECK(b.numel() == 0 || b.numel() == x.size(dim), "b has wrong number of elements"); 44 | TORCH_CHECK(grad >= 0, "grad must be non-negative"); 45 | 46 | // Validate layout. 47 | TORCH_CHECK(x.is_non_overlapping_and_dense(), "x must be non-overlapping and dense"); 48 | TORCH_CHECK(b.is_contiguous(), "b must be contiguous"); 49 | TORCH_CHECK(xref.numel() == 0 || has_same_layout(xref, x), "xref must have the same layout as x"); 50 | TORCH_CHECK(yref.numel() == 0 || has_same_layout(yref, x), "yref must have the same layout as x"); 51 | TORCH_CHECK(dy.numel() == 0 || has_same_layout(dy, x), "dy must have the same layout as x"); 52 | 53 | // Create output tensor. 54 | const at::cuda::OptionalCUDAGuard device_guard(device_of(x)); 55 | torch::Tensor y = torch::empty_like(x); 56 | TORCH_CHECK(has_same_layout(y, x), "y must have the same layout as x"); 57 | 58 | // Initialize CUDA kernel parameters. 59 | bias_act_kernel_params p; 60 | p.x = x.data_ptr(); 61 | p.b = (b.numel()) ? b.data_ptr() : NULL; 62 | p.xref = (xref.numel()) ? xref.data_ptr() : NULL; 63 | p.yref = (yref.numel()) ? yref.data_ptr() : NULL; 64 | p.dy = (dy.numel()) ? dy.data_ptr() : NULL; 65 | p.y = y.data_ptr(); 66 | p.grad = grad; 67 | p.act = act; 68 | p.alpha = alpha; 69 | p.gain = gain; 70 | p.clamp = clamp; 71 | p.sizeX = (int)x.numel(); 72 | p.sizeB = (int)b.numel(); 73 | p.stepB = (b.numel()) ? (int)x.stride(dim) : 1; 74 | 75 | // Choose CUDA kernel. 76 | void* kernel; 77 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] 78 | { 79 | kernel = choose_bias_act_kernel<scalar_t>(p); 80 | }); 81 | TORCH_CHECK(kernel, "no CUDA kernel found for the specified activation func"); 82 | 83 | // Launch CUDA kernel. 84 | p.loopX = 4; 85 | int blockSize = 4 * 32; 86 | int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1; 87 | void* args[] = {&p}; 88 | AT_CUDA_CHECK(cudaLaunchKernel(kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream())); 89 | return y; 90 | } 91 | 92 | //------------------------------------------------------------------------ 93 | 94 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 95 | { 96 | m.def("bias_act", &bias_act); 97 | } 98 | 99 | //------------------------------------------------------------------------ 100 | -------------------------------------------------------------------------------- /diffusion-stylegan2/torch_utils/ops/bias_act.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include <torch/extension.h> 10 | #include <ATen/cuda/CUDAContext.h> 11 | #include <c10/cuda/CUDAGuard.h> 12 | #include "bias_act.h" 13 | 14 | //------------------------------------------------------------------------ 15 | 16 | static bool has_same_layout(torch::Tensor x, torch::Tensor y) 17 | { 18 | if (x.dim() != y.dim()) 19 | return false; 20 | for (int64_t i = 0; i < x.dim(); i++) 21 | { 22 | if (x.size(i) != y.size(i)) 23 | return false; 24 | if (x.size(i) >= 2 && x.stride(i) != y.stride(i)) 25 | return false; 26 | } 27 | return true; 28 | } 29 | 30 | //------------------------------------------------------------------------ 31 | 32 | static torch::Tensor bias_act(torch::Tensor x, torch::Tensor b, torch::Tensor xref, torch::Tensor yref, torch::Tensor dy, int grad, int dim, int act, float alpha, float gain, float clamp) 33 | { 34 | // Validate arguments. 35 | TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device"); 36 | TORCH_CHECK(b.numel() == 0 || (b.dtype() == x.dtype() && b.device() == x.device()), "b must have the same dtype and device as x"); 37 | TORCH_CHECK(xref.numel() == 0 || (xref.sizes() == x.sizes() && xref.dtype() == x.dtype() && xref.device() == x.device()), "xref must have the same shape, dtype, and device as x"); 38 | TORCH_CHECK(yref.numel() == 0 || (yref.sizes() == x.sizes() && yref.dtype() == x.dtype() && yref.device() == x.device()), "yref must have the same shape, dtype, and device as x"); 39 | TORCH_CHECK(dy.numel() == 0 || (dy.sizes() == x.sizes() && dy.dtype() == x.dtype() && dy.device() == x.device()), "dy must have the same dtype and device as x"); 40 | TORCH_CHECK(x.numel() <= INT_MAX, "x is too large"); 41 | TORCH_CHECK(b.dim() == 1, "b must have rank 1"); 42 | TORCH_CHECK(b.numel() == 0 || (dim >= 0 && dim < x.dim()), "dim is out of bounds"); 43 | TORCH_CHECK(b.numel() == 0 || b.numel() == x.size(dim), "b has wrong number of elements"); 44 | TORCH_CHECK(grad >= 0, "grad must be non-negative"); 45 | 46 | // Validate layout. 47 | TORCH_CHECK(x.is_non_overlapping_and_dense(), "x must be non-overlapping and dense"); 48 | TORCH_CHECK(b.is_contiguous(), "b must be contiguous"); 49 | TORCH_CHECK(xref.numel() == 0 || has_same_layout(xref, x), "xref must have the same layout as x"); 50 | TORCH_CHECK(yref.numel() == 0 || has_same_layout(yref, x), "yref must have the same layout as x"); 51 | TORCH_CHECK(dy.numel() == 0 || has_same_layout(dy, x), "dy must have the same layout as x"); 52 | 53 | // Create output tensor. 54 | const at::cuda::OptionalCUDAGuard device_guard(device_of(x)); 55 | torch::Tensor y = torch::empty_like(x); 56 | TORCH_CHECK(has_same_layout(y, x), "y must have the same layout as x"); 57 | 58 | // Initialize CUDA kernel parameters. 59 | bias_act_kernel_params p; 60 | p.x = x.data_ptr(); 61 | p.b = (b.numel()) ? b.data_ptr() : NULL; 62 | p.xref = (xref.numel()) ? xref.data_ptr() : NULL; 63 | p.yref = (yref.numel()) ? yref.data_ptr() : NULL; 64 | p.dy = (dy.numel()) ? dy.data_ptr() : NULL; 65 | p.y = y.data_ptr(); 66 | p.grad = grad; 67 | p.act = act; 68 | p.alpha = alpha; 69 | p.gain = gain; 70 | p.clamp = clamp; 71 | p.sizeX = (int)x.numel(); 72 | p.sizeB = (int)b.numel(); 73 | p.stepB = (b.numel()) ?
(int)x.stride(dim) : 1; 74 | 75 | // Choose CUDA kernel. 76 | void* kernel; 77 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] 78 | { 79 | kernel = choose_bias_act_kernel<scalar_t>(p); 80 | }); 81 | TORCH_CHECK(kernel, "no CUDA kernel found for the specified activation func"); 82 | 83 | // Launch CUDA kernel. 84 | p.loopX = 4; 85 | int blockSize = 4 * 32; 86 | int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1; 87 | void* args[] = {&p}; 88 | AT_CUDA_CHECK(cudaLaunchKernel(kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream())); 89 | return y; 90 | } 91 | 92 | //------------------------------------------------------------------------ 93 | 94 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 95 | { 96 | m.def("bias_act", &bias_act); 97 | } 98 | 99 | //------------------------------------------------------------------------ 100 | -------------------------------------------------------------------------------- /diffusion-projected-gan/training/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | # 9 | # modified by Axel Sauer for "Projected GANs Converge Faster" 10 | # 11 | import numpy as np 12 | import torch 13 | import torch.nn.functional as F 14 | from torch_utils import training_stats 15 | from torch_utils.ops import upfirdn2d 16 | 17 | 18 | class Loss: 19 | def accumulate_gradients(self, phase, real_img, real_c, gen_z, gen_c, gain, cur_nimg): # to be overridden by subclass 20 | raise NotImplementedError() 21 | 22 | 23 | class ProjectedGANLoss(Loss): 24 | def __init__(self, device, G, D, G_ema, blur_init_sigma=0, blur_fade_kimg=0, **kwargs): 25 | super().__init__() 26 | self.device = device 27 | self.G = G 28 | self.G_ema = G_ema 29 | self.D = D 30 | self.blur_init_sigma = blur_init_sigma 31 | self.blur_fade_kimg = blur_fade_kimg 32 | 33 | def run_G(self, z, c, update_emas=False): 34 | ws = self.G.mapping(z, c, update_emas=update_emas) 35 | img = self.G.synthesis(ws, c, update_emas=False) 36 | return img 37 | 38 | def run_D(self, img, c, blur_sigma=0, update_emas=False): 39 | blur_size = np.floor(blur_sigma * 3) 40 | if blur_size > 0: 41 | with torch.autograd.profiler.record_function('blur'): 42 | f = torch.arange(-blur_size, blur_size + 1, device=img.device).div(blur_sigma).square().neg().exp2() 43 | img = upfirdn2d.filter2d(img, f / f.sum()) 44 | 45 | logits = self.D(img, c) 46 | return logits 47 | 48 | def accumulate_gradients(self, phase, real_img, real_c, gen_z, gen_c, gain, cur_nimg): 49 | assert phase in ['Gmain', 'Greg', 'Gboth', 'Dmain', 'Dreg', 'Dboth'] 50 | do_Gmain = (phase in ['Gmain', 'Gboth']) 51 | do_Dmain = (phase in ['Dmain', 'Dboth']) 52 | if phase in ['Dreg', 'Greg']: return # no regularization needed for PG 53 | 54 | # blurring schedule 55 | blur_sigma = max(1 - cur_nimg / (self.blur_fade_kimg * 1e3), 0) * self.blur_init_sigma if self.blur_fade_kimg > 1 else 0 56 | 57 | if do_Gmain: 58 | 59 | # Gmain: Maximize logits for generated images.
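# [Editor's note, not in the original file:] the objective built below is the
# non-saturating form L_G = -E[D(G(z))], while the discriminator branch further
# down uses the hinge loss L_D = E[relu(1 + D(G(z)))] + E[relu(1 - D(x))], so G
# always receives gradient while D only learns from samples inside the margin.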
60 | with torch.autograd.profiler.record_function('Gmain_forward'): 61 | gen_img = self.run_G(gen_z, gen_c) 62 | gen_logits = self.run_D(gen_img, gen_c, blur_sigma=blur_sigma) 63 | loss_Gmain = (-gen_logits).mean() 64 | 65 | # Logging 66 | training_stats.report('Loss/scores/fake', gen_logits) 67 | training_stats.report('Loss/signs/fake', gen_logits.sign()) 68 | training_stats.report('Loss/G/loss', loss_Gmain) 69 | 70 | with torch.autograd.profiler.record_function('Gmain_backward'): 71 | loss_Gmain.backward() 72 | 73 | if do_Dmain: 74 | 75 | # Dmain: Minimize logits for generated images. 76 | with torch.autograd.profiler.record_function('Dgen_forward'): 77 | gen_img = self.run_G(gen_z, gen_c, update_emas=True) 78 | gen_logits = self.run_D(gen_img, gen_c, blur_sigma=blur_sigma) 79 | loss_Dgen = (F.relu(torch.ones_like(gen_logits) + gen_logits)).mean() 80 | 81 | # Logging 82 | training_stats.report('Loss/scores/fake', gen_logits) 83 | training_stats.report('Loss/signs/fake', gen_logits.sign()) 84 | 85 | with torch.autograd.profiler.record_function('Dgen_backward'): 86 | loss_Dgen.backward() 87 | 88 | # Dmain: Maximize logits for real images. 89 | with torch.autograd.profiler.record_function('Dreal_forward'): 90 | real_img_tmp = real_img.detach().requires_grad_(False) 91 | real_logits = self.run_D(real_img_tmp, real_c, blur_sigma=blur_sigma) 92 | loss_Dreal = (F.relu(torch.ones_like(real_logits) - real_logits)).mean() 93 | 94 | # Logging 95 | training_stats.report('Loss/scores/real', real_logits) 96 | training_stats.report('Loss/signs/real', real_logits.sign()) 97 | training_stats.report('Loss/D/loss', loss_Dgen + loss_Dreal) 98 | 99 | with torch.autograd.profiler.record_function('Dreal_backward'): 100 | loss_Dreal.backward() 101 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/bias_act.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include <torch/extension.h> 10 | #include <ATen/cuda/CUDAContext.h> 11 | #include <c10/cuda/CUDAGuard.h> 12 | #include "bias_act.h" 13 | 14 | //------------------------------------------------------------------------ 15 | 16 | static bool has_same_layout(torch::Tensor x, torch::Tensor y) 17 | { 18 | if (x.dim() != y.dim()) 19 | return false; 20 | for (int64_t i = 0; i < x.dim(); i++) 21 | { 22 | if (x.size(i) != y.size(i)) 23 | return false; 24 | if (x.size(i) >= 2 && x.stride(i) != y.stride(i)) 25 | return false; 26 | } 27 | return true; 28 | } 29 | 30 | //------------------------------------------------------------------------ 31 | 32 | static torch::Tensor bias_act(torch::Tensor x, torch::Tensor b, torch::Tensor xref, torch::Tensor yref, torch::Tensor dy, int grad, int dim, int act, float alpha, float gain, float clamp) 33 | { 34 | // Validate arguments.
35 | TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device"); 36 | TORCH_CHECK(b.numel() == 0 || (b.dtype() == x.dtype() && b.device() == x.device()), "b must have the same dtype and device as x"); 37 | TORCH_CHECK(xref.numel() == 0 || (xref.sizes() == x.sizes() && xref.dtype() == x.dtype() && xref.device() == x.device()), "xref must have the same shape, dtype, and device as x"); 38 | TORCH_CHECK(yref.numel() == 0 || (yref.sizes() == x.sizes() && yref.dtype() == x.dtype() && yref.device() == x.device()), "yref must have the same shape, dtype, and device as x"); 39 | TORCH_CHECK(dy.numel() == 0 || (dy.sizes() == x.sizes() && dy.dtype() == x.dtype() && dy.device() == x.device()), "dy must have the same shape, dtype, and device as x"); 40 | TORCH_CHECK(x.numel() <= INT_MAX, "x is too large"); 41 | TORCH_CHECK(b.dim() == 1, "b must have rank 1"); 42 | TORCH_CHECK(b.numel() == 0 || (dim >= 0 && dim < x.dim()), "dim is out of bounds"); 43 | TORCH_CHECK(b.numel() == 0 || b.numel() == x.size(dim), "b has wrong number of elements"); 44 | TORCH_CHECK(grad >= 0, "grad must be non-negative"); 45 | 46 | // Validate layout. 47 | TORCH_CHECK(x.is_non_overlapping_and_dense(), "x must be non-overlapping and dense"); 48 | TORCH_CHECK(b.is_contiguous(), "b must be contiguous"); 49 | TORCH_CHECK(xref.numel() == 0 || has_same_layout(xref, x), "xref must have the same layout as x"); 50 | TORCH_CHECK(yref.numel() == 0 || has_same_layout(yref, x), "yref must have the same layout as x"); 51 | TORCH_CHECK(dy.numel() == 0 || has_same_layout(dy, x), "dy must have the same layout as x"); 52 | 53 | // Create output tensor. 54 | const at::cuda::OptionalCUDAGuard device_guard(device_of(x)); 55 | torch::Tensor y = torch::empty_like(x); 56 | TORCH_CHECK(has_same_layout(y, x), "y must have the same layout as x"); 57 | 58 | // Initialize CUDA kernel parameters. 59 | bias_act_kernel_params p; 60 | p.x = x.data_ptr(); 61 | p.b = (b.numel()) ? b.data_ptr() : NULL; 62 | p.xref = (xref.numel()) ? xref.data_ptr() : NULL; 63 | p.yref = (yref.numel()) ? yref.data_ptr() : NULL; 64 | p.dy = (dy.numel()) ? dy.data_ptr() : NULL; 65 | p.y = y.data_ptr(); 66 | p.grad = grad; 67 | p.act = act; 68 | p.alpha = alpha; 69 | p.gain = gain; 70 | p.clamp = clamp; 71 | p.sizeX = (int)x.numel(); 72 | p.sizeB = (int)b.numel(); 73 | p.stepB = (b.numel()) ? (int)x.stride(dim) : 1; 74 | 75 | // Choose CUDA kernel. 76 | void* kernel; 77 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "bias_act_cuda", [&] 78 | { 79 | kernel = choose_bias_act_kernel(p); 80 | }); 81 | TORCH_CHECK(kernel, "no CUDA kernel found for the specified activation func"); 82 | 83 | // Launch CUDA kernel.
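    // One-dimensional launch: 128 threads per block, each block covering
    // loopX * blockSize = 512 elements, so gridSize is ceil(sizeX / 512).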
84 | p.loopX = 4; 85 | int blockSize = 4 * 32; 86 | int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1; 87 | void* args[] = {&p}; 88 | AT_CUDA_CHECK(cudaLaunchKernel(kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream())); 89 | return y; 90 | } 91 | 92 | //------------------------------------------------------------------------ 93 | 94 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 95 | { 96 | m.def("bias_act", &bias_act); 97 | } 98 | 99 | //------------------------------------------------------------------------ 100 | -------------------------------------------------------------------------------- /diffusion-projected-gan/environment.yml: -------------------------------------------------------------------------------- 1 | name: pg 2 | channels: 3 | - anaconda 4 | - nvidia 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=conda_forge 9 | - _openmp_mutex=4.5=1_gnu 10 | - absl-py=1.0.0=pyhd8ed1ab_0 11 | - aiohttp=3.7.0=py39h07f9747_0 12 | - async-timeout=3.0.1=py_1000 13 | - attrs=21.2.0=pyhd8ed1ab_0 14 | - blas=1.0=mkl 15 | - blinker=1.4=py_1 16 | - brotli=1.0.9=he6710b0_2 17 | - brotlipy=0.7.0=py39h27cfd23_1003 18 | - c-ares=1.18.1=h7f98852_0 19 | - ca-certificates=2021.10.8=ha878542_0 20 | - cachetools=4.2.4=pyhd8ed1ab_0 21 | - certifi=2021.10.8=py39hf3d152e_1 22 | - cffi=1.14.6=py39h400218f_0 23 | - chardet=3.0.4=py39h079e4ff_1008 24 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 25 | - click=8.0.3=pyhd3eb1b0_0 26 | - cryptography=35.0.0=py39hd23ed53_0 27 | - cudatoolkit=11.1.74=h6bb024c_0 28 | - cudnn=8.2.1.32=h86fa8c9_0 29 | - cycler=0.10.0=py39h06a4308_0 30 | - dataclasses=0.8=pyhc8e2a94_3 31 | - dbus=1.13.18=hb2f20db_0 32 | - dill=0.3.2=py_0 33 | - expat=2.4.1=h2531618_2 34 | - fontconfig=2.13.1=h6c09931_0 35 | - fonttools=4.25.0=pyhd3eb1b0_0 36 | - freetype=2.11.0=h70c0345_0 37 | - future=0.18.2=py39hf3d152e_4 38 | - glib=2.69.1=h5202010_0 39 | - google-auth=2.3.3=pyh6c4a22f_0 40 | - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0 41 | - grpcio=1.38.1=py39hff7568b_0 42 | - gst-plugins-base=1.14.0=h8213a91_2 43 | - gstreamer=1.14.0=h28cd5cc_2 44 | - icu=58.2=he6710b0_3 45 | - idna=3.3=pyhd3eb1b0_0 46 | - imageio=2.9.0=pyhd3eb1b0_0 47 | - importlib-metadata=4.8.2=py39hf3d152e_0 48 | - intel-openmp=2021.4.0=h06a4308_3561 49 | - jpeg=9d=h7f8727e_0 50 | - kiwisolver=1.3.1=py39h2531618_0 51 | - lcms2=2.12=h3be6417_0 52 | - ld_impl_linux-64=2.35.1=h7274673_9 53 | - libblas=3.9.0=12_linux64_mkl 54 | - libffi=3.3=he6710b0_2 55 | - libgcc-ng=11.2.0=h1d223b6_11 56 | - libgfortran-ng=7.5.0=ha8ba4b0_17 57 | - libgfortran4=7.5.0=ha8ba4b0_17 58 | - libgomp=11.2.0=h1d223b6_11 59 | - liblapack=3.9.0=12_linux64_mkl 60 | - libpng=1.6.37=hbc83047_0 61 | - libprotobuf=3.18.0=h780b84a_1 62 | - libstdcxx-ng=11.2.0=he4da1e4_11 63 | - libtiff=4.2.0=h85742a9_0 64 | - libuuid=1.0.3=h7f8727e_2 65 | - libuv=1.40.0=h7b6447c_0 66 | - libwebp-base=1.2.0=h27cfd23_0 67 | - libxcb=1.14=h7b6447c_0 68 | - libxml2=2.9.12=h03d6c58_0 69 | - lz4-c=1.9.3=h295c915_1 70 | - magma=2.5.4=ha9b7cf9_2 71 | - markdown=3.3.6=pyhd8ed1ab_0 72 | - matplotlib=3.4.2=py39h06a4308_0 73 | - matplotlib-base=3.4.2=py39hab158f2_0 74 | - mkl=2021.4.0=h06a4308_640 75 | - mkl-service=2.4.0=py39h7f8727e_0 76 | - mkl_fft=1.3.1=py39hd3c417c_0 77 | - mkl_random=1.2.2=py39h51133e4_0 78 | - multidict=5.2.0=py39h3811e60_1 79 | - munkres=1.1.4=py_0 80 | - nccl=2.11.4.1=h97a9cb7_0 81 | - ncurses=6.3=h7f8727e_2 82 | - ninja=1.10.2=py39hd09550d_3 83 | - numpy=1.21.2=py39h20f2e39_0 84 | - numpy-base=1.21.2=py39h79a1101_0 85 | - 
oauthlib=3.1.1=pyhd8ed1ab_0 86 | - olefile=0.46=pyhd3eb1b0_0 87 | - openjpeg=2.4.0=h3ad879b_0 88 | - openssl=1.1.1l=h7f98852_0 89 | - pcre=8.45=h295c915_0 90 | - pillow=8.3.1=py39h2c7a002_0 91 | - pip=21.2.4=py39h06a4308_0 92 | - protobuf=3.18.0=py39he80948d_0 93 | - psutil=5.8.0=py39h3811e60_1 94 | - pyasn1=0.4.8=py_0 95 | - pyasn1-modules=0.2.7=py_0 96 | - pycparser=2.21=pyhd3eb1b0_0 97 | - pyjwt=2.3.0=pyhd8ed1ab_0 98 | - pyopenssl=21.0.0=pyhd3eb1b0_1 99 | - pyparsing=3.0.4=pyhd3eb1b0_0 100 | - pyqt=5.9.2=py39h2531618_6 101 | - pysocks=1.7.1=py39h06a4308_0 102 | - python=3.9.7=h12debd9_1 103 | - python-dateutil=2.8.2=pyhd3eb1b0_0 104 | - python_abi=3.9=2_cp39 105 | - pytorch=1.9.1=cuda111py39hb4a4491_3 106 | - pytorch-gpu=1.9.1=cuda111py39h788eb59_3 107 | - pyu2f=0.1.5=pyhd8ed1ab_0 108 | - qt=5.9.7=h5867ecd_1 109 | - readline=8.1=h27cfd23_0 110 | - requests=2.26.0=pyhd3eb1b0_0 111 | - requests-oauthlib=1.3.0=pyh9f0ad1d_0 112 | - rsa=4.8=pyhd8ed1ab_0 113 | - scipy=1.7.1=py39h292c36d_2 114 | - setuptools=58.0.4=py39h06a4308_0 115 | - sip=4.19.13=py39h2531618_0 116 | - six=1.16.0=pyhd3eb1b0_0 117 | - sleef=3.5.1=h9b69904_2 118 | - sqlite=3.36.0=hc218d9a_0 119 | - tensorboard=2.7.0=pyhd8ed1ab_0 120 | - tensorboard-data-server=0.6.0=py39h95dcef6_1 121 | - tensorboard-plugin-wit=1.8.0=pyh44b312d_0 122 | - timm=0.4.12=pyhd8ed1ab_0 123 | - tk=8.6.11=h1ccaba5_0 124 | - torchvision=0.10.1=py39cuda111hcd06603_0_cuda 125 | - tornado=6.1=py39h27cfd23_0 126 | - tqdm=4.62.2=pyhd3eb1b0_1 127 | - typing_extensions=3.10.0.2=pyh06a4308_0 128 | - tzdata=2021e=hda174b7_0 129 | - urllib3=1.26.7=pyhd3eb1b0_0 130 | - werkzeug=2.0.1=pyhd8ed1ab_0 131 | - wheel=0.37.0=pyhd3eb1b0_1 132 | - xz=5.2.5=h7b6447c_0 133 | - yarl=1.7.2=py39h3811e60_1 134 | - zipp=3.6.0=pyhd8ed1ab_0 135 | - zlib=1.2.11=h7b6447c_3 136 | - zstd=1.4.9=haebb681_0 137 | - pip: 138 | - glfw==2.2.0 139 | - imageio-ffmpeg==0.4.3 140 | - imgui==1.3.0 141 | - pyopengl==3.1.5 142 | - pyspng==0.1.0 143 | -------------------------------------------------------------------------------- /diffusion-insgen/torch_utils/ops/upfirdn2d.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include 10 | #include 11 | #include 12 | #include "upfirdn2d.h" 13 | 14 | //------------------------------------------------------------------------ 15 | 16 | static torch::Tensor upfirdn2d(torch::Tensor x, torch::Tensor f, int upx, int upy, int downx, int downy, int padx0, int padx1, int pady0, int pady1, bool flip, float gain) 17 | { 18 | // Validate arguments. 
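    // Contract: x is an NCHW batch (rank 4) and f is a single rank-2 float32
    // FIR filter shared by all samples and channels; upsampling, padding,
    // filtering, and downsampling are fused into one kernel pass.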
19 | TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device"); 20 | TORCH_CHECK(f.device() == x.device(), "f must reside on the same device as x"); 21 | TORCH_CHECK(f.dtype() == torch::kFloat, "f must be float32"); 22 | TORCH_CHECK(x.numel() <= INT_MAX, "x is too large"); 23 | TORCH_CHECK(f.numel() <= INT_MAX, "f is too large"); 24 | TORCH_CHECK(x.dim() == 4, "x must be rank 4"); 25 | TORCH_CHECK(f.dim() == 2, "f must be rank 2"); 26 | TORCH_CHECK(f.size(0) >= 1 && f.size(1) >= 1, "f must be at least 1x1"); 27 | TORCH_CHECK(upx >= 1 && upy >= 1, "upsampling factor must be at least 1"); 28 | TORCH_CHECK(downx >= 1 && downy >= 1, "downsampling factor must be at least 1"); 29 | 30 | // Create output tensor. 31 | const at::cuda::OptionalCUDAGuard device_guard(device_of(x)); 32 | int outW = ((int)x.size(3) * upx + padx0 + padx1 - (int)f.size(1) + downx) / downx; 33 | int outH = ((int)x.size(2) * upy + pady0 + pady1 - (int)f.size(0) + downy) / downy; 34 | TORCH_CHECK(outW >= 1 && outH >= 1, "output must be at least 1x1"); 35 | torch::Tensor y = torch::empty({x.size(0), x.size(1), outH, outW}, x.options(), x.suggest_memory_format()); 36 | TORCH_CHECK(y.numel() <= INT_MAX, "output is too large"); 37 | 38 | // Initialize CUDA kernel parameters. 39 | upfirdn2d_kernel_params p; 40 | p.x = x.data_ptr(); 41 | p.f = f.data_ptr(); 42 | p.y = y.data_ptr(); 43 | p.up = make_int2(upx, upy); 44 | p.down = make_int2(downx, downy); 45 | p.pad0 = make_int2(padx0, pady0); 46 | p.flip = (flip) ? 1 : 0; 47 | p.gain = gain; 48 | p.inSize = make_int4((int)x.size(3), (int)x.size(2), (int)x.size(1), (int)x.size(0)); 49 | p.inStride = make_int4((int)x.stride(3), (int)x.stride(2), (int)x.stride(1), (int)x.stride(0)); 50 | p.filterSize = make_int2((int)f.size(1), (int)f.size(0)); 51 | p.filterStride = make_int2((int)f.stride(1), (int)f.stride(0)); 52 | p.outSize = make_int4((int)y.size(3), (int)y.size(2), (int)y.size(1), (int)y.size(0)); 53 | p.outStride = make_int4((int)y.stride(3), (int)y.stride(2), (int)y.stride(1), (int)y.stride(0)); 54 | p.sizeMajor = (p.inStride.z == 1) ? p.inSize.w : p.inSize.w * p.inSize.z; 55 | p.sizeMinor = (p.inStride.z == 1) ? p.inSize.z : 1; 56 | 57 | // Choose CUDA kernel. 58 | upfirdn2d_kernel_spec spec; 59 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] 60 | { 61 | spec = choose_upfirdn2d_kernel(p); 62 | }); 63 | 64 | // Set looping options. 65 | p.loopMajor = (p.sizeMajor - 1) / 16384 + 1; 66 | p.loopMinor = spec.loopMinor; 67 | p.loopX = spec.loopX; 68 | p.launchMinor = (p.sizeMinor - 1) / p.loopMinor + 1; 69 | p.launchMajor = (p.sizeMajor - 1) / p.loopMajor + 1; 70 | 71 | // Compute grid size. 72 | dim3 blockSize, gridSize; 73 | if (spec.tileOutW < 0) // large 74 | { 75 | blockSize = dim3(4, 32, 1); 76 | gridSize = dim3( 77 | ((p.outSize.y - 1) / blockSize.x + 1) * p.launchMinor, 78 | (p.outSize.x - 1) / (blockSize.y * p.loopX) + 1, 79 | p.launchMajor); 80 | } 81 | else // small 82 | { 83 | blockSize = dim3(256, 1, 1); 84 | gridSize = dim3( 85 | ((p.outSize.y - 1) / spec.tileOutH + 1) * p.launchMinor, 86 | (p.outSize.x - 1) / (spec.tileOutW * p.loopX) + 1, 87 | p.launchMajor); 88 | } 89 | 90 | // Launch CUDA kernel. 
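    // The kernel receives the whole parameter struct by value, so args holds
    // a single pointer to p; spec.kernel was resolved above per dtype.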
91 | void* args[] = {&p}; 92 | AT_CUDA_CHECK(cudaLaunchKernel(spec.kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream())); 93 | return y; 94 | } 95 | 96 | //------------------------------------------------------------------------ 97 | 98 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 99 | { 100 | m.def("upfirdn2d", &upfirdn2d); 101 | } 102 | 103 | //------------------------------------------------------------------------ 104 | -------------------------------------------------------------------------------- /diffusion-stylegan2/torch_utils/ops/upfirdn2d.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include 10 | #include 11 | #include 12 | #include "upfirdn2d.h" 13 | 14 | //------------------------------------------------------------------------ 15 | 16 | static torch::Tensor upfirdn2d(torch::Tensor x, torch::Tensor f, int upx, int upy, int downx, int downy, int padx0, int padx1, int pady0, int pady1, bool flip, float gain) 17 | { 18 | // Validate arguments. 19 | TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device"); 20 | TORCH_CHECK(f.device() == x.device(), "f must reside on the same device as x"); 21 | TORCH_CHECK(f.dtype() == torch::kFloat, "f must be float32"); 22 | TORCH_CHECK(x.numel() <= INT_MAX, "x is too large"); 23 | TORCH_CHECK(f.numel() <= INT_MAX, "f is too large"); 24 | TORCH_CHECK(x.dim() == 4, "x must be rank 4"); 25 | TORCH_CHECK(f.dim() == 2, "f must be rank 2"); 26 | TORCH_CHECK(f.size(0) >= 1 && f.size(1) >= 1, "f must be at least 1x1"); 27 | TORCH_CHECK(upx >= 1 && upy >= 1, "upsampling factor must be at least 1"); 28 | TORCH_CHECK(downx >= 1 && downy >= 1, "downsampling factor must be at least 1"); 29 | 30 | // Create output tensor. 31 | const at::cuda::OptionalCUDAGuard device_guard(device_of(x)); 32 | int outW = ((int)x.size(3) * upx + padx0 + padx1 - (int)f.size(1) + downx) / downx; 33 | int outH = ((int)x.size(2) * upy + pady0 + pady1 - (int)f.size(0) + downy) / downy; 34 | TORCH_CHECK(outW >= 1 && outH >= 1, "output must be at least 1x1"); 35 | torch::Tensor y = torch::empty({x.size(0), x.size(1), outH, outW}, x.options(), x.suggest_memory_format()); 36 | TORCH_CHECK(y.numel() <= INT_MAX, "output is too large"); 37 | 38 | // Initialize CUDA kernel parameters. 39 | upfirdn2d_kernel_params p; 40 | p.x = x.data_ptr(); 41 | p.f = f.data_ptr(); 42 | p.y = y.data_ptr(); 43 | p.up = make_int2(upx, upy); 44 | p.down = make_int2(downx, downy); 45 | p.pad0 = make_int2(padx0, pady0); 46 | p.flip = (flip) ? 
1 : 0; 47 | p.gain = gain; 48 | p.inSize = make_int4((int)x.size(3), (int)x.size(2), (int)x.size(1), (int)x.size(0)); 49 | p.inStride = make_int4((int)x.stride(3), (int)x.stride(2), (int)x.stride(1), (int)x.stride(0)); 50 | p.filterSize = make_int2((int)f.size(1), (int)f.size(0)); 51 | p.filterStride = make_int2((int)f.stride(1), (int)f.stride(0)); 52 | p.outSize = make_int4((int)y.size(3), (int)y.size(2), (int)y.size(1), (int)y.size(0)); 53 | p.outStride = make_int4((int)y.stride(3), (int)y.stride(2), (int)y.stride(1), (int)y.stride(0)); 54 | p.sizeMajor = (p.inStride.z == 1) ? p.inSize.w : p.inSize.w * p.inSize.z; 55 | p.sizeMinor = (p.inStride.z == 1) ? p.inSize.z : 1; 56 | 57 | // Choose CUDA kernel. 58 | upfirdn2d_kernel_spec spec; 59 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] 60 | { 61 | spec = choose_upfirdn2d_kernel(p); 62 | }); 63 | 64 | // Set looping options. 65 | p.loopMajor = (p.sizeMajor - 1) / 16384 + 1; 66 | p.loopMinor = spec.loopMinor; 67 | p.loopX = spec.loopX; 68 | p.launchMinor = (p.sizeMinor - 1) / p.loopMinor + 1; 69 | p.launchMajor = (p.sizeMajor - 1) / p.loopMajor + 1; 70 | 71 | // Compute grid size. 72 | dim3 blockSize, gridSize; 73 | if (spec.tileOutW < 0) // large 74 | { 75 | blockSize = dim3(4, 32, 1); 76 | gridSize = dim3( 77 | ((p.outSize.y - 1) / blockSize.x + 1) * p.launchMinor, 78 | (p.outSize.x - 1) / (blockSize.y * p.loopX) + 1, 79 | p.launchMajor); 80 | } 81 | else // small 82 | { 83 | blockSize = dim3(256, 1, 1); 84 | gridSize = dim3( 85 | ((p.outSize.y - 1) / spec.tileOutH + 1) * p.launchMinor, 86 | (p.outSize.x - 1) / (spec.tileOutW * p.loopX) + 1, 87 | p.launchMajor); 88 | } 89 | 90 | // Launch CUDA kernel. 91 | void* args[] = {&p}; 92 | AT_CUDA_CHECK(cudaLaunchKernel(spec.kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream())); 93 | return y; 94 | } 95 | 96 | //------------------------------------------------------------------------ 97 | 98 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 99 | { 100 | m.def("upfirdn2d", &upfirdn2d); 101 | } 102 | 103 | //------------------------------------------------------------------------ 104 | -------------------------------------------------------------------------------- /diffusion-insgen/style_mixing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
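# Style mixing: for each (row, col) seed pair, the row seed's W vector is
# cloned and the layers selected by --styles are overwritten with the column
# seed's W, so rows keep their coarse structure while columns supply the
# mixed-in style layers.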
8 | 9 | """Generate style mixing image matrix using pretrained network pickle.""" 10 | 11 | import os 12 | import re 13 | from typing import List 14 | 15 | import click 16 | import dnnlib 17 | import numpy as np 18 | import PIL.Image 19 | import torch 20 | 21 | import legacy 22 | 23 | #---------------------------------------------------------------------------- 24 | 25 | def num_range(s: str) -> List[int]: 26 | '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' 27 | 28 | range_re = re.compile(r'^(\d+)-(\d+)$') 29 | m = range_re.match(s) 30 | if m: 31 | return list(range(int(m.group(1)), int(m.group(2))+1)) 32 | vals = s.split(',') 33 | return [int(x) for x in vals] 34 | 35 | #---------------------------------------------------------------------------- 36 | 37 | @click.command() 38 | @click.option('--network', 'network_pkl', help='Network pickle filename', required=True) 39 | @click.option('--rows', 'row_seeds', type=num_range, help='Random seeds to use for image rows', required=True) 40 | @click.option('--cols', 'col_seeds', type=num_range, help='Random seeds to use for image columns', required=True) 41 | @click.option('--styles', 'col_styles', type=num_range, help='Style layer range', default='0-6', show_default=True) 42 | @click.option('--trunc', 'truncation_psi', type=float, help='Truncation psi', default=1, show_default=True) 43 | @click.option('--noise-mode', help='Noise mode', type=click.Choice(['const', 'random', 'none']), default='const', show_default=True) 44 | @click.option('--outdir', type=str, required=True) 45 | def generate_style_mix( 46 | network_pkl: str, 47 | row_seeds: List[int], 48 | col_seeds: List[int], 49 | col_styles: List[int], 50 | truncation_psi: float, 51 | noise_mode: str, 52 | outdir: str 53 | ): 54 | """Generate images using pretrained network pickle. 55 | 56 | Examples: 57 | 58 | \b 59 | python style_mixing.py --outdir=out --rows=85,100,75,458,1500 --cols=55,821,1789,293 \\ 60 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl 61 | """ 62 | print('Loading networks from "%s"...' 
% network_pkl) 63 | device = torch.device('cuda') 64 | with dnnlib.util.open_url(network_pkl) as f: 65 | G = legacy.load_network_pkl(f)['G_ema'].to(device) # type: ignore 66 | 67 | os.makedirs(outdir, exist_ok=True) 68 | 69 | print('Generating W vectors...') 70 | all_seeds = list(set(row_seeds + col_seeds)) 71 | all_z = np.stack([np.random.RandomState(seed).randn(G.z_dim) for seed in all_seeds]) 72 | all_w = G.mapping(torch.from_numpy(all_z).to(device), None) 73 | w_avg = G.mapping.w_avg 74 | all_w = w_avg + (all_w - w_avg) * truncation_psi 75 | w_dict = {seed: w for seed, w in zip(all_seeds, list(all_w))} 76 | 77 | print('Generating images...') 78 | all_images = G.synthesis(all_w, noise_mode=noise_mode) 79 | all_images = (all_images.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8).cpu().numpy() 80 | image_dict = {(seed, seed): image for seed, image in zip(all_seeds, list(all_images))} 81 | 82 | print('Generating style-mixed images...') 83 | for row_seed in row_seeds: 84 | for col_seed in col_seeds: 85 | w = w_dict[row_seed].clone() 86 | w[col_styles] = w_dict[col_seed][col_styles] 87 | image = G.synthesis(w[np.newaxis], noise_mode=noise_mode) 88 | image = (image.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8) 89 | image_dict[(row_seed, col_seed)] = image[0].cpu().numpy() 90 | 91 | print('Saving images...') 92 | os.makedirs(outdir, exist_ok=True) 93 | for (row_seed, col_seed), image in image_dict.items(): 94 | PIL.Image.fromarray(image, 'RGB').save(f'{outdir}/{row_seed}-{col_seed}.png') 95 | 96 | print('Saving image grid...') 97 | W = G.img_resolution 98 | H = G.img_resolution 99 | canvas = PIL.Image.new('RGB', (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1)), 'black') 100 | for row_idx, row_seed in enumerate([0] + row_seeds): 101 | for col_idx, col_seed in enumerate([0] + col_seeds): 102 | if row_idx == 0 and col_idx == 0: 103 | continue 104 | key = (row_seed, col_seed) 105 | if row_idx == 0: 106 | key = (col_seed, col_seed) 107 | if col_idx == 0: 108 | key = (row_seed, row_seed) 109 | canvas.paste(PIL.Image.fromarray(image_dict[key], 'RGB'), (W * col_idx, H * row_idx)) 110 | canvas.save(f'{outdir}/grid.png') 111 | 112 | 113 | #---------------------------------------------------------------------------- 114 | 115 | if __name__ == "__main__": 116 | generate_style_mix() # pylint: disable=no-value-for-parameter 117 | 118 | #---------------------------------------------------------------------------- 119 | -------------------------------------------------------------------------------- /diffusion-stylegan2/style_mixing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | """Generate style mixing image matrix using pretrained network pickle.""" 10 | 11 | import os 12 | import re 13 | from typing import List 14 | 15 | import click 16 | import dnnlib 17 | import numpy as np 18 | import PIL.Image 19 | import torch 20 | 21 | import legacy 22 | 23 | #---------------------------------------------------------------------------- 24 | 25 | def num_range(s: str) -> List[int]: 26 | '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' 27 | 28 | range_re = re.compile(r'^(\d+)-(\d+)$') 29 | m = range_re.match(s) 30 | if m: 31 | return list(range(int(m.group(1)), int(m.group(2))+1)) 32 | vals = s.split(',') 33 | return [int(x) for x in vals] 34 | 35 | #---------------------------------------------------------------------------- 36 | 37 | @click.command() 38 | @click.option('--network', 'network_pkl', help='Network pickle filename', required=True) 39 | @click.option('--rows', 'row_seeds', type=num_range, help='Random seeds to use for image rows', required=True) 40 | @click.option('--cols', 'col_seeds', type=num_range, help='Random seeds to use for image columns', required=True) 41 | @click.option('--styles', 'col_styles', type=num_range, help='Style layer range', default='0-6', show_default=True) 42 | @click.option('--trunc', 'truncation_psi', type=float, help='Truncation psi', default=1, show_default=True) 43 | @click.option('--noise-mode', help='Noise mode', type=click.Choice(['const', 'random', 'none']), default='const', show_default=True) 44 | @click.option('--outdir', type=str, required=True) 45 | def generate_style_mix( 46 | network_pkl: str, 47 | row_seeds: List[int], 48 | col_seeds: List[int], 49 | col_styles: List[int], 50 | truncation_psi: float, 51 | noise_mode: str, 52 | outdir: str 53 | ): 54 | """Generate images using pretrained network pickle. 55 | 56 | Examples: 57 | 58 | \b 59 | python style_mixing.py --outdir=out --rows=85,100,75,458,1500 --cols=55,821,1789,293 \\ 60 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl 61 | """ 62 | print('Loading networks from "%s"...' 
% network_pkl) 63 | device = torch.device('cuda') 64 | with dnnlib.util.open_url(network_pkl) as f: 65 | G = legacy.load_network_pkl(f)['G_ema'].to(device) # type: ignore 66 | 67 | os.makedirs(outdir, exist_ok=True) 68 | 69 | print('Generating W vectors...') 70 | all_seeds = list(set(row_seeds + col_seeds)) 71 | all_z = np.stack([np.random.RandomState(seed).randn(G.z_dim) for seed in all_seeds]) 72 | all_w = G.mapping(torch.from_numpy(all_z).to(device), None) 73 | w_avg = G.mapping.w_avg 74 | all_w = w_avg + (all_w - w_avg) * truncation_psi 75 | w_dict = {seed: w for seed, w in zip(all_seeds, list(all_w))} 76 | 77 | print('Generating images...') 78 | all_images = G.synthesis(all_w, noise_mode=noise_mode) 79 | all_images = (all_images.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8).cpu().numpy() 80 | image_dict = {(seed, seed): image for seed, image in zip(all_seeds, list(all_images))} 81 | 82 | print('Generating style-mixed images...') 83 | for row_seed in row_seeds: 84 | for col_seed in col_seeds: 85 | w = w_dict[row_seed].clone() 86 | w[col_styles] = w_dict[col_seed][col_styles] 87 | image = G.synthesis(w[np.newaxis], noise_mode=noise_mode) 88 | image = (image.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8) 89 | image_dict[(row_seed, col_seed)] = image[0].cpu().numpy() 90 | 91 | print('Saving images...') 92 | os.makedirs(outdir, exist_ok=True) 93 | for (row_seed, col_seed), image in image_dict.items(): 94 | PIL.Image.fromarray(image, 'RGB').save(f'{outdir}/{row_seed}-{col_seed}.png') 95 | 96 | print('Saving image grid...') 97 | W = G.img_resolution 98 | H = G.img_resolution 99 | canvas = PIL.Image.new('RGB', (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1)), 'black') 100 | for row_idx, row_seed in enumerate([0] + row_seeds): 101 | for col_idx, col_seed in enumerate([0] + col_seeds): 102 | if row_idx == 0 and col_idx == 0: 103 | continue 104 | key = (row_seed, col_seed) 105 | if row_idx == 0: 106 | key = (col_seed, col_seed) 107 | if col_idx == 0: 108 | key = (row_seed, row_seed) 109 | canvas.paste(PIL.Image.fromarray(image_dict[key], 'RGB'), (W * col_idx, H * row_idx)) 110 | canvas.save(f'{outdir}/grid.png') 111 | 112 | 113 | #---------------------------------------------------------------------------- 114 | 115 | if __name__ == "__main__": 116 | generate_style_mix() # pylint: disable=no-value-for-parameter 117 | 118 | #---------------------------------------------------------------------------- 119 | -------------------------------------------------------------------------------- /diffusion-projected-gan/torch_utils/ops/upfirdn2d.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | #include 10 | #include 11 | #include 12 | #include "upfirdn2d.h" 13 | 14 | //------------------------------------------------------------------------ 15 | 16 | static torch::Tensor upfirdn2d(torch::Tensor x, torch::Tensor f, int upx, int upy, int downx, int downy, int padx0, int padx1, int pady0, int pady1, bool flip, float gain) 17 | { 18 | // Validate arguments. 
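    // This copy adds two guards over the older variants of this file: inputs
    // must be non-empty, and the strided memory footprint (not just numel())
    // must fit in 32-bit ints, which matters for non-dense views whose
    // addressed range exceeds their element count.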
19 | TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device"); 20 | TORCH_CHECK(f.device() == x.device(), "f must reside on the same device as x"); 21 | TORCH_CHECK(f.dtype() == torch::kFloat, "f must be float32"); 22 | TORCH_CHECK(x.numel() <= INT_MAX, "x is too large"); 23 | TORCH_CHECK(f.numel() <= INT_MAX, "f is too large"); 24 | TORCH_CHECK(x.numel() > 0, "x has zero size"); 25 | TORCH_CHECK(f.numel() > 0, "f has zero size"); 26 | TORCH_CHECK(x.dim() == 4, "x must be rank 4"); 27 | TORCH_CHECK(f.dim() == 2, "f must be rank 2"); 28 | TORCH_CHECK((x.size(0)-1)*x.stride(0) + (x.size(1)-1)*x.stride(1) + (x.size(2)-1)*x.stride(2) + (x.size(3)-1)*x.stride(3) <= INT_MAX, "x memory footprint is too large"); 29 | TORCH_CHECK(f.size(0) >= 1 && f.size(1) >= 1, "f must be at least 1x1"); 30 | TORCH_CHECK(upx >= 1 && upy >= 1, "upsampling factor must be at least 1"); 31 | TORCH_CHECK(downx >= 1 && downy >= 1, "downsampling factor must be at least 1"); 32 | 33 | // Create output tensor. 34 | const at::cuda::OptionalCUDAGuard device_guard(device_of(x)); 35 | int outW = ((int)x.size(3) * upx + padx0 + padx1 - (int)f.size(1) + downx) / downx; 36 | int outH = ((int)x.size(2) * upy + pady0 + pady1 - (int)f.size(0) + downy) / downy; 37 | TORCH_CHECK(outW >= 1 && outH >= 1, "output must be at least 1x1"); 38 | torch::Tensor y = torch::empty({x.size(0), x.size(1), outH, outW}, x.options(), x.suggest_memory_format()); 39 | TORCH_CHECK(y.numel() <= INT_MAX, "output is too large"); 40 | TORCH_CHECK((y.size(0)-1)*y.stride(0) + (y.size(1)-1)*y.stride(1) + (y.size(2)-1)*y.stride(2) + (y.size(3)-1)*y.stride(3) <= INT_MAX, "output memory footprint is too large"); 41 | 42 | // Initialize CUDA kernel parameters. 43 | upfirdn2d_kernel_params p; 44 | p.x = x.data_ptr(); 45 | p.f = f.data_ptr(); 46 | p.y = y.data_ptr(); 47 | p.up = make_int2(upx, upy); 48 | p.down = make_int2(downx, downy); 49 | p.pad0 = make_int2(padx0, pady0); 50 | p.flip = (flip) ? 1 : 0; 51 | p.gain = gain; 52 | p.inSize = make_int4((int)x.size(3), (int)x.size(2), (int)x.size(1), (int)x.size(0)); 53 | p.inStride = make_int4((int)x.stride(3), (int)x.stride(2), (int)x.stride(1), (int)x.stride(0)); 54 | p.filterSize = make_int2((int)f.size(1), (int)f.size(0)); 55 | p.filterStride = make_int2((int)f.stride(1), (int)f.stride(0)); 56 | p.outSize = make_int4((int)y.size(3), (int)y.size(2), (int)y.size(1), (int)y.size(0)); 57 | p.outStride = make_int4((int)y.stride(3), (int)y.stride(2), (int)y.stride(1), (int)y.stride(0)); 58 | p.sizeMajor = (p.inStride.z == 1) ? p.inSize.w : p.inSize.w * p.inSize.z; 59 | p.sizeMinor = (p.inStride.z == 1) ? p.inSize.z : 1; 60 | 61 | // Choose CUDA kernel. 62 | upfirdn2d_kernel_spec spec; 63 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] 64 | { 65 | spec = choose_upfirdn2d_kernel(p); 66 | }); 67 | 68 | // Set looping options. 69 | p.loopMajor = (p.sizeMajor - 1) / 16384 + 1; 70 | p.loopMinor = spec.loopMinor; 71 | p.loopX = spec.loopX; 72 | p.launchMinor = (p.sizeMinor - 1) / p.loopMinor + 1; 73 | p.launchMajor = (p.sizeMajor - 1) / p.loopMajor + 1; 74 | 75 | // Compute grid size. 
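    // spec.tileOutW < 0 selects the generic "large" kernel (4x32 thread
    // blocks indexed directly over output pixels); otherwise the tiled
    // "small" kernel runs 256 threads per block over tileOutW x tileOutH
    // output tiles.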
76 | dim3 blockSize, gridSize; 77 | if (spec.tileOutW < 0) // large 78 | { 79 | blockSize = dim3(4, 32, 1); 80 | gridSize = dim3( 81 | ((p.outSize.y - 1) / blockSize.x + 1) * p.launchMinor, 82 | (p.outSize.x - 1) / (blockSize.y * p.loopX) + 1, 83 | p.launchMajor); 84 | } 85 | else // small 86 | { 87 | blockSize = dim3(256, 1, 1); 88 | gridSize = dim3( 89 | ((p.outSize.y - 1) / spec.tileOutH + 1) * p.launchMinor, 90 | (p.outSize.x - 1) / (spec.tileOutW * p.loopX) + 1, 91 | p.launchMajor); 92 | } 93 | 94 | // Launch CUDA kernel. 95 | void* args[] = {&p}; 96 | AT_CUDA_CHECK(cudaLaunchKernel(spec.kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream())); 97 | return y; 98 | } 99 | 100 | //------------------------------------------------------------------------ 101 | 102 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 103 | { 104 | m.def("upfirdn2d", &upfirdn2d); 105 | } 106 | 107 | //------------------------------------------------------------------------ 108 | -------------------------------------------------------------------------------- /diffusion-insgen/torch_utils/custom_ops.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import glob 4 | import torch 5 | import torch.utils.cpp_extension 6 | import importlib 7 | import hashlib 8 | import shutil 9 | from pathlib import Path 10 | 11 | from torch.utils.file_baton import FileBaton 12 | 13 | #---------------------------------------------------------------------------- 14 | # Global options. 15 | 16 | verbosity = 'brief' # Verbosity level: 'none', 'brief', 'full' 17 | 18 | #---------------------------------------------------------------------------- 19 | # Internal helper funcs. 20 | 21 | def _find_compiler_bindir(): 22 | patterns = [ 23 | 'C:/Program Files (x86)/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64', 24 | 'C:/Program Files (x86)/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/Hostx64/x64', 25 | 'C:/Program Files (x86)/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/Hostx64/x64', 26 | 'C:/Program Files (x86)/Microsoft Visual Studio */vc/bin', 27 | ] 28 | for pattern in patterns: 29 | matches = sorted(glob.glob(pattern)) 30 | if len(matches): 31 | return matches[-1] 32 | return None 33 | 34 | #---------------------------------------------------------------------------- 35 | # Main entry point for compiling and loading C++/CUDA plugins. 36 | 37 | _cached_plugins = dict() 38 | 39 | def get_plugin(module_name, sources, **build_kwargs): 40 | assert verbosity in ['none', 'brief', 'full'] 41 | 42 | # Already cached? 43 | if module_name in _cached_plugins: 44 | return _cached_plugins[module_name] 45 | 46 | # Print status. 47 | if verbosity == 'full': 48 | print(f'Setting up PyTorch plugin "{module_name}"...') 49 | elif verbosity == 'brief': 50 | print(f'Setting up PyTorch plugin "{module_name}"... ', end='', flush=True) 51 | 52 | try: # pylint: disable=too-many-nested-blocks 53 | # Make sure we can find the necessary compiler binaries. 54 | if os.name == 'nt' and os.system("where cl.exe >nul 2>nul") != 0: 55 | compiler_bindir = _find_compiler_bindir() 56 | if compiler_bindir is None: 57 | raise RuntimeError(f'Could not find MSVC/GCC/CLANG installation on this computer. Check _find_compiler_bindir() in "{__file__}".') 58 | os.environ['PATH'] += ';' + compiler_bindir 59 | 60 | # Compile and load. 61 | verbose_build = (verbosity == 'full') 62 | 63 | # Incremental build md5sum trickery. 
Copies all the input source files 64 | # into a cached build directory under a combined md5 digest of the input 65 | # source files. Copying is done only if the combined digest has changed. 66 | # This keeps input file timestamps and filenames the same as in previous 67 | # extension builds, allowing for fast incremental rebuilds. 68 | # 69 | # This optimization is done only in case all the source files reside in 70 | # a single directory (just for simplicity) and if the TORCH_EXTENSIONS_DIR 71 | # environment variable is set (we take this as a signal that the user 72 | # actually cares about this.) 73 | source_dirs_set = set(os.path.dirname(source) for source in sources) 74 | if len(source_dirs_set) == 1 and ('TORCH_EXTENSIONS_DIR' in os.environ): 75 | all_source_files = sorted(list(x for x in Path(list(source_dirs_set)[0]).iterdir() if x.is_file())) 76 | 77 | # Compute a combined hash digest for all source files in the same 78 | # custom op directory (usually .cu, .cpp, .py and .h files). 79 | hash_md5 = hashlib.md5() 80 | for src in all_source_files: 81 | with open(src, 'rb') as f: 82 | hash_md5.update(f.read()) 83 | build_dir = torch.utils.cpp_extension._get_build_directory(module_name, verbose=verbose_build) # pylint: disable=protected-access 84 | digest_build_dir = os.path.join(build_dir, hash_md5.hexdigest()) 85 | 86 | if not os.path.isdir(digest_build_dir): 87 | os.makedirs(digest_build_dir, exist_ok=True) 88 | baton = FileBaton(os.path.join(digest_build_dir, 'lock')) 89 | if baton.try_acquire(): 90 | try: 91 | for src in all_source_files: 92 | shutil.copyfile(src, os.path.join(digest_build_dir, os.path.basename(src))) 93 | finally: 94 | baton.release() 95 | else: 96 | # Someone else is copying source files under the digest dir, 97 | # wait until done and continue. 98 | baton.wait() 99 | digest_sources = [os.path.join(digest_build_dir, os.path.basename(x)) for x in sources] 100 | torch.utils.cpp_extension.load(name=module_name, build_directory=build_dir, 101 | verbose=verbose_build, sources=digest_sources, **build_kwargs) 102 | else: 103 | torch.utils.cpp_extension.load(name=module_name, verbose=verbose_build, sources=sources, **build_kwargs) 104 | module = importlib.import_module(module_name) 105 | 106 | except: 107 | if verbosity == 'brief': 108 | print('Failed!') 109 | raise 110 | 111 | # Print status and add to cache. 112 | if verbosity == 'full': 113 | print(f'Done setting up PyTorch plugin "{module_name}".') 114 | elif verbosity == 'brief': 115 | print('Done.') 116 | _cached_plugins[module_name] = module 117 | return module 118 | 119 | #---------------------------------------------------------------------------- 120 | -------------------------------------------------------------------------------- /diffusion-stylegan2/torch_utils/custom_ops.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import glob 4 | import torch 5 | import torch.utils.cpp_extension 6 | import importlib 7 | import hashlib 8 | import shutil 9 | from pathlib import Path 10 | 11 | from torch.utils.file_baton import FileBaton 12 | 13 | #---------------------------------------------------------------------------- 14 | # Global options. 15 | 16 | verbosity = 'brief' # Verbosity level: 'none', 'brief', 'full' 17 | 18 | #---------------------------------------------------------------------------- 19 | # Internal helper funcs. 
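# On Windows, cl.exe is typically on PATH only inside a Developer Command
# Prompt; _find_compiler_bindir() globs common Visual Studio install
# locations and returns the last match in sorted order (in practice, the
# newest installed toolchain).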
20 | 21 | def _find_compiler_bindir(): 22 | patterns = [ 23 | 'C:/Program Files (x86)/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64', 24 | 'C:/Program Files (x86)/Microsoft Visual Studio/*/BuildTools/VC/Tools/MSVC/*/bin/Hostx64/x64', 25 | 'C:/Program Files (x86)/Microsoft Visual Studio/*/Community/VC/Tools/MSVC/*/bin/Hostx64/x64', 26 | 'C:/Program Files (x86)/Microsoft Visual Studio */vc/bin', 27 | ] 28 | for pattern in patterns: 29 | matches = sorted(glob.glob(pattern)) 30 | if len(matches): 31 | return matches[-1] 32 | return None 33 | 34 | #---------------------------------------------------------------------------- 35 | # Main entry point for compiling and loading C++/CUDA plugins. 36 | 37 | _cached_plugins = dict() 38 | 39 | def get_plugin(module_name, sources, **build_kwargs): 40 | assert verbosity in ['none', 'brief', 'full'] 41 | 42 | # Already cached? 43 | if module_name in _cached_plugins: 44 | return _cached_plugins[module_name] 45 | 46 | # Print status. 47 | if verbosity == 'full': 48 | print(f'Setting up PyTorch plugin "{module_name}"...') 49 | elif verbosity == 'brief': 50 | print(f'Setting up PyTorch plugin "{module_name}"... ', end='', flush=True) 51 | 52 | try: # pylint: disable=too-many-nested-blocks 53 | # Make sure we can find the necessary compiler binaries. 54 | if os.name == 'nt' and os.system("where cl.exe >nul 2>nul") != 0: 55 | compiler_bindir = _find_compiler_bindir() 56 | if compiler_bindir is None: 57 | raise RuntimeError(f'Could not find MSVC/GCC/CLANG installation on this computer. Check _find_compiler_bindir() in "{__file__}".') 58 | os.environ['PATH'] += ';' + compiler_bindir 59 | 60 | # Compile and load. 61 | verbose_build = (verbosity == 'full') 62 | 63 | # Incremental build md5sum trickery. Copies all the input source files 64 | # into a cached build directory under a combined md5 digest of the input 65 | # source files. Copying is done only if the combined digest has changed. 66 | # This keeps input file timestamps and filenames the same as in previous 67 | # extension builds, allowing for fast incremental rebuilds. 68 | # 69 | # This optimization is done only in case all the source files reside in 70 | # a single directory (just for simplicity) and if the TORCH_EXTENSIONS_DIR 71 | # environment variable is set (we take this as a signal that the user 72 | # actually cares about this.) 73 | source_dirs_set = set(os.path.dirname(source) for source in sources) 74 | if len(source_dirs_set) == 1 and ('TORCH_EXTENSIONS_DIR' in os.environ): 75 | all_source_files = sorted(list(x for x in Path(list(source_dirs_set)[0]).iterdir() if x.is_file())) 76 | 77 | # Compute a combined hash digest for all source files in the same 78 | # custom op directory (usually .cu, .cpp, .py and .h files). 
79 | hash_md5 = hashlib.md5() 80 | for src in all_source_files: 81 | with open(src, 'rb') as f: 82 | hash_md5.update(f.read()) 83 | build_dir = torch.utils.cpp_extension._get_build_directory(module_name, verbose=verbose_build) # pylint: disable=protected-access 84 | digest_build_dir = os.path.join(build_dir, hash_md5.hexdigest()) 85 | 86 | if not os.path.isdir(digest_build_dir): 87 | os.makedirs(digest_build_dir, exist_ok=True) 88 | baton = FileBaton(os.path.join(digest_build_dir, 'lock')) 89 | if baton.try_acquire(): 90 | try: 91 | for src in all_source_files: 92 | shutil.copyfile(src, os.path.join(digest_build_dir, os.path.basename(src))) 93 | finally: 94 | baton.release() 95 | else: 96 | # Someone else is copying source files under the digest dir, 97 | # wait until done and continue. 98 | baton.wait() 99 | digest_sources = [os.path.join(digest_build_dir, os.path.basename(x)) for x in sources] 100 | torch.utils.cpp_extension.load(name=module_name, build_directory=build_dir, 101 | verbose=verbose_build, sources=digest_sources, **build_kwargs) 102 | else: 103 | torch.utils.cpp_extension.load(name=module_name, verbose=verbose_build, sources=sources, **build_kwargs) 104 | module = importlib.import_module(module_name) 105 | 106 | except: 107 | if verbosity == 'brief': 108 | print('Failed!') 109 | raise 110 | 111 | # Print status and add to cache. 112 | if verbosity == 'full': 113 | print(f'Done setting up PyTorch plugin "{module_name}".') 114 | elif verbosity == 'brief': 115 | print('Done.') 116 | _cached_plugins[module_name] = module 117 | return module 118 | 119 | #---------------------------------------------------------------------------- 120 | -------------------------------------------------------------------------------- /diffusion-projected-gan/metrics/perceptual_path_length.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Perceptual Path Length (PPL) from the paper "A Style-Based Generator 10 | Architecture for Generative Adversarial Networks". Matches the original 11 | implementation by Karras et al. at 12 | https://github.com/NVlabs/stylegan/blob/master/metrics/perceptual_path_length.py""" 13 | 14 | import copy 15 | import numpy as np 16 | import torch 17 | from . import metric_utils 18 | 19 | #---------------------------------------------------------------------------- 20 | 21 | # Spherical interpolation of a batch of vectors. 
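# For unit vectors a, b with angle theta = acos(a . b), this matches the
# classic form
#   slerp(a, b; t) = sin((1-t)*theta)/sin(theta) * a + sin(t*theta)/sin(theta) * b,
# written as a rotation: c is the unit component of b orthogonal to a, so
# a*cos(t*theta) + c*sin(t*theta) traces the great circle from a to b.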
22 | def slerp(a, b, t): 23 | a = a / a.norm(dim=-1, keepdim=True) 24 | b = b / b.norm(dim=-1, keepdim=True) 25 | d = (a * b).sum(dim=-1, keepdim=True) 26 | p = t * torch.acos(d) 27 | c = b - d * a 28 | c = c / c.norm(dim=-1, keepdim=True) 29 | d = a * torch.cos(p) + c * torch.sin(p) 30 | d = d / d.norm(dim=-1, keepdim=True) 31 | return d 32 | 33 | #---------------------------------------------------------------------------- 34 | 35 | class PPLSampler(torch.nn.Module): 36 | def __init__(self, G, G_kwargs, epsilon, space, sampling, crop, vgg16): 37 | assert space in ['z', 'w'] 38 | assert sampling in ['full', 'end'] 39 | super().__init__() 40 | self.G = copy.deepcopy(G) 41 | self.G_kwargs = G_kwargs 42 | self.epsilon = epsilon 43 | self.space = space 44 | self.sampling = sampling 45 | self.crop = crop 46 | self.vgg16 = copy.deepcopy(vgg16) 47 | 48 | def forward(self, c): 49 | # Generate random latents and interpolation t-values. 50 | t = torch.rand([c.shape[0]], device=c.device) * (1 if self.sampling == 'full' else 0) 51 | z0, z1 = torch.randn([c.shape[0] * 2, self.G.z_dim], device=c.device).chunk(2) 52 | 53 | # Interpolate in W or Z. 54 | if self.space == 'w': 55 | w0, w1 = self.G.mapping(z=torch.cat([z0,z1]), c=torch.cat([c,c])).chunk(2) 56 | wt0 = w0.lerp(w1, t.unsqueeze(1).unsqueeze(2)) 57 | wt1 = w0.lerp(w1, t.unsqueeze(1).unsqueeze(2) + self.epsilon) 58 | else: # space == 'z' 59 | zt0 = slerp(z0, z1, t.unsqueeze(1)) 60 | zt1 = slerp(z0, z1, t.unsqueeze(1) + self.epsilon) 61 | wt0, wt1 = self.G.mapping(z=torch.cat([zt0,zt1]), c=torch.cat([c,c])).chunk(2) 62 | 63 | # Randomize noise buffers. 64 | for name, buf in self.G.named_buffers(): 65 | if name.endswith('.noise_const'): 66 | buf.copy_(torch.randn_like(buf)) 67 | 68 | # Generate images. 69 | img = self.G.synthesis(ws=torch.cat([wt0,wt1]), noise_mode='const', force_fp32=True, **self.G_kwargs) 70 | 71 | # Center crop. 72 | if self.crop: 73 | assert img.shape[2] == img.shape[3] 74 | c = img.shape[2] // 8 75 | img = img[:, :, c*3 : c*7, c*2 : c*6] 76 | 77 | # Downsample to 256x256. 78 | factor = self.G.img_resolution // 256 79 | if factor > 1: 80 | img = img.reshape([-1, img.shape[1], img.shape[2] // factor, factor, img.shape[3] // factor, factor]).mean([3, 5]) 81 | 82 | # Scale dynamic range from [-1,1] to [0,255]. 83 | img = (img + 1) * (255 / 2) 84 | if self.G.img_channels == 1: 85 | img = img.repeat([1, 3, 1, 1]) 86 | 87 | # Evaluate differential LPIPS. 88 | lpips_t0, lpips_t1 = self.vgg16(img, resize_images=False, return_lpips=True).chunk(2) 89 | dist = (lpips_t0 - lpips_t1).square().sum(1) / self.epsilon ** 2 90 | return dist 91 | 92 | #---------------------------------------------------------------------------- 93 | 94 | def compute_ppl(opts, num_samples, epsilon, space, sampling, crop, batch_size): 95 | vgg16_url = 'https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/metrics/vgg16.pkl' 96 | vgg16 = metric_utils.get_feature_detector(vgg16_url, num_gpus=opts.num_gpus, rank=opts.rank, verbose=opts.progress.verbose) 97 | 98 | # Setup sampler and labels. 99 | sampler = PPLSampler(G=opts.G, G_kwargs=opts.G_kwargs, epsilon=epsilon, space=space, sampling=sampling, crop=crop, vgg16=vgg16) 100 | sampler.eval().requires_grad_(False).to(opts.device) 101 | c_iter = metric_utils.iterate_random_labels(opts=opts, batch_size=batch_size) 102 | 103 | # Sampling loop. 
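    # Each rank evaluates its own label batches; the broadcast loop below
    # then shares every rank's batch with all ranks, so dist accumulates the
    # same samples everywhere and rank 0 can apply the percentile filter alone.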
104 | dist = [] 105 | progress = opts.progress.sub(tag='ppl sampling', num_items=num_samples) 106 | for batch_start in range(0, num_samples, batch_size * opts.num_gpus): 107 | progress.update(batch_start) 108 | x = sampler(next(c_iter)) 109 | for src in range(opts.num_gpus): 110 | y = x.clone() 111 | if opts.num_gpus > 1: 112 | torch.distributed.broadcast(y, src=src) 113 | dist.append(y) 114 | progress.update(num_samples) 115 | 116 | # Compute PPL. 117 | if opts.rank != 0: 118 | return float('nan') 119 | dist = torch.cat(dist)[:num_samples].cpu().numpy() 120 | lo = np.percentile(dist, 1, interpolation='lower') 121 | hi = np.percentile(dist, 99, interpolation='higher') 122 | ppl = np.extract(np.logical_and(dist >= lo, dist <= hi), dist).mean() 123 | return float(ppl) 124 | 125 | #---------------------------------------------------------------------------- 126 | -------------------------------------------------------------------------------- /diffusion-insgen/generate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Generate images using pretrained network pickle.""" 10 | 11 | import os 12 | import re 13 | from typing import List, Optional 14 | 15 | import click 16 | import dnnlib 17 | import numpy as np 18 | import PIL.Image 19 | import torch 20 | 21 | import legacy 22 | 23 | #---------------------------------------------------------------------------- 24 | 25 | def num_range(s: str) -> List[int]: 26 | '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' 27 | 28 | range_re = re.compile(r'^(\d+)-(\d+)$') 29 | m = range_re.match(s) 30 | if m: 31 | return list(range(int(m.group(1)), int(m.group(2))+1)) 32 | vals = s.split(',') 33 | return [int(x) for x in vals] 34 | 35 | #---------------------------------------------------------------------------- 36 | 37 | @click.command() 38 | @click.pass_context 39 | @click.option('--network', 'network_pkl', help='Network pickle filename', required=True) 40 | @click.option('--seeds', type=num_range, help='List of random seeds') 41 | @click.option('--trunc', 'truncation_psi', type=float, help='Truncation psi', default=1, show_default=True) 42 | @click.option('--class', 'class_idx', type=int, help='Class label (unconditional if not specified)') 43 | @click.option('--noise-mode', help='Noise mode', type=click.Choice(['const', 'random', 'none']), default='const', show_default=True) 44 | @click.option('--projected-w', help='Projection result file', type=str, metavar='FILE') 45 | @click.option('--outdir', help='Where to save the output images', type=str, required=True, metavar='DIR') 46 | def generate_images( 47 | ctx: click.Context, 48 | network_pkl: str, 49 | seeds: Optional[List[int]], 50 | truncation_psi: float, 51 | noise_mode: str, 52 | outdir: str, 53 | class_idx: Optional[int], 54 | projected_w: Optional[str] 55 | ): 56 | """Generate images using pretrained network pickle. 
57 | 58 | Examples: 59 | 60 | \b 61 | # Generate curated MetFaces images without truncation (Fig.10 left) 62 | python generate.py --outdir=out --trunc=1 --seeds=85,265,297,849 \\ 63 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl 64 | 65 | \b 66 | # Generate uncurated MetFaces images with truncation (Fig.12 upper left) 67 | python generate.py --outdir=out --trunc=0.7 --seeds=600-605 \\ 68 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl 69 | 70 | \b 71 | # Generate class conditional CIFAR-10 images (Fig.17 left, Car) 72 | python generate.py --outdir=out --seeds=0-35 --class=1 \\ 73 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/cifar10.pkl 74 | 75 | \b 76 | # Render an image from projected W 77 | python generate.py --outdir=out --projected_w=projected_w.npz \\ 78 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl 79 | """ 80 | 81 | print('Loading networks from "%s"...' % network_pkl) 82 | device = torch.device('cuda') 83 | with dnnlib.util.open_url(network_pkl) as f: 84 | G = legacy.load_network_pkl(f)['G_ema'].to(device) # type: ignore 85 | 86 | os.makedirs(outdir, exist_ok=True) 87 | 88 | # Synthesize the result of a W projection. 89 | if projected_w is not None: 90 | if seeds is not None: 91 | print ('warn: --seeds is ignored when using --projected-w') 92 | print(f'Generating images from projected W "{projected_w}"') 93 | ws = np.load(projected_w)['w'] 94 | ws = torch.tensor(ws, device=device) # pylint: disable=not-callable 95 | assert ws.shape[1:] == (G.num_ws, G.w_dim) 96 | for idx, w in enumerate(ws): 97 | img = G.synthesis(w.unsqueeze(0), noise_mode=noise_mode) 98 | img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8) 99 | img = PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB').save(f'{outdir}/proj{idx:02d}.png') 100 | return 101 | 102 | if seeds is None: 103 | ctx.fail('--seeds option is required when not using --projected-w') 104 | 105 | # Labels. 106 | label = torch.zeros([1, G.c_dim], device=device) 107 | if G.c_dim != 0: 108 | if class_idx is None: 109 | ctx.fail('Must specify class label with --class when using a conditional network') 110 | label[:, class_idx] = 1 111 | else: 112 | if class_idx is not None: 113 | print ('warn: --class=lbl ignored when running on an unconditional network') 114 | 115 | # Generate images. 116 | for seed_idx, seed in enumerate(seeds): 117 | print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds))) 118 | z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(device) 119 | img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode) 120 | img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8) 121 | PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB').save(f'{outdir}/seed{seed:04d}.png') 122 | 123 | 124 | #---------------------------------------------------------------------------- 125 | 126 | if __name__ == "__main__": 127 | generate_images() # pylint: disable=no-value-for-parameter 128 | 129 | #---------------------------------------------------------------------------- 130 | -------------------------------------------------------------------------------- /diffusion-stylegan2/generate.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Generate images using pretrained network pickle.""" 10 | 11 | import os 12 | import re 13 | from typing import List, Optional 14 | 15 | import click 16 | import dnnlib 17 | import numpy as np 18 | import PIL.Image 19 | import torch 20 | 21 | import legacy 22 | 23 | #---------------------------------------------------------------------------- 24 | 25 | def num_range(s: str) -> List[int]: 26 | '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' 27 | 28 | range_re = re.compile(r'^(\d+)-(\d+)$') 29 | m = range_re.match(s) 30 | if m: 31 | return list(range(int(m.group(1)), int(m.group(2))+1)) 32 | vals = s.split(',') 33 | return [int(x) for x in vals] 34 | 35 | #---------------------------------------------------------------------------- 36 | 37 | @click.command() 38 | @click.pass_context 39 | @click.option('--network', 'network_pkl', help='Network pickle filename', required=True) 40 | @click.option('--seeds', type=num_range, help='List of random seeds') 41 | @click.option('--trunc', 'truncation_psi', type=float, help='Truncation psi', default=1, show_default=True) 42 | @click.option('--class', 'class_idx', type=int, help='Class label (unconditional if not specified)') 43 | @click.option('--noise-mode', help='Noise mode', type=click.Choice(['const', 'random', 'none']), default='const', show_default=True) 44 | @click.option('--projected-w', help='Projection result file', type=str, metavar='FILE') 45 | @click.option('--outdir', help='Where to save the output images', type=str, required=True, metavar='DIR') 46 | def generate_images( 47 | ctx: click.Context, 48 | network_pkl: str, 49 | seeds: Optional[List[int]], 50 | truncation_psi: float, 51 | noise_mode: str, 52 | outdir: str, 53 | class_idx: Optional[int], 54 | projected_w: Optional[str] 55 | ): 56 | """Generate images using pretrained network pickle. 57 | 58 | Examples: 59 | 60 | \b 61 | # Generate curated MetFaces images without truncation (Fig.10 left) 62 | python generate.py --outdir=out --trunc=1 --seeds=85,265,297,849 \\ 63 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl 64 | 65 | \b 66 | # Generate uncurated MetFaces images with truncation (Fig.12 upper left) 67 | python generate.py --outdir=out --trunc=0.7 --seeds=600-605 \\ 68 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl 69 | 70 | \b 71 | # Generate class conditional CIFAR-10 images (Fig.17 left, Car) 72 | python generate.py --outdir=out --seeds=0-35 --class=1 \\ 73 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/cifar10.pkl 74 | 75 | \b 76 | # Render an image from projected W 77 | python generate.py --outdir=out --projected_w=projected_w.npz \\ 78 | --network=https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metfaces.pkl 79 | """ 80 | 81 | print('Loading networks from "%s"...' 
% network_pkl) 82 | device = torch.device('cuda') 83 | with dnnlib.util.open_url(network_pkl) as f: 84 | G = legacy.load_network_pkl(f)['G_ema'].to(device) # type: ignore 85 | 86 | os.makedirs(outdir, exist_ok=True) 87 | 88 | # Synthesize the result of a W projection. 89 | if projected_w is not None: 90 | if seeds is not None: 91 | print ('warn: --seeds is ignored when using --projected-w') 92 | print(f'Generating images from projected W "{projected_w}"') 93 | ws = np.load(projected_w)['w'] 94 | ws = torch.tensor(ws, device=device) # pylint: disable=not-callable 95 | assert ws.shape[1:] == (G.num_ws, G.w_dim) 96 | for idx, w in enumerate(ws): 97 | img = G.synthesis(w.unsqueeze(0), noise_mode=noise_mode) 98 | img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8) 99 | img = PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB').save(f'{outdir}/proj{idx:02d}.png') 100 | return 101 | 102 | if seeds is None: 103 | ctx.fail('--seeds option is required when not using --projected-w') 104 | 105 | # Labels. 106 | label = torch.zeros([1, G.c_dim], device=device) 107 | if G.c_dim != 0: 108 | if class_idx is None: 109 | ctx.fail('Must specify class label with --class when using a conditional network') 110 | label[:, class_idx] = 1 111 | else: 112 | if class_idx is not None: 113 | print ('warn: --class=lbl ignored when running on an unconditional network') 114 | 115 | # Generate images. 116 | for seed_idx, seed in enumerate(seeds): 117 | print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds))) 118 | z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(device) 119 | img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode) 120 | img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8) 121 | PIL.Image.fromarray(img[0].cpu().numpy(), 'RGB').save(f'{outdir}/seed{seed:04d}.png') 122 | 123 | 124 | #---------------------------------------------------------------------------- 125 | 126 | if __name__ == "__main__": 127 | generate_images() # pylint: disable=no-value-for-parameter 128 | 129 | #---------------------------------------------------------------------------- 130 | -------------------------------------------------------------------------------- /diffusion-insgen/docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | InsGen 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 | 24 |
25 | 28 |
29 | Data-Efficient Instance Generation from Instance Discrimination 30 |
31 |
32 | 33 |
34 | Ceyuan Yang1,  35 | Yujun Shen2,  36 | Yinghao Xu1,  37 | Bolei Zhou1 38 |
39 |
40 | 1 The Chinese University of Hong Kong
41 | 2 ByteDance Inc.
42 |
43 | 47 |
48 | 49 |
50 |
51 | 52 | 53 | 54 | 55 |
56 |
Overview
57 |
58 | In this work, we develop a novel data-efficient Instance Generation (InsGen) method for training GANs with limited data. With instance discrimination as an auxiliary task, our method makes the best use of both real and fake images to train the discriminator. In turn, the discriminator is exploited to train the generator to synthesize as many diverse images as possible. Experiments under different data regimes show that InsGen brings a substantial improvement over the baseline in terms of both image quality and image diversity, and outperforms previous data augmentation algorithms by a large margin.
59 | 
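As a rough illustration of the auxiliary objective, the sketch below shows a simplified InfoNCE-style instance-discrimination loss. The names feat_q and feat_k are hypothetical placeholders for discriminator features of two augmented views of the same images; the actual InsGen implementation is more elaborate (e.g., real and fake samples are handled by separate contrastive heads).

import torch
import torch.nn.functional as F

def instance_discrimination_loss(feat_q, feat_k, temperature=0.07):
    # Row i of feat_q and feat_k (both N x D) comes from two augmented views
    # of image i; every other row in the batch serves as a negative.
    q = F.normalize(feat_q, dim=1)
    k = F.normalize(feat_k, dim=1)
    logits = q @ k.t() / temperature                    # (N, N) cosine similarities
    labels = torch.arange(q.shape[0], device=q.device)  # positives sit on the diagonal
    return F.cross_entropy(logits, labels)              # InfoNCE objective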
60 |
61 | 62 | 63 | 64 | 65 |
66 |
Results
67 |
68 | Here we provide some synthesized samples for different numbers of training images, together with the corresponding FID scores.
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 77 | 78 | 79 | 80 | 81 |
82 |
83 |
84 | 85 | 86 | 87 | 88 |
89 |
BibTeX
90 |
 91 | @article{yang2021insgen,
 92 |   title   = {Data-Efficient Instance Generation from Instance Discrimination},
 93 |   author  = {Yang, Ceyuan and Shen, Yujun and Xu, Yinghao and Zhou, Bolei},
 94 |   journal = {arXiv preprint arXiv:2106.04566},
 95 |   year    = {2021}
 96 | }
 97 | 
98 | 99 | 100 | 101 |
Related Work
102 |
103 |
104 |
105 | 106 | T. Karras, M. Aittala, J. Hellsten, S. Laine, J. Lehtinen, T. Aila. 107 | Training Generative Adversarial Networks with Limited Data. 108 | NeurIPS, 2020.
109 | Comment: 110 | Proposes an adaptive discriminator augmentation mechanism that significantly stabilizes training in limited data regimes. 111 |
112 |
113 | 114 |
115 |
116 |
117 | 118 | S. Zhao, Z. Liu, J. Lin, JY. Zhu, and S. Han. 119 | Differentiable Augmentation for Data-Efficient GAN Training. 120 | NeurIPS, 2020.
121 | Comment: 122 | Imposes various types of differentiable augmentations on both real and fake samples. 123 |
124 |
125 | 126 |
127 |
128 |
129 | 130 | J. Jeong, J. Shin. 131 | Training GANs with Stronger Augmentations via Contrastive Discriminator. 132 | ICLR, 2021.
133 | Comment:
134 | Proposes a novel GAN discriminator, showing that contrastive representation learning (e.g., SimCLR) and GAN training can benefit each other when trained jointly.
135 | 
136 |
137 | 138 | 139 |
140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /diffusion-insgen/metrics/perceptual_path_length.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Perceptual Path Length (PPL) from the paper "A Style-Based Generator 10 | Architecture for Generative Adversarial Networks". Matches the original 11 | implementation by Karras et al. at 12 | https://github.com/NVlabs/stylegan/blob/master/metrics/perceptual_path_length.py""" 13 | 14 | import copy 15 | import numpy as np 16 | import torch 17 | import dnnlib 18 | from . import metric_utils 19 | 20 | #---------------------------------------------------------------------------- 21 | 22 | # Spherical interpolation of a batch of vectors. 23 | def slerp(a, b, t): 24 | a = a / a.norm(dim=-1, keepdim=True) 25 | b = b / b.norm(dim=-1, keepdim=True) 26 | d = (a * b).sum(dim=-1, keepdim=True) 27 | p = t * torch.acos(d) 28 | c = b - d * a 29 | c = c / c.norm(dim=-1, keepdim=True) 30 | d = a * torch.cos(p) + c * torch.sin(p) 31 | d = d / d.norm(dim=-1, keepdim=True) 32 | return d 33 | 34 | #---------------------------------------------------------------------------- 35 | 36 | class PPLSampler(torch.nn.Module): 37 | def __init__(self, G, G_kwargs, epsilon, space, sampling, crop, vgg16): 38 | assert space in ['z', 'w'] 39 | assert sampling in ['full', 'end'] 40 | super().__init__() 41 | self.G = copy.deepcopy(G) 42 | self.G_kwargs = G_kwargs 43 | self.epsilon = epsilon 44 | self.space = space 45 | self.sampling = sampling 46 | self.crop = crop 47 | self.vgg16 = copy.deepcopy(vgg16) 48 | 49 | def forward(self, c): 50 | # Generate random latents and interpolation t-values. 51 | t = torch.rand([c.shape[0]], device=c.device) * (1 if self.sampling == 'full' else 0) 52 | z0, z1 = torch.randn([c.shape[0] * 2, self.G.z_dim], device=c.device).chunk(2) 53 | 54 | # Interpolate in W or Z. 55 | if self.space == 'w': 56 | w0, w1 = self.G.mapping(z=torch.cat([z0,z1]), c=torch.cat([c,c])).chunk(2) 57 | wt0 = w0.lerp(w1, t.unsqueeze(1).unsqueeze(2)) 58 | wt1 = w0.lerp(w1, t.unsqueeze(1).unsqueeze(2) + self.epsilon) 59 | else: # space == 'z' 60 | zt0 = slerp(z0, z1, t.unsqueeze(1)) 61 | zt1 = slerp(z0, z1, t.unsqueeze(1) + self.epsilon) 62 | wt0, wt1 = self.G.mapping(z=torch.cat([zt0,zt1]), c=torch.cat([c,c])).chunk(2) 63 | 64 | # Randomize noise buffers. 65 | for name, buf in self.G.named_buffers(): 66 | if name.endswith('.noise_const'): 67 | buf.copy_(torch.randn_like(buf)) 68 | 69 | # Generate images. 70 | img = self.G.synthesis(ws=torch.cat([wt0,wt1]), noise_mode='const', force_fp32=True, **self.G_kwargs) 71 | 72 | # Center crop. 73 | if self.crop: 74 | assert img.shape[2] == img.shape[3] 75 | c = img.shape[2] // 8 76 | img = img[:, :, c*3 : c*7, c*2 : c*6] 77 | 78 | # Downsample to 256x256. 
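        # (Integer-factor area pooling: splitting each spatial axis into
        # (size // factor, factor) and averaging over the two factor axes is
        # equivalent to factor x factor average pooling.)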
79 | factor = self.G.img_resolution // 256 80 | if factor > 1: 81 | img = img.reshape([-1, img.shape[1], img.shape[2] // factor, factor, img.shape[3] // factor, factor]).mean([3, 5]) 82 | 83 | # Scale dynamic range from [-1,1] to [0,255]. 84 | img = (img + 1) * (255 / 2) 85 | if self.G.img_channels == 1: 86 | img = img.repeat([1, 3, 1, 1]) 87 | 88 | # Evaluate differential LPIPS. 89 | lpips_t0, lpips_t1 = self.vgg16(img, resize_images=False, return_lpips=True).chunk(2) 90 | dist = (lpips_t0 - lpips_t1).square().sum(1) / self.epsilon ** 2 91 | return dist 92 | 93 | #---------------------------------------------------------------------------- 94 | 95 | def compute_ppl(opts, num_samples, epsilon, space, sampling, crop, batch_size, jit=False): 96 | dataset = dnnlib.util.construct_class_by_name(**opts.dataset_kwargs) 97 | vgg16_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt' 98 | vgg16 = metric_utils.get_feature_detector(vgg16_url, num_gpus=opts.num_gpus, rank=opts.rank, verbose=opts.progress.verbose) 99 | 100 | # Setup sampler. 101 | sampler = PPLSampler(G=opts.G, G_kwargs=opts.G_kwargs, epsilon=epsilon, space=space, sampling=sampling, crop=crop, vgg16=vgg16) 102 | sampler.eval().requires_grad_(False).to(opts.device) 103 | if jit: 104 | c = torch.zeros([batch_size, opts.G.c_dim], device=opts.device) 105 | sampler = torch.jit.trace(sampler, [c], check_trace=False) 106 | 107 | # Sampling loop. 108 | dist = [] 109 | progress = opts.progress.sub(tag='ppl sampling', num_items=num_samples) 110 | for batch_start in range(0, num_samples, batch_size * opts.num_gpus): 111 | progress.update(batch_start) 112 | c = [dataset.get_label(np.random.randint(len(dataset))) for _i in range(batch_size)] 113 | c = torch.from_numpy(np.stack(c)).pin_memory().to(opts.device) 114 | x = sampler(c) 115 | for src in range(opts.num_gpus): 116 | y = x.clone() 117 | if opts.num_gpus > 1: 118 | torch.distributed.broadcast(y, src=src) 119 | dist.append(y) 120 | progress.update(num_samples) 121 | 122 | # Compute PPL. 123 | if opts.rank != 0: 124 | return float('nan') 125 | dist = torch.cat(dist)[:num_samples].cpu().numpy() 126 | lo = np.percentile(dist, 1, interpolation='lower') 127 | hi = np.percentile(dist, 99, interpolation='higher') 128 | ppl = np.extract(np.logical_and(dist >= lo, dist <= hi), dist).mean() 129 | return float(ppl) 130 | 131 | #---------------------------------------------------------------------------- 132 | -------------------------------------------------------------------------------- /diffusion-stylegan2/metrics/perceptual_path_length.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | """Perceptual Path Length (PPL) from the paper "A Style-Based Generator 10 | Architecture for Generative Adversarial Networks". Matches the original 11 | implementation by Karras et al. at 12 | https://github.com/NVlabs/stylegan/blob/master/metrics/perceptual_path_length.py""" 13 | 14 | import copy 15 | import numpy as np 16 | import torch 17 | import dnnlib 18 | from . 
import metric_utils 19 | 20 | #---------------------------------------------------------------------------- 21 | 22 | # Spherical interpolation of a batch of vectors. 23 | def slerp(a, b, t): 24 | a = a / a.norm(dim=-1, keepdim=True) 25 | b = b / b.norm(dim=-1, keepdim=True) 26 | d = (a * b).sum(dim=-1, keepdim=True) 27 | p = t * torch.acos(d) 28 | c = b - d * a 29 | c = c / c.norm(dim=-1, keepdim=True) 30 | d = a * torch.cos(p) + c * torch.sin(p) 31 | d = d / d.norm(dim=-1, keepdim=True) 32 | return d 33 | 34 | #---------------------------------------------------------------------------- 35 | 36 | class PPLSampler(torch.nn.Module): 37 | def __init__(self, G, G_kwargs, epsilon, space, sampling, crop, vgg16): 38 | assert space in ['z', 'w'] 39 | assert sampling in ['full', 'end'] 40 | super().__init__() 41 | self.G = copy.deepcopy(G) 42 | self.G_kwargs = G_kwargs 43 | self.epsilon = epsilon 44 | self.space = space 45 | self.sampling = sampling 46 | self.crop = crop 47 | self.vgg16 = copy.deepcopy(vgg16) 48 | 49 | def forward(self, c): 50 | # Generate random latents and interpolation t-values. 51 | t = torch.rand([c.shape[0]], device=c.device) * (1 if self.sampling == 'full' else 0) 52 | z0, z1 = torch.randn([c.shape[0] * 2, self.G.z_dim], device=c.device).chunk(2) 53 | 54 | # Interpolate in W or Z. 55 | if self.space == 'w': 56 | w0, w1 = self.G.mapping(z=torch.cat([z0,z1]), c=torch.cat([c,c])).chunk(2) 57 | wt0 = w0.lerp(w1, t.unsqueeze(1).unsqueeze(2)) 58 | wt1 = w0.lerp(w1, t.unsqueeze(1).unsqueeze(2) + self.epsilon) 59 | else: # space == 'z' 60 | zt0 = slerp(z0, z1, t.unsqueeze(1)) 61 | zt1 = slerp(z0, z1, t.unsqueeze(1) + self.epsilon) 62 | wt0, wt1 = self.G.mapping(z=torch.cat([zt0,zt1]), c=torch.cat([c,c])).chunk(2) 63 | 64 | # Randomize noise buffers. 65 | for name, buf in self.G.named_buffers(): 66 | if name.endswith('.noise_const'): 67 | buf.copy_(torch.randn_like(buf)) 68 | 69 | # Generate images. 70 | img = self.G.synthesis(ws=torch.cat([wt0,wt1]), noise_mode='const', force_fp32=True, **self.G_kwargs) 71 | 72 | # Center crop. 73 | if self.crop: 74 | assert img.shape[2] == img.shape[3] 75 | c = img.shape[2] // 8 76 | img = img[:, :, c*3 : c*7, c*2 : c*6] 77 | 78 | # Downsample to 256x256. 79 | factor = self.G.img_resolution // 256 80 | if factor > 1: 81 | img = img.reshape([-1, img.shape[1], img.shape[2] // factor, factor, img.shape[3] // factor, factor]).mean([3, 5]) 82 | 83 | # Scale dynamic range from [-1,1] to [0,255]. 84 | img = (img + 1) * (255 / 2) 85 | if self.G.img_channels == 1: 86 | img = img.repeat([1, 3, 1, 1]) 87 | 88 | # Evaluate differential LPIPS. 89 | lpips_t0, lpips_t1 = self.vgg16(img, resize_images=False, return_lpips=True).chunk(2) 90 | dist = (lpips_t0 - lpips_t1).square().sum(1) / self.epsilon ** 2 91 | return dist 92 | 93 | #---------------------------------------------------------------------------- 94 | 95 | def compute_ppl(opts, num_samples, epsilon, space, sampling, crop, batch_size, jit=False): 96 | dataset = dnnlib.util.construct_class_by_name(**opts.dataset_kwargs) 97 | vgg16_url = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt' 98 | vgg16 = metric_utils.get_feature_detector(vgg16_url, num_gpus=opts.num_gpus, rank=opts.rank, verbose=opts.progress.verbose) 99 | 100 | # Setup sampler. 
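    # Optionally trace the sampler with a dummy label batch. check_trace=False
    # is needed because forward() re-randomizes the generator's noise buffers,
    # so repeated calls legitimately produce different outputs.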
101 | sampler = PPLSampler(G=opts.G, G_kwargs=opts.G_kwargs, epsilon=epsilon, space=space, sampling=sampling, crop=crop, vgg16=vgg16) 102 | sampler.eval().requires_grad_(False).to(opts.device) 103 | if jit: 104 | c = torch.zeros([batch_size, opts.G.c_dim], device=opts.device) 105 | sampler = torch.jit.trace(sampler, [c], check_trace=False) 106 | 107 | # Sampling loop. 108 | dist = [] 109 | progress = opts.progress.sub(tag='ppl sampling', num_items=num_samples) 110 | for batch_start in range(0, num_samples, batch_size * opts.num_gpus): 111 | progress.update(batch_start) 112 | c = [dataset.get_label(np.random.randint(len(dataset))) for _i in range(batch_size)] 113 | c = torch.from_numpy(np.stack(c)).pin_memory().to(opts.device) 114 | x = sampler(c) 115 | for src in range(opts.num_gpus): 116 | y = x.clone() 117 | if opts.num_gpus > 1: 118 | torch.distributed.broadcast(y, src=src) 119 | dist.append(y) 120 | progress.update(num_samples) 121 | 122 | # Compute PPL. 123 | if opts.rank != 0: 124 | return float('nan') 125 | dist = torch.cat(dist)[:num_samples].cpu().numpy() 126 | lo = np.percentile(dist, 1, interpolation='lower') 127 | hi = np.percentile(dist, 99, interpolation='higher') 128 | ppl = np.extract(np.logical_and(dist >= lo, dist <= hi), dist).mean() 129 | return float(ppl) 130 | 131 | #---------------------------------------------------------------------------- 132 | -------------------------------------------------------------------------------- /diffusion-projected-gan/metrics/metric_main.py: -------------------------------------------------------------------------------- 1 | # distribution of this software and related documentation without an express 2 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 3 | 4 | """Main API for computing and reporting quality metrics.""" 5 | 6 | import os 7 | import time 8 | import json 9 | import torch 10 | import dnnlib 11 | 12 | from . import metric_utils 13 | from . import frechet_inception_distance 14 | from . import kernel_inception_distance 15 | from . import precision_recall 16 | from . import perceptual_path_length 17 | from . import inception_score 18 | from . import equivariance 19 | 20 | #---------------------------------------------------------------------------- 21 | 22 | _metric_dict = dict() # name => fn 23 | 24 | def register_metric(fn): 25 | assert callable(fn) 26 | _metric_dict[fn.__name__] = fn 27 | return fn 28 | 29 | def is_valid_metric(metric): 30 | return metric in _metric_dict 31 | 32 | def list_valid_metrics(): 33 | return list(_metric_dict.keys()) 34 | 35 | #---------------------------------------------------------------------------- 36 | 37 | def calc_metric(metric, **kwargs): # See metric_utils.MetricOptions for the full list of arguments. 38 | assert is_valid_metric(metric) 39 | opts = metric_utils.MetricOptions(**kwargs) 40 | 41 | # Calculate. 42 | start_time = time.time() 43 | results = _metric_dict[metric](opts) 44 | total_time = time.time() - start_time 45 | 46 | # Broadcast results. 47 | for key, value in list(results.items()): 48 | if opts.num_gpus > 1: 49 | value = torch.as_tensor(value, dtype=torch.float64, device=opts.device) 50 | torch.distributed.broadcast(tensor=value, src=0) 51 | value = float(value.cpu()) 52 | results[key] = value 53 | 54 | # Decorate with metadata. 
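    # This EasyDict is what report_metric() below augments with snapshot and
    # timestamp fields and appends to the per-metric JSONL log.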
55 | return dnnlib.EasyDict( 56 | results = dnnlib.EasyDict(results), 57 | metric = metric, 58 | total_time = total_time, 59 | total_time_str = dnnlib.util.format_time(total_time), 60 | num_gpus = opts.num_gpus, 61 | ) 62 | 63 | #---------------------------------------------------------------------------- 64 | 65 | def report_metric(result_dict, run_dir=None, snapshot_pkl=None): 66 | metric = result_dict['metric'] 67 | assert is_valid_metric(metric) 68 | if run_dir is not None and snapshot_pkl is not None: 69 | snapshot_pkl = os.path.relpath(snapshot_pkl, run_dir) 70 | 71 | jsonl_line = json.dumps(dict(result_dict, snapshot_pkl=snapshot_pkl, timestamp=time.time())) 72 | print(jsonl_line) 73 | if run_dir is not None and os.path.isdir(run_dir): 74 | with open(os.path.join(run_dir, f'metric-{metric}.jsonl'), 'at') as f: 75 | f.write(jsonl_line + '\n') 76 | 77 | #---------------------------------------------------------------------------- 78 | # Recommended metrics. 79 | 80 | @register_metric 81 | def fid50k_full(opts): 82 | opts.dataset_kwargs.update(max_size=None, xflip=False) 83 | fid = frechet_inception_distance.compute_fid(opts, max_real=None, num_gen=50000) 84 | return dict(fid50k_full=fid) 85 | 86 | @register_metric 87 | def fid10k_full(opts): 88 | opts.dataset_kwargs.update(max_size=None, xflip=False) 89 | fid = frechet_inception_distance.compute_fid(opts, max_real=None, num_gen=10000) 90 | return dict(fid10k_full=fid) 91 | 92 | @register_metric 93 | def kid50k_full(opts): 94 | opts.dataset_kwargs.update(max_size=None, xflip=False) 95 | kid = kernel_inception_distance.compute_kid(opts, max_real=1000000, num_gen=50000, num_subsets=100, max_subset_size=1000) 96 | return dict(kid50k_full=kid) 97 | 98 | @register_metric 99 | def pr50k3_full(opts): 100 | opts.dataset_kwargs.update(max_size=None, xflip=False) 101 | precision, recall = precision_recall.compute_pr(opts, max_real=200000, num_gen=50000, nhood_size=3, row_batch_size=10000, col_batch_size=10000) 102 | return dict(pr50k3_full_precision=precision, pr50k3_full_recall=recall) 103 | 104 | @register_metric 105 | def ppl2_wend(opts): 106 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=False, batch_size=2) 107 | return dict(ppl2_wend=ppl) 108 | 109 | @register_metric 110 | def eqt50k_int(opts): 111 | opts.G_kwargs.update(force_fp32=True) 112 | psnr = equivariance.compute_equivariance_metrics(opts, num_samples=50000, batch_size=4, compute_eqt_int=True) 113 | return dict(eqt50k_int=psnr) 114 | 115 | @register_metric 116 | def eqt50k_frac(opts): 117 | opts.G_kwargs.update(force_fp32=True) 118 | psnr = equivariance.compute_equivariance_metrics(opts, num_samples=50000, batch_size=4, compute_eqt_frac=True) 119 | return dict(eqt50k_frac=psnr) 120 | 121 | @register_metric 122 | def eqr50k(opts): 123 | opts.G_kwargs.update(force_fp32=True) 124 | psnr = equivariance.compute_equivariance_metrics(opts, num_samples=50000, batch_size=4, compute_eqr=True) 125 | return dict(eqr50k=psnr) 126 | 127 | # Legacy metrics. 
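# Earlier protocol variants kept for comparability: real statistics are
# capped at 50k images instead of using the full dataset.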
128 | 129 | @register_metric 130 | def fid50k(opts): 131 | opts.dataset_kwargs.update(max_size=None) 132 | fid = frechet_inception_distance.compute_fid(opts, max_real=50000, num_gen=50000) 133 | return dict(fid50k=fid) 134 | 135 | @register_metric 136 | def kid50k(opts): 137 | opts.dataset_kwargs.update(max_size=None) 138 | kid = kernel_inception_distance.compute_kid(opts, max_real=50000, num_gen=50000, num_subsets=100, max_subset_size=1000) 139 | return dict(kid50k=kid) 140 | 141 | @register_metric 142 | def pr50k3(opts): 143 | opts.dataset_kwargs.update(max_size=None) 144 | precision, recall = precision_recall.compute_pr(opts, max_real=50000, num_gen=50000, nhood_size=3, row_batch_size=10000, col_batch_size=10000) 145 | return dict(pr50k3_precision=precision, pr50k3_recall=recall) 146 | 147 | @register_metric 148 | def is50k(opts): 149 | opts.dataset_kwargs.update(max_size=None, xflip=False) 150 | mean, std = inception_score.compute_is(opts, num_gen=50000, num_splits=10) 151 | return dict(is50k_mean=mean, is50k_std=std) 152 | -------------------------------------------------------------------------------- /diffusion-projected-gan/pg_modules/diffusion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | import scipy.signal 11 | import torch 12 | from torch_utils import persistence 13 | from torch_utils import misc 14 | from torch_utils.ops import upfirdn2d 15 | from torch_utils.ops import grid_sample_gradfix 16 | from torch_utils.ops import conv2d_gradfix 17 | 18 | #---------------------------------------------------------------------------- 19 | # Helpers for doing diffusion process. 20 | 21 | 22 | def get_beta_schedule(beta_schedule, beta_start, beta_end, num_diffusion_timesteps): 23 | def sigmoid(x): 24 | return 1 / (np.exp(-x) + 1) 25 | 26 | def continuous_t_beta(t, T): 27 | b_max = 5. 
28 | b_min = 0.1 29 | alpha = np.exp(-b_min / T - 0.5 * (b_max - b_min) * (2 * t - 1) / T ** 2) 30 | return 1 - alpha 31 | 32 | if beta_schedule == "continuous_t": 33 | betas = continuous_t_beta(np.arange(1, num_diffusion_timesteps+1), num_diffusion_timesteps) 34 | elif beta_schedule == "quad": 35 | betas = ( 36 | np.linspace( 37 | beta_start ** 0.5, 38 | beta_end ** 0.5, 39 | num_diffusion_timesteps, 40 | dtype=np.float64, 41 | ) 42 | ** 2 43 | ) 44 | elif beta_schedule == "linear": 45 | betas = np.linspace( 46 | beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64 47 | ) 48 | elif beta_schedule == "const": 49 | betas = beta_end * np.ones(num_diffusion_timesteps, dtype=np.float64) 50 | elif beta_schedule == "jsd": # 1/T, 1/(T-1), 1/(T-2), ..., 1 51 | betas = 1.0 / np.linspace( 52 | num_diffusion_timesteps, 1, num_diffusion_timesteps, dtype=np.float64 53 | ) 54 | elif beta_schedule == "sigmoid": 55 | betas = np.linspace(-6, 6, num_diffusion_timesteps) 56 | betas = sigmoid(betas) * (beta_end - beta_start) + beta_start 57 | else: 58 | raise NotImplementedError(beta_schedule) 59 | assert betas.shape == (num_diffusion_timesteps,) 60 | return betas 61 | 62 | 63 | def q_sample(x_0, alphas_bar_sqrt, one_minus_alphas_bar_sqrt, t, noise_type='gauss', noise_std=1.0): 64 | batch_size, num_channels, _, _ = x_0.shape 65 | if noise_type == 'gauss': 66 | noise = torch.randn_like(x_0, device=x_0.device) * noise_std 67 | elif noise_type == 'bernoulli': 68 | noise = (torch.bernoulli(torch.ones_like(x_0) * 0.5) * 2 - 1.) * noise_std 69 | else: 70 | raise NotImplementedError(noise_type) 71 | alphas_t_sqrt = alphas_bar_sqrt[t].view(batch_size, num_channels, 1, 1) 72 | one_minus_alphas_bar_t_sqrt = one_minus_alphas_bar_sqrt[t].view(batch_size, num_channels, 1, 1) 73 | x_t = alphas_t_sqrt * x_0 + one_minus_alphas_bar_t_sqrt * noise 74 | return x_t 75 | 76 | 77 | @persistence.persistent_class 78 | class Diffusion(torch.nn.Module): 79 | def __init__(self, 80 | beta_schedule='linear', beta_start=1e-4, beta_end=1e-2, 81 | t_min=5, t_max=500, noise_std=0.5, 82 | ): 83 | super().__init__() 84 | self.p = 0.0 # Overall multiplier for augmentation probability. 85 | self.noise_type = self.base_noise_type = 'gauss' 86 | self.base_schedule = beta_schedule 87 | self.beta_start = beta_start 88 | self.beta_end = beta_end 89 | self.t_min = t_min 90 | self.t_max = t_max 91 | self.t_add = t_max - t_min 92 | self.update_T() 93 | 94 | # Image-space corruptions. 95 | self.noise_std = float(noise_std) # Standard deviation of additive RGB noise. 
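        # Closed-form forward process evaluated by q_sample() above:
        #   x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise,
        # so no iterative diffusion chain is simulated. The training loop is
        # expected to adjust self.p, and update_T() below maps that value to
        # the maximum timestep actually sampled.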
 96 | 
 97 |     def set_diffusion_process(self, t, beta_schedule):
 98 | 
 99 |         betas = get_beta_schedule(
100 |             beta_schedule=beta_schedule,
101 |             beta_start=self.beta_start,
102 |             beta_end=self.beta_end,
103 |             num_diffusion_timesteps=t,
104 |         )
105 | 
106 |         betas = self.betas = torch.from_numpy(betas).float()
107 |         self.num_timesteps = betas.shape[0]
108 | 
109 |         alphas = self.alphas = 1.0 - betas
110 |         alphas_cumprod = torch.cat([torch.tensor([1.]), alphas.cumprod(dim=0)])
111 |         self.alphas_bar_sqrt = torch.sqrt(alphas_cumprod)
112 |         self.one_minus_alphas_bar_sqrt = torch.sqrt(1 - alphas_cumprod)
113 | 
114 |     def update_T(self):
115 |         t_adjust = round(self.p * self.t_add)
116 |         t = np.clip(int(self.t_min + t_adjust), a_min=self.t_min, a_max=self.t_max)
117 |         self.set_diffusion_process(t, "linear")
118 | 
119 |         # Sample the pool of t values that forward() draws from.
120 |         self.t_epl = np.zeros(64, dtype=np.int64)  # np.int was removed from NumPy; use an explicit dtype
121 |         diffusion_ind = min(round(self.p * 64), 48)
122 |         prob_t = np.arange(t) / np.arange(t).sum()
123 |         t_diffusion = np.random.choice(np.arange(1, t+1), size=diffusion_ind, p=prob_t)
124 |         self.t_epl[:diffusion_ind] = t_diffusion
125 | 
126 |     def forward(self, x_0, noise_std=1.0):
127 |         assert isinstance(x_0, torch.Tensor) and x_0.ndim == 4
128 |         batch_size, num_channels, height, width = x_0.shape
129 |         device = x_0.device
130 | 
131 |         alphas_bar_sqrt = self.alphas_bar_sqrt.to(device)
132 |         one_minus_alphas_bar_sqrt = self.one_minus_alphas_bar_sqrt.to(device)
133 | 
134 |         t = torch.from_numpy(np.random.choice(self.t_epl, size=batch_size * num_channels, replace=True)).to(device)
135 | 
136 |         x_t = q_sample(x_0, alphas_bar_sqrt, one_minus_alphas_bar_sqrt, t,
137 |                        noise_type=self.noise_type,
138 |                        noise_std=noise_std)
139 |         return x_t
140 | 
141 | #----------------------------------------------------------------------------
--------------------------------------------------------------------------------
/diffusion-projected-gan/torch_utils/utils_spectrum.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.fft import fftn
 3 | 
 4 | 
 5 | def roll_quadrants(data, backwards=False):
 6 |     """
 7 |     Shift low frequencies to the center of fourier transform, i.e. [-N/2, ..., +N/2] -> [0, ..., N-1]
 8 |     Args:
 9 |         data: fourier transform, (NxHxW)
10 |         backwards: bool, if True shift high frequencies back to center
11 | 
12 |     Returns:
13 |         Shifted fourier transform.
14 |     """
15 |     dim = data.ndim - 1
16 | 
17 |     if dim != 2:
18 |         raise AttributeError(f'Data must be 2d but it is {dim}d.')
19 |     if any(s % 2 == 0 for s in data.shape[1:]):
20 |         raise RuntimeWarning('Roll quadrants for 2d input should only be used with uneven spatial sizes.')
21 | 
22 |     # for each dimension swap left and right half
23 |     dims = tuple(range(1, dim+1))  # add one for batch dimension
24 |     shifts = torch.tensor(data.shape[1:]) // 2  #.div(2, rounding_mode='floor') # N/2 if N even, (N-1)/2 if N odd
25 |     if backwards:
26 |         shifts *= -1
27 |     return data.roll(shifts.tolist(), dims=dims)
28 | 
29 | 
30 | def batch_fft(data, normalize=False):
31 |     """
32 |     Compute fourier transform of batch.
33 |     Args:
34 |         data: input tensor, (NxHxW)
35 | 
36 |     Returns:
37 |         Batch fourier transform of input data.
38 |     """
39 | 
40 |     dim = data.ndim - 1  # subtract one for batch dimension
41 |     if dim != 2:
42 |         raise AttributeError(f'Data must be 2d but it is {dim}d.')
43 | 
44 |     dims = tuple(range(1, dim + 1))  # add one for batch dimension
45 |     if normalize:
46 |         norm = 'ortho'
47 |     else:
48 |         norm = 'backward'
49 | 
50 |     if not torch.is_complex(data):
51 |         data = torch.complex(data, torch.zeros_like(data))
52 |     freq = fftn(data, dim=dims, norm=norm)
53 | 
54 |     return freq
55 | 
56 | 
57 | def azimuthal_average(image, center=None):
58 |     # modified to tensor inputs from https://www.astrobetter.com/blog/2010/03/03/fourier-transforms-of-images-in-python/
59 |     """
60 |     Calculate the azimuthally averaged radial profile.
61 |     Requires low frequencies to be at the center of the image.
62 |     Args:
63 |         image: Batch of 2D images, NxHxW
64 |         center: The [x,y] pixel coordinates used as the center. The default is
65 |             None, which then uses the center of the image (including
66 |             fractional pixels).
67 | 
68 |     Returns:
69 |         Azimuthal average over the image around the center
70 |     """
71 |     # Check input shapes
72 |     assert center is None or (len(center) == 2), f'Center has to be None or len(center)=2 ' \
73 |                                                  f'(but it is len(center)={len(center)}).'
74 |     # Calculate the indices from the image
75 |     H, W = image.shape[-2:]
76 |     h, w = torch.meshgrid(torch.arange(0, H), torch.arange(0, W))
77 | 
78 |     if center is None:
79 |         center = torch.tensor([(w.max() - w.min()) / 2.0, (h.max() - h.min()) / 2.0])
80 | 
81 |     # Compute radius for each pixel wrt center
82 |     r = torch.stack([w-center[0], h-center[1]]).norm(2, 0)
83 | 
84 |     # Get sorted radii
85 |     r_sorted, ind = r.flatten().sort()
86 |     i_sorted = image.flatten(-2, -1)[..., ind]
87 | 
88 |     # Get the integer part of the radii (bin size = 1)
89 |     r_int = r_sorted.long()  # attribute to the smaller integer
90 | 
91 |     # Find all pixels that fall within each radial bin.
92 |     deltar = r_int[1:] - r_int[:-1]  # Assumes all radii represented, computes bin change between subsequent radii
93 |     rind = torch.where(deltar)[0]  # location of changed radius
94 | 
95 |     # compute number of elements in each bin
96 |     nind = rind + 1  # number of elements = idx + 1
97 |     nind = torch.cat([torch.tensor([0]), nind, torch.tensor([H*W])])  # add borders
98 |     nr = nind[1:] - nind[:-1]  # number of radius bin, i.e.
counter for bins belonging to each radius 99 | 100 | # Cumulative sum to figure out sums for each radius bin 101 | if H % 2 == 0: 102 | raise NotImplementedError('Not sure if implementation correct, please check') 103 | rind = torch.cat([torch.tensor([0]), rind, torch.tensor([H * W - 1])]) # add borders 104 | else: 105 | rind = torch.cat([rind, torch.tensor([H * W - 1])]) # add borders 106 | csim = i_sorted.cumsum(-1, dtype=torch.float64) # integrate over all values with smaller radius 107 | tbin = csim[..., rind[1:]] - csim[..., rind[:-1]] 108 | # add mean 109 | tbin = torch.cat([csim[:, 0:1], tbin], 1) 110 | 111 | radial_prof = tbin / nr.to(tbin.device) # normalize by counted bins 112 | 113 | return radial_prof 114 | 115 | 116 | def get_spectrum(data, normalize=False): 117 | dim = data.ndim - 1 # subtract one for batch dimension 118 | if dim != 2: 119 | raise AttributeError(f'Data must be 2d but it is {dim}d.') 120 | 121 | freq = batch_fft(data, normalize=normalize) 122 | power_spec = freq.real ** 2 + freq.imag ** 2 123 | N = data.shape[1] 124 | if N % 2 == 0: # duplicate value for N/2 so it is put at the end of the spectrum 125 | # and is not averaged with the mean value 126 | N_2 = N//2 127 | power_spec = torch.cat([power_spec[:, :N_2+1], power_spec[:, N_2:N_2+1], power_spec[:, N_2+1:]], dim=1) 128 | power_spec = torch.cat([power_spec[:, :, :N_2+1], power_spec[:, :, N_2:N_2+1], power_spec[:, :, N_2+1:]], dim=2) 129 | 130 | power_spec = roll_quadrants(power_spec) 131 | power_spec = azimuthal_average(power_spec) 132 | return power_spec 133 | 134 | 135 | def plot_std(mean, std, x=None, ax=None, **kwargs): 136 | import matplotlib.pyplot as plt 137 | if ax is None: 138 | fig, ax = plt.subplots(1) 139 | 140 | # plot error margins in same color as line 141 | err_kwargs = { 142 | 'alpha': 0.3 143 | } 144 | 145 | if 'c' in kwargs.keys(): 146 | err_kwargs['color'] = kwargs['c'] 147 | elif 'color' in kwargs.keys(): 148 | err_kwargs['color'] = kwargs['color'] 149 | 150 | if x is None: 151 | x = torch.linspace(0, 1, len(mean)) # use normalized x axis 152 | ax.plot(x, mean, **kwargs) 153 | ax.fill_between(x, mean-std, mean+std, **err_kwargs) 154 | 155 | return ax 156 | -------------------------------------------------------------------------------- /diffusion-insgen/metrics/metric_main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import time 11 | import json 12 | import torch 13 | import dnnlib 14 | 15 | from . import metric_utils 16 | from . import frechet_inception_distance 17 | from . import kernel_inception_distance 18 | from . import precision_recall 19 | from . import perceptual_path_length 20 | from . 
import inception_score 21 | 22 | #---------------------------------------------------------------------------- 23 | 24 | _metric_dict = dict() # name => fn 25 | 26 | def register_metric(fn): 27 | assert callable(fn) 28 | _metric_dict[fn.__name__] = fn 29 | return fn 30 | 31 | def is_valid_metric(metric): 32 | return metric in _metric_dict 33 | 34 | def list_valid_metrics(): 35 | return list(_metric_dict.keys()) 36 | 37 | #---------------------------------------------------------------------------- 38 | 39 | def calc_metric(metric, **kwargs): # See metric_utils.MetricOptions for the full list of arguments. 40 | assert is_valid_metric(metric) 41 | opts = metric_utils.MetricOptions(**kwargs) 42 | 43 | # Calculate. 44 | start_time = time.time() 45 | results = _metric_dict[metric](opts) 46 | total_time = time.time() - start_time 47 | 48 | # Broadcast results. 49 | for key, value in list(results.items()): 50 | if opts.num_gpus > 1: 51 | value = torch.as_tensor(value, dtype=torch.float64, device=opts.device) 52 | torch.distributed.broadcast(tensor=value, src=0) 53 | value = float(value.cpu()) 54 | results[key] = value 55 | 56 | # Decorate with metadata. 57 | return dnnlib.EasyDict( 58 | results = dnnlib.EasyDict(results), 59 | metric = metric, 60 | total_time = total_time, 61 | total_time_str = dnnlib.util.format_time(total_time), 62 | num_gpus = opts.num_gpus, 63 | ) 64 | 65 | #---------------------------------------------------------------------------- 66 | 67 | def report_metric(result_dict, run_dir=None, snapshot_pkl=None): 68 | metric = result_dict['metric'] 69 | assert is_valid_metric(metric) 70 | if run_dir is not None and snapshot_pkl is not None: 71 | snapshot_pkl = os.path.relpath(snapshot_pkl, run_dir) 72 | 73 | jsonl_line = json.dumps(dict(result_dict, snapshot_pkl=snapshot_pkl, timestamp=time.time())) 74 | print(jsonl_line) 75 | if run_dir is not None and os.path.isdir(run_dir): 76 | with open(os.path.join(run_dir, f'metric-{metric}.jsonl'), 'at') as f: 77 | f.write(jsonl_line + '\n') 78 | 79 | #---------------------------------------------------------------------------- 80 | # Primary metrics. 
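# The *_full variants below disable x-flips and draw real statistics from
# the full dataset (or a much larger cap than the legacy 50k).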
81 | 82 | @register_metric 83 | def fid50k_full(opts): 84 | opts.dataset_kwargs.update(max_size=None, xflip=False) 85 | fid = frechet_inception_distance.compute_fid(opts, max_real=None, num_gen=50000) 86 | return dict(fid50k_full=fid) 87 | 88 | @register_metric 89 | def kid50k_full(opts): 90 | opts.dataset_kwargs.update(max_size=None, xflip=False) 91 | kid = kernel_inception_distance.compute_kid(opts, max_real=1000000, num_gen=50000, num_subsets=100, max_subset_size=1000) 92 | return dict(kid50k_full=kid) 93 | 94 | @register_metric 95 | def pr50k3_full(opts): 96 | opts.dataset_kwargs.update(max_size=None, xflip=False) 97 | precision, recall = precision_recall.compute_pr(opts, max_real=200000, num_gen=50000, nhood_size=3, row_batch_size=10000, col_batch_size=10000) 98 | return dict(pr50k3_full_precision=precision, pr50k3_full_recall=recall) 99 | 100 | @register_metric 101 | def ppl2_wend(opts): 102 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=False, batch_size=2) 103 | return dict(ppl2_wend=ppl) 104 | 105 | @register_metric 106 | def is50k(opts): 107 | opts.dataset_kwargs.update(max_size=None, xflip=False) 108 | mean, std = inception_score.compute_is(opts, num_gen=50000, num_splits=10) 109 | return dict(is50k_mean=mean, is50k_std=std) 110 | 111 | #---------------------------------------------------------------------------- 112 | # Legacy metrics. 113 | 114 | @register_metric 115 | def fid50k(opts): 116 | opts.dataset_kwargs.update(max_size=None) 117 | fid = frechet_inception_distance.compute_fid(opts, max_real=50000, num_gen=50000) 118 | return dict(fid50k=fid) 119 | 120 | @register_metric 121 | def kid50k(opts): 122 | opts.dataset_kwargs.update(max_size=None) 123 | kid = kernel_inception_distance.compute_kid(opts, max_real=50000, num_gen=50000, num_subsets=100, max_subset_size=1000) 124 | return dict(kid50k=kid) 125 | 126 | @register_metric 127 | def pr50k3(opts): 128 | opts.dataset_kwargs.update(max_size=None) 129 | precision, recall = precision_recall.compute_pr(opts, max_real=50000, num_gen=50000, nhood_size=3, row_batch_size=10000, col_batch_size=10000) 130 | return dict(pr50k3_precision=precision, pr50k3_recall=recall) 131 | 132 | @register_metric 133 | def ppl_zfull(opts): 134 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='z', sampling='full', crop=True, batch_size=2) 135 | return dict(ppl_zfull=ppl) 136 | 137 | @register_metric 138 | def ppl_wfull(opts): 139 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='w', sampling='full', crop=True, batch_size=2) 140 | return dict(ppl_wfull=ppl) 141 | 142 | @register_metric 143 | def ppl_zend(opts): 144 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='z', sampling='end', crop=True, batch_size=2) 145 | return dict(ppl_zend=ppl) 146 | 147 | @register_metric 148 | def ppl_wend(opts): 149 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=True, batch_size=2) 150 | return dict(ppl_wend=ppl) 151 | 152 | #---------------------------------------------------------------------------- 153 | -------------------------------------------------------------------------------- /diffusion-stylegan2/metrics/metric_main.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import time 11 | import json 12 | import torch 13 | import dnnlib 14 | 15 | from . import metric_utils 16 | from . import frechet_inception_distance 17 | from . import kernel_inception_distance 18 | from . import precision_recall 19 | from . import perceptual_path_length 20 | from . import inception_score 21 | 22 | #---------------------------------------------------------------------------- 23 | 24 | _metric_dict = dict() # name => fn 25 | 26 | def register_metric(fn): 27 | assert callable(fn) 28 | _metric_dict[fn.__name__] = fn 29 | return fn 30 | 31 | def is_valid_metric(metric): 32 | return metric in _metric_dict 33 | 34 | def list_valid_metrics(): 35 | return list(_metric_dict.keys()) 36 | 37 | #---------------------------------------------------------------------------- 38 | 39 | def calc_metric(metric, **kwargs): # See metric_utils.MetricOptions for the full list of arguments. 40 | assert is_valid_metric(metric) 41 | opts = metric_utils.MetricOptions(**kwargs) 42 | 43 | # Calculate. 44 | start_time = time.time() 45 | results = _metric_dict[metric](opts) 46 | total_time = time.time() - start_time 47 | 48 | # Broadcast results. 49 | for key, value in list(results.items()): 50 | if opts.num_gpus > 1: 51 | value = torch.as_tensor(value, dtype=torch.float64, device=opts.device) 52 | torch.distributed.broadcast(tensor=value, src=0) 53 | value = float(value.cpu()) 54 | results[key] = value 55 | 56 | # Decorate with metadata. 57 | return dnnlib.EasyDict( 58 | results = dnnlib.EasyDict(results), 59 | metric = metric, 60 | total_time = total_time, 61 | total_time_str = dnnlib.util.format_time(total_time), 62 | num_gpus = opts.num_gpus, 63 | ) 64 | 65 | #---------------------------------------------------------------------------- 66 | 67 | def report_metric(result_dict, run_dir=None, snapshot_pkl=None): 68 | metric = result_dict['metric'] 69 | assert is_valid_metric(metric) 70 | if run_dir is not None and snapshot_pkl is not None: 71 | snapshot_pkl = os.path.relpath(snapshot_pkl, run_dir) 72 | 73 | jsonl_line = json.dumps(dict(result_dict, snapshot_pkl=snapshot_pkl, timestamp=time.time())) 74 | print(jsonl_line) 75 | if run_dir is not None and os.path.isdir(run_dir): 76 | with open(os.path.join(run_dir, f'metric-{metric}.jsonl'), 'at') as f: 77 | f.write(jsonl_line + '\n') 78 | 79 | #---------------------------------------------------------------------------- 80 | # Primary metrics. 
81 | 82 | @register_metric 83 | def fid50k_full(opts): 84 | opts.dataset_kwargs.update(max_size=None, xflip=False) 85 | fid = frechet_inception_distance.compute_fid(opts, max_real=None, num_gen=50000) 86 | return dict(fid50k_full=fid) 87 | 88 | @register_metric 89 | def kid50k_full(opts): 90 | opts.dataset_kwargs.update(max_size=None, xflip=False) 91 | kid = kernel_inception_distance.compute_kid(opts, max_real=1000000, num_gen=50000, num_subsets=100, max_subset_size=1000) 92 | return dict(kid50k_full=kid) 93 | 94 | @register_metric 95 | def pr50k3_full(opts): 96 | opts.dataset_kwargs.update(max_size=None, xflip=False) 97 | precision, recall = precision_recall.compute_pr(opts, max_real=200000, num_gen=50000, nhood_size=3, row_batch_size=10000, col_batch_size=10000) 98 | return dict(pr50k3_full_precision=precision, pr50k3_full_recall=recall) 99 | 100 | @register_metric 101 | def ppl2_wend(opts): 102 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=False, batch_size=2) 103 | return dict(ppl2_wend=ppl) 104 | 105 | @register_metric 106 | def is50k(opts): 107 | opts.dataset_kwargs.update(max_size=None, xflip=False) 108 | mean, std = inception_score.compute_is(opts, num_gen=50000, num_splits=10) 109 | return dict(is50k_mean=mean, is50k_std=std) 110 | 111 | #---------------------------------------------------------------------------- 112 | # Legacy metrics. 113 | 114 | @register_metric 115 | def fid50k(opts): 116 | opts.dataset_kwargs.update(max_size=None) 117 | fid = frechet_inception_distance.compute_fid(opts, max_real=50000, num_gen=50000) 118 | return dict(fid50k=fid) 119 | 120 | @register_metric 121 | def kid50k(opts): 122 | opts.dataset_kwargs.update(max_size=None) 123 | kid = kernel_inception_distance.compute_kid(opts, max_real=50000, num_gen=50000, num_subsets=100, max_subset_size=1000) 124 | return dict(kid50k=kid) 125 | 126 | @register_metric 127 | def pr50k3(opts): 128 | opts.dataset_kwargs.update(max_size=None) 129 | precision, recall = precision_recall.compute_pr(opts, max_real=50000, num_gen=50000, nhood_size=3, row_batch_size=10000, col_batch_size=10000) 130 | return dict(pr50k3_precision=precision, pr50k3_recall=recall) 131 | 132 | @register_metric 133 | def ppl_zfull(opts): 134 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='z', sampling='full', crop=True, batch_size=2) 135 | return dict(ppl_zfull=ppl) 136 | 137 | @register_metric 138 | def ppl_wfull(opts): 139 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='w', sampling='full', crop=True, batch_size=2) 140 | return dict(ppl_wfull=ppl) 141 | 142 | @register_metric 143 | def ppl_zend(opts): 144 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='z', sampling='end', crop=True, batch_size=2) 145 | return dict(ppl_zend=ppl) 146 | 147 | @register_metric 148 | def ppl_wend(opts): 149 | ppl = perceptual_path_length.compute_ppl(opts, num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=True, batch_size=2) 150 | return dict(ppl_wend=ppl) 151 | 152 | #---------------------------------------------------------------------------- 153 | --------------------------------------------------------------------------------
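For orientation, a minimal sketch of driving the metric registry above from a training script. The keyword arguments are an assumption based on metric_utils.MetricOptions in the upstream StyleGAN2-ADA codebase (metric_utils is not shown in this tree), and G / dataset_kwargs are hypothetical placeholders supplied by the caller:

import torch
from metrics import metric_main

# G: a loaded generator module; dataset_kwargs: constructor kwargs for the
# real dataset. Both are placeholders provided by the surrounding script.
result = metric_main.calc_metric(
    metric='fid50k_full',
    G=G,
    dataset_kwargs=dataset_kwargs,
    num_gpus=1,
    rank=0,
    device=torch.device('cuda'),
)
metric_main.report_metric(result, run_dir='./training-runs/00000', snapshot_pkl='network-snapshot.pkl')
print(result.results.fid50k_full)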