├── LICENSE
├── LICENSE-LPIPS
├── LICENSE-Stylegan2
├── README.md
├── dataloader
    ├── __init__.py
    └── dataset.py
├── figs
    ├── cxr-seg.png
    ├── datasetgan_demo.png
    ├── face-parts-opt-steps.png
    ├── face-parts-seg.png
    ├── method.png
    ├── skin-lesion-seg.png
    └── teaser3.png
├── giistr-cla.md
├── models
    ├── __init__.py
    ├── encoder_model.py
    ├── lpips
    │   ├── __init__.py
    │   ├── base_model.py
    │   ├── dist_model.py
    │   ├── networks_basic.py
    │   ├── pretrained_networks.py
    │   └── weights
    │   │   ├── v0.0
    │   │       ├── alex.pth
    │   │       ├── squeeze.pth
    │   │       └── vgg.pth
    │   │   └── v0.1
    │   │       ├── alex.pth
    │   │       ├── squeeze.pth
    │   │       └── vgg.pth
    ├── op
    │   ├── __init__.py
    │   ├── fused_act.py
    │   ├── fused_bias_act.cpp
    │   ├── fused_bias_act_kernel.cu
    │   ├── upfirdn2d.cpp
    │   ├── upfirdn2d.py
    │   └── upfirdn2d_kernel.cu
    ├── stylegan2.py
    ├── stylegan2_seg.py
    └── utils.py
├── requirements.txt
├── semanticGAN
    ├── __init__.py
    ├── inference.py
    ├── losses.py
    ├── prepare_inception.py
    ├── preprocessing
    │   ├── __init__.py
    │   ├── face_postprocessing.py
    │   └── face_preprocessing.py
    ├── ranger.py
    ├── samplers.py
    ├── train_enc.py
    └── train_seg_gan.py
└── utils
    ├── __init__.py
    ├── data_util.py
    ├── distributed.py
    ├── inception_utils.py
    └── utils.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2021 NVIDIA Corporation. 
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/LICENSE-LPIPS:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2018, Richard Zhang, Phillip Isola, Alexei A. Efros, Eli Shechtman, Oliver Wang
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 | 


--------------------------------------------------------------------------------
/LICENSE-Stylegan2:
--------------------------------------------------------------------------------
  1 | Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
  2 | 
  3 | 
  4 | Nvidia Source Code License-NC
  5 | 
  6 | =======================================================================
  7 | 
  8 | 1. Definitions
  9 | 
 10 | "Licensor" means any person or entity that distributes its Work.
 11 | 
 12 | "Software" means the original work of authorship made available under
 13 | this License.
 14 | 
 15 | "Work" means the Software and any additions to or derivative works of
 16 | the Software that are made available under this License.
 17 | 
 18 | "Nvidia Processors" means any central processing unit (CPU), graphics
 19 | processing unit (GPU), field-programmable gate array (FPGA),
 20 | application-specific integrated circuit (ASIC) or any combination
 21 | thereof designed, made, sold, or provided by Nvidia or its affiliates.
 22 | 
 23 | The terms "reproduce," "reproduction," "derivative works," and
 24 | "distribution" have the meaning as provided under U.S. copyright law;
 25 | provided, however, that for the purposes of this License, derivative
 26 | works shall not include works that remain separable from, or merely
 27 | link (or bind by name) to the interfaces of, the Work.
 28 | 
 29 | Works, including the Software, are "made available" under this License
 30 | by including in or with the Work either (a) a copyright notice
 31 | referencing the applicability of this License to the Work, or (b) a
 32 | copy of this License.
 33 | 
 34 | 2. License Grants
 35 | 
 36 |     2.1 Copyright Grant. Subject to the terms and conditions of this
 37 |     License, each Licensor grants to you a perpetual, worldwide,
 38 |     non-exclusive, royalty-free, copyright license to reproduce,
 39 |     prepare derivative works of, publicly display, publicly perform,
 40 |     sublicense and distribute its Work and any resulting derivative
 41 |     works in any form.
 42 | 
 43 | 3. Limitations
 44 | 
 45 |     3.1 Redistribution. You may reproduce or distribute the Work only
 46 |     if (a) you do so under this License, (b) you include a complete
 47 |     copy of this License with your distribution, and (c) you retain
 48 |     without modification any copyright, patent, trademark, or
 49 |     attribution notices that are present in the Work.
 50 | 
 51 |     3.2 Derivative Works. You may specify that additional or different
 52 |     terms apply to the use, reproduction, and distribution of your
 53 |     derivative works of the Work ("Your Terms") only if (a) Your Terms
 54 |     provide that the use limitation in Section 3.3 applies to your
 55 |     derivative works, and (b) you identify the specific derivative
 56 |     works that are subject to Your Terms. Notwithstanding Your Terms,
 57 |     this License (including the redistribution requirements in Section
 58 |     3.1) will continue to apply to the Work itself.
 59 | 
 60 |     3.3 Use Limitation. The Work and any derivative works thereof only
 61 |     may be used or intended for use non-commercially. The Work or
 62 |     derivative works thereof may be used or intended for use by Nvidia
 63 |     or its affiliates commercially or non-commercially. As used herein,
 64 |     "non-commercially" means for research or evaluation purposes only.
 65 | 
 66 |     3.4 Patent Claims. If you bring or threaten to bring a patent claim
 67 |     against any Licensor (including any claim, cross-claim or
 68 |     counterclaim in a lawsuit) to enforce any patents that you allege
 69 |     are infringed by any Work, then your rights under this License from
 70 |     such Licensor (including the grants in Sections 2.1 and 2.2) will
 71 |     terminate immediately.
 72 | 
 73 |     3.5 Trademarks. This License does not grant any rights to use any
 74 |     Licensor's or its affiliates' names, logos, or trademarks, except
 75 |     as necessary to reproduce the notices described in this License.
 76 | 
 77 |     3.6 Termination. If you violate any term of this License, then your
 78 |     rights under this License (including the grants in Sections 2.1 and
 79 |     2.2) will terminate immediately.
 80 | 
 81 | 4. Disclaimer of Warranty.
 82 | 
 83 | THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
 84 | KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
 85 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
 86 | NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
 87 | THIS LICENSE. 
 88 | 
 89 | 5. Limitation of Liability.
 90 | 
 91 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
 92 | THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
 93 | SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
 94 | INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
 95 | OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
 96 | (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
 97 | LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
 98 | COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
 99 | THE POSSIBILITY OF SUCH DAMAGES.
100 | 
101 | =======================================================================


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # SemanticGAN
  2 | This is the official code for:
  3 | 
  4 | #### Semantic Segmentation with Generative Models: Semi-Supervised Learning and Strong Out-of-Domain Generalization
  5 | 
  6 | [Daiqing Li](https://scholar.google.ca/citations?user=8q2ISMIAAAAJ&hl=en), [Junlin Yang](https://scholar.google.com/citations?user=QYkscc4AAAAJ&hl=en), [Karsten Kreis](https://scholar.google.de/citations?user=rFd-DiAAAAAJ&hl=de), [Antonio Torralba](https://groups.csail.mit.edu/vision/torralbalab/), [Sanja Fidler](http://www.cs.toronto.edu/~fidler/)
  7 | 
  8 | CVPR 2021 **[[Paper](https://arxiv.org/abs/2104.05833)]  [[Supp](https://nv-tlabs.github.io/semanticGAN/resources/SemanticGAN_supp.pdf)] [[Page](https://nv-tlabs.github.io/semanticGAN/)]** 
  9 | 
 10 | <img src = "./figs/method.png" width="100%"/>
 11 | 
 12 | 
 13 | ## Requirements
 14 | - Python 3.6 or 3.7 are supported.
 15 | - Pytorch 1.4.0 + is recommended.
 16 | - This code is tested with CUDA 10.2 toolkit and CuDNN 7.5.
 17 | - Please check the python package requirement from [`requirements.txt`](requirements.txt), and install using
 18 | ```
 19 | pip install -r requirements.txt
 20 | ```
 21 | ## Dataset
 22 | We recently release MetFaces40 annotation we use as out-of-domain testing. Please notice this dataset is under the [Creative Commons BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/) license by NVIDIA Corporation. To view a copy of this license, visit [LICENSE](https://github.com/nv-tlabs/semanticGAN_code/blob/main/LICENSE). Please see [GDrive](https://drive.google.com/drive/folders/1ibZzaWSUVoQ94OPoLNS0FrUufBeDvRP4?usp=sharing).
 23 | ## Training 
 24 | 
 25 | To reproduce paper **Semantic Segmentation with Generative Models: Semi-Supervised Learning and Strong Out-of-Domain Generalization**: 
 26 | 
 27 | 1. Run **Step1: Semantic GAN training**
 28 | 2. Run **Step2: Encoder training**
 29 | 3. Run **Inference & Optimization**.  
 30 | 
 31 | 
 32 | ---
 33 | #### 0. Prepare for FID calculation
 34 | In order to calculate FID score, you need to prepare inception features for your dataset,
 35 | 
 36 | ```
 37 | python prepare_inception.py \
 38 | --size [resolution of the image] \
 39 | --batch [batch size] \
 40 | --output [path to save the inception file, in .pkl] \
 41 | --dataset_name celeba-mask \
 42 | [positional argument 1, path to the image folder]] \
 43 | ```
 44 | #### 1. GAN Training
 45 | 
 46 | For training GAN with both image and its label,
 47 | 
 48 | ```
 49 | python train_seg_gan.py \
 50 | --img_dataset [path-to-img-folder] \
 51 | --seg_dataset [path-to-seg-folder] \
 52 | --inception [path-to-inception file] \
 53 | --seg_name celeba-mask \
 54 | --checkpoint_dir [path-to-ckpt-dir] \
 55 | ```
 56 | 
 57 | To use multi-gpus training in the cloud,
 58 | 
 59 | ```
 60 | python -m torch.distributed.launch \
 61 | --nproc_per_node=N_GPU \
 62 | --master_port=PORTtrain_gan.py \
 63 | train_gan.py \
 64 | --img_dataset [path-to-img-folder] \
 65 | --inception [path-to-inception file] \
 66 | --dataset_name celeba-mask \
 67 | --checkpoint_dir [path-to-ckpt-dir] \
 68 | ```
 69 | 
 70 | #### 2. Encoder Triaining
 71 | 
 72 | ```
 73 | python train_enc.py \
 74 | --img_dataset [path-to-img-folder] \
 75 | --seg_dataset [path-to-seg-folder] \
 76 | --ckpt [path-to-pretrained GAN model] \
 77 | --seg_name celeba-mask \
 78 | --enc_backboend [fpn|res] \
 79 | --checkpoint_dir [path-to-ckpt-dir] \
 80 | ```
 81 | 
 82 | ## Inference
 83 | 
 84 | For Face Parts Segmentation Task
 85 | 
 86 | ![img](./figs/face-parts-seg.png?lastModify=1616189357)
 87 | 
 88 | ```
 89 | python inference.py \
 90 | --ckpt [path-to-ckpt] \
 91 | --img_dir [path-to-test-folder] \
 92 | --outdir [path-to-output-folder] \
 93 | --dataset_name celeba-mask \
 94 | --w_plus \
 95 | --image_mode RGB \
 96 | --seg_dim 8 \
 97 | --step 200 [optimization steps] \
 98 | ```
 99 | 
100 | Visualization of different optimization steps
101 | 
102 | ![img](./figs/face-parts-opt-steps.png)
103 | 
104 | 
105 | ## Citation 
106 | 
107 | Please cite the following paper if you used the code in this repository.
108 | 
109 | ```
110 | @inproceedings{semanticGAN, 
111 | title={Semantic Segmentation with Generative Models: Semi-Supervised Learning and Strong Out-of-Domain Generalization}, 
112 | booktitle={Conference on Computer Vision and Pattern Recognition (CVPR)}, 
113 | author={Li, Daiqing and Yang, Junlin and Kreis, Karsten and Torralba, Antonio and Fidler, Sanja}, 
114 | year={2021}, 
115 | }
116 | ```
117 | 
118 | 
119 | 
120 | ## License 
121 | For any code dependency related to Stylegan2, the license is under the Nvidia Source Code License-NC. To view a copy of this license, visit https://nvlabs.github.io/stylegan2/license.html
122 | 
123 | The work SemanticGAN is released under MIT License.
124 | 
125 | ```
126 | The MIT License (MIT)
127 | 
128 | Copyright (c) 2021 NVIDIA Corporation. 
129 | 
130 | Permission is hereby granted, free of charge, to any person obtaining a copy of
131 | this software and associated documentation files (the "Software"), to deal in
132 | the Software without restriction, including without limitation the rights to
133 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
134 | the Software, and to permit persons to whom the Software is furnished to do so,
135 | subject to the following conditions:
136 | 
137 | The above copyright notice and this permission notice shall be included in all
138 | copies or substantial portions of the Software.
139 | 
140 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
141 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
142 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
143 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
144 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
145 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
146 | ```


--------------------------------------------------------------------------------
/dataloader/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
 3 | Licensed under The MIT License (MIT)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | """


--------------------------------------------------------------------------------
/dataloader/dataset.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | from PIL import Image, ImageOps
 24 | from torch.utils.data import Dataset
 25 | from torchvision import transforms
 26 | import os
 27 | import numpy as np
 28 | import torch
 29 | import cv2
 30 | import albumentations
 31 | import albumentations.augmentations as A
 32 | 
 33 | class HistogramEqualization(object):
 34 |     def __call__(self, img):
 35 |         img_eq = ImageOps.equalize(img)
 36 |         
 37 |         return img_eq
 38 | 
 39 | class AdjustGamma(object):
 40 |     def __init__(self, gamma):
 41 |         self.gamma = gamma
 42 |     
 43 |     def __call__(self, img):
 44 |         img_gamma = transforms.functional.adjust_gamma(img, self.gamma)
 45 | 
 46 |         return img_gamma
 47 | 
 48 | class CelebAMaskDataset(Dataset):
 49 |     def __init__(self, args, dataroot, unlabel_transform=None, latent_dir=None, is_label=True, phase='train', 
 50 |                     limit_size=None, unlabel_limit_size=None, aug=False, resolution=256):
 51 | 
 52 |         self.args = args
 53 |         self.is_label = is_label
 54 | 
 55 | 
 56 |         if is_label == True:
 57 |             self.latent_dir = latent_dir
 58 |             self.data_root = os.path.join(dataroot, 'label_data')
 59 |         
 60 |             if phase == 'train':
 61 |                 if limit_size is None:
 62 |                     self.idx_list = np.loadtxt(os.path.join(self.data_root, 'train_full_list.txt'), dtype=str)
 63 |                 else:
 64 |                     self.idx_list = np.loadtxt(os.path.join(self.data_root, 
 65 |                                             'train_{}_list.txt'.format(limit_size)), dtype=str).reshape(-1)
 66 |             elif phase == 'val':
 67 |                 if limit_size is None:
 68 |                     self.idx_list = np.loadtxt(os.path.join(self.data_root, 'val_full_list.txt'), dtype=str)
 69 |                 else:
 70 |                     self.idx_list = np.loadtxt(os.path.join(self.data_root, 
 71 |                                             'val_{}_list.txt'.format(limit_size)), dtype=str).reshape(-1)
 72 |             elif phase == 'train-val':
 73 |                 # concat both train and val
 74 |                 if limit_size is None:
 75 |                     train_list = np.loadtxt(os.path.join(self.data_root, 'train_full_list.txt'), dtype=str)
 76 |                     val_list = np.loadtxt(os.path.join(self.data_root, 'val_full_list.txt'), dtype=str)
 77 |                     self.idx_list = list(train_list) + list(val_list)
 78 |                 else:
 79 |                     train_list = np.loadtxt(os.path.join(self.data_root, 
 80 |                                             'train_{}_list.txt'.format(limit_size)), dtype=str).reshape(-1)
 81 |                     val_list = np.loadtxt(os.path.join(self.data_root, 
 82 |                                             'val_{}_list.txt'.format(limit_size)), dtype=str).reshape(-1)
 83 |                     self.idx_list = list(train_list) + list(val_list)
 84 |             else:
 85 |                 self.idx_list = np.loadtxt(os.path.join(self.data_root, 'test_list.txt'), dtype=str)
 86 |         else:
 87 |             self.data_root = os.path.join(dataroot, 'unlabel_data')
 88 |             if unlabel_limit_size is None:
 89 |                 self.idx_list = np.loadtxt(os.path.join(self.data_root, 'unlabel_list.txt'), dtype=str)
 90 |             else:
 91 |                 self.idx_list = np.loadtxt(os.path.join(self.data_root, 'unlabel_{}_list.txt'.format(unlabel_limit_size)), dtype=str)
 92 | 
 93 |         self.img_dir = os.path.join(self.data_root, 'image')
 94 |         self.label_dir = os.path.join(self.data_root, 'label')
 95 | 
 96 |         self.phase = phase
 97 |         self.color_map = {
 98 |             0: [  0,   0,   0],
 99 |             1: [ 0,0,205],
100 |             2: [132,112,255],
101 |             3: [ 25,25,112],
102 |             4: [187,255,255],
103 |             5: [ 102,205,170],
104 |             6: [ 227,207,87],
105 |             7: [ 142,142,56]
106 |         }
107 | 
108 |         self.data_size = len(self.idx_list)
109 |         self.resolution = resolution
110 | 
111 |         self.aug = aug
112 |         if aug == True:
113 |             self.aug_t = albumentations.Compose([
114 |                             A.transforms.HorizontalFlip(p=0.5),
115 |                             A.transforms.ShiftScaleRotate(shift_limit=0.1,
116 |                                                 scale_limit=0.2,
117 |                                                 rotate_limit=15,
118 |                                                 border_mode=cv2.BORDER_CONSTANT,
119 |                                                 value=0,
120 |                                                 mask_value=0,
121 |                                                 p=0.5),
122 |                     ])
123 | 
124 |         self.unlabel_transform = unlabel_transform
125 |         
126 | 
127 |     def _mask_labels(self, mask_np):
128 |         label_size = len(self.color_map.keys())
129 |         labels = np.zeros((label_size, mask_np.shape[0], mask_np.shape[1]))
130 |         for i in range(label_size):
131 |             labels[i][mask_np==i] = 1.0
132 |         
133 |         return labels
134 | 
135 |     
136 |     @staticmethod
137 |     def preprocess(img):
138 |         image_transform = transforms.Compose(
139 |                     [
140 |                         transforms.ToTensor(),
141 |                         transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5), inplace=True)
142 |                     ]
143 |                 )
144 |         img_tensor = image_transform(img)
145 |         # normalize
146 |         # img_tensor = (img_tensor - img_tensor.min()) / (img_tensor.max() - img_tensor.min())
147 |         # img_tensor = (img_tensor - 0.5) / 0.5
148 | 
149 |         return img_tensor
150 |         
151 | 
152 |     def __len__(self):
153 |         if hasattr(self.args, 'n_gpu') == False:
154 |             return self.data_size
155 |         # make sure dataloader size is larger than batchxngpu size
156 |         return max(self.args.batch*self.args.n_gpu, self.data_size)
157 |     
158 |     def __getitem__(self, idx):
159 |         if idx >= self.data_size:
160 |             idx = idx % (self.data_size)
161 |         img_idx = self.idx_list[idx]
162 |         img_pil = Image.open(os.path.join(self.img_dir, img_idx)).convert('RGB').resize((self.resolution, self.resolution))
163 |         mask_pil = Image.open(os.path.join(self.label_dir, img_idx)).convert('L').resize((self.resolution, self.resolution), resample=0)
164 |         
165 |         if self.is_label:
166 |             if (self.phase == 'train' or self.phase == 'train-val') and self.aug:
167 |                 augmented = self.aug_t(image=np.array(img_pil), mask=np.array(mask_pil))
168 |                 aug_img_pil = Image.fromarray(augmented['image'])
169 |                 # apply pixel-wise transformation
170 |                 img_tensor = self.preprocess(aug_img_pil)
171 | 
172 |                 mask_np = np.array(augmented['mask'])
173 |                 labels = self._mask_labels(mask_np)
174 | 
175 |                 mask_tensor = torch.tensor(labels, dtype=torch.float)
176 |                 mask_tensor = (mask_tensor - 0.5) / 0.5
177 | 
178 |             else:
179 |                 img_tensor = self.preprocess(img_pil)
180 |                 mask_np = np.array(mask_pil)
181 |                 labels = self._mask_labels(mask_np)
182 | 
183 |                 mask_tensor = torch.tensor(labels, dtype=torch.float)
184 |                 mask_tensor = (mask_tensor - 0.5) / 0.5
185 |             
186 |             return {
187 |                 'image': img_tensor,
188 |                 'mask': mask_tensor
189 |             }
190 |         else:
191 |             img_tensor = self.unlabel_transform(img_pil)
192 |             return {
193 |                 'image': img_tensor,
194 |             }
195 | 


--------------------------------------------------------------------------------
/figs/cxr-seg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/figs/cxr-seg.png


--------------------------------------------------------------------------------
/figs/datasetgan_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/figs/datasetgan_demo.png


--------------------------------------------------------------------------------
/figs/face-parts-opt-steps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/figs/face-parts-opt-steps.png


--------------------------------------------------------------------------------
/figs/face-parts-seg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/figs/face-parts-seg.png


--------------------------------------------------------------------------------
/figs/method.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/figs/method.png


--------------------------------------------------------------------------------
/figs/skin-lesion-seg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/figs/skin-lesion-seg.png


--------------------------------------------------------------------------------
/figs/teaser3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/figs/teaser3.png


--------------------------------------------------------------------------------
/giistr-cla.md:
--------------------------------------------------------------------------------
 1 | ## Individual Contributor License Agreement (CLA)
 2 | 
 3 | **Thank you for submitting your contributions to this project.**
 4 | 
 5 | By signing this CLA, you agree that the following terms apply to all of your past, present and future contributions
 6 | to the project.
 7 | 
 8 | ### License.
 9 | 
10 | You hereby represent that all present, past and future contributions are governed by the
11 | [MIT License](https://opensource.org/licenses/MIT)
12 | copyright statement.
13 | 
14 | This entails that to the extent possible under law, you transfer all copyright and related or neighboring rights
15 | of the code or documents you contribute to the project itself or its maintainers.
16 | Furthermore you also represent that you have the authority to perform the above waiver
17 | with respect to the entirety of you contributions.
18 | 
19 | ### Moral Rights.
20 | 
21 | To the fullest extent permitted under applicable law, you hereby waive, and agree not to
22 | assert, all of your “moral rights” in or relating to your contributions for the benefit of the project.
23 | 
24 | ### Third Party Content.
25 | 
26 | If your Contribution includes or is based on any source code, object code, bug fixes, configuration changes, tools,
27 | specifications, documentation, data, materials, feedback, information or other works of authorship that were not
28 | authored by you (“Third Party Content”) or if you are aware of any third party intellectual property or proprietary
29 | rights associated with your Contribution (“Third Party Rights”),
30 | then you agree to include with the submission of your Contribution full details respecting such Third Party
31 | Content and Third Party Rights, including, without limitation, identification of which aspects of your
32 | Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the
33 | Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable
34 | third party license terms or restrictions respecting the Third Party Content and Third Party Rights. For greater
35 | certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights
36 | do not apply to any portion of a Project that is incorporated into your Contribution to that same Project.
37 | 
38 | ### Representations.
39 | 
40 | You represent that, other than the Third Party Content and Third Party Rights identified by
41 | you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled
42 | to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were
43 | created in the course of your employment with your past or present employer(s), you represent that such
44 | employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer
45 | (s) has waived all of their right, title or interest in or to your Contributions.
46 | 
47 | ### Disclaimer.
48 | 
49 | To the fullest extent permitted under applicable law, your Contributions are provided on an "as is"
50 | basis, without any warranties or conditions, express or implied, including, without limitation, any implied
51 | warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not
52 | required to provide support for your Contributions, except to the extent you desire to provide support.
53 | 
54 | ### No Obligation.
55 | 
56 | You acknowledge that the maintainers of this project are under no obligation to use or incorporate your contributions
57 | into the project. The decision to use or incorporate your contributions into the project will be made at the
58 | sole discretion of the maintainers or their authorized delegates.
59 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/models/__init__.py


--------------------------------------------------------------------------------
/models/encoder_model.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | 
 24 | import torch
 25 | import torch.nn as nn
 26 | import torch.nn.functional as F
 27 | from models.stylegan2_seg import EqualLinear, ConvLayer, ResBlock
 28 | import math
 29 | 
 30 | class Bottleneck(nn.Module):
 31 |     expansion = 4
 32 | 
 33 |     def __init__(self, in_planes, planes, stride=1):
 34 |         super(Bottleneck, self).__init__()
 35 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 36 |         self.bn1 = nn.BatchNorm2d(planes)
 37 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 38 |         self.bn2 = nn.BatchNorm2d(planes)
 39 |         self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
 40 |         self.bn3 = nn.BatchNorm2d(self.expansion*planes)
 41 | 
 42 |         self.shortcut = nn.Sequential()
 43 |         if stride != 1 or in_planes != self.expansion*planes:
 44 |             self.shortcut = nn.Sequential(
 45 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
 46 |                 nn.BatchNorm2d(self.expansion*planes)
 47 |             )
 48 | 
 49 |     def forward(self, x):
 50 |         out = F.relu(self.bn1(self.conv1(x)))
 51 |         out = F.relu(self.bn2(self.conv2(out)))
 52 |         out = self.bn3(self.conv3(out))
 53 |         out += self.shortcut(x)
 54 |         out = F.relu(out)
 55 |         return out
 56 | 
 57 | 
 58 | class FPN(nn.Module):
 59 |     def __init__(self, input_dim, block, num_blocks):
 60 |         super(FPN, self).__init__()
 61 |         self.in_planes = 64
 62 |         self.feature_dim = 512
 63 | 
 64 |         self.conv1 = nn.Conv2d(input_dim, 64, kernel_size=7, stride=2, padding=3, bias=False)
 65 |         self.bn1 = nn.BatchNorm2d(64)
 66 | 
 67 |         # Bottom-up layers
 68 |         self.layer1 = self._make_layer(block,  64, num_blocks[0], stride=1)
 69 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 70 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 71 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 72 | 
 73 |         # Top layer
 74 |         self.toplayer = nn.Conv2d(2048, self.feature_dim, kernel_size=1, stride=1, padding=0)  # Reduce channels
 75 | 
 76 |         # Smooth layers
 77 |         self.smooth1 = nn.Conv2d(self.feature_dim, self.feature_dim, kernel_size=3, stride=1, padding=1)
 78 |         self.smooth2 = nn.Conv2d(self.feature_dim, self.feature_dim, kernel_size=3, stride=1, padding=1)
 79 |         self.smooth3 = nn.Conv2d(self.feature_dim, self.feature_dim, kernel_size=3, stride=1, padding=1)
 80 | 
 81 |         # Lateral layers
 82 |         self.latlayer1 = nn.Conv2d(1024, self.feature_dim, kernel_size=1, stride=1, padding=0)
 83 |         self.latlayer2 = nn.Conv2d( 512, self.feature_dim, kernel_size=1, stride=1, padding=0)
 84 |         self.latlayer3 = nn.Conv2d( 256, self.feature_dim, kernel_size=1, stride=1, padding=0)
 85 | 
 86 |     def _make_layer(self, block, planes, num_blocks, stride):
 87 |         strides = [stride] + [1]*(num_blocks-1)
 88 |         layers = []
 89 |         for stride in strides:
 90 |             layers.append(block(self.in_planes, planes, stride))
 91 |             self.in_planes = planes * block.expansion
 92 |         return nn.Sequential(*layers)
 93 | 
 94 |     def _upsample_add(self, x, y):
 95 |         '''Upsample and add two feature maps.
 96 |         Args:
 97 |           x: (Variable) top feature map to be upsampled.
 98 |           y: (Variable) lateral feature map.
 99 |         Returns:
100 |           (Variable) added feature map.
101 |         Note in PyTorch, when input size is odd, the upsampled feature map
102 |         with `F.upsample(..., scale_factor=2, mode='nearest')`
103 |         maybe not equal to the lateral feature map size.
104 |         e.g.
105 |         original input size: [N,_,15,15] ->
106 |         conv2d feature map size: [N,_,8,8] ->
107 |         upsampled feature map size: [N,_,16,16]
108 |         So we choose bilinear upsample which supports arbitrary output sizes.
109 |         '''
110 |         _,_,H,W = y.size()
111 |         return F.interpolate(x, size=(H,W), mode='bilinear', align_corners=False) + y
112 | 
113 |     def forward(self, x):
114 |         # Bottom-up
115 |         c1 = F.relu(self.bn1(self.conv1(x)))
116 |         c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
117 |         c2 = self.layer1(c1)
118 |         c3 = self.layer2(c2)
119 |         c4 = self.layer3(c3)
120 |         c5 = self.layer4(c4)
121 |         # Top-down
122 |         p5 = self.toplayer(c5)
123 |         p4 = self._upsample_add(p5, self.latlayer1(c4))
124 |         p3 = self._upsample_add(p4, self.latlayer2(c3))
125 |         p2 = self._upsample_add(p3, self.latlayer3(c2))
126 |         # Smooth
127 |         p4 = self.smooth1(p4)
128 |         p3 = self.smooth2(p3)
129 |         p2 = self.smooth3(p2)
130 | 
131 |         return p2, p3, p4
132 | 
133 | def conv3x3(in_planes, out_planes, stride=1, has_bias=False):
134 |     "3x3 convolution with padding"
135 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
136 |                      padding=1, bias=has_bias)
137 | 
138 | 
139 | def conv3x3_bn_relu(in_planes, out_planes, stride=1):
140 |     return nn.Sequential(
141 |                 conv3x3(in_planes, out_planes, stride),
142 |                 nn.BatchNorm2d(out_planes),
143 |                 nn.ReLU(inplace=True),
144 |             )
145 | 
146 | class ToStyleCode(nn.Module):
147 |     def __init__(self, n_convs, input_dim=512, out_dim=512):
148 |         super(ToStyleCode, self).__init__()
149 |         self.convs = nn.ModuleList()
150 |         self.out_dim = out_dim
151 | 
152 |         for i in range(n_convs):
153 |             if i == 0:
154 |                 self.convs.append(
155 |                 nn.Conv2d(in_channels=input_dim, out_channels=out_dim, kernel_size=3, padding=1, stride=2))
156 |                 #self.convs.append(nn.BatchNorm2d(out_dim))
157 |                 #self.convs.append(nn.InstanceNorm2d(out_dim))
158 |                 self.convs.append(nn.LeakyReLU(inplace=True))
159 |             else:
160 |                 self.convs.append(nn.Conv2d(in_channels=out_dim, out_channels=out_dim, kernel_size=3, padding=1, stride=2))
161 |                 self.convs.append(nn.LeakyReLU(inplace=True))
162 |         
163 |         self.convs = nn.Sequential(*self.convs)
164 |         self.linear = EqualLinear(out_dim, out_dim)
165 | 
166 |     def forward(self, x):
167 |         x = self.convs(x)
168 |         x = x.view(-1, self.out_dim)
169 |         x = self.linear(x)
170 |         return x
171 | 
172 | 
173 | class ToStyleHead(nn.Module):
174 |     def __init__(self, input_dim=512, out_dim=512):
175 |         super(ToStyleHead, self).__init__()
176 |         self.out_dim = out_dim
177 | 
178 |         self.convs = nn.Sequential(
179 |             conv3x3_bn_relu(input_dim, input_dim, 1),
180 |             nn.AdaptiveAvgPool2d(1),
181 |             # output 1x1
182 |             nn.Conv2d(in_channels=input_dim, out_channels=out_dim, kernel_size=1)
183 |         )
184 | 
185 |     def forward(self, x):
186 |         x = self.convs(x)
187 |         x = x.view(x.shape[0],self.out_dim)
188 |         return x
189 | 
190 | class FPNEncoder(nn.Module):
191 |     def __init__(self, input_dim, n_latent=14, use_style_head=False, style_layers=[4,5,6]):
192 |         super(FPNEncoder, self).__init__()
193 | 
194 |         self.n_latent = n_latent
195 |         num_blocks = [3,4,6,3] #resnet 50
196 |         self.FPN_module = FPN(input_dim, Bottleneck, num_blocks)
197 |         # course block 0-2, 4x4->8x8
198 |         self.course_styles = nn.ModuleList()
199 |         for i in range(3):
200 |             if use_style_head:
201 |                 self.course_styles.append(ToStyleHead())
202 |             else:
203 |                 self.course_styles.append(ToStyleCode(n_convs=style_layers[0]))
204 |         # medium1 block 3-6 16x16->32x32
205 |         self.medium_styles = nn.ModuleList()
206 |         for i in range(4):
207 |             if use_style_head:
208 |                 self.medium_styles.append(ToStyleHead())
209 |             else:
210 |                 self.medium_styles.append(ToStyleCode(n_convs=style_layers[1]))
211 |         # fine block 7-13 64x64->256x256
212 |         self.fine_styles = nn.ModuleList()
213 |         for i in range(n_latent - 7):
214 |             if use_style_head:
215 |                 self.fine_styles.append(ToStyleHead())
216 |             else:
217 |                 self.fine_styles.append(ToStyleCode(n_convs=style_layers[2]))
218 | 
219 |     def forward(self, x):
220 |         styles = []
221 |         # FPN feature
222 |         p2, p3, p4 = self.FPN_module(x)
223 |         
224 |         for style_map in self.course_styles:
225 |             styles.append(style_map(p4))
226 | 
227 |         for style_map in self.medium_styles:
228 |             styles.append(style_map(p3))
229 |             
230 |         for style_map in self.fine_styles:
231 |             styles.append(style_map(p2))
232 | 
233 |         styles = torch.stack(styles, dim=1)
234 | 
235 |         return styles
236 | 
237 | 
238 | class ResEncoder(nn.Module):
239 |     def __init__(self, size, input_dim, n_latent, channel_multiplier=2, blur_kernel=[1, 3, 3, 1]):
240 |         super().__init__()
241 | 
242 |         self.channels = {
243 |                 4: 512,
244 |                 8: 512,
245 |                 16: 512,
246 |                 32: 512,
247 |                 64: 256 * channel_multiplier,
248 |                 128: 128 * channel_multiplier,
249 |                 256: 64 * channel_multiplier,
250 |                 512: 32 * channel_multiplier,
251 |                 1024: 16 * channel_multiplier,
252 |         }
253 | 
254 | 
255 |         convs = [ConvLayer(input_dim, self.channels[size], 1)]
256 | 
257 |         log_size = int(math.log(size, 2))
258 | 
259 |         in_channel = self.channels[size]
260 | 
261 |         for i in range(log_size, 2, -1):
262 |             out_channel = self.channels[2 ** (i - 1)]
263 | 
264 |             convs.append(ResBlock(in_channel, out_channel, blur_kernel))
265 | 
266 |             in_channel = out_channel
267 | 
268 |         self.convs = nn.Sequential(*convs)
269 | 
270 |         self.n_latent = n_latent
271 |         self.stddev_group = 4
272 |         self.stddev_feat = 1
273 | 
274 |         self.final_conv = ConvLayer(in_channel + 1, self.channels[4], 3)
275 |         self.final_linear = EqualLinear(self.channels[4] * 4 * 4, n_latent * 512)
276 | 
277 |     def _cal_stddev(self, x):
278 |         batch, channel, height, width = x.shape
279 |         group = min(batch, self.stddev_group)
280 |         stddev = x.view(
281 |             group, -1, self.stddev_feat, channel // self.stddev_feat, height, width
282 |         )
283 |         stddev = torch.sqrt(stddev.var(0, unbiased=False) + 1e-8)
284 |         stddev = stddev.mean([2, 3, 4], keepdims=True).squeeze(2)
285 |         stddev = stddev.repeat(group, 1, height, width)
286 |         x = torch.cat([x, stddev], 1)
287 | 
288 |         return x
289 | 
290 |     def forward(self, input):
291 |         batch = input.shape[0]
292 |         
293 |         out = self.convs(input)
294 |         
295 |         out = self._cal_stddev(out)
296 | 
297 |         out = self.final_conv(out)
298 | 
299 |         out = out.view(batch, -1)
300 |         out = self.final_linear(out)
301 | 
302 |         out = out.view(batch, self.n_latent, -1)
303 | 
304 |         return out
305 | 


--------------------------------------------------------------------------------
/models/lpips/__init__.py:
--------------------------------------------------------------------------------
  1 | 
  2 | from __future__ import absolute_import
  3 | from __future__ import division
  4 | from __future__ import print_function
  5 | 
  6 | import numpy as np
  7 | from skimage.measure import compare_ssim
  8 | import torch
  9 | 
 10 | from models.lpips import dist_model
 11 | 
 12 | 
 13 | class PerceptualLoss(torch.nn.Module):
 14 |     def __init__(self, model='net-lin', net='alex', colorspace='rgb', spatial=False, use_gpu=False, gpu_ids=[0]): # VGG using our perceptually-learned weights (LPIPS metric)
 15 |     # def __init__(self, model='net', net='vgg', use_gpu=True): # "default" way of using VGG as a perceptual loss
 16 |         super(PerceptualLoss, self).__init__()
 17 |         print('Setting up Perceptual loss...')
 18 |         self.use_gpu = use_gpu
 19 |         self.spatial = spatial
 20 |         self.gpu_ids = gpu_ids
 21 |         self.model = dist_model.DistModel()
 22 |         self.model.initialize(model=model, net=net, use_gpu=use_gpu, colorspace=colorspace, spatial=self.spatial, gpu_ids=gpu_ids)
 23 |         print('...[%s] initialized'%self.model.name())
 24 |         print('...Done')
 25 | 
 26 |     def forward(self, pred, target, normalize=False):
 27 |         """
 28 |         Pred and target are Variables.
 29 |         If normalize is True, assumes the images are between [0,1] and then scales them between [-1,+1]
 30 |         If normalize is False, assumes the images are already between [-1,+1]
 31 | 
 32 |         Inputs pred and target are Nx3xHxW
 33 |         Output pytorch Variable N long
 34 |         """
 35 | 
 36 |         if normalize:
 37 |             target = 2 * target  - 1
 38 |             pred = 2 * pred  - 1
 39 | 
 40 |         return self.model.forward(target, pred)
 41 | 
 42 | def normalize_tensor(in_feat,eps=1e-10):
 43 |     norm_factor = torch.sqrt(torch.sum(in_feat**2,dim=1,keepdim=True))
 44 |     return in_feat/(norm_factor+eps)
 45 | 
 46 | def l2(p0, p1, range=255.):
 47 |     return .5*np.mean((p0 / range - p1 / range)**2)
 48 | 
 49 | def psnr(p0, p1, peak=255.):
 50 |     return 10*np.log10(peak**2/np.mean((1.*p0-1.*p1)**2))
 51 | 
 52 | def dssim(p0, p1, range=255.):
 53 |     return (1 - compare_ssim(p0, p1, data_range=range, multichannel=True)) / 2.
 54 | 
 55 | def rgb2lab(in_img,mean_cent=False):
 56 |     from skimage import color
 57 |     img_lab = color.rgb2lab(in_img)
 58 |     if(mean_cent):
 59 |         img_lab[:,:,0] = img_lab[:,:,0]-50
 60 |     return img_lab
 61 | 
 62 | def tensor2np(tensor_obj):
 63 |     # change dimension of a tensor object into a numpy array
 64 |     return tensor_obj[0].cpu().float().numpy().transpose((1,2,0))
 65 | 
 66 | def np2tensor(np_obj):
 67 |      # change dimenion of np array into tensor array
 68 |     return torch.Tensor(np_obj[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
 69 | 
 70 | def tensor2tensorlab(image_tensor,to_norm=True,mc_only=False):
 71 |     # image tensor to lab tensor
 72 |     from skimage import color
 73 | 
 74 |     img = tensor2im(image_tensor)
 75 |     img_lab = color.rgb2lab(img)
 76 |     if(mc_only):
 77 |         img_lab[:,:,0] = img_lab[:,:,0]-50
 78 |     if(to_norm and not mc_only):
 79 |         img_lab[:,:,0] = img_lab[:,:,0]-50
 80 |         img_lab = img_lab/100.
 81 | 
 82 |     return np2tensor(img_lab)
 83 | 
 84 | def tensorlab2tensor(lab_tensor,return_inbnd=False):
 85 |     from skimage import color
 86 |     import warnings
 87 |     warnings.filterwarnings("ignore")
 88 | 
 89 |     lab = tensor2np(lab_tensor)*100.
 90 |     lab[:,:,0] = lab[:,:,0]+50
 91 | 
 92 |     rgb_back = 255.*np.clip(color.lab2rgb(lab.astype('float')),0,1)
 93 |     if(return_inbnd):
 94 |         # convert back to lab, see if we match
 95 |         lab_back = color.rgb2lab(rgb_back.astype('uint8'))
 96 |         mask = 1.*np.isclose(lab_back,lab,atol=2.)
 97 |         mask = np2tensor(np.prod(mask,axis=2)[:,:,np.newaxis])
 98 |         return (im2tensor(rgb_back),mask)
 99 |     else:
100 |         return im2tensor(rgb_back)
101 | 
102 | def rgb2lab(input):
103 |     from skimage import color
104 |     return color.rgb2lab(input / 255.)
105 | 
106 | def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.):
107 |     image_numpy = image_tensor[0].cpu().float().numpy()
108 |     image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
109 |     return image_numpy.astype(imtype)
110 | 
111 | def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.):
112 |     return torch.Tensor((image / factor - cent)
113 |                         [:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
114 | 
115 | def tensor2vec(vector_tensor):
116 |     return vector_tensor.data.cpu().numpy()[:, :, 0, 0]
117 | 
118 | def voc_ap(rec, prec, use_07_metric=False):
119 |     """ ap = voc_ap(rec, prec, [use_07_metric])
120 |     Compute VOC AP given precision and recall.
121 |     If use_07_metric is true, uses the
122 |     VOC 07 11 point method (default:False).
123 |     """
124 |     if use_07_metric:
125 |         # 11 point metric
126 |         ap = 0.
127 |         for t in np.arange(0., 1.1, 0.1):
128 |             if np.sum(rec >= t) == 0:
129 |                 p = 0
130 |             else:
131 |                 p = np.max(prec[rec >= t])
132 |             ap = ap + p / 11.
133 |     else:
134 |         # correct AP calculation
135 |         # first append sentinel values at the end
136 |         mrec = np.concatenate(([0.], rec, [1.]))
137 |         mpre = np.concatenate(([0.], prec, [0.]))
138 | 
139 |         # compute the precision envelope
140 |         for i in range(mpre.size - 1, 0, -1):
141 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
142 | 
143 |         # to calculate area under PR curve, look for points
144 |         # where X axis (recall) changes value
145 |         i = np.where(mrec[1:] != mrec[:-1])[0]
146 | 
147 |         # and sum (\Delta recall) * prec
148 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
149 |     return ap
150 | 
151 | def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255./2.):
152 | # def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=1.):
153 |     image_numpy = image_tensor[0].cpu().float().numpy()
154 |     image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
155 |     return image_numpy.astype(imtype)
156 | 
157 | def im2tensor(image, imtype=np.uint8, cent=1., factor=255./2.):
158 | # def im2tensor(image, imtype=np.uint8, cent=1., factor=1.):
159 |     return torch.Tensor((image / factor - cent)
160 |                         [:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
161 | 


--------------------------------------------------------------------------------
/models/lpips/base_model.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.autograd import Variable
 4 | 
 5 | class BaseModel(torch.nn.Module):
 6 |     def __init__(self):
 7 |         super().__init__()
 8 |         #pass
 9 |     def name(self):
10 |         return 'BaseModel'
11 | 
12 |     def initialize(self, use_gpu=False, gpu_ids=[0]):
13 |         self.use_gpu = use_gpu
14 |         self.gpu_ids = gpu_ids
15 | 
16 |     def forward(self):
17 |         pass
18 | 
19 |     def get_image_paths(self):
20 |         pass
21 | 
22 |     def optimize_parameters(self):
23 |         pass
24 | 
25 |     def get_current_visuals(self):
26 |         return self.input
27 | 
28 |     def get_current_errors(self):
29 |         return {}
30 | 
31 |     def save(self, label):
32 |         pass
33 | 
34 |     # helper saving function that can be used by subclasses
35 |     def save_network(self, network, path, network_label, epoch_label):
36 |         save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
37 |         save_path = os.path.join(path, save_filename)
38 |         torch.save(network.state_dict(), save_path)
39 | 
40 |     # helper loading function that can be used by subclasses
41 |     def load_network(self, network, network_label, epoch_label):
42 |         save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
43 |         save_path = os.path.join(self.save_dir, save_filename)
44 |         print('Loading network from %s'%save_path)
45 |         network.load_state_dict(torch.load(save_path))
46 | 
47 |     def update_learning_rate():
48 |         pass
49 | 
50 |     def get_image_paths(self):
51 |         return self.image_paths
52 | 
53 |     def save_done(self, flag=False):
54 |         np.save(os.path.join(self.save_dir, 'done_flag'),flag)
55 |         np.savetxt(os.path.join(self.save_dir, 'done_flag'),[flag,],fmt='%i')
56 | 
57 | 


--------------------------------------------------------------------------------
/models/lpips/dist_model.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import numpy as np
  3 | import torch
  4 | import os
  5 | from collections import OrderedDict
  6 | from torch.autograd import Variable
  7 | from .base_model import BaseModel
  8 | from scipy.ndimage import zoom
  9 | from . import networks_basic as networks
 10 | from .. import lpips as util
 11 | 
 12 | 
 13 | class DistModel(BaseModel):
 14 |     def name(self):
 15 |         return self.model_name
 16 | 
 17 |     def initialize(self, model='net-lin', net='alex', colorspace='Lab', pnet_rand=False, pnet_tune=False, model_path=None,
 18 |             use_gpu=False, printNet=False, spatial=False, 
 19 |             is_train=False, lr=.0001, beta1=0.5, version='0.1', gpu_ids=[0]):
 20 |         '''
 21 |         INPUTS
 22 |             model - ['net-lin'] for linearly calibrated network
 23 |                     ['net'] for off-the-shelf network
 24 |                     ['L2'] for L2 distance in Lab colorspace
 25 |                     ['SSIM'] for ssim in RGB colorspace
 26 |             net - ['squeeze','alex','vgg']
 27 |             model_path - if None, will look in weights/[NET_NAME].pth
 28 |             colorspace - ['Lab','RGB'] colorspace to use for L2 and SSIM
 29 |             use_gpu - bool - whether or not to use a GPU
 30 |             printNet - bool - whether or not to print network architecture out
 31 |             spatial - bool - whether to output an array containing varying distances across spatial dimensions
 32 |             spatial_shape - if given, output spatial shape. if None then spatial shape is determined automatically via spatial_factor (see below).
 33 |             spatial_factor - if given, specifies upsampling factor relative to the largest spatial extent of a convolutional layer. if None then resized to size of input images.
 34 |             spatial_order - spline order of filter for upsampling in spatial mode, by default 1 (bilinear).
 35 |             is_train - bool - [True] for training mode
 36 |             lr - float - initial learning rate
 37 |             beta1 - float - initial momentum term for adam
 38 |             version - 0.1 for latest, 0.0 was original (with a bug)
 39 |             gpu_ids - int array - [0] by default, gpus to use
 40 |         '''
 41 |         BaseModel.initialize(self, use_gpu=use_gpu, gpu_ids=gpu_ids)
 42 | 
 43 |         self.model = model
 44 |         self.net = net
 45 |         self.is_train = is_train
 46 |         self.spatial = spatial
 47 |         self.gpu_ids = gpu_ids
 48 |         self.model_name = '%s [%s]'%(model,net)
 49 | 
 50 |         if(self.model == 'net-lin'): # pretrained net + linear layer
 51 |             self.net = networks.PNetLin(pnet_rand=pnet_rand, pnet_tune=pnet_tune, pnet_type=net,
 52 |                 use_dropout=True, spatial=spatial, version=version, lpips=True)
 53 |             kw = {}
 54 |             if not use_gpu:
 55 |                 kw['map_location'] = 'cpu'
 56 |             if(model_path is None):
 57 |                 import inspect
 58 |                 model_path = os.path.abspath(os.path.join(inspect.getfile(self.initialize), '..', 'weights/v%s/%s.pth'%(version,net)))
 59 | 
 60 |             if(not is_train):
 61 |                 print('Loading model from: %s'%model_path)
 62 |                 self.net.load_state_dict(torch.load(model_path, **kw), strict=False)
 63 | 
 64 |         elif(self.model=='net'): # pretrained network
 65 |             self.net = networks.PNetLin(pnet_rand=pnet_rand, pnet_type=net, lpips=False)
 66 |         elif(self.model in ['L2','l2']):
 67 |             self.net = networks.L2(use_gpu=use_gpu,colorspace=colorspace) # not really a network, only for testing
 68 |             self.model_name = 'L2'
 69 |         elif(self.model in ['DSSIM','dssim','SSIM','ssim']):
 70 |             self.net = networks.DSSIM(use_gpu=use_gpu,colorspace=colorspace)
 71 |             self.model_name = 'SSIM'
 72 |         else:
 73 |             raise ValueError("Model [%s] not recognized." % self.model)
 74 | 
 75 |         #self.parameters = list(self.net.parameters())
 76 | 
 77 |         if self.is_train: # training mode
 78 |             # extra network on top to go from distances (d0,d1) => predicted human judgment (h*)
 79 |             self.rankLoss = networks.BCERankingLoss()
 80 |             self.parameters += list(self.rankLoss.net.parameters())
 81 |             self.lr = lr
 82 |             self.old_lr = lr
 83 |             self.optimizer_net = torch.optim.Adam(self.parameters, lr=lr, betas=(beta1, 0.999))
 84 |         else: # test mode
 85 |             self.net.eval()
 86 | 
 87 |         # if(use_gpu):
 88 |         #     #self.net = torch.nn.DataParallel(self.net, device_ids=gpu_ids)
 89 |         #     self.net = self.net.to(gpu_ids[0])
 90 |         #     if(self.is_train):
 91 |         #         self.rankLoss = self.rankLoss.to(device=gpu_ids[0]) # just put this on GPU0
 92 | 
 93 |         if(printNet):
 94 |             print('---------- Networks initialized -------------')
 95 |             networks.print_network(self.net)
 96 |             print('-----------------------------------------------')
 97 | 
 98 |     def forward(self, in0, in1, retPerLayer=False):
 99 |         ''' Function computes the distance between image patches in0 and in1
100 |         INPUTS
101 |             in0, in1 - torch.Tensor object of shape Nx3xXxY - image patch scaled to [-1,1]
102 |         OUTPUT
103 |             computed distances between in0 and in1
104 |         '''
105 |         # check if input has 3 dim
106 |         if in0.shape[1] == 1:
107 |             in0 = in0.expand(-1,3,-1,-1)
108 |         if in1.shape[1] == 1:
109 |             in1 = in1.expand(-1,3,-1,-1)
110 |   
111 |         return self.net.forward(in0, in1, retPerLayer=retPerLayer)
112 | 
113 |     # ***** TRAINING FUNCTIONS *****
114 |     def optimize_parameters(self):
115 |         self.forward_train()
116 |         self.optimizer_net.zero_grad()
117 |         self.backward_train()
118 |         self.optimizer_net.step()
119 |         self.clamp_weights()
120 | 
121 |     def clamp_weights(self):
122 |         for module in self.net.modules():
123 |             if(hasattr(module, 'weight') and module.kernel_size==(1,1)):
124 |                 module.weight.data = torch.clamp(module.weight.data,min=0)
125 | 
126 |     def set_input(self, data):
127 |         self.input_ref = data['ref']
128 |         self.input_p0 = data['p0']
129 |         self.input_p1 = data['p1']
130 |         self.input_judge = data['judge']
131 | 
132 |         if(self.use_gpu):
133 |             self.input_ref = self.input_ref.to(device=self.gpu_ids[0])
134 |             self.input_p0 = self.input_p0.to(device=self.gpu_ids[0])
135 |             self.input_p1 = self.input_p1.to(device=self.gpu_ids[0])
136 |             self.input_judge = self.input_judge.to(device=self.gpu_ids[0])
137 | 
138 |         self.var_ref = Variable(self.input_ref,requires_grad=True)
139 |         self.var_p0 = Variable(self.input_p0,requires_grad=True)
140 |         self.var_p1 = Variable(self.input_p1,requires_grad=True)
141 | 
142 |     def forward_train(self): # run forward pass
143 |         # print(self.net.module.scaling_layer.shift)
144 |         # print(torch.norm(self.net.module.net.slice1[0].weight).item(), torch.norm(self.net.module.lin0.model[1].weight).item())
145 | 
146 |         self.d0 = self.forward(self.var_ref, self.var_p0)
147 |         self.d1 = self.forward(self.var_ref, self.var_p1)
148 |         self.acc_r = self.compute_accuracy(self.d0,self.d1,self.input_judge)
149 | 
150 |         self.var_judge = Variable(1.*self.input_judge).view(self.d0.size())
151 | 
152 |         self.loss_total = self.rankLoss.forward(self.d0, self.d1, self.var_judge*2.-1.)
153 | 
154 |         return self.loss_total
155 | 
156 |     def backward_train(self):
157 |         torch.mean(self.loss_total).backward()
158 | 
159 |     def compute_accuracy(self,d0,d1,judge):
160 |         ''' d0, d1 are Variables, judge is a Tensor '''
161 |         d1_lt_d0 = (d1<d0).cpu().data.numpy().flatten()
162 |         judge_per = judge.cpu().numpy().flatten()
163 |         return d1_lt_d0*judge_per + (1-d1_lt_d0)*(1-judge_per)
164 | 
165 |     def get_current_errors(self):
166 |         retDict = OrderedDict([('loss_total', self.loss_total.data.cpu().numpy()),
167 |                             ('acc_r', self.acc_r)])
168 | 
169 |         for key in retDict.keys():
170 |             retDict[key] = np.mean(retDict[key])
171 | 
172 |         return retDict
173 | 
174 |     def get_current_visuals(self):
175 |         zoom_factor = 256/self.var_ref.data.size()[2]
176 | 
177 |         ref_img = util.tensor2im(self.var_ref.data)
178 |         p0_img = util.tensor2im(self.var_p0.data)
179 |         p1_img = util.tensor2im(self.var_p1.data)
180 | 
181 |         ref_img_vis = zoom(ref_img,[zoom_factor, zoom_factor, 1],order=0)
182 |         p0_img_vis = zoom(p0_img,[zoom_factor, zoom_factor, 1],order=0)
183 |         p1_img_vis = zoom(p1_img,[zoom_factor, zoom_factor, 1],order=0)
184 | 
185 |         return OrderedDict([('ref', ref_img_vis),
186 |                             ('p0', p0_img_vis),
187 |                             ('p1', p1_img_vis)])
188 | 
189 |     def save(self, path, label):
190 |         if(self.use_gpu):
191 |             self.save_network(self.net.module, path, '', label)
192 |         else:
193 |             self.save_network(self.net, path, '', label)
194 |         self.save_network(self.rankLoss.net, path, 'rank', label)
195 | 
196 |     def update_learning_rate(self,nepoch_decay):
197 |         lrd = self.lr / nepoch_decay
198 |         lr = self.old_lr - lrd
199 | 
200 |         for param_group in self.optimizer_net.param_groups:
201 |             param_group['lr'] = lr
202 | 
203 |         print('update lr [%s] decay: %f -> %f' % (type,self.old_lr, lr))
204 |         self.old_lr = lr
205 | 
206 | def score_2afc_dataset(data_loader, func, name=''):
207 |     ''' Function computes Two Alternative Forced Choice (2AFC) score using
208 |         distance function 'func' in dataloader 'data_loader'
209 |     INPUTS
210 |         data_loader - CustomDatasetDataLoader object - contains a TwoAFCDataset inside
211 |         func - callable distance function - calling d=func(in0,in1) should take 2
212 |             pytorch tensors with shape Nx3xXxY, and return numpy array of length N
213 |     OUTPUTS
214 |         [0] - 2AFC score in [0,1], fraction of time func agrees with human evaluators
215 |         [1] - dictionary with following elements
216 |             d0s,d1s - N arrays containing distances between reference patch to perturbed patches 
217 |             gts - N array in [0,1], preferred patch selected by human evaluators
218 |                 (closer to "0" for left patch p0, "1" for right patch p1,
219 |                 "0.6" means 60pct people preferred right patch, 40pct preferred left)
220 |             scores - N array in [0,1], corresponding to what percentage function agreed with humans
221 |     CONSTS
222 |         N - number of test triplets in data_loader
223 |     '''
224 | 
225 |     d0s = []
226 |     d1s = []
227 |     gts = []
228 | 
229 |     for data in data_loader.load_data():
230 |         d0s+=func(data['ref'],data['p0']).data.cpu().numpy().flatten().tolist()
231 |         d1s+=func(data['ref'],data['p1']).data.cpu().numpy().flatten().tolist()
232 |         gts+=data['judge'].cpu().numpy().flatten().tolist()
233 | 
234 |     d0s = np.array(d0s)
235 |     d1s = np.array(d1s)
236 |     gts = np.array(gts)
237 |     scores = (d0s<d1s)*(1.-gts) + (d1s<d0s)*gts + (d1s==d0s)*.5
238 | 
239 |     return(np.mean(scores), dict(d0s=d0s,d1s=d1s,gts=gts,scores=scores))
240 | 
241 | def score_jnd_dataset(data_loader, func, name=''):
242 |     ''' Function computes JND score using distance function 'func' in dataloader 'data_loader'
243 |     INPUTS
244 |         data_loader - CustomDatasetDataLoader object - contains a JNDDataset inside
245 |         func - callable distance function - calling d=func(in0,in1) should take 2
246 |             pytorch tensors with shape Nx3xXxY, and return pytorch array of length N
247 |     OUTPUTS
248 |         [0] - JND score in [0,1], mAP score (area under precision-recall curve)
249 |         [1] - dictionary with following elements
250 |             ds - N array containing distances between two patches shown to human evaluator
251 |             sames - N array containing fraction of people who thought the two patches were identical
252 |     CONSTS
253 |         N - number of test triplets in data_loader
254 |     '''
255 | 
256 |     ds = []
257 |     gts = []
258 | 
259 |     for data in data_loader.load_data():
260 |         ds+=func(data['p0'],data['p1']).data.cpu().numpy().tolist()
261 |         gts+=data['same'].cpu().numpy().flatten().tolist()
262 | 
263 |     sames = np.array(gts)
264 |     ds = np.array(ds)
265 | 
266 |     sorted_inds = np.argsort(ds)
267 |     ds_sorted = ds[sorted_inds]
268 |     sames_sorted = sames[sorted_inds]
269 | 
270 |     TPs = np.cumsum(sames_sorted)
271 |     FPs = np.cumsum(1-sames_sorted)
272 |     FNs = np.sum(sames_sorted)-TPs
273 | 
274 |     precs = TPs/(TPs+FPs)
275 |     recs = TPs/(TPs+FNs)
276 |     score = util.voc_ap(recs,precs)
277 | 
278 |     return(score, dict(ds=ds,sames=sames))
279 | 


--------------------------------------------------------------------------------
/models/lpips/networks_basic.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import torch
  3 | import torch.nn as nn
  4 | from torch.autograd import Variable
  5 | from . import pretrained_networks as pn
  6 | 
  7 | from .. import lpips as util
  8 | 
  9 | 
 10 | def spatial_average(in_tens, keepdim=True):
 11 |     return in_tens.mean([2,3],keepdim=keepdim)
 12 | 
 13 | def upsample(in_tens, out_H=64): # assumes scale factor is same for H and W
 14 |     in_H = in_tens.shape[2]
 15 |     scale_factor = 1.*out_H/in_H
 16 | 
 17 |     return nn.Upsample(scale_factor=scale_factor, mode='bilinear', align_corners=False)(in_tens)
 18 | 
 19 | # Learned perceptual metric
 20 | class PNetLin(nn.Module):
 21 |     def __init__(self, pnet_type='vgg', pnet_rand=False, pnet_tune=False, use_dropout=True, spatial=False, version='0.1', lpips=True):
 22 |         super(PNetLin, self).__init__()
 23 | 
 24 |         self.pnet_type = pnet_type
 25 |         self.pnet_tune = pnet_tune
 26 |         self.pnet_rand = pnet_rand
 27 |         self.spatial = spatial
 28 |         self.lpips = lpips
 29 |         self.version = version
 30 |         self.scaling_layer = ScalingLayer()
 31 | 
 32 |         if(self.pnet_type in ['vgg','vgg16']):
 33 |             net_type = pn.vgg16
 34 |             self.chns = [64,128,256,512,512]
 35 |         elif(self.pnet_type=='alex'):
 36 |             net_type = pn.alexnet
 37 |             self.chns = [64,192,384,256,256]
 38 |         elif(self.pnet_type=='squeeze'):
 39 |             net_type = pn.squeezenet
 40 |             self.chns = [64,128,256,384,384,512,512]
 41 |         self.L = len(self.chns)
 42 | 
 43 |         self.net = net_type(pretrained=not self.pnet_rand, requires_grad=self.pnet_tune)
 44 | 
 45 |         if(lpips):
 46 |             self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout)
 47 |             self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout)
 48 |             self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout)
 49 |             self.lin3 = NetLinLayer(self.chns[3], use_dropout=use_dropout)
 50 |             self.lin4 = NetLinLayer(self.chns[4], use_dropout=use_dropout)
 51 |             self.lins = [self.lin0,self.lin1,self.lin2,self.lin3,self.lin4]
 52 |             if(self.pnet_type=='squeeze'): # 7 layers for squeezenet
 53 |                 self.lin5 = NetLinLayer(self.chns[5], use_dropout=use_dropout)
 54 |                 self.lin6 = NetLinLayer(self.chns[6], use_dropout=use_dropout)
 55 |                 self.lins+=[self.lin5,self.lin6]
 56 |             self.lins = nn.ModuleList(self.lins)
 57 | 
 58 |     def forward(self, in0, in1, retPerLayer=False):
 59 |         # v0.0 - original release had a bug, where input was not scaled
 60 |         in0_input, in1_input = (self.scaling_layer(in0), self.scaling_layer(in1)) if self.version=='0.1' else (in0, in1)
 61 |         outs0, outs1 = self.net.forward(in0_input), self.net.forward(in1_input)
 62 |         feats0, feats1, diffs = {}, {}, {}
 63 | 
 64 |         for kk in range(self.L):
 65 |             feats0[kk], feats1[kk] = util.normalize_tensor(outs0[kk]), util.normalize_tensor(outs1[kk])
 66 |             diffs[kk] = (feats0[kk]-feats1[kk])**2
 67 | 
 68 |         if(self.lpips):
 69 |             if(self.spatial):
 70 |                 res = [upsample(self.lins[kk].model(diffs[kk]), out_H=in0.shape[2]) for kk in range(self.L)]
 71 |             else:
 72 |                 res = [spatial_average(self.lins[kk].model(diffs[kk]), keepdim=True) for kk in range(self.L)]
 73 |         else:
 74 |             if(self.spatial):
 75 |                 res = [upsample(diffs[kk].sum(dim=1,keepdim=True), out_H=in0.shape[2]) for kk in range(self.L)]
 76 |             else:
 77 |                 res = [spatial_average(diffs[kk].sum(dim=1,keepdim=True), keepdim=True) for kk in range(self.L)]
 78 | 
 79 |         val = res[0]
 80 |         for l in range(1,self.L):
 81 |             val += res[l]
 82 |         
 83 |         if(retPerLayer):
 84 |             return (val, res)
 85 |         else:
 86 |             return val
 87 | 
 88 | class ScalingLayer(nn.Module):
 89 |     def __init__(self):
 90 |         super(ScalingLayer, self).__init__()
 91 |         self.register_buffer('shift', torch.Tensor([-.030,-.088,-.188])[None,:,None,None])
 92 |         self.register_buffer('scale', torch.Tensor([.458,.448,.450])[None,:,None,None])
 93 | 
 94 |     def forward(self, inp):
 95 |         return (inp - self.shift) / self.scale
 96 | 
 97 | 
 98 | class NetLinLayer(nn.Module):
 99 |     ''' A single linear layer which does a 1x1 conv '''
100 |     def __init__(self, chn_in, chn_out=1, use_dropout=False):
101 |         super(NetLinLayer, self).__init__()
102 | 
103 |         layers = [nn.Dropout(),] if(use_dropout) else []
104 |         layers += [nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False),]
105 |         self.model = nn.Sequential(*layers)
106 | 
107 | 
108 | class Dist2LogitLayer(nn.Module):
109 |     ''' takes 2 distances, puts through fc layers, spits out value between [0,1] (if use_sigmoid is True) '''
110 |     def __init__(self, chn_mid=32, use_sigmoid=True):
111 |         super(Dist2LogitLayer, self).__init__()
112 | 
113 |         layers = [nn.Conv2d(5, chn_mid, 1, stride=1, padding=0, bias=True),]
114 |         layers += [nn.LeakyReLU(0.2,True),]
115 |         layers += [nn.Conv2d(chn_mid, chn_mid, 1, stride=1, padding=0, bias=True),]
116 |         layers += [nn.LeakyReLU(0.2,True),]
117 |         layers += [nn.Conv2d(chn_mid, 1, 1, stride=1, padding=0, bias=True),]
118 |         if(use_sigmoid):
119 |             layers += [nn.Sigmoid(),]
120 |         self.model = nn.Sequential(*layers)
121 | 
122 |     def forward(self,d0,d1,eps=0.1):
123 |         return self.model.forward(torch.cat((d0,d1,d0-d1,d0/(d1+eps),d1/(d0+eps)),dim=1))
124 | 
125 | class BCERankingLoss(nn.Module):
126 |     def __init__(self, chn_mid=32):
127 |         super(BCERankingLoss, self).__init__()
128 |         self.net = Dist2LogitLayer(chn_mid=chn_mid)
129 |         # self.parameters = list(self.net.parameters())
130 |         self.loss = torch.nn.BCELoss()
131 | 
132 |     def forward(self, d0, d1, judge):
133 |         per = (judge+1.)/2.
134 |         self.logit = self.net.forward(d0,d1)
135 |         return self.loss(self.logit, per)
136 | 
137 | # L2, DSSIM metrics
138 | class FakeNet(nn.Module):
139 |     def __init__(self, use_gpu=True, colorspace='Lab'):
140 |         super(FakeNet, self).__init__()
141 |         self.use_gpu = use_gpu
142 |         self.colorspace=colorspace
143 | 
144 | class L2(FakeNet):
145 | 
146 |     def forward(self, in0, in1, retPerLayer=None):
147 |         assert(in0.size()[0]==1) # currently only supports batchSize 1
148 | 
149 |         if(self.colorspace=='RGB'):
150 |             (N,C,X,Y) = in0.size()
151 |             value = torch.mean(torch.mean(torch.mean((in0-in1)**2,dim=1).view(N,1,X,Y),dim=2).view(N,1,1,Y),dim=3).view(N)
152 |             return value
153 |         elif(self.colorspace=='Lab'):
154 |             value = util.l2(util.tensor2np(util.tensor2tensorlab(in0.data,to_norm=False)), 
155 |                 util.tensor2np(util.tensor2tensorlab(in1.data,to_norm=False)), range=100.).astype('float')
156 |             ret_var = Variable( torch.Tensor((value,) ) )
157 |             if(self.use_gpu):
158 |                 ret_var = ret_var.cuda()
159 |             return ret_var
160 | 
161 | class DSSIM(FakeNet):
162 | 
163 |     def forward(self, in0, in1, retPerLayer=None):
164 |         assert(in0.size()[0]==1) # currently only supports batchSize 1
165 | 
166 |         if(self.colorspace=='RGB'):
167 |             value = util.dssim(1.*util.tensor2im(in0.data), 1.*util.tensor2im(in1.data), range=255.).astype('float')
168 |         elif(self.colorspace=='Lab'):
169 |             value = util.dssim(util.tensor2np(util.tensor2tensorlab(in0.data,to_norm=False)), 
170 |                 util.tensor2np(util.tensor2tensorlab(in1.data,to_norm=False)), range=100.).astype('float')
171 |         ret_var = Variable( torch.Tensor((value,) ) )
172 |         if(self.use_gpu):
173 |             ret_var = ret_var.cuda()
174 |         return ret_var
175 | 
176 | def print_network(net):
177 |     num_params = 0
178 |     for param in net.parameters():
179 |         num_params += param.numel()
180 |     print('Network',net)
181 |     print('Total number of parameters: %d' % num_params)
182 | 


--------------------------------------------------------------------------------
/models/lpips/pretrained_networks.py:
--------------------------------------------------------------------------------
  1 | from collections import namedtuple
  2 | import torch
  3 | from torchvision import models as tv
  4 | 
  5 | class squeezenet(torch.nn.Module):
  6 |     def __init__(self, requires_grad=False, pretrained=True):
  7 |         super(squeezenet, self).__init__()
  8 |         pretrained_features = tv.squeezenet1_1(pretrained=pretrained).features
  9 |         self.slice1 = torch.nn.Sequential()
 10 |         self.slice2 = torch.nn.Sequential()
 11 |         self.slice3 = torch.nn.Sequential()
 12 |         self.slice4 = torch.nn.Sequential()
 13 |         self.slice5 = torch.nn.Sequential()
 14 |         self.slice6 = torch.nn.Sequential()
 15 |         self.slice7 = torch.nn.Sequential()
 16 |         self.N_slices = 7
 17 |         for x in range(2):
 18 |             self.slice1.add_module(str(x), pretrained_features[x])
 19 |         for x in range(2,5):
 20 |             self.slice2.add_module(str(x), pretrained_features[x])
 21 |         for x in range(5, 8):
 22 |             self.slice3.add_module(str(x), pretrained_features[x])
 23 |         for x in range(8, 10):
 24 |             self.slice4.add_module(str(x), pretrained_features[x])
 25 |         for x in range(10, 11):
 26 |             self.slice5.add_module(str(x), pretrained_features[x])
 27 |         for x in range(11, 12):
 28 |             self.slice6.add_module(str(x), pretrained_features[x])
 29 |         for x in range(12, 13):
 30 |             self.slice7.add_module(str(x), pretrained_features[x])
 31 |         if not requires_grad:
 32 |             for param in self.parameters():
 33 |                 param.requires_grad = False
 34 | 
 35 |     def forward(self, X):
 36 |         h = self.slice1(X)
 37 |         h_relu1 = h
 38 |         h = self.slice2(h)
 39 |         h_relu2 = h
 40 |         h = self.slice3(h)
 41 |         h_relu3 = h
 42 |         h = self.slice4(h)
 43 |         h_relu4 = h
 44 |         h = self.slice5(h)
 45 |         h_relu5 = h
 46 |         h = self.slice6(h)
 47 |         h_relu6 = h
 48 |         h = self.slice7(h)
 49 |         h_relu7 = h
 50 |         vgg_outputs = namedtuple("SqueezeOutputs", ['relu1','relu2','relu3','relu4','relu5','relu6','relu7'])
 51 |         out = vgg_outputs(h_relu1,h_relu2,h_relu3,h_relu4,h_relu5,h_relu6,h_relu7)
 52 | 
 53 |         return out
 54 | 
 55 | 
 56 | class alexnet(torch.nn.Module):
 57 |     def __init__(self, requires_grad=False, pretrained=True):
 58 |         super(alexnet, self).__init__()
 59 |         alexnet_pretrained_features = tv.alexnet(pretrained=pretrained).features
 60 |         self.slice1 = torch.nn.Sequential()
 61 |         self.slice2 = torch.nn.Sequential()
 62 |         self.slice3 = torch.nn.Sequential()
 63 |         self.slice4 = torch.nn.Sequential()
 64 |         self.slice5 = torch.nn.Sequential()
 65 |         self.N_slices = 5
 66 |         for x in range(2):
 67 |             self.slice1.add_module(str(x), alexnet_pretrained_features[x])
 68 |         for x in range(2, 5):
 69 |             self.slice2.add_module(str(x), alexnet_pretrained_features[x])
 70 |         for x in range(5, 8):
 71 |             self.slice3.add_module(str(x), alexnet_pretrained_features[x])
 72 |         for x in range(8, 10):
 73 |             self.slice4.add_module(str(x), alexnet_pretrained_features[x])
 74 |         for x in range(10, 12):
 75 |             self.slice5.add_module(str(x), alexnet_pretrained_features[x])
 76 |         if not requires_grad:
 77 |             for param in self.parameters():
 78 |                 param.requires_grad = False
 79 | 
 80 |     def forward(self, X):
 81 |         h = self.slice1(X)
 82 |         h_relu1 = h
 83 |         h = self.slice2(h)
 84 |         h_relu2 = h
 85 |         h = self.slice3(h)
 86 |         h_relu3 = h
 87 |         h = self.slice4(h)
 88 |         h_relu4 = h
 89 |         h = self.slice5(h)
 90 |         h_relu5 = h
 91 |         alexnet_outputs = namedtuple("AlexnetOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5'])
 92 |         out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5)
 93 | 
 94 |         return out
 95 | 
 96 | class vgg16(torch.nn.Module):
 97 |     def __init__(self, requires_grad=False, pretrained=True):
 98 |         super(vgg16, self).__init__()
 99 |         vgg_pretrained_features = tv.vgg16(pretrained=pretrained).features
100 |         self.slice1 = torch.nn.Sequential()
101 |         self.slice2 = torch.nn.Sequential()
102 |         self.slice3 = torch.nn.Sequential()
103 |         self.slice4 = torch.nn.Sequential()
104 |         self.slice5 = torch.nn.Sequential()
105 |         self.N_slices = 5
106 |         for x in range(4):
107 |             self.slice1.add_module(str(x), vgg_pretrained_features[x])
108 |         for x in range(4, 9):
109 |             self.slice2.add_module(str(x), vgg_pretrained_features[x])
110 |         for x in range(9, 16):
111 |             self.slice3.add_module(str(x), vgg_pretrained_features[x])
112 |         for x in range(16, 23):
113 |             self.slice4.add_module(str(x), vgg_pretrained_features[x])
114 |         for x in range(23, 30):
115 |             self.slice5.add_module(str(x), vgg_pretrained_features[x])
116 |         if not requires_grad:
117 |             for param in self.parameters():
118 |                 param.requires_grad = False
119 | 
120 |     def forward(self, X):
121 |         h = self.slice1(X)
122 |         h_relu1_2 = h
123 |         h = self.slice2(h)
124 |         h_relu2_2 = h
125 |         h = self.slice3(h)
126 |         h_relu3_3 = h
127 |         h = self.slice4(h)
128 |         h_relu4_3 = h
129 |         h = self.slice5(h)
130 |         h_relu5_3 = h
131 |         vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'])
132 |         out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)
133 | 
134 |         return out
135 | 
136 | 
137 | 
138 | class resnet(torch.nn.Module):
139 |     def __init__(self, requires_grad=False, pretrained=True, num=18):
140 |         super(resnet, self).__init__()
141 |         if(num==18):
142 |             self.net = tv.resnet18(pretrained=pretrained)
143 |         elif(num==34):
144 |             self.net = tv.resnet34(pretrained=pretrained)
145 |         elif(num==50):
146 |             self.net = tv.resnet50(pretrained=pretrained)
147 |         elif(num==101):
148 |             self.net = tv.resnet101(pretrained=pretrained)
149 |         elif(num==152):
150 |             self.net = tv.resnet152(pretrained=pretrained)
151 |         self.N_slices = 5
152 | 
153 |         self.conv1 = self.net.conv1
154 |         self.bn1 = self.net.bn1
155 |         self.relu = self.net.relu
156 |         self.maxpool = self.net.maxpool
157 |         self.layer1 = self.net.layer1
158 |         self.layer2 = self.net.layer2
159 |         self.layer3 = self.net.layer3
160 |         self.layer4 = self.net.layer4
161 | 
162 |     def forward(self, X):
163 |         h = self.conv1(X)
164 |         h = self.bn1(h)
165 |         h = self.relu(h)
166 |         h_relu1 = h
167 |         h = self.maxpool(h)
168 |         h = self.layer1(h)
169 |         h_conv2 = h
170 |         h = self.layer2(h)
171 |         h_conv3 = h
172 |         h = self.layer3(h)
173 |         h_conv4 = h
174 |         h = self.layer4(h)
175 |         h_conv5 = h
176 | 
177 |         outputs = namedtuple("Outputs", ['relu1','conv2','conv3','conv4','conv5'])
178 |         out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5)
179 | 
180 |         return out
181 | 


--------------------------------------------------------------------------------
/models/lpips/weights/v0.0/alex.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/models/lpips/weights/v0.0/alex.pth


--------------------------------------------------------------------------------
/models/lpips/weights/v0.0/squeeze.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/models/lpips/weights/v0.0/squeeze.pth


--------------------------------------------------------------------------------
/models/lpips/weights/v0.0/vgg.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/models/lpips/weights/v0.0/vgg.pth


--------------------------------------------------------------------------------
/models/lpips/weights/v0.1/alex.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/models/lpips/weights/v0.1/alex.pth


--------------------------------------------------------------------------------
/models/lpips/weights/v0.1/squeeze.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/models/lpips/weights/v0.1/squeeze.pth


--------------------------------------------------------------------------------
/models/lpips/weights/v0.1/vgg.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/models/lpips/weights/v0.1/vgg.pth


--------------------------------------------------------------------------------
/models/op/__init__.py:
--------------------------------------------------------------------------------
1 | from .fused_act import FusedLeakyReLU, fused_leaky_relu
2 | from .upfirdn2d import upfirdn2d
3 | 


--------------------------------------------------------------------------------
/models/op/fused_act.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
  2 | #
  3 | # This work is made available under the Nvidia Source Code License-NC.
  4 | # To view a copy of this license, visit
  5 | # https://nvlabs.github.io/stylegan2/license.html
  6 | 
  7 | import os
  8 | 
  9 | import torch
 10 | from torch import nn
 11 | from torch.nn import functional as F
 12 | from torch.autograd import Function
 13 | from torch.utils.cpp_extension import load
 14 | 
 15 | 
 16 | module_path = os.path.dirname(__file__)
 17 | fused = load(
 18 |     "fused",
 19 |     sources=[
 20 |         os.path.join(module_path, "fused_bias_act.cpp"),
 21 |         os.path.join(module_path, "fused_bias_act_kernel.cu"),
 22 |     ],
 23 | )
 24 | 
 25 | 
 26 | class FusedLeakyReLUFunctionBackward(Function):
 27 |     @staticmethod
 28 |     def forward(ctx, grad_output, out, bias, negative_slope, scale):
 29 |         ctx.save_for_backward(out)
 30 |         ctx.negative_slope = negative_slope
 31 |         ctx.scale = scale
 32 | 
 33 |         empty = grad_output.new_empty(0)
 34 | 
 35 |         grad_input = fused.fused_bias_act(
 36 |             grad_output, empty, out, 3, 1, negative_slope, scale
 37 |         )
 38 | 
 39 |         dim = [0]
 40 | 
 41 |         if grad_input.ndim > 2:
 42 |             dim += list(range(2, grad_input.ndim))
 43 | 
 44 |         if bias:
 45 |             grad_bias = grad_input.sum(dim).detach()
 46 | 
 47 |         else:
 48 |             grad_bias = empty
 49 | 
 50 |         return grad_input, grad_bias
 51 | 
 52 |     @staticmethod
 53 |     def backward(ctx, gradgrad_input, gradgrad_bias):
 54 |         out, = ctx.saved_tensors
 55 |         gradgrad_out = fused.fused_bias_act(
 56 |             gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale
 57 |         )
 58 | 
 59 |         return gradgrad_out, None, None, None, None
 60 | 
 61 | 
 62 | class FusedLeakyReLUFunction(Function):
 63 |     @staticmethod
 64 |     def forward(ctx, input, bias, negative_slope, scale):
 65 |         empty = input.new_empty(0)
 66 | 
 67 |         ctx.bias = bias is not None
 68 | 
 69 |         if bias is None:
 70 |             bias = empty
 71 | 
 72 |         out = fused.fused_bias_act(input, bias, empty, 3, 0, negative_slope, scale)
 73 |         ctx.save_for_backward(out)
 74 |         ctx.negative_slope = negative_slope
 75 |         ctx.scale = scale
 76 | 
 77 |         return out
 78 | 
 79 |     @staticmethod
 80 |     def backward(ctx, grad_output):
 81 |         out, = ctx.saved_tensors
 82 | 
 83 |         grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply(
 84 |             grad_output, out, ctx.bias, ctx.negative_slope, ctx.scale
 85 |         )
 86 | 
 87 |         if not ctx.bias:
 88 |             grad_bias = None
 89 | 
 90 |         return grad_input, grad_bias, None, None
 91 | 
 92 | 
 93 | class FusedLeakyReLU(nn.Module):
 94 |     def __init__(self, channel, bias=True, negative_slope=0.2, scale=2 ** 0.5):
 95 |         super().__init__()
 96 | 
 97 |         if bias:
 98 |             self.bias = nn.Parameter(torch.zeros(channel))
 99 | 
100 |         else:
101 |             self.bias = None
102 | 
103 |         self.negative_slope = negative_slope
104 |         self.scale = scale
105 | 
106 |     def forward(self, input):
107 |         return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale)
108 | 
109 | 
110 | def fused_leaky_relu(input, bias=None, negative_slope=0.2, scale=2 ** 0.5):
111 |     if input.device.type == "cpu":
112 |         if bias is not None:
113 |             rest_dim = [1] * (input.ndim - bias.ndim - 1)
114 |             return (
115 |                 F.leaky_relu(
116 |                     input + bias.view(1, bias.shape[0], *rest_dim), negative_slope=0.2
117 |                 )
118 |                 * scale
119 |             )
120 | 
121 |         else:
122 |             return F.leaky_relu(input, negative_slope=0.2) * scale
123 | 
124 |     else:
125 |         return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale)


--------------------------------------------------------------------------------
/models/op/fused_bias_act.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
 2 | //
 3 | // This work is made available under the Nvidia Source Code License-NC.
 4 | // To view a copy of this license, visit
 5 | // https://nvlabs.github.io/stylegan2/license.html
 6 | 
 7 | #include <torch/extension.h>
 8 | 
 9 | 
10 | torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer,
11 |     int act, int grad, float alpha, float scale);
12 | 
13 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
14 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
15 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
16 | 
17 | torch::Tensor fused_bias_act(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer,
18 |     int act, int grad, float alpha, float scale) {
19 |     CHECK_CUDA(input);
20 |     CHECK_CUDA(bias);
21 | 
22 |     return fused_bias_act_op(input, bias, refer, act, grad, alpha, scale);
23 | }
24 | 
25 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
26 |     m.def("fused_bias_act", &fused_bias_act, "fused bias act (CUDA)");
27 | }


--------------------------------------------------------------------------------
/models/op/fused_bias_act_kernel.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
 2 | //
 3 | // This work is made available under the Nvidia Source Code License-NC.
 4 | // To view a copy of this license, visit
 5 | // https://nvlabs.github.io/stylegan2/license.html
 6 | 
 7 | #include <torch/types.h>
 8 | 
 9 | #include <ATen/ATen.h>
10 | #include <ATen/AccumulateType.h>
11 | #include <ATen/cuda/CUDAContext.h>
12 | #include <ATen/cuda/CUDAApplyUtils.cuh>
13 | 
14 | #include <cuda.h>
15 | #include <cuda_runtime.h>
16 | 
17 | 
18 | template <typename scalar_t>
19 | static __global__ void fused_bias_act_kernel(scalar_t* out, const scalar_t* p_x, const scalar_t* p_b, const scalar_t* p_ref,
20 |     int act, int grad, scalar_t alpha, scalar_t scale, int loop_x, int size_x, int step_b, int size_b, int use_bias, int use_ref) {
21 |     int xi = blockIdx.x * loop_x * blockDim.x + threadIdx.x;
22 | 
23 |     scalar_t zero = 0.0;
24 | 
25 |     for (int loop_idx = 0; loop_idx < loop_x && xi < size_x; loop_idx++, xi += blockDim.x) {
26 |         scalar_t x = p_x[xi];
27 | 
28 |         if (use_bias) {
29 |             x += p_b[(xi / step_b) % size_b];
30 |         }
31 | 
32 |         scalar_t ref = use_ref ? p_ref[xi] : zero;
33 | 
34 |         scalar_t y;
35 | 
36 |         switch (act * 10 + grad) {
37 |             default:
38 |             case 10: y = x; break;
39 |             case 11: y = x; break;
40 |             case 12: y = 0.0; break;
41 | 
42 |             case 30: y = (x > 0.0) ? x : x * alpha; break;
43 |             case 31: y = (ref > 0.0) ? x : x * alpha; break;
44 |             case 32: y = 0.0; break;
45 |         }
46 | 
47 |         out[xi] = y * scale;
48 |     }
49 | }
50 | 
51 | 
52 | torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer,
53 |     int act, int grad, float alpha, float scale) {
54 |     int curDevice = -1;
55 |     cudaGetDevice(&curDevice);
56 |     cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice);
57 | 
58 |     auto x = input.contiguous();
59 |     auto b = bias.contiguous();
60 |     auto ref = refer.contiguous();
61 | 
62 |     int use_bias = b.numel() ? 1 : 0;
63 |     int use_ref = ref.numel() ? 1 : 0;
64 | 
65 |     int size_x = x.numel();
66 |     int size_b = b.numel();
67 |     int step_b = 1;
68 | 
69 |     for (int i = 1 + 1; i < x.dim(); i++) {
70 |         step_b *= x.size(i);
71 |     }
72 | 
73 |     int loop_x = 4;
74 |     int block_size = 4 * 32;
75 |     int grid_size = (size_x - 1) / (loop_x * block_size) + 1;
76 | 
77 |     auto y = torch::empty_like(x);
78 | 
79 |     AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "fused_bias_act_kernel", [&] {
80 |         fused_bias_act_kernel<scalar_t><<<grid_size, block_size, 0, stream>>>(
81 |             y.data_ptr<scalar_t>(),
82 |             x.data_ptr<scalar_t>(),
83 |             b.data_ptr<scalar_t>(),
84 |             ref.data_ptr<scalar_t>(),
85 |             act,
86 |             grad,
87 |             alpha,
88 |             scale,
89 |             loop_x,
90 |             size_x,
91 |             step_b,
92 |             size_b,
93 |             use_bias,
94 |             use_ref
95 |         );
96 |     });
97 | 
98 |     return y;
99 | }


--------------------------------------------------------------------------------
/models/op/upfirdn2d.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
 2 | //
 3 | // This work is made available under the Nvidia Source Code License-NC.
 4 | // To view a copy of this license, visit
 5 | // https://nvlabs.github.io/stylegan2/license.html
 6 | 
 7 | #include <torch/extension.h>
 8 | 
 9 | 
10 | torch::Tensor upfirdn2d_op(const torch::Tensor& input, const torch::Tensor& kernel,
11 |                             int up_x, int up_y, int down_x, int down_y,
12 |                             int pad_x0, int pad_x1, int pad_y0, int pad_y1);
13 | 
14 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
15 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
16 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
17 | 
18 | torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel,
19 |                         int up_x, int up_y, int down_x, int down_y,
20 |                         int pad_x0, int pad_x1, int pad_y0, int pad_y1) {
21 |     CHECK_CUDA(input);
22 |     CHECK_CUDA(kernel);
23 | 
24 |     return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1);
25 | }
26 | 
27 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
28 |     m.def("upfirdn2d", &upfirdn2d, "upfirdn2d (CUDA)");
29 | }


--------------------------------------------------------------------------------
/models/op/upfirdn2d.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
  2 | #
  3 | # This work is made available under the Nvidia Source Code License-NC.
  4 | # To view a copy of this license, visit
  5 | # https://nvlabs.github.io/stylegan2/license.html
  6 | 
  7 | import os
  8 | 
  9 | import torch
 10 | from torch.nn import functional as F
 11 | from torch.autograd import Function
 12 | from torch.utils.cpp_extension import load
 13 | 
 14 | 
 15 | module_path = os.path.dirname(__file__)
 16 | upfirdn2d_op = load(
 17 |     "upfirdn2d",
 18 |     sources=[
 19 |         os.path.join(module_path, "upfirdn2d.cpp"),
 20 |         os.path.join(module_path, "upfirdn2d_kernel.cu"),
 21 |     ],
 22 | )
 23 | 
 24 | 
 25 | class UpFirDn2dBackward(Function):
 26 |     @staticmethod
 27 |     def forward(
 28 |         ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, in_size, out_size
 29 |     ):
 30 | 
 31 |         up_x, up_y = up
 32 |         down_x, down_y = down
 33 |         g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad
 34 | 
 35 |         grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1)
 36 | 
 37 |         grad_input = upfirdn2d_op.upfirdn2d(
 38 |             grad_output,
 39 |             grad_kernel,
 40 |             down_x,
 41 |             down_y,
 42 |             up_x,
 43 |             up_y,
 44 |             g_pad_x0,
 45 |             g_pad_x1,
 46 |             g_pad_y0,
 47 |             g_pad_y1,
 48 |         )
 49 |         grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], in_size[3])
 50 | 
 51 |         ctx.save_for_backward(kernel)
 52 | 
 53 |         pad_x0, pad_x1, pad_y0, pad_y1 = pad
 54 | 
 55 |         ctx.up_x = up_x
 56 |         ctx.up_y = up_y
 57 |         ctx.down_x = down_x
 58 |         ctx.down_y = down_y
 59 |         ctx.pad_x0 = pad_x0
 60 |         ctx.pad_x1 = pad_x1
 61 |         ctx.pad_y0 = pad_y0
 62 |         ctx.pad_y1 = pad_y1
 63 |         ctx.in_size = in_size
 64 |         ctx.out_size = out_size
 65 | 
 66 |         return grad_input
 67 | 
 68 |     @staticmethod
 69 |     def backward(ctx, gradgrad_input):
 70 |         kernel, = ctx.saved_tensors
 71 | 
 72 |         gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], ctx.in_size[3], 1)
 73 | 
 74 |         gradgrad_out = upfirdn2d_op.upfirdn2d(
 75 |             gradgrad_input,
 76 |             kernel,
 77 |             ctx.up_x,
 78 |             ctx.up_y,
 79 |             ctx.down_x,
 80 |             ctx.down_y,
 81 |             ctx.pad_x0,
 82 |             ctx.pad_x1,
 83 |             ctx.pad_y0,
 84 |             ctx.pad_y1,
 85 |         )
 86 |         # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], ctx.out_size[1], ctx.in_size[3])
 87 |         gradgrad_out = gradgrad_out.view(
 88 |             ctx.in_size[0], ctx.in_size[1], ctx.out_size[0], ctx.out_size[1]
 89 |         )
 90 | 
 91 |         return gradgrad_out, None, None, None, None, None, None, None, None
 92 | 
 93 | 
 94 | class UpFirDn2d(Function):
 95 |     @staticmethod
 96 |     def forward(ctx, input, kernel, up, down, pad):
 97 |         up_x, up_y = up
 98 |         down_x, down_y = down
 99 |         pad_x0, pad_x1, pad_y0, pad_y1 = pad
100 | 
101 |         kernel_h, kernel_w = kernel.shape
102 |         batch, channel, in_h, in_w = input.shape
103 |         ctx.in_size = input.shape
104 | 
105 |         input = input.reshape(-1, in_h, in_w, 1)
106 | 
107 |         ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))
108 | 
109 |         out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
110 |         out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
111 |         ctx.out_size = (out_h, out_w)
112 | 
113 |         ctx.up = (up_x, up_y)
114 |         ctx.down = (down_x, down_y)
115 |         ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1)
116 | 
117 |         g_pad_x0 = kernel_w - pad_x0 - 1
118 |         g_pad_y0 = kernel_h - pad_y0 - 1
119 |         g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1
120 |         g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1
121 | 
122 |         ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1)
123 | 
124 |         out = upfirdn2d_op.upfirdn2d(
125 |             input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
126 |         )
127 |         # out = out.view(major, out_h, out_w, minor)
128 |         out = out.view(-1, channel, out_h, out_w)
129 | 
130 |         return out
131 | 
132 |     @staticmethod
133 |     def backward(ctx, grad_output):
134 |         kernel, grad_kernel = ctx.saved_tensors
135 | 
136 |         grad_input = UpFirDn2dBackward.apply(
137 |             grad_output,
138 |             kernel,
139 |             grad_kernel,
140 |             ctx.up,
141 |             ctx.down,
142 |             ctx.pad,
143 |             ctx.g_pad,
144 |             ctx.in_size,
145 |             ctx.out_size,
146 |         )
147 | 
148 |         return grad_input, None, None, None, None
149 | 
150 | 
151 | def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
152 |     if input.device.type == "cpu":
153 |         out = upfirdn2d_native(
154 |             input, kernel, up, up, down, down, pad[0], pad[1], pad[0], pad[1]
155 |         )
156 | 
157 |     else:
158 |         out = UpFirDn2d.apply(
159 |             input, kernel, (up, up), (down, down), (pad[0], pad[1], pad[0], pad[1])
160 |         )
161 | 
162 |     return out
163 | 
164 | 
165 | def upfirdn2d_native(
166 |     input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
167 | ):
168 |     _, channel, in_h, in_w = input.shape
169 |     input = input.reshape(-1, in_h, in_w, 1)
170 | 
171 |     _, in_h, in_w, minor = input.shape
172 |     kernel_h, kernel_w = kernel.shape
173 | 
174 |     out = input.view(-1, in_h, 1, in_w, 1, minor)
175 |     out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
176 |     out = out.view(-1, in_h * up_y, in_w * up_x, minor)
177 | 
178 |     out = F.pad(
179 |         out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]
180 |     )
181 |     out = out[
182 |         :,
183 |         max(-pad_y0, 0) : out.shape[1] - max(-pad_y1, 0),
184 |         max(-pad_x0, 0) : out.shape[2] - max(-pad_x1, 0),
185 |         :,
186 |     ]
187 | 
188 |     out = out.permute(0, 3, 1, 2)
189 |     out = out.reshape(
190 |         [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]
191 |     )
192 |     w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
193 |     out = F.conv2d(out, w)
194 |     out = out.reshape(
195 |         -1,
196 |         minor,
197 |         in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1,
198 |         in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
199 |     )
200 |     out = out.permute(0, 2, 3, 1)
201 |     out = out[:, ::down_y, ::down_x, :]
202 | 
203 |     out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
204 |     out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
205 | 
206 |     return out.view(-1, channel, out_h, out_w)


--------------------------------------------------------------------------------
/models/op/upfirdn2d_kernel.cu:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
  2 | //
  3 | // This work is made available under the Nvidia Source Code License-NC.
  4 | // To view a copy of this license, visit
  5 | // https://nvlabs.github.io/stylegan2/license.html
  6 | 
  7 | #include <torch/types.h>
  8 | 
  9 | #include <ATen/ATen.h>
 10 | #include <ATen/AccumulateType.h>
 11 | #include <ATen/cuda/CUDAApplyUtils.cuh>
 12 | #include <ATen/cuda/CUDAContext.h>
 13 | 
 14 | #include <cuda.h>
 15 | #include <cuda_runtime.h>
 16 | 
 17 | static __host__ __device__ __forceinline__ int floor_div(int a, int b) {
 18 |   int c = a / b;
 19 | 
 20 |   if (c * b > a) {
 21 |     c--;
 22 |   }
 23 | 
 24 |   return c;
 25 | }
 26 | 
 27 | struct UpFirDn2DKernelParams {
 28 |   int up_x;
 29 |   int up_y;
 30 |   int down_x;
 31 |   int down_y;
 32 |   int pad_x0;
 33 |   int pad_x1;
 34 |   int pad_y0;
 35 |   int pad_y1;
 36 | 
 37 |   int major_dim;
 38 |   int in_h;
 39 |   int in_w;
 40 |   int minor_dim;
 41 |   int kernel_h;
 42 |   int kernel_w;
 43 |   int out_h;
 44 |   int out_w;
 45 |   int loop_major;
 46 |   int loop_x;
 47 | };
 48 | 
 49 | template <typename scalar_t>
 50 | __global__ void upfirdn2d_kernel_large(scalar_t *out, const scalar_t *input,
 51 |                                        const scalar_t *kernel,
 52 |                                        const UpFirDn2DKernelParams p) {
 53 |   int minor_idx = blockIdx.x * blockDim.x + threadIdx.x;
 54 |   int out_y = minor_idx / p.minor_dim;
 55 |   minor_idx -= out_y * p.minor_dim;
 56 |   int out_x_base = blockIdx.y * p.loop_x * blockDim.y + threadIdx.y;
 57 |   int major_idx_base = blockIdx.z * p.loop_major;
 58 | 
 59 |   if (out_x_base >= p.out_w || out_y >= p.out_h ||
 60 |       major_idx_base >= p.major_dim) {
 61 |     return;
 62 |   }
 63 | 
 64 |   int mid_y = out_y * p.down_y + p.up_y - 1 - p.pad_y0;
 65 |   int in_y = min(max(floor_div(mid_y, p.up_y), 0), p.in_h);
 66 |   int h = min(max(floor_div(mid_y + p.kernel_h, p.up_y), 0), p.in_h) - in_y;
 67 |   int kernel_y = mid_y + p.kernel_h - (in_y + 1) * p.up_y;
 68 | 
 69 |   for (int loop_major = 0, major_idx = major_idx_base;
 70 |        loop_major < p.loop_major && major_idx < p.major_dim;
 71 |        loop_major++, major_idx++) {
 72 |     for (int loop_x = 0, out_x = out_x_base;
 73 |          loop_x < p.loop_x && out_x < p.out_w; loop_x++, out_x += blockDim.y) {
 74 |       int mid_x = out_x * p.down_x + p.up_x - 1 - p.pad_x0;
 75 |       int in_x = min(max(floor_div(mid_x, p.up_x), 0), p.in_w);
 76 |       int w = min(max(floor_div(mid_x + p.kernel_w, p.up_x), 0), p.in_w) - in_x;
 77 |       int kernel_x = mid_x + p.kernel_w - (in_x + 1) * p.up_x;
 78 | 
 79 |       const scalar_t *x_p =
 80 |           &input[((major_idx * p.in_h + in_y) * p.in_w + in_x) * p.minor_dim +
 81 |                  minor_idx];
 82 |       const scalar_t *k_p = &kernel[kernel_y * p.kernel_w + kernel_x];
 83 |       int x_px = p.minor_dim;
 84 |       int k_px = -p.up_x;
 85 |       int x_py = p.in_w * p.minor_dim;
 86 |       int k_py = -p.up_y * p.kernel_w;
 87 | 
 88 |       scalar_t v = 0.0f;
 89 | 
 90 |       for (int y = 0; y < h; y++) {
 91 |         for (int x = 0; x < w; x++) {
 92 |           v += static_cast<scalar_t>(*x_p) * static_cast<scalar_t>(*k_p);
 93 |           x_p += x_px;
 94 |           k_p += k_px;
 95 |         }
 96 | 
 97 |         x_p += x_py - w * x_px;
 98 |         k_p += k_py - w * k_px;
 99 |       }
100 | 
101 |       out[((major_idx * p.out_h + out_y) * p.out_w + out_x) * p.minor_dim +
102 |           minor_idx] = v;
103 |     }
104 |   }
105 | }
106 | 
107 | template <typename scalar_t, int up_x, int up_y, int down_x, int down_y,
108 |           int kernel_h, int kernel_w, int tile_out_h, int tile_out_w>
109 | __global__ void upfirdn2d_kernel(scalar_t *out, const scalar_t *input,
110 |                                  const scalar_t *kernel,
111 |                                  const UpFirDn2DKernelParams p) {
112 |   const int tile_in_h = ((tile_out_h - 1) * down_y + kernel_h - 1) / up_y + 1;
113 |   const int tile_in_w = ((tile_out_w - 1) * down_x + kernel_w - 1) / up_x + 1;
114 | 
115 |   __shared__ volatile float sk[kernel_h][kernel_w];
116 |   __shared__ volatile float sx[tile_in_h][tile_in_w];
117 | 
118 |   int minor_idx = blockIdx.x;
119 |   int tile_out_y = minor_idx / p.minor_dim;
120 |   minor_idx -= tile_out_y * p.minor_dim;
121 |   tile_out_y *= tile_out_h;
122 |   int tile_out_x_base = blockIdx.y * p.loop_x * tile_out_w;
123 |   int major_idx_base = blockIdx.z * p.loop_major;
124 | 
125 |   if (tile_out_x_base >= p.out_w | tile_out_y >= p.out_h |
126 |       major_idx_base >= p.major_dim) {
127 |     return;
128 |   }
129 | 
130 |   for (int tap_idx = threadIdx.x; tap_idx < kernel_h * kernel_w;
131 |        tap_idx += blockDim.x) {
132 |     int ky = tap_idx / kernel_w;
133 |     int kx = tap_idx - ky * kernel_w;
134 |     scalar_t v = 0.0;
135 | 
136 |     if (kx < p.kernel_w & ky < p.kernel_h) {
137 |       v = kernel[(p.kernel_h - 1 - ky) * p.kernel_w + (p.kernel_w - 1 - kx)];
138 |     }
139 | 
140 |     sk[ky][kx] = v;
141 |   }
142 | 
143 |   for (int loop_major = 0, major_idx = major_idx_base;
144 |        loop_major < p.loop_major & major_idx < p.major_dim;
145 |        loop_major++, major_idx++) {
146 |     for (int loop_x = 0, tile_out_x = tile_out_x_base;
147 |          loop_x < p.loop_x & tile_out_x < p.out_w;
148 |          loop_x++, tile_out_x += tile_out_w) {
149 |       int tile_mid_x = tile_out_x * down_x + up_x - 1 - p.pad_x0;
150 |       int tile_mid_y = tile_out_y * down_y + up_y - 1 - p.pad_y0;
151 |       int tile_in_x = floor_div(tile_mid_x, up_x);
152 |       int tile_in_y = floor_div(tile_mid_y, up_y);
153 | 
154 |       __syncthreads();
155 | 
156 |       for (int in_idx = threadIdx.x; in_idx < tile_in_h * tile_in_w;
157 |            in_idx += blockDim.x) {
158 |         int rel_in_y = in_idx / tile_in_w;
159 |         int rel_in_x = in_idx - rel_in_y * tile_in_w;
160 |         int in_x = rel_in_x + tile_in_x;
161 |         int in_y = rel_in_y + tile_in_y;
162 | 
163 |         scalar_t v = 0.0;
164 | 
165 |         if (in_x >= 0 & in_y >= 0 & in_x < p.in_w & in_y < p.in_h) {
166 |           v = input[((major_idx * p.in_h + in_y) * p.in_w + in_x) *
167 |                         p.minor_dim +
168 |                     minor_idx];
169 |         }
170 | 
171 |         sx[rel_in_y][rel_in_x] = v;
172 |       }
173 | 
174 |       __syncthreads();
175 |       for (int out_idx = threadIdx.x; out_idx < tile_out_h * tile_out_w;
176 |            out_idx += blockDim.x) {
177 |         int rel_out_y = out_idx / tile_out_w;
178 |         int rel_out_x = out_idx - rel_out_y * tile_out_w;
179 |         int out_x = rel_out_x + tile_out_x;
180 |         int out_y = rel_out_y + tile_out_y;
181 | 
182 |         int mid_x = tile_mid_x + rel_out_x * down_x;
183 |         int mid_y = tile_mid_y + rel_out_y * down_y;
184 |         int in_x = floor_div(mid_x, up_x);
185 |         int in_y = floor_div(mid_y, up_y);
186 |         int rel_in_x = in_x - tile_in_x;
187 |         int rel_in_y = in_y - tile_in_y;
188 |         int kernel_x = (in_x + 1) * up_x - mid_x - 1;
189 |         int kernel_y = (in_y + 1) * up_y - mid_y - 1;
190 | 
191 |         scalar_t v = 0.0;
192 | 
193 | #pragma unroll
194 |         for (int y = 0; y < kernel_h / up_y; y++)
195 | #pragma unroll
196 |           for (int x = 0; x < kernel_w / up_x; x++)
197 |             v += sx[rel_in_y + y][rel_in_x + x] *
198 |                  sk[kernel_y + y * up_y][kernel_x + x * up_x];
199 | 
200 |         if (out_x < p.out_w & out_y < p.out_h) {
201 |           out[((major_idx * p.out_h + out_y) * p.out_w + out_x) * p.minor_dim +
202 |               minor_idx] = v;
203 |         }
204 |       }
205 |     }
206 |   }
207 | }
208 | 
209 | torch::Tensor upfirdn2d_op(const torch::Tensor &input,
210 |                            const torch::Tensor &kernel, int up_x, int up_y,
211 |                            int down_x, int down_y, int pad_x0, int pad_x1,
212 |                            int pad_y0, int pad_y1) {
213 |   int curDevice = -1;
214 |   cudaGetDevice(&curDevice);
215 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice);
216 | 
217 |   UpFirDn2DKernelParams p;
218 | 
219 |   auto x = input.contiguous();
220 |   auto k = kernel.contiguous();
221 | 
222 |   p.major_dim = x.size(0);
223 |   p.in_h = x.size(1);
224 |   p.in_w = x.size(2);
225 |   p.minor_dim = x.size(3);
226 |   p.kernel_h = k.size(0);
227 |   p.kernel_w = k.size(1);
228 |   p.up_x = up_x;
229 |   p.up_y = up_y;
230 |   p.down_x = down_x;
231 |   p.down_y = down_y;
232 |   p.pad_x0 = pad_x0;
233 |   p.pad_x1 = pad_x1;
234 |   p.pad_y0 = pad_y0;
235 |   p.pad_y1 = pad_y1;
236 | 
237 |   p.out_h = (p.in_h * p.up_y + p.pad_y0 + p.pad_y1 - p.kernel_h + p.down_y) /
238 |             p.down_y;
239 |   p.out_w = (p.in_w * p.up_x + p.pad_x0 + p.pad_x1 - p.kernel_w + p.down_x) /
240 |             p.down_x;
241 | 
242 |   auto out =
243 |       at::empty({p.major_dim, p.out_h, p.out_w, p.minor_dim}, x.options());
244 | 
245 |   int mode = -1;
246 | 
247 |   int tile_out_h = -1;
248 |   int tile_out_w = -1;
249 | 
250 |   if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 &&
251 |       p.kernel_h <= 4 && p.kernel_w <= 4) {
252 |     mode = 1;
253 |     tile_out_h = 16;
254 |     tile_out_w = 64;
255 |   }
256 | 
257 |   if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 &&
258 |       p.kernel_h <= 3 && p.kernel_w <= 3) {
259 |     mode = 2;
260 |     tile_out_h = 16;
261 |     tile_out_w = 64;
262 |   }
263 | 
264 |   if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 &&
265 |       p.kernel_h <= 4 && p.kernel_w <= 4) {
266 |     mode = 3;
267 |     tile_out_h = 16;
268 |     tile_out_w = 64;
269 |   }
270 | 
271 |   if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 &&
272 |       p.kernel_h <= 2 && p.kernel_w <= 2) {
273 |     mode = 4;
274 |     tile_out_h = 16;
275 |     tile_out_w = 64;
276 |   }
277 | 
278 |   if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 &&
279 |       p.kernel_h <= 4 && p.kernel_w <= 4) {
280 |     mode = 5;
281 |     tile_out_h = 8;
282 |     tile_out_w = 32;
283 |   }
284 | 
285 |   if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 &&
286 |       p.kernel_h <= 2 && p.kernel_w <= 2) {
287 |     mode = 6;
288 |     tile_out_h = 8;
289 |     tile_out_w = 32;
290 |   }
291 | 
292 |   dim3 block_size;
293 |   dim3 grid_size;
294 | 
295 |   if (tile_out_h > 0 && tile_out_w > 0) {
296 |     p.loop_major = (p.major_dim - 1) / 16384 + 1;
297 |     p.loop_x = 1;
298 |     block_size = dim3(32 * 8, 1, 1);
299 |     grid_size = dim3(((p.out_h - 1) / tile_out_h + 1) * p.minor_dim,
300 |                      (p.out_w - 1) / (p.loop_x * tile_out_w) + 1,
301 |                      (p.major_dim - 1) / p.loop_major + 1);
302 |   } else {
303 |     p.loop_major = (p.major_dim - 1) / 16384 + 1;
304 |     p.loop_x = 4;
305 |     block_size = dim3(4, 32, 1);
306 |     grid_size = dim3((p.out_h * p.minor_dim - 1) / block_size.x + 1,
307 |                      (p.out_w - 1) / (p.loop_x * block_size.y) + 1,
308 |                      (p.major_dim - 1) / p.loop_major + 1);
309 |   }
310 | 
311 |   AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] {
312 |     switch (mode) {
313 |     case 1:
314 |       upfirdn2d_kernel<scalar_t, 1, 1, 1, 1, 4, 4, 16, 64>
315 |           <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
316 |                                                  x.data_ptr<scalar_t>(),
317 |                                                  k.data_ptr<scalar_t>(), p);
318 | 
319 |       break;
320 | 
321 |     case 2:
322 |       upfirdn2d_kernel<scalar_t, 1, 1, 1, 1, 3, 3, 16, 64>
323 |           <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
324 |                                                  x.data_ptr<scalar_t>(),
325 |                                                  k.data_ptr<scalar_t>(), p);
326 | 
327 |       break;
328 | 
329 |     case 3:
330 |       upfirdn2d_kernel<scalar_t, 2, 2, 1, 1, 4, 4, 16, 64>
331 |           <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
332 |                                                  x.data_ptr<scalar_t>(),
333 |                                                  k.data_ptr<scalar_t>(), p);
334 | 
335 |       break;
336 | 
337 |     case 4:
338 |       upfirdn2d_kernel<scalar_t, 2, 2, 1, 1, 2, 2, 16, 64>
339 |           <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
340 |                                                  x.data_ptr<scalar_t>(),
341 |                                                  k.data_ptr<scalar_t>(), p);
342 | 
343 |       break;
344 | 
345 |     case 5:
346 |       upfirdn2d_kernel<scalar_t, 1, 1, 2, 2, 4, 4, 8, 32>
347 |           <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
348 |                                                  x.data_ptr<scalar_t>(),
349 |                                                  k.data_ptr<scalar_t>(), p);
350 | 
351 |       break;
352 | 
353 |     case 6:
354 |       upfirdn2d_kernel<scalar_t, 1, 1, 2, 2, 4, 4, 8, 32>
355 |           <<<grid_size, block_size, 0, stream>>>(out.data_ptr<scalar_t>(),
356 |                                                  x.data_ptr<scalar_t>(),
357 |                                                  k.data_ptr<scalar_t>(), p);
358 | 
359 |       break;
360 | 
361 |     default:
362 |       upfirdn2d_kernel_large<scalar_t><<<grid_size, block_size, 0, stream>>>(
363 |           out.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(),
364 |           k.data_ptr<scalar_t>(), p);
365 |     }
366 |   });
367 | 
368 |   return out;
369 | }


--------------------------------------------------------------------------------
/models/stylegan2.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
  2 | #
  3 | # This work is made available under the Nvidia Source Code License-NC.
  4 | # To view a copy of this license, visit
  5 | # https://nvlabs.github.io/stylegan2/license.html
  6 | 
  7 | from torch import nn
  8 | from models.utils import *
  9 | import numpy as np
 10 | 
 11 | 
 12 | class Generator(nn.Module):
 13 |     def __init__(
 14 |         self,
 15 |         size,
 16 |         style_dim,
 17 |         n_mlp,
 18 |         channel_multiplier=2,
 19 |         blur_kernel=[1, 3, 3, 1],
 20 |         lr_mlp=0.01,
 21 |         randomize_noise=True,
 22 |         image_mode='RGB',
 23 |     ):
 24 |         super().__init__()
 25 | 
 26 |         self.size = size
 27 | 
 28 |         self.style_dim = style_dim
 29 | 
 30 |         layers = [PixelNorm()]
 31 | 
 32 |         for i in range(n_mlp):
 33 |             layers.append(
 34 |                 EqualLinear(
 35 |                     style_dim, style_dim, lr_mul=lr_mlp, activation="fused_lrelu"
 36 |                 )
 37 |             )
 38 | 
 39 |         self.style = nn.Sequential(*layers)
 40 | 
 41 |         self.channels = {
 42 |             4: 512,
 43 |             8: 512,
 44 |             16: 512,
 45 |             32: 512,
 46 |             64: 256 * channel_multiplier,
 47 |             128: 128 * channel_multiplier,
 48 |             256: 64 * channel_multiplier,
 49 |             512: 32 * channel_multiplier,
 50 |             1024: 16 * channel_multiplier,
 51 |         }
 52 | 
 53 |         self.input = ConstantInput(self.channels[4])
 54 |         self.conv1 = StyledConv(
 55 |             self.channels[4], self.channels[4], 3, style_dim, blur_kernel=blur_kernel
 56 |         )
 57 | 
 58 |         if image_mode == 'RGB':
 59 |             self.rgb_channel = 3
 60 |         else:
 61 |             self.rgb_channel = 1
 62 | 
 63 | 
 64 |         self.to_rgb1 = ToRGB(self.channels[4], style_dim, out_channel=self.rgb_channel, upsample=False)
 65 | 
 66 |         self.log_size = int(math.log(size, 2))
 67 |         self.num_layers = (self.log_size - 2) * 2 + 1
 68 | 
 69 |         self.convs = nn.ModuleList()
 70 |         self.upsamples = nn.ModuleList()
 71 |         self.to_rgbs = nn.ModuleList()
 72 |         self.noises = nn.Module()
 73 |         self.randomize_noise = randomize_noise
 74 |         in_channel = self.channels[4]
 75 | 
 76 |         for layer_idx in range(self.num_layers):
 77 |             res = (layer_idx + 5) // 2
 78 |             shape = [1, 1, 2 ** res, 2 ** res]
 79 |             self.noises.register_buffer(f"noise_{layer_idx}", torch.randn(*shape))
 80 | 
 81 |         for i in range(3, self.log_size + 1):
 82 |             out_channel = self.channels[2 ** i]
 83 | 
 84 |             self.convs.append(
 85 |                 StyledConv(
 86 |                     in_channel,
 87 |                     out_channel,
 88 |                     3,
 89 |                     style_dim,
 90 |                     upsample=True,
 91 |                     blur_kernel=blur_kernel,
 92 |                 )
 93 |             )
 94 | 
 95 |             self.convs.append(
 96 |                 StyledConv(
 97 |                     out_channel, out_channel, 3, style_dim, blur_kernel=blur_kernel
 98 |                 )
 99 |             )
100 | 
101 |             self.to_rgbs.append(ToRGB(out_channel, style_dim))
102 | 
103 |             in_channel = out_channel
104 | 
105 |         self.n_latent = self.log_size * 2 - 2
106 | 
107 | 
108 |     def make_noise(self):
109 |         device = self.input.input.device
110 | 
111 |         noises = [torch.randn(1, 1, 2 ** 2, 2 ** 2, device=device)]
112 | 
113 |         for i in range(3, self.log_size + 1):
114 |             for _ in range(2):
115 |                 noises.append(torch.randn(1, 1, 2 ** i, 2 ** i, device=device))
116 | 
117 |         return noises
118 | 
119 |     def make_mean_latent(self, n_latent):
120 |         latent_in = torch.randn(
121 |             n_latent, self.style_dim, device=self.input.input.device
122 |         )
123 |         latent = self.style(latent_in).mean(0, keepdim=True)
124 |         self.mean_latent = latent
125 | 
126 |         return latent
127 | 
128 |     def get_latent(self, input):
129 | 
130 |         style = self.style(input)
131 |         return style
132 | 
133 |     def truncation(self, input):
134 | 
135 |         out = self.mean_latent.unsqueeze(1) + 0.7 * (input - self.mean_latent.unsqueeze(1))
136 | 
137 |         return out
138 | 
139 | 
140 |     def g_mapping(self, input):
141 |         style = self.style(input)
142 |         style = style.unsqueeze(1).repeat(1,  self.n_latent, 1)
143 | 
144 |         return style
145 | 
146 |     def g_synthesis(self, latent):
147 |         if self.randomize_noise:
148 |             noise = [None] * self.num_layers
149 |         else:
150 |             noise = [
151 |                 getattr(self.noises, f"noise_{i}") for i in range(self.num_layers)
152 |             ]
153 |         styles_feature = []
154 | 
155 |         out = self.input(latent)
156 |         styles_feature.append(out)
157 | 
158 |         out = self.conv1(out, latent[:, 0], noise=noise[0])
159 |         styles_feature.append(out)
160 | 
161 |         skip = self.to_rgb1(out, latent[:, 1])
162 | 
163 |         i = 1
164 | 
165 |         for conv1, conv2, noise1, noise2, to_rgb in zip(
166 |             self.convs[::2], self.convs[1::2], noise[1::2], noise[2::2], self.to_rgbs
167 |         ):
168 |             out = conv1(out, latent[:, i], noise=noise1)
169 | 
170 |             styles_feature.append(out)
171 | 
172 |             out = conv2(out, latent[:, i + 1], noise=noise2)
173 | 
174 |             styles_feature.append(out)
175 | 
176 |             skip = to_rgb(out, latent[:, i + 2], skip)
177 | 
178 |             i += 2
179 | 
180 |         image = skip
181 | 
182 |         return image, styles_feature
183 | 
184 |     def sample(self, num, latent_space_type='Z'):
185 |         """Samples latent codes randomly.
186 |         Args:
187 |         num: Number of latent codes to sample. Should be positive.
188 |         latent_space_type: Type of latent space from which to sample latent code.
189 |             Only [`Z`, `W`, `WP`] are supported. Case insensitive. (default: `Z`)
190 |         Returns:
191 |         A `numpy.ndarray` as sampled latend codes.
192 |         Raises:
193 |         ValueError: If the given `latent_space_type` is not supported.
194 |         """
195 |         latent_space_type = latent_space_type.upper()
196 |         if latent_space_type == 'Z':
197 |             latent_codes = np.random.randn(num, self.style_dim)
198 |         elif latent_space_type == 'W':
199 |             latent_codes = np.random.randn(num, self.style_dim)
200 |         elif latent_space_type == 'WP':
201 |             latent_codes = np.random.randn(num, self.n_latent, self.style_dim)
202 |         else:
203 |             raise ValueError(f'Latent space type `{latent_space_type}` is invalid!')
204 | 
205 |         return latent_codes.astype(np.float32)
206 | 
207 | 
208 |     def forward(
209 |         self,
210 |         styles,
211 |         return_latents=False,
212 |         inject_index=None,
213 | 
214 |         input_is_latent=False,
215 |         noise=None,
216 |         randomize_noise=True,
217 |     ):
218 |         if not input_is_latent:
219 |             styles = [self.style(s) for s in styles]
220 | 
221 | 
222 |         if noise is None:
223 |             if randomize_noise:
224 |                 noise = [None] * self.num_layers
225 |             else:
226 |                 noise = [
227 |                     getattr(self.noises, f"noise_{i}") for i in range(self.num_layers)
228 |                 ]
229 | 
230 | 
231 | 
232 |         styles = self.truncation(styles)
233 | 
234 |         if len(styles) < 2:
235 |             inject_index = self.n_latent
236 | 
237 |             if styles[0].ndim < 3:
238 |                 latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
239 | 
240 |             else:
241 |                 latent = styles[0]
242 | 
243 |         else:
244 |             if inject_index is None:
245 |                 inject_index = random.randint(1, self.n_latent - 1)
246 | 
247 |             latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
248 |             latent2 = styles[1].unsqueeze(1).repeat(1, self.n_latent - inject_index, 1)
249 | 
250 |             latent = torch.cat([latent, latent2], 1)
251 | 
252 |         out = self.input(latent)
253 |         out = self.conv1(out, latent[:, 0], noise=noise[0])
254 | 
255 |         skip = self.to_rgb1(out, latent[:, 1])
256 | 
257 |         i = 1
258 |         for conv1, conv2, noise1, noise2, to_rgb in zip(
259 |             self.convs[::2], self.convs[1::2], noise[1::2], noise[2::2], self.to_rgbs
260 |         ):
261 |             out = conv1(out, latent[:, i], noise=noise1)
262 |             out = conv2(out, latent[:, i + 1], noise=noise2)
263 |             skip = to_rgb(out, latent[:, i + 2], skip)
264 | 
265 |             i += 2
266 | 
267 |         image = skip
268 | 
269 |         if return_latents:
270 |             return image, latent
271 | 
272 |         else:
273 |             return image, None
274 | 
275 | 
276 | class Discriminator(nn.Module):
277 |     def __init__(self, size, channel_multiplier=2, blur_kernel=[1, 3, 3, 1]):
278 |         super().__init__()
279 | 
280 |         channels = {
281 |             4: 512,
282 |             8: 512,
283 |             16: 512,
284 |             32: 512,
285 |             64: 256 * channel_multiplier,
286 |             128: 128 * channel_multiplier,
287 |             256: 64 * channel_multiplier,
288 |             512: 32 * channel_multiplier,
289 |             1024: 16 * channel_multiplier,
290 |         }
291 | 
292 |         convs = [ConvLayer(3, channels[size], 1)]
293 | 
294 |         log_size = int(math.log(size, 2))
295 | 
296 |         in_channel = channels[size]
297 | 
298 |         for i in range(log_size, 2, -1):
299 |             out_channel = channels[2 ** (i - 1)]
300 | 
301 |             convs.append(ResBlock(in_channel, out_channel, blur_kernel))
302 | 
303 |             in_channel = out_channel
304 | 
305 |         self.convs = nn.Sequential(*convs)
306 | 
307 |         self.stddev_group = 4
308 |         self.stddev_feat = 1
309 | 
310 |         self.final_conv = ConvLayer(in_channel + 1, channels[4], 3)
311 |         self.final_linear = nn.Sequential(
312 |             EqualLinear(channels[4] * 4 * 4, channels[4], activation="fused_lrelu"),
313 |             EqualLinear(channels[4], 1),
314 |         )
315 | 
316 |     def forward(self, input):
317 |         out = self.convs(input)
318 | 
319 |         batch, channel, height, width = out.shape
320 |         group = min(batch, self.stddev_group)
321 |         stddev = out.view(
322 |             group, -1, self.stddev_feat, channel // self.stddev_feat, height, width
323 |         )
324 |         stddev = torch.sqrt(stddev.var(0, unbiased=False) + 1e-8)
325 |         stddev = stddev.mean([2, 3, 4], keepdims=True).squeeze(2)
326 |         stddev = stddev.repeat(group, 1, height, width)
327 |         out = torch.cat([out, stddev], 1)
328 | 
329 |         out = self.final_conv(out)
330 | 
331 |         out = out.view(batch, -1)
332 |         out = self.final_linear(out)
333 | 
334 |         return out
335 | 
336 | 


--------------------------------------------------------------------------------
/models/utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
  2 | #
  3 | # This work is made available under the Nvidia Source Code License-NC.
  4 | # To view a copy of this license, visit
  5 | # https://nvlabs.github.io/stylegan2/license.html
  6 | 
  7 | import math
  8 | 
  9 | import torch
 10 | from torch import nn
 11 | from torch.nn import functional as F
 12 | from models.op import FusedLeakyReLU, fused_leaky_relu, upfirdn2d
 13 | 
 14 | class Blur(nn.Module):
 15 |     def __init__(self, kernel, pad, upsample_factor=1):
 16 |         super().__init__()
 17 | 
 18 |         kernel = make_kernel(kernel)
 19 | 
 20 |         if upsample_factor > 1:
 21 |             kernel = kernel * (upsample_factor ** 2)
 22 | 
 23 |         self.register_buffer('kernel', kernel)
 24 | 
 25 |         self.pad = pad
 26 | 
 27 |     def forward(self, input):
 28 |         out = upfirdn2d(input, self.kernel, pad=self.pad)
 29 | 
 30 |         return out
 31 | 
 32 | 
 33 | class ConvLayer(nn.Sequential):
 34 |     def __init__(
 35 |         self,
 36 |         in_channel,
 37 |         out_channel,
 38 |         kernel_size,
 39 |         downsample=False,
 40 |         blur_kernel=[1, 3, 3, 1],
 41 |         bias=True,
 42 |         activate=True,
 43 |     ):
 44 |         layers = []
 45 | 
 46 |         if downsample:
 47 |             factor = 2
 48 |             p = (len(blur_kernel) - factor) + (kernel_size - 1)
 49 |             pad0 = (p + 1) // 2
 50 |             pad1 = p // 2
 51 | 
 52 |             layers.append(Blur(blur_kernel, pad=(pad0, pad1)))
 53 | 
 54 |             stride = 2
 55 |             self.padding = 0
 56 | 
 57 |         else:
 58 |             stride = 1
 59 |             self.padding = kernel_size // 2
 60 | 
 61 |         layers.append(
 62 |             EqualConv2d(
 63 |                 in_channel,
 64 |                 out_channel,
 65 |                 kernel_size,
 66 |                 padding=self.padding,
 67 |                 stride=stride,
 68 |                 bias=bias and not activate,
 69 |             )
 70 |         )
 71 | 
 72 |         if activate:
 73 |             layers.append(FusedLeakyReLU(out_channel, bias=bias))
 74 | 
 75 |         super().__init__(*layers)
 76 | 
 77 | 
 78 | class ResBlock(nn.Module):
 79 |     def __init__(self, in_channel, out_channel, blur_kernel=[1, 3, 3, 1]):
 80 |         super().__init__()
 81 | 
 82 |         self.conv1 = ConvLayer(in_channel, in_channel, 3)
 83 |         self.conv2 = ConvLayer(in_channel, out_channel, 3, downsample=True)
 84 | 
 85 |         self.skip = ConvLayer(
 86 |             in_channel, out_channel, 1, downsample=True, activate=False, bias=False
 87 |         )
 88 | 
 89 |     def forward(self, input):
 90 |         out = self.conv1(input)
 91 |         out = self.conv2(out)
 92 | 
 93 |         skip = self.skip(input)
 94 |         out = (out + skip) / math.sqrt(2)
 95 | 
 96 |         return out
 97 | 
 98 | 
 99 | class PixelNorm(nn.Module):
100 |     def __init__(self):
101 |         super().__init__()
102 | 
103 |     def forward(self, input):
104 |         return input * torch.rsqrt(torch.mean(input ** 2, dim=1, keepdim=True) + 1e-8)
105 | 
106 | 
107 | def make_kernel(k):
108 |     k = torch.tensor(k, dtype=torch.float32)
109 | 
110 |     if k.ndim == 1:
111 |         k = k[None, :] * k[:, None]
112 | 
113 |     k /= k.sum()
114 | 
115 |     return k
116 | 
117 | 
118 | class Upsample(nn.Module):
119 |     def __init__(self, kernel, factor=2):
120 |         super().__init__()
121 | 
122 |         self.factor = factor
123 |         kernel = make_kernel(kernel) * (factor ** 2)
124 |         self.register_buffer("kernel", kernel)
125 | 
126 |         p = kernel.shape[0] - factor
127 | 
128 |         pad0 = (p + 1) // 2 + factor - 1
129 |         pad1 = p // 2
130 | 
131 |         self.pad = (pad0, pad1)
132 | 
133 |     def forward(self, input):
134 |         out = upfirdn2d(input, self.kernel, up=self.factor, down=1, pad=self.pad)
135 | 
136 |         return out
137 | 
138 | 
139 | class Downsample(nn.Module):
140 |     def __init__(self, kernel, factor=2):
141 |         super().__init__()
142 | 
143 |         self.factor = factor
144 |         kernel = make_kernel(kernel)
145 |         self.register_buffer("kernel", kernel)
146 | 
147 |         p = kernel.shape[0] - factor
148 | 
149 |         pad0 = (p + 1) // 2
150 |         pad1 = p // 2
151 | 
152 |         self.pad = (pad0, pad1)
153 | 
154 |     def forward(self, input):
155 |         out = upfirdn2d(input, self.kernel, up=1, down=self.factor, pad=self.pad)
156 | 
157 |         return out
158 | 
159 | 
160 | class EqualConv2d(nn.Module):
161 |     def __init__(
162 |         self, in_channel, out_channel, kernel_size, stride=1, padding=0, bias=True
163 |     ):
164 |         super().__init__()
165 | 
166 |         self.weight = nn.Parameter(
167 |             torch.randn(out_channel, in_channel, kernel_size, kernel_size)
168 |         )
169 |         self.scale = 1 / math.sqrt(in_channel * kernel_size ** 2)
170 | 
171 |         self.stride = stride
172 |         self.padding = padding
173 | 
174 |         if bias:
175 |             self.bias = nn.Parameter(torch.zeros(out_channel))
176 | 
177 |         else:
178 |             self.bias = None
179 | 
180 |     def forward(self, input):
181 |         out = F.conv2d(
182 |             input,
183 |             self.weight * self.scale,
184 |             bias=self.bias,
185 |             stride=self.stride,
186 |             padding=self.padding,
187 |         )
188 | 
189 |         return out
190 | 
191 |     def __repr__(self):
192 |         return (
193 |             f"{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]},"
194 |             f" {self.weight.shape[2]}, stride={self.stride}, padding={self.padding})"
195 |         )
196 | 
197 | 
198 | class EqualLinear(nn.Module):
199 |     def __init__(
200 |         self, in_dim, out_dim, bias=True, bias_init=0, lr_mul=1, activation=None
201 |     ):
202 |         super().__init__()
203 | 
204 |         self.weight = nn.Parameter(torch.randn(out_dim, in_dim).div_(lr_mul))
205 | 
206 |         if bias:
207 |             self.bias = nn.Parameter(torch.zeros(out_dim).fill_(bias_init))
208 | 
209 |         else:
210 |             self.bias = None
211 | 
212 |         self.activation = activation
213 | 
214 |         self.scale = (1 / math.sqrt(in_dim)) * lr_mul
215 |         self.lr_mul = lr_mul
216 | 
217 |     def forward(self, input):
218 |         if self.activation:
219 |             out = F.linear(input, self.weight * self.scale)
220 |             out = fused_leaky_relu(out, self.bias * self.lr_mul)
221 | 
222 |         else:
223 |             out = F.linear(
224 |                 input, self.weight * self.scale, bias=self.bias * self.lr_mul
225 |             )
226 | 
227 |         return out
228 | 
229 |     def __repr__(self):
230 |         return (
231 |             f"{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]})"
232 |         )
233 | 
234 | 
235 | class ModulatedConv2d(nn.Module):
236 |     def __init__(
237 |         self,
238 |         in_channel,
239 |         out_channel,
240 |         kernel_size,
241 |         style_dim,
242 |         demodulate=True,
243 |         upsample=False,
244 |         downsample=False,
245 |         blur_kernel=[1, 3, 3, 1],
246 |     ):
247 |         super().__init__()
248 | 
249 |         self.eps = 1e-8
250 |         self.kernel_size = kernel_size
251 |         self.in_channel = in_channel
252 |         self.out_channel = out_channel
253 |         self.upsample = upsample
254 |         self.downsample = downsample
255 | 
256 |         if upsample:
257 |             factor = 2
258 |             p = (len(blur_kernel) - factor) - (kernel_size - 1)
259 |             pad0 = (p + 1) // 2 + factor - 1
260 |             pad1 = p // 2 + 1
261 | 
262 |             self.blur = Blur(blur_kernel, pad=(pad0, pad1), upsample_factor=factor)
263 | 
264 |         if downsample:
265 |             factor = 2
266 |             p = (len(blur_kernel) - factor) + (kernel_size - 1)
267 |             pad0 = (p + 1) // 2
268 |             pad1 = p // 2
269 | 
270 |             self.blur = Blur(blur_kernel, pad=(pad0, pad1))
271 | 
272 |         fan_in = in_channel * kernel_size ** 2
273 |         self.scale = 1 / math.sqrt(fan_in)
274 |         self.padding = kernel_size // 2
275 | 
276 |         self.weight = nn.Parameter(
277 |             torch.randn(1, out_channel, in_channel, kernel_size, kernel_size)
278 |         )
279 | 
280 |         self.modulation = EqualLinear(style_dim, in_channel, bias_init=1)
281 | 
282 |         self.demodulate = demodulate
283 | 
284 |     def __repr__(self):
285 |         return (
286 |             f"{self.__class__.__name__}({self.in_channel}, {self.out_channel}, {self.kernel_size}, "
287 |             f"upsample={self.upsample}, downsample={self.downsample})"
288 |         )
289 | 
290 |     def forward(self, input, style):
291 |         batch, in_channel, height, width = input.shape
292 | 
293 |         style = self.modulation(style).view(batch, 1, in_channel, 1, 1)
294 |         weight = self.scale * self.weight * style
295 | 
296 |         if self.demodulate:
297 |             demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + 1e-8)
298 |             weight = weight * demod.view(batch, self.out_channel, 1, 1, 1)
299 | 
300 |         weight = weight.view(
301 |             batch * self.out_channel, in_channel, self.kernel_size, self.kernel_size
302 |         )
303 | 
304 |         if self.upsample:
305 |             input = input.view(1, batch * in_channel, height, width)
306 |             weight = weight.view(
307 |                 batch, self.out_channel, in_channel, self.kernel_size, self.kernel_size
308 |             )
309 |             weight = weight.transpose(1, 2).reshape(
310 |                 batch * in_channel, self.out_channel, self.kernel_size, self.kernel_size
311 |             )
312 |             out = F.conv_transpose2d(input, weight, padding=0, stride=2, groups=batch)
313 |             _, _, height, width = out.shape
314 |             out = out.view(batch, self.out_channel, height, width)
315 |             out = self.blur(out)
316 | 
317 |         elif self.downsample:
318 |             input = self.blur(input)
319 |             _, _, height, width = input.shape
320 |             input = input.view(1, batch * in_channel, height, width)
321 |             out = F.conv2d(input, weight, padding=0, stride=2, groups=batch)
322 |             _, _, height, width = out.shape
323 |             out = out.view(batch, self.out_channel, height, width)
324 | 
325 |         else:
326 |             input = input.view(1, batch * in_channel, height, width)
327 |             out = F.conv2d(input, weight, padding=self.padding, groups=batch)
328 |             _, _, height, width = out.shape
329 |             out = out.view(batch, self.out_channel, height, width)
330 | 
331 |         return out
332 | 
333 | 
334 | class CondStyledConv(nn.Module):
335 |     def __init__(
336 |             self,
337 |             in_channel,
338 |             out_channel,
339 |             kernel_size,
340 |             style_dim,
341 |             upsample=False,
342 |             blur_kernel=[1, 3, 3, 1],
343 |             demodulate=True,
344 |     ):
345 |         super().__init__()
346 | 
347 |         self.conv = ModulatedConv2d(
348 |             in_channel,
349 |             out_channel,
350 |             kernel_size,
351 |             style_dim,
352 |             upsample=upsample,
353 |             blur_kernel=blur_kernel,
354 |             demodulate=demodulate,
355 |         )
356 | 
357 |         self.noise = CondInjection()
358 | 
359 |         # self.bias = nn.Parameter(torch.zeros(1, out_channel, 1, 1))
360 |         # self.activate = ScaledLeakyReLU(0.2)
361 |         self.activate = FusedLeakyReLU(out_channel)
362 | 
363 |     def forward(self, input, style, labels):
364 |         out = self.conv(input, style)
365 |         out = self.noise(out, labels)
366 |         # out = out + self.bias
367 |         out = self.activate(out)
368 | 
369 |         return out
370 | 
371 | class CondInjection(nn.Module):
372 |     def __init__(self):
373 |         super().__init__()
374 | 
375 |         self.weight = nn.Parameter(torch.zeros(1))
376 | 
377 |     def forward(self, image, labels, noise=None):
378 |         if noise is None:
379 |             batch, _, height, width = image.shape
380 |             noise = image.new_empty(batch, 1, height, width).normal_()
381 | 
382 |         labels = labels.view(-1, 1, 1, 1)
383 |         batch, _, height, width = image.shape
384 |         cond = image.new_ones(batch, 1, height, width) / (labels + 1)
385 | 
386 |         # return image + self.weight * cond
387 |         return image + self.weight * noise
388 | 
389 | 
390 | 
391 | class NoiseInjection(nn.Module):
392 |     def __init__(self):
393 |         super().__init__()
394 | 
395 |         self.weight = nn.Parameter(torch.zeros(1))
396 | 
397 |     def forward(self, image, noise=None):
398 |         if noise is None:
399 |             batch, _, height, width = image.shape
400 |             noise = image.new_empty(batch, 1, height, width).normal_()
401 | 
402 |         return image + self.weight * noise
403 | 
404 | 
405 | class ConstantInput(nn.Module):
406 |     def __init__(self, channel, size=4):
407 |         super().__init__()
408 | 
409 |         self.input = nn.Parameter(torch.randn(1, channel, size, size))
410 | 
411 |     def forward(self, input):
412 |         batch = input.shape[0]
413 |         out = self.input.repeat(batch, 1, 1, 1)
414 | 
415 |         return out
416 | 
417 | 
418 | class StyledConv(nn.Module):
419 |     def __init__(
420 |         self,
421 |         in_channel,
422 |         out_channel,
423 |         kernel_size,
424 |         style_dim,
425 |         upsample=False,
426 |         blur_kernel=[1, 3, 3, 1],
427 |         demodulate=True,
428 |     ):
429 |         super().__init__()
430 | 
431 |         self.conv = ModulatedConv2d(
432 |             in_channel,
433 |             out_channel,
434 |             kernel_size,
435 |             style_dim,
436 |             upsample=upsample,
437 |             blur_kernel=blur_kernel,
438 |             demodulate=demodulate,
439 |         )
440 | 
441 |         self.noise = NoiseInjection()
442 |         # self.bias = nn.Parameter(torch.zeros(1, out_channel, 1, 1))
443 |         # self.activate = ScaledLeakyReLU(0.2)
444 |         self.activate = FusedLeakyReLU(out_channel)
445 | 
446 |     def forward(self, input, style, noise=None):
447 |         out = self.conv(input, style)
448 |         out = self.noise(out, noise=noise)
449 |         # out = out + self.bias
450 |         out = self.activate(out)
451 | 
452 |         return out
453 | 
454 | class ScaledLeakyReLU(nn.Module):
455 |     def __init__(self, negative_slope=0.2):
456 |         super().__init__()
457 | 
458 |         self.negative_slope = negative_slope
459 | 
460 |     def forward(self, input):
461 |         out = F.leaky_relu(input, negative_slope=self.negative_slope)
462 | 
463 |         return out * math.sqrt(2)
464 | 
465 | class ToRGB(nn.Module):
466 |     def __init__(self, in_channel, out_channel=3, style_dim=512, upsample=True, blur_kernel=[1, 3, 3, 1]):
467 |         super().__init__()
468 | 
469 |         if upsample:
470 |             self.upsample = Upsample(blur_kernel)
471 | 
472 |         self.conv = ModulatedConv2d(in_channel, out_channel, 1, style_dim, demodulate=False)
473 |         self.bias = nn.Parameter(torch.zeros(1, out_channel, 1, 1))
474 | 
475 |     def forward(self, input, style, skip=None):
476 |         out = self.conv(input, style)
477 |         out = out + self.bias
478 | 
479 |         if skip is not None:
480 |             skip = self.upsample(skip)
481 | 
482 |             out = out + skip
483 | 
484 |         return out
485 | 
486 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | albumentations==0.5.2
2 | imageio==2.8.0
3 | imageio-ffmpeg==0.4.2
4 | imgaug==0.4.0
5 | lmdb==0.98
6 | scikit-image==0.17.2
7 | scipy==1.5.0
8 | 
9 | 


--------------------------------------------------------------------------------
/semanticGAN/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/semanticGAN/__init__.py


--------------------------------------------------------------------------------
/semanticGAN/losses.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | import torch
 24 | import torch.nn as nn
 25 | 
 26 | class LogCoshLoss(torch.nn.Module):
 27 |     def __init__(self):
 28 |         super().__init__()
 29 | 
 30 |     def forward(self, true, pred):
 31 |         loss = true - pred
 32 |         return torch.mean(torch.log(torch.cosh(loss + 1e-12)))
 33 | 
 34 | class SoftmaxLoss(torch.nn.Module):
 35 |     def __init__(self, tau=1.0):
 36 |         super().__init__()
 37 |         self.tau = tau
 38 |         self.ce_loss = torch.nn.CrossEntropyLoss()
 39 | 
 40 |     def forward(self, pred, true):
 41 |         logits = pred / self.tau
 42 |         l = self.ce_loss(logits, true)
 43 |         
 44 |         return l
 45 | 
 46 | class SoftBinaryCrossEntropyLoss(torch.nn.Module):
 47 |     def __init__(self, tau=1.0):
 48 |         super().__init__()
 49 |         self.tau = tau
 50 |         # for numerical stable reason
 51 |         self.bce_logit = torch.nn.BCEWithLogitsLoss()
 52 | 
 53 |     def forward(self, pred, true):
 54 |         logits = pred / self.tau
 55 |         l = self.bce_logit(logits, true)
 56 | 
 57 |         return l
 58 | 
 59 | def noise_regularize(noises):
 60 |     loss = 0
 61 |     batch_size = noises[0].shape[0]
 62 |     for noise in noises:
 63 |         size = noise.shape[2]
 64 | 
 65 |         while True:
 66 |             loss = (
 67 |                 loss
 68 |                 + (noise * torch.roll(noise, shifts=1, dims=3)).mean().pow(2)
 69 |                 + (noise * torch.roll(noise, shifts=1, dims=2)).mean().pow(2)
 70 |             )
 71 | 
 72 |             if size <= 8:
 73 |                 break
 74 | 
 75 |             noise = noise.reshape([batch_size, 1, size // 2, 2, size // 2, 2])
 76 |             noise = noise.mean([3, 5])
 77 |             size //= 2
 78 | 
 79 |     return loss
 80 | 
 81 | class FocalLoss(nn.Module):
 82 |     """
 83 |     copy from: https://github.com/Hsuxu/Loss_ToolBox-PyTorch/blob/master/FocalLoss/FocalLoss.py
 84 |     This is a implementation of Focal Loss with smooth label cross entropy supported which is proposed in
 85 |     'Focal Loss for Dense Object Detection. (https://arxiv.org/abs/1708.02002)'
 86 |         Focal_Loss= -1*alpha*(1-pt)*log(pt)
 87 |     :param num_class:
 88 |     :param alpha: (tensor) 3D or 4D the scalar factor for this criterion
 89 |     :param gamma: (float,double) gamma > 0 reduces the relative loss for well-classified examples (p>0.5) putting more
 90 |                     focus on hard misclassified example
 91 |     :param smooth: (float,double) smooth value when cross entropy
 92 |     :param balance_index: (int) balance class index, should be specific when alpha is float
 93 |     :param size_average: (bool, optional) By default, the losses are averaged over each loss element in the batch.
 94 |     """
 95 | 
 96 |     def __init__(self, alpha=None, gamma=2, tau=1.0, balance_index=0, smooth=1e-5, size_average=True):
 97 |         super(FocalLoss, self).__init__()
 98 |         self.alpha = alpha
 99 |         self.gamma = gamma
100 |         self.tau = tau
101 |         self.balance_index = balance_index
102 |         self.smooth = smooth
103 |         self.size_average = size_average
104 | 
105 |         if self.smooth is not None:
106 |             if self.smooth < 0 or self.smooth > 1.0:
107 |                 raise ValueError('smooth value should be in [0,1]')
108 |     
109 |     def _apply_nonlin(self, logit):
110 |         num_class = logit.shape[1]
111 |         if num_class == 1:
112 |             logit = torch.sigmoid(logit / self.tau)
113 |         else:
114 |             logit = torch.softmax(logit / self.tau, dim=1)
115 |         
116 |         return logit
117 | 
118 |     def forward(self, logit, target):
119 |         logit = self._apply_nonlin(logit)
120 |         num_class = logit.shape[1]
121 | 
122 |         if logit.dim() > 2:
123 |             # N,C,d1,d2 -> N,C,m (m=d1*d2*...)
124 |             logit = logit.view(logit.size(0), logit.size(1), -1)
125 |             logit = logit.permute(0, 2, 1).contiguous()
126 |             logit = logit.view(-1, logit.size(-1))
127 |         target = torch.squeeze(target, 1)
128 |         target = target.view(-1, 1)
129 | 
130 |         alpha = self.alpha
131 | 
132 |         if alpha is None:
133 |             alpha = torch.ones(num_class, 1)
134 |         elif isinstance(alpha, (list, np.ndarray)):
135 |             assert len(alpha) == num_class
136 |             alpha = torch.FloatTensor(alpha).view(num_class, 1)
137 |             alpha = alpha / alpha.sum()
138 |         elif isinstance(alpha, float):
139 |             alpha = torch.ones(num_class, 1)
140 |             alpha = alpha * (1 - self.alpha)
141 |             alpha[self.balance_index] = self.alpha
142 | 
143 |         else:
144 |             raise TypeError('Not support alpha type')
145 |         
146 |         if alpha.device != logit.device:
147 |             alpha = alpha.to(logit.device)
148 | 
149 |         idx = target.cpu().long()
150 | 
151 |         one_hot_key = torch.FloatTensor(target.size(0), num_class).zero_()
152 |         one_hot_key = one_hot_key.scatter_(1, idx, 1)
153 |         if one_hot_key.device != logit.device:
154 |             one_hot_key = one_hot_key.to(logit.device)
155 | 
156 |         if self.smooth:
157 |             one_hot_key = torch.clamp(
158 |                 one_hot_key, self.smooth/(num_class-1), 1.0 - self.smooth)
159 |         pt = (one_hot_key * logit).sum(1) + self.smooth
160 |         logpt = pt.log()
161 | 
162 |         gamma = self.gamma
163 | 
164 |         alpha = alpha[idx]
165 |         alpha = torch.squeeze(alpha)
166 |         loss = -1 * alpha * torch.pow((1 - pt), gamma) * logpt
167 | 
168 |         if self.size_average:
169 |             loss = loss.mean()
170 |         else:
171 |             loss = loss.sum()
172 |         return loss
173 | 
174 | class DiceLoss(nn.Module):
175 |     """Computes Dice Loss according to https://arxiv.org/abs/1606.04797.
176 |     For multi-class segmentation `weight` parameter can be used to assign different weights per class.
177 |     The input to the loss function is assumed to be a logit and will be normalized by the Sigmoid function.
178 |     """
179 | 
180 |     def __init__(self, weight=None, sigmoid_tau=0.3, include_bg=False):
181 |         super().__init__()
182 |         self.register_buffer('weight', weight)
183 |         self.normalization = nn.Sigmoid()
184 |         self.sigmoid_tau = sigmoid_tau
185 |         self.include_bg = include_bg
186 | 
187 |     def _flatten(self, tensor):
188 |         """Flattens a given tensor such that the channel axis is first.
189 |         The shapes are transformed as follows:
190 |         (N, C, D, H, W) -> (C, N * D * H * W)
191 |         """
192 |         # number of channels
193 |         C = tensor.size(1)
194 |         # new axis order
195 |         axis_order = (1, 0) + tuple(range(2, tensor.dim()))
196 |         # Transpose: (N, C, D, H, W) -> (C, N, D, H, W)
197 |         transposed = tensor.permute(axis_order)
198 |         # Flatten: (C, N, D, H, W) -> (C, N * D * H * W)
199 |         return transposed.contiguous().view(C, -1)
200 | 
201 |     def _compute_per_channel_dice(self, input, target, epsilon=1e-6, weight=None):
202 |         """
203 |         Computes DiceCoefficient as defined in https://arxiv.org/abs/1606.04797 given  a multi channel input and target.
204 |         Assumes the input is a normalized probability, e.g. a result of Sigmoid or Softmax function.
205 |         Args:
206 |                 input (torch.Tensor): NxCxSpatial input tensor
207 |                 target (torch.Tensor): NxCxSpatial target tensor
208 |                 epsilon (float): prevents division by zero
209 |                 weight (torch.Tensor): Cx1 tensor of weight per channel/class
210 |         """
211 | 
212 |         # input and target shapes must match
213 |         assert input.size() == target.size(), "'input' and 'target' must have the same shape"
214 | 
215 |         input = self._flatten(input)
216 |         target = self._flatten(target)
217 |         target = target.float()
218 | 
219 |         # compute per channel Dice Coefficient
220 |         intersect = (input * target).sum(-1)
221 |         if weight is not None:
222 |             intersect = weight * intersect
223 | 
224 |         # here we can use standard dice (input + target).sum(-1) or extension (see V-Net) (input^2 + target^2).sum(-1)
225 |         denominator = (input * input).sum(-1) + (target * target).sum(-1)
226 |         return 2 * (intersect / denominator.clamp(min=epsilon))
227 | 
228 |     def dice(self, input, target, weight):
229 |         return self._compute_per_channel_dice(input, target, weight=weight)
230 |     
231 |     def forward(self, input, target):
232 |         # get probabilities from logits
233 |         input = self.normalization(input / self.sigmoid_tau)
234 | 
235 |         # compute per channel Dice coefficient
236 |         per_channel_dice = self.dice(input, target, weight=self.weight)
237 | 
238 |         # average Dice score across all channels/classes
239 |         if self.include_bg:
240 |             return 1. - torch.mean(per_channel_dice)
241 |         else:
242 |             return 1. - torch.mean(per_channel_dice[1:])
243 | 


--------------------------------------------------------------------------------
/semanticGAN/prepare_inception.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | import numpy as np
 24 | import torch
 25 | import torch.nn.functional as F
 26 | from torch.utils.data import DataLoader, ConcatDataset
 27 | 
 28 | import argparse
 29 | from utils import inception_utils
 30 | from dataloader import (CelebAMaskDataset)
 31 | import pickle
 32 | 
 33 | @torch.no_grad()
 34 | def extract_features(args, loader, inception, device):
 35 |     pbar = loader
 36 | 
 37 |     pools, logits = [], []
 38 | 
 39 |     for data in pbar:
 40 |         img = data['image']
 41 |             
 42 |         # check img dim
 43 |         if img.shape[1] != 3:
 44 |             img = img.expand(-1,3,-1,-1)
 45 | 
 46 |         img = img.to(device)
 47 |         pool_val, logits_val = inception(img)
 48 |         
 49 |         pools.append(pool_val.cpu().numpy())
 50 |         logits.append(F.softmax(logits_val, dim=1).cpu().numpy())
 51 | 
 52 |     pools = np.concatenate(pools, axis=0)
 53 |     logits = np.concatenate(logits, axis=0)
 54 | 
 55 |     return pools, logits
 56 | 
 57 | 
 58 | def get_dataset(args):
 59 |     if args.dataset_name == 'celeba-mask':
 60 |         unlabel_dataset = CelebAMaskDataset(args, args.path, is_label=False)
 61 |         train_val_dataset = CelebAMaskDataset(args, args.path, is_label=True, phase='train-val')
 62 |         dataset = ConcatDataset([unlabel_dataset, train_val_dataset])
 63 |     else:
 64 |         raise Exception('No such a dataloader!')
 65 |     return dataset
 66 | 
 67 | if __name__ == '__main__':
 68 |     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 69 | 
 70 |     parser = argparse.ArgumentParser(
 71 |         description='Calculate Inception v3 features for datasets'
 72 |     )
 73 |     parser.add_argument('--size', type=int, default=256)
 74 |     parser.add_argument('--batch', default=64, type=int, help='batch size')
 75 |     parser.add_argument('--n_sample', type=int, default=50000)
 76 |     parser.add_argument('--output', type=str, required=True)
 77 |     parser.add_argument('--image_mode', type=str, default='RGB')
 78 |     parser.add_argument('--dataset_name', type=str, help='[celeba-mask]')
 79 |     parser.add_argument('path', metavar='PATH', help='path to datset dir')
 80 | 
 81 |     args = parser.parse_args()
 82 | 
 83 |     inception = inception_utils.load_inception_net()
 84 | 
 85 |     dset = get_dataset(args)
 86 |     loader = DataLoader(dset, batch_size=args.batch, num_workers=4)
 87 | 
 88 |     pools, logits = extract_features(args, loader, inception, device)
 89 | 
 90 |     # pools = pools[: args.n_sample]
 91 |     # logits = logits[: args.n_sample]
 92 | 
 93 |     print(f'extracted {pools.shape[0]} features')
 94 | 
 95 |     print('Calculating inception metrics...')
 96 |     IS_mean, IS_std = inception_utils.calculate_inception_score(logits)
 97 |     print('Training data from dataloader has IS of %5.5f +/- %5.5f' % (IS_mean, IS_std))
 98 |     print('Calculating means and covariances...')
 99 | 
100 |     mean = np.mean(pools, axis=0)
101 |     cov = np.cov(pools, rowvar=False)
102 | 
103 |     with open(args.output, 'wb') as f:
104 |         pickle.dump({'mean': mean, 'cov': cov, 'size': args.size, 'path': args.path}, f)
105 | 


--------------------------------------------------------------------------------
/semanticGAN/preprocessing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nv-tlabs/semanticGAN_code/342889ebbe817695c0e64133100ede8f9877f3de/semanticGAN/preprocessing/__init__.py


--------------------------------------------------------------------------------
/semanticGAN/preprocessing/face_postprocessing.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | import os
 24 | import numpy as np
 25 | from PIL import Image
 26 | import json
 27 | import argparse
 28 | 
 29 | def find_coeffs(pa, pb):
 30 |     matrix = []
 31 |     for p1, p2 in zip(pa, pb):
 32 |         matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
 33 |         matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])
 34 | 
 35 |     A = np.matrix(matrix, dtype=np.float)
 36 |     B = np.array(pb).reshape(8)
 37 | 
 38 |     res = np.dot(np.linalg.inv(A.T * A) * A.T, B)
 39 |     return np.array(res).reshape(8)
 40 | 
 41 | def main(args):
 42 |     img_list = sorted(os.listdir(args.img_dir))
 43 |     meta_list = sorted(os.listdir(args.meta_dir))
 44 |     raw_list = sorted(os.listdir(args.raw_dir))
 45 | 
 46 |     for img_p, meta_p, raw_p in zip(img_list, meta_list, raw_list):
 47 |         img_n = img_p.split('.')[0]
 48 | 
 49 |         img_p = os.path.join(args.img_dir, img_p)
 50 |         meta_p = os.path.join(args.meta_dir, meta_p)
 51 |         raw_p = os.path.join(args.raw_dir, raw_p)
 52 | 
 53 |         with open(meta_p, 'r') as f:
 54 |             meta_json = json.load(f)
 55 |         
 56 |         kps = meta_json['quad']
 57 |         crop_box = meta_json['bbox']
 58 |         size = meta_json['size']
 59 |         pad = meta_json['pad']
 60 |         shrink = meta_json['shrink']
 61 | 
 62 |         upper_left = kps[0:2]
 63 |         lower_left = kps[2:4]
 64 |         lower_right = kps[4:6]
 65 |         upper_right= kps[6:]
 66 |         all_kps = [upper_left, lower_left, lower_right, upper_right]
 67 |         pa =  all_kps 
 68 |         pb =  [[0,0 ], [0, args.size], [args.size, args.size], [args.size,0]]
 69 | 
 70 |         coeffs = find_coeffs(pa, pb)
 71 | 
 72 |         left, top, right, bottom = crop_box
 73 | 
 74 |         width = size[0]
 75 |         height = size[1]
 76 | 
 77 |         img_pil = Image.open(img_p).convert('RGB')
 78 |         
 79 |         img_pil = img_pil.transform((width, height), Image.PERSPECTIVE, coeffs, Image.BILINEAR)
 80 | 
 81 |         #unpad
 82 |         img_np = np.array(img_pil)
 83 |         if (pad[0] == 0 and
 84 |             pad[1] == 0 and 
 85 |             pad[2] == 0 and 
 86 |             pad[3] == 0):
 87 |             pass
 88 |         else:
 89 |             if pad[3] != 0 and pad[2] != 0:
 90 |                 img_np = img_np[pad[1]:-pad[3], pad[0]:-pad[2]]
 91 |             elif pad[3] == 0 and pad[2] != 0:
 92 |                 img_np = img_np[pad[1]:, pad[0]:-pad[2]]
 93 |             elif pad[3] != 0 and pad[2] == 0:
 94 |                 img_np = img_np[pad[1]:-pad[3], pad[0]:]
 95 |             else:
 96 |                 img_np = img_np[pad[1]:, pad[0]:]
 97 | 
 98 |         crop_width = crop_box[2] - crop_box[0]
 99 |         crop_height = crop_box[3] - crop_box[1]
100 |         #unshrink
101 |         if shrink > 1:
102 |             img_pil = Image.fromarray(img_np)
103 |             rsize = (int(np.rint(float(img_pil.size[0]) * shrink)), int(np.rint(float(img_pil.size[1]) * shrink)))
104 |             img_pil = img_pil.resize(rsize, resample=Image.LANCZOS)
105 |             crop_width *= shrink
106 |             crop_height *= shrink
107 |             crop_box[3] *= shrink
108 |             crop_box[2] *= shrink
109 |             img_np = np.array(img_pil)
110 | 
111 |         assert crop_width == img_np.shape[1]
112 |         assert crop_height == img_np.shape[0]
113 | 
114 |         img_ori_pil = Image.open(raw_p).convert('RGB')
115 |         img_ori_np = np.array(img_ori_pil)
116 |         
117 |         img_ori_np[crop_box[1]:crop_box[3], crop_box[0]:crop_box[2]] = img_np
118 | 
119 |         img_ori_pil = Image.fromarray(img_ori_np)
120 | 
121 |         img_ori_pil.save(os.path.join(depth_out, img_n + '.png'))
122 | 
123 | if __name__ == '__main__':
124 |     parser = argparse.ArgumentParser()
125 |     parser.add_argument('--raw_dir', type=str, required=True)
126 |     parser.add_argument('--img_dir', type=str, required=True)
127 |     parser.add_argument('--meta_dir', type=str, required=True)
128 |     parser.add_argument('--outdir', type=str, required=True)
129 |     parser.add_argument('--size', type=int, default=256)
130 |     
131 |     args = parser.parse_args() 
132 | 
133 |     os.makedirs(args.outdir, exist_ok=True)
134 | 
135 |     main(args)   


--------------------------------------------------------------------------------
/semanticGAN/preprocessing/face_preprocessing.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | import os
 24 | import dlib
 25 | import numpy as np
 26 | import json
 27 | import scipy.ndimage
 28 | import PIL.Image
 29 | import argparse
 30 | 
 31 | 
 32 | def main(args):
 33 |     detector = dlib.get_frontal_face_detector()
 34 |     predictor = dlib.shape_predictor(args.detector)
 35 | 
 36 |     target_size = args.size
 37 |     supersampling = 4
 38 |     face_shrink = 2
 39 |     enable_padding = True
 40 | 
 41 | 
 42 |     img_out_dir = os.path.join(args.out_dir, 'image')
 43 |     meta_out_dir = os.path.join(args.out_dir, 'meta')
 44 | 
 45 |     img_list = sorted(os.listdir(args.img_dir))
 46 | 
 47 |     os.makedirs(img_out_dir, exist_ok=True)
 48 |     os.makedirs(meta_out_dir, exist_ok=True)
 49 | 
 50 |     def rot90(v) -> np.ndarray:
 51 |         return np.array([-v[1], v[0]])
 52 | 
 53 |     for img_n in img_list:
 54 |         img_p = os.path.join(args.img_dir, img_n)
 55 |         detector_img = dlib.load_rgb_image(img_p)
 56 | 
 57 |         # Ask the detector to find the bounding boxes of each face. The 1 in the
 58 |         # second argument indicates that we should upsample the image 1 time. This
 59 |         # will make everything bigger and allow us to detect more faces.
 60 |         dets = detector(detector_img, 1)
 61 |         print("Number of faces detected: {}".format(len(dets)))
 62 |         if len(dets) > 1:
 63 |             continue
 64 | 
 65 |         for k, d in enumerate(dets):
 66 | 
 67 |             # Get the landmarks/parts for the face in box d.
 68 |             shape = predictor(detector_img, d)
 69 |             all_parts = shape.parts()
 70 |             lm = np.array([ [item.x,item.y ] for item in all_parts])
 71 |             landmarks = np.float32(lm) + 0.5
 72 |             assert landmarks.shape == (68, 2)
 73 | 
 74 |             lm_eye_left      = landmarks[36 : 42]  # left-clockwise
 75 |             lm_eye_right     = landmarks[42 : 48]  # left-clockwise
 76 |             lm_mouth_outer   = landmarks[48 : 60]  # left-clockwise
 77 |             
 78 |             # Calculate auxiliary vectors.
 79 |             eye_left     = np.mean(lm_eye_left, axis=0)
 80 |             eye_right    = np.mean(lm_eye_right, axis=0)
 81 |             eye_avg      = (eye_left + eye_right) * 0.5
 82 |             eye_to_eye   = eye_right - eye_left
 83 |             mouth_left   = lm_mouth_outer[0]
 84 |             mouth_right  = lm_mouth_outer[6]
 85 |             mouth_avg    = (mouth_left + mouth_right) * 0.5
 86 |             eye_to_mouth = mouth_avg - eye_avg
 87 | 
 88 |             # Choose oriented crop rectangle.
 89 |             x = eye_to_eye - rot90(eye_to_mouth)
 90 |             x /= np.hypot(*x)
 91 |             x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
 92 |             y = rot90(x)
 93 |             c = eye_avg + eye_to_mouth * 0.1
 94 | 
 95 |             # Calculate auxiliary data.
 96 |             qsize = np.hypot(*x) * 2
 97 |             quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
 98 |             lo = np.min(quad, axis=0)
 99 |             hi = np.max(quad, axis=0)
100 |             lm_rel = np.dot(landmarks - c, np.transpose([x, y])) / qsize**2 * 2 + 0.5
101 |             rp = np.dot(np.random.RandomState(123).uniform(-1, 1, size=(1024, 2)), [x, y]) + c
102 | 
103 |             # Load.
104 |             img_ori = PIL.Image.open(img_p).convert('RGB')
105 |             img = PIL.Image.open(img_p).convert('RGB')
106 | 
107 |             # Shrink.
108 |             shrink = int(np.floor(qsize / target_size * 0.5))
109 |             if shrink > 1:
110 |                 rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
111 |                 img = img.resize(rsize, PIL.Image.ANTIALIAS)
112 |                 quad /= shrink
113 |                 qsize /= shrink
114 |             
115 |             # Crop.
116 |             border = max(int(np.rint(qsize * 0.1)), 3)
117 |             crop = (int(np.floor(min(quad[:,0]))), int(np.floor(min(quad[:,1]))), int(np.ceil(max(quad[:,0]))), int(np.ceil(max(quad[:,1]))))
118 |             crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1]))
119 |             if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
120 |                 img = img.crop(crop)
121 |                 quad -= crop[0:2]
122 | 
123 |             # Pad.
124 |             pad = (int(np.floor(min(quad[:,0]))), int(np.floor(min(quad[:,1]))), int(np.ceil(max(quad[:,0]))), int(np.ceil(max(quad[:,1]))))
125 |             pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0))
126 |             if enable_padding and max(pad) > border - 4:
127 |                 pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
128 |                 img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
129 |                 h, w, _ = img.shape
130 |                 y, x, _ = np.ogrid[:h, :w, :1]
131 |                 mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w-1-x) / pad[2]), 1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h-1-y) / pad[3]))
132 |                 blur = qsize * 0.02
133 |                 img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
134 |                 img += (np.median(img, axis=(0,1)) - img) * np.clip(mask, 0.0, 1.0)
135 |                 img = PIL.Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB')
136 |                 quad += pad[:2]
137 |             else:
138 |                 pad = (0,0,0,0)
139 | 
140 |             meta = {
141 |                 'bbox': list(crop), 
142 |                 'quad': list((quad.astype(float) + 0.5).flatten()),
143 |                 'size': list(img.size),
144 |                 'pad': [int(p) for p in list(pad)],
145 |                 'shrink': shrink,
146 |                 }
147 | 
148 |             # Transform.
149 |             super_size = target_size * supersampling
150 |             img = img.transform((super_size, super_size), PIL.Image.QUAD, (quad + 0.5).flatten(), PIL.Image.BILINEAR)
151 |             if target_size < super_size:
152 |                 img = img.resize((target_size, target_size), PIL.Image.ANTIALIAS)
153 |         
154 |             img_name = os.path.basename(img_p).split('.')[0]
155 | 
156 |             # save
157 |             with open(os.path.join(meta_out_dir, img_name + '.json'), 'w') as f:
158 |                 json.dump(meta, f)
159 |             
160 |             img.save(os.path.join(img_out_dir, img_name + '.png'))
161 | 
162 | if __name__ == "__main__":
163 |     parser = argparse.ArgumentParser()
164 |     parser.add_argument('--img_dir', type=str, required=True)
165 |     parser.add_argument('--outdir', type=str, required=True)
166 | 
167 |     parser.add_argument('--detector', type=str, default='./shape_predictor_68_face_landmarks.dat')
168 |     parser.add_argument('--size', type=int, default=256)
169 |     
170 |     args = parser.parse_args()    


--------------------------------------------------------------------------------
/semanticGAN/ranger.py:
--------------------------------------------------------------------------------
  1 | # Ranger deep learning optimizer - RAdam + Lookahead + Gradient Centralization, combined into one optimizer.
  2 | 
  3 | # https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer
  4 | # and/or
  5 | # https://github.com/lessw2020/Best-Deep-Learning-Optimizers
  6 | 
  7 | # Ranger has been used to capture 12 records on the FastAI leaderboard.
  8 | 
  9 | # This version = 2020.9.4
 10 | 
 11 | 
 12 | # Credits:
 13 | # Gradient Centralization --> https://arxiv.org/abs/2004.01461v2 (a new optimization technique for DNNs), github:  https://github.com/Yonghongwei/Gradient-Centralization
 14 | # RAdam -->  https://github.com/LiyuanLucasLiu/RAdam
 15 | # Lookahead --> rewritten by lessw2020, but big thanks to Github @LonePatient and @RWightman for ideas from their code.
 16 | # Lookahead paper --> MZhang,G Hinton  https://arxiv.org/abs/1907.08610
 17 | 
 18 | # summary of changes:
 19 | # 9/4/20 - updated addcmul_ signature to avoid warning.  Integrates latest changes from GC developer (he did the work for this), and verified on performance on private dataloader.
 20 | # 4/11/20 - add gradient centralization option.  Set new testing benchmark for accuracy with it, toggle with use_gc flag at init.
 21 | # full code integration with all updates at param level instead of group, moves slow weights into state dict (from generic weights),
 22 | # supports group learning rates (thanks @SHolderbach), fixes sporadic load from saved model issues.
 23 | # changes 8/31/19 - fix references to *self*.N_sma_threshold;
 24 | # changed eps to 1e-5 as better default than 1e-8.
 25 | 
 26 | # Apache License 2.0 LICENSE code copy from https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer 
 27 | # please refer to https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer/blob/master/LICENSE
 28 | 
 29 | 
 30 | import math
 31 | import torch
 32 | from torch.optim.optimizer import Optimizer, required
 33 | 
 34 | 
 35 | def centralized_gradient(x, use_gc=True, gc_conv_only=False):
 36 |     '''credit - https://github.com/Yonghongwei/Gradient-Centralization '''
 37 |     if use_gc:
 38 |         if gc_conv_only:
 39 |             if len(list(x.size())) > 3:
 40 |                 x.add_(-x.mean(dim=tuple(range(1, len(list(x.size())))), keepdim=True))
 41 |         else:
 42 |             if len(list(x.size())) > 1:
 43 |                 x.add_(-x.mean(dim=tuple(range(1, len(list(x.size())))), keepdim=True))
 44 |     return x
 45 | 
 46 | 
 47 | class Ranger(Optimizer):
 48 | 
 49 |     def __init__(self, params, lr=1e-3,                       # lr
 50 |                  alpha=0.5, k=6, N_sma_threshhold=5,           # Ranger options
 51 |                  betas=(.95, 0.999), eps=1e-5, weight_decay=0,  # Adam options
 52 |                  # Gradient centralization on or off, applied to conv layers only or conv + fc layers
 53 |                  use_gc=True, gc_conv_only=False, gc_loc=True
 54 |                  ):
 55 | 
 56 |         # parameter checks
 57 |         if not 0.0 <= alpha <= 1.0:
 58 |             raise ValueError(f'Invalid slow update rate: {alpha}')
 59 |         if not 1 <= k:
 60 |             raise ValueError(f'Invalid lookahead steps: {k}')
 61 |         if not lr > 0:
 62 |             raise ValueError(f'Invalid Learning Rate: {lr}')
 63 |         if not eps > 0:
 64 |             raise ValueError(f'Invalid eps: {eps}')
 65 | 
 66 |         # parameter comments:
 67 |         # beta1 (momentum) of .95 seems to work better than .90...
 68 |         # N_sma_threshold of 5 seems better in testing than 4.
 69 |         # In both cases, worth testing on your dataloader (.90 vs .95, 4 vs 5) to make sure which works best for you.
 70 | 
 71 |         # prep defaults and init torch.optim base
 72 |         defaults = dict(lr=lr, alpha=alpha, k=k, step_counter=0, betas=betas,
 73 |                         N_sma_threshhold=N_sma_threshhold, eps=eps, weight_decay=weight_decay)
 74 |         super().__init__(params, defaults)
 75 | 
 76 |         # adjustable threshold
 77 |         self.N_sma_threshhold = N_sma_threshhold
 78 | 
 79 |         # look ahead params
 80 | 
 81 |         self.alpha = alpha
 82 |         self.k = k
 83 | 
 84 |         # radam buffer for state
 85 |         self.radam_buffer = [[None, None, None] for ind in range(10)]
 86 | 
 87 |         # gc on or off
 88 |         self.gc_loc = gc_loc
 89 |         self.use_gc = use_gc
 90 |         self.gc_conv_only = gc_conv_only
 91 |         # level of gradient centralization
 92 |         #self.gc_gradient_threshold = 3 if gc_conv_only else 1
 93 | 
 94 |         print(
 95 |             f"Ranger optimizer loaded. \nGradient Centralization usage = {self.use_gc}")
 96 |         if (self.use_gc and self.gc_conv_only == False):
 97 |             print(f"GC applied to both conv and fc layers")
 98 |         elif (self.use_gc and self.gc_conv_only == True):
 99 |             print(f"GC applied to conv layers only")
100 | 
101 |     def __setstate__(self, state):
102 |         print("set state called")
103 |         super(Ranger, self).__setstate__(state)
104 | 
105 |     def step(self, closure=None):
106 |         loss = None
107 |         # note - below is commented out b/c I have other work that passes back the loss as a float, and thus not a callable closure.
108 |         # Uncomment if you need to use the actual closure...
109 | 
110 |         # if closure is not None:
111 |         #loss = closure()
112 | 
113 |         # Evaluate averages and grad, update param tensors
114 |         for group in self.param_groups:
115 | 
116 |             for p in group['params']:
117 |                 if p.grad is None:
118 |                     continue
119 |                 grad = p.grad.data.float()
120 | 
121 |                 if grad.is_sparse:
122 |                     raise RuntimeError(
123 |                         'Ranger optimizer does not support sparse gradients')
124 | 
125 |                 p_data_fp32 = p.data.float()
126 | 
127 |                 state = self.state[p]  # get state dict for this param
128 | 
129 |                 if len(state) == 0:  # if first time to run...init dictionary with our desired entries
130 |                     # if self.first_run_check==0:
131 |                     # self.first_run_check=1
132 |                     #print("Initializing slow buffer...should not see this at load from saved model!")
133 |                     state['step'] = 0
134 |                     state['exp_avg'] = torch.zeros_like(p_data_fp32)
135 |                     state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
136 | 
137 |                     # look ahead weight storage now in state dict
138 |                     state['slow_buffer'] = torch.empty_like(p.data)
139 |                     state['slow_buffer'].copy_(p.data)
140 | 
141 |                 else:
142 |                     state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
143 |                     state['exp_avg_sq'] = state['exp_avg_sq'].type_as(
144 |                         p_data_fp32)
145 | 
146 |                 # begin computations
147 |                 exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
148 |                 beta1, beta2 = group['betas']
149 | 
150 |                 # GC operation for Conv layers and FC layers
151 |                 # if grad.dim() > self.gc_gradient_threshold:
152 |                 #    grad.add_(-grad.mean(dim=tuple(range(1, grad.dim())), keepdim=True))
153 |                 if self.gc_loc:
154 |                     grad = centralized_gradient(grad, use_gc=self.use_gc, gc_conv_only=self.gc_conv_only)
155 | 
156 |                 state['step'] += 1
157 | 
158 |                 # compute variance mov avg
159 |                 exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
160 | 
161 |                 # compute mean moving avg
162 |                 exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
163 | 
164 |                 buffered = self.radam_buffer[int(state['step'] % 10)]
165 | 
166 |                 if state['step'] == buffered[0]:
167 |                     N_sma, step_size = buffered[1], buffered[2]
168 |                 else:
169 |                     buffered[0] = state['step']
170 |                     beta2_t = beta2 ** state['step']
171 |                     N_sma_max = 2 / (1 - beta2) - 1
172 |                     N_sma = N_sma_max - 2 * \
173 |                         state['step'] * beta2_t / (1 - beta2_t)
174 |                     buffered[1] = N_sma
175 |                     if N_sma > self.N_sma_threshhold:
176 |                         step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (
177 |                             N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
178 |                     else:
179 |                         step_size = 1.0 / (1 - beta1 ** state['step'])
180 |                     buffered[2] = step_size
181 | 
182 |                 # if group['weight_decay'] != 0:
183 |                 #    p_data_fp32.add_(-group['weight_decay']
184 |                 #                     * group['lr'], p_data_fp32)
185 | 
186 |                 # apply lr
187 |                 if N_sma > self.N_sma_threshhold:
188 |                     denom = exp_avg_sq.sqrt().add_(group['eps'])
189 |                     G_grad = exp_avg / denom
190 |                 else:
191 |                     G_grad = exp_avg
192 | 
193 |                 if group['weight_decay'] != 0:
194 |                     G_grad.add_(p_data_fp32, alpha=group['weight_decay'])
195 |                 # GC operation
196 |                 if self.gc_loc == False:
197 |                     G_grad = centralized_gradient(G_grad, use_gc=self.use_gc, gc_conv_only=self.gc_conv_only)
198 | 
199 |                 p_data_fp32.add_(G_grad, alpha=-step_size * group['lr'])
200 |                 p.data.copy_(p_data_fp32)
201 | 
202 |                 # integrated look ahead...
203 |                 # we do it at the param level instead of group level
204 |                 if state['step'] % group['k'] == 0:
205 |                     # get access to slow param tensor
206 |                     slow_p = state['slow_buffer']
207 |                     # (fast weights - slow weights) * alpha
208 |                     slow_p.add_(p.data - slow_p, alpha=self.alpha)
209 |                     # copy interpolated weights to RAdam param tensor
210 |                     p.data.copy_(slow_p)
211 | 
212 |         return loss


--------------------------------------------------------------------------------
/semanticGAN/samplers.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | 
 24 | from typing import Iterator, List, Optional, Union
 25 | from operator import itemgetter
 26 | from torch.utils.data import DistributedSampler, Dataset
 27 | from torch.utils.data.sampler import BatchSampler, Sampler
 28 | 
 29 | class DatasetFromSampler(Dataset):
 30 |     """Dataset of indexes from `Sampler`."""
 31 | 
 32 |     def __init__(self, sampler: Sampler):
 33 |         """
 34 |         Args:
 35 |             sampler (Sampler): @TODO: Docs. Contribution is welcome
 36 |         """
 37 |         self.sampler = sampler
 38 |         self.sampler_list = None
 39 | 
 40 |     def __getitem__(self, index: int):
 41 |         """Gets element of the dataloader.
 42 |         Args:
 43 |             index (int): index of the element in the dataloader
 44 |         Returns:
 45 |             Single element by index
 46 |         """
 47 |         if self.sampler_list is None:
 48 |             self.sampler_list = list(self.sampler)
 49 |         return self.sampler_list[index]
 50 | 
 51 |     def __len__(self) -> int:
 52 |         """
 53 |         Returns:
 54 |             int: length of the dataloader
 55 |         """
 56 |         return len(self.sampler)
 57 | 
 58 | class DistributedSamplerWrapper(DistributedSampler):
 59 |     """
 60 |     Wrapper over `Sampler` for distributed training.
 61 |     Allows you to use any sampler in distributed mode.
 62 |     It is especially useful in conjunction with
 63 |     `torch.nn.parallel.DistributedDataParallel`. In such case, each
 64 |     process can pass a DistributedSamplerWrapper instance as a DataLoader
 65 |     sampler, and load a subset of subsampled data of the original dataloader
 66 |     that is exclusive to it.
 67 |     .. note::
 68 |         Sampler is assumed to be of constant size.
 69 |     """
 70 | 
 71 |     def __init__(
 72 |         self,
 73 |         sampler,
 74 |         num_replicas: Optional[int] = None,
 75 |         rank: Optional[int] = None,
 76 |         shuffle: bool = True,
 77 |     ):
 78 |         """
 79 |         Args:
 80 |             sampler: Sampler used for subsampling
 81 |             num_replicas (int, optional): Number of processes participating in
 82 |               distributed training
 83 |             rank (int, optional): Rank of the current process
 84 |               within ``num_replicas``
 85 |             shuffle (bool, optional): If true (default),
 86 |               sampler will shuffle the indices
 87 |         """
 88 |         super(DistributedSamplerWrapper, self).__init__(
 89 |             DatasetFromSampler(sampler),
 90 |             num_replicas=num_replicas,
 91 |             rank=rank,
 92 |             shuffle=shuffle,
 93 |         )
 94 |         self.sampler = sampler
 95 | 
 96 |     def __iter__(self):
 97 |         """@TODO: Docs. Contribution is welcome."""
 98 |         self.dataset = DatasetFromSampler(self.sampler)
 99 |         indexes_of_indexes = super().__iter__()
100 |         subsampler_indexes = self.dataset
101 |         return iter(itemgetter(*indexes_of_indexes)(subsampler_indexes))


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
 3 | Licensed under The MIT License (MIT)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | """


--------------------------------------------------------------------------------
/utils/data_util.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | import numpy as np
 24 | 
 25 | face_class = ['background', 'head', 'head***cheek', 'head***chin', 'head***ear', 'head***ear***helix',
 26 |               'head***ear***lobule', 'head***eye***botton lid', 'head***eye***eyelashes', 'head***eye***iris',
 27 |               'head***eye***pupil', 'head***eye***sclera', 'head***eye***tear duct', 'head***eye***top lid',
 28 |               'head***eyebrow', 'head***forehead', 'head***frown', 'head***hair', 'head***hair***sideburns',
 29 |               'head***jaw', 'head***moustache', 'head***mouth***inferior lip', 'head***mouth***oral comisure',
 30 |               'head***mouth***superior lip', 'head***mouth***teeth', 'head***neck', 'head***nose',
 31 |               'head***nose***ala of nose', 'head***nose***bridge', 'head***nose***nose tip', 'head***nose***nostril',
 32 |               'head***philtrum', 'head***temple', 'head***wrinkles']
 33 | 
 34 | car_12_class = ['background', 'car_body', 'head light', 'tail light', 'licence plate',
 35 |                 'wind shield', 'wheel', 'door', 'handle' , 'wheelhub', 'window', 'mirror']
 36 | car_20_class = ['background', 'back_bumper', 'bumper', 'car_body', 'car_lights', 'door', 'fender','grilles','handles',
 37 |                 'hoods', 'licensePlate', 'mirror','roof', 'running_boards', 'tailLight','tire', 'trunk_lids','wheelhub', 'window', 'windshield']
 38 | 
 39 | 
 40 | car_20_palette =[ 255,  255,  255, # 0 background
 41 |   238,  229,  102,# 1 back_bumper
 42 |   0, 0, 0,# 2 bumper
 43 |   124,  99 , 34, # 3 car
 44 |   193 , 127,  15,# 4 car_lights
 45 |   248  ,213 , 42, # 5 door
 46 |   220  ,147 , 77, # 6 fender
 47 |   99 , 83  , 3, # 7 grilles
 48 |   116 , 116 , 138,  # 8 handles
 49 |   200  ,226 , 37, # 9 hoods
 50 |   225 , 184 , 161, # 10 licensePlate
 51 |   142 , 172  ,248, # 11 mirror
 52 |   153 , 112 , 146, # 12 roof
 53 |   38  ,112 , 254, # 13 running_boards
 54 |   229 , 30  ,141, # 14 tailLight
 55 |   52 , 83  ,84, # 15 tire
 56 |   194 , 87 , 125, # 16 trunk_lids
 57 |   225,  96  ,18,  # 17 wheelhub
 58 |   31 , 102 , 211, # 18 window
 59 |   104 , 131 , 101# 19 windshield
 60 |          ]
 61 | 
 62 | 
 63 | 
 64 | face_palette = [  1.0000,  1.0000 , 1.0000,
 65 |               0.4420,  0.5100 , 0.4234,
 66 |               0.8562,  0.9537 , 0.3188,
 67 |               0.2405,  0.4699 , 0.9918,
 68 |               0.8434,  0.9329  ,0.7544,
 69 |               0.3748,  0.7917 , 0.3256,
 70 |               0.0190,  0.4943 , 0.3782,
 71 |               0.7461 , 0.0137 , 0.5684,
 72 |               0.1644,  0.2402 , 0.7324,
 73 |               0.0200 , 0.4379 , 0.4100,
 74 |               0.5853 , 0.8880 , 0.6137,
 75 |               0.7991 , 0.9132 , 0.9720,
 76 |               0.6816 , 0.6237  ,0.8562,
 77 |               0.9981 , 0.4692 , 0.3849,
 78 |               0.5351 , 0.8242 , 0.2731,
 79 |               0.1747 , 0.3626 , 0.8345,
 80 |               0.5323 , 0.6668 , 0.4922,
 81 |               0.2122 , 0.3483 , 0.4707,
 82 |               0.6844,  0.1238 , 0.1452,
 83 |               0.3882 , 0.4664 , 0.1003,
 84 |               0.2296,  0.0401 , 0.3030,
 85 |               0.5751 , 0.5467 , 0.9835,
 86 |               0.1308 , 0.9628,  0.0777,
 87 |               0.2849  ,0.1846 , 0.2625,
 88 |               0.9764 , 0.9420 , 0.6628,
 89 |               0.3893 , 0.4456 , 0.6433,
 90 |               0.8705 , 0.3957 , 0.0963,
 91 |               0.6117 , 0.9702 , 0.0247,
 92 |               0.3668 , 0.6694 , 0.3117,
 93 |               0.6451 , 0.7302,  0.9542,
 94 |               0.6171 , 0.1097,  0.9053,
 95 |               0.3377 , 0.4950,  0.7284,
 96 |               0.1655,  0.9254,  0.6557,
 97 |               0.9450  ,0.6721,  0.6162]
 98 | 
 99 | face_palette = [int(item * 255) for item in face_palette]
100 | 
101 | 
102 | 
103 | 
104 | 
105 | car_12_palette =[ 255,  255,  255, # 0 background
106 |          124,  99 , 34, # 3 car
107 |          193 , 127,  15,# 4 car_lights
108 |          229 , 30  ,141, # 14 tailLight
109 |         225 , 184 , 161, # 10 licensePlate
110 |         104 , 131 , 101,# 19 windshield
111 |         52 , 83  ,84, # 15 tire
112 |         248  ,213 , 42, # 5 door
113 |          116 , 116 , 138,  # 8 handles
114 |            225,  96  ,18,  # 17 wheelhub
115 |          31 , 102 , 211, # 18 window
116 |          142 , 172  ,248, # 11 mirror
117 |          ]
118 | 
119 | 
120 | 
121 | car_32_palette =[ 255,  255,  255,
122 |   238,  229,  102,
123 |   0, 0, 0,
124 |   124,  99 , 34,
125 |   193 , 127,  15,
126 |   106,  177,  21,
127 |   248  ,213 , 42,
128 |   252 , 155,  83,
129 |   220  ,147 , 77,
130 |   99 , 83  , 3,
131 |   116 , 116 , 138,
132 |   63  ,182 , 24,
133 |   200  ,226 , 37,
134 |   225 , 184 , 161,
135 |   233 ,  5  ,219,
136 |   142 , 172  ,248,
137 |   153 , 112 , 146,
138 |   38  ,112 , 254,
139 |   229 , 30  ,141,
140 |   115  ,208 , 131,
141 |   52 , 83  ,84,
142 |   229 , 63 , 110,
143 |   194 , 87 , 125,
144 |   225,  96  ,18,
145 |   73  ,139,  226,
146 |   172 , 143 , 16,
147 |   169 , 101 , 111,
148 |   31 , 102 , 211,
149 |   104 , 131 , 101,
150 |   70  ,168  ,156,
151 |   183 , 242 , 209,
152 |   72  ,184 , 226]
153 | 
154 | bedroom_palette =[ 255,  255,  255,
155 |   238,  229,  102,
156 |   255, 72, 69,
157 |   124,  99 , 34,
158 |   193 , 127,  15,
159 |   106,  177,  21,
160 |   248  ,213 , 42,
161 |   252 , 155,  83,
162 |   220  ,147 , 77,
163 |   99 , 83  , 3,
164 |   116 , 116 , 138,
165 |   63  ,182 , 24,
166 |   200  ,226 , 37,
167 |   225 , 184 , 161,
168 |   233 ,  5  ,219,
169 |   142 , 172  ,248,
170 |   153 , 112 , 146,
171 |   38  ,112 , 254,
172 |   229 , 30  ,141,
173 |    238, 229, 12,
174 |    255, 72, 6,
175 |    124, 9, 34,
176 |    193, 17, 15,
177 |    106, 17, 21,
178 |    28, 213, 2,
179 |    252, 155, 3,
180 |    20, 147, 77,
181 |    9, 83, 3,
182 |    11, 16, 138,
183 |    6, 12, 24,
184 |    20, 22, 37,
185 |    225, 14, 16,
186 |    23, 5, 29,
187 |    14, 12, 28,
188 |    15, 11, 16,
189 |    3, 12, 24,
190 |    22, 3, 11
191 |    ]
192 | 
193 | cat_palette = [255,  255,  255,
194 |             220, 220, 0,
195 |            190, 153, 153,
196 |             250, 170, 30,
197 |            220, 220, 0,
198 |            107, 142, 35,
199 |            102, 102, 156,
200 |            152, 251, 152,
201 |            119, 11, 32,
202 |            244, 35, 232,
203 |            220, 20, 60,
204 |            52 , 83  ,84,
205 |           194 , 87 , 125,
206 |           225,  96  ,18,
207 |           31 , 102 , 211,
208 |           104 , 131 , 101
209 |           ]
210 | 
211 | def trans_mask_stylegan_20classTo12(mask):
212 |     final_mask = np.zeros(mask.shape)
213 |     final_mask[(mask != 0)] = 1 # car
214 |     final_mask[(mask == 4)] = 2 # head light
215 |     final_mask[(mask == 14)] = 5 # tail light
216 |     final_mask[(mask == 10)] = 3 # licence plate
217 |     final_mask[ (mask == 19)] = 8 # wind shield
218 |     final_mask[(mask == 15)] = 6 # wheel
219 |     final_mask[(mask == 5)] = 9 # door
220 |     final_mask[(mask == 8)] = 10 # handle
221 |     final_mask[(mask == 17)] = 11 # wheelhub
222 |     final_mask[(mask == 18)] = 7 # window
223 |     final_mask[(mask == 11)] = 4 # mirror
224 |     return final_mask
225 | 
226 | 
227 | def trans_mask(mask):
228 |     return mask
229 | 


--------------------------------------------------------------------------------
/utils/distributed.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | import pickle
 24 | 
 25 | import torch
 26 | from torch import distributed as dist
 27 | 
 28 | 
 29 | def get_rank():
 30 |     if not dist.is_available():
 31 |         return 0
 32 | 
 33 |     if not dist.is_initialized():
 34 |         return 0
 35 | 
 36 |     return dist.get_rank()
 37 | 
 38 | 
 39 | def synchronize():
 40 |     if not dist.is_available():
 41 |         return
 42 | 
 43 |     if not dist.is_initialized():
 44 |         return
 45 | 
 46 |     world_size = dist.get_world_size()
 47 | 
 48 |     if world_size == 1:
 49 |         return
 50 | 
 51 |     dist.barrier()
 52 | 
 53 | 
 54 | def get_world_size():
 55 |     if not dist.is_available():
 56 |         return 1
 57 | 
 58 |     if not dist.is_initialized():
 59 |         return 1
 60 | 
 61 |     return dist.get_world_size()
 62 | 
 63 | 
 64 | def reduce_sum(tensor):
 65 |     if not dist.is_available():
 66 |         return tensor
 67 | 
 68 |     if not dist.is_initialized():
 69 |         return tensor
 70 | 
 71 |     tensor = tensor.clone()
 72 |     dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
 73 | 
 74 |     return tensor
 75 | 
 76 | 
 77 | def gather_grad(params):
 78 |     world_size = get_world_size()
 79 |     
 80 |     if world_size == 1:
 81 |         return
 82 | 
 83 |     for param in params:
 84 |         if param.grad is not None:
 85 |             dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
 86 |             param.grad.data.div_(world_size)
 87 | 
 88 | 
 89 | def all_gather(data):
 90 |     world_size = get_world_size()
 91 | 
 92 |     if world_size == 1:
 93 |         return [data]
 94 | 
 95 |     buffer = pickle.dumps(data)
 96 |     storage = torch.ByteStorage.from_buffer(buffer)
 97 |     tensor = torch.ByteTensor(storage).to('cuda')
 98 | 
 99 |     local_size = torch.IntTensor([tensor.numel()]).to('cuda')
100 |     size_list = [torch.IntTensor([0]).to('cuda') for _ in range(world_size)]
101 |     dist.all_gather(size_list, local_size)
102 |     size_list = [int(size.item()) for size in size_list]
103 |     max_size = max(size_list)
104 | 
105 |     tensor_list = []
106 |     for _ in size_list:
107 |         tensor_list.append(torch.ByteTensor(size=(max_size,)).to('cuda'))
108 | 
109 |     if local_size != max_size:
110 |         padding = torch.ByteTensor(size=(max_size - local_size,)).to('cuda')
111 |         tensor = torch.cat((tensor, padding), 0)
112 | 
113 |     dist.all_gather(tensor_list, tensor)
114 | 
115 |     data_list = []
116 | 
117 |     for size, tensor in zip(size_list, tensor_list):
118 |         buffer = tensor.cpu().numpy().tobytes()[:size]
119 |         data_list.append(pickle.loads(buffer))
120 | 
121 |     return data_list
122 | 
123 | 
124 | def reduce_loss_dict(loss_dict):
125 |     world_size = get_world_size()
126 | 
127 |     if world_size < 2:
128 |         return loss_dict
129 | 
130 |     with torch.no_grad():
131 |         keys = []
132 |         losses = []
133 | 
134 |         for k in sorted(loss_dict.keys()):
135 |             keys.append(k)
136 |             losses.append(loss_dict[k])
137 | 
138 |         losses = torch.stack(losses, 0)
139 |         dist.reduce(losses, dst=0)
140 | 
141 |         if dist.get_rank() == 0:
142 |             losses /= world_size
143 | 
144 |         reduced_losses = {k: v for k, v in zip(keys, losses)}
145 | 
146 |     return reduced_losses
147 | 


--------------------------------------------------------------------------------
/utils/inception_utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | 
 24 | import numpy as np
 25 | from scipy import linalg # For numpy FID
 26 | import time
 27 | import pickle
 28 | import torch
 29 | import torch.nn as nn
 30 | import torch.nn.functional as F
 31 | from torch.nn import Parameter as P
 32 | from torchvision.models.inception import inception_v3
 33 | 
 34 | 
 35 | class WrapInception(nn.Module):
 36 |     def __init__(self, net):
 37 |         super(WrapInception,self).__init__()
 38 |         self.net = net
 39 |         self.mean = P(torch.tensor([0.485, 0.456, 0.406]).view(1, -1, 1, 1),
 40 |                         requires_grad=False)
 41 |         self.std = P(torch.tensor([0.229, 0.224, 0.225]).view(1, -1, 1, 1),
 42 |                         requires_grad=False)
 43 | 
 44 |     def forward(self, x):
 45 |         # Normalize x
 46 |         x = (x + 1.) / 2.0
 47 |         x = (x - self.mean) / self.std
 48 |         # Upsample if necessary
 49 |         if x.shape[2] != 299 or x.shape[3] != 299:
 50 |             x = F.interpolate(x, size=(299, 299), mode='bilinear', align_corners=True)
 51 |         # 299 x 299 x 3
 52 |         x = self.net.Conv2d_1a_3x3(x)
 53 |         # 149 x 149 x 32
 54 |         x = self.net.Conv2d_2a_3x3(x)
 55 |         # 147 x 147 x 32
 56 |         x = self.net.Conv2d_2b_3x3(x)
 57 |         # 147 x 147 x 64
 58 |         x = F.max_pool2d(x, kernel_size=3, stride=2)
 59 |         # 73 x 73 x 64
 60 |         x = self.net.Conv2d_3b_1x1(x)
 61 |         # 73 x 73 x 80
 62 |         x = self.net.Conv2d_4a_3x3(x)
 63 |         # 71 x 71 x 192
 64 |         x = F.max_pool2d(x, kernel_size=3, stride=2)
 65 |         # 35 x 35 x 192
 66 |         x = self.net.Mixed_5b(x)
 67 |         # 35 x 35 x 256
 68 |         x = self.net.Mixed_5c(x)
 69 |         # 35 x 35 x 288
 70 |         x = self.net.Mixed_5d(x)
 71 |         # 35 x 35 x 288
 72 |         x = self.net.Mixed_6a(x)
 73 |         # 17 x 17 x 768
 74 |         x = self.net.Mixed_6b(x)
 75 |         # 17 x 17 x 768
 76 |         x = self.net.Mixed_6c(x)
 77 |         # 17 x 17 x 768
 78 |         x = self.net.Mixed_6d(x)
 79 |         # 17 x 17 x 768
 80 |         x = self.net.Mixed_6e(x)
 81 |         # 17 x 17 x 768
 82 |         # 17 x 17 x 768
 83 |         x = self.net.Mixed_7a(x)
 84 |         # 8 x 8 x 1280
 85 |         x = self.net.Mixed_7b(x)
 86 |         # 8 x 8 x 2048
 87 |         x = self.net.Mixed_7c(x)
 88 |         # 8 x 8 x 2048
 89 |         pool = torch.mean(x.view(x.size(0), x.size(1), -1), 2)
 90 |         # 1 x 1 x 2048
 91 |         logits = self.net.fc(F.dropout(pool, training=False).view(pool.size(0), -1))
 92 |         # 1000 (num_classes)
 93 |         return pool, logits
 94 | 
 95 | 
 96 | # A pytorch implementation of cov, from Modar M. Alfadly
 97 | # https://discuss.pytorch.org/t/covariance-and-gradient-support/16217/2
 98 | def torch_cov(m, rowvar=False):
 99 |     '''Estimate a covariance matrix given data.
100 | 
101 |     Covariance indicates the level to which two variables vary together.
102 |     If we examine N-dimensional samples, `X = [x_1, x_2, ... x_N]^T`,
103 |     then the covariance matrix element `C_{ij}` is the covariance of
104 |     `x_i` and `x_j`. The element `C_{ii}` is the variance of `x_i`.
105 | 
106 |     Args:
107 |         m: A 1-D or 2-D array containing multiple variables and observations.
108 |             Each row of `m` represents a variable, and each column a single
109 |             observation of all those variables.
110 |         rowvar: If `rowvar` is True, then each row represents a
111 |             variable, with observations in the columns. Otherwise, the
112 |             relationship is transposed: each column represents a variable,
113 |             while the rows contain observations.
114 | 
115 |     Returns:
116 |         The covariance matrix of the variables.
117 |     '''
118 |     if m.dim() > 2:
119 |         raise ValueError('m has more than 2 dimensions')
120 |     if m.dim() < 2:
121 |         m = m.view(1, -1)
122 |     if not rowvar and m.size(0) != 1:
123 |         m = m.t()
124 |     # m = m.type(torch.double)  # uncomment this line if desired
125 |     fact = 1.0 / (m.size(1) - 1)
126 |     m -= torch.mean(m, dim=1, keepdim=True)
127 |     mt = m.t()  # if complex: mt = m.t().conj()
128 | 
129 |     return fact * m.matmul(mt).squeeze()
130 | 
131 | 
132 | # Pytorch implementation of matrix sqrt, from Tsung-Yu Lin, and Subhransu Maji
133 | # https://github.com/msubhransu/matrix-sqrt 
134 | def sqrt_newton_schulz(A, numIters, dtype=None):
135 |     with torch.no_grad():
136 |         if dtype is None:
137 |             dtype = A.type()
138 |         batchSize = A.shape[0]
139 |         dim = A.shape[1]
140 |         normA = A.mul(A).sum(dim=1).sum(dim=1).sqrt()
141 |         Y = A.div(normA.view(batchSize, 1, 1).expand_as(A))
142 |         I = torch.eye(dim,dim).view(1, dim, dim).repeat(batchSize,1,1).type(dtype)
143 |         Z = torch.eye(dim,dim).view(1, dim, dim).repeat(batchSize,1,1).type(dtype)
144 |         for i in range(numIters):
145 |             T = 0.5*(3.0*I - Z.bmm(Y))
146 |             Y = Y.bmm(T)
147 |             Z = T.bmm(Z)
148 |         sA = Y*torch.sqrt(normA).view(batchSize, 1, 1).expand_as(A)
149 |     return sA
150 | 
151 | 
152 | # FID calculator from TTUR--consider replacing this with GPU-accelerated cov
153 | # calculations using torch?
154 | def numpy_calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
155 |     """Numpy implementation of the Frechet Distance.
156 |     Taken from https://github.com/bioinf-jku/TTUR
157 |     The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
158 |     and X_2 ~ N(mu_2, C_2) is
159 |             d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
160 |     Stable version by Dougal J. Sutherland.
161 |     Params:
162 |     -- mu1   : Numpy array containing the activations of a layer of the
163 |                 inception net (like returned by the function 'get_predictions')
164 |                 for generated samples.
165 |     -- mu2   : The sample mean over activations, precalculated on an 
166 |                 representive data set.
167 |     -- sigma1: The covariance matrix over activations for generated samples.
168 |     -- sigma2: The covariance matrix over activations, precalculated on an 
169 |                 representive data set.
170 |     Returns:
171 |     --   : The Frechet Distance.
172 |     """
173 | 
174 |     mu1 = np.atleast_1d(mu1)
175 |     mu2 = np.atleast_1d(mu2)
176 | 
177 |     sigma1 = np.atleast_2d(sigma1)
178 |     sigma2 = np.atleast_2d(sigma2)
179 | 
180 |     assert mu1.shape == mu2.shape, \
181 |     'Training and test mean vectors have different lengths'
182 |     assert sigma1.shape == sigma2.shape, \
183 |     'Training and test covariances have different dimensions'
184 | 
185 |     diff = mu1 - mu2
186 | 
187 |     # Product might be almost singular
188 |     covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
189 |     if not np.isfinite(covmean).all():
190 |         msg = ('fid calculation produces singular product; '
191 |                 'adding %s to diagonal of cov estimates') % eps
192 |         print(msg)
193 |     offset = np.eye(sigma1.shape[0]) * eps
194 |     covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
195 | 
196 |     # Numerical error might give slight imaginary component
197 |     if np.iscomplexobj(covmean):
198 |         print('wat')
199 |     if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
200 |         m = np.max(np.abs(covmean.imag))
201 |         raise ValueError('Imaginary component {}'.format(m))
202 |     covmean = covmean.real  
203 | 
204 |     tr_covmean = np.trace(covmean) 
205 | 
206 |     out = diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
207 |     return out
208 | 
209 | 
210 | def torch_calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
211 |     """Pytorch implementation of the Frechet Distance.
212 |     Taken from https://github.com/bioinf-jku/TTUR
213 |     The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
214 |     and X_2 ~ N(mu_2, C_2) is
215 |             d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
216 |     Stable version by Dougal J. Sutherland.
217 |     Params:
218 |     -- mu1   : Numpy array containing the activations of a layer of the
219 |                 inception net (like returned by the function 'get_predictions')
220 |                 for generated samples.
221 |     -- mu2   : The sample mean over activations, precalculated on an 
222 |                 representive data set.
223 |     -- sigma1: The covariance matrix over activations for generated samples.
224 |     -- sigma2: The covariance matrix over activations, precalculated on an 
225 |                 representive data set.
226 |     Returns:
227 |     --   : The Frechet Distance.
228 |     """
229 | 
230 | 
231 |     assert mu1.shape == mu2.shape, \
232 |     'Training and test mean vectors have different lengths'
233 |     assert sigma1.shape == sigma2.shape, \
234 |     'Training and test covariances have different dimensions'
235 | 
236 |     diff = mu1 - mu2
237 |     # Run 50 itrs of newton-schulz to get the matrix sqrt of sigma1 dot sigma2
238 |     covmean = sqrt_newton_schulz(sigma1.mm(sigma2).unsqueeze(0), 50).squeeze()  
239 |     out = (diff.dot(diff) +  torch.trace(sigma1) + torch.trace(sigma2)
240 |             - 2 * torch.trace(covmean))
241 |     return out
242 | 
243 | 
244 | # Calculate Inception Score mean + std given softmax'd logits and number of splits
245 | def calculate_inception_score(pred, num_splits=10):
246 |     scores = []
247 |     for index in range(num_splits):
248 |         pred_chunk = pred[index * (pred.shape[0] // num_splits): (index + 1) * (pred.shape[0] // num_splits), :]
249 |         kl_inception = pred_chunk * (np.log(pred_chunk) - np.log(np.expand_dims(np.mean(pred_chunk, 0), 0)))
250 |         kl_inception = np.mean(np.sum(kl_inception, 1))
251 |         scores.append(np.exp(kl_inception))
252 |     return np.mean(scores), np.std(scores)
253 | 
254 | 
255 | # Loop and run the sampler and the net until it accumulates num_inception_images
256 | # activations. Return the pool, the logits, and the labels (if one wants 
257 | # Inception Accuracy the labels of the generated class will be needed)
258 | def accumulate_inception_activations(sample, net, num_inception_images=50000):
259 |     pool, logits= [], []
260 |     while (torch.cat(logits, 0).shape[0] if len(logits) else 0) < num_inception_images:
261 |         with torch.no_grad():
262 |             images = sample()
263 |             if images.shape[1] != 3:
264 |                 images = images.expand(-1,3,-1,-1)
265 | 
266 |             pool_val, logits_val = net(images.float())
267 |             pool += [pool_val]
268 |             logits += [F.softmax(logits_val, 1)]
269 | 
270 |     return torch.cat(pool, 0), torch.cat(logits, 0)
271 | 
272 | 
273 | # Load and wrap the Inception model
274 | def load_inception_net(parallel=False):
275 |     inception_model = inception_v3(pretrained=True, transform_input=False)
276 |     inception_model = WrapInception(inception_model.eval()).cuda()
277 |     if parallel:
278 |         print('Parallelizing Inception module...')
279 |         inception_model = nn.DataParallel(inception_model)
280 |     return inception_model
281 | 
282 | 
283 | # This produces a function which takes in an iterator which returns a set number of samples
284 | # and iterates until it accumulates config['num_inception_images'] images.
285 | # The iterator can return samples with a different batch size than used in
286 | # training, using the setting confg['inception_batchsize']
287 | def prepare_inception_metrics(dataset, parallel, no_fid=False):
288 |     # Load metrics; this is intentionally not in a try-except loop so that
289 |     # the script will crash here if it cannot find the Inception moments.
290 |     # By default, remove the "hdf5" from dataloader
291 |     with open(dataset, 'rb') as f:
292 |         embeds = pickle.load(f)
293 |         data_mu = embeds['mean']
294 |         data_sigma = embeds['cov']
295 | 
296 |     # Load network
297 |     net = load_inception_net(parallel)
298 |     def get_inception_metrics(sample, num_inception_images, num_splits=10, 
299 |                                 prints=True, use_torch=True):
300 |         if prints:
301 |             print('Gathering activations...')
302 |             pool, logits = accumulate_inception_activations(sample, net, num_inception_images)
303 |         if prints:  
304 |             print('Calculating Inception Score...')
305 |             IS_mean, IS_std = calculate_inception_score(logits.cpu().numpy(), num_splits)
306 |         if no_fid:
307 |             FID = 9999.0
308 |         else:
309 |             if prints:
310 |                 print('Calculating means and covariances...')
311 |             if use_torch:
312 |                 mu, sigma = torch.mean(pool, 0), torch_cov(pool, rowvar=False)
313 |             else:
314 |                 mu, sigma = np.mean(pool.cpu().numpy(), axis=0), np.cov(pool.cpu().numpy(), rowvar=False)
315 |         if prints:
316 |             print('Covariances calculated, getting FID...')
317 |         if use_torch:
318 |             import pdb; pdb.set_trace()
319 |             FID = torch_calculate_frechet_distance(mu, sigma, torch.tensor(data_mu).float().cuda(), torch.tensor(data_sigma).float().cuda())
320 |             FID = float(FID.cpu().numpy())
321 |         else:
322 |             FID = numpy_calculate_frechet_distance(mu, sigma, data_mu, data_sigma)
323 |         # Delete mu, sigma, pool, logits, and labels, just in case
324 |         del mu, sigma, pool, logits
325 |         return IS_mean, IS_std, FID
326 | 
327 |     return get_inception_metrics
328 | 
329 | def sample_gema(g_ema, device, truncation, mean_latent, batch_size):  
330 |     with torch.no_grad():
331 |         g_ema.eval()
332 | 
333 |         sample_z = torch.randn(batch_size, 512, device=device)
334 | 
335 |         samples = g_ema([sample_z], truncation=truncation, truncation_latent=mean_latent)
336 |         
337 |         sample = samples[0]
338 |         
339 |     return sample


--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (C) 2021 NVIDIA Corporation.  All rights reserved.
  3 | Licensed under The MIT License (MIT)
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | this software and associated documentation files (the "Software"), to deal in
  7 | the Software without restriction, including without limitation the rights to
  8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  9 | the Software, and to permit persons to whom the Software is furnished to do so,
 10 | subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in all
 13 | copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | """
 22 | 
 23 | import torch
 24 | from PIL import Image
 25 | import numpy as np
 26 | from torch import nn
 27 | 
 28 | class Interpolate(nn.Module):
 29 |     def __init__(self, size, mode):
 30 |         super(Interpolate, self).__init__()
 31 |         self.interp = nn.functional.interpolate
 32 |         self.size = size
 33 |         self.mode = mode
 34 | 
 35 |     def forward(self, x):
 36 |         x = self.interp(x, size=self.size, mode=self.mode, align_corners=False)
 37 |         return x
 38 | 
 39 | 
 40 | 
 41 | def multi_acc(y_pred, y_test):
 42 |     y_pred_softmax = torch.log_softmax(y_pred, dim=1)
 43 |     _, y_pred_tags = torch.max(y_pred_softmax, dim=1)
 44 | 
 45 |     correct_pred = (y_pred_tags == y_test).float()
 46 |     acc = correct_pred.sum() / len(correct_pred)
 47 | 
 48 |     acc = acc * 100
 49 | 
 50 |     return acc
 51 | 
 52 | 
 53 | def oht_to_scalar(y_pred):
 54 |     y_pred_softmax = torch.log_softmax(y_pred, dim=1)
 55 |     _, y_pred_tags = torch.max(y_pred_softmax, dim=1)
 56 | 
 57 |     return y_pred_tags
 58 | 
 59 | def latent_to_image(g_all, upsamplers, latents, return_upsampled_layers=False, use_style_latents=False,
 60 |                     style_latents=None, process_out=True, return_stylegan_latent=False, dim=512, return_only_im=False):
 61 |     '''Given a input latent code, generate corresponding image and concatenated feature maps'''
 62 | 
 63 |     # assert (len(latents) == 1)  # for GPU memory constraints
 64 |     if not use_style_latents:
 65 |         # generate style_latents from latents
 66 |         style_latents = g_all.module.truncation(g_all.module.g_mapping(latents))
 67 |         style_latents = style_latents.clone()  # make different layers non-alias
 68 | 
 69 |     else:
 70 |         style_latents = latents
 71 | 
 72 |         # style_latents = latents
 73 |     if return_stylegan_latent:
 74 | 
 75 |         return  style_latents
 76 |     img_list, affine_layers = g_all.module.g_synthesis(style_latents)
 77 | 
 78 |     if return_only_im:
 79 |         if process_out:
 80 |             if img_list.shape[-2] > 512:
 81 |                 img_list = upsamplers[-1](img_list)
 82 | 
 83 |             img_list = img_list.cpu().detach().numpy()
 84 |             img_list = process_image(img_list)
 85 |             img_list = np.transpose(img_list, (0, 2, 3, 1)).astype(np.uint8)
 86 |         return img_list, style_latents
 87 | 
 88 |     number_feautre = 0
 89 | 
 90 |     for item in affine_layers:
 91 |         number_feautre += item.shape[1]
 92 | 
 93 | 
 94 |     affine_layers_upsamples = torch.FloatTensor(1, number_feautre, dim, dim).cuda()
 95 |     if return_upsampled_layers:
 96 | 
 97 |         start_channel_index = 0
 98 |         for i in range(len(affine_layers)):
 99 |             len_channel = affine_layers[i].shape[1]
100 |             affine_layers_upsamples[:, start_channel_index:start_channel_index + len_channel] = upsamplers[i](
101 |                 affine_layers[i])
102 |             start_channel_index += len_channel
103 | 
104 |     if img_list.shape[-2] != 512:
105 |         img_list = upsamplers[-1](img_list)
106 | 
107 |     if process_out:
108 |         img_list = img_list.cpu().detach().numpy()
109 |         img_list = process_image(img_list)
110 |         img_list = np.transpose(img_list, (0, 2, 3, 1)).astype(np.uint8)
111 |         # print('start_channel_index',start_channel_index)
112 | 
113 | 
114 |     return img_list, affine_layers_upsamples
115 | 
116 | 
117 | def process_image(images):
118 |     drange = [-1, 1]
119 |     scale = 255 / (drange[1] - drange[0])
120 |     images = images * scale + (0.5 - drange[0] * scale)
121 | 
122 |     images = images.astype(int)
123 |     images[images > 255] = 255
124 |     images[images < 0] = 0
125 | 
126 |     return images.astype(int)
127 | 
128 | def colorize_mask(mask, palette):
129 |     # mask: numpy array of the mask
130 | 
131 |     new_mask = Image.fromarray(mask.astype(np.uint8)).convert('P')
132 |     new_mask.putpalette(palette)
133 |     return np.array(new_mask.convert('RGB'))
134 | 
135 | 
136 | def get_label_stas(data_loader):
137 |     count_dict = {}
138 |     for i in range(data_loader.__len__()):
139 |         x, y = data_loader.__getitem__(i)
140 |         if int(y.item()) not in count_dict:
141 |             count_dict[int(y.item())] = 1
142 |         else:
143 |             count_dict[int(y.item())] += 1
144 | 
145 |     return count_dict
146 | 


--------------------------------------------------------------------------------