├── SSIM ├── data │ ├── __init__.py │ ├── dataset │ │ ├── __init__.py │ │ ├── base_dataset.py │ │ ├── jnd_dataset.py │ │ └── twoafc_dataset.py │ ├── base_data_loader.py │ ├── data_loader.py │ ├── custom_dataset_data_loader.py │ └── image_folder.py ├── util │ ├── __init__.py │ └── html.py ├── requirements.txt ├── .gitignore ├── compute_dists.py ├── compute_dists_dirs.py ├── __init__.py ├── LICENSE ├── Dockerfile ├── compute_market.py ├── perceptual_loss.py ├── test_network.py ├── test_dataset_model.py ├── imgs │ └── example_dists.txt └── train.py ├── meglass_no_glass_ori.txt ├── .gitconfig ├── TTUR-master ├── BEGAN_FID_batched │ ├── stats │ │ └── README.md │ ├── data │ │ └── README.md │ ├── README.md │ ├── main_fid.py │ ├── data_loader.py │ ├── utils.py │ ├── config.py │ └── models.py ├── DCGAN_FID_batched │ ├── stats │ │ └── README.md │ ├── README.md │ └── ops.py ├── WGAN_GP │ ├── data │ │ └── README.md │ ├── inception-2015-12-05 │ │ └── README.md │ ├── README.md │ ├── tflib │ │ ├── ops │ │ │ ├── layernorm.py │ │ │ ├── deconv2d.py │ │ │ ├── conv1d.py │ │ │ ├── conv2d.py │ │ │ ├── batchnorm.py │ │ │ └── linear.py │ │ ├── save_images.py │ │ ├── small_imagenet.py │ │ ├── plot.py │ │ └── data_loader.py │ └── language_helpers.py ├── Results │ └── figures │ │ ├── lang_jsd4.pdf │ │ ├── lang_jsd6.pdf │ │ ├── dcgan_celebA.pdf │ │ ├── dcgan_lsun.pdf │ │ ├── dcgan_svhn.pdf │ │ ├── wgan_gp_lsun.pdf │ │ ├── dcgan_cifar10.pdf │ │ └── wgan_gp_cifar10.pdf ├── Poster │ └── TTUR_Converges_NIPS2017.pdf ├── FID_vs_Inception_Score │ ├── figures │ │ ├── sp_FID.pdf │ │ ├── sp_IND.pdf │ │ ├── blur_FID.pdf │ │ ├── blur_IND.pdf │ │ ├── mixed_FID.pdf │ │ ├── mixed_IND.pdf │ │ ├── rect_FID.pdf │ │ ├── rect_IND.pdf │ │ ├── swirl_FID.pdf │ │ ├── swirl_IND.pdf │ │ ├── gnoise_FID.pdf │ │ ├── gnoise_IND.pdf │ │ └── table_FID_vs_Inc.pdf │ └── README.md ├── fid_example.py ├── precalc_stats_example.py └── README.md ├── .gitignore ├── celeba_glass.py ├── lfw_pad.py ├── meglass_split.py ├── configs ├── munit.yaml ├── unit.yaml ├── ablation │ ├── ab2_1.yaml │ ├── ab2_4.yaml │ ├── ab2_2.yaml │ ├── ab2_3.yaml │ ├── ab2_dual.yaml │ └── ab2.yaml ├── LFW.yaml ├── celeba.yaml ├── meglass.yaml ├── meglass_refine.yaml └── meglass_refine2.yaml ├── README.md ├── data.py ├── MUNIT-master ├── UNIT_train.py └── UNIT_ssim_test.py ├── smooth.py ├── train.py ├── swap.py ├── test_batch.py └── ssim_batch.py /SSIM/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SSIM/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /meglass_no_glass_ori.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SSIM/data/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitconfig: -------------------------------------------------------------------------------- 1 | [user] 2 | name = Bingwen-Hu 3 | email = hubw.sky@gmail.com 4 | -------------------------------------------------------------------------------- /TTUR-master/BEGAN_FID_batched/stats/README.md: -------------------------------------------------------------------------------- 1 | Folder for 
precalculated FID statistics 2 | -------------------------------------------------------------------------------- /TTUR-master/DCGAN_FID_batched/stats/README.md: -------------------------------------------------------------------------------- 1 | Folder for precalculated FID statistics 2 | -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/data/README.md: -------------------------------------------------------------------------------- 1 | This directory holds the data directories for the training and validation datasets. 2 | -------------------------------------------------------------------------------- /TTUR-master/Results/figures/lang_jsd4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/Results/figures/lang_jsd4.pdf -------------------------------------------------------------------------------- /TTUR-master/Results/figures/lang_jsd6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/Results/figures/lang_jsd6.pdf -------------------------------------------------------------------------------- /TTUR-master/Results/figures/dcgan_celebA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/Results/figures/dcgan_celebA.pdf -------------------------------------------------------------------------------- /TTUR-master/Results/figures/dcgan_lsun.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/Results/figures/dcgan_lsun.pdf -------------------------------------------------------------------------------- /TTUR-master/Results/figures/dcgan_svhn.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/Results/figures/dcgan_svhn.pdf -------------------------------------------------------------------------------- /TTUR-master/Results/figures/wgan_gp_lsun.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/Results/figures/wgan_gp_lsun.pdf -------------------------------------------------------------------------------- /TTUR-master/Poster/TTUR_Converges_NIPS2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/Poster/TTUR_Converges_NIPS2017.pdf -------------------------------------------------------------------------------- /TTUR-master/Results/figures/dcgan_cifar10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/Results/figures/dcgan_cifar10.pdf -------------------------------------------------------------------------------- /TTUR-master/BEGAN_FID_batched/data/README.md: -------------------------------------------------------------------------------- 1 | Data folder, e.g. celebA_cropped or lsun_cropped directories are located here if not specified otherwise. 
2 | -------------------------------------------------------------------------------- /TTUR-master/Results/figures/wgan_gp_cifar10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/Results/figures/wgan_gp_cifar10.pdf -------------------------------------------------------------------------------- /SSIM/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=0.4.0 2 | torchvision>=0.2.1 3 | numpy>=1.14.3 4 | scipy>=1.0.1 5 | scikit-image>=0.13.0 6 | opencv>=2.4.11 7 | matplotlib>=1.5.1 8 | -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/sp_FID.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/sp_FID.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/sp_IND.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/sp_IND.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/blur_FID.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/blur_FID.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/blur_IND.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/blur_IND.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/mixed_FID.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/mixed_FID.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/mixed_IND.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/mixed_IND.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/rect_FID.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/rect_FID.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/rect_IND.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/rect_IND.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/swirl_FID.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/swirl_FID.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/swirl_IND.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/swirl_IND.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/gnoise_FID.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/gnoise_FID.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/gnoise_IND.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/gnoise_IND.pdf -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/figures/table_FID_vs_Inc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bingwen-Hu/ERGAN-Pytorch/HEAD/TTUR-master/FID_vs_Inception_Score/figures/table_FID_vs_Inc.pdf -------------------------------------------------------------------------------- /SSIM/data/base_data_loader.py: -------------------------------------------------------------------------------- 1 | 2 | class BaseDataLoader(): 3 | def __init__(self): 4 | pass 5 | 6 | def initialize(self): 7 | pass 8 | 9 | def load_data(): 10 | return None 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/inception-2015-12-05/README.md: -------------------------------------------------------------------------------- 1 | Contents of the Inception-v3 model. 2 | 3 | Get it from here: 4 | 5 | http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 6 | 7 | and unpack it in the projects root directory (WGAN_GP). 
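For reference, the download-and-unpack step could be scripted roughly as below; this is only a sketch (it assumes the extracted files should end up in this `inception-2015-12-05` folder), and TTUR's `fid.check_or_download_inception` can also fetch the model for you.

```python
# Sketch: fetch and unpack the Inception-v3 snapshot referenced above.
import os
import tarfile
import urllib.request

URL = "http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz"

def fetch_inception(dest_dir="WGAN_GP/inception-2015-12-05"):
    os.makedirs(dest_dir, exist_ok=True)
    archive = os.path.join(dest_dir, "inception-2015-12-05.tgz")
    if not os.path.exists(archive):
        urllib.request.urlretrieve(URL, archive)  # large download
    with tarfile.open(archive, "r:gz") as tar:
        tar.extractall(dest_dir)  # unpacks the frozen graph used for FID

if __name__ == "__main__":
    fetch_inception()
```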
8 | -------------------------------------------------------------------------------- /SSIM/data/dataset/base_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | class BaseDataset(data.Dataset): 4 | def __init__(self): 5 | super(BaseDataset, self).__init__() 6 | 7 | def name(self): 8 | return 'BaseDataset' 9 | 10 | def initialize(self): 11 | pass 12 | 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | outputs/ 2 | results/ 3 | models/ 4 | datasets/ 5 | inputs/ 6 | scripts/ 7 | docs/ 8 | logs/ 9 | .idea/ 10 | .ipynb_checkpoints/ 11 | src/*jpg 12 | notebooks/.ipynb_checkpoints/* 13 | exps/ 14 | src/yaml_generator.py 15 | *.tar.gz 16 | *.ipynb 17 | *.zip 18 | *.pkl 19 | *.pyc 20 | *.jpg 21 | *.png 22 | *.pth 23 | *.pt 24 | *.t7 25 | *.sh 26 | -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/README.md: -------------------------------------------------------------------------------- 1 | This is a fork of the Improved Wasserstein Implementation 2 | 3 | https://github.com/igul222/improved_wgan_training 4 | 5 | We ported the implementation to Python 3.x and added a FID 6 | evaluation to the image model (gan_64x64_FID.py) which is 7 | logged and trackable with Tensorboard. 8 | 9 | The language model is altered to also log Tensorboard events 10 | i.e. the JSD. 11 | -------------------------------------------------------------------------------- /TTUR-master/BEGAN_FID_batched/README.md: -------------------------------------------------------------------------------- 1 | BEGAN fork from 2 | 3 | https://github.com/carpedm20/BEGAN-tensorflow 4 | 5 | with batched FID evaluation 6 | 7 | Needs fid.py from TTUR root directory. Please copy it here. 8 | 9 | Precalculated real world / training data statistics can be downloaded 10 | from here. Be sure to use the batched versions. 11 | 12 | http://bioinf.jku.at/research/ttur/ttur.html 13 | 14 | see sh/run.sh for options 15 | 16 | Fixed random seeds are removed. 
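For orientation, each precalculated statistics file is a NumPy `.npz` archive holding the mean and covariance of Inception features for the real data, and it is read the same way as in `fid_example.py`; the filename below is only an example.

```python
# Sketch: inspect a downloaded statistics file (example filename).
import numpy as np

f = np.load("stats/fid_stats_celeba.npz")
mu_real, sigma_real = f["mu"][:], f["sigma"][:]
f.close()

print(mu_real.shape, sigma_real.shape)  # feature mean vector and covariance matrix
```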
17 | -------------------------------------------------------------------------------- /SSIM/data/data_loader.py: -------------------------------------------------------------------------------- 1 | def CreateDataLoader(datafolder,dataroot='./dataset',dataset_mode='2afc',load_size=64,batch_size=1,serial_batches=True): 2 | from data.custom_dataset_data_loader import CustomDatasetDataLoader 3 | data_loader = CustomDatasetDataLoader() 4 | # print(data_loader.name()) 5 | data_loader.initialize(datafolder,dataroot=dataroot+'/'+dataset_mode,dataset_mode=dataset_mode,load_size=load_size,batch_size=batch_size,serial_batches=serial_batches, nThreads=1) 6 | return data_loader 7 | -------------------------------------------------------------------------------- /SSIM/.gitignore: -------------------------------------------------------------------------------- 1 | # Don't track content of these folders 2 | outputs/ 3 | models/ 4 | logs/ 5 | __pycache__/ 6 | configs/cifs9a31 7 | core 8 | 9 | # Compiled source # 10 | ################### 11 | *.com 12 | *.class 13 | *.dll 14 | *.exe 15 | *.o 16 | *.so 17 | *.pyc 18 | 19 | # Packages # 20 | ############ 21 | # it's better to unpack these files and commit the raw source 22 | # git has its own built in compression methods 23 | *.7z 24 | *.dmg 25 | *.gz 26 | *.iso 27 | *.jar 28 | *.rar 29 | *.tar 30 | *.zip 31 | *.mat 32 | *.jpg 33 | *.npy 34 | *.pt 35 | *.pth 36 | -------------------------------------------------------------------------------- /SSIM/compute_dists.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from models import dist_model as dm 3 | from util import util 4 | 5 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 6 | parser.add_argument('--path0', type=str, default='./imgs/ex_ref.png') 7 | parser.add_argument('--path1', type=str, default='./imgs/ex_p0.png') 8 | parser.add_argument('--use_gpu', action='store_true', help='turn on flag to use GPU') 9 | opt = parser.parse_args() 10 | 11 | ## Initializing the model 12 | model = dm.DistModel() 13 | model.initialize(model='net-lin',net='alex',use_gpu=opt.use_gpu) 14 | 15 | # Load images 16 | img0 = util.im2tensor(util.load_image(opt.path0)) # RGB image from [-1,1] 17 | img1 = util.im2tensor(util.load_image(opt.path1)) 18 | 19 | # Compute distance 20 | dist01 = model.forward(img0,img1) 21 | print('Distance: %.3f'%dist01) 22 | -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/tflib/ops/layernorm.py: -------------------------------------------------------------------------------- 1 | import tflib as lib 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | def Layernorm(name, norm_axes, inputs): 7 | mean, var = tf.nn.moments(inputs, norm_axes, keep_dims=True) 8 | 9 | # Assume the 'neurons' axis is the first of norm_axes. This is the case for fully-connected and BCHW conv layers. 10 | n_neurons = inputs.get_shape().as_list()[norm_axes[0]] 11 | 12 | offset = lib.param(name+'.offset', np.zeros(n_neurons, dtype='float32')) 13 | #offset = np.zeros(n_neurons, dtype='float32') 14 | scale = lib.param(name+'.scale', np.ones(n_neurons, dtype='float32')) 15 | #scale = np.ones(n_neurons, dtype='float32') 16 | # Add broadcasting dims to offset and scale (e.g. 
BCHW conv data) 17 | offset = tf.reshape(offset, [-1] + [1 for i in range(len(norm_axes)-1)]) 18 | scale = tf.reshape(scale, [-1] + [1 for i in range(len(norm_axes)-1)]) 19 | 20 | result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5) 21 | 22 | return result 23 | -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/tflib/save_images.py: -------------------------------------------------------------------------------- 1 | """ 2 | Image grid saver, based on color_grid_vis from github.com/Newmu 3 | """ 4 | 5 | import numpy as np 6 | import scipy.misc 7 | from scipy.misc import imsave 8 | 9 | def save_images(X, save_path): 10 | # [0, 1] -> [0,255] 11 | if isinstance(X.flatten()[0], np.floating): 12 | X = (255.99*X).astype('uint8') 13 | 14 | n_samples = X.shape[0] 15 | rows = int(np.sqrt(n_samples)) 16 | while n_samples % rows != 0: 17 | rows -= 1 18 | 19 | nh, nw = rows, n_samples//rows 20 | 21 | if X.ndim == 2: 22 | X = np.reshape(X, (X.shape[0], int(np.sqrt(X.shape[1])), int(np.sqrt(X.shape[1])))) 23 | 24 | if X.ndim == 4: 25 | # BCHW -> BHWC 26 | X = X.transpose(0,2,3,1) 27 | h, w = X[0].shape[:2] 28 | img = np.zeros((h*nh, w*nw, 3)) 29 | elif X.ndim == 3: 30 | h, w = X[0].shape[:2] 31 | img = np.zeros((h*nh, w*nw)) 32 | 33 | for n, x in enumerate(X): 34 | j = n//nw 35 | i = n%nw 36 | img[j*h:j*h+h, i*w:i*w+w] = x 37 | 38 | imsave(save_path, img) 39 | -------------------------------------------------------------------------------- /SSIM/compute_dists_dirs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from IPython import embed 4 | from util import util 5 | import models.dist_model as dm 6 | 7 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 8 | parser.add_argument('--dir0', type=str, default='./imgs/ex_dir0') 9 | parser.add_argument('--dir1', type=str, default='./imgs/ex_dir1') 10 | parser.add_argument('--out', type=str, default='./imgs/example_dists.txt') 11 | parser.add_argument('--use_gpu', action='store_true', help='turn on flag to use GPU') 12 | opt = parser.parse_args() 13 | 14 | ## Initializing the model 15 | model = dm.DistModel() 16 | model.initialize(model='net-lin',net='alex',use_gpu=opt.use_gpu) 17 | 18 | # crawl directories 19 | f = open(opt.out,'w') 20 | files = os.listdir(opt.dir0) 21 | 22 | for file in files: 23 | if(os.path.exists(os.path.join(opt.dir1,file))): 24 | # Load images 25 | img0 = util.im2tensor(util.load_image(os.path.join(opt.dir0,file))) # RGB image from [-1,1] 26 | img1 = util.im2tensor(util.load_image(os.path.join(opt.dir1,file))) 27 | 28 | # Compute distance 29 | dist01 = model.forward(img0,img1) 30 | print('%s: %.3f'%(file,dist01)) 31 | f.writelines('%s: %.6f\n'%(file,dist01)) 32 | 33 | f.close() 34 | -------------------------------------------------------------------------------- /SSIM/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | from .models import dist_model 7 | 8 | class PerceptualLoss(torch.nn.Module): 9 | def __init__(self, model='net-lin', net='vgg', use_gpu=True): # VGG using our perceptually-learned weights (LPIPS metric) 10 | # def __init__(self, model='net', net='vgg', use_gpu=True): # "default" way of using VGG 11 | print('Setting up Perceptual loss...') 12 | self.model 
= dist_model.DistModel()
13 |         self.model.initialize(model=model, net=net, use_gpu=use_gpu)
14 |         print('...Done')
15 | 
16 |     def forward(self, pred, target, normalize=False):
17 |         """
18 |         Pred and target are Variables.
19 |         If normalize is on, assumes the images are between [0,1] and then scales them to [-1, 1]
20 |         If normalize is false, assumes the images are already between [-1,+1]
21 | 
22 |         Inputs pred and target are Nx3xHxW
23 |         Output pytorch Variable N long
24 |         """
25 |         if normalize:
26 |             target = 2 * target - 1
27 |             pred = 2 * pred - 1
28 | 
29 |         dist = self.model.forward_pair(target, pred)
30 | 
31 |         return dist
32 | 
--------------------------------------------------------------------------------
/TTUR-master/fid_example.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | from __future__ import absolute_import, division, print_function
 3 | import os
 4 | import glob
 5 | #os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 6 | import numpy as np
 7 | import fid
 8 | from scipy.misc import imread
 9 | import tensorflow as tf
10 | 
11 | # Paths
12 | image_path = '/local00/bioinf/tmp/' # set path to some generated images
13 | stats_path = '/local00/bioinf/fid_stats_cifar10.npz' # training set statistics
14 | inception_path = fid.check_or_download_inception(None) # download inception network
15 | 
16 | # loads all images into memory (this might require a lot of RAM!)
17 | image_list = glob.glob(os.path.join(image_path, '*.jpg'))
18 | images = np.array([imread(str(fn)).astype(np.float32) for fn in image_list])
19 | 
20 | # load precalculated training set statistics
21 | f = np.load(stats_path)
22 | mu_real, sigma_real = f['mu'][:], f['sigma'][:]
23 | f.close()
24 | 
25 | fid.create_inception_graph(inception_path)  # load the graph into the current TF graph
26 | with tf.Session() as sess:
27 |     sess.run(tf.global_variables_initializer())
28 |     mu_gen, sigma_gen = fid.calculate_activation_statistics(images, sess, batch_size=100)
29 | 
30 | fid_value = fid.calculate_frechet_distance(mu_gen, sigma_gen, mu_real, sigma_real)
31 | print("FID: %s" % fid_value)
32 | 
--------------------------------------------------------------------------------
/TTUR-master/WGAN_GP/tflib/small_imagenet.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import scipy.misc
 3 | import time
 4 | 
 5 | def make_generator(path, n_files, batch_size):
 6 |     epoch_count = [1]
 7 |     def get_epoch():
 8 |         images = np.zeros((batch_size, 3, 64, 64), dtype='int32')
 9 |         files = list(range(n_files))
10 |         random_state = np.random.RandomState(epoch_count[0])
11 |         random_state.shuffle(files)
12 |         epoch_count[0] += 1
13 |         for n, i in enumerate(files):
14 |             image = scipy.misc.imread("{}/{}.png".format(path, str(i+1).zfill(len(str(n_files)))))
15 |             images[n % batch_size] = image.transpose(2,0,1)
16 |             if n > 0 and n % batch_size == 0:
17 |                 yield (images,)
18 |     return get_epoch
19 | 
20 | def load(batch_size, data_dir='/home/ishaan/data/imagenet64'):
21 |     return (
22 |         make_generator(data_dir+'/train_64x64', 1281149, batch_size),
23 |         make_generator(data_dir+'/valid_64x64', 49999, batch_size)
24 |     )
25 | 
26 | if __name__ == '__main__':
27 |     train_gen, valid_gen = load(64)
28 |     t0 = time.time()
29 |     for i, batch in enumerate(train_gen(), start=1):
30 |         print("%s\t%d" % (str(time.time() - t0), batch[0][0,0,0,0]))
31 |         if i == 1000:
32 |             break
33 |         t0 = time.time()
34 | 
--------------------------------------------------------------------------------
/TTUR-master/WGAN_GP/tflib/plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import matplotlib 4 | matplotlib.use('Agg') 5 | import matplotlib.pyplot as plt 6 | 7 | import collections 8 | import time 9 | import pickle 10 | import math 11 | 12 | _since_beginning = collections.defaultdict(lambda: {}) 13 | _since_last_flush = collections.defaultdict(lambda: {}) 14 | 15 | _iter = [0] 16 | def tick(): 17 | _iter[0] += 1 18 | 19 | def plot(name, value): 20 | _since_last_flush[name][_iter[0]] = value 21 | 22 | def flush(): 23 | prints = [] 24 | 25 | for name, vals in _since_last_flush.items(): 26 | #prints.append("{}\t{}" % (name, np.mean(vals.values()))) 27 | v = vals.values() 28 | sv = sum(v) 29 | prints.append("%s\t%f" % (name, sv / len(v))) 30 | _since_beginning[name].update(vals) 31 | 32 | x_vals = sorted(_since_beginning[name].keys()) 33 | y_vals = [_since_beginning[name][x] for x in x_vals] 34 | 35 | #plt.clf() 36 | #plt.plot(x_vals, y_vals) 37 | #plt.xlabel('iteration') 38 | #plt.ylabel(name) 39 | #plt.savefig(name.replace(' ', '_')+'.jpg') 40 | 41 | #print("iter %d\t%s" % (_iter[0], "\t".join(prints))) 42 | print("iter %d" % (_iter[0])) 43 | for p in prints: 44 | print(p) 45 | _since_last_flush.clear() 46 | 47 | #with open('log.pkl', 'wb') as f: 48 | # pickle.dump(dict(_since_beginning), f, pickle.HIGHEST_PROTOCOL) 49 | -------------------------------------------------------------------------------- /SSIM/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Richard Zhang, Phillip Isola, Alexei A. Efros, Eli Shechtman, Oliver Wang 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | -------------------------------------------------------------------------------- /TTUR-master/BEGAN_FID_batched/main_fid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from trainer_fid_batched import Trainer 5 | from config import get_config 6 | from data_loader import get_loader 7 | from utils import prepare_dirs_and_logger, save_config 8 | 9 | def main(config): 10 | prepare_dirs_and_logger(config) 11 | 12 | #rng = np.random.RandomState(config.random_seed) 13 | #tf.set_random_seed(config.random_seed) 14 | 15 | if config.is_train: 16 | data_path = config.data_path 17 | batch_size = config.batch_size 18 | do_shuffle = True 19 | else: 20 | setattr(config, 'batch_size', 64) 21 | if config.test_data_path is None: 22 | data_path = config.data_path 23 | else: 24 | data_path = config.test_data_path 25 | batch_size = config.sample_per_image 26 | do_shuffle = False 27 | 28 | data_loader = get_loader( 29 | data_path, config.batch_size, config.input_scale_size, 30 | config.data_format, config.split) 31 | trainer = Trainer(config, data_loader) 32 | 33 | if config.is_train: 34 | save_config(config) 35 | trainer.train() 36 | else: 37 | if not config.load_path: 38 | raise Exception("[!] You should specify `load_path` to load a pretrained model") 39 | trainer.test() 40 | 41 | if __name__ == "__main__": 42 | config, unparsed = get_config() 43 | main(config) 44 | -------------------------------------------------------------------------------- /TTUR-master/DCGAN_FID_batched/README.md: -------------------------------------------------------------------------------- 1 | # DCGAN for CelebA evaluated with FID (batched version) 2 | 3 | DCGAN fork from https://github.com/carpedm20/DCGAN-tensorflow 4 | 5 | Precalculated real world / trainng data statistics can be downloaded from: 6 | http://bioinf.jku.at/research/ttur/ttur.html 7 | 8 | ## Usage 9 | - Copy the file fid.py from TTUR root into the DCGAN_FID_batched directory 10 | - Modify the dataset variable in run.sh 11 | - Modify the data_path variable in run.sh 12 | - Download the precalculated statistics (see above) and save them into the "stats" folder. 13 | - Modify the incept_path in file run.sh 14 | - Run the command: bash run.sh 15 | - Checkpoint, sample and Tensorboard log directories will be automatically created in logs. 16 | 17 | ## FID evaluation: parameters fid_n_samples and fid_sample_batchsize 18 | The evaluation of the FID needs the comparison between precalculated statistics of real world data vs statistics of generated data. 19 | The calculation of the latter is a tradeoff between number of samples (the more the better) and available hardware. Two parameters 20 | in run.sh are concerned with this calculation: fid_n_samples and fid_sample_batchsize. The first parameter specifies the number of 21 | generated samples on which the statistics are calculated. Since this number should be high, it is very likely that it is not possible 22 | to generate this amount of samples at once. Thus the generation process is batched with batches of size fid_sample_batchsize. 
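As a rough sketch of that batching scheme (not this repository's actual code; `sample_batch` stands in for whatever call the generator exposes, and the two parameter values are just examples):

```python
# Sketch: accumulate fid_n_samples generated images in chunks of fid_sample_batchsize,
# then compute their statistics once with fid.py and compare against the real statistics.
import numpy as np
import fid  # fid.py copied from the TTUR root directory

fid_n_samples = 50000        # example value
fid_sample_batchsize = 1000  # example value

def sample_batch(n):
    """Placeholder for the model's sampler: return n generated images as (n, H, W, 3)."""
    return np.random.rand(n, 64, 64, 3).astype(np.float32) * 255.0  # stand-in data

samples = np.concatenate(
    [sample_batch(fid_sample_batchsize)
     for _ in range(fid_n_samples // fid_sample_batchsize)], axis=0)

# With the Inception graph loaded, a tf.Session `sess` open, and (mu_real, sigma_real)
# taken from the precalculated statistics file:
# mu_gen, sigma_gen = fid.calculate_activation_statistics(samples, sess, batch_size=100)
# fid_value = fid.calculate_frechet_distance(mu_gen, sigma_gen, mu_real, sigma_real)
```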
23 | -------------------------------------------------------------------------------- /SSIM/data/custom_dataset_data_loader.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from data.base_data_loader import BaseDataLoader 3 | import os 4 | 5 | def CreateDataset(dataroots,dataset_mode='2afc',load_size=64,): 6 | dataset = None 7 | if dataset_mode=='2afc': # human judgements 8 | from dataset.twoafc_dataset import TwoAFCDataset 9 | dataset = TwoAFCDataset() 10 | elif dataset_mode=='jnd': # human judgements 11 | from dataset.jnd_dataset import JNDDataset 12 | dataset = JNDDataset() 13 | else: 14 | raise ValueError("Dataset Mode [%s] not recognized."%self.dataset_mode) 15 | 16 | dataset.initialize(dataroots,load_size=load_size) 17 | return dataset 18 | 19 | class CustomDatasetDataLoader(BaseDataLoader): 20 | def name(self): 21 | return 'CustomDatasetDataLoader' 22 | 23 | def initialize(self, datafolders, dataroot='./dataset',dataset_mode='2afc',load_size=64,batch_size=1,serial_batches=True, nThreads=1): 24 | BaseDataLoader.initialize(self) 25 | if(not isinstance(datafolders,list)): 26 | datafolders = [datafolders,] 27 | data_root_folders = [os.path.join(dataroot,datafolder) for datafolder in datafolders] 28 | self.dataset = CreateDataset(data_root_folders,dataset_mode=dataset_mode,load_size=load_size) 29 | self.dataloader = torch.utils.data.DataLoader( 30 | self.dataset, 31 | batch_size=batch_size, 32 | shuffle=not serial_batches, 33 | num_workers=int(nThreads)) 34 | 35 | def load_data(self): 36 | return self.dataloader 37 | 38 | def __len__(self): 39 | return len(self.dataset) 40 | -------------------------------------------------------------------------------- /TTUR-master/precalc_stats_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import glob 5 | #os.environ['CUDA_VISIBLE_DEVICES'] = '2' 6 | import numpy as np 7 | import fid 8 | from scipy.misc import imread 9 | import tensorflow as tf 10 | 11 | ######## 12 | # PATHS 13 | ######## 14 | data_path = 'data' # set path to training set images 15 | output_path = 'fid_stats.npz' # path for where to store the statistics 16 | # if you have downloaded and extracted 17 | # http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 18 | # set this path to the directory where the extracted files are, otherwise 19 | # just set it to None and the script will later download the files for you 20 | inception_path = None 21 | print("check for inception model..", end=" ", flush=True) 22 | inception_path = fid.check_or_download_inception(inception_path) # download inception if necessary 23 | print("ok") 24 | 25 | # loads all images into memory (this might require a lot of RAM!) 
26 | print("load images..", end=" " , flush=True) 27 | image_list = glob.glob(os.path.join(data_path, '*.jpg')) 28 | images = np.array([imread(str(fn)).astype(np.float32) for fn in image_list]) 29 | print("%d images found and loaded" % len(images)) 30 | 31 | print("create inception graph..", end=" ", flush=True) 32 | fid.create_inception_graph(inception_path) # load the graph into the current TF graph 33 | print("ok") 34 | 35 | print("calculte FID stats..", end=" ", flush=True) 36 | with tf.Session() as sess: 37 | sess.run(tf.global_variables_initializer()) 38 | mu, sigma = fid.calculate_activation_statistics(images, sess, batch_size=100) 39 | np.savez_compressed(output_path, mu=mu, sigma=sigma) 40 | print("finished") 41 | -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/tflib/data_loader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.misc 3 | import time 4 | import os 5 | from glob import glob 6 | 7 | def make_generator(path, batch_size, dataset): 8 | print("scan files", end=" ", flush=True) 9 | if dataset == "celeba": 10 | files = glob(os.path.join(path, "*.jpg")) 11 | dim = 64 12 | if dataset == "svhn" or dataset == "cifar10": 13 | files = glob(os.path.join(path, "*.png")) 14 | dim = 32 15 | if dataset == "lsun": 16 | # It's assumed the lsun images are splitted 17 | # into subdirectories named 0, 1, .., 304 18 | files = [] 19 | for i in range(304): 20 | print("\rscan files %d" % i, end="", flush=True) 21 | files += glob(os.path.join(path, str(i), "*.jpg")) 22 | dim = 64 23 | n_files = len(files) 24 | print() 25 | print("%d images found" % n_files) 26 | def get_epoch(): 27 | images = np.zeros((batch_size, 3, dim, dim), dtype='int32') 28 | files_idx = list(range(n_files)) 29 | random_state = np.random.RandomState() 30 | random_state.shuffle(files_idx) 31 | for n, i in enumerate(files_idx): 32 | image = scipy.misc.imread(files[i]) 33 | images[n % batch_size] = image.transpose(2,0,1) 34 | if n > 0 and n % batch_size == 0: 35 | yield (images,) 36 | return get_epoch 37 | 38 | def load(batch_size, data_dir, dataset): 39 | return ( 40 | make_generator(data_dir, batch_size, dataset), 41 | make_generator(data_dir, batch_size, dataset) 42 | ) 43 | 44 | if __name__ == '__main__': 45 | train_gen, valid_gen = load(64) 46 | t0 = time.time() 47 | for i, batch in enumerate(train_gen(), start=1): 48 | print("s\t%d" % (str(time.time() - t0), batch[0][0,0,0,0])) 49 | if i == 1000: 50 | break 51 | t0 = time.time() 52 | -------------------------------------------------------------------------------- /SSIM/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:9.0-base-ubuntu16.04 2 | 3 | LABEL maintainer="Seyoung Park " 4 | 5 | # This Dockerfile is forked from Tensorflow Dockerfile 6 | 7 | # Pick up some PyTorch gpu dependencies 8 | RUN apt-get update && apt-get install -y --no-install-recommends \ 9 | build-essential \ 10 | cuda-command-line-tools-9-0 \ 11 | cuda-cublas-9-0 \ 12 | cuda-cufft-9-0 \ 13 | cuda-curand-9-0 \ 14 | cuda-cusolver-9-0 \ 15 | cuda-cusparse-9-0 \ 16 | curl \ 17 | libcudnn7=7.1.4.18-1+cuda9.0 \ 18 | libfreetype6-dev \ 19 | libhdf5-serial-dev \ 20 | libpng12-dev \ 21 | libzmq3-dev \ 22 | pkg-config \ 23 | python \ 24 | python-dev \ 25 | rsync \ 26 | software-properties-common \ 27 | unzip \ 28 | && \ 29 | apt-get clean && \ 30 | rm -rf /var/lib/apt/lists/* 31 | 32 | 33 | # Install miniconda 34 | RUN 
apt-get update && apt-get install -y --no-install-recommends \ 35 | wget && \ 36 | MINICONDA="Miniconda3-latest-Linux-x86_64.sh" && \ 37 | wget --quiet https://repo.continuum.io/miniconda/$MINICONDA && \ 38 | bash $MINICONDA -b -p /miniconda && \ 39 | rm -f $MINICONDA 40 | ENV PATH /miniconda/bin:$PATH 41 | 42 | # Install PyTorch 43 | RUN conda update -n base conda && \ 44 | conda install pytorch torchvision cuda90 -c pytorch 45 | 46 | # Install PerceptualSimilarity dependencies 47 | RUN conda install numpy scipy jupyter matplotlib && \ 48 | conda install -c conda-forge scikit-image && \ 49 | apt-get install -y python-qt4 && \ 50 | pip install opencv-python 51 | 52 | # For CUDA profiling, TensorFlow requires CUPTI. Maybe PyTorch needs this too. 53 | ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH 54 | 55 | # IPython 56 | EXPOSE 8888 57 | 58 | WORKDIR "/notebooks" 59 | 60 | -------------------------------------------------------------------------------- /SSIM/data/dataset/jnd_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torchvision.transforms as transforms 3 | from data.dataset.base_dataset import BaseDataset 4 | from data.image_folder import make_dataset 5 | from PIL import Image 6 | import numpy as np 7 | import torch 8 | from IPython import embed 9 | 10 | class JNDDataset(BaseDataset): 11 | def initialize(self, dataroot, load_size=64): 12 | self.root = dataroot 13 | self.load_size = load_size 14 | 15 | self.dir_p0 = os.path.join(self.root, 'p0') 16 | self.p0_paths = make_dataset(self.dir_p0) 17 | self.p0_paths = sorted(self.p0_paths) 18 | 19 | self.dir_p1 = os.path.join(self.root, 'p1') 20 | self.p1_paths = make_dataset(self.dir_p1) 21 | self.p1_paths = sorted(self.p1_paths) 22 | 23 | transform_list = [] 24 | transform_list.append(transforms.Scale(load_size)) 25 | transform_list += [transforms.ToTensor(), 26 | transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))] 27 | 28 | self.transform = transforms.Compose(transform_list) 29 | 30 | # judgement directory 31 | self.dir_S = os.path.join(self.root, 'same') 32 | self.same_paths = make_dataset(self.dir_S,mode='np') 33 | self.same_paths = sorted(self.same_paths) 34 | 35 | def __getitem__(self, index): 36 | p0_path = self.p0_paths[index] 37 | p0_img_ = Image.open(p0_path).convert('RGB') 38 | p0_img = self.transform(p0_img_) 39 | 40 | p1_path = self.p1_paths[index] 41 | p1_img_ = Image.open(p1_path).convert('RGB') 42 | p1_img = self.transform(p1_img_) 43 | 44 | same_path = self.same_paths[index] 45 | same_img = np.load(same_path).reshape((1,1,1,)) # [0,1] 46 | 47 | same_img = torch.FloatTensor(same_img) 48 | 49 | return {'p0': p0_img, 'p1': p1_img, 'same': same_img, 50 | 'p0_path': p0_path, 'p1_path': p1_path, 'same_path': same_path} 51 | 52 | def __len__(self): 53 | return len(self.p0_paths) 54 | -------------------------------------------------------------------------------- /SSIM/compute_market.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from IPython import embed 4 | from util import util 5 | import models.dist_model as dm 6 | import numpy as np 7 | import torch 8 | from PIL import Image 9 | 10 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 11 | parser.add_argument('--dir', type=str, default='../Market/pytorch/train_all') 12 | opt = parser.parse_args() 13 | 14 | ## Initializing the model 15 | model = dm.DistModel() 16 | 
model.initialize(model='net-lin',net='vgg',use_gpu=True) 17 | #model.initialize(model='ssim',use_gpu=True) 18 | 19 | score = 0 20 | num = 0 21 | 22 | score_max = 0 23 | score_min = 1 24 | 25 | for ii in range(10): 26 | for subdir in os.listdir(opt.dir): 27 | subdir = opt.dir + '/'+ subdir 28 | count = 0 29 | all_file = os.listdir(subdir) 30 | for i in range(20): 31 | randp = np.random.permutation(len(all_file)) 32 | rand1 = randp[0] 33 | rand2 = randp[1] 34 | # Load images 35 | img0 = util.im2tensor(util.load_image(os.path.join(subdir,all_file[rand1]))) # RGB image from [-1,1] 36 | img1 = util.im2tensor(util.load_image(os.path.join(subdir,all_file[rand2]))) 37 | # mask = torch.zeros(img0.shape) 38 | # mask[:, :, round((224/192)* 60):round((224/192) * 140), round((224/192) * 32):round((224/192)* 168)] = 1 39 | # img0 = img0 * mask 40 | # img1 = img1 * mask 41 | # Compute distance 42 | # dist01 = model.forward(img0[:, :, 92 :144, 48 :172], 43 | # img1[:, :, 92 :144, 48 :172]) # celebA 44 | 45 | #dist01 = model.forward(img0[:, :, 68:104, 48:154], 46 | # img1[:, :, 68:104, 48:154]) # lfw 47 | 48 | dist01 = model.forward(img0[:, :, 48:78, 24:96], 49 | img1[:, :, 48:78, 24:96]) # meglass 50 | #dist01 = model.forward(img0,img1) 51 | num +=1 52 | score +=dist01 53 | print('%d::%.6f'%(ii,score/num)) 54 | if score/num >score_max: 55 | score_max = score/num 56 | if score/num 0: 21 | with self.doc.head: 22 | meta(http_equiv="reflesh", content=str(reflesh)) 23 | 24 | def get_image_dir(self): 25 | return self.img_dir 26 | 27 | def add_header(self, str): 28 | with self.doc: 29 | h3(str) 30 | 31 | def add_table(self, border=1): 32 | self.t = table(border=border, style="table-layout: fixed;") 33 | self.doc.add(self.t) 34 | 35 | def add_images(self, ims, txts, links, width=400): 36 | self.add_table() 37 | with self.t: 38 | with tr(): 39 | for im, txt, link in zip(ims, txts, links): 40 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 41 | with p(): 42 | with a(href=os.path.join(link)): 43 | img(style="width:%dpx" % width, src=os.path.join(im)) 44 | br() 45 | p(txt) 46 | 47 | def save(self,file='index'): 48 | html_file = '%s/%s.html' % (self.web_dir,file) 49 | f = open(html_file, 'wt') 50 | f.write(self.doc.render()) 51 | f.close() 52 | 53 | 54 | if __name__ == '__main__': 55 | html = HTML('web/', 'test_html') 56 | html.add_header('hello world') 57 | 58 | ims = [] 59 | txts = [] 60 | links = [] 61 | for n in range(4): 62 | ims.append('image_%d.png' % n) 63 | txts.append('text_%d' % n) 64 | links.append('image_%d.png' % n) 65 | html.add_images(ims, txts, links) 66 | html.save() 67 | -------------------------------------------------------------------------------- /celeba_glass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python3 3 | 4 | ''' 5 | Divide face in accordance with CelebA Attr type. 
6 | ''' 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import shutil 13 | import os 14 | 15 | output_path = "/home/bingwen/Dataset/CelebA/" 16 | image_path = "/home/bingwen/Dataset/CelebA/img_align_celeba" 17 | CelebA_Attr_file = "/home/bingwen/Dataset/CelebA/list_attr_celeba.txt" 18 | Attr_type = 16 # Eyeglasses 19 | 20 | 21 | def main(): 22 | '''Divide face accordance CelebA Attr eyeglasses label.''' 23 | trainA_dir = os.path.join(output_path, "trainA") 24 | trainB_dir = os.path.join(output_path, "trainB") 25 | if not os.path.isdir(trainA_dir): 26 | os.makedirs(trainA_dir) 27 | if not os.path.isdir(trainB_dir): 28 | os.makedirs(trainB_dir) 29 | 30 | not_found_txt = open(os.path.join(output_path, "not_found_img.txt"), "w") 31 | 32 | count_A = 0 33 | count_B = 0 34 | count_N = 0 35 | 36 | with open(CelebA_Attr_file, "r") as Attr_file: 37 | Attr_info = Attr_file.readlines() 38 | Attr_info = Attr_info[2:] 39 | index = 0 40 | for line in Attr_info: 41 | index += 1 42 | info = line.split() 43 | filename = info[0] 44 | filepath_old = os.path.join(image_path, filename) 45 | if os.path.isfile(filepath_old): 46 | if int(info[Attr_type]) == 1: 47 | filepath_new = os.path.join(trainA_dir, filename) 48 | shutil.copyfile(filepath_old, filepath_new) 49 | count_A += 1 50 | else: 51 | filepath_new = os.path.join(trainB_dir, filename) 52 | shutil.copyfile(filepath_old, filepath_new) 53 | count_B += 1 54 | print("%d: success for copy %s -> %s" % (index, info[Attr_type], filepath_new)) 55 | else: 56 | print("%d: not found %s\n" % (index, filepath_old)) 57 | not_found_txt.write(line) 58 | count_N += 1 59 | 60 | not_found_txt.close() 61 | 62 | print("TrainA have %d images!" % count_A) 63 | print("TrainB have %d images!" % count_B) 64 | print("Not found %d images!" 
% count_N) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /SSIM/data/image_folder.py: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Code from 3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py 4 | # Modified the original code so that it also loads images from the current 5 | # directory as well as the subdirectories 6 | ################################################################################ 7 | 8 | import torch.utils.data as data 9 | 10 | from PIL import Image 11 | import os 12 | import os.path 13 | 14 | IMG_EXTENSIONS = [ 15 | '.jpg', '.JPG', '.jpeg', '.JPEG', 16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 17 | ] 18 | 19 | NP_EXTENSIONS = ['.npy',] 20 | 21 | def is_image_file(filename, mode='img'): 22 | if(mode=='img'): 23 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 24 | elif(mode=='np'): 25 | return any(filename.endswith(extension) for extension in NP_EXTENSIONS) 26 | 27 | def make_dataset(dirs, mode='img'): 28 | if(not isinstance(dirs,list)): 29 | dirs = [dirs,] 30 | 31 | images = [] 32 | for dir in dirs: 33 | assert os.path.isdir(dir), '%s is not a valid directory' % dir 34 | for root, _, fnames in sorted(os.walk(dir)): 35 | for fname in fnames: 36 | if is_image_file(fname, mode=mode): 37 | path = os.path.join(root, fname) 38 | images.append(path) 39 | 40 | # print("Found %i images in %s"%(len(images),root)) 41 | return images 42 | 43 | def default_loader(path): 44 | return Image.open(path).convert('RGB') 45 | 46 | class ImageFolder(data.Dataset): 47 | def __init__(self, root, transform=None, return_paths=False, 48 | loader=default_loader): 49 | imgs = make_dataset(root) 50 | if len(imgs) == 0: 51 | raise(RuntimeError("Found 0 images in: " + root + "\n" 52 | "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) 53 | 54 | self.root = root 55 | self.imgs = imgs 56 | self.transform = transform 57 | self.return_paths = return_paths 58 | self.loader = loader 59 | 60 | def __getitem__(self, index): 61 | path = self.imgs[index] 62 | img = self.loader(path) 63 | if self.transform is not None: 64 | img = self.transform(img) 65 | if self.return_paths: 66 | return img, path 67 | else: 68 | return img 69 | 70 | def __len__(self): 71 | return len(self.imgs) 72 | -------------------------------------------------------------------------------- /SSIM/data/dataset/twoafc_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torchvision.transforms as transforms 3 | from data.dataset.base_dataset import BaseDataset 4 | from data.image_folder import make_dataset 5 | from PIL import Image 6 | import numpy as np 7 | import torch 8 | # from IPython import embed 9 | 10 | class TwoAFCDataset(BaseDataset): 11 | def initialize(self, dataroots, load_size=64): 12 | if(not isinstance(dataroots,list)): 13 | dataroots = [dataroots,] 14 | self.roots = dataroots 15 | self.load_size = load_size 16 | 17 | # image directory 18 | self.dir_ref = [os.path.join(root, 'ref') for root in self.roots] 19 | self.ref_paths = make_dataset(self.dir_ref) 20 | self.ref_paths = sorted(self.ref_paths) 21 | 22 | self.dir_p0 = [os.path.join(root, 'p0') for root in self.roots] 23 | self.p0_paths = make_dataset(self.dir_p0) 24 | self.p0_paths = sorted(self.p0_paths) 25 | 
26 | self.dir_p1 = [os.path.join(root, 'p1') for root in self.roots] 27 | self.p1_paths = make_dataset(self.dir_p1) 28 | self.p1_paths = sorted(self.p1_paths) 29 | 30 | transform_list = [] 31 | transform_list.append(transforms.Scale(load_size)) 32 | transform_list += [transforms.ToTensor(), 33 | transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))] 34 | 35 | self.transform = transforms.Compose(transform_list) 36 | 37 | # judgement directory 38 | self.dir_J = [os.path.join(root, 'judge') for root in self.roots] 39 | self.judge_paths = make_dataset(self.dir_J,mode='np') 40 | self.judge_paths = sorted(self.judge_paths) 41 | 42 | def __getitem__(self, index): 43 | p0_path = self.p0_paths[index] 44 | p0_img_ = Image.open(p0_path).convert('RGB') 45 | p0_img = self.transform(p0_img_) 46 | 47 | p1_path = self.p1_paths[index] 48 | p1_img_ = Image.open(p1_path).convert('RGB') 49 | p1_img = self.transform(p1_img_) 50 | 51 | ref_path = self.ref_paths[index] 52 | ref_img_ = Image.open(ref_path).convert('RGB') 53 | ref_img = self.transform(ref_img_) 54 | 55 | judge_path = self.judge_paths[index] 56 | # judge_img = (np.load(judge_path)*2.-1.).reshape((1,1,1,)) # [-1,1] 57 | judge_img = np.load(judge_path).reshape((1,1,1,)) # [0,1] 58 | 59 | judge_img = torch.FloatTensor(judge_img) 60 | 61 | return {'p0': p0_img, 'p1': p1_img, 'ref': ref_img, 'judge': judge_img, 62 | 'p0_path': p0_path, 'p1_path': p1_path, 'ref_path': ref_path, 'judge_path': judge_path} 63 | 64 | def __len__(self): 65 | return len(self.p0_paths) 66 | -------------------------------------------------------------------------------- /SSIM/test_dataset_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from models import dist_model as dm 3 | from data import data_loader as dl 4 | import argparse 5 | from IPython import embed 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--dataset_mode', type=str, default='2afc', help='[2afc,jnd]') 9 | parser.add_argument('--datasets', type=str, nargs='+', default=['val/traditional','val/cnn','val/superres','val/deblur','val/color','val/frameinterp'], help='datasets to test - for jnd mode: [val/traditional],[val/cnn]; for 2afc mode: [train/traditional],[train/cnn],[train/mix],[val/traditional],[val/cnn],[val/color],[val/deblur],[val/frameinterp],[val/superres]') 10 | parser.add_argument('--model', type=str, default='net-lin', help='distance model type [net-lin] for linearly calibrated net, [net] for off-the-shelf network, [l2] for euclidean distance, [ssim] for Structured Similarity Image Metric') 11 | parser.add_argument('--net', type=str, default='alex', help='[squeeze], [alex], or [vgg] for network architectures') 12 | parser.add_argument('--colorspace', type=str, default='Lab', help='[Lab] or [RGB] for colorspace to use for l2, ssim model types') 13 | parser.add_argument('--batch_size', type=int, default=50, help='batch size to test image patches in') 14 | parser.add_argument('--use_gpu', action='store_true', help='turn on flag to use GPU') 15 | parser.add_argument('--model_path', type=str, default=None, help='location of model, will default to ./weights/v[version]/[net_name].pth') 16 | 17 | parser.add_argument('--from_scratch', action='store_true', help='model was initialized from scratch') 18 | parser.add_argument('--train_trunk', action='store_true', help='model trunk was trained/tuned') 19 | parser.add_argument('--version', type=str, default='0.1', help='v0.1 is latest, v0.0 was original release') 20 | 21 | opt = 
parser.parse_args() 22 | if(opt.model in ['l2','ssim']): 23 | opt.batch_size = 1 24 | 25 | # initialize model 26 | model = dm.DistModel() 27 | # model.initialize(model=opt.model,net=opt.net,colorspace=opt.colorspace,model_path=opt.model_path,use_gpu=opt.use_gpu) 28 | model.initialize(model=opt.model,net=opt.net,colorspace=opt.colorspace,model_path=opt.model_path,use_gpu=opt.use_gpu, pnet_rand=opt.from_scratch, pnet_tune=opt.train_trunk, version=opt.version) 29 | 30 | if(opt.model in ['net-lin','net']): 31 | print('Testing model [%s]-[%s]'%(opt.model,opt.net)) 32 | elif(opt.model in ['l2','ssim']): 33 | print('Testing model [%s]-[%s]'%(opt.model,opt.colorspace)) 34 | 35 | # embed() 36 | # initialize data loader 37 | for dataset in opt.datasets: 38 | data_loader = dl.CreateDataLoader(dataset,dataset_mode=opt.dataset_mode, batch_size=opt.batch_size) 39 | 40 | # evaluate model on data 41 | if(opt.dataset_mode=='2afc'): 42 | (score, results_verbose) = dm.score_2afc_dataset(data_loader,model.forward) 43 | elif(opt.dataset_mode=='jnd'): 44 | (score, results_verbose) = dm.score_jnd_dataset(data_loader,model.forward) 45 | 46 | # print results 47 | print(' Dataset [%s]: %.2f'%(dataset,100.*score)) 48 | 49 | -------------------------------------------------------------------------------- /TTUR-master/BEGAN_FID_batched/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import math 5 | import json 6 | import logging 7 | import numpy as np 8 | from PIL import Image 9 | from datetime import datetime 10 | 11 | def prepare_dirs_and_logger(config): 12 | formatter = logging.Formatter("%(asctime)s:%(levelname)s::%(message)s") 13 | logger = logging.getLogger() 14 | 15 | for hdlr in logger.handlers: 16 | logger.removeHandler(hdlr) 17 | 18 | handler = logging.StreamHandler() 19 | handler.setFormatter(formatter) 20 | 21 | logger.addHandler(handler) 22 | 23 | if config.load_checkpoint: 24 | #if config.load_path.startswith(config.log_dir): 25 | # config.model_dir = config.load_path 26 | #else: 27 | # if config.load_path.startswith(config.dataset): 28 | # config.model_name = config.load_path 29 | # else: 30 | config.model_name = config.checkpoint_name 31 | else: 32 | config.model_name = "%s_%.6f_%.6f_%s" % (get_time(), config.d_lr, config.g_lr, config.update_k) 33 | 34 | if not hasattr(config, 'model_dir'): 35 | config.model_dir = os.path.join(config.log_dir, config.model_name) 36 | 37 | config.data_path = os.path.join(config.data_dir, config.dataset) 38 | 39 | for path in [config.log_dir, config.data_dir, config.model_dir]: 40 | if not os.path.exists(path): 41 | os.makedirs(path) 42 | 43 | def get_time(): 44 | return datetime.now().strftime("%m%d_%H%M%S") 45 | 46 | def save_config(config): 47 | param_path = os.path.join(config.model_dir, "params.json") 48 | 49 | print("[*] MODEL dir: %s" % config.model_dir) 50 | print("[*] PARAM path: %s" % param_path) 51 | 52 | with open(param_path, 'w') as fp: 53 | json.dump(config.__dict__, fp, indent=4, sort_keys=True) 54 | 55 | def rank(array): 56 | return len(array.shape) 57 | 58 | def make_grid(tensor, nrow=8, padding=2, 59 | normalize=False, scale_each=False): 60 | """Code based on https://github.com/pytorch/vision/blob/master/torchvision/utils.py""" 61 | nmaps = tensor.shape[0] 62 | xmaps = min(nrow, nmaps) 63 | ymaps = int(math.ceil(float(nmaps) / xmaps)) 64 | height, width = int(tensor.shape[1] + padding), int(tensor.shape[2] + padding) 65 | grid = np.zeros([height * 
ymaps + 1 + padding // 2, width * xmaps + 1 + padding // 2, 3], dtype=np.uint8) 66 | k = 0 67 | for y in range(ymaps): 68 | for x in range(xmaps): 69 | if k >= nmaps: 70 | break 71 | h, h_width = y * height + 1 + padding // 2, height - padding 72 | w, w_width = x * width + 1 + padding // 2, width - padding 73 | 74 | grid[h:h+h_width, w:w+w_width] = tensor[k] 75 | k = k + 1 76 | return grid 77 | 78 | def save_image(tensor, filename, nrow=8, padding=2, 79 | normalize=False, scale_each=False): 80 | ndarr = make_grid(tensor, nrow=nrow, padding=padding, 81 | normalize=normalize, scale_each=scale_each) 82 | im = Image.fromarray(ndarr) 83 | im.save(filename) 84 | -------------------------------------------------------------------------------- /lfw_pad.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 NVIDIA Corporation. All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 4 | """ 5 | from __future__ import print_function 6 | from utils import get_config, get_data_loader_folder, pytorch03_to_pytorch04, load_inception 7 | from trainer import ERGAN_Trainer, UNIT_Trainer 8 | #from network import AdaINGen, MsImageDis, VAEGen 9 | from torch import nn 10 | from scipy.stats import entropy 11 | import torch.nn.functional as F 12 | import argparse 13 | from torch.autograd import Variable 14 | from data import ImageFolder 15 | import numpy as np 16 | import torchvision.utils as vutils 17 | try: 18 | from itertools import izip as zip 19 | except ImportError: # will be 3.x series 20 | pass 21 | import sys 22 | import torch 23 | import os 24 | from PIL import Image 25 | import torch.nn.functional as F 26 | 27 | parser = argparse.ArgumentParser() 28 | 29 | parser.add_argument('--A', type=str, default = '/home/bingwen/MUNIT-master/datasets/lfw_224_pad', help="input image folder A") 30 | #parser.add_argument('--B', type=str, default = '/home/bingwen/MUNIT-master/datasets/LFW/trainB', help="input image folder B") 31 | 32 | opts = parser.parse_args() 33 | 34 | output_folder1 = os.path.abspath('/home/bingwen/MUNIT-master/datasets/lfw_224_JPG') 35 | #output_folder2 = os.path.abspath('/home/bingwen/MUNIT-master/datasets/LFW/trainB_pad') 36 | 37 | if not os.path.exists(output_folder1): 38 | os.makedirs(output_folder1) 39 | 40 | data_loader_a = get_data_loader_folder(opts.A, 1, False, new_size=224, height=224, width=224,crop=False) 41 | #data_loader_b = get_data_loader_folder(opts.B, 1, False, new_size=224, height=224, width=224,crop=False) 42 | imagea_names = ImageFolder(opts.A, transform=None, return_paths=True) 43 | #imageb_names = ImageFolder(opts.B, transform=None, return_paths=True) 44 | 45 | def flip_lr(img): 46 | '''flip horizontal''' 47 | inv_idx = torch.arange(img.size(3)-1,-1,-1).long() # N x C x H x W 48 | img_flip = img.index_select(3,inv_idx) 49 | return img_flip 50 | 51 | def recover(inp): 52 | """Imshow for Tensor.""" 53 | inp = inp.numpy().transpose((1, 2, 0)) 54 | mean = np.array([0.485, 0.456, 0.406]) 55 | std = np.array([0.229, 0.224, 0.225]) 56 | inp = std * inp + mean 57 | inp = inp * 255.0 58 | inp = np.clip(inp, 0, 255) 59 | return inp 60 | 61 | for i, (images_a, imagea_names) in enumerate(zip(data_loader_a, imagea_names)): 62 | 63 | #basename_b = os.path.basename(imageb_names[1]) 64 | basename_a = os.path.basename(imagea_names[1]) 65 | 66 | images_a = images_a.cuda() 67 | #images_b = images_b.cuda() 68 | images_a = F.pad(images_a, (0, 0, 24, -24), 
mode='reflect') 69 | #images_b = F.pad(images_b, (0, 0, 24, -24), mode='reflect') 70 | 71 | (name_a, extention_a) = os.path.splitext(basename_a) 72 | #(name_b, extention_b) = os.path.splitext(basename_b) 73 | print(i+1) 74 | vutils.save_image(images_a.data, os.path.join(output_folder1, name_a +'.jpg'), padding=0, normalize=True) 75 | #vutils.save_image(images_b.data, os.path.join(output_folder2, name_b +'.png'), padding=0, normalize=True) 76 | 77 | else: 78 | pass 79 | 80 | 81 | -------------------------------------------------------------------------------- /meglass_split.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python3 3 | 4 | ''' 5 | Divide face accordance MeGlass Attr type. 6 | ''' 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import shutil 13 | import os 14 | import pdb 15 | 16 | output_path_train = "/home/bingwen/ERGAN-Pytorch/datasets/MeGlass" 17 | 18 | image_path_A = "/home/bingwen/ERGAN-Pytorch/datasets/MeGlass/trainA" 19 | image_path_B = "/home/bingwen/ERGAN-Pytorch/datasets/MeGlass/trainB" 20 | 21 | gallery_glass = "/home/bingwen/ERGAN-Pytorch/datasets/MeGlass/gallery_black_glass.txt" 22 | gallery_no_glass = "/home/bingwen/ERGAN-Pytorch/datasets/MeGlass/gallery_no_glass.txt" 23 | probe_glass = "/home/bingwen/ERGAN-Pytorch/datasets/MeGlass/probe_black_glass.txt" 24 | probe_no_glass = "/home/bingwen/ERGAN-Pytorch/datasets/MeGlass/probe_no_glass.txt" 25 | 26 | def main(): 27 | 28 | gallery_glass_dir = os.path.join(output_path_train, "gallery_glass") 29 | gallery_no_glass_dir = os.path.join(output_path_train, "gallery_no_glass") 30 | probe_glass_dir = os.path.join(output_path_train, "probe_glass") 31 | probe_no_glass_dir = os.path.join(output_path_train, "probe_no_glass") 32 | 33 | dirs = [gallery_glass_dir, gallery_no_glass_dir, probe_glass_dir, probe_no_glass_dir] 34 | 35 | for d in dirs: 36 | os.makedirs(d) 37 | 38 | count_A = 0 39 | count_B = 0 40 | count_C = 0 41 | count_D = 0 42 | 43 | with open(gallery_glass, "r") as Attr_file: 44 | Attr_info = Attr_file.readlines() 45 | index = 0 46 | for line in Attr_info: 47 | index += 1 48 | info = line.split() 49 | img = info[0] 50 | img_path_old = os.path.join(image_path_B, img) 51 | shutil.copy(img_path_old, gallery_glass_dir) 52 | count_A += 1 53 | 54 | with open(gallery_no_glass, "r") as Attr_file: 55 | Attr_info = Attr_file.readlines() 56 | index = 0 57 | for line in Attr_info: 58 | index += 1 59 | info = line.split() 60 | img = info[0] 61 | img_path_old = os.path.join(image_path_A, img) 62 | shutil.copy(img_path_old, gallery_no_glass_dir) 63 | count_B += 1 64 | 65 | with open(probe_glass, "r") as Attr_file: 66 | Attr_info = Attr_file.readlines() 67 | index = 0 68 | for line in Attr_info: 69 | index += 1 70 | info = line.split() 71 | img = info[0] 72 | img_path_old = os.path.join(image_path_B, img) 73 | shutil.copy(img_path_old, probe_glass_dir) 74 | count_C += 1 75 | 76 | with open(probe_no_glass, "r") as Attr_file: 77 | Attr_info = Attr_file.readlines() 78 | index = 0 79 | for line in Attr_info: 80 | index += 1 81 | info = line.split() 82 | img = info[0] 83 | img_path_old = os.path.join(image_path_A, img) 84 | shutil.copy(img_path_old, probe_no_glass_dir) 85 | count_D += 1 86 | 87 | print("gallery_glass have %d images!" % count_A) 88 | print("gallery_no_glass have %d images!" % count_B) 89 | print("probe_glass have %d images!" 
% count_C) 90 | print("probe_no_glass have %d images!" % count_D) 91 | 92 | if __name__ == "__main__": 93 | main() -------------------------------------------------------------------------------- /configs/munit.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | #lr = 0.00005 4 | # logger options 5 | image_save_iter: 2000 # How often do you want to save output images during training 6 | image_display_iter: 1000 # How often do you want to display output images during training 7 | display_size: 32 # How many images do you want to display each time 8 | snapshot_save_iter: 2000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 1000000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0.5 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr: 0.0001 # initial learning rate 19 | lr_policy: step # learning rate scheduler 20 | step_size: 100000 # how often to decay learning rate 21 | gamma: 0.5 # how much to decay learning rate 22 | gan_w: 0.5 # weight of adversarial loss 23 | recon_x_w: 10 # weight of image reconstruction loss 24 | recon_kl_w: 0.01 # weight of KL loss for reconstruction 25 | recon_s_w: 1 # weight of style reconstruction loss 26 | recon_c_w: 1 # weight of content reconstruction loss 27 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 28 | recon_kl_cyc_w: 0.01 #weight of KL loss for cycle consistency 29 | vgg_w: 0 # weight of domain-invariant perceptual loss 30 | 31 | # model options 32 | gen: 33 | dim: 32 # number of filters in the bottommost layer 34 | mlp_dim: 256 # number of filters in MLP 35 | style_dim: 256 # length of style code 36 | activ: relu # activation function [relu/lrelu/prelu/selu/tanh] 37 | n_downsample: 2 # number of downsampling layers in content encoder 38 | n_res: 4 # number of residual blocks in content encoder/decoder 39 | pad_type: reflect # padding type [zero/reflect] 40 | new_size: 224 # first resize the shortest image side to this size 41 | dis: 42 | dim: 32 # number of filters in the bottommost layer 43 | norm: none # normalization layer [none/bn/in/ln] 44 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 45 | n_layer: 4 # number of layers in D 46 | gan_type: lsgan # GAN loss [lsgan/nsgan] 47 | num_scales: 3 # number of scales 48 | pad_type: reflect # padding type [zero/reflect] 49 | 50 | # data options 51 | input_dim_a: 3 # number of image channels [1/3] 52 | input_dim_b: 3 # number of image channels [1/3] 53 | num_workers: 8 # number of data loading threads 54 | new_size: 224 # first resize the shortest image side to this size 55 | crop_image_height: 160 # random crop image of this height 56 | crop_image_width: 160 # random crop image of this width 57 | data_root: ./datasets/celebA/ # dataset folder location -------------------------------------------------------------------------------- /SSIM/imgs/example_dists.txt: -------------------------------------------------------------------------------- 1 | 1268_c6s3_066392_00.jpg: 0.409462 2 | 1268_c5s3_043415_00.jpg: 0.370679 3 | 1268_c3s3_025178_00.jpg: 0.377613 4 | 
1268_c1s5_069416_00.jpg: 0.359672 5 | 0310_c1s1_070081_00.jpg: 0.375336 6 | 0310_c3s1_069792_00.jpg: 0.412537 7 | 0310_c5s1_070198_00.jpg: 0.257525 8 | 1387_c2s3_040657_00.jpg: 0.345342 9 | 1387_c3s3_047128_00.jpg: 0.341983 10 | 1387_c1s6_003616_00.jpg: 0.368656 11 | 0950_c3s2_119344_00.jpg: 0.341993 12 | 0950_c5s2_120299_00.jpg: 0.346218 13 | 0950_c6s2_114093_00.jpg: 0.399643 14 | 0813_c5s2_098877_00.jpg: 0.564771 15 | 0813_c4s4_037785_00.jpg: 0.241140 16 | 0813_c3s2_098128_00.jpg: 0.383537 17 | 0813_c1s4_037806_00.jpg: 0.393976 18 | 1394_c5s3_048415_00.jpg: 0.282432 19 | 1394_c6s3_071892_00.jpg: 0.384249 20 | 1394_c1s5_074291_00.jpg: 0.457967 21 | 1394_c3s3_048203_00.jpg: 0.426802 22 | 0153_c1s1_025901_00.jpg: 0.435372 23 | 0153_c4s1_025426_00.jpg: 0.511811 24 | 0538_c1s3_005496_00.jpg: 0.375202 25 | 0538_c2s1_152691_00.jpg: 0.345056 26 | 0538_c3s1_152808_00.jpg: 0.326725 27 | 0538_c5s1_152995_00.jpg: 0.429904 28 | 0747_c5s3_074212_00.jpg: 0.351172 29 | 0747_c6s4_000377_00.jpg: 0.405755 30 | 0747_c3s3_073894_00.jpg: 0.367626 31 | 0438_c1s2_056446_00.jpg: 0.316888 32 | 0438_c3s1_107092_00.jpg: 0.374331 33 | 1020_c1s4_071011_00.jpg: 0.450040 34 | 1020_c2s2_128977_00.jpg: 0.318594 35 | 1020_c5s2_131974_00.jpg: 0.358469 36 | 1020_c6s2_124968_00.jpg: 0.377623 37 | 0220_c2s1_046501_00.jpg: 0.486279 38 | 0220_c5s1_046776_00.jpg: 0.403562 39 | 0220_c3s1_046101_00.jpg: 0.384795 40 | 0220_c1s1_046976_00.jpg: 0.336192 41 | 0951_c3s2_119419_00.jpg: 0.365312 42 | 0951_c6s2_113668_00.jpg: 0.429206 43 | 0951_c1s4_059161_00.jpg: 0.365047 44 | 0511_c5s1_140795_00.jpg: 0.369904 45 | 0511_c1s2_067821_00.jpg: 0.457426 46 | 1462_c6s3_082142_00.jpg: 0.410175 47 | 1462_c3s3_058503_00.jpg: 0.360401 48 | 0265_c6s1_056276_00.jpg: 0.343683 49 | 0265_c1s1_056206_00.jpg: 0.461380 50 | 0265_c5s1_056273_00.jpg: 0.351248 51 | 0119_c4s1_020501_00.jpg: 0.214391 52 | 0119_c1s1_020626_00.jpg: 0.262322 53 | 1403_c1s6_024196_00.jpg: 0.522994 54 | 1403_c6s4_000002_00.jpg: 0.468204 55 | 1403_c3s3_073069_00.jpg: 0.424320 56 | 1403_c2s3_066702_00.jpg: 0.301243 57 | 1302_c3s3_093369_00.jpg: 0.338897 58 | 1302_c6s4_019777_00.jpg: 0.337023 59 | 1302_c1s6_044721_00.jpg: 0.390457 60 | 0944_c2s2_115552_00.jpg: 0.330748 61 | 0944_c1s4_058061_00.jpg: 0.311505 62 | 0040_c4s1_003801_00.jpg: 0.328948 63 | 0017_c2s1_000976_00.jpg: 0.350756 64 | 0017_c4s1_002051_00.jpg: 0.396924 65 | 0827_c1s4_044506_00.jpg: 0.323459 66 | 0827_c6s2_099418_00.jpg: 0.346720 67 | 0827_c2s2_101732_00.jpg: 0.329449 68 | 0827_c5s2_105652_00.jpg: 0.242451 69 | 1310_c6s3_056667_00.jpg: 0.361216 70 | 1310_c5s3_033440_00.jpg: 0.431048 71 | 1310_c1s5_059341_00.jpg: 0.368947 72 | 1054_c3s2_138419_00.jpg: 0.406628 73 | 1054_c1s5_038591_00.jpg: 0.427551 74 | 1054_c2s2_135127_00.jpg: 0.347783 75 | 1054_c6s3_000342_00.jpg: 0.426075 76 | 0286_c6s1_063151_00.jpg: 0.412983 77 | 0286_c1s1_062956_00.jpg: 0.474810 78 | 0286_c3s1_062567_00.jpg: 0.460199 79 | 0286_c5s1_067598_00.jpg: 0.345743 80 | 0634_c1s6_024346_00.jpg: 0.408376 81 | 0634_c5s3_073387_00.jpg: 0.403219 82 | 0072_c1s1_011251_00.jpg: 0.412135 83 | 0072_c4s1_011526_00.jpg: 0.303925 84 | 0072_c3s1_010576_00.jpg: 0.453071 85 | 0072_c5s1_011251_00.jpg: 0.274215 86 | 0574_c1s3_024126_00.jpg: 0.409572 87 | 0574_c5s2_010155_00.jpg: 0.373737 88 | 0574_c3s2_009662_00.jpg: 0.320667 89 | 0133_c3s1_021626_00.jpg: 0.260459 90 | 0133_c6s1_022526_00.jpg: 0.407536 91 | 0133_c2s1_021701_00.jpg: 0.437009 92 | 0133_c4s1_042776_00.jpg: 0.383070 93 | 0133_c1s1_022276_00.jpg: 0.423213 94 | 1194_c6s3_032892_00.jpg: 0.439256 95 | 
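The listing above pairs Market-1501 image crops with perceptual-distance scores in the plain "name.jpg: score" format. As a minimal, illustrative sketch only (the helper below is not part of the repository), such a file can be aggregated by averaging the scores per identity, taking the leading four-digit ID from each file name; for the sample above, identity 1268 averages to roughly 0.38.

from collections import defaultdict

def mean_dist_per_id(path):
    """Average the per-image scores for each four-digit Market-1501 identity."""
    sums, counts = defaultdict(float), defaultdict(int)
    with open(path) as f:
        for line in f:
            if ':' not in line:
                continue
            name, score = line.rsplit(':', 1)
            pid = name.strip()[:4]  # e.g. '1268' from '1268_c6s3_066392_00.jpg'
            sums[pid] += float(score)
            counts[pid] += 1
    return {pid: sums[pid] / counts[pid] for pid in sums}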
-------------------------------------------------------------------------------- /configs/unit.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | #lr = 0.00005 4 | # logger options 5 | image_save_iter: 2000 # How often do you want to save output images during training 6 | image_display_iter: 1000 # How often do you want to display output images during training 7 | display_size: 32 # How many images do you want to display each time 8 | snapshot_save_iter: 2000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 1000000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0.5 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr: 0.0001 # initial learning rate 19 | lr_policy: step # learning rate scheduler 20 | step_size: 100000 # how often to decay learning rate 21 | gamma: 0.5 # how much to decay learning rate 22 | gan_w: 1 # weight of adversarial loss 23 | recon_x_w: 10 # weight of image reconstruction loss 24 | recon_kl_w: 0.01 # weight of KL loss for reconstruction 25 | recon_s_w: 1 # weight of style reconstruction loss 26 | recon_c_w: 1 # weight of content reconstruction loss 27 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 28 | recon_kl_cyc_w: 0.01 #weight of KL loss for cycle consistency 29 | vgg_w: 0 # weight of domain-invariant perceptual loss 30 | 31 | # model options 32 | gen: 33 | dim: 32 # number of filters in the bottommost layer 34 | mlp_dim: 256 # number of filters in MLP 35 | style_dim: 256 # length of style code 36 | activ: relu # activation function [relu/lrelu/prelu/selu/tanh] 37 | n_downsample: 2 # number of downsampling layers in content encoder 38 | n_res: 4 # number of residual blocks in content encoder/decoder 39 | pad_type: reflect # padding type [zero/reflect] 40 | new_size: 224 # first resize the shortest image side to this size 41 | dis: 42 | dim: 32 # number of filters in the bottommost layer 43 | norm: none # normalization layer [none/bn/in/ln] 44 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 45 | n_layer: 4 # number of layers in D 46 | gan_type: lsgan # GAN loss [lsgan/nsgan] 47 | num_scales: 3 # number of scales 48 | pad_type: reflect # padding type [zero/reflect] 49 | 50 | # data options 51 | input_dim_a: 3 # number of image channels [1/3] 52 | input_dim_b: 3 # number of image channels [1/3] 53 | num_workers: 8 # number of data loading threads 54 | new_size: 224 # first resize the shortest image side to this size 55 | crop_image_height: 160 # random crop image of this height 56 | crop_image_width: 160 # random crop image of this width 57 | data_root: ./datasets/celebA/ # dataset folder location -------------------------------------------------------------------------------- /configs/ablation/ab2_1.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 5000 # How often do you want to save output images during training 6 | image_display_iter: 5000 # How often do you want to display output images during training 7 | display_size: 32 # How many images do you want to display each time 8 | snapshot_save_iter: 5000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 400000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.0001 # initial G learning rate 19 | lr_D: 0.0001 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 10 # weight of image reconstruction loss 26 | recon_x_w_re: 0 27 | recon_s_w: 1 # weight of style reconstruction loss 28 | recon_c_w: 1 # weight of content reconstruction loss 29 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 30 | vgg_w: 0 # weight of domain-invariant perceptual loss 31 | fp16: false 32 | # model options 33 | gen: 34 | dim: 42 # number of filters in the bottommost layer 35 | mlp_dim: 256 # number of filters in MLP 36 | style_dim: 256 # length of style code 37 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 38 | n_downsample: 2 # number of downsampling layers in content encoder 39 | n_res: 4 # number of residual blocks in content encoder/decoder 40 | pad_type: reflect # padding type [zero/reflect] 41 | new_size: 224 # first resize the shortest image side to this size 42 | style_name: 1 43 | 44 | dis: 45 | dim: 32 # number of filters in the bottommost layer 46 | norm: none # normalization layer [none/bn/in/ln] 47 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 48 | n_layer: 4 # number of layers in D 49 | gan_type: lsgan # GAN loss [lsgan/nsgan] 50 | num_scales: 3 # number of scales 51 | pad_type: reflect # padding type [zero/reflect] 52 | lambda: 0.01 # 1/0.01/ default=0 53 | 54 | # data options 55 | input_dim_a: 3 # number of image channels [1/3] 56 | input_dim_b: 3 # number of image channels [1/3] 57 | num_workers: 8 # number of data loading threads 58 | new_size: 224 # first resize the shortest image side to this size 59 | crop_image_height: 160 # random crop image of this height 60 | crop_image_width: 160 # random crop image of this width 61 | data_root: ./datasets/celebA/ # dataset folder location -------------------------------------------------------------------------------- /configs/ablation/ab2_4.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 5000 # How often do you want to save output images during training 6 | image_display_iter: 5000 # How often do you want to display output images during training 7 | display_size: 32 # How many images do you want to display each time 8 | snapshot_save_iter: 5000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 400000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.0001 # initial G learning rate 19 | lr_D: 0.0001 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 0 # weight of image reconstruction loss 26 | recon_x_w_re: 10 27 | recon_s_w: 1 # weight of style reconstruction loss 28 | recon_c_w: 1 # weight of content reconstruction loss 29 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 30 | vgg_w: 0 # weight of domain-invariant perceptual loss 31 | fp16: false 32 | # model options 33 | gen: 34 | dim: 42 # number of filters in the bottommost layer 35 | mlp_dim: 256 # number of filters in MLP 36 | style_dim: 256 # length of style code 37 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 38 | n_downsample: 2 # number of downsampling layers in content encoder 39 | n_res: 4 # number of residual blocks in content encoder/decoder 40 | pad_type: reflect # padding type [zero/reflect] 41 | new_size: 224 # first resize the shortest image side to this size 42 | style_name: 1 43 | 44 | dis: 45 | dim: 32 # number of filters in the bottommost layer 46 | norm: none # normalization layer [none/bn/in/ln] 47 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 48 | n_layer: 4 # number of layers in D 49 | gan_type: lsgan # GAN loss [lsgan/nsgan] 50 | num_scales: 3 # number of scales 51 | pad_type: reflect # padding type [zero/reflect] 52 | lambda: 0.01 # 1/0.01/ default=0 53 | 54 | # data options 55 | input_dim_a: 3 # number of image channels [1/3] 56 | input_dim_b: 3 # number of image channels [1/3] 57 | num_workers: 8 # number of data loading threads 58 | new_size: 224 # first resize the shortest image side to this size 59 | crop_image_height: 160 # random crop image of this height 60 | crop_image_width: 160 # random crop image of this width 61 | data_root: ./datasets/celebA/ # dataset folder location -------------------------------------------------------------------------------- /configs/ablation/ab2_2.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 5000 # How often do you want to save output images during training 6 | image_display_iter: 5000 # How often do you want to display output images during training 7 | display_size: 32 # How many images do you want to display each time 8 | snapshot_save_iter: 5000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 400000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.0001 # initial G learning rate 19 | lr_D: 0.0001 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 10 # weight of image reconstruction loss 26 | recon_x_w_re: 10 27 | recon_s_w: 1 # weight of style reconstruction loss 28 | recon_c_w: 1 # weight of content reconstruction loss 29 | recon_x_cyc_w: 0 # weight of explicit style augmented cycle consistency loss 30 | vgg_w: 0 # weight of domain-invariant perceptual loss 31 | fp16: false 32 | # model options 33 | gen: 34 | dim: 42 # number of filters in the bottommost layer 35 | mlp_dim: 256 # number of filters in MLP 36 | style_dim: 256 # length of style code 37 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 38 | n_downsample: 2 # number of downsampling layers in content encoder 39 | n_res: 4 # number of residual blocks in content encoder/decoder 40 | pad_type: reflect # padding type [zero/reflect] 41 | new_size: 224 # first resize the shortest image side to this size 42 | style_name: 1 43 | 44 | dis: 45 | dim: 32 # number of filters in the bottommost layer 46 | norm: none # normalization layer [none/bn/in/ln] 47 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 48 | n_layer: 4 # number of layers in D 49 | gan_type: lsgan # GAN loss [lsgan/nsgan] 50 | num_scales: 3 # number of scales 51 | pad_type: reflect # padding type [zero/reflect] 52 | lambda: 0.01 # 1/0.01/ default=0 53 | 54 | # data options 55 | input_dim_a: 3 # number of image channels [1/3] 56 | input_dim_b: 3 # number of image channels [1/3] 57 | num_workers: 8 # number of data loading threads 58 | new_size: 224 # first resize the shortest image side to this size 59 | crop_image_height: 160 # random crop image of this height 60 | crop_image_width: 160 # random crop image of this width 61 | data_root: ./datasets/celebA/ # dataset folder location -------------------------------------------------------------------------------- /configs/ablation/ab2_3.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 5000 # How often do you want to save output images during training 6 | image_display_iter: 5000 # How often do you want to display output images during training 7 | display_size: 32 # How many images do you want to display each time 8 | snapshot_save_iter: 5000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 400000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.0001 # initial G learning rate 19 | lr_D: 0.0001 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 10 # weight of image reconstruction loss 26 | recon_x_w_re: 10 27 | recon_s_w: 1 # weight of style reconstruction loss 28 | recon_c_w: 0 # weight of content reconstruction loss 29 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 30 | vgg_w: 0 # weight of domain-invariant perceptual loss 31 | fp16: false 32 | # model options 33 | gen: 34 | dim: 42 # number of filters in the bottommost layer 35 | mlp_dim: 256 # number of filters in MLP 36 | style_dim: 256 # length of style code 37 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 38 | n_downsample: 2 # number of downsampling layers in content encoder 39 | n_res: 4 # number of residual blocks in content encoder/decoder 40 | pad_type: reflect # padding type [zero/reflect] 41 | new_size: 224 # first resize the shortest image side to this size 42 | style_name: 1 43 | 44 | dis: 45 | dim: 32 # number of filters in the bottommost layer 46 | norm: none # normalization layer [none/bn/in/ln] 47 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 48 | n_layer: 4 # number of layers in D 49 | gan_type: lsgan # GAN loss [lsgan/nsgan] 50 | num_scales: 3 # number of scales 51 | pad_type: reflect # padding type [zero/reflect] 52 | lambda: 0.01 # 1/0.01/ default=0 53 | 54 | # data options 55 | input_dim_a: 3 # number of image channels [1/3] 56 | input_dim_b: 3 # number of image channels [1/3] 57 | num_workers: 8 # number of data loading threads 58 | new_size: 224 # first resize the shortest image side to this size 59 | crop_image_height: 160 # random crop image of this height 60 | crop_image_width: 160 # random crop image of this width 61 | data_root: ./datasets/celebA/ # dataset folder location -------------------------------------------------------------------------------- /configs/ablation/ab2_dual.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 5000 # How often do you want to save output images during training 6 | image_display_iter: 1000 # How often do you want to display output images during training 7 | display_size: 32 # How many images do you want to display each time 8 | snapshot_save_iter: 5000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 400000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.0001 # initial G learning rate 19 | lr_D: 0.0001 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 10 # weight of image reconstruction loss 26 | recon_x_w_re: 10 27 | recon_s_w: 0 # weight of style reconstruction loss 28 | recon_c_w: 1 # weight of content reconstruction loss 29 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 30 | vgg_w: 0 # weight of domain-invariant perceptual loss 31 | fp16: false 32 | # model options 33 | gen: 34 | dim: 42 # number of filters in the bottommost layer 35 | mlp_dim: 256 # number of filters in MLP 36 | style_dim: 256 # length of style code 37 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 38 | n_downsample: 2 # number of downsampling layers in content encoder 39 | n_res: 4 # number of residual blocks in content encoder/decoder 40 | pad_type: reflect # padding type [zero/reflect] 41 | new_size: 224 # first resize the shortest image side to this size 42 | style_name: 1 43 | 44 | dis: 45 | dim: 32 # number of filters in the bottommost layer 46 | norm: none # normalization layer [none/bn/in/ln] 47 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 48 | n_layer: 4 # number of layers in D 49 | gan_type: lsgan # GAN loss [lsgan/nsgan] 50 | num_scales: 3 # number of scales 51 | pad_type: reflect # padding type [zero/reflect] 52 | lambda: 0.01 # 1/0.01/ default=0 53 | 54 | # data options 55 | input_dim_a: 3 # number of image channels [1/3] 56 | input_dim_b: 3 # number of image channels [1/3] 57 | num_workers: 8 # number of data loading threads 58 | new_size: 224 # first resize the shortest image side to this size 59 | crop_image_height: 160 # random crop image of this height 60 | crop_image_width: 160 # random crop image of this width 61 | data_root: ./datasets/celebA/ # dataset folder location -------------------------------------------------------------------------------- /configs/LFW.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 3000 # How often do you want to save output images during training 6 | image_display_iter: 1000 # How often do you want to display output images during training 7 | display_size: 16 # How many images do you want to display each time 8 | snapshot_save_iter: 3000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 600000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.0001 # initial G learning rate 19 | lr_D: 0.0001 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 10 # weight of image reconstruction loss 26 | recon_x_w_re: 20 27 | recon_x_w_res: 0 28 | recon_s_w: 0 # weight of style reconstruction loss 29 | recon_c_w: 1 # weight of content reconstruction loss 30 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 31 | vgg_w: 0 # weight of domain-invariant perceptual loss 32 | fp16: false 33 | # model options 34 | gen: 35 | dim: 42 # number of filters in the bottommost layer 36 | mlp_dim: 256 # number of filters in MLP 37 | style_dim: 256 # length of style code 38 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 39 | n_downsample: 2 # number of downsampling layers in content encoder 40 | n_res: 4 # number of residual blocks in content encoder/decoder 41 | pad_type: reflect # padding type [zero/reflect] 42 | new_size: 224 # first resize the shortest image side to this size 43 | style_name: 1 44 | 45 | dis: 46 | dim: 32 # number of filters in the bottommost layer 47 | norm: none # normalization layer [none/bn/in/ln] 48 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 49 | n_layer: 4 # number of layers in D 50 | gan_type: lsgan # GAN loss [lsgan/nsgan] 51 | num_scales: 3 # number of scales 52 | pad_type: reflect # padding type [zero/reflect] 53 | lambda: 0.01 # 1/0.01/ default=0 54 | 55 | # data options 56 | input_dim_a: 3 # number of image channels [1/3] 57 | input_dim_b: 3 # number of image channels [1/3] 58 | num_workers: 8 # number of data loading threads 59 | new_size: 224 # first resize the shortest image side to this size 60 | crop_image_height: 224 # random crop image of this height # celeba 160 61 | crop_image_width: 224 # random crop image of this width 62 | data_root: ./datasets/LFW/ # dataset folder location -------------------------------------------------------------------------------- /configs/ablation/ab2.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 5000 # How often do you want to save output images during training 6 | image_display_iter: 1000 # How often do you want to display output images during training 7 | display_size: 32 # How many images do you want to display each time 8 | snapshot_save_iter: 5000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 400000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.0001 # initial G learning rate 19 | lr_D: 0.0001 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 10 # weight of image reconstruction loss 26 | recon_x_w_re: 10 27 | recon_x_w_res: 0 28 | recon_s_w: 0 # weight of style reconstruction loss 29 | recon_c_w: 1 # weight of content reconstruction loss 30 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 31 | vgg_w: 0 # weight of domain-invariant perceptual loss 32 | fp16: false 33 | # model options 34 | gen: 35 | dim: 42 # number of filters in the bottommost layer 36 | mlp_dim: 256 # number of filters in MLP 37 | style_dim: 256 # length of style code 38 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 39 | n_downsample: 2 # number of downsampling layers in content encoder 40 | n_res: 4 # number of residual blocks in content encoder/decoder 41 | pad_type: reflect # padding type [zero/reflect] 42 | new_size: 224 # first resize the shortest image side to this size 43 | style_name: 1 44 | 45 | dis: 46 | dim: 32 # number of filters in the bottommost layer 47 | norm: none # normalization layer [none/bn/in/ln] 48 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 49 | n_layer: 4 # number of layers in D 50 | gan_type: lsgan # GAN loss [lsgan/nsgan] 51 | num_scales: 3 # number of scales 52 | pad_type: reflect # padding type [zero/reflect] 53 | lambda: 0.01 # 1/0.01/ default=0 54 | 55 | # data options 56 | input_dim_a: 3 # number of image channels [1/3] 57 | input_dim_b: 3 # number of image channels [1/3] 58 | num_workers: 8 # number of data loading threads 59 | new_size: 224 # first resize the shortest image side to this size 60 | crop_image_height: 160 # random crop image of this height 61 | crop_image_width: 160 # random crop image of this width 62 | data_root: ./datasets/celebA/ # dataset folder location -------------------------------------------------------------------------------- /configs/celeba.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 3000 # How often do you want to save output images during training 6 | image_display_iter: 1000 # How often do you want to display output images during training 7 | display_size: 16 # How many images do you want to display each time 8 | snapshot_save_iter: 3000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 600000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.0001 # initial G learning rate 19 | lr_D: 0.0001 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 10 # weight of image reconstruction loss 26 | recon_x_w_re: 10 27 | recon_x_w_res: 0.1 28 | recon_s_w: 0 # weight of style reconstruction loss 29 | recon_c_w: 1 # weight of content reconstruction loss 30 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 31 | vgg_w: 0 # weight of domain-invariant perceptual loss 32 | fp16: false 33 | # model options 34 | gen: 35 | dim: 42 # number of filters in the bottommost layer 36 | mlp_dim: 256 # number of filters in MLP 37 | style_dim: 256 # length of style code 38 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 39 | n_downsample: 2 # number of downsampling layers in content encoder 40 | n_res: 4 # number of residual blocks in content encoder/decoder 41 | pad_type: reflect # padding type [zero/reflect] 42 | new_size: 224 # first resize the shortest image side to this size 43 | style_name: 1 44 | 45 | dis: 46 | dim: 32 # number of filters in the bottommost layer 47 | norm: none # normalization layer [none/bn/in/ln] 48 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 49 | n_layer: 4 # number of layers in D 50 | gan_type: lsgan # GAN loss [lsgan/nsgan] 51 | num_scales: 3 # number of scales 52 | pad_type: reflect # padding type [zero/reflect] 53 | lambda: 0.01 # 1/0.01/ default=0 54 | 55 | # data options 56 | input_dim_a: 3 # number of image channels [1/3] 57 | input_dim_b: 3 # number of image channels [1/3] 58 | num_workers: 8 # number of data loading threads 59 | new_size: 224 # first resize the shortest image side to this size 60 | crop_image_height: 160 # random crop image of this height # celeba 160 61 | crop_image_width: 160 # random crop image of this width 62 | data_root: ./datasets/celebA/ # dataset folder location -------------------------------------------------------------------------------- /configs/meglass.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 3000 # How often do you want to save output images during training 6 | image_display_iter: 1000 # How often do you want to display output images during training 7 | display_size: 16 # How many images do you want to display each time 8 | snapshot_save_iter: 3000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 600000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.0001 # initial G learning rate 19 | lr_D: 0.0001 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 10 # weight of image reconstruction loss 26 | recon_x_w_re: 10 27 | recon_x_w_res: 0.1 28 | recon_s_w: 0 # weight of style reconstruction loss 29 | recon_c_w: 1 # weight of content reconstruction loss 30 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 31 | vgg_w: 0 # weight of domain-invariant perceptual loss 32 | fp16: false 33 | # model options 34 | gen: 35 | dim: 42 # number of filters in the bottommost layer 36 | mlp_dim: 256 # number of filters in MLP 37 | style_dim: 256 # length of style code 38 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 39 | n_downsample: 2 # number of downsampling layers in content encoder 40 | n_res: 4 # number of residual blocks in content encoder/decoder 41 | pad_type: reflect # padding type [zero/reflect] 42 | new_size: 224 # first resize the shortest image side to this size 43 | style_name: 1 44 | 45 | dis: 46 | dim: 32 # number of filters in the bottommost layer 47 | norm: none # normalization layer [none/bn/in/ln] 48 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 49 | n_layer: 4 # number of layers in D 50 | gan_type: lsgan # GAN loss [lsgan/nsgan] 51 | num_scales: 3 # number of scales 52 | pad_type: reflect # padding type [zero/reflect] 53 | lambda: 0.01 # 1/0.01/ default=0 54 | 55 | # data options 56 | input_dim_a: 3 # number of image channels [1/3] 57 | input_dim_b: 3 # number of image channels [1/3] 58 | num_workers: 8 # number of data loading threads 59 | new_size: 224 # first resize the shortest image side to this size 60 | crop_image_height: 120 # random crop image of this height # celeba 160 61 | crop_image_width: 120 # random crop image of this width 62 | data_root: ./datasets/MeGlass/ # dataset folder location -------------------------------------------------------------------------------- /configs/meglass_refine.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 2000 # How often do you want to save output images during training 6 | image_display_iter: 1000 # How often do you want to display output images during training 7 | display_size: 16 # How many images do you want to display each time 8 | snapshot_save_iter: 4000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 800000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.0001 # initial G learning rate 19 | lr_D: 0.0001 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 10 # weight of image reconstruction loss 26 | recon_x_w_re: 10 27 | recon_x_w_res: 0.1 28 | recon_s_w: 0 # weight of style reconstruction loss 29 | recon_c_w: 1 # weight of content reconstruction loss 30 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 31 | vgg_w: 0 # weight of domain-invariant perceptual loss 32 | fp16: false 33 | # model options 34 | gen: 35 | dim: 42 # number of filters in the bottommost layer 36 | mlp_dim: 256 # number of filters in MLP 37 | style_dim: 256 # length of style code 38 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 39 | n_downsample: 2 # number of downsampling layers in content encoder 40 | n_res: 4 # number of residual blocks in content encoder/decoder 41 | pad_type: reflect # padding type [zero/reflect] 42 | new_size: 224 # first resize the shortest image side to this size 43 | style_name: 1 44 | 45 | dis: 46 | dim: 32 # number of filters in the bottommost layer 47 | norm: none # normalization layer [none/bn/in/ln] 48 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 49 | n_layer: 4 # number of layers in D 50 | gan_type: lsgan # GAN loss [lsgan/nsgan] 51 | num_scales: 3 # number of scales 52 | pad_type: reflect # padding type [zero/reflect] 53 | lambda: 0.01 # 1/0.01/ default=0 54 | 55 | # data options 56 | input_dim_a: 3 # number of image channels [1/3] 57 | input_dim_b: 3 # number of image channels [1/3] 58 | num_workers: 8 # number of data loading threads 59 | new_size: 224 # first resize the shortest image side to this size 60 | crop_image_height: 120 # random crop image of this height # celeba 160 61 | crop_image_width: 120 # random crop image of this width 62 | data_root: ./datasets/MeGlass/ # dataset folder location -------------------------------------------------------------------------------- /configs/meglass_refine2.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | # Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | #lr: 0.0001 lambda=0.01 4 | # logger options 5 | image_save_iter: 2000 # How often do you want to save output images during training 6 | image_display_iter: 1000 # How often do you want to display output images during training 7 | display_size: 16 # How many images do you want to display each time 8 | snapshot_save_iter: 4000 # How often do you want to save trained models 9 | log_iter: 1 # How often do you want to log the training stats 10 | 11 | # optimization options 12 | max_iter: 800000 # maximum number of training iterations 13 | batch_size: 4 # batch size 14 | weight_decay: 0.0005 # weight decay 15 | beta1: 0 # Adam parameter 16 | beta2: 0.999 # Adam parameter 17 | init: kaiming # initialization [gaussian/kaiming/xavier/orthogonal] 18 | lr_G: 0.000125 # initial G learning rate 19 | lr_D: 0.000125 # initial D learning rate 20 | lr_policy: step # learning rate scheduler 21 | step_size: 100000 # how often to decay learning rate 22 | step_size_cyc: 10000 23 | gamma: 0.5 # how much to decay learning rate 24 | gan_w: 1 # weight of adversarial loss 25 | recon_x_w: 10 # weight of image reconstruction loss 26 | recon_x_w_re: 10 27 | recon_x_w_res: 0.1 28 | recon_s_w: 0 # weight of style reconstruction loss 29 | recon_c_w: 1 # weight of content reconstruction loss 30 | recon_x_cyc_w: 1 # weight of explicit style augmented cycle consistency loss 31 | vgg_w: 0 # weight of domain-invariant perceptual loss 32 | fp16: true 33 | # model options 34 | gen: 35 | dim: 64 # number of filters in the bottommost layer 36 | mlp_dim: 256 # number of filters in MLP 37 | style_dim: 256 # length of style code 38 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 39 | n_downsample: 2 # number of downsampling layers in content encoder 40 | n_res: 4 # number of residual blocks in content encoder/decoder 41 | pad_type: reflect # padding type [zero/reflect] 42 | new_size: 224 # first resize the shortest image side to this size 43 | style_name: 1 44 | 45 | dis: 46 | dim: 32 # number of filters in the bottommost layer 47 | norm: none # normalization layer [none/bn/in/ln] 48 | activ: lrelu # activation function [relu/lrelu/prelu/selu/tanh] 49 | n_layer: 4 # number of layers in D 50 | gan_type: lsgan # GAN loss [lsgan/nsgan] 51 | num_scales: 3 # number of scales 52 | pad_type: reflect # padding type [zero/reflect] 53 | lambda: 0.01 # 1/0.01/ default=0 54 | 55 | # data options 56 | input_dim_a: 3 # number of image channels [1/3] 57 | input_dim_b: 3 # number of image channels [1/3] 58 | num_workers: 8 # number of data loading threads 59 | new_size: 224 # first resize the shortest image side to this size 60 | crop_image_height: 120 # random crop image of this height # celeba 160 61 | crop_image_width: 120 # random crop image of this width 62 | data_root: ./datasets/MeGlass/ # dataset folder location -------------------------------------------------------------------------------- /TTUR-master/BEGAN_FID_batched/config.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | import argparse 3 | 4 | def str2bool(v): 5 | return v.lower() in ('true', '1') 6 | 7 | arg_lists = [] 8 | parser = argparse.ArgumentParser() 9 | 10 | def add_argument_group(name): 11 | arg = parser.add_argument_group(name) 12 | arg_lists.append(arg) 13 | return arg 14 | 15 | # Network 16 | net_arg = add_argument_group('Network') 17 | net_arg.add_argument('--input_scale_size', type=int, default=64, 18 | help='input image will be resized with the given value as width and height') 
19 | net_arg.add_argument('--conv_hidden_num', type=int, default=128, 20 | choices=[64, 128],help='n in the paper') 21 | net_arg.add_argument('--z_num', type=int, default=64, choices=[64, 128]) 22 | 23 | # Data 24 | data_arg = add_argument_group('Data') 25 | data_arg.add_argument('--dataset', type=str, default='CelebA') 26 | data_arg.add_argument('--split', type=str, default='train') 27 | data_arg.add_argument('--batch_size', type=int, default=16) 28 | data_arg.add_argument('--grayscale', type=str2bool, default=False) 29 | data_arg.add_argument('--num_worker', type=int, default=4) 30 | 31 | # Training / test parameters 32 | train_arg = add_argument_group('Training') 33 | train_arg.add_argument('--is_train', type=str2bool, default=True) 34 | train_arg.add_argument('--optimizer', type=str, default='adam') 35 | train_arg.add_argument('--max_step', type=int, default=500000) 36 | train_arg.add_argument('--lr_update_step', type=int, default=100000, choices=[100000, 75000]) 37 | train_arg.add_argument('--d_lr', type=float, default=0.00008) 38 | train_arg.add_argument('--g_lr', type=float, default=0.00008) 39 | train_arg.add_argument('--beta1', type=float, default=0.5) 40 | train_arg.add_argument('--beta2', type=float, default=0.999) 41 | train_arg.add_argument('--gamma', type=float, default=0.5) 42 | train_arg.add_argument('--lambda_k', type=float, default=0.001) 43 | train_arg.add_argument('--use_gpu', type=str2bool, default=True) 44 | 45 | train_arg.add_argument('--update_k', type=str2bool, default=True) 46 | train_arg.add_argument('--k_constant', type=float, default=0.06) 47 | 48 | # FID 49 | fid_arg = add_argument_group('FID') 50 | fid_arg.add_argument('--train_stats_file', type=str, default='train_stats.npz') 51 | fid_arg.add_argument('--eval_num_samples', type=int, default=10000) 52 | fid_arg.add_argument('--eval_batch_size', type=int, default=100) 53 | fid_arg.add_argument('--eval_step', type=int, default=1000) 54 | 55 | 56 | # Misc 57 | misc_arg = add_argument_group('Misc') 58 | misc_arg.add_argument('--load_checkpoint', type=str2bool, default=False) 59 | misc_arg.add_argument('--checkpoint_name', type=str, default='') 60 | misc_arg.add_argument('--start_step', type=int, default=0) 61 | misc_arg.add_argument('--log_step', type=int, default=500) 62 | misc_arg.add_argument('--save_step', type=int, default=5000) 63 | misc_arg.add_argument('--num_log_samples', type=int, default=3) 64 | misc_arg.add_argument('--log_level', type=str, default='INFO', choices=['INFO', 'DEBUG', 'WARN']) 65 | misc_arg.add_argument('--log_dir', type=str, default='logs') 66 | misc_arg.add_argument('--data_dir', type=str, default='data') 67 | misc_arg.add_argument('--test_data_path', type=str, default=None, 68 | help='directory with images which will be used in test sample generation') 69 | misc_arg.add_argument('--sample_per_image', type=int, default=64, 70 | help='# of sample per image during test sample generation') 71 | misc_arg.add_argument('--random_seed', type=int, default=123) 72 | 73 | def get_config(): 74 | config, unparsed = parser.parse_known_args() 75 | if config.use_gpu: 76 | data_format = 'NCHW' 77 | else: 78 | data_format = 'NHWC' 79 | setattr(config, 'data_format', data_format) 80 | return config, unparsed 81 | -------------------------------------------------------------------------------- /TTUR-master/BEGAN_FID_batched/models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | slim = tf.contrib.slim 4 | 5 | def 
GeneratorCNN(z, hidden_num, output_num, repeat_num, data_format, reuse): 6 | with tf.variable_scope("G", reuse=reuse) as vs: 7 | num_output = int(np.prod([8, 8, hidden_num])) 8 | x = slim.fully_connected(z, num_output, activation_fn=None) 9 | x = reshape(x, 8, 8, hidden_num, data_format) 10 | 11 | for idx in range(repeat_num): 12 | x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) 13 | x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) 14 | if idx < repeat_num - 1: 15 | x = upscale(x, 2, data_format) 16 | 17 | out = slim.conv2d(x, 3, 3, 1, activation_fn=None, data_format=data_format) 18 | 19 | variables = tf.contrib.framework.get_variables(vs) 20 | return out, variables 21 | 22 | def DiscriminatorCNN(x, input_channel, z_num, repeat_num, hidden_num, data_format): 23 | with tf.variable_scope("D") as vs: 24 | # Encoder 25 | x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) 26 | 27 | prev_channel_num = hidden_num 28 | for idx in range(repeat_num): 29 | channel_num = hidden_num * (idx + 1) 30 | x = slim.conv2d(x, channel_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) 31 | x = slim.conv2d(x, channel_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) 32 | if idx < repeat_num - 1: 33 | x = slim.conv2d(x, channel_num, 3, 2, activation_fn=tf.nn.elu, data_format=data_format) 34 | #x = tf.contrib.layers.max_pool2d(x, [2, 2], [2, 2], padding='VALID') 35 | 36 | x = tf.reshape(x, [-1, np.prod([8, 8, channel_num])]) 37 | z = x = slim.fully_connected(x, z_num, activation_fn=None) 38 | 39 | # Decoder 40 | num_output = int(np.prod([8, 8, hidden_num])) 41 | x = slim.fully_connected(x, num_output, activation_fn=None) 42 | x = reshape(x, 8, 8, hidden_num, data_format) 43 | 44 | for idx in range(repeat_num): 45 | x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) 46 | x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) 47 | if idx < repeat_num - 1: 48 | x = upscale(x, 2, data_format) 49 | 50 | out = slim.conv2d(x, input_channel, 3, 1, activation_fn=None, data_format=data_format) 51 | 52 | variables = tf.contrib.framework.get_variables(vs) 53 | return out, z, variables 54 | 55 | def int_shape(tensor): 56 | shape = tensor.get_shape().as_list() 57 | return [num if num is not None else -1 for num in shape] 58 | 59 | def get_conv_shape(tensor, data_format): 60 | shape = int_shape(tensor) 61 | # always return [N, H, W, C] 62 | if data_format == 'NCHW': 63 | return [shape[0], shape[2], shape[3], shape[1]] 64 | elif data_format == 'NHWC': 65 | return shape 66 | 67 | def nchw_to_nhwc(x): 68 | return tf.transpose(x, [0, 2, 3, 1]) 69 | 70 | def nhwc_to_nchw(x): 71 | return tf.transpose(x, [0, 3, 1, 2]) 72 | 73 | def reshape(x, h, w, c, data_format): 74 | if data_format == 'NCHW': 75 | x = tf.reshape(x, [-1, c, h, w]) 76 | else: 77 | x = tf.reshape(x, [-1, h, w, c]) 78 | return x 79 | 80 | def resize_nearest_neighbor(x, new_size, data_format): 81 | if data_format == 'NCHW': 82 | x = nchw_to_nhwc(x) 83 | x = tf.image.resize_nearest_neighbor(x, new_size) 84 | x = nhwc_to_nchw(x) 85 | else: 86 | x = tf.image.resize_nearest_neighbor(x, new_size) 87 | return x 88 | 89 | def upscale(x, scale, data_format): 90 | _, h, w, _ = get_conv_shape(x, data_format) 91 | return resize_nearest_neighbor(x, (h*scale, w*scale), data_format) 92 | -------------------------------------------------------------------------------- 
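A quick sanity check on GeneratorCNN above, offered as an illustrative sketch only (this helper is not part of the TTUR code): the generator starts from an 8x8 feature map and calls upscale(x, 2, data_format) after every block except the last, so its output resolution is 8 * 2**(repeat_num - 1). A repeat_num of 4 therefore produces 64x64 images, consistent with the default --input_scale_size of 64 in config.py.

def began_output_size(repeat_num, base=8):
    """Spatial size produced by GeneratorCNN: the 8x8 seed is doubled after all but the last block."""
    return base * 2 ** (repeat_num - 1)

assert began_output_size(4) == 64   # matches the 64x64 default resolution
assert began_output_size(5) == 128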
/TTUR-master/WGAN_GP/tflib/ops/deconv2d.py: -------------------------------------------------------------------------------- 1 | import tflib as lib 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | _default_weightnorm = False 7 | def enable_default_weightnorm(): 8 | global _default_weightnorm 9 | _default_weightnorm = True 10 | 11 | _weights_stdev = None 12 | def set_weights_stdev(weights_stdev): 13 | global _weights_stdev 14 | _weights_stdev = weights_stdev 15 | 16 | def unset_weights_stdev(): 17 | global _weights_stdev 18 | _weights_stdev = None 19 | 20 | def Deconv2D( 21 | name, 22 | input_dim, 23 | output_dim, 24 | filter_size, 25 | inputs, 26 | he_init=True, 27 | weightnorm=None, 28 | biases=True, 29 | gain=1., 30 | mask_type=None, 31 | ): 32 | """ 33 | inputs: tensor of shape (batch size, height, width, input_dim) 34 | returns: tensor of shape (batch size, 2*height, 2*width, output_dim) 35 | """ 36 | with tf.name_scope(name) as scope: 37 | 38 | if mask_type != None: 39 | raise Exception('Unsupported configuration') 40 | 41 | def uniform(stdev, size): 42 | return np.random.uniform( 43 | low=-stdev * np.sqrt(3), 44 | high=stdev * np.sqrt(3), 45 | size=size 46 | ).astype('float32') 47 | 48 | stride = 2 49 | fan_in = input_dim * filter_size**2 // (stride**2) 50 | fan_out = output_dim * filter_size**2 51 | 52 | if he_init: 53 | filters_stdev = np.sqrt(4./(fan_in+fan_out)) 54 | else: # Normalized init (Glorot & Bengio) 55 | filters_stdev = np.sqrt(2./(fan_in+fan_out)) 56 | 57 | 58 | if _weights_stdev is not None: 59 | filter_values = uniform( 60 | _weights_stdev, 61 | (filter_size, filter_size, output_dim, input_dim) 62 | ) 63 | else: 64 | filter_values = uniform( 65 | filters_stdev, 66 | (filter_size, filter_size, output_dim, input_dim) 67 | ) 68 | 69 | filter_values *= gain 70 | 71 | filters = lib.param( 72 | name+'.Filters', 73 | filter_values 74 | ) 75 | #filters = filter_values 76 | 77 | if weightnorm==None: 78 | weightnorm = _default_weightnorm 79 | if weightnorm: 80 | norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1,3))) 81 | target_norms = lib.param( 82 | name + '.g', 83 | norm_values 84 | ) 85 | target_norms = norm_values 86 | with tf.name_scope('weightnorm') as scope: 87 | norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1,3])) 88 | filters = filters * tf.expand_dims(target_norms / norms, 1) 89 | 90 | 91 | inputs = tf.transpose(inputs, [0,2,3,1], name='NCHW_to_NHWC') 92 | 93 | input_shape = tf.shape(inputs) 94 | try: # tf pre-1.0 (top) vs 1.0 (bottom) 95 | output_shape = tf.pack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim]) 96 | except Exception as e: 97 | output_shape = tf.stack([input_shape[0], 2*input_shape[1], 2*input_shape[2], output_dim]) 98 | 99 | result = tf.nn.conv2d_transpose( 100 | value=inputs, 101 | filter=filters, 102 | output_shape=output_shape, 103 | strides=[1, 2, 2, 1], 104 | padding='SAME' 105 | ) 106 | 107 | if biases: 108 | _biases = lib.param( 109 | name+'.Biases', 110 | np.zeros(output_dim, dtype='float32') 111 | ) 112 | #_biases = np.zeros(output_dim, dtype='float32') 113 | result = tf.nn.bias_add(result, _biases) 114 | 115 | result = tf.transpose(result, [0,3,1,2], name='NHWC_to_NCHW') 116 | 117 | 118 | return result 119 | -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/tflib/ops/conv1d.py: -------------------------------------------------------------------------------- 1 | import tflib as lib 2 | 3 | import numpy as np 4 | import 
tensorflow as tf 5 | 6 | _default_weightnorm = False 7 | def enable_default_weightnorm(): 8 | global _default_weightnorm 9 | _default_weightnorm = True 10 | 11 | def Conv1D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.): 12 | """ 13 | inputs: tensor of shape (batch size, num channels, width) 14 | mask_type: one of None, 'a', 'b' 15 | 16 | returns: tensor of shape (batch size, num channels, width) 17 | """ 18 | with tf.name_scope(name) as scope: 19 | 20 | if mask_type is not None: 21 | mask_type, mask_n_channels = mask_type 22 | 23 | mask = np.ones( 24 | (filter_size, input_dim, output_dim), 25 | dtype='float32' 26 | ) 27 | center = filter_size // 2 28 | 29 | # Mask out future locations 30 | # filter shape is (width, input channels, output channels) 31 | mask[center+1:, :, :] = 0. 32 | 33 | # Mask out future channels 34 | for i in xrange(mask_n_channels): 35 | for j in xrange(mask_n_channels): 36 | if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j): 37 | mask[ 38 | center, 39 | i::mask_n_channels, 40 | j::mask_n_channels 41 | ] = 0. 42 | 43 | 44 | def uniform(stdev, size): 45 | return np.random.uniform( 46 | low=-stdev * np.sqrt(3), 47 | high=stdev * np.sqrt(3), 48 | size=size 49 | ).astype('float32') 50 | 51 | #def uniform(stdev, size): 52 | # return np.random.normal( 53 | # scale=stdev, 54 | # size=size 55 | # ).astype('float32') 56 | 57 | 58 | fan_in = input_dim * filter_size 59 | fan_out = output_dim * filter_size / stride 60 | 61 | if mask_type is not None: # only approximately correct 62 | fan_in /= 2. 63 | fan_out /= 2. 64 | 65 | if he_init: 66 | filters_stdev = np.sqrt(4./(fan_in+fan_out)) 67 | else: # Normalized init (Glorot & Bengio) 68 | filters_stdev = np.sqrt(2./(fan_in+fan_out)) 69 | 70 | filter_values = uniform( 71 | filters_stdev, 72 | (filter_size, input_dim, output_dim) 73 | ) 74 | # print "WARNING IGNORING GAIN" 75 | filter_values *= gain 76 | 77 | filters = lib.param(name+'.Filters', filter_values) 78 | 79 | if weightnorm==None: 80 | weightnorm = _default_weightnorm 81 | if weightnorm: 82 | norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1))) 83 | target_norms = lib.param( 84 | name + '.g', 85 | norm_values 86 | ) 87 | with tf.name_scope('weightnorm') as scope: 88 | norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1])) 89 | filters = filters * (target_norms / norms) 90 | 91 | if mask_type is not None: 92 | with tf.name_scope('filter_mask'): 93 | filters = filters * mask 94 | 95 | result = tf.nn.conv1d( 96 | value=inputs, 97 | filters=filters, 98 | stride=stride, 99 | padding='SAME', 100 | data_format='NCHW' 101 | ) 102 | 103 | if biases: 104 | _biases = lib.param( 105 | name+'.Biases', 106 | np.zeros([output_dim], dtype='float32') 107 | ) 108 | 109 | # result = result + _biases 110 | 111 | result = tf.expand_dims(result, 3) 112 | result = tf.nn.bias_add(result, _biases, data_format='NCHW') 113 | result = tf.squeeze(result) 114 | 115 | return result 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Unsupervised Eyeglasses Removal in the Wild [[arXiv]](https://arxiv.org/abs/1909.06989) 2 | By Bingwen Hu, Zhedong Zheng, Ping Liu, Wankou Yang and Mingwu Ren. TCYB 2021. 
3 | 4 | ## Prerequisites 5 | - Python 3.6, Ubuntu 14.04 6 | - GPU Memory >= 11G 7 | - conda install pytorch>=0.4.1 torchvision cuda91 -y -c pytorch 8 | - conda install -y -c anaconda pip 9 | - conda install -y -c anaconda pyyaml 10 | - pip install tensorboard tensorboardX 11 | 12 | ## Getting started 13 | Clone the ERGAN source code: 14 | ``` 15 | git clone https://github.com/Bingwen-Hu/ERGAN-Pytorch 16 | ``` 17 | 18 | The folder is structured as follows: 19 | ``` 20 | ├── ERGAN-Pytorch/ 21 | │ ├── configs/ /* Files for configs 22 | │ │ ├── celeba.yaml 23 | │ │ ├── meglass.yaml 24 | │ ├── models/ /* Files for pretrained models 25 | │ ├── outputs/ /* Intermediate image outputs 26 | │ ├── datasets/CelebA/ 27 | │ ├── trainA/ /* Training set: face images without glasses 28 | │ ├── trainB/ /* Training set: face images with glasses 29 | │ ├── testA/ /* Testing set: face images without glasses 30 | │ └── testB/ /* Testing set: face images with glasses 31 | │ ├── datasets/MeGlass/ 32 | │ ├── trainA/ /* Training set: face images without glasses 33 | │ ├── trainB/ /* Training set: face images with glasses 34 | │ ├── testA/ /* Testing set: face images without glasses 35 | │ └── testB/ /* Testing set: face images with glasses 36 | ``` 37 | 38 | ## Dataset Preparation 39 | Download the CelebA dataset [Here](https://drive.google.com/drive/folders/0B7EVK8r0v71pWEZsZE9oNnFzTm8). Download the MeGlass dataset [Here](https://drive.google.com/file/d/1V0c8p6MOlSFY5R-Hu9LxYZYLXd8B8j9q/view). 40 | 41 | We split the CelebA dataset into one subset with glasses and another without glasses, based on the annotated attributes. 42 | ```bash 43 | python celeba_glass.py 44 | ``` 45 | 46 | Note that you must modify the dataset path to your own path. 47 | 48 | ## Train 49 | Set up the yaml file. Check out configs/celeba.yaml for folder-based dataset organization. Change the data_root field to the path of your downloaded dataset. 50 | ```bash 51 | python train.py --config configs/celeba.yaml 52 | ``` 53 | Intermediate image outputs and model binary files are stored in outputs/celeba. 54 | 55 | ## Test 56 | First, download our pretrained models ([Google Drive](https://drive.google.com/file/d/1ap7qB6rkKjx5K2lrnzJ8eIHlpzW4fnh5/view?usp=sharing)) for the eyeglasses removal task and put them in the models folder. 57 | 58 | If you want to test your own data with our pre-trained model, you need to align the data first (refer to CelebA) or retrain with your own data. 59 | 60 | For CelebA: 61 | ```bash 62 | python test_batch.py --config configs/celeba.yaml --A input_path_A --B input_path_B --output_folder results/celeba --checkpoint models/celeba.pt 63 | ``` 64 | 65 | For MeGlass: 66 | ```bash 67 | python test_batch.py --config configs/meglass.yaml --A input_path_A --B input_path_B --output_folder results/meglass --checkpoint models/meglasss.pt 68 | ``` 69 | 70 | `--A` The path of the test set (without glasses). 71 | 72 | `--B` The path of the test set (with glasses). 73 | 74 | 75 | The results are stored in the results/celeba and results/meglass folders, respectively. 76 | 77 | ## Citation 78 | If you find ERGAN useful in your research, please consider citing: 79 | ```bibtex 80 | @article{hu2021unsupervised, 81 | title={Unsupervised Eyeglasses Removal in the Wild}, 82 | author={Hu, Bingwen and Zheng, Zhedong and Liu, Ping and Yang, Wankou and Ren, Mingwu}, 83 | journal={IEEE Transactions on Cybernetics}, 84 | volume={51}, 85 | number={9}, 86 | pages={4373--4385}, 87 | year={2021} 88 | } 89 | ``` 90 | 91 | ## Related Repos 92 | 1.
[CycleGAN](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) 93 | 2. [UNIT](https://github.com/mingyuliutw/UNIT) 94 | 3. [MUNIT](https://github.com/NVlabs/MUNIT) 95 | 4. [LPIPS](https://github.com/richzhang/PerceptualSimilarity) 96 | 5. [FID](https://github.com/bioinf-jku/TTUR) 97 | 6. [MeGlass](https://github.com/cleardusk/MeGlass) 98 | 7. [DG-Net](https://github.com/NVlabs/DG-Net) 99 | 100 | 101 | ## Acknowledgments 102 | Our code is inspired by MUNIT and DG-Net. 103 | -------------------------------------------------------------------------------- /TTUR-master/README.md: -------------------------------------------------------------------------------- 1 | # Two time-scale update rule for training GANs 2 | 3 | This repository contains code accompanying the paper [GANs Trained by a Two Time-Scale Update Rule 4 | Converge to a Local Nash Equilibrium](https://arxiv.org/abs/1706.08500). 5 | 6 | ## Fréchet Inception Distance (FID) 7 | The FID is the performance measure used to evaluate the experiments in the paper. There, a detailed description can be found 8 | in the experiment section as well as in the appendix, Section A1. 9 | 10 | In short: 11 | The Fréchet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2) is 12 | 13 | d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)). 14 | 15 | The FID is calculated by assuming that X_1 and X_2 are the activations of the coding layer pool_3 of the Inception model (see below) for generated samples and real world samples respectively. mu_n is the mean and C_n the covariance of the activations of the coding layer over all real world or generated samples. 16 | 17 | IMPORTANT: The number of samples used to calculate the Gaussian statistics (mean and covariance) should be greater than the 18 | dimension of the coding layer, here 2048 for the Inception pool 3 layer. Otherwise the covariance is not full rank, resulting in complex numbers and NaNs when calculating the square root. 19 | 20 | We recommend using a minimum sample size of 10,000 to calculate the FID; otherwise the true FID of the generator is 21 | underestimated. 22 | 23 | ### Compatibility notice 24 | Previous versions of this repository contained two implementations to calculate the FID, an "unbatched" and a "batched" version. 25 | The "unbatched" version should not be used anymore. If you've downloaded this code previously, please update it immediately to 26 | the new version. The old version included a bug! 27 | 28 | ## Provided Code 29 | 30 | Requirements: TF 1.1+, Python 3.x 31 | 32 | #### fid.py 33 | This file contains the implementation of all necessary functions to calculate the FID. It can be used either 34 | as a Python module imported into your own code, or as a standalone 35 | script to calculate the FID between precalculated (training set) statistics and a directory full of images, or between 36 | two directories of images. 37 | 38 | To compare directories with pre-calculated statistics (e.g. the ones from http://bioinf.jku.at/research/ttur/), use: 39 | 40 | fid.py /path/to/images /path/to/precalculated_stats.npz 41 | 42 | To compare two directories, use: 43 | 44 | fid.py /path/to/images /path/to/other_images 45 | 46 | See `fid.py --help` for more details. 47 | 48 | #### fid_example.py 49 | Example code to show the usage of `fid.py` in your own Python scripts. 50 | 51 | #### precalc_stats_example.py 52 | Example code to show how to calculate and save training set statistics.
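The full FID pipeline (Inception activations, statistics, distance) is implemented in `fid.py`. Purely to illustrate the formula quoted above, a minimal NumPy/SciPy sketch of the final distance step could look as follows; the helper name and the assumption that the means and covariances are already available (e.g. loaded from a saved `.npz` statistics file) are ours, not part of this repository:

```python
import numpy as np
from scipy import linalg

def frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2))."""
    diff = mu1 - mu2
    # Matrix square root of the product of the two covariance matrices.
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        # Covariances that are not full rank (too few samples, see the note
        # above) make sqrtm unstable; nudge the diagonals and retry.
        offset = np.eye(sigma1.shape[0]) * eps
        covmean, _ = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset), disp=False)
    if np.iscomplexobj(covmean):
        covmean = covmean.real  # drop tiny imaginary parts from numerical error
    return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * np.trace(covmean)
```

Statistics written by `precalc_stats_example.py` can be loaded with `numpy.load` and passed to such a helper; for the actual evaluation, use `fid.py` directly.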
53 | 54 | 55 | #### WGAN_GP 56 | Improved WGAN (WGAN-GP) implementation forked from https://github.com/igul222/improved_wgan_training 57 | with added FID evaluation for the image model and switchable TTUR/orig settings. Language model with 58 | JSD TensorBoard logging and switchable TTUR/orig settings. 59 | 60 | ## Precalculated Statistics for FID calculation 61 | 62 | Precalculated statistics for datasets 63 | - [cropped CelebA](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_celeba.npz) (64x64, calculated on all samples) 64 | - [LSUN bedroom](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_lsun_train.npz) (calculated on all training samples) 65 | - [CIFAR 10](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_cifar10_train.npz) (calculated on all training samples) 66 | - [SVHN](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_svhn_train.npz) (calculated on all training samples) 67 | - [ImageNet Train](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_imagenet_train.npz) (calculated on all training samples) 68 | - [ImageNet Valid](http://bioinf.jku.at/research/ttur/ttur_stats/fid_stats_imagenet_valid.npz) (calculated on all validation samples) 69 | 70 | 71 | are provided at: http://bioinf.jku.at/research/ttur/ 72 | 73 | ## Additional Links 74 | 75 | For FID evaluation, download the Inception model from http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz 76 | 77 | The cropped CelebA dataset can be downloaded here: http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html 78 | 79 | To download the LSUN bedroom dataset go to: http://www.yf.io/p/lsun 80 | 81 | The 64x64 downsampled ImageNet training and validation datasets can be found here: http://image-net.org/small/download.php 82 | -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 NVIDIA Corporation. All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
4 | """ 5 | import torch.utils.data as data 6 | import os.path 7 | 8 | def default_loader(path): 9 | return Image.open(path).convert('RGB') 10 | 11 | 12 | def default_flist_reader(flist): 13 | """ 14 | flist format: impath label\nimpath label\n ...(same to caffe's filelist) 15 | """ 16 | imlist = [] 17 | with open(flist, 'r') as rf: 18 | for line in rf.readlines(): 19 | impath = line.strip() 20 | imlist.append(impath) 21 | 22 | return imlist 23 | 24 | 25 | class ImageFilelist(data.Dataset): 26 | def __init__(self, root, flist, transform=None, 27 | flist_reader=default_flist_reader, loader=default_loader): 28 | self.root = root 29 | self.imlist = flist_reader(flist) 30 | self.transform = transform 31 | self.loader = loader 32 | 33 | def __getitem__(self, index): 34 | impath = self.imlist[index] 35 | img = self.loader(os.path.join(self.root, impath)) 36 | if self.transform is not None: 37 | img = self.transform(img) 38 | 39 | return img 40 | 41 | def __len__(self): 42 | return len(self.imlist) 43 | 44 | 45 | class ImageLabelFilelist(data.Dataset): 46 | def __init__(self, root, flist, transform=None, 47 | flist_reader=default_flist_reader, loader=default_loader): 48 | self.root = root 49 | self.imlist = flist_reader(os.path.join(self.root, flist)) 50 | self.transform = transform 51 | self.loader = loader 52 | self.classes = sorted(list(set([path.split('/')[0] for path in self.imlist]))) 53 | self.class_to_idx = {self.classes[i]: i for i in range(len(self.classes))} 54 | self.imgs = [(impath, self.class_to_idx[impath.split('/')[0]]) for impath in self.imlist] 55 | 56 | def __getitem__(self, index): 57 | impath, label = self.imgs[index] 58 | img = self.loader(os.path.join(self.root, impath)) 59 | if self.transform is not None: 60 | img = self.transform(img) 61 | return img, label 62 | 63 | def __len__(self): 64 | return len(self.imgs) 65 | 66 | ############################################################################### 67 | # Code from 68 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py 69 | # Modified the original code so that it also loads images from the current 70 | # directory as well as the subdirectories 71 | ############################################################################### 72 | 73 | import torch.utils.data as data 74 | 75 | from PIL import Image 76 | import os 77 | import os.path 78 | 79 | IMG_EXTENSIONS = [ 80 | '.jpg', '.JPG', '.jpeg', '.JPEG', 81 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 82 | ] 83 | 84 | 85 | def is_image_file(filename): 86 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 87 | 88 | 89 | def make_dataset(dir): 90 | images = [] 91 | assert os.path.isdir(dir), '%s is not a valid directory' % dir 92 | 93 | for root, _, fnames in sorted(os.walk(dir)): 94 | for fname in fnames: 95 | if is_image_file(fname): 96 | path = os.path.join(root, fname) 97 | images.append(path) 98 | 99 | return images 100 | 101 | 102 | class ImageFolder(data.Dataset): 103 | 104 | def __init__(self, root, transform=None, return_paths=False, 105 | loader=default_loader): 106 | imgs = sorted(make_dataset(root)) 107 | if len(imgs) == 0: 108 | raise(RuntimeError("Found 0 images in: " + root + "\n" 109 | "Supported image extensions are: " + 110 | ",".join(IMG_EXTENSIONS))) 111 | 112 | self.root = root 113 | self.imgs = imgs 114 | self.transform = transform 115 | self.return_paths = return_paths 116 | self.loader = loader 117 | 118 | def __getitem__(self, index): 119 | path = self.imgs[index] 120 | img = self.loader(path) 
121 | if self.transform is not None: 122 | img = self.transform(img) 123 | if self.return_paths: 124 | return img, path 125 | else: 126 | return img 127 | 128 | def __len__(self): 129 | return len(self.imgs) 130 | -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/tflib/ops/conv2d.py: -------------------------------------------------------------------------------- 1 | import tflib as lib 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | _default_weightnorm = False 7 | def enable_default_weightnorm(): 8 | global _default_weightnorm 9 | _default_weightnorm = True 10 | 11 | _weights_stdev = None 12 | def set_weights_stdev(weights_stdev): 13 | global _weights_stdev 14 | _weights_stdev = weights_stdev 15 | 16 | def unset_weights_stdev(): 17 | global _weights_stdev 18 | _weights_stdev = None 19 | 20 | def Conv2D(name, input_dim, output_dim, filter_size, inputs, he_init=True, mask_type=None, stride=1, weightnorm=None, biases=True, gain=1.): 21 | """ 22 | inputs: tensor of shape (batch size, num channels, height, width) 23 | mask_type: one of None, 'a', 'b' 24 | 25 | returns: tensor of shape (batch size, num channels, height, width) 26 | """ 27 | with tf.name_scope(name) as scope: 28 | 29 | if mask_type is not None: 30 | mask_type, mask_n_channels = mask_type 31 | 32 | mask = np.ones( 33 | (filter_size, filter_size, input_dim, output_dim), 34 | dtype='float32' 35 | ) 36 | center = filter_size // 2 37 | 38 | # Mask out future locations 39 | # filter shape is (height, width, input channels, output channels) 40 | mask[center+1:, :, :, :] = 0. 41 | mask[center, center+1:, :, :] = 0. 42 | 43 | # Mask out future channels 44 | for i in xrange(mask_n_channels): 45 | for j in xrange(mask_n_channels): 46 | if (mask_type=='a' and i >= j) or (mask_type=='b' and i > j): 47 | mask[ 48 | center, 49 | center, 50 | i::mask_n_channels, 51 | j::mask_n_channels 52 | ] = 0. 53 | 54 | 55 | def uniform(stdev, size): 56 | return np.random.uniform( 57 | low=-stdev * np.sqrt(3), 58 | high=stdev * np.sqrt(3), 59 | size=size 60 | ).astype('float32') 61 | 62 | fan_in = input_dim * filter_size**2 63 | fan_out = output_dim * filter_size**2 // (stride**2) 64 | 65 | if mask_type is not None: # only approximately correct 66 | fan_in //= 2. 67 | fan_out //= 2. 
68 | 69 | if he_init: 70 | filters_stdev = np.sqrt(4./(fan_in+fan_out)) 71 | else: # Normalized init (Glorot & Bengio) 72 | filters_stdev = np.sqrt(2./(fan_in+fan_out)) 73 | 74 | if _weights_stdev is not None: 75 | filter_values = uniform( 76 | _weights_stdev, 77 | (filter_size, filter_size, input_dim, output_dim) 78 | ) 79 | else: 80 | filter_values = uniform( 81 | filters_stdev, 82 | (filter_size, filter_size, input_dim, output_dim) 83 | ) 84 | 85 | # print "WARNING IGNORING GAIN" 86 | filter_values *= gain 87 | 88 | filters = lib.param(name+'.Filters', filter_values) 89 | #filters = filter_values 90 | 91 | if weightnorm==None: 92 | weightnorm = _default_weightnorm 93 | if weightnorm: 94 | norm_values = np.sqrt(np.sum(np.square(filter_values), axis=(0,1,2))) 95 | target_norms = lib.param( 96 | name + '.g', 97 | # norm_values 98 | ) 99 | #target_norms = norm_values 100 | with tf.name_scope('weightnorm') as scope: 101 | norms = tf.sqrt(tf.reduce_sum(tf.square(filters), reduction_indices=[0,1,2])) 102 | filters = filters * (target_norms / norms) 103 | 104 | if mask_type is not None: 105 | with tf.name_scope('filter_mask'): 106 | filters = filters * mask 107 | 108 | result = tf.nn.conv2d( 109 | input=inputs, 110 | filter=filters, 111 | strides=[1, 1, stride, stride], 112 | padding='SAME', 113 | data_format='NCHW' 114 | ) 115 | 116 | if biases: 117 | _biases = lib.param( 118 | name+'.Biases', 119 | np.zeros(output_dim, dtype='float32') 120 | ) 121 | #_biases = np.zeros(output_dim, dtype='float32') 122 | result = tf.nn.bias_add(result, _biases, data_format='NCHW') 123 | 124 | 125 | return result 126 | -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/tflib/ops/batchnorm.py: -------------------------------------------------------------------------------- 1 | import tflib as lib 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | def Batchnorm(name, axes, inputs, is_training=None, stats_iter=None, update_moving_stats=True, fused=True): 7 | if ((axes == [0,2,3]) or (axes == [0,2])) and fused==True: 8 | if axes==[0,2]: 9 | inputs = tf.expand_dims(inputs, 3) 10 | # Old (working but pretty slow) implementation: 11 | ########## 12 | 13 | # inputs = tf.transpose(inputs, [0,2,3,1]) 14 | 15 | # mean, var = tf.nn.moments(inputs, [0,1,2], keep_dims=False) 16 | # offset = lib.param(name+'.offset', np.zeros(mean.get_shape()[-1], dtype='float32')) 17 | # scale = lib.param(name+'.scale', np.ones(var.get_shape()[-1], dtype='float32')) 18 | # result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-4) 19 | 20 | # return tf.transpose(result, [0,3,1,2]) 21 | 22 | # New (super fast but untested) implementation: 23 | offset = lib.param(name+'.offset', np.zeros(inputs.get_shape()[1], dtype='float32')) 24 | scale = lib.param(name+'.scale', np.ones(inputs.get_shape()[1], dtype='float32')) 25 | 26 | moving_mean = lib.param(name+'.moving_mean', np.zeros(inputs.get_shape()[1], dtype='float32'), trainable=False) 27 | moving_variance = lib.param(name+'.moving_variance', np.ones(inputs.get_shape()[1], dtype='float32'), trainable=False) 28 | 29 | def _fused_batch_norm_training(): 30 | return tf.nn.fused_batch_norm(inputs, scale, offset, epsilon=1e-5, data_format='NCHW') 31 | def _fused_batch_norm_inference(): 32 | # Version which blends in the current item's statistics 33 | batch_size = tf.cast(tf.shape(inputs)[0], 'float32') 34 | mean, var = tf.nn.moments(inputs, [2,3], keep_dims=True) 35 | mean = ((1./batch_size)*mean) + 
(((batch_size-1.)/batch_size)*moving_mean)[None,:,None,None] 36 | var = ((1./batch_size)*var) + (((batch_size-1.)/batch_size)*moving_variance)[None,:,None,None] 37 | return tf.nn.batch_normalization(inputs, mean, var, offset[None,:,None,None], scale[None,:,None,None], 1e-5), mean, var 38 | 39 | # Standard version 40 | # return tf.nn.fused_batch_norm( 41 | # inputs, 42 | # scale, 43 | # offset, 44 | # epsilon=1e-2, 45 | # mean=moving_mean, 46 | # variance=moving_variance, 47 | # is_training=False, 48 | # data_format='NCHW' 49 | # ) 50 | 51 | if is_training is None: 52 | outputs, batch_mean, batch_var = _fused_batch_norm_training() 53 | else: 54 | outputs, batch_mean, batch_var = tf.cond(is_training, 55 | _fused_batch_norm_training, 56 | _fused_batch_norm_inference) 57 | if update_moving_stats: 58 | no_updates = lambda: outputs 59 | def _force_updates(): 60 | """Internal function forces updates moving_vars if is_training.""" 61 | float_stats_iter = tf.cast(stats_iter, tf.float32) 62 | 63 | update_moving_mean = tf.assign(moving_mean, ((float_stats_iter/(float_stats_iter+1))*moving_mean) + ((1/(float_stats_iter+1))*batch_mean)) 64 | update_moving_variance = tf.assign(moving_variance, ((float_stats_iter/(float_stats_iter+1))*moving_variance) + ((1/(float_stats_iter+1))*batch_var)) 65 | 66 | with tf.control_dependencies([update_moving_mean, update_moving_variance]): 67 | return tf.identity(outputs) 68 | outputs = tf.cond(is_training, _force_updates, no_updates) 69 | 70 | if axes == [0,2]: 71 | return outputs[:,:,:,0] # collapse last dim 72 | else: 73 | return outputs 74 | else: 75 | # raise Exception('old BN') 76 | # TODO we can probably use nn.fused_batch_norm here too for speedup 77 | mean, var = tf.nn.moments(inputs, axes, keep_dims=True) 78 | shape = mean.get_shape().as_list() 79 | if 0 not in axes: 80 | print("WARNING (%s): didn't find 0 in axes, but not using separate BN params for each item in batch" % name) 81 | shape[0] = 1 82 | offset = lib.param(name+'.offset', np.zeros(shape, dtype='float32')) 83 | scale = lib.param(name+'.scale', np.ones(shape, dtype='float32')) 84 | result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-5) 85 | 86 | 87 | return result 88 | -------------------------------------------------------------------------------- /MUNIT-master/UNIT_train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 NVIDIA Corporation. All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
4 | """ 5 | from UNIT_utils import get_all_data_loaders, prepare_sub_folder, write_html, write_loss, get_config, write_2images, Timer 6 | import argparse 7 | from torch.autograd import Variable 8 | from UNIT_trainer import MUNIT_Trainer, UNIT_Trainer 9 | import torch.backends.cudnn as cudnn 10 | import torch 11 | try: 12 | from itertools import izip as zip 13 | except ImportError: # will be 3.x series 14 | pass 15 | import os 16 | import sys 17 | import tensorboardX 18 | import shutil 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--config', type=str, default='configs/edges2handbags_folder.yaml', help='Path to the config file.') 22 | parser.add_argument('--output_path', type=str, default='.', help="outputs path") 23 | parser.add_argument("--resume", action="store_true") 24 | parser.add_argument('--trainer', type=str, default='MUNIT', help="MUNIT|UNIT") 25 | opts = parser.parse_args() 26 | 27 | cudnn.benchmark = True 28 | 29 | # Load experiment setting 30 | config = get_config(opts.config) 31 | max_iter = config['max_iter'] 32 | display_size = config['display_size'] 33 | config['vgg_model_path'] = opts.output_path 34 | 35 | # Setup model and data loader 36 | if opts.trainer == 'MUNIT': 37 | trainer = MUNIT_Trainer(config) 38 | elif opts.trainer == 'UNIT': 39 | trainer = UNIT_Trainer(config) 40 | else: 41 | sys.exit("Only support MUNIT|UNIT") 42 | trainer.cuda() 43 | train_loader_a, train_loader_b, test_loader_a, test_loader_b = get_all_data_loaders(config) 44 | train_display_images_a = torch.stack([train_loader_a.dataset[i] for i in range(display_size)]).cuda() 45 | train_display_images_b = torch.stack([train_loader_b.dataset[i] for i in range(display_size)]).cuda() 46 | test_display_images_a = torch.stack([test_loader_a.dataset[i] for i in range(display_size)]).cuda() 47 | test_display_images_b = torch.stack([test_loader_b.dataset[i] for i in range(display_size)]).cuda() 48 | 49 | # Setup logger and output folders 50 | model_name = os.path.splitext(os.path.basename(opts.config))[0] 51 | train_writer = tensorboardX.SummaryWriter(os.path.join(opts.output_path + "/logs", model_name)) 52 | output_directory = os.path.join(opts.output_path + "/outputs", model_name) 53 | checkpoint_directory, image_directory = prepare_sub_folder(output_directory) 54 | shutil.copy(opts.config, os.path.join(output_directory, 'config.yaml')) # copy config file to output folder 55 | 56 | # Start training 57 | iterations = trainer.resume(checkpoint_directory, hyperparameters=config) if opts.resume else 0 58 | while True: 59 | for it, (images_a, images_b) in enumerate(zip(train_loader_a, train_loader_b)): 60 | trainer.update_learning_rate() 61 | images_a, images_b = images_a.cuda().detach(), images_b.cuda().detach() 62 | 63 | with Timer("Elapsed time in update: %f"): 64 | # Main training code 65 | trainer.dis_update(images_a, images_b, config) 66 | trainer.gen_update(images_a, images_b, config) 67 | torch.cuda.synchronize() 68 | 69 | # Dump training stats in log file 70 | if (iterations + 1) % config['log_iter'] == 0: 71 | print("Iteration: %08d/%08d" % (iterations + 1, max_iter)) 72 | write_loss(iterations, trainer, train_writer) 73 | 74 | # Write images 75 | if (iterations + 1) % config['image_save_iter'] == 0: 76 | with torch.no_grad(): 77 | test_image_outputs = trainer.sample(test_display_images_a, test_display_images_b) 78 | train_image_outputs = trainer.sample(train_display_images_a, train_display_images_b) 79 | 80 | write_2images(test_image_outputs, display_size, image_directory, 
'test_%08d' % (iterations + 1)) 81 | write_2images(train_image_outputs, display_size, image_directory, 'train_%08d' % (iterations + 1)) 82 | # HTML 83 | write_html(output_directory + "/index.html", iterations + 1, config['image_save_iter'], 'images') 84 | 85 | if (iterations + 1) % config['image_display_iter'] == 0: 86 | with torch.no_grad(): 87 | image_outputs = trainer.sample(train_display_images_a, train_display_images_b) 88 | write_2images(image_outputs, display_size, image_directory, 'train_current') 89 | 90 | # Save network weights 91 | if (iterations + 1) % config['snapshot_save_iter'] == 0: 92 | trainer.save(checkpoint_directory, iterations) 93 | 94 | iterations += 1 95 | if iterations >= max_iter: 96 | sys.exit('Finish training') 97 | 98 | -------------------------------------------------------------------------------- /TTUR-master/DCGAN_FID_batched/ops.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | from tensorflow.python.framework import ops 6 | 7 | from utils import * 8 | 9 | try: 10 | image_summary = tf.image_summary 11 | scalar_summary = tf.scalar_summary 12 | histogram_summary = tf.histogram_summary 13 | merge_summary = tf.merge_summary 14 | SummaryWriter = tf.train.SummaryWriter 15 | except: 16 | image_summary = tf.summary.image 17 | scalar_summary = tf.summary.scalar 18 | histogram_summary = tf.summary.histogram 19 | merge_summary = tf.summary.merge 20 | SummaryWriter = tf.summary.FileWriter 21 | 22 | if "concat_v2" in dir(tf): 23 | def concat(tensors, axis, *args, **kwargs): 24 | return tf.concat_v2(tensors, axis, *args, **kwargs) 25 | else: 26 | def concat(tensors, axis, *args, **kwargs): 27 | return tf.concat(tensors, axis, *args, **kwargs) 28 | 29 | class batch_norm(object): 30 | def __init__(self, scale=True, epsilon=1e-5, momentum = 0.9, name="batch_norm"): 31 | with tf.variable_scope(name): 32 | self.epsilon = epsilon 33 | self.momentum = momentum 34 | self.name = name 35 | self.scale = scale 36 | 37 | def __call__(self, x, train=True): 38 | return tf.contrib.layers.batch_norm(x, 39 | decay=self.momentum, 40 | updates_collections=None, 41 | epsilon=self.epsilon, 42 | scale=self.scale, 43 | is_training=train, 44 | scope=self.name) 45 | 46 | def conv_cond_concat(x, y): 47 | """Concatenate conditioning vector on feature map axis.""" 48 | x_shapes = x.get_shape() 49 | y_shapes = y.get_shape() 50 | return concat([ 51 | x, y*tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])], 3) 52 | 53 | def conv2d(input_, output_dim, 54 | k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, 55 | name="conv2d"): 56 | # ELU init stddev 57 | #n = k_h * k_w * (input_.get_shape().as_list()[-1] + output_dim) / 2.0 58 | #n = k_h * k_w * tf.sqrt(tf.cast(input_.get_shape().as_list()[-1] * output_dim, tf.float32)) 59 | n = k_h * k_w * input_.get_shape().as_list()[-1] 60 | #stddev = tf.sqrt(1.55052/n) 61 | with tf.variable_scope(name): 62 | w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim], 63 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 64 | conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME') 65 | 66 | biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0)) 67 | conv = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape()) 68 | 69 | return conv 70 | 71 | def deconv2d(input_, output_shape, 72 | k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, 73 | name="deconv2d", with_w=False): 74 | # ELU init 
stddev 75 | #n = k_h * k_w * (input_.get_shape().as_list()[-1] + output_shape[-1]) / 2.0 76 | #n = k_h * k_w * tf.sqrt(tf.cast(input_.get_shape().as_list()[-1] * output_shape[-1], tf.float32)) 77 | n = k_h * k_w *input_.get_shape().as_list()[-1] 78 | #stddev = tf.sqrt(1.55052/n) 79 | with tf.variable_scope(name): 80 | # filter : [height, width, output_channels, in_channels] 81 | w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]], 82 | initializer=tf.random_normal_initializer(stddev=stddev)) 83 | 84 | try: 85 | deconv = tf.nn.conv2d_transpose(input_, w, output_shape=output_shape, 86 | strides=[1, d_h, d_w, 1]) 87 | 88 | # Support for versions of TensorFlow before 0.7.0 89 | except AttributeError: 90 | deconv = tf.nn.deconv2d(input_, w, output_shape=output_shape, 91 | strides=[1, d_h, d_w, 1]) 92 | 93 | biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0)) 94 | deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape()) 95 | 96 | if with_w: 97 | return deconv, w, biases 98 | else: 99 | return deconv 100 | 101 | def lrelu(x, leak=0.2, name="lrelu"): 102 | return tf.maximum(x, leak*x) 103 | 104 | def elu(x, name="elu"): 105 | return(tf.nn.elu(x)) 106 | 107 | # Scaled ELU 108 | def selu(x, name="selu"): 109 | alpha = 1.6732632423543772848170429916717 110 | scale = 1.0507009873554804934193349852946 111 | return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x)) 112 | 113 | def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False): 114 | shape = input_.get_shape().as_list() 115 | 116 | with tf.variable_scope(scope or "Linear"): 117 | matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32, 118 | tf.random_normal_initializer(stddev=stddev)) 119 | bias = tf.get_variable("bias", [output_size], 120 | initializer=tf.constant_initializer(bias_start)) 121 | if with_w: 122 | return tf.matmul(input_, matrix) + bias, matrix, bias 123 | else: 124 | return tf.matmul(input_, matrix) + bias 125 | -------------------------------------------------------------------------------- /TTUR-master/FID_vs_Inception_Score/README.md: -------------------------------------------------------------------------------- 1 | # Comparison of FID and Inception Score 2 | 3 | These experiments should highlight a crucial difference between the FID and the Inception Score (IS). 4 | The purpose of a generative model is to learn a real world distribution. Thus a good performance measure 5 | should, roughly speaking, somehow capture how far off the model distribution is. The experiments show 6 | that, in this sense, the FID is a more useful measure. 7 | 8 | ## Methodology 9 | While the idea of the IS is to capture 1) how real the structures in the generated images are and 10 | 2) how much variability the generated samples have, there is no connection of the score to the 11 | real world distribution. Clearly, the assumptions of the IS are met best on the dataset it is trained 12 | on, namely the ImageNet data set. It is, however, questionable whether the assumptions carry over to other image 13 | datasets. As an example, consider the CelebA dataset. It consists of about 200k face images of celebrities. 14 | While assumption 1) still holds, it is not so clear why there should be high variability across samples. 15 | 16 | But the main point is: an evaluation method should indicate how well the real world 17 | distribution has been learned.
This implies that disturbed images should lead to a 18 | lower score or a higher distance, respectively. Thus, for the experiments we produce 19 | disturbed images of the CelebA dataset with increasing disturbance levels 20 | to evaluate the FID and IS on them. 21 | The IS is transformed to a distance as described in the TTUR paper. This is done to 22 | make the comparison between the two methods easier. We refer to the transformed 23 | IS as the IND, the Inception distance. 24 | 25 | ## Experiments 26 | 1. Gaussian noise: We constructed a matrix N with Gaussian noise scaled to [0, 255]. The 27 | noisy image is computed as (1 − α)X + αN for α ∈ {0, 0.25, 0.5, 0.75}. The larger α is, 28 | the more noise is added to the image and the larger the disturbance of the image. 29 | 30 | |FID|IND| 31 | |-|-| 32 | | | | 33 | 34 | 2. Gaussian blur: The image is convolved with a Gaussian kernel with standard deviation 35 | α ∈ {0, 1, 2, 4}. The larger α is, the larger the disturbance of the image, that is, 36 | the more the image is smoothed. 37 | 38 | |FID|IND| 39 | |-|-| 40 | | | | 41 | 42 | 43 | 3. Black rectangles: Five black rectangles are added to the image at randomly chosen 44 | locations. The rectangles cover parts of the image. The size of the rectangles is 45 | α times the image size, with α ∈ {0, 0.25, 0.5, 0.75}. The larger α is, the larger the disturbance 46 | of the image, that is, the more of the image is covered by black rectangles. 47 | 48 | |FID|IND| 49 | |-|-| 50 | | | | 51 | 52 | 53 | 4. Swirl: Parts of the image are transformed into a spiral, that is, a swirl (whirlpool 54 | effect). Consider the coordinate (x, y) in the noisy (swirled) image for which we want to 55 | find the color. Toward this end we need the reverse mapping for the swirl transformation, 56 | which gives the location that is mapped to (x, y). The disturbance level is given by the 57 | amount of swirl α ∈ {0, 1, 2, 4}. The larger α is, the larger the disturbance of the 58 | image via the amount of swirl. 59 | 60 | |FID|IND| 61 | |-|-| 62 | | | | 63 | 64 | 65 | 5. Salt and pepper noise: Some pixels of the image are set to black or white, where black is 66 | chosen with 50% probability (same for white). Pixels are randomly chosen for being flipped 67 | to white or black, where the ratio of pixels flipped to white or black is given by the noise 68 | level α ∈ {0, 0.1, 0.2, 0.3}. The larger α is, the more pixels are flipped to white or black and 69 | the larger the disturbance level. 70 | 71 | |FID|IND| 72 | |-|-| 73 | | | | 74 | 75 | 76 | 6. ImageNet contamination: From each of the 1,000 ImageNet classes, 5 images are randomly 77 | chosen, which gives 5,000 ImageNet images. The images are ensured to be RGB and to 78 | have a minimum size of 256x256. A percentage of α ∈ {0, 0.25, 0.5, 0.75} of the CelebA 79 | images has been replaced by ImageNet images. α = 0 means all images are from CelebA, 80 | α = 0.25 means that 75% of the images are from CelebA and 25% from ImageNet, etc. 81 | The larger α is, the larger the contamination of the CelebA dataset with 82 | ImageNet images. The larger the disturbance level is, the more the dataset deviates from the 83 | reference real world dataset.
84 | 85 | |FID|IND| 86 | |-|-| 87 | | | | 88 | -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/tflib/ops/linear.py: -------------------------------------------------------------------------------- 1 | import tflib as lib 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | _default_weightnorm = False 7 | def enable_default_weightnorm(): 8 | global _default_weightnorm 9 | _default_weightnorm = True 10 | 11 | def disable_default_weightnorm(): 12 | global _default_weightnorm 13 | _default_weightnorm = False 14 | 15 | _weights_stdev = None 16 | def set_weights_stdev(weights_stdev): 17 | global _weights_stdev 18 | _weights_stdev = weights_stdev 19 | 20 | def unset_weights_stdev(): 21 | global _weights_stdev 22 | _weights_stdev = None 23 | 24 | def Linear( 25 | name, 26 | input_dim, 27 | output_dim, 28 | inputs, 29 | biases=True, 30 | initialization=None, 31 | weightnorm=None, 32 | gain=1. 33 | ): 34 | """ 35 | initialization: None, `lecun`, 'glorot', `he`, 'glorot_he', `orthogonal`, `("uniform", range)` 36 | """ 37 | with tf.name_scope(name) as scope: 38 | 39 | def uniform(stdev, size): 40 | if _weights_stdev is not None: 41 | stdev = _weights_stdev 42 | return np.random.uniform( 43 | low=-stdev * np.sqrt(3), 44 | high=stdev * np.sqrt(3), 45 | size=size 46 | ).astype('float32') 47 | 48 | if initialization == 'lecun':# and input_dim != output_dim): 49 | # disabling orth. init for now because it's too slow 50 | weight_values = uniform( 51 | np.sqrt(1./input_dim), 52 | (input_dim, output_dim) 53 | ) 54 | 55 | elif initialization == 'glorot' or (initialization == None): 56 | 57 | weight_values = uniform( 58 | np.sqrt(2./(input_dim+output_dim)), 59 | (input_dim, output_dim) 60 | ) 61 | 62 | elif initialization == 'he': 63 | 64 | weight_values = uniform( 65 | np.sqrt(2./input_dim), 66 | (input_dim, output_dim) 67 | ) 68 | 69 | elif initialization == 'glorot_he': 70 | 71 | weight_values = uniform( 72 | np.sqrt(4./(input_dim+output_dim)), 73 | (input_dim, output_dim) 74 | ) 75 | 76 | elif initialization == 'orthogonal' or \ 77 | (initialization == None and input_dim == output_dim): 78 | 79 | # From lasagne 80 | def sample(shape): 81 | if len(shape) < 2: 82 | raise RuntimeError("Only shapes of length 2 or more are " 83 | "supported.") 84 | flat_shape = (shape[0], np.prod(shape[1:])) 85 | # TODO: why normal and not uniform? 
86 | a = np.random.normal(0.0, 1.0, flat_shape) 87 | u, _, v = np.linalg.svd(a, full_matrices=False) 88 | # pick the one with the correct shape 89 | q = u if u.shape == flat_shape else v 90 | q = q.reshape(shape) 91 | return q.astype('float32') 92 | weight_values = sample((input_dim, output_dim)) 93 | 94 | elif initialization[0] == 'uniform': 95 | 96 | weight_values = np.random.uniform( 97 | low=-initialization[1], 98 | high=initialization[1], 99 | size=(input_dim, output_dim) 100 | ).astype('float32') 101 | 102 | else: 103 | 104 | raise Exception('Invalid initialization!') 105 | 106 | weight_values *= gain 107 | 108 | weight = lib.param( 109 | name + '.W', 110 | weight_values 111 | ) 112 | 113 | if weightnorm==None: 114 | weightnorm = _default_weightnorm 115 | if weightnorm: 116 | norm_values = np.sqrt(np.sum(np.square(weight_values), axis=0)) 117 | # norm_values = np.linalg.norm(weight_values, axis=0) 118 | 119 | target_norms = lib.param( 120 | name + '.g', 121 | norm_values 122 | ) 123 | 124 | with tf.name_scope('weightnorm') as scope: 125 | norms = tf.sqrt(tf.reduce_sum(tf.square(weight), reduction_indices=[0])) 126 | weight = weight * (target_norms / norms) 127 | 128 | # if 'Discriminator' in name: 129 | # print "WARNING weight constraint on {}".format(name) 130 | # weight = tf.nn.softsign(10.*weight)*.1 131 | 132 | if inputs.get_shape().ndims == 2: 133 | result = tf.matmul(inputs, weight) 134 | else: 135 | reshaped_inputs = tf.reshape(inputs, [-1, input_dim]) 136 | result = tf.matmul(reshaped_inputs, weight) 137 | result = tf.reshape(result, tf.pack(tf.unpack(tf.shape(inputs))[:-1] + [output_dim])) 138 | 139 | if biases: 140 | result = tf.nn.bias_add( 141 | result, 142 | lib.param( 143 | name + '.b', 144 | np.zeros((output_dim,), dtype='float32') 145 | ) 146 | ) 147 | 148 | return result 149 | 150 | -------------------------------------------------------------------------------- /smooth.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 NVIDIA Corporation. All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
4 | """ 5 | from __future__ import print_function 6 | import sys 7 | sys.path.append('.') 8 | from utils import get_config 9 | from trainer import ERGAN_Trainer, to_gray 10 | import argparse 11 | from torch.autograd import Variable 12 | import torchvision.utils as vutils 13 | import sys 14 | import torch 15 | import imageio 16 | import os 17 | import numpy as np 18 | from torchvision import datasets, models, transforms 19 | from PIL import Image 20 | try: 21 | from itertools import izip as zip 22 | except ImportError: # will be 3.x series 23 | pass 24 | 25 | name = 'eye' 26 | 27 | if not os.path.isdir('models/%s'%name): 28 | assert 0, "please change the name to your model name" 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('--output_folder', type=str, default="./", help="output image path") 32 | parser.add_argument('--input_folder', type=str, default="inputs/swap/", help="input image path") 33 | parser.add_argument('--config', type=str, default='./outputs/%s/config.yaml'%name, help="net configuration") 34 | parser.add_argument('--checkpoint_gen', type=str, default="./outputs/%s/checkpoints/gen_00100000.pt"%name, help="checkpoint of autoencoders") 35 | parser.add_argument('--batchsize', default=1, type=int, help='batchsize') 36 | parser.add_argument('--a2b', type=int, default=1, help="1 for a2b and others for b2a") 37 | parser.add_argument('--seed', type=int, default=10, help="random seed") 38 | parser.add_argument('--synchronized', action='store_true', help="whether use synchronized style code or not") 39 | parser.add_argument('--output_only', action='store_true', help="whether use synchronized style code or not") 40 | parser.add_argument('--trainer', type=str, default='ERGAN', help="ERGAN") 41 | 42 | 43 | opts = parser.parse_args() 44 | 45 | torch.manual_seed(opts.seed) 46 | torch.cuda.manual_seed(opts.seed) 47 | if not os.path.exists(opts.output_folder): 48 | os.makedirs(opts.output_folder) 49 | 50 | # Load experiment setting 51 | config = get_config(opts.config) 52 | opts.num_style = 1 53 | 54 | # Setup model and data loader 55 | if opts.trainer == 'ERGAN': 56 | trainer = ERGAN_Trainer(config) 57 | else: 58 | sys.exit("Only support ERGAN") 59 | 60 | state_dict_gen = torch.load(opts.checkpoint_gen) 61 | trainer.gen_a.load_state_dict(state_dict_gen['a'], strict=False) 62 | trainer.gen_b.load_state_dict(state_dict_gen['b'], strict=False) 63 | 64 | trainer.cuda() 65 | trainer.eval() 66 | encode = trainer.gen_b.encode # encode function 67 | style_encode = trainer.gen_b.encode # encode function 68 | decode = trainer.gen_b.decode # decode function 69 | data_transforms = transforms.Compose([ 70 | transforms.Resize((224,224), interpolation=3), 71 | transforms.ToTensor(), 72 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]) 73 | 74 | image_datasets1 = datasets.ImageFolder(opts.input_folder+'/1', data_transforms) 75 | image_datasets2 = datasets.ImageFolder(opts.input_folder+'/2', data_transforms) 76 | dataloader_content = torch.utils.data.DataLoader(image_datasets1, batch_size=2, shuffle=False, num_workers=1) 77 | dataloader_style = torch.utils.data.DataLoader(image_datasets2, batch_size=2, shuffle=False, num_workers=1) 78 | ###################################################################### 79 | # recover image 80 | # ----------------- 81 | def recover(inp): 82 | """Imshow for Tensor.""" 83 | inp = inp.numpy().transpose((1, 2, 0)) 84 | mean = np.array([0.485, 0.456, 0.406]) 85 | std = np.array([0.229, 0.224, 0.225]) 86 | inp = std * inp + mean 87 | 
inp = inp * 255.0 88 | inp = np.clip(inp, 0, 255) 89 | return inp 90 | 91 | im = {} 92 | # data2 = next(iter(dataloader_structure)) 93 | # bg_img, _ = data2 94 | # bg_img = Variable(bg_img.cuda()) 95 | ff = [] 96 | gif = [] 97 | with torch.no_grad(): 98 | for data in dataloader_content: 99 | id_img, _ = data 100 | id_img = id_img.cuda() 101 | n, c, h, w = id_img.size() 102 | f, _ = encode(id_img) 103 | for data2 in dataloader_style: 104 | bg_img, _ = data2 105 | bg_img = bg_img.cuda() 106 | _, s = style_encode(bg_img) 107 | # Start testing 108 | for count in range(2): 109 | input1 = recover(id_img[count].squeeze().data.cpu()) 110 | im[count] = input1 111 | 112 | for i in range(10): 113 | f_tmp = f[count,:,:,:] 114 | tmp_s = 0.1*i*s[0] + (1-0.1*i)*s[1] 115 | tmp_s = tmp_s.view(1, -1).contiguous() 116 | outputs = decode(f_tmp.unsqueeze(0), tmp_s, bg_img) 117 | tmp = recover(outputs[0].data.cpu()) 118 | im[count] = np.concatenate((im[count], tmp), axis=1) 119 | #gif.append(tmp) 120 | break 121 | 122 | # save long image 123 | pic = np.concatenate( (im[0], im[1]) , axis=0) 124 | pic = Image.fromarray(pic.astype('uint8')) 125 | pic.save('smooth.jpg') 126 | 127 | # save gif 128 | #imageio.mimsave('./smooth.gif', gif) -------------------------------------------------------------------------------- /TTUR-master/WGAN_GP/language_helpers.py: -------------------------------------------------------------------------------- 1 | # 2 | # Taken from: https://github.com/igul222/improved_wgan_training 3 | # 4 | 5 | import collections 6 | import numpy as np 7 | import re 8 | 9 | def tokenize_string(sample): 10 | return tuple(sample.lower().split(' ')) 11 | 12 | class NgramLanguageModel(object): 13 | def __init__(self, n, samples, tokenize=False): 14 | if tokenize: 15 | tokenized_samples = [] 16 | for sample in samples: 17 | tokenized_samples.append(tokenize_string(sample)) 18 | samples = tokenized_samples 19 | 20 | self._n = n 21 | self._samples = samples 22 | self._ngram_counts = collections.defaultdict(int) 23 | self._total_ngrams = 0 24 | for ngram in self.ngrams(): 25 | self._ngram_counts[ngram] += 1 26 | self._total_ngrams += 1 27 | 28 | def ngrams(self): 29 | n = self._n 30 | for sample in self._samples: 31 | for i in range(len(sample)-n+1): 32 | yield sample[i:i+n] 33 | 34 | def unique_ngrams(self): 35 | return set(self._ngram_counts.keys()) 36 | 37 | def log_likelihood(self, ngram): 38 | if ngram not in self._ngram_counts: 39 | return -np.inf 40 | else: 41 | return np.log(self._ngram_counts[ngram]) - np.log(self._total_ngrams) 42 | 43 | def kl_to(self, p): 44 | # p is another NgramLanguageModel 45 | log_likelihood_ratios = [] 46 | for ngram in p.ngrams(): 47 | log_likelihood_ratios.append(p.log_likelihood(ngram) - self.log_likelihood(ngram)) 48 | return np.mean(log_likelihood_ratios) 49 | 50 | def cosine_sim_with(self, p): 51 | # p is another NgramLanguageModel 52 | p_dot_q = 0. 53 | p_norm = 0. 54 | q_norm = 0. 55 | for ngram in p.unique_ngrams(): 56 | p_i = np.exp(p.log_likelihood(ngram)) 57 | q_i = np.exp(self.log_likelihood(ngram)) 58 | p_dot_q += p_i * q_i 59 | p_norm += p_i**2 60 | for ngram in self.unique_ngrams(): 61 | q_i = np.exp(self.log_likelihood(ngram)) 62 | q_norm += q_i**2 63 | return p_dot_q / (np.sqrt(p_norm) * np.sqrt(q_norm)) 64 | 65 | def precision_wrt(self, p): 66 | # p is another NgramLanguageModel 67 | num = 0. 
68 | denom = 0 69 | p_ngrams = p.unique_ngrams() 70 | for ngram in self.unique_ngrams(): 71 | if ngram in p_ngrams: 72 | num += self._ngram_counts[ngram] 73 | denom += self._ngram_counts[ngram] 74 | return float(num) / denom 75 | 76 | def recall_wrt(self, p): 77 | return p.precision_wrt(self) 78 | 79 | def js_with(self, p): 80 | log_p = np.array([p.log_likelihood(ngram) for ngram in p.unique_ngrams()]) 81 | log_q = np.array([self.log_likelihood(ngram) for ngram in p.unique_ngrams()]) 82 | log_m = np.logaddexp(log_p - np.log(2), log_q - np.log(2)) 83 | kl_p_m = np.sum(np.exp(log_p) * (log_p - log_m)) 84 | 85 | log_p = np.array([p.log_likelihood(ngram) for ngram in self.unique_ngrams()]) 86 | log_q = np.array([self.log_likelihood(ngram) for ngram in self.unique_ngrams()]) 87 | log_m = np.logaddexp(log_p - np.log(2), log_q - np.log(2)) 88 | kl_q_m = np.sum(np.exp(log_q) * (log_q - log_m)) 89 | 90 | return 0.5*(kl_p_m + kl_q_m) / np.log(2) 91 | 92 | def load_dataset(max_length, max_n_examples, tokenize=False, max_vocab_size=2048, data_dir='/home/ishaan/data/1-billion-word-language-modeling-benchmark-r13output'): 93 | print("loading dataset...") 94 | 95 | lines = [] 96 | 97 | finished = False 98 | 99 | for i in range(99): 100 | path = data_dir+("/training-monolingual.tokenized.shuffled/news.en-{}-of-00100".format(str(i+1).zfill(5))) 101 | with open(path, 'r') as f: 102 | for line in f: 103 | line = line[:-1] 104 | if tokenize: 105 | line = tokenize_string(line) 106 | else: 107 | line = tuple(line) 108 | 109 | if len(line) > max_length: 110 | line = line[:max_length] 111 | 112 | lines.append(line + ( ("`",)*(max_length-len(line)) ) ) 113 | 114 | if len(lines) == max_n_examples: 115 | finished = True 116 | break 117 | if finished: 118 | break 119 | 120 | np.random.shuffle(lines) 121 | 122 | import collections 123 | counts = collections.Counter(char for line in lines for char in line) 124 | 125 | charmap = {'unk':0} 126 | inv_charmap = ['unk'] 127 | 128 | for char,count in counts.most_common(max_vocab_size-1): 129 | if char not in charmap: 130 | charmap[char] = len(inv_charmap) 131 | inv_charmap.append(char) 132 | 133 | filtered_lines = [] 134 | for line in lines: 135 | filtered_line = [] 136 | for char in line: 137 | if char in charmap: 138 | filtered_line.append(char) 139 | else: 140 | filtered_line.append('unk') 141 | filtered_lines.append(tuple(filtered_line)) 142 | 143 | #for i in range(100): 144 | # print(filtered_lines[i]) 145 | 146 | print("loaded %s lines in dataset" % (len(lines))) 147 | return filtered_lines, charmap, inv_charmap 148 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 NVIDIA Corporation. All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
4 | """ 5 | from utils import get_all_data_loaders, prepare_sub_folder, write_html, write_loss, get_config, write_2images, Timer 6 | import argparse 7 | from torch.autograd import Variable 8 | from trainer import ERGAN_Trainer, UNIT_Trainer 9 | import torch.backends.cudnn as cudnn 10 | import torch 11 | try: 12 | from itertools import izip as zip 13 | except ImportError: # will be 3.x series 14 | pass 15 | import os 16 | import sys 17 | import tensorboardX 18 | import shutil 19 | import scipy.misc 20 | import torch.nn.functional as F 21 | 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--config', type=str, default='configs/celebA_folder.yaml', help='Path to the config file.') 24 | parser.add_argument('--output_path', type=str, default='.', help="outputs path") 25 | parser.add_argument("--resume", action="store_true") 26 | parser.add_argument('--trainer', type=str, default='ERGAN', help="ERGAN|UNIT") 27 | opts = parser.parse_args() 28 | 29 | cudnn.benchmark = True 30 | 31 | # Load experiment setting 32 | config = get_config(opts.config) 33 | max_iter = config['max_iter'] 34 | display_size = config['display_size'] 35 | recon_cyc = config['recon_x_cyc_w'] 36 | config['vgg_model_path'] = opts.output_path 37 | 38 | # Setup model and data loader 39 | if opts.trainer == 'ERGAN': 40 | trainer = ERGAN_Trainer(config) 41 | elif opts.trainer == 'UNIT': 42 | trainer = UNIT_Trainer(config) 43 | else: 44 | sys.exit("Only support ERGAN|UNIT") 45 | trainer.cuda() 46 | train_loader_a, train_loader_b, test_loader_a, test_loader_b = get_all_data_loaders(config) 47 | train_display_images_a = torch.stack([train_loader_a.dataset[i] for i in range(display_size)]).cuda() 48 | train_display_images_b = torch.stack([train_loader_b.dataset[i] for i in range(display_size)]).cuda() 49 | test_display_images_a = torch.stack([test_loader_a.dataset[i] for i in range(display_size)]).cuda() 50 | test_display_images_b = torch.stack([test_loader_b.dataset[i] for i in range(display_size)]).cuda() 51 | 52 | 53 | # train_display_images_a = F.pad(train_display_images_a, (0,0,24,-24), mode = 'reflect') 54 | # train_display_images_b = F.pad(train_display_images_b, (0,0,24,-24), mode = 'reflect') 55 | # test_display_images_a = F.pad(test_display_images_a, (0,0,24,-24), mode = 'reflect') 56 | # test_display_images_b = F.pad(test_display_images_b, (0,0,24,-24), mode = 'reflect') 57 | 58 | # Setup logger and output folders 59 | model_name = os.path.splitext(os.path.basename(opts.config))[0] 60 | train_writer = tensorboardX.SummaryWriter(os.path.join(opts.output_path + "/logs", model_name)) 61 | output_directory = os.path.join(opts.output_path + "/outputs", model_name) 62 | checkpoint_directory, image_directory = prepare_sub_folder(output_directory) 63 | shutil.copy(opts.config, os.path.join(output_directory, 'config.yaml')) # copy config file to output folder 64 | 65 | # Start training 66 | iterations = trainer.resume(checkpoint_directory, hyperparameters=config) if opts.resume else 0 67 | while True: 68 | for it, (images_a, images_b) in enumerate(zip(train_loader_a, train_loader_b)): 69 | trainer.update_learning_rate() 70 | images_a, images_b = images_a.cuda(), images_b.cuda() 71 | 72 | # images_a = F.pad(images_a, (0, 0, 24, -24), mode='reflect') 73 | # images_b = F.pad(images_b, (0, 0, 24, -24), mode='reflect') 74 | 75 | with Timer("Elapsed time in update: %f"): 76 | # Main training code 77 | trainer.dis_update(images_a, images_b, config) 78 | trainer.gen_update(images_a, images_b, config) 79 | 
torch.cuda.synchronize() 80 | 81 | # Dump training stats in log file 82 | if (iterations + 1) % config['log_iter'] == 0: 83 | print("Iteration: %08d/%08d" % (iterations + 1, max_iter)) 84 | write_loss(iterations, trainer, train_writer) 85 | 86 | # Write images 87 | if (iterations + 1) % config['image_save_iter'] == 0: 88 | with torch.no_grad(): 89 | test_image_outputs = trainer.sample(test_display_images_a, test_display_images_b) 90 | train_image_outputs = trainer.sample(train_display_images_a, train_display_images_b) 91 | write_2images(test_image_outputs, display_size, image_directory, 'test_%08d' % (iterations + 1)) 92 | write_2images(train_image_outputs, display_size, image_directory, 'train_%08d' % (iterations + 1)) 93 | del(test_image_outputs, train_image_outputs ) 94 | # HTML 95 | write_html(output_directory + "/index.html", iterations + 1, config['image_save_iter'], 'images') 96 | 97 | if (iterations + 1) % config['image_display_iter'] == 0: 98 | with torch.no_grad(): 99 | image_outputs = trainer.sample(train_display_images_a, train_display_images_b) 100 | write_2images(image_outputs, display_size, image_directory, 'train_current') 101 | del(image_outputs) 102 | # Save network weights 103 | if (iterations + 1) % config['snapshot_save_iter'] == 0: 104 | trainer.save(checkpoint_directory, iterations) 105 | 106 | iterations += 1 107 | if iterations >= max_iter: 108 | sys.exit('Finished training') 109 | # warm up 110 | if iterations / 10000.0 < 5: 111 | config['recon_x_cyc_w'] = recon_cyc*iterations /10000.0 112 | else: 113 | config['recon_x_cyc_w'] = recon_cyc*5 114 | 115 | 116 | -------------------------------------------------------------------------------- /SSIM/train.py: -------------------------------------------------------------------------------- 1 | import torch.backends.cudnn as cudnn 2 | cudnn.benchmark=False 3 | 4 | import numpy as np 5 | import time 6 | import os 7 | from models import dist_model as dm 8 | from data import data_loader as dl 9 | import argparse 10 | from util.visualizer import Visualizer 11 | from IPython import embed 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--datasets', type=str, nargs='+', default=['train/traditional','train/cnn','train/mix'], help='datasets to train on: [train/traditional],[train/cnn],[train/mix],[val/traditional],[val/cnn],[val/color],[val/deblur],[val/frameinterp],[val/superres]') 15 | parser.add_argument('--model', type=str, default='net-lin', help='distance model type [net-lin] for linearly calibrated net, [net] for off-the-shelf network, [l2] for euclidean distance, [ssim] for Structured Similarity Image Metric') 16 | parser.add_argument('--net', type=str, default='alex', help='[squeeze], [alex], or [vgg] for network architectures') 17 | parser.add_argument('--batch_size', type=int, default=50, help='batch size to test image patches in') 18 | parser.add_argument('--use_gpu', action='store_true', help='turn on flag to use GPU') 19 | parser.add_argument('--nepoch', type=int, default=5, help='# epochs at base learning rate') 20 | parser.add_argument('--nepoch_decay', type=int, default=5, help='# additional epochs at linearly decaying learning rate') 21 | parser.add_argument('--display_freq', type=int, default=5000, help='frequency (in instances) of showing training results on screen') 22 | parser.add_argument('--print_freq', type=int, default=5000, help='frequency (in instances) of showing training results on console') 23 | parser.add_argument('--save_latest_freq', type=int, default=10000, help='frequency (in 
instances) of saving the latest results') 24 | parser.add_argument('--save_epoch_freq', type=int, default=1, help='frequency of saving checkpoints at the end of epochs') 25 | parser.add_argument('--display_id', type=int, default=0, help='window id of the visdom display, [0] for no displaying') 26 | parser.add_argument('--display_winsize', type=int, default=256, help='display window size') 27 | parser.add_argument('--display_port', type=int, default=8001, help='visdom display port') 28 | parser.add_argument('--use_html', action='store_true', help='save off html pages') 29 | parser.add_argument('--checkpoints_dir', type=str, default='checkpoints', help='checkpoints directory') 30 | parser.add_argument('--name', type=str, default='tmp', help='directory name for training') 31 | 32 | parser.add_argument('--from_scratch', action='store_true', help='model was initialized from scratch') 33 | parser.add_argument('--train_trunk', action='store_true', help='model trunk was trained/tuned') 34 | 35 | opt = parser.parse_args() 36 | opt.train_plot = True 37 | opt.save_dir = os.path.join(opt.checkpoints_dir,opt.name) 38 | if(not os.path.exists(opt.save_dir)): 39 | os.mkdir(opt.save_dir) 40 | 41 | # initialize model 42 | model = dm.DistModel() 43 | # model.initialize(model=opt.model,net=opt.net,use_gpu=opt.use_gpu, is_train=True) 44 | model.initialize(model=opt.model,net=opt.net,use_gpu=opt.use_gpu, is_train=True, pnet_rand=opt.from_scratch, pnet_tune=opt.train_trunk) 45 | 46 | # load data from all training sets 47 | data_loader = dl.CreateDataLoader(opt.datasets,dataset_mode='2afc', batch_size=opt.batch_size, serial_batches=False) 48 | dataset = data_loader.load_data() 49 | dataset_size = len(data_loader) 50 | D = len(dataset) 51 | print('Loading %i instances from'%dataset_size,opt.datasets) 52 | visualizer = Visualizer(opt) 53 | 54 | total_steps = 0 55 | fid = open(os.path.join(opt.checkpoints_dir,opt.name,'train_log.txt'),'w+') 56 | for epoch in range(1, opt.nepoch + opt.nepoch_decay + 1): 57 | epoch_start_time = time.time() 58 | for i, data in enumerate(dataset): 59 | iter_start_time = time.time() 60 | total_steps += opt.batch_size 61 | epoch_iter = total_steps - dataset_size * (epoch - 1) 62 | 63 | model.set_input(data) 64 | model.optimize_parameters() 65 | 66 | if total_steps % opt.display_freq == 0: 67 | visualizer.display_current_results(model.get_current_visuals(), epoch) 68 | 69 | if total_steps % opt.print_freq == 0: 70 | errors = model.get_current_errors() 71 | t = (time.time()-iter_start_time)/opt.batch_size 72 | t2o = (time.time()-epoch_start_time)/3600. 
73 | t2 = t2o*D/(i+.0001) 74 | visualizer.print_current_errors(epoch, epoch_iter, errors, t, t2=t2, t2o=t2o, fid=fid) 75 | 76 | for key in errors.keys(): 77 | visualizer.plot_current_errors_save(epoch, float(epoch_iter)/dataset_size, opt, errors, keys=[key,], name=key, to_plot=opt.train_plot) 78 | 79 | if opt.display_id > 0: 80 | visualizer.plot_current_errors(epoch, float(epoch_iter)/dataset_size, opt, errors) 81 | 82 | if total_steps % opt.save_latest_freq == 0: 83 | print('saving the latest model (epoch %d, total_steps %d)' % 84 | (epoch, total_steps)) 85 | model.save(opt.save_dir, 'latest') 86 | 87 | if epoch % opt.save_epoch_freq == 0: 88 | print('saving the model at the end of epoch %d, iters %d' % 89 | (epoch, total_steps)) 90 | model.save(opt.save_dir, 'latest') 91 | model.save(opt.save_dir, epoch) 92 | 93 | print('End of epoch %d / %d \t Time Taken: %d sec' % 94 | (epoch, opt.nepoch + opt.nepoch_decay, time.time() - epoch_start_time)) 95 | 96 | if epoch > opt.nepoch: 97 | model.update_learning_rate(opt.nepoch_decay) 98 | 99 | # model.save_done(True) 100 | fid.close() 101 | -------------------------------------------------------------------------------- /swap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 NVIDIA Corporation. All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 4 | """ 5 | from __future__ import print_function 6 | import sys 7 | sys.path.append('.') 8 | from utils import get_config 9 | from trainer import ERGAN_Trainer, to_gray 10 | import argparse 11 | from torch.autograd import Variable 12 | import torchvision.utils as vutils 13 | import sys 14 | import torch 15 | import os 16 | import numpy as np 17 | from torchvision import datasets, models, transforms 18 | from PIL import Image 19 | try: 20 | from itertools import izip as zip 21 | except ImportError: # will be 3.x series 22 | pass 23 | 24 | name = 'eye' 25 | 26 | if not os.path.isdir('models/%s'%name): 27 | assert 0, "please change the name to your model name" 28 | 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument('--output_folder', type=str, default="./", help="output image path") 31 | parser.add_argument('--input_folder', type=str, default="inputs/swap/", help="input image path") 32 | 33 | parser.add_argument('--config', type=str, default='./outputs/%s/config.yaml'%name, help="net configuration") 34 | parser.add_argument('--checkpoint_gen', type=str, default="./outputs/%s/checkpoints/gen_00100000.pt"%name, help="checkpoint of autoencoders") 35 | #parser.add_argument('--checkpoint_id', type=str, default="./outputs/%s/checkpoints/id_00100000.pt"%name, help="checkpoint of autoencoders") 36 | parser.add_argument('--batchsize', default=1, type=int, help='batchsize') 37 | parser.add_argument('--a2b', type=int, default=1, help="1 for a2b and others for b2a") 38 | parser.add_argument('--seed', type=int, default=10, help="random seed") 39 | parser.add_argument('--synchronized', action='store_true', help="whether use synchronized style code or not") 40 | parser.add_argument('--output_only', action='store_true', help="whether use synchronized style code or not") 41 | parser.add_argument('--trainer', type=str, default='ERGAN', help="ERGAN") 42 | 43 | 44 | opts = parser.parse_args() 45 | 46 | torch.manual_seed(opts.seed) 47 | torch.cuda.manual_seed(opts.seed) 48 | if not os.path.exists(opts.output_folder): 49 | os.makedirs(opts.output_folder) 50 | 51 | # Load 
experiment setting 52 | config = get_config(opts.config) 53 | opts.num_style = 1 54 | 55 | # Setup model and data loader 56 | if opts.trainer == 'ERGAN': 57 | trainer = ERGAN_Trainer(config) 58 | else: 59 | sys.exit("Only support ERGAN") 60 | 61 | state_dict_gen = torch.load(opts.checkpoint_gen) 62 | trainer.gen_a.load_state_dict(state_dict_gen['a'], strict=False) 63 | trainer.gen_b.load_state_dict(state_dict_gen['b'], strict=False) 64 | 65 | # state_dict_id = torch.load(opts.checkpoint_id) 66 | # trainer.id_a.load_state_dict(state_dict_id['a'], strict=False) 67 | # trainer.id_b = trainer.id_a 68 | 69 | trainer.cuda() 70 | trainer.eval() 71 | encode = trainer.gen_a.encode # encode function 72 | style_encode_a = trainer.gen_a.encode # encode function 73 | style_encode_b = trainer.gen_b.encode 74 | #id_encode = trainer.id_a # encode function 75 | decode_a = trainer.gen_a.decode # decode function 76 | decode_b = trainer.gen_b.decode 77 | 78 | data_transforms = transforms.Compose([ 79 | transforms.Resize((224,224), interpolation=3), 80 | transforms.ToTensor(), 81 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 82 | ]) 83 | 84 | image_datasets1 = datasets.ImageFolder(opts.input_folder+'/1', data_transforms) 85 | image_datasets2 = datasets.ImageFolder(opts.input_folder+'/2', data_transforms) 86 | dataloader_content = torch.utils.data.DataLoader(image_datasets1, batch_size=1, shuffle=False, num_workers=1) 87 | dataloader_structure = torch.utils.data.DataLoader(image_datasets2, batch_size=1, shuffle=False, num_workers=1) 88 | 89 | ###################################################################### 90 | # recover image 91 | # ----------------- 92 | def recover(inp): 93 | """Imshow for Tensor.""" 94 | inp = inp.numpy().transpose((1, 2, 0)) 95 | mean = np.array([0.485, 0.456, 0.406]) 96 | std = np.array([0.229, 0.224, 0.225]) 97 | inp = std * inp + mean 98 | inp = inp * 255.0 99 | inp = np.clip(inp, 0, 255) 100 | return inp 101 | 102 | def pad(inp, pad = 3): 103 | h = inp.shape[0] 104 | w = inp.shape[1] 105 | bg = np.zeros((h+2*pad, w+2*pad, inp.shape[2])) 106 | bg[pad:pad+h, pad:pad+w, :] = inp 107 | return bg 108 | 109 | im = {} 110 | npad = 3 111 | count = 0 112 | gray = to_gray(False) 113 | 114 | def generate(data, data2, decode,style_encode): 115 | bg_img, _ = data2 116 | #bg_img = gray(bg_img) 117 | bg_img = Variable(bg_img.cuda()) 118 | id_img, _ = data 119 | id_img = Variable(id_img.cuda()) 120 | n, c, h, w = id_img.size() 121 | # Start testing 122 | _, s = style_encode(bg_img) 123 | f, _ = encode(id_img) 124 | output = decode(f, s, id_img) 125 | return output.squeeze().data.cpu() 126 | 127 | w = np.ones( (16, 224+2*npad,3))*255 #white row 128 | w2 = np.ones( (32, 224+2*npad,3))*255 #white row 129 | with torch.no_grad(): 130 | for data, data2 in zip(dataloader_content, dataloader_structure): 131 | im1 = pad(recover(data[0].squeeze()), pad= npad) 132 | im2 = pad(recover(data2[0].squeeze()), pad= npad) 133 | output1 = pad(recover(generate(data, data2, decode_b, style_encode_b)), pad= npad) 134 | output2 = pad(recover(generate(data2, data, decode_a, style_encode_a)), pad= npad) 135 | im[count] = np.concatenate((im1, w, im2, w2, output2, w, output1), axis=0) 136 | count +=1 137 | print(count) 138 | 139 | white_col = np.ones( (im[0].shape[0], 16, 3))*255 140 | 141 | for i in range(count): 142 | if i == 0: 143 | pic = im[0] 144 | else: 145 | pic = np.concatenate((pic, im[i]), axis=1) 146 | pic = np.concatenate((pic, white_col), axis=1) 147 | 148 | pic = 
Image.fromarray(pic.astype('uint8')) 149 | pic.save('swap.jpg') 150 | 151 | -------------------------------------------------------------------------------- /test_batch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 NVIDIA Corporation. All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 4 | """ 5 | from __future__ import print_function 6 | from utils import get_config, get_data_loader_folder, pytorch03_to_pytorch04, load_inception 7 | from trainer import ERGAN_Trainer, UNIT_Trainer 8 | from torch import nn 9 | from scipy.stats import entropy 10 | import torch.nn.functional as F 11 | import argparse 12 | from torch.autograd import Variable 13 | from data import ImageFolder 14 | import numpy as np 15 | import torchvision.utils as vutils 16 | try: 17 | from itertools import izip as zip 18 | except ImportError: # will be 3.x series 19 | pass 20 | import sys 21 | import torch 22 | import os 23 | from PIL import Image 24 | import pdb 25 | 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('--config', type=str, default='configs/celebA_folder', help='Path to the config file.') 28 | parser.add_argument('--A', type=str, default = 'datasets/celebA/trainA_test', help="input image folder A") 29 | parser.add_argument('--B', type=str, default = 'datasets/celebA/trainB',help="input image folder B") 30 | parser.add_argument('--output_folder', type=str, help="output image folder") 31 | parser.add_argument('--checkpoint', type=str, help="checkpoint of autoencoders") 32 | parser.add_argument('--a2b', action='store_true', help="a2b / b2a" ) 33 | parser.add_argument('--seed', type=int, default=10, help="random seed") 34 | parser.add_argument('--output_path', type=str, default='.', help="path for logs, checkpoints, and VGG model weight") 35 | parser.add_argument('--trainer', type=str, default='ERGAN', help="ERGAN|UNIT") 36 | 37 | opts = parser.parse_args() 38 | 39 | torch.manual_seed(opts.seed) 40 | torch.cuda.manual_seed(opts.seed) 41 | if not os.path.exists(opts.output_folder): 42 | os.makedirs(opts.output_folder) 43 | 44 | # Load experiment setting 45 | config = get_config(opts.config) 46 | input_dim = config['input_dim_a'] if opts.a2b else config['input_dim_b'] 47 | new_size = config['new_size'] 48 | crop_image_height =config['crop_image_height'] 49 | crop_image_width =config['crop_image_width'] 50 | 51 | # Setup model and data loader 52 | 53 | data_loader_a = get_data_loader_folder(opts.A, 1, False, new_size=new_size, height=crop_image_height, width=crop_image_width,crop=True) 54 | data_loader_b = get_data_loader_folder(opts.B, 1, False, new_size=new_size, height=crop_image_height, width=crop_image_width,crop=True) 55 | imagea_names = ImageFolder(opts.A, transform=None, return_paths=True) 56 | imageb_names = ImageFolder(opts.B, transform=None, return_paths=True) 57 | 58 | config['vgg_model_path'] = opts.output_path 59 | if opts.trainer == 'ERGAN': 60 | style_dim = config['gen']['style_dim'] 61 | trainer = ERGAN_Trainer(config) 62 | elif opts.trainer == 'UNIT': 63 | trainer = UNIT_Trainer(config) 64 | else: 65 | sys.exit("Only support ERGAN|UNIT") 66 | 67 | try: 68 | state_dict = torch.load(opts.checkpoint) 69 | trainer.gen_a.load_state_dict(state_dict['a']) 70 | trainer.gen_b.load_state_dict(state_dict['b']) 71 | except: 72 | state_dict = pytorch03_to_pytorch04(torch.load(opts.checkpoint), opts.trainer) 73 | 
trainer.gen_a.load_state_dict(state_dict['a']) 74 | trainer.gen_b.load_state_dict(state_dict['b']) 75 | 76 | trainer.cuda() 77 | trainer.eval() 78 | 79 | def flip_lr(img): 80 | '''flip horizontal''' 81 | inv_idx = torch.arange(img.size(3)-1,-1,-1).long() # N x C x H x W 82 | img_flip = img.index_select(3,inv_idx) 83 | return img_flip 84 | 85 | def recover(inp): 86 | """Imshow for Tensor.""" 87 | inp = inp.numpy().transpose((1, 2, 0)) 88 | mean = np.array([0.485, 0.456, 0.406]) 89 | std = np.array([0.229, 0.224, 0.225]) 90 | inp = std * inp + mean 91 | inp = inp * 255.0 92 | inp = np.clip(inp, 0, 255) 93 | return inp 94 | 95 | # Start testing 96 | with torch.no_grad(): 97 | if opts.trainer == 'ERGAN': 98 | 99 | for i, (images_a, images_b, imagea_names, imageb_names) in enumerate(zip(data_loader_a, data_loader_b, imagea_names, imageb_names)): 100 | basename_b = os.path.basename(imageb_names[1]) 101 | basename_a = os.path.basename(imagea_names[1]) 102 | #images_a_flip, images_b_flip = flip_lr(images_a).cuda(), flip_lr(images_b).cuda() 103 | images_a, images_b = images_a.cuda(), images_b.cuda() 104 | 105 | c_a, s_a_fake = trainer.gen_a.encode(images_a) 106 | # _, s_a_fake_flip = trainer.gen_a.encode(images_a_flip) 107 | # s_a_fake = (s_a_fake+s_a_fake_flip)/2 108 | c_b, s_b_fake = trainer.gen_b.encode(images_b) 109 | 110 | if opts.a2b: 111 | for j in range(images_b.size(0)): 112 | s_b = s_b_fake[j].unsqueeze(0) 113 | #s_b = style_b[j].unsqueeze(0) 114 | # s_b[s_b>0.7]=0.7 115 | # s_b[s_b < -0.7] = -0.7 116 | outputs = trainer.gen_b.decode(c_a, s_b, images_a) 117 | im = recover(outputs[0].data.cpu()) 118 | im = Image.fromarray(im.astype('uint8')) 119 | path = os.path.join(opts.output_folder, basename_a) 120 | im = im.resize((new_size, new_size), Image.ANTIALIAS) 121 | im.save(path) 122 | 123 | else: 124 | for j in range(images_a.size(0)): 125 | s_a = s_a_fake[j].unsqueeze(0) 126 | # s_a[s_a > 0.7] = 0.7 127 | # s_a[s_a <- 0.7] = -0.7 128 | outputs = trainer.gen_a.decode(c_b, s_a, images_b) 129 | im = recover(outputs[0].data.cpu()) 130 | im = Image.fromarray(im.astype('uint8')) 131 | path = os.path.join(opts.output_folder, basename_b) 132 | im = im.resize((new_size, new_size), Image.ANTIALIAS) 133 | im.save(path) 134 | #vutils.save_image(images_a.data, os.path.join(opts.output_folder, 'input{:06d}.jpg'.format(i)), padding=0, normalize=True) 135 | else: 136 | pass 137 | 138 | 139 | -------------------------------------------------------------------------------- /MUNIT-master/UNIT_ssim_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 NVIDIA Corporation. All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
4 | """ 5 | from __future__ import print_function 6 | from UNIT_utils import get_config, get_data_loader_folder, pytorch03_to_pytorch04 7 | from UNIT_trainer import MUNIT_Trainer, UNIT_Trainer 8 | import argparse 9 | from torch.autograd import Variable 10 | from data import ImageFolder 11 | import torchvision.utils as vutils 12 | import sys 13 | import torch 14 | import os 15 | from torchvision import transforms 16 | from PIL import Image 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--config', type=str, help="net configuration") 20 | parser.add_argument('--input_folder', type=str, help="input image path") 21 | parser.add_argument('--output_folder', type=str, help="output image path") 22 | parser.add_argument('--checkpoint', type=str, help="checkpoint of autoencoders") 23 | parser.add_argument('--style', type=str, default='', help="style image path") 24 | parser.add_argument('--a2b', action='store_true', help=" a2b ") 25 | parser.add_argument('--seed', type=int, default=10, help="random seed") 26 | parser.add_argument('--num_style',type=int, default=1, help="number of styles to sample") 27 | # parser.add_argument('--synchronized', action='store_true', help="whether use synchronized style code or not") 28 | #parser.add_argument('--output_only', action='store_true', help="whether use synchronized style code or not") 29 | parser.add_argument('--output_path', type=str, default='.', help="path for logs, checkpoints, and VGG model weight") 30 | parser.add_argument('--trainer', type=str, default='MUNIT', help="MUNIT|UNIT") 31 | opts = parser.parse_args() 32 | 33 | 34 | 35 | torch.manual_seed(opts.seed) 36 | torch.cuda.manual_seed(opts.seed) 37 | if not os.path.exists(opts.output_folder): 38 | os.makedirs(opts.output_folder) 39 | 40 | # Load experiment setting 41 | config = get_config(opts.config) 42 | #opts.num_style = 1 if opts.style != '' else opts.num_style 43 | 44 | image_names = ImageFolder(opts.input_folder, transform=None, return_paths=True) 45 | data_loader = get_data_loader_folder(opts.input_folder, 1, False, new_size=config['new_size'], height=config['crop_image_height'], 46 | width=config['crop_image_width'],crop=True) 47 | height = config['crop_image_height'] 48 | width = config['crop_image_width'] 49 | # Setup model and data loader 50 | config['vgg_model_path'] = opts.output_path 51 | if opts.trainer == 'MUNIT': 52 | style_dim = config['gen']['style_dim'] 53 | trainer = MUNIT_Trainer(config) 54 | elif opts.trainer == 'UNIT': 55 | trainer = UNIT_Trainer(config) 56 | else: 57 | sys.exit("Only support MUNIT|UNIT") 58 | 59 | try: 60 | state_dict = torch.load(opts.checkpoint) 61 | trainer.gen_a.load_state_dict(state_dict['a']) 62 | trainer.gen_b.load_state_dict(state_dict['b']) 63 | except: 64 | state_dict = pytorch03_to_pytorch04(torch.load(opts.checkpoint), opts.trainer) 65 | trainer.gen_a.load_state_dict(state_dict['a']) 66 | trainer.gen_b.load_state_dict(state_dict['b']) 67 | 68 | trainer.cuda() 69 | trainer.eval() 70 | # if opts.a2b: 71 | # encode = trainer.gen_a.encode 72 | # style_encode = trainer.gen_b.encode 73 | # decode = trainer.gen_b.decode 74 | # elif opts.b2a: 75 | # encode = trainer.gen_b.encode 76 | # style_encode = trainer.gen_a.encode 77 | # decode = trainer.gen_a.decode 78 | encode = trainer.gen_a.encode if opts.a2b else trainer.gen_b.encode # encode function 79 | style_encode = trainer.gen_b.encode if opts.a2b else trainer.gen_a.encode # encode function 80 | decode = trainer.gen_b.decode if opts.a2b else trainer.gen_a.decode # decode function 
81 | 82 | if 'new_size' in config: 83 | new_size = config['new_size'] 84 | # else: 85 | # if opts.a2b==1: 86 | # new_size = config['new_size_a'] 87 | # else: 88 | # new_size = config['new_size_b'] 89 | 90 | with torch.no_grad(): 91 | transform = transforms.Compose([transforms.CenterCrop((height, width)), 92 | transforms.Resize((new_size, new_size)), 93 | transforms.ToTensor(), 94 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) 95 | style_image = Variable(transform(Image.open(opts.style).convert('RGB')).unsqueeze(0).cuda()) 96 | 97 | # Start testing 98 | content, _ = encode(style_image) 99 | if opts.trainer == 'MUNIT': 100 | for i, (images, names) in enumerate(zip(data_loader, image_names)): 101 | #print(names[1]) 102 | images = Variable(images.cuda(), volatile=True) 103 | #style = Variable(torch.randn(images.size(0), style_dim, 1, 1).cuda(), volatile=True) 104 | _, style = style_encode(images) 105 | for j in range(images.size(0)): 106 | s = style[j].unsqueeze(0) 107 | outputs = decode(content, s) 108 | outputs = (outputs + 1) / 2. 109 | basename = os.path.basename(names[1]) 110 | path = os.path.join(opts.output_folder, basename) 111 | if not os.path.exists(os.path.dirname(path)): 112 | os.makedirs(os.path.dirname(path)) 113 | # 114 | vutils.save_image(outputs.data, path, padding=0, normalize=True) 115 | # if not opts.output_only: 116 | # vutils.save_image(images.data, os.path.join(opts.output_folder, 'input{:03d}.jpg'.format(i)), 117 | # padding=0, normalize=True) 118 | elif opts.trainer == 'UNIT': 119 | for i, (images, names) in enumerate(zip(data_loader, image_names)): 120 | #print(names[1]) 121 | images = Variable(images.cuda(), volatile=True) 122 | hiddens, _ = encode(style_image) 123 | #print(hiddens.shape) 124 | #_, noise = encode(images) 125 | noise = Variable(torch.randn(hiddens.size()).cuda(hiddens.data.get_device())) 126 | outputs = decode(hiddens+noise) 127 | outputs = (outputs + 1) / 2. 128 | basename = os.path.basename(names[1]) 129 | path = os.path.join(opts.output_folder, basename) 130 | vutils.save_image(outputs.data, path, padding=0, normalize=True) 131 | # if not opts.output_only: 132 | # # also save input images 133 | # vutils.save_image(images.data, os.path.join(opts.output_folder, 'input{:03d}.jpg'.format(i)), padding=0, normalize=True) 134 | else: 135 | pass 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /ssim_batch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2018 NVIDIA Corporation. All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
4 | """ 5 | from __future__ import print_function 6 | # from UNIT_utils import get_config, get_data_loader_folder, pytorch03_to_pytorch04 7 | #from UNIT_trainer import MUNIT_Trainer, UNIT_Trainer 8 | from utils import get_config, get_data_loader_folder, pytorch03_to_pytorch04 9 | from trainer import ERGAN_Trainer, UNIT_Trainer 10 | import argparse 11 | from torch.autograd import Variable 12 | from data import ImageFolder 13 | import torchvision.utils as vutils 14 | import sys 15 | import torch 16 | import os, random 17 | import numpy as np 18 | from torchvision import transforms 19 | from PIL import Image 20 | import pdb 21 | 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--config', type=str, help="net configuration") 24 | parser.add_argument('--A', type=str, help="input image path") 25 | parser.add_argument('--output_folder', type=str, help="output image path") 26 | parser.add_argument('--checkpoint', type=str, help="checkpoint of autoencoders") 27 | parser.add_argument('--B', type=str, help="style image path") 28 | parser.add_argument('--a2b', action='store_true', help="for a2b") 29 | parser.add_argument('--seed', type=int, default=10, help="random seed") 30 | # parser.add_argument('--synchronized', action='store_true', help="whether use synchronized style code or not") 31 | parser.add_argument('--output_path', type=str, default='.', help="path for logs, checkpoints, and VGG model weight") 32 | parser.add_argument('--trainer', type=str, default='ERGAN', help="ERGAN|UNIT") 33 | opts = parser.parse_args() 34 | 35 | 36 | 37 | torch.manual_seed(opts.seed) 38 | torch.cuda.manual_seed(opts.seed) 39 | if not os.path.exists(opts.output_folder): 40 | os.makedirs(opts.output_folder) 41 | 42 | # Load experiment setting 43 | config = get_config(opts.config) 44 | input_dim = config['input_dim_a'] if opts.a2b else config['input_dim_b'] 45 | new_size = config['new_size'] 46 | 47 | # imagea_names = ImageFolder(opts.A, transform=None, return_paths=True) 48 | # imageb_names = ImageFolder(opts.B, transform=None, return_paths=True) 49 | 50 | data_loader_a = get_data_loader_folder(opts.A, 1, False, new_size=new_size, height=224, width=224,crop=False) 51 | data_loader_b = get_data_loader_folder(opts.B, 1, False, new_size=new_size, height=224, width=224,crop=False) 52 | 53 | # Setup model and data loader 54 | config['vgg_model_path'] = opts.output_path 55 | if opts.trainer == 'ERGAN': 56 | style_dim = config['gen']['style_dim'] 57 | trainer = ERGAN_Trainer(config) 58 | elif opts.trainer == 'UNIT': 59 | trainer = UNIT_Trainer(config) 60 | else: 61 | sys.exit("Only support ERGAN|UNIT") 62 | 63 | try: 64 | state_dict = torch.load(opts.checkpoint) 65 | trainer.gen_a.load_state_dict(state_dict['a']) 66 | trainer.gen_b.load_state_dict(state_dict['b']) 67 | except: 68 | state_dict = pytorch03_to_pytorch04(torch.load(opts.checkpoint), opts.trainer) 69 | trainer.gen_a.load_state_dict(state_dict['a']) 70 | trainer.gen_b.load_state_dict(state_dict['b']) 71 | 72 | trainer.cuda() 73 | trainer.eval() 74 | 75 | if 'new_size' in config: 76 | new_size = config['new_size'] 77 | 78 | def recover(inp): 79 | """Imshow for Tensor.""" 80 | inp = inp.numpy().transpose((1, 2, 0)) 81 | mean = np.array([0.485, 0.456, 0.406]) 82 | std = np.array([0.229, 0.224, 0.225]) 83 | inp = std * inp + mean 84 | inp = inp * 255.0 85 | inp = np.clip(inp, 0, 255) 86 | return inp 87 | 88 | 89 | def img_transform(img): 90 | transform = transforms.Compose([# transforms.CenterCrop((120, 120)), 91 | transforms.Resize((224, 224)), 92 | 
transforms.ToTensor(), 93 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]) 94 | image = Variable(transform(Image.open(img).convert('RGB')).unsqueeze(0).cuda()) 95 | return image 96 | 97 | with torch.no_grad(): 98 | 99 | a = 100 100 | b = 100 101 | # Start testing 102 | if opts.trainer == 'ERGAN': 103 | 104 | dir_a = os.listdir(opts.A) 105 | dir_b = os.listdir(opts.B) 106 | if opts.a2b: 107 | sample_a = random.sample(dir_a, a) 108 | sample_b = random.sample(dir_b, b) 109 | i = 0 110 | for a in sample_a: 111 | #print(a) 112 | images_a = img_transform( os.path.join(opts.A ,a)) 113 | c_a, _ = trainer.gen_a.encode(images_a) 114 | j = 0 115 | for b in sample_b: 116 | 117 | images_b = img_transform(os.path.join(opts.B , b)) 118 | _, s_b_fake = trainer.gen_b.encode(images_b) 119 | for n in range(images_b.size(0)): 120 | s_b = s_b_fake[n].unsqueeze(0) 121 | # s_b[s_b > 0.7] = 0.7 122 | # s_b[s_b < -0.7] = -0.7 123 | outputs = trainer.gen_b.decode(c_a, s_b, images_a) 124 | im = recover(outputs[0].data.cpu()) 125 | im = Image.fromarray(im.astype('uint8')) 126 | # path = os.path.join(opts.output_folder, os.path.basename(b)) 127 | path = os.path.join(opts.output_folder+"%03s"%i, '{:06d}.jpg'.format(j)) 128 | if not os.path.exists(os.path.dirname(path)): 129 | os.makedirs(os.path.dirname(path)) 130 | im = im.resize((120, 120), Image.ANTIALIAS) 131 | im.save(path) 132 | j = j + 1 133 | i = i+1 134 | 135 | else: 136 | sample_b = random.sample(dir_b, a) 137 | sample_a = random.sample(dir_a, b) 138 | i = 0 139 | for b in sample_b: 140 | # print(a) 141 | images_b = img_transform(os.path.join(opts.B, b)) 142 | c_b, _ = trainer.gen_b.encode(images_b) 143 | j = 0 144 | for a in sample_a: 145 | 146 | images_a = img_transform(os.path.join(opts.A, a)) 147 | _, s_a_fake = trainer.gen_a.encode(images_a) 148 | for n in range(images_a.size(0)): 149 | s_a = s_a_fake[n].unsqueeze(0) 150 | # s_a[s_a > 0.7] = 0.7 151 | # s_a[s_a < - 0.7] = -0.7 152 | outputs = trainer.gen_a.decode(c_b, s_a, images_b) 153 | im = recover(outputs[0].data.cpu()) 154 | im = Image.fromarray(im.astype('uint8')) 155 | # path = os.path.join(opts.output_folder, os.path.basename(a)) 156 | path = os.path.join(opts.output_folder + "%03s" % i, '{:06d}.jpg'.format(j)) 157 | if not os.path.exists(os.path.dirname(path)): 158 | os.makedirs(os.path.dirname(path)) 159 | im = im.resize((120, 120), Image.ANTIALIAS) 160 | im.save(path) 161 | j = j + 1 162 | i = i + 1 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | --------------------------------------------------------------------------------
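Note on the cycle-reconstruction warm-up in train.py: the lines at the bottom of the training loop rescale config['recon_x_cyc_w'] every iteration, ramping it linearly from 0 to five times the configured recon_x_cyc_w value over the first 50,000 iterations and holding it at that plateau afterwards. Below is a minimal, standalone sketch of that schedule; it is not a file from this repository, and the base weight of 10.0 is a hypothetical placeholder for config['recon_x_cyc_w'].

def recon_x_cyc_weight(iterations, recon_cyc):
    # Mirrors the warm-up at the end of train.py's training loop:
    # linear ramp over the first 50k iterations, then a constant 5x plateau.
    if iterations / 10000.0 < 5:
        return recon_cyc * iterations / 10000.0
    return recon_cyc * 5

if __name__ == '__main__':
    base = 10.0  # hypothetical stand-in for config['recon_x_cyc_w']
    for it in (0, 10000, 25000, 50000, 100000):
        print(it, recon_x_cyc_weight(it, base))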