├── .gitignore
├── LICENSE.txt
├── README.md
├── data
│   ├── __init__.py
│   ├── base_data_loader.py
│   ├── base_dataset.py
│   ├── custom_dataset_data_loader.py
│   ├── data_loader.py
│   ├── face_dataset.py
│   ├── face_landmark_detection.py
│   ├── image_folder.py
│   ├── keypoint2img.py
│   ├── pose_dataset.py
│   ├── temporal_dataset.py
│   └── test_dataset.py
├── docker
│   ├── Dockerfile
│   ├── launch_docker.sh
│   └── pre_docker_install.sh
├── imgs
│   ├── city_change_labels.gif
│   ├── city_change_styles.gif
│   ├── face.gif
│   ├── face_multiple.gif
│   ├── framePredict.gif
│   ├── pose.gif
│   └── teaser.gif
├── models
│   ├── __init__.py
│   ├── base_model.py
│   ├── flownet.py
│   ├── flownet2_pytorch
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── convert.py
│   │   ├── datasets.py
│   │   ├── download_caffe_models.sh
│   │   ├── install.sh
│   │   ├── launch_docker.sh
│   │   ├── losses.py
│   │   ├── main.py
│   │   ├── models.py
│   │   ├── networks
│   │   │   ├── FlowNetC.py
│   │   │   ├── FlowNetFusion.py
│   │   │   ├── FlowNetS.py
│   │   │   ├── FlowNetSD.py
│   │   │   ├── __init__.py
│   │   │   ├── channelnorm_package
│   │   │   │   ├── __init__.py
│   │   │   │   ├── channelnorm.py
│   │   │   │   ├── channelnorm_cuda.cc
│   │   │   │   ├── channelnorm_kernel.cu
│   │   │   │   ├── channelnorm_kernel.cuh
│   │   │   │   └── setup.py
│   │   │   ├── correlation_package
│   │   │   │   ├── __init__.py
│   │   │   │   ├── correlation.py
│   │   │   │   ├── correlation_cuda.cc
│   │   │   │   ├── correlation_cuda_kernel.cu
│   │   │   │   ├── correlation_cuda_kernel.cuh
│   │   │   │   └── setup.py
│   │   │   ├── resample2d_package
│   │   │   │   ├── __init__.py
│   │   │   │   ├── resample2d.py
│   │   │   │   ├── resample2d_cuda.cc
│   │   │   │   ├── resample2d_kernel.cu
│   │   │   │   ├── resample2d_kernel.cuh
│   │   │   │   └── setup.py
│   │   │   └── submodules.py
│   │   ├── run-caffe2pytorch.sh
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── flow_utils.py
│   │       ├── frame_utils.py
│   │       ├── param_utils.py
│   │       └── tools.py
│   ├── models.py
│   ├── networks.py
│   ├── vid2vid_model_D.py
│   └── vid2vid_model_G.py
├── options
│   ├── __init__.py
│   ├── base_options.py
│   ├── test_options.py
│   └── train_options.py
├── scripts
│   ├── download_datasets.py
│   ├── download_flownet2.py
│   ├── download_gdrive.py
│   ├── download_models_flownet2.py
│   ├── face
│   │   ├── download_gdrive.py
│   │   ├── download_models.py
│   │   ├── test_512.sh
│   │   ├── test_g1_256.sh
│   │   ├── test_g1_512.sh
│   │   ├── train_512.sh
│   │   ├── train_512_bs7.sh
│   │   ├── train_g1_256.sh
│   │   └── train_g1_512.sh
│   ├── pose
│   │   ├── test_1024p.sh
│   │   ├── test_256p.sh
│   │   ├── test_512p.sh
│   │   ├── test_g1_1024p.sh
│   │   ├── test_g1_256p.sh
│   │   ├── test_g1_512p.sh
│   │   ├── train_1024p.sh
│   │   ├── train_256p.sh
│   │   ├── train_512p.sh
│   │   ├── train_g1_1024p.sh
│   │   ├── train_g1_256p.sh
│   │   └── train_g1_512p.sh
│   └── street
│       ├── download_gdrive.py
│       ├── download_models.py
│       ├── download_models_g1.py
│       ├── test_2048.sh
│       ├── test_g1_1024.sh
│       ├── train_1024.sh
│       ├── train_2048.sh
│       ├── train_2048_crop.sh
│       ├── train_512.sh
│       ├── train_512_bs.sh
│       ├── train_512_no_fg.sh
│       ├── train_g1_1024.sh
│       ├── train_g1_256.sh
│       └── train_g1_512.sh
├── test.py
├── train.py
└── util
    ├── __init__.py
    ├── html.py
    ├── image_pool.py
    ├── util.py
    └── visualizer.py

/.gitignore:
--------------------------------------------------------------------------------
 1 | debug*
 2 | checkpoints/
 3 | datasets/
 4 | models/debug*
 5 | models/flownet2*/networks/*/*egg-info
 6 | models/flownet2*/networks/*/build
 7 | models/flownet2*/networks/*/__pycache__
 8 | models/flownet2*/networks/*/dist
 9 | results/
10 | build/
11 | */Thumbs.db
12 | */**/__pycache__
13 | */*.pyc
14 | */**/*.pyc
15 | */**/**/*.pyc
16 | */**/**/**/*.pyc
17 | */**/**/**/**/*.pyc
18 | */*.so*
19 | */**/*.so*
20 | */**/*.dylib*
21 | *.DS_Store
22 | *~
23 |
--------------------------------------------------------------------------------
/LICENSE.txt:
-------------------------------------------------------------------------------- 1 | Copyright (C) 2017 NVIDIA Corporation. Ting-Chun Wang, Ming-Yu Liu, Jun-Yan Zhu. 2 | All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 4 | 5 | Permission to use, copy, modify, and distribute this software and its documentation 6 | for any non-commercial purpose is hereby granted without fee, provided that the above 7 | copyright notice appear in all copies and that both that copyright notice and this 8 | permission notice appear in supporting documentation, and that the name of the author 9 | not be used in advertising or publicity pertaining to distribution of the software 10 | without specific, written prior permission. 11 | 12 | THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL 13 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY PARTICULAR PURPOSE. 14 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL 15 | DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 16 | WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 17 | OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 | 19 | 20 | --------------------------- LICENSE FOR pytorch-CycleGAN-and-pix2pix ---------------- 21 | Copyright (c) 2017, Jun-Yan Zhu and Taesung Park 22 | All rights reserved. 23 | 24 | Redistribution and use in source and binary forms, with or without 25 | modification, are permitted provided that the following conditions are met: 26 | 27 | * Redistributions of source code must retain the above copyright notice, this 28 | list of conditions and the following disclaimer. 29 | 30 | * Redistributions in binary form must reproduce the above copyright notice, 31 | this list of conditions and the following disclaimer in the documentation 32 | and/or other materials provided with the distribution. 33 | 34 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 35 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 37 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 38 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 39 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 40 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 41 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 42 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 43 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
44 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/data/__init__.py -------------------------------------------------------------------------------- /data/base_data_loader.py: -------------------------------------------------------------------------------- 1 | 2 | class BaseDataLoader(): 3 | def __init__(self): 4 | pass 5 | 6 | def initialize(self, opt): 7 | self.opt = opt 8 | pass 9 | 10 | def load_data(): 11 | return None 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /data/base_dataset.py: -------------------------------------------------------------------------------- 1 | from util.util import add_dummy_to_tensor 2 | import torch.utils.data as data 3 | import torch 4 | from PIL import Image 5 | import torchvision.transforms as transforms 6 | import numpy as np 7 | import random 8 | 9 | class BaseDataset(data.Dataset): 10 | def __init__(self): 11 | super(BaseDataset, self).__init__() 12 | 13 | def name(self): 14 | return 'BaseDataset' 15 | 16 | def initialize(self, opt): 17 | pass 18 | 19 | def update_training_batch(self, ratio): # update the training sequence length to be longer 20 | seq_len_max = min(128, self.seq_len_max) - (self.opt.n_frames_G - 1) 21 | if self.n_frames_total < seq_len_max: 22 | self.n_frames_total = min(seq_len_max, self.opt.n_frames_total * (2**ratio)) 23 | #self.n_frames_total = min(seq_len_max, self.opt.n_frames_total * (ratio + 1)) 24 | print('--------- Updating training sequence length to %d ---------' % self.n_frames_total) 25 | 26 | def init_frame_idx(self, A_paths): 27 | self.n_of_seqs = min(len(A_paths), self.opt.max_dataset_size) # number of sequences to train 28 | self.seq_len_max = max([len(A) for A in A_paths]) # max number of frames in the training sequences 29 | 30 | self.seq_idx = 0 # index for current sequence 31 | self.frame_idx = self.opt.start_frame if not self.opt.isTrain else 0 # index for current frame in the sequence 32 | self.frames_count = [] # number of frames in each sequence 33 | for path in A_paths: 34 | self.frames_count.append(len(path) - self.opt.n_frames_G + 1) 35 | 36 | self.folder_prob = [count / sum(self.frames_count) for count in self.frames_count] 37 | self.n_frames_total = self.opt.n_frames_total if self.opt.isTrain else 1 38 | self.A, self.B, self.I = None, None, None 39 | 40 | def update_frame_idx(self, A_paths, index): 41 | if self.opt.isTrain: 42 | if self.opt.dataset_mode == 'pose': 43 | seq_idx = np.random.choice(len(A_paths), p=self.folder_prob) # randomly pick sequence to train 44 | self.frame_idx = index 45 | else: 46 | seq_idx = index % self.n_of_seqs 47 | return None, None, None, seq_idx 48 | else: 49 | self.change_seq = self.frame_idx >= self.frames_count[self.seq_idx] 50 | if self.change_seq: 51 | self.seq_idx += 1 52 | self.frame_idx = 0 53 | self.A, self.B, self.I = None, None, None 54 | return self.A, self.B, self.I, self.seq_idx 55 | 56 | def init_data_params(self, data, n_gpus, tG): 57 | opt = self.opt 58 | _, n_frames_total, self.height, self.width = data['B'].size() # n_frames_total = n_frames_load * n_loadings + tG - 1 59 | n_frames_total = n_frames_total // opt.output_nc 60 | n_frames_load = opt.max_frames_per_gpu * n_gpus # number of total frames loaded into GPU at a time for each batch 61 | n_frames_load = min(n_frames_load, 
n_frames_total - tG + 1) 62 | self.t_len = n_frames_load + tG - 1 # number of loaded frames plus previous frames 63 | return n_frames_total-self.t_len+1, n_frames_load, self.t_len 64 | 65 | def init_data(self, t_scales): 66 | fake_B_last = None # the last generated frame from previous training batch (which becomes input to the next batch) 67 | real_B_all, fake_B_all, flow_ref_all, conf_ref_all = None, None, None, None # all real/generated frames so far 68 | if self.opt.sparse_D: 69 | real_B_all, fake_B_all, flow_ref_all, conf_ref_all = [None]*t_scales, [None]*t_scales, [None]*t_scales, [None]*t_scales 70 | 71 | frames_all = real_B_all, fake_B_all, flow_ref_all, conf_ref_all 72 | return fake_B_last, frames_all 73 | 74 | def prepare_data(self, data, i, input_nc, output_nc): 75 | t_len, height, width = self.t_len, self.height, self.width 76 | # 5D tensor: batchSize, # of frames, # of channels, height, width 77 | input_A = (data['A'][:, i*input_nc:(i+t_len)*input_nc, ...]).view(-1, t_len, input_nc, height, width) 78 | input_B = (data['B'][:, i*output_nc:(i+t_len)*output_nc, ...]).view(-1, t_len, output_nc, height, width) 79 | inst_A = (data['inst'][:, i:i+t_len, ...]).view(-1, t_len, 1, height, width) if len(data['inst'].size()) > 2 else None 80 | return [input_A, input_B, inst_A] 81 | 82 | def make_power_2(n, base=32.0): 83 | return int(round(n / base) * base) 84 | 85 | def get_img_params(opt, size): 86 | w, h = size 87 | new_h, new_w = h, w 88 | if 'resize' in opt.resize_or_crop: # resize image to be loadSize x loadSize 89 | new_h = new_w = opt.loadSize 90 | elif 'scaleWidth' in opt.resize_or_crop: # scale image width to be loadSize 91 | new_w = opt.loadSize 92 | new_h = opt.loadSize * h // w 93 | elif 'scaleHeight' in opt.resize_or_crop: # scale image height to be loadSize 94 | new_h = opt.loadSize 95 | new_w = opt.loadSize * w // h 96 | elif 'randomScaleWidth' in opt.resize_or_crop: # randomly scale image width to be somewhere between loadSize and fineSize 97 | new_w = random.randint(opt.fineSize, opt.loadSize + 1) 98 | new_h = new_w * h // w 99 | elif 'randomScaleHeight' in opt.resize_or_crop: # randomly scale image height to be somewhere between loadSize and fineSize 100 | new_h = random.randint(opt.fineSize, opt.loadSize + 1) 101 | new_w = new_h * w // h 102 | new_w = int(round(new_w / 4)) * 4 103 | new_h = int(round(new_h / 4)) * 4 104 | 105 | crop_x = crop_y = 0 106 | crop_w = crop_h = 0 107 | if 'crop' in opt.resize_or_crop or 'scaledCrop' in opt.resize_or_crop: 108 | if 'crop' in opt.resize_or_crop: # crop patches of size fineSize x fineSize 109 | crop_w = crop_h = opt.fineSize 110 | else: 111 | if 'Width' in opt.resize_or_crop: # crop patches of width fineSize 112 | crop_w = opt.fineSize 113 | crop_h = opt.fineSize * h // w 114 | else: # crop patches of height fineSize 115 | crop_h = opt.fineSize 116 | crop_w = opt.fineSize * w // h 117 | 118 | crop_w, crop_h = make_power_2(crop_w), make_power_2(crop_h) 119 | x_span = (new_w - crop_w) // 2 120 | crop_x = np.maximum(0, np.minimum(x_span*2, int(np.random.randn() * x_span/3 + x_span))) 121 | crop_y = random.randint(0, np.minimum(np.maximum(0, new_h - crop_h), new_h // 8)) 122 | #crop_x = random.randint(0, np.maximum(0, new_w - crop_w)) 123 | #crop_y = random.randint(0, np.maximum(0, new_h - crop_h)) 124 | else: 125 | new_w, new_h = make_power_2(new_w), make_power_2(new_h) 126 | 127 | flip = (random.random() > 0.5) and (opt.dataset_mode != 'pose') 128 | return {'new_size': (new_w, new_h), 'crop_size': (crop_w, crop_h), 'crop_pos': 
(crop_x, crop_y), 'flip': flip} 129 | 130 | def get_transform(opt, params, method=Image.BICUBIC, normalize=True, toTensor=True): 131 | transform_list = [] 132 | ### resize input image 133 | if 'resize' in opt.resize_or_crop: 134 | osize = [opt.loadSize, opt.loadSize] 135 | transform_list.append(transforms.Scale(osize, method)) 136 | else: 137 | transform_list.append(transforms.Lambda(lambda img: __scale_image(img, params['new_size'], method))) 138 | 139 | ### crop patches from image 140 | if 'crop' in opt.resize_or_crop or 'scaledCrop' in opt.resize_or_crop: 141 | transform_list.append(transforms.Lambda(lambda img: __crop(img, params['crop_size'], params['crop_pos']))) 142 | 143 | ### random flip 144 | if opt.isTrain and not opt.no_flip: 145 | transform_list.append(transforms.Lambda(lambda img: __flip(img, params['flip']))) 146 | 147 | if toTensor: 148 | transform_list += [transforms.ToTensor()] 149 | if normalize: 150 | transform_list += [transforms.Normalize((0.5, 0.5, 0.5), 151 | (0.5, 0.5, 0.5))] 152 | return transforms.Compose(transform_list) 153 | 154 | def toTensor_normalize(): 155 | transform_list = [transforms.ToTensor()] 156 | transform_list += [transforms.Normalize((0.5, 0.5, 0.5), 157 | (0.5, 0.5, 0.5))] 158 | return transforms.Compose(transform_list) 159 | 160 | def __scale_image(img, size, method=Image.BICUBIC): 161 | w, h = size 162 | return img.resize((w, h), method) 163 | 164 | def __crop(img, size, pos): 165 | ow, oh = img.size 166 | tw, th = size 167 | x1, y1 = pos 168 | if (ow > tw or oh > th): 169 | return img.crop((x1, y1, min(ow, x1 + tw), min(oh, y1 + th))) 170 | return img 171 | 172 | def __flip(img, flip): 173 | if flip: 174 | return img.transpose(Image.FLIP_LEFT_RIGHT) 175 | return img 176 | 177 | def get_video_params(opt, n_frames_total, cur_seq_len, index): 178 | tG = opt.n_frames_G 179 | if opt.isTrain: 180 | n_frames_total = min(n_frames_total, cur_seq_len - tG + 1) 181 | 182 | n_gpus = opt.n_gpus_gen if opt.batchSize == 1 else 1 # number of generator GPUs for each batch 183 | n_frames_per_load = opt.max_frames_per_gpu * n_gpus # number of frames to load into GPUs at one time (for each batch) 184 | n_frames_per_load = min(n_frames_total, n_frames_per_load) 185 | n_loadings = n_frames_total // n_frames_per_load # how many times are needed to load entire sequence into GPUs 186 | n_frames_total = n_frames_per_load * n_loadings + tG - 1 # rounded overall number of frames to read from the sequence 187 | 188 | max_t_step = min(opt.max_t_step, (cur_seq_len-1) // (n_frames_total-1)) 189 | t_step = np.random.randint(max_t_step) + 1 # spacing between neighboring sampled frames 190 | offset_max = max(1, cur_seq_len - (n_frames_total-1)*t_step) # maximum possible index for the first frame 191 | if opt.dataset_mode == 'pose': 192 | start_idx = index % offset_max 193 | else: 194 | start_idx = np.random.randint(offset_max) # offset for the first frame to load 195 | if opt.debug: 196 | print("loading %d frames in total, first frame starting at index %d, space between neighboring frames is %d" 197 | % (n_frames_total, start_idx, t_step)) 198 | else: 199 | n_frames_total = tG 200 | start_idx = index 201 | t_step = 1 202 | return n_frames_total, start_idx, t_step 203 | 204 | def concat_frame(A, Ai, nF): 205 | if A is None: 206 | A = Ai 207 | else: 208 | c = Ai.size()[0] 209 | if A.size()[0] == nF * c: 210 | A = A[c:] 211 | A = torch.cat([A, Ai]) 212 | return A -------------------------------------------------------------------------------- /data/custom_dataset_data_loader.py: 
-------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from data.base_data_loader import BaseDataLoader 3 | 4 | 5 | def CreateDataset(opt): 6 | dataset = None 7 | if opt.dataset_mode == 'temporal': 8 | from data.temporal_dataset import TemporalDataset 9 | dataset = TemporalDataset() 10 | elif opt.dataset_mode == 'face': 11 | from data.face_dataset import FaceDataset 12 | dataset = FaceDataset() 13 | elif opt.dataset_mode == 'pose': 14 | from data.pose_dataset import PoseDataset 15 | dataset = PoseDataset() 16 | elif opt.dataset_mode == 'test': 17 | from data.test_dataset import TestDataset 18 | dataset = TestDataset() 19 | else: 20 | raise ValueError("Dataset [%s] not recognized." % opt.dataset_mode) 21 | 22 | print("dataset [%s] was created" % (dataset.name())) 23 | dataset.initialize(opt) 24 | return dataset 25 | 26 | 27 | class CustomDatasetDataLoader(BaseDataLoader): 28 | def name(self): 29 | return 'CustomDatasetDataLoader' 30 | 31 | def initialize(self, opt): 32 | BaseDataLoader.initialize(self, opt) 33 | self.dataset = CreateDataset(opt) 34 | self.dataloader = torch.utils.data.DataLoader( 35 | self.dataset, 36 | batch_size=opt.batchSize, 37 | shuffle=not opt.serial_batches, 38 | num_workers=int(opt.nThreads)) 39 | 40 | def load_data(self): 41 | return self.dataloader 42 | 43 | def __len__(self): 44 | return min(len(self.dataset), self.opt.max_dataset_size) 45 | -------------------------------------------------------------------------------- /data/data_loader.py: -------------------------------------------------------------------------------- 1 | 2 | def CreateDataLoader(opt): 3 | from data.custom_dataset_data_loader import CustomDatasetDataLoader 4 | data_loader = CustomDatasetDataLoader() 5 | print(data_loader.name()) 6 | data_loader.initialize(opt) 7 | return data_loader 8 | -------------------------------------------------------------------------------- /data/face_landmark_detection.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | from skimage import io 4 | import numpy as np 5 | import dlib 6 | import sys 7 | 8 | if len(sys.argv) < 2 or (sys.argv[1] != 'train' and sys.argv[1] != 'test'): 9 | raise ValueError('usage: python data/face_landmark_detection.py [train|test]') 10 | 11 | phase = sys.argv[1] 12 | dataset_path = 'datasets/face/' 13 | faces_folder_path = os.path.join(dataset_path, phase + '_img/') 14 | predictor_path = os.path.join(dataset_path, 'shape_predictor_68_face_landmarks.dat') 15 | detector = dlib.get_frontal_face_detector() 16 | predictor = dlib.shape_predictor(predictor_path) 17 | 18 | img_paths = sorted(glob.glob(faces_folder_path + '*')) 19 | for i in range(len(img_paths)): 20 | f = img_paths[i] 21 | print("Processing video: {}".format(f)) 22 | save_path = os.path.join(dataset_path, phase + '_keypoints', os.path.basename(f)) 23 | if not os.path.isdir(save_path): 24 | os.makedirs(save_path) 25 | 26 | for img_name in sorted(glob.glob(os.path.join(f, '*.jpg'))): 27 | img = io.imread(img_name) 28 | dets = detector(img, 1) 29 | if len(dets) > 0: 30 | shape = predictor(img, dets[0]) 31 | points = np.empty([68, 2], dtype=int) 32 | for b in range(68): 33 | points[b,0] = shape.part(b).x 34 | points[b,1] = shape.part(b).y 35 | 36 | save_name = os.path.join(save_path, os.path.basename(img_name)[:-4] + '.txt') 37 | np.savetxt(save_name, points, fmt='%d', delimiter=',') 38 | 
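Note: a minimal sketch (not part of the repository) of reading back one of the keypoint files written by data/face_landmark_detection.py above; the file path below is illustrative and depends on your dataset layout.

import numpy as np
# Each saved file holds the 68 dlib landmarks as comma-separated integer (x, y) rows,
# matching the np.savetxt(..., fmt='%d', delimiter=',') call in the script above.
points = np.loadtxt('datasets/face/train_keypoints/some_video/some_frame.txt', delimiter=',')
assert points.shape == (68, 2)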
-------------------------------------------------------------------------------- /data/image_folder.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Code from 3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py 4 | # Modified the original code so that it also loads images from the current 5 | # directory as well as the subdirectories 6 | ############################################################################### 7 | 8 | import torch.utils.data as data 9 | 10 | from PIL import Image 11 | import os 12 | import os.path 13 | 14 | IMG_EXTENSIONS = [ 15 | '.jpg', '.JPG', '.jpeg', '.JPEG', '.pgm', '.PGM', 16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', '.tiff', 17 | '.txt', '.json' 18 | ] 19 | 20 | 21 | def is_image_file(filename): 22 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 23 | 24 | 25 | def make_dataset(dir): 26 | images = [] 27 | assert os.path.isdir(dir), '%s is not a valid directory' % dir 28 | 29 | for root, _, fnames in sorted(os.walk(dir)): 30 | for fname in fnames: 31 | if is_image_file(fname): 32 | path = os.path.join(root, fname) 33 | images.append(path) 34 | return images 35 | 36 | def make_grouped_dataset(dir): 37 | images = [] 38 | assert os.path.isdir(dir), '%s is not a valid directory' % dir 39 | fnames = sorted(os.walk(dir)) 40 | for fname in sorted(fnames): 41 | paths = [] 42 | root = fname[0] 43 | for f in sorted(fname[2]): 44 | if is_image_file(f): 45 | paths.append(os.path.join(root, f)) 46 | if len(paths) > 0: 47 | images.append(paths) 48 | return images 49 | 50 | def check_path_valid(A_paths, B_paths): 51 | assert(len(A_paths) == len(B_paths)) 52 | for a, b in zip(A_paths, B_paths): 53 | assert(len(a) == len(b)) 54 | 55 | def default_loader(path): 56 | return Image.open(path).convert('RGB') 57 | 58 | 59 | class ImageFolder(data.Dataset): 60 | 61 | def __init__(self, root, transform=None, return_paths=False, 62 | loader=default_loader): 63 | imgs = make_dataset(root) 64 | if len(imgs) == 0: 65 | raise(RuntimeError("Found 0 images in: " + root + "\n" 66 | "Supported image extensions are: " + 67 | ",".join(IMG_EXTENSIONS))) 68 | 69 | self.root = root 70 | self.imgs = imgs 71 | self.transform = transform 72 | self.return_paths = return_paths 73 | self.loader = loader 74 | 75 | def __getitem__(self, index): 76 | path = self.imgs[index] 77 | img = self.loader(path) 78 | if self.transform is not None: 79 | img = self.transform(img) 80 | if self.return_paths: 81 | return img, path 82 | else: 83 | return img 84 | 85 | def __len__(self): 86 | return len(self.imgs) 87 | -------------------------------------------------------------------------------- /data/keypoint2img.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from PIL import Image 3 | import numpy as np 4 | import json 5 | import glob 6 | from scipy.optimize import curve_fit 7 | import warnings 8 | 9 | def func(x, a, b, c): 10 | return a * x**2 + b * x + c 11 | 12 | def linear(x, a, b): 13 | return a * x + b 14 | 15 | def setColor(im, yy, xx, color): 16 | if len(im.shape) == 3: 17 | if (im[yy, xx] == 0).all(): 18 | im[yy, xx, 0], im[yy, xx, 1], im[yy, xx, 2] = color[0], color[1], color[2] 19 | else: 20 | im[yy, xx, 0] = ((im[yy, xx, 0].astype(float) + color[0]) / 2).astype(np.uint8) 21 | im[yy, xx, 1] = ((im[yy, xx, 1].astype(float) + color[1]) / 2).astype(np.uint8) 22 | 
im[yy, xx, 2] = ((im[yy, xx, 2].astype(float) + color[2]) / 2).astype(np.uint8) 23 | else: 24 | im[yy, xx] = color[0] 25 | 26 | def drawEdge(im, x, y, bw=1, color=(255,255,255), draw_end_points=False): 27 | if x is not None and x.size: 28 | h, w = im.shape[0], im.shape[1] 29 | # edge 30 | for i in range(-bw, bw): 31 | for j in range(-bw, bw): 32 | yy = np.maximum(0, np.minimum(h-1, y+i)) 33 | xx = np.maximum(0, np.minimum(w-1, x+j)) 34 | setColor(im, yy, xx, color) 35 | 36 | # edge endpoints 37 | if draw_end_points: 38 | for i in range(-bw*2, bw*2): 39 | for j in range(-bw*2, bw*2): 40 | if (i**2) + (j**2) < (4 * bw**2): 41 | yy = np.maximum(0, np.minimum(h-1, np.array([y[0], y[-1]])+i)) 42 | xx = np.maximum(0, np.minimum(w-1, np.array([x[0], x[-1]])+j)) 43 | setColor(im, yy, xx, color) 44 | 45 | def interpPoints(x, y): 46 | if abs(x[:-1] - x[1:]).max() < abs(y[:-1] - y[1:]).max(): 47 | curve_y, curve_x = interpPoints(y, x) 48 | if curve_y is None: 49 | return None, None 50 | else: 51 | with warnings.catch_warnings(): 52 | warnings.simplefilter("ignore") 53 | if len(x) < 3: 54 | popt, _ = curve_fit(linear, x, y) 55 | else: 56 | popt, _ = curve_fit(func, x, y) 57 | if abs(popt[0]) > 1: 58 | return None, None 59 | if x[0] > x[-1]: 60 | x = list(reversed(x)) 61 | y = list(reversed(y)) 62 | curve_x = np.linspace(x[0], x[-1], (x[-1]-x[0])) 63 | if len(x) < 3: 64 | curve_y = linear(curve_x, *popt) 65 | else: 66 | curve_y = func(curve_x, *popt) 67 | return curve_x.astype(int), curve_y.astype(int) 68 | 69 | def read_keypoints(json_input, size, random_drop_prob=0, remove_face_labels=False, basic_point_only=False): 70 | with open(json_input, encoding='utf-8') as f: 71 | keypoint_dicts = json.loads(f.read())["people"] 72 | 73 | edge_lists = define_edge_lists(basic_point_only) 74 | w, h = size 75 | pose_img = np.zeros((h, w, 3), np.uint8) 76 | for keypoint_dict in keypoint_dicts: 77 | pose_pts = np.array(keypoint_dict["pose_keypoints_2d"]).reshape(25, 3) 78 | face_pts = np.array(keypoint_dict["face_keypoints_2d"]).reshape(70, 3) 79 | hand_pts_l = np.array(keypoint_dict["hand_left_keypoints_2d"]).reshape(21, 3) 80 | hand_pts_r = np.array(keypoint_dict["hand_right_keypoints_2d"]).reshape(21, 3) 81 | pts = [extract_valid_keypoints(pts, edge_lists) for pts in [pose_pts, face_pts, hand_pts_l, hand_pts_r]] 82 | pose_img += connect_keypoints(pts, edge_lists, size, random_drop_prob, remove_face_labels, basic_point_only) 83 | return pose_img 84 | 85 | def extract_valid_keypoints(pts, edge_lists): 86 | pose_edge_list, _, hand_edge_list, _, face_list = edge_lists 87 | p = pts.shape[0] 88 | thre = 0.1 if p == 70 else 0.01 89 | output = np.zeros((p, 2)) 90 | 91 | if p == 70: # face 92 | for edge_list in face_list: 93 | for edge in edge_list: 94 | if (pts[edge, 2] > thre).all(): 95 | output[edge, :] = pts[edge, :2] 96 | elif p == 21: # hand 97 | for edge in hand_edge_list: 98 | if (pts[edge, 2] > thre).all(): 99 | output[edge, :] = pts[edge, :2] 100 | else: # pose 101 | valid = (pts[:, 2] > thre) 102 | output[valid, :] = pts[valid, :2] 103 | 104 | return output 105 | 106 | def connect_keypoints(pts, edge_lists, size, random_drop_prob, remove_face_labels, basic_point_only): 107 | pose_pts, face_pts, hand_pts_l, hand_pts_r = pts 108 | w, h = size 109 | output_edges = np.zeros((h, w, 3), np.uint8) 110 | pose_edge_list, pose_color_list, hand_edge_list, hand_color_list, face_list = edge_lists 111 | 112 | if random_drop_prob > 0 and remove_face_labels: 113 | # add random noise to keypoints 114 | pose_pts[[0,15,16,17,18], 
:] += 5 * np.random.randn(5,2) 115 | face_pts[:,0] += 2 * np.random.randn() 116 | face_pts[:,1] += 2 * np.random.randn() 117 | 118 | ### pose 119 | for i, edge in enumerate(pose_edge_list): 120 | x, y = pose_pts[edge, 0], pose_pts[edge, 1] 121 | if (np.random.rand() > random_drop_prob) and (0 not in x): 122 | curve_x, curve_y = interpPoints(x, y) 123 | drawEdge(output_edges, curve_x, curve_y, bw=3, color=pose_color_list[i], draw_end_points=True) 124 | 125 | if not basic_point_only: 126 | ### hand 127 | for hand_pts in [hand_pts_l, hand_pts_r]: # for left and right hand 128 | if np.random.rand() > random_drop_prob: 129 | for i, edge in enumerate(hand_edge_list): # for each finger 130 | for j in range(0, len(edge)-1): # for each part of the finger 131 | sub_edge = edge[j:j+2] 132 | x, y = hand_pts[sub_edge, 0], hand_pts[sub_edge, 1] 133 | if 0 not in x: 134 | line_x, line_y = interpPoints(x, y) 135 | drawEdge(output_edges, line_x, line_y, bw=1, color=hand_color_list[i], draw_end_points=True) 136 | 137 | ### face 138 | edge_len = 2 139 | if (np.random.rand() > random_drop_prob): 140 | for edge_list in face_list: 141 | for edge in edge_list: 142 | for i in range(0, max(1, len(edge)-1), edge_len-1): 143 | sub_edge = edge[i:i+edge_len] 144 | x, y = face_pts[sub_edge, 0], face_pts[sub_edge, 1] 145 | if 0 not in x: 146 | curve_x, curve_y = interpPoints(x, y) 147 | drawEdge(output_edges, curve_x, curve_y, draw_end_points=True) 148 | 149 | return output_edges 150 | 151 | def define_edge_lists(basic_point_only): 152 | ### pose 153 | pose_edge_list = [] 154 | pose_color_list = [] 155 | if not basic_point_only: 156 | pose_edge_list += [[17, 15], [15, 0], [ 0, 16], [16, 18]] # head 157 | pose_color_list += [[153, 0,153], [153, 0,102], [102, 0,153], [ 51, 0,153]] 158 | 159 | pose_edge_list += [ 160 | [ 0, 1], [ 1, 8], # body 161 | [ 1, 2], [ 2, 3], [ 3, 4], # right arm 162 | [ 1, 5], [ 5, 6], [ 6, 7], # left arm 163 | [ 8, 9], [ 9, 10], [10, 11], [11, 24], [11, 22], [22, 23], # right leg 164 | [ 8, 12], [12, 13], [13, 14], [14, 21], [14, 19], [19, 20] # left leg 165 | ] 166 | pose_color_list += [ 167 | [153, 0, 51], [153, 0, 0], 168 | [153, 51, 0], [153,102, 0], [153,153, 0], 169 | [102,153, 0], [ 51,153, 0], [ 0,153, 0], 170 | [ 0,153, 51], [ 0,153,102], [ 0,153,153], [ 0,153,153], [ 0,153,153], [ 0,153,153], 171 | [ 0,102,153], [ 0, 51,153], [ 0, 0,153], [ 0, 0,153], [ 0, 0,153], [ 0, 0,153] 172 | ] 173 | 174 | ### hand 175 | hand_edge_list = [ 176 | [0, 1, 2, 3, 4], 177 | [0, 5, 6, 7, 8], 178 | [0, 9, 10, 11, 12], 179 | [0, 13, 14, 15, 16], 180 | [0, 17, 18, 19, 20] 181 | ] 182 | hand_color_list = [ 183 | [204,0,0], [163,204,0], [0,204,82], [0,82,204], [163,0,204] 184 | ] 185 | 186 | ### face 187 | face_list = [ 188 | #[range(0, 17)], # face 189 | [range(17, 22)], # left eyebrow 190 | [range(22, 27)], # right eyebrow 191 | [range(27, 31), range(31, 36)], # nose 192 | [[36,37,38,39], [39,40,41,36]], # left eye 193 | [[42,43,44,45], [45,46,47,42]], # right eye 194 | [range(48, 55), [54,55,56,57,58,59,48]], # mouth 195 | ] 196 | return pose_edge_list, pose_color_list, hand_edge_list, hand_color_list, face_list -------------------------------------------------------------------------------- /data/pose_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torchvision.transforms as transforms 3 | import torch 4 | from PIL import Image 5 | import numpy as np 6 | 7 | from data.base_dataset import BaseDataset, get_img_params, get_transform, 
get_video_params, concat_frame 8 | from data.image_folder import make_grouped_dataset, check_path_valid 9 | from data.keypoint2img import read_keypoints 10 | 11 | class PoseDataset(BaseDataset): 12 | def initialize(self, opt): 13 | self.opt = opt 14 | self.root = opt.dataroot 15 | 16 | self.dir_dp = os.path.join(opt.dataroot, opt.phase + '_densepose') 17 | self.dir_op = os.path.join(opt.dataroot, opt.phase + '_openpose') 18 | self.dir_img = os.path.join(opt.dataroot, opt.phase + '_img') 19 | self.img_paths = sorted(make_grouped_dataset(self.dir_img)) 20 | if not opt.openpose_only: 21 | self.dp_paths = sorted(make_grouped_dataset(self.dir_dp)) 22 | check_path_valid(self.dp_paths, self.img_paths) 23 | if not opt.densepose_only: 24 | self.op_paths = sorted(make_grouped_dataset(self.dir_op)) 25 | check_path_valid(self.op_paths, self.img_paths) 26 | 27 | self.init_frame_idx(self.img_paths) 28 | 29 | def __getitem__(self, index): 30 | A, B, _, seq_idx = self.update_frame_idx(self.img_paths, index) 31 | img_paths = self.img_paths[seq_idx] 32 | n_frames_total, start_idx, t_step = get_video_params(self.opt, self.n_frames_total, len(img_paths), self.frame_idx) 33 | 34 | img = Image.open(img_paths[start_idx]).convert('RGB') 35 | size = img.size 36 | params = get_img_params(self.opt, size) 37 | 38 | frame_range = list(range(n_frames_total)) if (self.opt.isTrain or self.A is None) else [self.opt.n_frames_G-1] 39 | for i in frame_range: 40 | img_path = img_paths[start_idx + i * t_step] 41 | if not self.opt.openpose_only: 42 | dp_path = self.dp_paths[seq_idx][start_idx + i * t_step] 43 | Di = self.get_image(dp_path, size, params, input_type='densepose') 44 | Di[2,:,:] = ((Di[2,:,:] * 0.5 + 0.5) * 255 / 24 - 0.5) / 0.5 45 | if not self.opt.densepose_only: 46 | op_path = self.op_paths[seq_idx][start_idx + i * t_step] 47 | Oi = self.get_image(op_path, size, params, input_type='openpose') 48 | 49 | if self.opt.openpose_only: 50 | Ai = Oi 51 | elif self.opt.densepose_only: 52 | Ai = Di 53 | else: 54 | Ai = torch.cat([Di, Oi]) 55 | Bi = self.get_image(img_path, size, params, input_type='img') 56 | 57 | Ai, Bi = self.crop(Ai), self.crop(Bi) # only crop the central half region to save time 58 | A = concat_frame(A, Ai, n_frames_total) 59 | B = concat_frame(B, Bi, n_frames_total) 60 | 61 | if not self.opt.isTrain: 62 | self.A, self.B = A, B 63 | self.frame_idx += 1 64 | change_seq = False if self.opt.isTrain else self.change_seq 65 | return_list = {'A': A, 'B': B, 'inst': 0, 'A_path': img_path, 'change_seq': change_seq} 66 | 67 | return return_list 68 | 69 | def get_image(self, A_path, size, params, input_type): 70 | if input_type != 'openpose': 71 | A_img = Image.open(A_path).convert('RGB') 72 | else: 73 | random_drop_prob = self.opt.random_drop_prob if self.opt.isTrain else 0 74 | A_img = Image.fromarray(read_keypoints(A_path, size, random_drop_prob, self.opt.remove_face_labels, self.opt.basic_point_only)) 75 | 76 | if input_type == 'densepose' and self.opt.isTrain: 77 | # randomly remove labels 78 | A_np = np.array(A_img) 79 | part_labels = A_np[:,:,2] 80 | for part_id in range(1, 25): 81 | if (np.random.rand() < self.opt.random_drop_prob): 82 | A_np[(part_labels == part_id), :] = 0 83 | if self.opt.remove_face_labels: 84 | A_np[(part_labels == 23) | (part_labels == 24), :] = 0 85 | A_img = Image.fromarray(A_np) 86 | 87 | is_img = input_type == 'img' 88 | method = Image.BICUBIC if is_img else Image.NEAREST 89 | transform_scaleA = get_transform(self.opt, params, method=method) 90 | A_scaled = 
transform_scaleA(A_img) 91 | return A_scaled 92 | 93 | def crop(self, Ai): 94 | w = Ai.size()[2] 95 | base = 32 96 | x_cen = w // 2 97 | bs = int(w * 0.25) // base * base 98 | return Ai[:,:,(x_cen-bs):(x_cen+bs)] 99 | 100 | def normalize_pose(self, A_img, target_yc, target_len, first=False): 101 | w, h = A_img.size 102 | A_np = np.array(A_img) 103 | 104 | if first == True: 105 | part_labels = A_np[:,:,2] 106 | part_coords = np.nonzero((part_labels == 1) | (part_labels == 2)) 107 | y, x = part_coords[0], part_coords[1] 108 | 109 | ys, ye = y.min(), y.max() 110 | min_i, max_i = np.argmin(y), np.argmax(y) 111 | v_min = A_np[y[min_i], x[min_i], 1] / 255 112 | v_max = A_np[y[max_i], x[max_i], 1] / 255 113 | ylen = (ye-ys) / (v_max-v_min) 114 | yc = (0.5-v_min) / (v_max-v_min) * (ye-ys) + ys 115 | 116 | ratio = target_len / ylen 117 | offset_y = int(yc - (target_yc / ratio)) 118 | offset_x = int(w * (1 - 1/ratio) / 2) 119 | 120 | padding = int(max(0, max(-offset_y, int(offset_y + h/ratio) - h))) 121 | padding = int(max(padding, max(-offset_x, int(offset_x + w/ratio) - w))) 122 | offset_y += padding 123 | offset_x += padding 124 | self.offset_y, self.offset_x = offset_y, offset_x 125 | self.ratio, self.padding = ratio, padding 126 | 127 | p = self.padding 128 | A_np = np.pad(A_np, ((p,p),(p,p),(0,0)), 'constant', constant_values=0) 129 | A_np = A_np[self.offset_y:int(self.offset_y + h/self.ratio), self.offset_x:int(self.offset_x + w/self.ratio):, :] 130 | A_img = Image.fromarray(A_np) 131 | A_img = A_img.resize((w, h)) 132 | return A_img 133 | 134 | def __len__(self): 135 | return sum(self.frames_count) 136 | 137 | def name(self): 138 | return 'PoseDataset' 139 | 140 | """ 141 | DensePose label 142 | 0 = Background 143 | 1, 2 = Torso 144 | 3 = Right Hand 145 | 4 = Left Hand 146 | 5 = Right Foot 147 | 6 = Left Foot 148 | 7, 9 = Upper Leg Right 149 | 8, 10 = Upper Leg Left 150 | 11, 13 = Lower Leg Right 151 | 12, 14 = Lower Leg Left 152 | 15, 17 = Upper Arm Left 153 | 16, 18 = Upper Arm Right 154 | 19, 21 = Lower Arm Left 155 | 20, 22 = Lower Arm Right 156 | 23, 24 = Head """ 157 | -------------------------------------------------------------------------------- /data/temporal_dataset.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | import os.path 4 | import random 5 | import torch 6 | from data.base_dataset import BaseDataset, get_img_params, get_transform, get_video_params 7 | from data.image_folder import make_grouped_dataset, check_path_valid 8 | from PIL import Image 9 | import numpy as np 10 | 11 | class TemporalDataset(BaseDataset): 12 | def initialize(self, opt): 13 | self.opt = opt 14 | self.root = opt.dataroot 15 | self.dir_A = os.path.join(opt.dataroot, opt.phase + '_A') 16 | self.dir_B = os.path.join(opt.dataroot, opt.phase + '_B') 17 | self.A_is_label = self.opt.label_nc != 0 18 | 19 | self.A_paths = sorted(make_grouped_dataset(self.dir_A)) 20 | self.B_paths = sorted(make_grouped_dataset(self.dir_B)) 21 | check_path_valid(self.A_paths, self.B_paths) 22 | if opt.use_instance: 23 | self.dir_inst = os.path.join(opt.dataroot, opt.phase + '_inst') 24 | self.I_paths = sorted(make_grouped_dataset(self.dir_inst)) 25 | check_path_valid(self.A_paths, self.I_paths) 26 | 27 | self.n_of_seqs = len(self.A_paths) # number of sequences to train 28 | self.seq_len_max = max([len(A) for A in self.A_paths]) 29 | self.n_frames_total = self.opt.n_frames_total # current number of frames to train in a single iteration 30 | 31 | def __getitem__(self, index): 32 | tG = self.opt.n_frames_G 33 | A_paths = self.A_paths[index % self.n_of_seqs] 34 | B_paths = self.B_paths[index % self.n_of_seqs] 35 | if self.opt.use_instance: 36 | I_paths = self.I_paths[index % self.n_of_seqs] 37 | 38 | # setting parameters 39 | n_frames_total, start_idx, t_step = get_video_params(self.opt, self.n_frames_total, len(A_paths), index) 40 | 41 | # setting transformers 42 | B_img = Image.open(B_paths[start_idx]).convert('RGB') 43 | params = get_img_params(self.opt, B_img.size) 44 | transform_scaleB = get_transform(self.opt, params) 45 | transform_scaleA = get_transform(self.opt, params, method=Image.NEAREST, normalize=False) if self.A_is_label else transform_scaleB 46 | 47 | # read in images 48 | A = B = inst = 0 49 | for i in range(n_frames_total): 50 | A_path = A_paths[start_idx + i * t_step] 51 | B_path = B_paths[start_idx + i * t_step] 52 | Ai = self.get_image(A_path, transform_scaleA, is_label=self.A_is_label) 53 | Bi = self.get_image(B_path, transform_scaleB) 54 | 55 | A = Ai if i == 0 else torch.cat([A, Ai], dim=0) 56 | B = Bi if i == 0 else torch.cat([B, Bi], dim=0) 57 | 58 | if self.opt.use_instance: 59 | I_path = I_paths[start_idx + i * t_step] 60 | Ii = self.get_image(I_path, transform_scaleA) * 255.0 61 | inst = Ii if i == 0 else torch.cat([inst, Ii], dim=0) 62 | 63 | return_list = {'A': A, 'B': B, 'inst': inst, 'A_path': A_path, 'B_paths': B_path} 64 | return return_list 65 | 66 | def get_image(self, A_path, transform_scaleA, is_label=False): 67 | A_img = Image.open(A_path) 68 | A_scaled = transform_scaleA(A_img) 69 | if is_label: 70 | A_scaled *= 255.0 71 | return A_scaled 72 | 73 | def __len__(self): 74 | return len(self.A_paths) 75 | 76 | def name(self): 77 | return 'TemporalDataset' -------------------------------------------------------------------------------- /data/test_dataset.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | import os.path 4 | import torch 5 | from data.base_dataset import BaseDataset, get_img_params, get_transform, concat_frame 6 | from data.image_folder import make_grouped_dataset, check_path_valid 7 | from PIL import Image 8 | import numpy as np 9 | 10 | class TestDataset(BaseDataset): 11 | def initialize(self, opt): 12 | self.opt = opt 13 | self.root = opt.dataroot 14 | self.dir_A = os.path.join(opt.dataroot, opt.phase + '_A') 15 | self.dir_B = os.path.join(opt.dataroot, opt.phase + '_B') 16 | self.use_real = opt.use_real_img 17 | self.A_is_label = self.opt.label_nc != 0 18 | 19 | self.A_paths = sorted(make_grouped_dataset(self.dir_A)) 20 | if self.use_real: 21 | self.B_paths = sorted(make_grouped_dataset(self.dir_B)) 22 | check_path_valid(self.A_paths, self.B_paths) 23 | if self.opt.use_instance: 24 | self.dir_inst = os.path.join(opt.dataroot, opt.phase + '_inst') 25 | self.I_paths = sorted(make_grouped_dataset(self.dir_inst)) 26 | check_path_valid(self.A_paths, self.I_paths) 27 | 28 | self.init_frame_idx(self.A_paths) 29 | 30 | def __getitem__(self, index): 31 | self.A, self.B, self.I, seq_idx = self.update_frame_idx(self.A_paths, index) 32 | tG = self.opt.n_frames_G 33 | 34 | A_img = Image.open(self.A_paths[seq_idx][0]).convert('RGB') 35 | params = get_img_params(self.opt, A_img.size) 36 | transform_scaleB = get_transform(self.opt, params) 37 | transform_scaleA = get_transform(self.opt, params, method=Image.NEAREST, normalize=False) if self.A_is_label else transform_scaleB 38 | frame_range = list(range(tG)) if self.A is None else [tG-1] 39 | 40 | for i in frame_range: 41 | A_path = self.A_paths[seq_idx][self.frame_idx + i] 42 | Ai = self.get_image(A_path, transform_scaleA, is_label=self.A_is_label) 43 | self.A = concat_frame(self.A, Ai, tG) 44 | 45 | if self.use_real: 46 | B_path = self.B_paths[seq_idx][self.frame_idx + i] 47 | Bi = self.get_image(B_path, transform_scaleB) 48 | self.B = concat_frame(self.B, Bi, tG) 49 | else: 50 | self.B = 0 51 | 52 | if self.opt.use_instance: 53 | I_path = self.I_paths[seq_idx][self.frame_idx + i] 54 | Ii = self.get_image(I_path, transform_scaleA) * 255.0 55 | self.I = concat_frame(self.I, Ii, tG) 56 | else: 57 | self.I = 0 58 | 59 | self.frame_idx += 1 60 | return_list = {'A': self.A, 'B': self.B, 'inst': self.I, 'A_path': A_path, 'change_seq': self.change_seq} 61 | return return_list 62 | 63 | def get_image(self, A_path, transform_scaleA, is_label=False): 64 | A_img = Image.open(A_path) 65 | A_scaled = transform_scaleA(A_img) 66 | if is_label: 67 | A_scaled *= 255.0 68 | return A_scaled 69 | 70 | def __len__(self): 71 | return sum(self.frames_count) 72 | 73 | def n_of_seqs(self): 74 | return len(self.A_paths) 75 | 76 | def name(self): 77 | return 'TestDataset' -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 2 | 3 | RUN apt-get update && apt-get install -y rsync htop git openssh-server 4 | 5 | RUN apt-get install python3-pip -y 6 | RUN ln -s /usr/bin/python3 /usr/bin/python 7 | RUN pip3 install --upgrade pip 8 | 9 | #Torch and dependencies: 10 | RUN pip install http://download.pytorch.org/whl/cu80/torch-0.4.0-cp35-cp35m-linux_x86_64.whl 11 | RUN pip install torchvision cffi tensorboardX 12 | RUN pip install tqdm scipy scikit-image colorama==0.3.7 13 | RUN pip install setproctitle pytz ipython 14 | 15 | #vid2vid dependencies 16 | RUN apt-get install libglib2.0-0 
libsm6 libxrender1 -y 17 | RUN pip install dominate requests opencv-python 18 | 19 | #pix2pixHD, required for initializing training 20 | RUN git clone https://github.com/NVIDIA/pix2pixHD /pix2pixHD 21 | 22 | #vid2vid install 23 | RUN git clone https://github.com/NVIDIA/vid2vid /vid2vid 24 | WORKDIR /vid2vid 25 | #download flownet2 model dependencies 26 | #WARNING: we had an instance where these scripts needed to be re-run after the docker instance was launched 27 | RUN python scripts/download_flownet2.py 28 | RUN python scripts/download_models_flownet2.py 29 | 30 | 31 | -------------------------------------------------------------------------------- /docker/launch_docker.sh: -------------------------------------------------------------------------------- 1 | # Thanks @dustinfreeman for providing the script 2 | #!/bin/bash 3 | sudo nvidia-docker build -t vid2vid:CUDA9-py35 . 4 | 5 | sudo nvidia-docker run --rm -ti --ipc=host --shm-size 8G -v $(pwd):/vid2vid --workdir=/vid2vid vid2vid:CUDA9-py35 /bin/bash 6 | -------------------------------------------------------------------------------- /docker/pre_docker_install.sh: -------------------------------------------------------------------------------- 1 | #Thanks @dustinfreeman for providing the script 2 | 3 | #Install docker-ce https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository 4 | sudo apt-get install -y \ 5 | apt-transport-https \ 6 | ca-certificates \ 7 | curl \ 8 | software-properties-common 9 | 10 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 11 | 12 | sudo add-apt-repository \ 13 | "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ 14 | $(lsb_release -cs) \ 15 | stable" 16 | sudo apt-get update 17 | sudo apt-get install -y docker-ce 18 | 19 | 20 | #Install nvidia-docker2 https://github.com/NVIDIA/nvidia-docker 21 | # Add the package repositories 22 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | \ 23 | sudo apt-key add - 24 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID) 25 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ 26 | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 27 | sudo apt-get update 28 | 29 | sudo apt-get install -y nvidia-docker2 30 | sudo pkill -SIGHUP dockerd 31 | 32 | 33 | #NVIDIA drivers 34 | #This triggers an interactive request to the user. 35 | #Would love an alternative! 
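#Note (untested sketch, not part of the original script): the plain assignment on the
#next line is not exported and will not reach the sudo'd apt-get processes; a common
#alternative is to pass the setting through sudo on each call, e.g.:
#  sudo DEBIAN_FRONTEND=noninteractive apt-get install -y keyboard-configuration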
36 | DEBIAN_FRONTEND=noninteractive 37 | sudo apt-get install -y keyboard-configuration 38 | sudo apt install -y ubuntu-drivers-common 39 | 40 | apt-get install -y nvidia-384 41 | 42 | #Reboot so the nvidia driver finishes install 43 | sudo reboot 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /imgs/city_change_labels.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/city_change_labels.gif -------------------------------------------------------------------------------- /imgs/city_change_styles.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/city_change_styles.gif -------------------------------------------------------------------------------- /imgs/face.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/face.gif -------------------------------------------------------------------------------- /imgs/face_multiple.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/face_multiple.gif -------------------------------------------------------------------------------- /imgs/framePredict.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/framePredict.gif -------------------------------------------------------------------------------- /imgs/pose.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/pose.gif -------------------------------------------------------------------------------- /imgs/teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/teaser.gif -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/__init__.py -------------------------------------------------------------------------------- /models/base_model.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import numpy as np 3 | import torch 4 | from .networks import get_grid 5 | 6 | class BaseModel(torch.nn.Module): 7 | def name(self): 8 | return 'BaseModel' 9 | 10 | def initialize(self, opt): 11 | self.opt = opt 12 | self.gpu_ids = opt.gpu_ids 13 | self.isTrain = opt.isTrain 14 | self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor 15 | self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) 16 | 17 | def set_input(self, input): 18 | self.input = input 19 | 20 | def forward(self): 21 | pass 22 | 23 | # used in test time, no backprop 24 | def test(self): 25 | pass 26 | 27 | def get_image_paths(self): 28 | pass 29 | 30 | def optimize_parameters(self): 31 | pass 32 | 33 | def 
get_current_visuals(self): 34 | return self.input 35 | 36 | def get_current_errors(self): 37 | return {} 38 | 39 | def save(self, label): 40 | pass 41 | 42 | # helper saving function that can be used by subclasses 43 | def save_network(self, network, network_label, epoch_label, gpu_ids): 44 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 45 | save_path = os.path.join(self.save_dir, save_filename) 46 | torch.save(network.cpu().state_dict(), save_path) 47 | if len(gpu_ids) and torch.cuda.is_available(): 48 | network.cuda(gpu_ids[0]) 49 | 50 | def resolve_version(self): 51 | import torch._utils 52 | try: 53 | torch._utils._rebuild_tensor_v2 54 | except AttributeError: 55 | def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks): 56 | tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride) 57 | tensor.requires_grad = requires_grad 58 | tensor._backward_hooks = backward_hooks 59 | return tensor 60 | torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2 61 | 62 | # helper loading function that can be used by subclasses 63 | def load_network(self, network, network_label, epoch_label, save_dir=''): 64 | self.resolve_version() 65 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 66 | if not save_dir: 67 | save_dir = self.save_dir 68 | save_path = os.path.join(save_dir, save_filename) 69 | if not os.path.isfile(save_path): 70 | print('%s not exists yet!' % save_path) 71 | if 'G0' in network_label: 72 | raise('Generator must exist!') 73 | else: 74 | #network.load_state_dict(torch.load(save_path)) 75 | try: 76 | network.load_state_dict(torch.load(save_path)) 77 | except: 78 | pretrained_dict = torch.load(save_path) 79 | model_dict = network.state_dict() 80 | 81 | ### printout layers in pretrained model 82 | initialized = set() 83 | for k, v in pretrained_dict.items(): 84 | initialized.add(k.split('.')[0]) 85 | #print('pretrained model has following layers: ') 86 | #print(sorted(initialized)) 87 | 88 | try: 89 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 90 | network.load_state_dict(pretrained_dict) 91 | print('Pretrained network %s has excessive layers; Only loading layers that are used' % network_label) 92 | except: 93 | print('Pretrained network %s has fewer layers; The following are not initialized:' % network_label) 94 | if sys.version_info >= (3,0): 95 | not_initialized = set() 96 | else: 97 | from sets import Set 98 | not_initialized = Set() 99 | for k, v in pretrained_dict.items(): 100 | if v.size() == model_dict[k].size(): 101 | model_dict[k] = v 102 | 103 | for k, v in model_dict.items(): 104 | if k not in pretrained_dict or v.size() != pretrained_dict[k].size(): 105 | not_initialized.add(k.split('.')[0]) 106 | print(sorted(not_initialized)) 107 | network.load_state_dict(model_dict) 108 | 109 | def concat(self, tensors, dim=0): 110 | if tensors[0] is not None and tensors[1] is not None: 111 | if isinstance(tensors[0], list): 112 | tensors_cat = [] 113 | for i in range(len(tensors[0])): 114 | tensors_cat.append(self.concat([tensors[0][i], tensors[1][i]], dim=dim)) 115 | return tensors_cat 116 | return torch.cat([tensors[0], tensors[1]], dim=dim) 117 | elif tensors[0] is not None: 118 | return tensors[0] 119 | else: 120 | return tensors[1] 121 | 122 | def build_pyr(self, tensor, nearest=False): # build image pyramid from a single image 123 | if tensor is None: 124 | return [None] * self.n_scales 125 | tensor = [tensor] 126 | if nearest: 127 | downsample = 
torch.nn.AvgPool2d(1, stride=2) 128 | else: 129 | downsample = torch.nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False) 130 | for s in range(1, self.n_scales): 131 | b, t, c, h, w = tensor[-1].size() 132 | down = downsample(tensor[-1].view(-1, h, w)).view(b, t, c, h//2, w//2) 133 | tensor.append(down) 134 | return tensor 135 | 136 | def dists_min(self, a, b, num=1): 137 | dists = torch.sum(torch.sum((a-b)*(a-b), dim=0), dim=0) 138 | if num == 1: 139 | val, idx = torch.min(dists, dim=0) 140 | #idx = [idx] 141 | else: 142 | val, idx = torch.sort(dists, dim=0) 143 | idx = idx[:num] 144 | return idx.cpu().numpy().astype(int) 145 | 146 | def get_edges(self, t): 147 | edge = torch.cuda.ByteTensor(t.size()).zero_() 148 | edge[:,:,:,:,1:] = edge[:,:,:,:,1:] | (t[:,:,:,:,1:] != t[:,:,:,:,:-1]) 149 | edge[:,:,:,:,:-1] = edge[:,:,:,:,:-1] | (t[:,:,:,:,1:] != t[:,:,:,:,:-1]) 150 | edge[:,:,:,1:,:] = edge[:,:,:,1:,:] | (t[:,:,:,1:,:] != t[:,:,:,:-1,:]) 151 | edge[:,:,:,:-1,:] = edge[:,:,:,:-1,:] | (t[:,:,:,1:,:] != t[:,:,:,:-1,:]) 152 | return edge.float() 153 | 154 | def update_learning_rate(self, epoch, model): 155 | lr = self.opt.lr * (1 - (epoch - self.opt.niter) / self.opt.niter_decay) 156 | for param_group in getattr(self, 'optimizer_' + model).param_groups: 157 | param_group['lr'] = lr 158 | print('update learning rate: %f -> %f' % (self.old_lr, lr)) 159 | self.old_lr = lr 160 | 161 | def update_fixed_params(self): # finetune all scales instead of just finest scale 162 | params = [] 163 | for s in range(self.n_scales): 164 | params += list(getattr(self, 'netG'+str(s)).parameters()) 165 | self.optimizer_G = torch.optim.Adam(params, lr=self.old_lr, betas=(self.opt.beta1, 0.999)) 166 | self.finetune_all = True 167 | print('------------ Now finetuning all scales -----------') 168 | 169 | def update_training_batch(self, ratio): # increase number of backpropagated frames and number of frames in each GPU 170 | nfb = self.n_frames_bp 171 | nfl = self.n_frames_load 172 | if nfb < nfl: 173 | nfb = min(self.opt.max_frames_backpropagate, 2**ratio) 174 | self.n_frames_bp = nfl // int(np.ceil(float(nfl) / nfb)) 175 | print('-------- Updating number of backpropagated frames to %d ----------' % self.n_frames_bp) 176 | 177 | if self.n_frames_per_gpu < self.opt.max_frames_per_gpu: 178 | self.n_frames_per_gpu = min(self.n_frames_per_gpu*2, self.opt.max_frames_per_gpu) 179 | self.n_frames_load = self.n_gpus * self.n_frames_per_gpu 180 | print('-------- Updating number of frames per gpu to %d ----------' % self.n_frames_per_gpu) 181 | 182 | 183 | def grid_sample(self, input1, input2): 184 | if self.opt.fp16: # not sure if it's necessary 185 | return torch.nn.functional.grid_sample(input1.float(), input2.float(), mode='bilinear', padding_mode='border').half() 186 | else: 187 | return torch.nn.functional.grid_sample(input1, input2, mode='bilinear', padding_mode='border') 188 | 189 | def resample(self, image, flow): 190 | b, c, h, w = image.size() 191 | if not hasattr(self, 'grid') or self.grid.size() != flow.size(): 192 | self.grid = get_grid(b, h, w, gpu_id=flow.get_device(), dtype=flow.dtype) 193 | flow = torch.cat([flow[:, 0:1, :, :] / ((w - 1.0) / 2.0), flow[:, 1:2, :, :] / ((h - 1.0) / 2.0)], dim=1) 194 | final_grid = (self.grid + flow).permute(0, 2, 3, 1).cuda(image.get_device()) 195 | output = self.grid_sample(image, final_grid) 196 | return output -------------------------------------------------------------------------------- /models/flownet.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import sys 4 | from .base_model import BaseModel 5 | 6 | class FlowNet(BaseModel): 7 | def name(self): 8 | return 'FlowNet' 9 | 10 | def initialize(self, opt): 11 | BaseModel.initialize(self, opt) 12 | 13 | # flownet 2 14 | from .flownet2_pytorch import models as flownet2_models 15 | from .flownet2_pytorch.utils import tools as flownet2_tools 16 | from .flownet2_pytorch.networks.resample2d_package.resample2d import Resample2d 17 | 18 | self.flowNet = flownet2_tools.module_to_dict(flownet2_models)['FlowNet2'](fp16=opt.fp16).cuda(self.gpu_ids[0]) 19 | checkpoint = torch.load('models/flownet2_pytorch/FlowNet2_checkpoint.pth.tar') 20 | self.flowNet.load_state_dict(checkpoint['state_dict']) 21 | self.flowNet.eval() 22 | self.resample = Resample2d() 23 | self.downsample = torch.nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False) 24 | 25 | def forward(self, input_A, input_B, dummy_bs=0): 26 | with torch.no_grad(): 27 | if input_A.get_device() == self.gpu_ids[0]: 28 | input_A, input_B = input_A[dummy_bs:], input_B[dummy_bs:] 29 | if input_A.size(0) == 0: 30 | b, n, c, h, w = input_A.size() 31 | return self.Tensor(1, n, 2, h, w), self.Tensor(1, n, 1, h, w) 32 | size = input_A.size() 33 | assert(len(size) == 4 or len(size) == 5) 34 | if len(size) == 5: 35 | b, n, c, h, w = size 36 | input_A = input_A.contiguous().view(-1, c, h, w) 37 | input_B = input_B.contiguous().view(-1, c, h, w) 38 | flow, conf = self.compute_flow_and_conf(input_A, input_B) 39 | return flow.view(b, n, 2, h, w), conf.view(b, n, 1, h, w) 40 | else: 41 | return self.compute_flow_and_conf(input_A, input_B) 42 | 43 | def compute_flow_and_conf(self, im1, im2): 44 | assert(im1.size()[1] == 3) 45 | assert(im1.size() == im2.size()) 46 | old_h, old_w = im1.size()[2], im1.size()[3] 47 | new_h, new_w = old_h//64*64, old_w//64*64 48 | if old_h != new_h: 49 | downsample = torch.nn.Upsample(size=(new_h, new_w), mode='bilinear') 50 | upsample = torch.nn.Upsample(size=(old_h, old_w), mode='bilinear') 51 | im1 = downsample(im1) 52 | im2 = downsample(im2) 53 | data1 = torch.cat([im1.unsqueeze(2), im2.unsqueeze(2)], dim=2) 54 | flow1 = self.flowNet(data1) 55 | conf = (self.norm(im1 - self.resample(im2, flow1)) < 0.02).float() 56 | if old_h != new_h: 57 | flow1 = upsample(flow1) * old_h / new_h 58 | conf = upsample(conf) 59 | return flow1.detach(), conf.detach() 60 | 61 | def norm(self, t): 62 | return torch.sum(t*t, dim=1, keepdim=True) 63 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 NVIDIA CORPORATION 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
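The confidence map in `compute_flow_and_conf` (models/flownet.py above) is simply a threshold on the squared photometric error between `im1` and `im2` warped by the estimated flow. Below is a minimal sketch of the same idea in plain PyTorch, using `grid_sample` in place of the custom `Resample2d` layer; the 0.02 threshold comes from the code above, everything else is illustrative:

```python
import torch
import torch.nn.functional as F

def flow_warp(img, flow):
    # Bilinearly warp img (B,C,H,W) by flow (B,2,H,W) given in pixels,
    # mimicking what Resample2d does inside compute_flow_and_conf.
    b, _, h, w = img.size()
    xs = torch.arange(w, dtype=img.dtype, device=img.device).view(1, 1, w).expand(1, h, w)
    ys = torch.arange(h, dtype=img.dtype, device=img.device).view(1, h, 1).expand(1, h, w)
    x = (xs + flow[:, 0]) / (w - 1) * 2 - 1          # normalize to [-1, 1] for grid_sample
    y = (ys + flow[:, 1]) / (h - 1) * 2 - 1
    grid = torch.stack((x, y), dim=3)                # (B, H, W, 2)
    # on PyTorch >= 1.3 you may want align_corners=True to match this normalization
    return F.grid_sample(img, grid, mode='bilinear', padding_mode='border')

def conf_map(im1, im2, flow, thresh=0.02):
    # 1 where the per-pixel squared error after warping is small, 0 otherwise.
    err = ((im1 - flow_warp(im2, flow)) ** 2).sum(dim=1, keepdim=True)
    return (err < thresh).float()
```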
-------------------------------------------------------------------------------- /models/flownet2_pytorch/README.md: -------------------------------------------------------------------------------- 1 | # flownet2-pytorch 2 | 3 | Pytorch implementation of [FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks](https://arxiv.org/abs/1612.01925). 4 | 5 | Multiple GPU training is supported, and the code provides examples for training or inference on [MPI-Sintel](http://sintel.is.tue.mpg.de/) clean and final datasets. The same commands can be used for training or inference with other datasets. See below for more detail. 6 | 7 | Inference using fp16 (half-precision) is also supported. 8 | 9 | For more help, type
10 | 11 | python main.py --help 12 | 13 | ## Network architectures 14 | Below are the different flownet neural network architectures that are provided.
15 | A batchnorm version for each network is also available. 16 | 17 | - **FlowNet2S** 18 | - **FlowNet2C** 19 | - **FlowNet2CS** 20 | - **FlowNet2CSS** 21 | - **FlowNet2SD** 22 | - **FlowNet2** 23 | 24 | ## Custom layers 25 | 26 | `FlowNet2` or `FlowNet2C*` architectures rely on custom layers `Resample2d` or `Correlation`.
27 | A Pytorch implementation of these layers with CUDA kernels is available at [./networks](./networks).
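A minimal usage sketch (not from the original README), assuming the extensions from `install.sh` have been built and a GPU is available; `Resample2d` bilinearly samples an image at the positions given by a per-pixel flow field:

```python
import torch
from resample2d_package.resample2d import Resample2d  # import path assumes you run from ./networks

warp = Resample2d()
image = torch.randn(1, 3, 64, 64, device='cuda')   # (B, C, H, W)
flow  = torch.zeros(1, 2, 64, 64, device='cuda')   # zero flow -> identity warp
warped = warp(image, flow)                          # same shape as `image`
```

This is the layer that `models/flownet.py` above uses to warp frames when computing its flow confidence map.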
28 | Note: Currently, half-precision kernels are not available for these layers. 29 | 30 | ## Data Loaders 31 | 32 | Dataloaders for FlyingChairs, FlyingThings, ChairsSDHom and ImagesFromFolder are available in [datasets.py](./datasets.py).
33 | 34 | ## Loss Functions 35 | 36 | L1 and L2 losses with multi-scale support are available in [losses.py](./losses.py).
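A short sketch of calling the `MultiScale` loss from `losses.py` directly, outside of `main.py`; the `args` namespace is only stored by the loss, and the per-scale shapes assume the default `startScale=4`, `numScales=5`:

```python
import argparse
import torch
from losses import MultiScale   # losses.py in this directory

args = argparse.Namespace()                       # only kept as a reference by the loss
criterion = MultiScale(args, startScale=4, numScales=5, norm='L1')

target = torch.randn(8, 2, 384, 512)              # ground-truth flow at full resolution
# In training mode the networks return one flow prediction per scale (finest first).
output = tuple(torch.randn(8, 2, 384 // (4 * 2 ** s), 512 // (4 * 2 ** s)) for s in range(5))
loss, epe = criterion(output, target)             # forward() returns [loss, EPE]
```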
37 | 38 | ## Installation 39 | 40 | # get flownet2-pytorch source 41 | git clone https://github.com/NVIDIA/flownet2-pytorch.git 42 | cd flownet2-pytorch 43 | 44 | # install custom layers 45 | bash install.sh 46 | 47 | ### Python requirements 48 | Currently, the code supports python 3 49 | * numpy 50 | * PyTorch ( == 0.4.1, for <= 0.4.0 see branch [python36-PyTorch0.4](https://github.com/NVIDIA/flownet2-pytorch/tree/python36-PyTorch0.4)) 51 | * scipy 52 | * scikit-image 53 | * tensorboardX 54 | * colorama, tqdm, setproctitle 55 | 56 | ## Converted Caffe Pre-trained Models 57 | We've included caffe pre-trained models. Should you use these pre-trained weights, please adhere to the [license agreements](https://drive.google.com/file/d/1TVv0BnNFh3rpHZvD-easMb9jYrPE2Eqd/view?usp=sharing). 58 | 59 | * [FlowNet2](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view?usp=sharing)[620MB] 60 | * [FlowNet2-C](https://drive.google.com/file/d/1BFT6b7KgKJC8rA59RmOVAXRM_S7aSfKE/view?usp=sharing)[149MB] 61 | * [FlowNet2-CS](https://drive.google.com/file/d/1iBJ1_o7PloaINpa8m7u_7TsLCX0Dt_jS/view?usp=sharing)[297MB] 62 | * [FlowNet2-CSS](https://drive.google.com/file/d/157zuzVf4YMN6ABAQgZc8rRmR5cgWzSu8/view?usp=sharing)[445MB] 63 | * [FlowNet2-CSS-ft-sd](https://drive.google.com/file/d/1R5xafCIzJCXc8ia4TGfC65irmTNiMg6u/view?usp=sharing)[445MB] 64 | * [FlowNet2-S](https://drive.google.com/file/d/1V61dZjFomwlynwlYklJHC-TLfdFom3Lg/view?usp=sharing)[148MB] 65 | * [FlowNet2-SD](https://drive.google.com/file/d/1QW03eyYG_vD-dT-Mx4wopYvtPu_msTKn/view?usp=sharing)[173MB] 66 | 67 | ## Inference 68 | # Example on MPISintel Clean 69 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset MpiSintelClean \ 70 | --inference_dataset_root /path/to/mpi-sintel/clean/dataset \ 71 | --resume /path/to/checkpoints 72 | 73 | ## Training and validation 74 | 75 | # Example on MPISintel Final and Clean, with L1Loss on FlowNet2 model 76 | python main.py --batch_size 8 --model FlowNet2 --loss=L1Loss --optimizer=Adam --optimizer_lr=1e-4 \ 77 | --training_dataset MpiSintelFinal --training_dataset_root /path/to/mpi-sintel/final/dataset \ 78 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset 79 | 80 | # Example on MPISintel Final and Clean, with MultiScale loss on FlowNet2C model 81 | python main.py --batch_size 8 --model FlowNet2C --optimizer=Adam --optimizer_lr=1e-4 --loss=MultiScale --loss_norm=L1 \ 82 | --loss_numScales=5 --loss_startScale=4 --optimizer_lr=1e-4 --crop_size 384 512 \ 83 | --training_dataset FlyingChairs --training_dataset_root /path/to/flying-chairs/dataset \ 84 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset 85 | 86 | ## Results on MPI-Sintel 87 | [![Predicted flows on MPI-Sintel](./image.png)](https://www.youtube.com/watch?v=HtBmabY8aeU "Predicted flows on MPI-Sintel") 88 | 89 | ## Reference 90 | If you find this implementation useful in your work, please acknowledge it appropriately and cite the paper: 91 | ```` 92 | @InProceedings{IMKDB17, 93 | author = "E. Ilg and N. Mayer and T. Saikia and M. Keuper and A. Dosovitskiy and T. 
Brox", 94 | title = "FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks", 95 | booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)", 96 | month = "Jul", 97 | year = "2017", 98 | url = "http://lmb.informatik.uni-freiburg.de//Publications/2017/IMKDB17" 99 | } 100 | ```` 101 | ``` 102 | @misc{flownet2-pytorch, 103 | author = {Fitsum Reda and Robert Pottorff and Jon Barker and Bryan Catanzaro}, 104 | title = {flownet2-pytorch: Pytorch implementation of FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks}, 105 | year = {2017}, 106 | publisher = {GitHub}, 107 | journal = {GitHub repository}, 108 | howpublished = {\url{https://github.com/NVIDIA/flownet2-pytorch}} 109 | } 110 | ``` 111 | ## Related Optical Flow Work from Nvidia 112 | Code (in Caffe and Pytorch): [PWC-Net](https://github.com/NVlabs/PWC-Net)
113 | Paper : [PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume](https://arxiv.org/abs/1709.02371). 114 | 115 | ## Acknowledgments 116 | Parts of this code were derived, as noted in the code, from [ClementPinard/FlowNetPytorch](https://github.com/ClementPinard/FlowNetPytorch). 117 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/convert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import caffe 4 | from caffe.proto import caffe_pb2 5 | import sys, os 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | import argparse, tempfile 11 | import numpy as np 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('caffe_model', help='input model in hdf5 or caffemodel format') 15 | parser.add_argument('prototxt_template',help='prototxt template') 16 | parser.add_argument('flownet2_pytorch', help='path to flownet2-pytorch') 17 | 18 | args = parser.parse_args() 19 | 20 | args.rgb_max = 255 21 | args.fp16 = False 22 | args.grads = {} 23 | 24 | # load models 25 | sys.path.append(args.flownet2_pytorch) 26 | 27 | import models 28 | from utils.param_utils import * 29 | 30 | width = 256 31 | height = 256 32 | keys = {'TARGET_WIDTH': width, 33 | 'TARGET_HEIGHT': height, 34 | 'ADAPTED_WIDTH':width, 35 | 'ADAPTED_HEIGHT':height, 36 | 'SCALE_WIDTH':1., 37 | 'SCALE_HEIGHT':1.,} 38 | 39 | template = '\n'.join(np.loadtxt(args.prototxt_template, dtype=str, delimiter='\n')) 40 | for k in keys: 41 | template = template.replace('$%s$'%(k),str(keys[k])) 42 | 43 | prototxt = tempfile.NamedTemporaryFile(mode='w', delete=True) 44 | prototxt.write(template) 45 | prototxt.flush() 46 | 47 | net = caffe.Net(prototxt.name, args.caffe_model, caffe.TEST) 48 | 49 | weights = {} 50 | biases = {} 51 | 52 | for k, v in list(net.params.items()): 53 | weights[k] = np.array(v[0].data).reshape(v[0].data.shape) 54 | biases[k] = np.array(v[1].data).reshape(v[1].data.shape) 55 | print((k, weights[k].shape, biases[k].shape)) 56 | 57 | if 'FlowNet2/' in args.caffe_model: 58 | model = models.FlowNet2(args) 59 | 60 | parse_flownetc(model.flownetc.modules(), weights, biases) 61 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 62 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') 63 | parse_flownetsd(model.flownets_d.modules(), weights, biases, param_prefix='netsd_') 64 | parse_flownetfusion(model.flownetfusion.modules(), weights, biases, param_prefix='fuse_') 65 | 66 | state = {'epoch': 0, 67 | 'state_dict': model.state_dict(), 68 | 'best_EPE': 1e10} 69 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2_checkpoint.pth.tar')) 70 | 71 | elif 'FlowNet2-C/' in args.caffe_model: 72 | model = models.FlowNet2C(args) 73 | 74 | parse_flownetc(model.modules(), weights, biases) 75 | state = {'epoch': 0, 76 | 'state_dict': model.state_dict(), 77 | 'best_EPE': 1e10} 78 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-C_checkpoint.pth.tar')) 79 | 80 | elif 'FlowNet2-CS/' in args.caffe_model: 81 | model = models.FlowNet2CS(args) 82 | 83 | 
parse_flownetc(model.flownetc.modules(), weights, biases) 84 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 85 | 86 | state = {'epoch': 0, 87 | 'state_dict': model.state_dict(), 88 | 'best_EPE': 1e10} 89 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CS_checkpoint.pth.tar')) 90 | 91 | elif 'FlowNet2-CSS/' in args.caffe_model: 92 | model = models.FlowNet2CSS(args) 93 | 94 | parse_flownetc(model.flownetc.modules(), weights, biases) 95 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 96 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') 97 | 98 | state = {'epoch': 0, 99 | 'state_dict': model.state_dict(), 100 | 'best_EPE': 1e10} 101 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS_checkpoint.pth.tar')) 102 | 103 | elif 'FlowNet2-CSS-ft-sd/' in args.caffe_model: 104 | model = models.FlowNet2CSS(args) 105 | 106 | parse_flownetc(model.flownetc.modules(), weights, biases) 107 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 108 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') 109 | 110 | state = {'epoch': 0, 111 | 'state_dict': model.state_dict(), 112 | 'best_EPE': 1e10} 113 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS-ft-sd_checkpoint.pth.tar')) 114 | 115 | elif 'FlowNet2-S/' in args.caffe_model: 116 | model = models.FlowNet2S(args) 117 | 118 | parse_flownetsonly(model.modules(), weights, biases, param_prefix='') 119 | state = {'epoch': 0, 120 | 'state_dict': model.state_dict(), 121 | 'best_EPE': 1e10} 122 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-S_checkpoint.pth.tar')) 123 | 124 | elif 'FlowNet2-SD/' in args.caffe_model: 125 | model = models.FlowNet2SD(args) 126 | 127 | parse_flownetsd(model.modules(), weights, biases, param_prefix='') 128 | 129 | state = {'epoch': 0, 130 | 'state_dict': model.state_dict(), 131 | 'best_EPE': 1e10} 132 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-SD_checkpoint.pth.tar')) 133 | 134 | else: 135 | print(('model type cound not be determined from input caffe model %s'%(args.caffe_model))) 136 | quit() 137 | print(("done converting ", args.caffe_model)) -------------------------------------------------------------------------------- /models/flownet2_pytorch/download_caffe_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sudo rm -rf flownet2-docker 3 | sudo git clone https://github.com/lmb-freiburg/flownet2-docker 4 | cd flownet2-docker 5 | 6 | sudo sed -i '$ a RUN apt-get update && apt-get install -y python-pip \ 7 | RUN pip install --upgrade pip \ 8 | RUN pip install numpy -I \ 9 | RUN pip install http://download.pytorch.org/whl/cu80/torch-0.2.0.post3-cp27-cp27mu-manylinux1_x86_64.whl \ 10 | RUN pip install cffi ipython' Dockerfile 11 | 12 | sudo make 13 | 14 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd ./networks/correlation_package 3 | python setup.py install --user 4 | cd ../resample2d_package 5 | python setup.py install --user 6 | cd ../channelnorm_package 7 | python setup.py install --user 8 | cd .. 
9 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/launch_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sudo nvidia-docker build -t $USER/pytorch:CUDA8-py27 . 3 | sudo nvidia-docker run --rm -ti --volume=$(pwd):/flownet2-pytorch:rw --workdir=/flownet2-pytorch --ipc=host $USER/pytorch:CUDA8-py27 /bin/bash 4 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/losses.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Portions of this code copyright 2017, Clement Pinard 3 | ''' 4 | 5 | # freda (todo) : adversarial loss 6 | 7 | import torch 8 | import torch.nn as nn 9 | import math 10 | 11 | def EPE(input_flow, target_flow): 12 | return torch.norm(target_flow-input_flow,p=2,dim=1).mean() 13 | 14 | class L1(nn.Module): 15 | def __init__(self): 16 | super(L1, self).__init__() 17 | def forward(self, output, target): 18 | lossvalue = torch.abs(output - target).mean() 19 | return lossvalue 20 | 21 | class L2(nn.Module): 22 | def __init__(self): 23 | super(L2, self).__init__() 24 | def forward(self, output, target): 25 | lossvalue = torch.norm(output-target,p=2,dim=1).mean() 26 | return lossvalue 27 | 28 | class L1Loss(nn.Module): 29 | def __init__(self, args): 30 | super(L1Loss, self).__init__() 31 | self.args = args 32 | self.loss = L1() 33 | self.loss_labels = ['L1', 'EPE'] 34 | 35 | def forward(self, output, target): 36 | lossvalue = self.loss(output, target) 37 | epevalue = EPE(output, target) 38 | return [lossvalue, epevalue] 39 | 40 | class L2Loss(nn.Module): 41 | def __init__(self, args): 42 | super(L2Loss, self).__init__() 43 | self.args = args 44 | self.loss = L2() 45 | self.loss_labels = ['L2', 'EPE'] 46 | 47 | def forward(self, output, target): 48 | lossvalue = self.loss(output, target) 49 | epevalue = EPE(output, target) 50 | return [lossvalue, epevalue] 51 | 52 | class MultiScale(nn.Module): 53 | def __init__(self, args, startScale = 4, numScales = 5, l_weight= 0.32, norm= 'L1'): 54 | super(MultiScale,self).__init__() 55 | 56 | self.startScale = startScale 57 | self.numScales = numScales 58 | self.loss_weights = torch.FloatTensor([(l_weight / 2 ** scale) for scale in range(self.numScales)]) 59 | self.args = args 60 | self.l_type = norm 61 | self.div_flow = 0.05 62 | assert(len(self.loss_weights) == self.numScales) 63 | 64 | if self.l_type == 'L1': 65 | self.loss = L1() 66 | else: 67 | self.loss = L2() 68 | 69 | self.multiScales = [nn.AvgPool2d(self.startScale * (2**scale), self.startScale * (2**scale)) for scale in range(self.numScales)] 70 | self.loss_labels = ['MultiScale-'+self.l_type, 'EPE'], 71 | 72 | def forward(self, output, target): 73 | lossvalue = 0 74 | epevalue = 0 75 | 76 | if type(output) is tuple: 77 | target = self.div_flow * target 78 | for i, output_ in enumerate(output): 79 | target_ = self.multiScales[i](target) 80 | epevalue += self.loss_weights[i]*EPE(output_, target_) 81 | lossvalue += self.loss_weights[i]*self.loss(output_, target_) 82 | return [lossvalue, epevalue] 83 | else: 84 | epevalue += EPE(output, target) 85 | lossvalue += self.loss(output, target) 86 | return [lossvalue, epevalue] 87 | 88 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/FlowNetC.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 
as nn 3 | from torch.nn import init 4 | 5 | import math 6 | import numpy as np 7 | 8 | from .correlation_package.correlation import Correlation 9 | 10 | from .submodules import * 11 | 'Parameter count , 39,175,298 ' 12 | 13 | class FlowNetC(nn.Module): 14 | def __init__(self, args, batchNorm=True, div_flow = 20): 15 | super(FlowNetC,self).__init__() 16 | self.fp16 = args.fp16 17 | self.batchNorm = batchNorm 18 | self.div_flow = div_flow 19 | 20 | self.conv1 = conv(self.batchNorm, 3, 64, kernel_size=7, stride=2) 21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2) 22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2) 23 | self.conv_redir = conv(self.batchNorm, 256, 32, kernel_size=1, stride=1) 24 | 25 | """if args.fp16: 26 | self.corr = nn.Sequential( 27 | tofp32(), 28 | Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1), 29 | tofp16()) 30 | else:""" 31 | self.corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1) 32 | 33 | self.corr_activation = nn.LeakyReLU(0.1,inplace=True) 34 | self.conv3_1 = conv(self.batchNorm, 473, 256) 35 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2) 36 | self.conv4_1 = conv(self.batchNorm, 512, 512) 37 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2) 38 | self.conv5_1 = conv(self.batchNorm, 512, 512) 39 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2) 40 | self.conv6_1 = conv(self.batchNorm,1024, 1024) 41 | 42 | self.deconv5 = deconv(1024,512) 43 | self.deconv4 = deconv(1026,256) 44 | self.deconv3 = deconv(770,128) 45 | self.deconv2 = deconv(386,64) 46 | 47 | self.predict_flow6 = predict_flow(1024) 48 | self.predict_flow5 = predict_flow(1026) 49 | self.predict_flow4 = predict_flow(770) 50 | self.predict_flow3 = predict_flow(386) 51 | self.predict_flow2 = predict_flow(194) 52 | 53 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 54 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 55 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 56 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 57 | 58 | for m in self.modules(): 59 | if isinstance(m, nn.Conv2d): 60 | if m.bias is not None: 61 | init.uniform_(m.bias) 62 | init.xavier_uniform_(m.weight) 63 | 64 | if isinstance(m, nn.ConvTranspose2d): 65 | if m.bias is not None: 66 | init.uniform_(m.bias) 67 | init.xavier_uniform_(m.weight) 68 | # init_deconv_bilinear(m.weight) 69 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear') 70 | 71 | def forward(self, x): 72 | x1 = x[:,0:3,:,:] 73 | x2 = x[:,3::,:,:] 74 | 75 | out_conv1a = self.conv1(x1) 76 | out_conv2a = self.conv2(out_conv1a) 77 | out_conv3a = self.conv3(out_conv2a) 78 | 79 | # FlownetC bottom input stream 80 | out_conv1b = self.conv1(x2) 81 | 82 | out_conv2b = self.conv2(out_conv1b) 83 | out_conv3b = self.conv3(out_conv2b) 84 | 85 | # Merge streams 86 | if self.fp16: 87 | out_corr = self.corr(out_conv3a.float(), out_conv3b.float()).half() # False 88 | else: 89 | out_corr = self.corr(out_conv3a, out_conv3b) # False 90 | out_corr = self.corr_activation(out_corr) 91 | 92 | # Redirect top input stream and concatenate 93 | out_conv_redir = self.conv_redir(out_conv3a) 94 | 95 | in_conv3_1 = torch.cat((out_conv_redir, out_corr), 1) 96 | 97 | # Merged conv layers 98 | out_conv3_1 = self.conv3_1(in_conv3_1) 99 | 100 | out_conv4 = self.conv4_1(self.conv4(out_conv3_1)) 101 | 102 | out_conv5 = 
self.conv5_1(self.conv5(out_conv4)) 103 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 104 | 105 | flow6 = self.predict_flow6(out_conv6) 106 | flow6_up = self.upsampled_flow6_to_5(flow6) 107 | out_deconv5 = self.deconv5(out_conv6) 108 | 109 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1) 110 | 111 | flow5 = self.predict_flow5(concat5) 112 | flow5_up = self.upsampled_flow5_to_4(flow5) 113 | out_deconv4 = self.deconv4(concat5) 114 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1) 115 | 116 | flow4 = self.predict_flow4(concat4) 117 | flow4_up = self.upsampled_flow4_to_3(flow4) 118 | out_deconv3 = self.deconv3(concat4) 119 | concat3 = torch.cat((out_conv3_1,out_deconv3,flow4_up),1) 120 | 121 | flow3 = self.predict_flow3(concat3) 122 | flow3_up = self.upsampled_flow3_to_2(flow3) 123 | out_deconv2 = self.deconv2(concat3) 124 | concat2 = torch.cat((out_conv2a,out_deconv2,flow3_up),1) 125 | 126 | flow2 = self.predict_flow2(concat2) 127 | 128 | if self.training: 129 | return flow2,flow3,flow4,flow5,flow6 130 | else: 131 | return flow2, 132 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/FlowNetFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | 5 | import math 6 | import numpy as np 7 | 8 | from .submodules import * 9 | 'Parameter count = 581,226' 10 | 11 | class FlowNetFusion(nn.Module): 12 | def __init__(self,args, batchNorm=True): 13 | super(FlowNetFusion,self).__init__() 14 | 15 | self.batchNorm = batchNorm 16 | self.conv0 = conv(self.batchNorm, 11, 64) 17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2) 18 | self.conv1_1 = conv(self.batchNorm, 64, 128) 19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2) 20 | self.conv2_1 = conv(self.batchNorm, 128, 128) 21 | 22 | self.deconv1 = deconv(128,32) 23 | self.deconv0 = deconv(162,16) 24 | 25 | self.inter_conv1 = i_conv(self.batchNorm, 162, 32) 26 | self.inter_conv0 = i_conv(self.batchNorm, 82, 16) 27 | 28 | self.predict_flow2 = predict_flow(128) 29 | self.predict_flow1 = predict_flow(32) 30 | self.predict_flow0 = predict_flow(16) 31 | 32 | self.upsampled_flow2_to_1 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 33 | self.upsampled_flow1_to_0 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 34 | 35 | for m in self.modules(): 36 | if isinstance(m, nn.Conv2d): 37 | if m.bias is not None: 38 | init.uniform_(m.bias) 39 | init.xavier_uniform_(m.weight) 40 | 41 | if isinstance(m, nn.ConvTranspose2d): 42 | if m.bias is not None: 43 | init.uniform_(m.bias) 44 | init.xavier_uniform_(m.weight) 45 | # init_deconv_bilinear(m.weight) 46 | 47 | def forward(self, x): 48 | out_conv0 = self.conv0(x) 49 | out_conv1 = self.conv1_1(self.conv1(out_conv0)) 50 | out_conv2 = self.conv2_1(self.conv2(out_conv1)) 51 | 52 | flow2 = self.predict_flow2(out_conv2) 53 | flow2_up = self.upsampled_flow2_to_1(flow2) 54 | out_deconv1 = self.deconv1(out_conv2) 55 | 56 | concat1 = torch.cat((out_conv1,out_deconv1,flow2_up),1) 57 | out_interconv1 = self.inter_conv1(concat1) 58 | flow1 = self.predict_flow1(out_interconv1) 59 | flow1_up = self.upsampled_flow1_to_0(flow1) 60 | out_deconv0 = self.deconv0(concat1) 61 | 62 | concat0 = torch.cat((out_conv0,out_deconv0,flow1_up),1) 63 | out_interconv0 = self.inter_conv0(concat0) 64 | flow0 = self.predict_flow0(out_interconv0) 65 | 66 | return flow0 67 | 68 | -------------------------------------------------------------------------------- 
/models/flownet2_pytorch/networks/FlowNetS.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Portions of this code copyright 2017, Clement Pinard 3 | ''' 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.nn import init 8 | 9 | import math 10 | import numpy as np 11 | 12 | from .submodules import * 13 | 'Parameter count : 38,676,504 ' 14 | 15 | class FlowNetS(nn.Module): 16 | def __init__(self, args, input_channels = 12, batchNorm=True): 17 | super(FlowNetS,self).__init__() 18 | 19 | self.batchNorm = batchNorm 20 | self.conv1 = conv(self.batchNorm, input_channels, 64, kernel_size=7, stride=2) 21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2) 22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2) 23 | self.conv3_1 = conv(self.batchNorm, 256, 256) 24 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2) 25 | self.conv4_1 = conv(self.batchNorm, 512, 512) 26 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2) 27 | self.conv5_1 = conv(self.batchNorm, 512, 512) 28 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2) 29 | self.conv6_1 = conv(self.batchNorm,1024, 1024) 30 | 31 | self.deconv5 = deconv(1024,512) 32 | self.deconv4 = deconv(1026,256) 33 | self.deconv3 = deconv(770,128) 34 | self.deconv2 = deconv(386,64) 35 | 36 | self.predict_flow6 = predict_flow(1024) 37 | self.predict_flow5 = predict_flow(1026) 38 | self.predict_flow4 = predict_flow(770) 39 | self.predict_flow3 = predict_flow(386) 40 | self.predict_flow2 = predict_flow(194) 41 | 42 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 43 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 44 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 45 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 46 | 47 | for m in self.modules(): 48 | if isinstance(m, nn.Conv2d): 49 | if m.bias is not None: 50 | init.uniform_(m.bias) 51 | init.xavier_uniform_(m.weight) 52 | 53 | if isinstance(m, nn.ConvTranspose2d): 54 | if m.bias is not None: 55 | init.uniform_(m.bias) 56 | init.xavier_uniform_(m.weight) 57 | # init_deconv_bilinear(m.weight) 58 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear') 59 | 60 | def forward(self, x): 61 | out_conv1 = self.conv1(x) 62 | 63 | out_conv2 = self.conv2(out_conv1) 64 | out_conv3 = self.conv3_1(self.conv3(out_conv2)) 65 | out_conv4 = self.conv4_1(self.conv4(out_conv3)) 66 | out_conv5 = self.conv5_1(self.conv5(out_conv4)) 67 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 68 | 69 | flow6 = self.predict_flow6(out_conv6) 70 | flow6_up = self.upsampled_flow6_to_5(flow6) 71 | out_deconv5 = self.deconv5(out_conv6) 72 | 73 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1) 74 | flow5 = self.predict_flow5(concat5) 75 | flow5_up = self.upsampled_flow5_to_4(flow5) 76 | out_deconv4 = self.deconv4(concat5) 77 | 78 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1) 79 | flow4 = self.predict_flow4(concat4) 80 | flow4_up = self.upsampled_flow4_to_3(flow4) 81 | out_deconv3 = self.deconv3(concat4) 82 | 83 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1) 84 | flow3 = self.predict_flow3(concat3) 85 | flow3_up = self.upsampled_flow3_to_2(flow3) 86 | out_deconv2 = self.deconv2(concat3) 87 | 88 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1) 89 | flow2 = self.predict_flow2(concat2) 90 | 91 | if self.training: 92 | return flow2,flow3,flow4,flow5,flow6 93 | else: 94 | return flow2, 95 | 96 
| -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/FlowNetSD.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | 5 | import math 6 | import numpy as np 7 | 8 | from .submodules import * 9 | 'Parameter count = 45,371,666' 10 | 11 | class FlowNetSD(nn.Module): 12 | def __init__(self, args, batchNorm=True): 13 | super(FlowNetSD,self).__init__() 14 | 15 | self.batchNorm = batchNorm 16 | self.conv0 = conv(self.batchNorm, 6, 64) 17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2) 18 | self.conv1_1 = conv(self.batchNorm, 64, 128) 19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2) 20 | self.conv2_1 = conv(self.batchNorm, 128, 128) 21 | self.conv3 = conv(self.batchNorm, 128, 256, stride=2) 22 | self.conv3_1 = conv(self.batchNorm, 256, 256) 23 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2) 24 | self.conv4_1 = conv(self.batchNorm, 512, 512) 25 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2) 26 | self.conv5_1 = conv(self.batchNorm, 512, 512) 27 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2) 28 | self.conv6_1 = conv(self.batchNorm,1024, 1024) 29 | 30 | self.deconv5 = deconv(1024,512) 31 | self.deconv4 = deconv(1026,256) 32 | self.deconv3 = deconv(770,128) 33 | self.deconv2 = deconv(386,64) 34 | 35 | self.inter_conv5 = i_conv(self.batchNorm, 1026, 512) 36 | self.inter_conv4 = i_conv(self.batchNorm, 770, 256) 37 | self.inter_conv3 = i_conv(self.batchNorm, 386, 128) 38 | self.inter_conv2 = i_conv(self.batchNorm, 194, 64) 39 | 40 | self.predict_flow6 = predict_flow(1024) 41 | self.predict_flow5 = predict_flow(512) 42 | self.predict_flow4 = predict_flow(256) 43 | self.predict_flow3 = predict_flow(128) 44 | self.predict_flow2 = predict_flow(64) 45 | 46 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 47 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 48 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 49 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 50 | 51 | for m in self.modules(): 52 | if isinstance(m, nn.Conv2d): 53 | if m.bias is not None: 54 | init.uniform_(m.bias) 55 | init.xavier_uniform_(m.weight) 56 | 57 | if isinstance(m, nn.ConvTranspose2d): 58 | if m.bias is not None: 59 | init.uniform_(m.bias) 60 | init.xavier_uniform_(m.weight) 61 | # init_deconv_bilinear(m.weight) 62 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear') 63 | 64 | 65 | 66 | def forward(self, x): 67 | out_conv0 = self.conv0(x) 68 | out_conv1 = self.conv1_1(self.conv1(out_conv0)) 69 | out_conv2 = self.conv2_1(self.conv2(out_conv1)) 70 | 71 | out_conv3 = self.conv3_1(self.conv3(out_conv2)) 72 | out_conv4 = self.conv4_1(self.conv4(out_conv3)) 73 | out_conv5 = self.conv5_1(self.conv5(out_conv4)) 74 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 75 | 76 | flow6 = self.predict_flow6(out_conv6) 77 | flow6_up = self.upsampled_flow6_to_5(flow6) 78 | out_deconv5 = self.deconv5(out_conv6) 79 | 80 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1) 81 | out_interconv5 = self.inter_conv5(concat5) 82 | flow5 = self.predict_flow5(out_interconv5) 83 | 84 | flow5_up = self.upsampled_flow5_to_4(flow5) 85 | out_deconv4 = self.deconv4(concat5) 86 | 87 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1) 88 | out_interconv4 = self.inter_conv4(concat4) 89 | flow4 = self.predict_flow4(out_interconv4) 90 | flow4_up = self.upsampled_flow4_to_3(flow4) 91 
| out_deconv3 = self.deconv3(concat4) 92 | 93 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1) 94 | out_interconv3 = self.inter_conv3(concat3) 95 | flow3 = self.predict_flow3(out_interconv3) 96 | flow3_up = self.upsampled_flow3_to_2(flow3) 97 | out_deconv2 = self.deconv2(concat3) 98 | 99 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1) 100 | out_interconv2 = self.inter_conv2(concat2) 101 | flow2 = self.predict_flow2(out_interconv2) 102 | 103 | if self.training: 104 | return flow2,flow3,flow4,flow5,flow6 105 | else: 106 | return flow2, 107 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/channelnorm_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/channelnorm_package/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/channelnorm_package/channelnorm.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function, Variable 2 | from torch.nn.modules.module import Module 3 | import channelnorm_cuda 4 | 5 | class ChannelNormFunction(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, input1, norm_deg=2): 9 | assert input1.is_contiguous() 10 | b, _, h, w = input1.size() 11 | output = input1.new(b, 1, h, w).zero_() 12 | 13 | channelnorm_cuda.forward(input1, output, norm_deg) 14 | ctx.save_for_backward(input1, output) 15 | ctx.norm_deg = norm_deg 16 | 17 | return output 18 | 19 | @staticmethod 20 | def backward(ctx, grad_output): 21 | input1, output = ctx.saved_tensors 22 | 23 | grad_input1 = Variable(input1.new(input1.size()).zero_()) 24 | 25 | channelnorm.backward(input1, output, grad_output.data, 26 | grad_input1.data, ctx.norm_deg) 27 | 28 | return grad_input1, None 29 | 30 | 31 | class ChannelNorm(Module): 32 | 33 | def __init__(self, norm_deg=2): 34 | super(ChannelNorm, self).__init__() 35 | self.norm_deg = norm_deg 36 | 37 | def forward(self, input1): 38 | return ChannelNormFunction.apply(input1, self.norm_deg) 39 | 40 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/channelnorm_package/channelnorm_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "channelnorm_kernel.cuh" 5 | 6 | int channelnorm_cuda_forward( 7 | at::Tensor& input1, 8 | at::Tensor& output, 9 | int norm_deg) { 10 | 11 | channelnorm_kernel_forward(input1, output, norm_deg); 12 | return 1; 13 | } 14 | 15 | 16 | int channelnorm_cuda_backward( 17 | at::Tensor& input1, 18 | at::Tensor& output, 19 | at::Tensor& gradOutput, 20 | at::Tensor& gradInput1, 21 | int norm_deg) { 22 | 23 | channelnorm_kernel_backward(input1, output, gradOutput, gradInput1, norm_deg); 24 | return 1; 25 | } 26 | 27 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 28 | m.def("forward", &channelnorm_cuda_forward, "Channel norm forward (CUDA)"); 29 | m.def("backward", 
&channelnorm_cuda_backward, "Channel norm backward (CUDA)"); 30 | } 31 | 32 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/channelnorm_package/channelnorm_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "channelnorm_kernel.cuh" 6 | 7 | #define CUDA_NUM_THREADS 512 8 | 9 | #define DIM0(TENSOR) ((TENSOR).x) 10 | #define DIM1(TENSOR) ((TENSOR).y) 11 | #define DIM2(TENSOR) ((TENSOR).z) 12 | #define DIM3(TENSOR) ((TENSOR).w) 13 | 14 | #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))]) 15 | 16 | using at::Half; 17 | 18 | template 19 | __global__ void kernel_channelnorm_update_output( 20 | const int n, 21 | const scalar_t* __restrict__ input1, 22 | const long4 input1_size, 23 | const long4 input1_stride, 24 | scalar_t* __restrict__ output, 25 | const long4 output_size, 26 | const long4 output_stride, 27 | int norm_deg) { 28 | 29 | int index = blockIdx.x * blockDim.x + threadIdx.x; 30 | 31 | if (index >= n) { 32 | return; 33 | } 34 | 35 | int dim_b = DIM0(output_size); 36 | int dim_c = DIM1(output_size); 37 | int dim_h = DIM2(output_size); 38 | int dim_w = DIM3(output_size); 39 | int dim_chw = dim_c * dim_h * dim_w; 40 | 41 | int b = ( index / dim_chw ) % dim_b; 42 | int y = ( index / dim_w ) % dim_h; 43 | int x = ( index ) % dim_w; 44 | 45 | int i1dim_c = DIM1(input1_size); 46 | int i1dim_h = DIM2(input1_size); 47 | int i1dim_w = DIM3(input1_size); 48 | int i1dim_chw = i1dim_c * i1dim_h * i1dim_w; 49 | int i1dim_hw = i1dim_h * i1dim_w; 50 | 51 | float result = 0.0; 52 | 53 | for (int c = 0; c < i1dim_c; ++c) { 54 | int i1Index = b * i1dim_chw + c * i1dim_hw + y * i1dim_w + x; 55 | scalar_t val = input1[i1Index]; 56 | result += static_cast(val * val); 57 | } 58 | result = sqrt(result); 59 | output[index] = static_cast(result); 60 | } 61 | 62 | 63 | template 64 | __global__ void kernel_channelnorm_backward_input1( 65 | const int n, 66 | const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride, 67 | const scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride, 68 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride, 69 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, 70 | int norm_deg) { 71 | 72 | int index = blockIdx.x * blockDim.x + threadIdx.x; 73 | 74 | if (index >= n) { 75 | return; 76 | } 77 | 78 | float val = 0.0; 79 | 80 | int dim_b = DIM0(gradInput_size); 81 | int dim_c = DIM1(gradInput_size); 82 | int dim_h = DIM2(gradInput_size); 83 | int dim_w = DIM3(gradInput_size); 84 | int dim_chw = dim_c * dim_h * dim_w; 85 | int dim_hw = dim_h * dim_w; 86 | 87 | int b = ( index / dim_chw ) % dim_b; 88 | int y = ( index / dim_w ) % dim_h; 89 | int x = ( index ) % dim_w; 90 | 91 | 92 | int outIndex = b * dim_hw + y * dim_w + x; 93 | val = static_cast(gradOutput[outIndex]) * static_cast(input1[index]) / (static_cast(output[outIndex])+1e-9); 94 | gradInput[index] = static_cast(val); 95 | 96 | } 97 | 98 | void channelnorm_kernel_forward( 99 | at::Tensor& input1, 100 | at::Tensor& output, 101 | int norm_deg) { 102 | 103 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3)); 104 | const long4 input1_stride = 
make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3)); 105 | 106 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3)); 107 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3)); 108 | 109 | int n = output.numel(); 110 | 111 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_forward", ([&] { 112 | 113 | kernel_channelnorm_update_output<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 114 | //at::globalContext().getCurrentCUDAStream() >>>( 115 | n, 116 | input1.data(), 117 | input1_size, 118 | input1_stride, 119 | output.data(), 120 | output_size, 121 | output_stride, 122 | norm_deg); 123 | 124 | })); 125 | 126 | // TODO: ATen-equivalent check 127 | 128 | // THCudaCheck(cudaGetLastError()); 129 | } 130 | 131 | void channelnorm_kernel_backward( 132 | at::Tensor& input1, 133 | at::Tensor& output, 134 | at::Tensor& gradOutput, 135 | at::Tensor& gradInput1, 136 | int norm_deg) { 137 | 138 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3)); 139 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3)); 140 | 141 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3)); 142 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3)); 143 | 144 | const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3)); 145 | const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3)); 146 | 147 | const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3)); 148 | const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3)); 149 | 150 | int n = gradInput1.numel(); 151 | 152 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_backward_input1", ([&] { 153 | 154 | kernel_channelnorm_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 155 | //at::globalContext().getCurrentCUDAStream() >>>( 156 | n, 157 | input1.data(), 158 | input1_size, 159 | input1_stride, 160 | output.data(), 161 | output_size, 162 | output_stride, 163 | gradOutput.data(), 164 | gradOutput_size, 165 | gradOutput_stride, 166 | gradInput1.data(), 167 | gradInput1_size, 168 | gradInput1_stride, 169 | norm_deg 170 | ); 171 | 172 | })); 173 | 174 | // TODO: Add ATen-equivalent check 175 | 176 | // THCudaCheck(cudaGetLastError()); 177 | } 178 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/channelnorm_package/channelnorm_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | void channelnorm_kernel_forward( 6 | at::Tensor& input1, 7 | at::Tensor& output, 8 | int norm_deg); 9 | 10 | 11 | void channelnorm_kernel_backward( 12 | at::Tensor& input1, 13 | at::Tensor& output, 14 | at::Tensor& gradOutput, 15 | at::Tensor& gradInput1, 16 | int norm_deg); 17 | -------------------------------------------------------------------------------- 
/models/flownet2_pytorch/networks/channelnorm_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_52,code=sm_52', 12 | '-gencode', 'arch=compute_60,code=sm_60', 13 | '-gencode', 'arch=compute_61,code=sm_61', 14 | '-gencode', 'arch=compute_70,code=sm_70', 15 | '-gencode', 'arch=compute_70,code=compute_70' 16 | ] 17 | 18 | setup( 19 | name='channelnorm_cuda', 20 | ext_modules=[ 21 | CUDAExtension('channelnorm_cuda', [ 22 | 'channelnorm_cuda.cc', 23 | 'channelnorm_kernel.cu' 24 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 25 | ], 26 | cmdclass={ 27 | 'build_ext': BuildExtension 28 | }) 29 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/correlation_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/correlation_package/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/correlation_package/correlation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.module import Module 3 | from torch.autograd import Function 4 | import correlation_cuda 5 | 6 | class CorrelationFunction(Function): 7 | 8 | def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1): 9 | super(CorrelationFunction, self).__init__() 10 | self.pad_size = pad_size 11 | self.kernel_size = kernel_size 12 | self.max_displacement = max_displacement 13 | self.stride1 = stride1 14 | self.stride2 = stride2 15 | self.corr_multiply = corr_multiply 16 | # self.out_channel = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1) 17 | 18 | def forward(self, input1, input2): 19 | self.save_for_backward(input1, input2) 20 | 21 | with torch.cuda.device_of(input1): 22 | rbot1 = input1.new() 23 | rbot2 = input2.new() 24 | output = input1.new() 25 | 26 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 27 | self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply) 28 | 29 | return output 30 | 31 | def backward(self, grad_output): 32 | input1, input2 = self.saved_tensors 33 | 34 | with torch.cuda.device_of(input1): 35 | rbot1 = input1.new() 36 | rbot2 = input2.new() 37 | 38 | grad_input1 = input1.new() 39 | grad_input2 = input2.new() 40 | 41 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2, 42 | self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply) 43 | 44 | return grad_input1, grad_input2 45 | 46 | 47 | class Correlation(Module): 48 | def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1): 49 | super(Correlation, self).__init__() 50 | self.pad_size = pad_size 51 | self.kernel_size = kernel_size 52 | self.max_displacement = max_displacement 53 | self.stride1 = stride1 54 | self.stride2 = stride2 55 | self.corr_multiply = corr_multiply 56 | 57 | def forward(self, input1, input2): 58 | 59 
| result = CorrelationFunction(self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)(input1, input2) 60 | 61 | return result 62 | 63 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/correlation_package/correlation_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "correlation_cuda_kernel.cuh" 9 | 10 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output, 11 | int pad_size, 12 | int kernel_size, 13 | int max_displacement, 14 | int stride1, 15 | int stride2, 16 | int corr_type_multiply) 17 | { 18 | 19 | int batchSize = input1.size(0); 20 | 21 | int nInputChannels = input1.size(1); 22 | int inputHeight = input1.size(2); 23 | int inputWidth = input1.size(3); 24 | 25 | int kernel_radius = (kernel_size - 1) / 2; 26 | int border_radius = kernel_radius + max_displacement; 27 | 28 | int paddedInputHeight = inputHeight + 2 * pad_size; 29 | int paddedInputWidth = inputWidth + 2 * pad_size; 30 | 31 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1); 32 | 33 | int outputHeight = ceil(static_cast(paddedInputHeight - 2 * border_radius) / static_cast(stride1)); 34 | int outputwidth = ceil(static_cast(paddedInputWidth - 2 * border_radius) / static_cast(stride1)); 35 | 36 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 37 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 38 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth}); 39 | 40 | rInput1.fill_(0); 41 | rInput2.fill_(0); 42 | output.fill_(0); 43 | 44 | int success = correlation_forward_cuda_kernel( 45 | output, 46 | output.size(0), 47 | output.size(1), 48 | output.size(2), 49 | output.size(3), 50 | output.stride(0), 51 | output.stride(1), 52 | output.stride(2), 53 | output.stride(3), 54 | input1, 55 | input1.size(1), 56 | input1.size(2), 57 | input1.size(3), 58 | input1.stride(0), 59 | input1.stride(1), 60 | input1.stride(2), 61 | input1.stride(3), 62 | input2, 63 | input2.size(1), 64 | input2.stride(0), 65 | input2.stride(1), 66 | input2.stride(2), 67 | input2.stride(3), 68 | rInput1, 69 | rInput2, 70 | pad_size, 71 | kernel_size, 72 | max_displacement, 73 | stride1, 74 | stride2, 75 | corr_type_multiply, 76 | at::cuda::getCurrentCUDAStream() 77 | //at::globalContext().getCurrentCUDAStream() 78 | ); 79 | 80 | //check for errors 81 | if (!success) { 82 | AT_ERROR("CUDA call failed"); 83 | } 84 | 85 | return 1; 86 | 87 | } 88 | 89 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput, 90 | at::Tensor& gradInput1, at::Tensor& gradInput2, 91 | int pad_size, 92 | int kernel_size, 93 | int max_displacement, 94 | int stride1, 95 | int stride2, 96 | int corr_type_multiply) 97 | { 98 | 99 | int batchSize = input1.size(0); 100 | int nInputChannels = input1.size(1); 101 | int paddedInputHeight = input1.size(2)+ 2 * pad_size; 102 | int paddedInputWidth = input1.size(3)+ 2 * pad_size; 103 | 104 | int height = input1.size(2); 105 | int width = input1.size(3); 106 | 107 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 108 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, 
nInputChannels}); 109 | gradInput1.resize_({batchSize, nInputChannels, height, width}); 110 | gradInput2.resize_({batchSize, nInputChannels, height, width}); 111 | 112 | rInput1.fill_(0); 113 | rInput2.fill_(0); 114 | gradInput1.fill_(0); 115 | gradInput2.fill_(0); 116 | 117 | int success = correlation_backward_cuda_kernel(gradOutput, 118 | gradOutput.size(0), 119 | gradOutput.size(1), 120 | gradOutput.size(2), 121 | gradOutput.size(3), 122 | gradOutput.stride(0), 123 | gradOutput.stride(1), 124 | gradOutput.stride(2), 125 | gradOutput.stride(3), 126 | input1, 127 | input1.size(1), 128 | input1.size(2), 129 | input1.size(3), 130 | input1.stride(0), 131 | input1.stride(1), 132 | input1.stride(2), 133 | input1.stride(3), 134 | input2, 135 | input2.stride(0), 136 | input2.stride(1), 137 | input2.stride(2), 138 | input2.stride(3), 139 | gradInput1, 140 | gradInput1.stride(0), 141 | gradInput1.stride(1), 142 | gradInput1.stride(2), 143 | gradInput1.stride(3), 144 | gradInput2, 145 | gradInput2.size(1), 146 | gradInput2.stride(0), 147 | gradInput2.stride(1), 148 | gradInput2.stride(2), 149 | gradInput2.stride(3), 150 | rInput1, 151 | rInput2, 152 | pad_size, 153 | kernel_size, 154 | max_displacement, 155 | stride1, 156 | stride2, 157 | corr_type_multiply, 158 | at::cuda::getCurrentCUDAStream() 159 | //at::globalContext().getCurrentCUDAStream() 160 | ); 161 | 162 | if (!success) { 163 | AT_ERROR("CUDA call failed"); 164 | } 165 | 166 | return 1; 167 | } 168 | 169 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 170 | m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)"); 171 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)"); 172 | } 173 | 174 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/correlation_package/correlation_cuda_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int correlation_forward_cuda_kernel(at::Tensor& output, 8 | int ob, 9 | int oc, 10 | int oh, 11 | int ow, 12 | int osb, 13 | int osc, 14 | int osh, 15 | int osw, 16 | 17 | at::Tensor& input1, 18 | int ic, 19 | int ih, 20 | int iw, 21 | int isb, 22 | int isc, 23 | int ish, 24 | int isw, 25 | 26 | at::Tensor& input2, 27 | int gc, 28 | int gsb, 29 | int gsc, 30 | int gsh, 31 | int gsw, 32 | 33 | at::Tensor& rInput1, 34 | at::Tensor& rInput2, 35 | int pad_size, 36 | int kernel_size, 37 | int max_displacement, 38 | int stride1, 39 | int stride2, 40 | int corr_type_multiply, 41 | cudaStream_t stream); 42 | 43 | 44 | int correlation_backward_cuda_kernel( 45 | at::Tensor& gradOutput, 46 | int gob, 47 | int goc, 48 | int goh, 49 | int gow, 50 | int gosb, 51 | int gosc, 52 | int gosh, 53 | int gosw, 54 | 55 | at::Tensor& input1, 56 | int ic, 57 | int ih, 58 | int iw, 59 | int isb, 60 | int isc, 61 | int ish, 62 | int isw, 63 | 64 | at::Tensor& input2, 65 | int gsb, 66 | int gsc, 67 | int gsh, 68 | int gsw, 69 | 70 | at::Tensor& gradInput1, 71 | int gisb, 72 | int gisc, 73 | int gish, 74 | int gisw, 75 | 76 | at::Tensor& gradInput2, 77 | int ggc, 78 | int ggsb, 79 | int ggsc, 80 | int ggsh, 81 | int ggsw, 82 | 83 | at::Tensor& rInput1, 84 | at::Tensor& rInput2, 85 | int pad_size, 86 | int kernel_size, 87 | int max_displacement, 88 | int stride1, 89 | int stride2, 90 | int corr_type_multiply, 91 | cudaStream_t stream); 92 | -------------------------------------------------------------------------------- 
/models/flownet2_pytorch/networks/correlation_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup, find_packages 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70' 17 | ] 18 | 19 | setup( 20 | name='correlation_cuda', 21 | ext_modules=[ 22 | CUDAExtension('correlation_cuda', [ 23 | 'correlation_cuda.cc', 24 | 'correlation_cuda_kernel.cu' 25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 26 | ], 27 | cmdclass={ 28 | 'build_ext': BuildExtension 29 | }) 30 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/resample2d_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/resample2d_package/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/resample2d_package/resample2d.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.autograd import Function, Variable 3 | import resample2d_cuda 4 | 5 | class Resample2dFunction(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, input1, input2, kernel_size=1): 9 | assert input1.is_contiguous() 10 | assert input2.is_contiguous() 11 | 12 | ctx.save_for_backward(input1, input2) 13 | ctx.kernel_size = kernel_size 14 | 15 | _, d, _, _ = input1.size() 16 | b, _, h, w = input2.size() 17 | output = input1.new(b, d, h, w).zero_() 18 | 19 | resample2d_cuda.forward(input1, input2, output, kernel_size) 20 | 21 | return output 22 | 23 | @staticmethod 24 | def backward(ctx, grad_output): 25 | assert grad_output.is_contiguous() 26 | 27 | input1, input2 = ctx.saved_tensors 28 | 29 | grad_input1 = Variable(input1.new(input1.size()).zero_()) 30 | grad_input2 = Variable(input1.new(input2.size()).zero_()) 31 | 32 | resample2d_cuda.backward(input1, input2, grad_output.data, 33 | grad_input1.data, grad_input2.data, 34 | ctx.kernel_size) 35 | 36 | return grad_input1, grad_input2, None 37 | 38 | class Resample2d(Module): 39 | 40 | def __init__(self, kernel_size=1): 41 | super(Resample2d, self).__init__() 42 | self.kernel_size = kernel_size 43 | 44 | def forward(self, input1, input2): 45 | input1_c = input1.contiguous() 46 | return Resample2dFunction.apply(input1_c, input2, self.kernel_size) 47 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/resample2d_package/resample2d_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "resample2d_kernel.cuh" 5 | 6 | int resample2d_cuda_forward( 7 | at::Tensor& input1, 8 | at::Tensor& input2, 9 | at::Tensor& output, 10 | int kernel_size) { 11 | resample2d_kernel_forward(input1, input2, output, kernel_size); 12 | return 1; 13 | } 14 | 15 | int resample2d_cuda_backward( 16 | at::Tensor& 
input1, 17 | at::Tensor& input2, 18 | at::Tensor& gradOutput, 19 | at::Tensor& gradInput1, 20 | at::Tensor& gradInput2, 21 | int kernel_size) { 22 | resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size); 23 | return 1; 24 | } 25 | 26 | 27 | 28 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 29 | m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)"); 30 | m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)"); 31 | } 32 | 33 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/resample2d_package/resample2d_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | void resample2d_kernel_forward( 6 | at::Tensor& input1, 7 | at::Tensor& input2, 8 | at::Tensor& output, 9 | int kernel_size); 10 | 11 | void resample2d_kernel_backward( 12 | at::Tensor& input1, 13 | at::Tensor& input2, 14 | at::Tensor& gradOutput, 15 | at::Tensor& gradInput1, 16 | at::Tensor& gradInput2, 17 | int kernel_size); 18 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/resample2d_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70' 17 | ] 18 | 19 | setup( 20 | name='resample2d_cuda', 21 | ext_modules=[ 22 | CUDAExtension('resample2d_cuda', [ 23 | 'resample2d_cuda.cc', 24 | 'resample2d_kernel.cu' 25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 26 | ], 27 | cmdclass={ 28 | 'build_ext': BuildExtension 29 | }) 30 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/submodules.py: -------------------------------------------------------------------------------- 1 | # freda (todo) : 2 | 3 | import torch.nn as nn 4 | import torch 5 | import numpy as np 6 | 7 | def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1): 8 | if batchNorm: 9 | return nn.Sequential( 10 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=False), 11 | nn.BatchNorm2d(out_planes), 12 | nn.LeakyReLU(0.1,inplace=True) 13 | ) 14 | else: 15 | return nn.Sequential( 16 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 17 | nn.LeakyReLU(0.1,inplace=True) 18 | ) 19 | 20 | def i_conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, bias = True): 21 | if batchNorm: 22 | return nn.Sequential( 23 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias), 24 | nn.BatchNorm2d(out_planes), 25 | ) 26 | else: 27 | return nn.Sequential( 28 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias), 29 | ) 30 | 31 | def predict_flow(in_planes): 32 | return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True) 33 | 
34 | def deconv(in_planes, out_planes): 35 | return nn.Sequential( 36 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True), 37 | nn.LeakyReLU(0.1,inplace=True) 38 | ) 39 | 40 | class tofp16(nn.Module): 41 | def __init__(self): 42 | super(tofp16, self).__init__() 43 | 44 | def forward(self, input): 45 | return input.half() 46 | 47 | 48 | class tofp32(nn.Module): 49 | def __init__(self): 50 | super(tofp32, self).__init__() 51 | 52 | def forward(self, input): 53 | return input.float() 54 | 55 | 56 | def init_deconv_bilinear(weight): 57 | f_shape = weight.size() 58 | heigh, width = f_shape[-2], f_shape[-1] 59 | f = np.ceil(width/2.0) 60 | c = (2 * f - 1 - f % 2) / (2.0 * f) 61 | bilinear = np.zeros([heigh, width]) 62 | for x in range(width): 63 | for y in range(heigh): 64 | value = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) 65 | bilinear[x, y] = value 66 | weight.data.fill_(0.) 67 | for i in range(f_shape[0]): 68 | for j in range(f_shape[1]): 69 | weight.data[i,j,:,:] = torch.from_numpy(bilinear) 70 | 71 | 72 | def save_grad(grads, name): 73 | def hook(grad): 74 | grads[name] = grad 75 | return hook 76 | 77 | ''' 78 | def save_grad(grads, name): 79 | def hook(grad): 80 | grads[name] = grad 81 | return hook 82 | import torch 83 | from channelnorm_package.modules.channelnorm import ChannelNorm 84 | model = ChannelNorm().cuda() 85 | grads = {} 86 | a = 100*torch.autograd.Variable(torch.randn((1,3,5,5)).cuda(), requires_grad=True) 87 | a.register_hook(save_grad(grads, 'a')) 88 | b = model(a) 89 | y = torch.mean(b) 90 | y.backward() 91 | 92 | ''' 93 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/run-caffe2pytorch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FN2PYTORCH=${1:-/} 4 | 5 | # install custom layers 6 | sudo nvidia-docker build -t $USER/pytorch:CUDA8-py27 . 
7 | sudo nvidia-docker run --rm -ti --volume=${FN2PYTORCH}:/flownet2-pytorch:rw --workdir=/flownet2-pytorch $USER/pytorch:CUDA8-py27 /bin/bash -c "./install.sh" 8 | 9 | # convert FlowNet2-C, CS, CSS, CSS-ft-sd, SD, S and 2 to PyTorch 10 | sudo nvidia-docker run -ti --volume=${FN2PYTORCH}:/fn2pytorch:rw flownet2:latest /bin/bash -c "source /flownet2/flownet2/set-env.sh && cd /flownet2/flownet2/models && \ 11 | python /fn2pytorch/convert.py ./FlowNet2-C/FlowNet2-C_weights.caffemodel ./FlowNet2-C/FlowNet2-C_deploy.prototxt.template /fn2pytorch && 12 | python /fn2pytorch/convert.py ./FlowNet2-CS/FlowNet2-CS_weights.caffemodel ./FlowNet2-CS/FlowNet2-CS_deploy.prototxt.template /fn2pytorch && \ 13 | python /fn2pytorch/convert.py ./FlowNet2-CSS/FlowNet2-CSS_weights.caffemodel.h5 ./FlowNet2-CSS/FlowNet2-CSS_deploy.prototxt.template /fn2pytorch && \ 14 | python /fn2pytorch/convert.py ./FlowNet2-CSS-ft-sd/FlowNet2-CSS-ft-sd_weights.caffemodel.h5 ./FlowNet2-CSS-ft-sd/FlowNet2-CSS-ft-sd_deploy.prototxt.template /fn2pytorch && \ 15 | python /fn2pytorch/convert.py ./FlowNet2-SD/FlowNet2-SD_weights.caffemodel.h5 ./FlowNet2-SD/FlowNet2-SD_deploy.prototxt.template /fn2pytorch && \ 16 | python /fn2pytorch/convert.py ./FlowNet2-S/FlowNet2-S_weights.caffemodel.h5 ./FlowNet2-S/FlowNet2-S_deploy.prototxt.template /fn2pytorch && \ 17 | python /fn2pytorch/convert.py ./FlowNet2/FlowNet2_weights.caffemodel.h5 ./FlowNet2/FlowNet2_deploy.prototxt.template /fn2pytorch" -------------------------------------------------------------------------------- /models/flownet2_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/utils/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/utils/flow_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | TAG_CHAR = np.array([202021.25], np.float32) 4 | 5 | def readFlow(fn): 6 | """ Read .flo file in Middlebury format""" 7 | # Code adapted from: 8 | # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy 9 | 10 | # WARNING: this will work on little-endian architectures (eg Intel x86) only! 11 | # print 'fn = %s'%(fn) 12 | with open(fn, 'rb') as f: 13 | magic = np.fromfile(f, np.float32, count=1) 14 | if 202021.25 != magic: 15 | print('Magic number incorrect. Invalid .flo file') 16 | return None 17 | else: 18 | w = np.fromfile(f, np.int32, count=1) 19 | h = np.fromfile(f, np.int32, count=1) 20 | # print 'Reading %d x %d flo file\n' % (w, h) 21 | data = np.fromfile(f, np.float32, count=2*int(w)*int(h)) 22 | # Reshape data into 3D array (columns, rows, bands) 23 | # The reshape here is for visualization, the original code is (w,h,2) 24 | return np.resize(data, (int(h), int(w), 2)) 25 | 26 | def writeFlow(filename,uv,v=None): 27 | """ Write optical flow to file. 28 | 29 | If v is None, uv is assumed to contain both u and v channels, 30 | stacked in depth. 31 | Original code by Deqing Sun, adapted from Daniel Scharstein. 
32 | """ 33 | nBands = 2 34 | 35 | if v is None: 36 | assert(uv.ndim == 3) 37 | assert(uv.shape[2] == 2) 38 | u = uv[:,:,0] 39 | v = uv[:,:,1] 40 | else: 41 | u = uv 42 | 43 | assert(u.shape == v.shape) 44 | height,width = u.shape 45 | f = open(filename,'wb') 46 | # write the header 47 | f.write(TAG_CHAR) 48 | np.array(width).astype(np.int32).tofile(f) 49 | np.array(height).astype(np.int32).tofile(f) 50 | # arrange into matrix form 51 | tmp = np.zeros((height, width*nBands)) 52 | tmp[:,np.arange(width)*2] = u 53 | tmp[:,np.arange(width)*2 + 1] = v 54 | tmp.astype(np.float32).tofile(f) 55 | f.close() 56 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/utils/frame_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from os.path import * 3 | from scipy.misc import imread 4 | from . import flow_utils 5 | 6 | def read_gen(file_name): 7 | ext = splitext(file_name)[-1] 8 | if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg': 9 | im = imread(file_name) 10 | if im.shape[2] > 3: 11 | return im[:,:,:3] 12 | else: 13 | return im 14 | elif ext == '.bin' or ext == '.raw': 15 | return np.load(file_name) 16 | elif ext == '.flo': 17 | return flow_utils.readFlow(file_name).astype(np.float32) 18 | return [] 19 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/utils/param_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | def parse_flownetc(modules, weights, biases): 6 | keys = [ 7 | 'conv1', 8 | 'conv2', 9 | 'conv3', 10 | 'conv_redir', 11 | 'conv3_1', 12 | 'conv4', 13 | 'conv4_1', 14 | 'conv5', 15 | 'conv5_1', 16 | 'conv6', 17 | 'conv6_1', 18 | 19 | 'deconv5', 20 | 'deconv4', 21 | 'deconv3', 22 | 'deconv2', 23 | 24 | 'Convolution1', 25 | 'Convolution2', 26 | 'Convolution3', 27 | 'Convolution4', 28 | 'Convolution5', 29 | 30 | 'upsample_flow6to5', 31 | 'upsample_flow5to4', 32 | 'upsample_flow4to3', 33 | 'upsample_flow3to2', 34 | 35 | ] 36 | i = 0 37 | for m in modules: 38 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 39 | weight = weights[keys[i]].copy() 40 | bias = biases[keys[i]].copy() 41 | if keys[i] == 'conv1': 42 | m.weight.data[:,:,:,:] = torch.from_numpy(np.flip(weight, axis=1).copy()) 43 | m.bias.data[:] = torch.from_numpy(bias) 44 | else: 45 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 46 | m.bias.data[:] = torch.from_numpy(bias) 47 | 48 | i = i + 1 49 | return 50 | 51 | def parse_flownets(modules, weights, biases, param_prefix='net2_'): 52 | keys = [ 53 | 'conv1', 54 | 'conv2', 55 | 'conv3', 56 | 'conv3_1', 57 | 'conv4', 58 | 'conv4_1', 59 | 'conv5', 60 | 'conv5_1', 61 | 'conv6', 62 | 'conv6_1', 63 | 64 | 'deconv5', 65 | 'deconv4', 66 | 'deconv3', 67 | 'deconv2', 68 | 69 | 'predict_conv6', 70 | 'predict_conv5', 71 | 'predict_conv4', 72 | 'predict_conv3', 73 | 'predict_conv2', 74 | 75 | 'upsample_flow6to5', 76 | 'upsample_flow5to4', 77 | 'upsample_flow4to3', 78 | 'upsample_flow3to2', 79 | ] 80 | for i, k in enumerate(keys): 81 | if 'upsample' in k: 82 | keys[i] = param_prefix + param_prefix + k 83 | else: 84 | keys[i] = param_prefix + k 85 | i = 0 86 | for m in modules: 87 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 88 | weight = weights[keys[i]].copy() 89 | bias = biases[keys[i]].copy() 90 | if keys[i] == param_prefix+'conv1': 91 | 
m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 92 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy()) 93 | m.weight.data[:,6:9,:,:] = torch.from_numpy(np.flip(weight[:,6:9,:,:], axis=1).copy()) 94 | m.weight.data[:,9::,:,:] = torch.from_numpy(weight[:,9:,:,:].copy()) 95 | if m.bias is not None: 96 | m.bias.data[:] = torch.from_numpy(bias) 97 | else: 98 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 99 | if m.bias is not None: 100 | m.bias.data[:] = torch.from_numpy(bias) 101 | i = i + 1 102 | return 103 | 104 | def parse_flownetsonly(modules, weights, biases, param_prefix=''): 105 | keys = [ 106 | 'conv1', 107 | 'conv2', 108 | 'conv3', 109 | 'conv3_1', 110 | 'conv4', 111 | 'conv4_1', 112 | 'conv5', 113 | 'conv5_1', 114 | 'conv6', 115 | 'conv6_1', 116 | 117 | 'deconv5', 118 | 'deconv4', 119 | 'deconv3', 120 | 'deconv2', 121 | 122 | 'Convolution1', 123 | 'Convolution2', 124 | 'Convolution3', 125 | 'Convolution4', 126 | 'Convolution5', 127 | 128 | 'upsample_flow6to5', 129 | 'upsample_flow5to4', 130 | 'upsample_flow4to3', 131 | 'upsample_flow3to2', 132 | ] 133 | for i, k in enumerate(keys): 134 | if 'upsample' in k: 135 | keys[i] = param_prefix + param_prefix + k 136 | else: 137 | keys[i] = param_prefix + k 138 | i = 0 139 | for m in modules: 140 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 141 | weight = weights[keys[i]].copy() 142 | bias = biases[keys[i]].copy() 143 | if keys[i] == param_prefix+'conv1': 144 | # print ("%s :"%(keys[i]), m.weight.size(), m.bias.size(), tf_w[keys[i]].shape[::-1]) 145 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 146 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy()) 147 | if m.bias is not None: 148 | m.bias.data[:] = torch.from_numpy(bias) 149 | else: 150 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 151 | if m.bias is not None: 152 | m.bias.data[:] = torch.from_numpy(bias) 153 | i = i + 1 154 | return 155 | 156 | def parse_flownetsd(modules, weights, biases, param_prefix='netsd_'): 157 | keys = [ 158 | 'conv0', 159 | 'conv1', 160 | 'conv1_1', 161 | 'conv2', 162 | 'conv2_1', 163 | 'conv3', 164 | 'conv3_1', 165 | 'conv4', 166 | 'conv4_1', 167 | 'conv5', 168 | 'conv5_1', 169 | 'conv6', 170 | 'conv6_1', 171 | 172 | 'deconv5', 173 | 'deconv4', 174 | 'deconv3', 175 | 'deconv2', 176 | 177 | 'interconv5', 178 | 'interconv4', 179 | 'interconv3', 180 | 'interconv2', 181 | 182 | 'Convolution1', 183 | 'Convolution2', 184 | 'Convolution3', 185 | 'Convolution4', 186 | 'Convolution5', 187 | 188 | 'upsample_flow6to5', 189 | 'upsample_flow5to4', 190 | 'upsample_flow4to3', 191 | 'upsample_flow3to2', 192 | ] 193 | for i, k in enumerate(keys): 194 | keys[i] = param_prefix + k 195 | 196 | i = 0 197 | for m in modules: 198 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 199 | weight = weights[keys[i]].copy() 200 | bias = biases[keys[i]].copy() 201 | if keys[i] == param_prefix+'conv0': 202 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 203 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy()) 204 | if m.bias is not None: 205 | m.bias.data[:] = torch.from_numpy(bias) 206 | else: 207 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 208 | if m.bias is not None: 209 | m.bias.data[:] = torch.from_numpy(bias) 210 | i = i + 1 211 | 212 | return 213 | 214 | def parse_flownetfusion(modules, weights, 
biases, param_prefix='fuse_'): 215 | keys = [ 216 | 'conv0', 217 | 'conv1', 218 | 'conv1_1', 219 | 'conv2', 220 | 'conv2_1', 221 | 222 | 'deconv1', 223 | 'deconv0', 224 | 225 | 'interconv1', 226 | 'interconv0', 227 | 228 | '_Convolution5', 229 | '_Convolution6', 230 | '_Convolution7', 231 | 232 | 'upsample_flow2to1', 233 | 'upsample_flow1to0', 234 | ] 235 | for i, k in enumerate(keys): 236 | keys[i] = param_prefix + k 237 | 238 | i = 0 239 | for m in modules: 240 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 241 | weight = weights[keys[i]].copy() 242 | bias = biases[keys[i]].copy() 243 | if keys[i] == param_prefix+'conv0': 244 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 245 | m.weight.data[:,3::,:,:] = torch.from_numpy(weight[:,3:,:,:].copy()) 246 | if m.bias is not None: 247 | m.bias.data[:] = torch.from_numpy(bias) 248 | else: 249 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 250 | if m.bias is not None: 251 | m.bias.data[:] = torch.from_numpy(bias) 252 | i = i + 1 253 | 254 | return 255 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/utils/tools.py: -------------------------------------------------------------------------------- 1 | # freda (todo) : 2 | 3 | import os, time, sys, math 4 | import subprocess, shutil 5 | from os.path import * 6 | import numpy as np 7 | from inspect import isclass 8 | from pytz import timezone 9 | from datetime import datetime 10 | import inspect 11 | import torch 12 | 13 | def datestr(): 14 | pacific = timezone('US/Pacific') 15 | now = datetime.now(pacific) 16 | return '{}{:02}{:02}_{:02}{:02}'.format(now.year, now.month, now.day, now.hour, now.minute) 17 | 18 | def module_to_dict(module, exclude=[]): 19 | return dict([(x, getattr(module, x)) for x in dir(module) 20 | if isclass(getattr(module, x)) 21 | and x not in exclude 22 | and getattr(module, x) not in exclude]) 23 | 24 | class TimerBlock: 25 | def __init__(self, title): 26 | print(("{}".format(title))) 27 | 28 | def __enter__(self): 29 | self.start = time.clock() 30 | return self 31 | 32 | def __exit__(self, exc_type, exc_value, traceback): 33 | self.end = time.clock() 34 | self.interval = self.end - self.start 35 | 36 | if exc_type is not None: 37 | self.log("Operation failed\n") 38 | else: 39 | self.log("Operation finished\n") 40 | 41 | 42 | def log(self, string): 43 | duration = time.clock() - self.start 44 | units = 's' 45 | if duration > 60: 46 | duration = duration / 60. 
47 | units = 'm' 48 | print((" [{:.3f}{}] {}".format(duration, units, string))) 49 | 50 | def log2file(self, fid, string): 51 | fid = open(fid, 'a') 52 | fid.write("%s\n"%(string)) 53 | fid.close() 54 | 55 | def add_arguments_for_module(parser, module, argument_for_class, default, skip_params=[], parameter_defaults={}): 56 | argument_group = parser.add_argument_group(argument_for_class.capitalize()) 57 | 58 | module_dict = module_to_dict(module) 59 | argument_group.add_argument('--' + argument_for_class, type=str, default=default, choices=list(module_dict.keys())) 60 | 61 | args, unknown_args = parser.parse_known_args() 62 | class_obj = module_dict[vars(args)[argument_for_class]] 63 | 64 | argspec = inspect.getargspec(class_obj.__init__) 65 | 66 | defaults = argspec.defaults[::-1] if argspec.defaults else None 67 | 68 | args = argspec.args[::-1] 69 | for i, arg in enumerate(args): 70 | cmd_arg = '{}_{}'.format(argument_for_class, arg) 71 | if arg not in skip_params + ['self', 'args']: 72 | if arg in list(parameter_defaults.keys()): 73 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(parameter_defaults[arg]), default=parameter_defaults[arg]) 74 | elif (defaults is not None and i < len(defaults)): 75 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(defaults[i]), default=defaults[i]) 76 | else: 77 | print(("[Warning]: non-default argument '{}' detected on class '{}'. This argument cannot be modified via the command line" 78 | .format(arg, module.__class__.__name__))) 79 | # We don't have a good way of dealing with inferring the type of the argument 80 | # TODO: try creating a custom action and using ast's infer type? 81 | # else: 82 | # argument_group.add_argument('--{}'.format(cmd_arg), required=True) 83 | 84 | def kwargs_from_args(args, argument_for_class): 85 | argument_for_class = argument_for_class + '_' 86 | return {key[len(argument_for_class):]: value for key, value in list(vars(args).items()) if argument_for_class in key and key != argument_for_class + 'class'} 87 | 88 | def format_dictionary_of_losses(labels, values): 89 | try: 90 | string = ', '.join([('{}: {:' + ('.3f' if value >= 0.001 else '.1e') +'}').format(name, value) for name, value in zip(labels, values)]) 91 | except (TypeError, ValueError) as e: 92 | print((list(zip(labels, values)))) 93 | string = '[Log Error] ' + str(e) 94 | 95 | return string 96 | 97 | 98 | class IteratorTimer(): 99 | def __init__(self, iterable): 100 | self.iterable = iterable 101 | self.iterator = self.iterable.__iter__() 102 | 103 | def __iter__(self): 104 | return self 105 | 106 | def __len__(self): 107 | return len(self.iterable) 108 | 109 | def __next__(self): 110 | start = time.time() 111 | n = next(self.iterator) 112 | self.last_duration = (time.time() - start) 113 | return n 114 | 115 | next = __next__ 116 | 117 | def gpumemusage(): 118 | gpu_mem = subprocess.check_output("nvidia-smi | grep MiB | cut -f 3 -d '|'", shell=True).replace(' ', '').replace('\n', '').replace('i', '') 119 | all_stat = [float(a) for a in gpu_mem.replace('/','').split('MB')[:-1]] 120 | 121 | gpu_mem = '' 122 | for i in range(len(all_stat)/2): 123 | curr, tot = all_stat[2*i], all_stat[2*i+1] 124 | util = "%1.2f"%(100*curr/tot)+'%' 125 | cmem = str(int(math.ceil(curr/1024.)))+'GB' 126 | gmem = str(int(math.ceil(tot/1024.)))+'GB' 127 | gpu_mem += util + '--' + join(cmem, gmem) + ' ' 128 | return gpu_mem 129 | 130 | 131 | def update_hyperparameter_schedule(args, epoch, global_iteration, optimizer): 132 | if args.schedule_lr_frequency > 0: 133 | 
for param_group in optimizer.param_groups: 134 | if (global_iteration + 1) % args.schedule_lr_frequency == 0: 135 | param_group['lr'] /= float(args.schedule_lr_fraction) 136 | param_group['lr'] = float(np.maximum(param_group['lr'], 0.000001)) 137 | 138 | def save_checkpoint(state, is_best, path, prefix, filename='checkpoint.pth.tar'): 139 | prefix_save = os.path.join(path, prefix) 140 | name = prefix_save + '_' + filename 141 | torch.save(state, name) 142 | if is_best: 143 | shutil.copyfile(name, prefix_save + '_model_best.pth.tar') 144 | 145 | -------------------------------------------------------------------------------- /models/models.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | import numpy as np 7 | import fractions 8 | def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0 9 | 10 | def wrap_model(opt, modelG, modelD, flowNet): 11 | if opt.n_gpus_gen == len(opt.gpu_ids): 12 | modelG = myModel(opt, modelG) 13 | modelD = myModel(opt, modelD) 14 | flowNet = myModel(opt, flowNet) 15 | else: 16 | if opt.batchSize == 1: 17 | gpu_split_id = opt.n_gpus_gen + 1 18 | modelG = nn.DataParallel(modelG, device_ids=opt.gpu_ids[0:1]) 19 | else: 20 | gpu_split_id = opt.n_gpus_gen 21 | modelG = nn.DataParallel(modelG, device_ids=opt.gpu_ids[:gpu_split_id]) 22 | modelD = nn.DataParallel(modelD, device_ids=[opt.gpu_ids[0]] + opt.gpu_ids[gpu_split_id:]) 23 | flowNet = nn.DataParallel(flowNet, device_ids=[opt.gpu_ids[0]] + opt.gpu_ids[gpu_split_id:]) 24 | return modelG, modelD, flowNet 25 | 26 | class myModel(nn.Module): 27 | def __init__(self, opt, model): 28 | super(myModel, self).__init__() 29 | self.opt = opt 30 | self.module = model 31 | self.model = nn.DataParallel(model, device_ids=opt.gpu_ids) 32 | self.bs_per_gpu = int(np.ceil(float(opt.batchSize) / len(opt.gpu_ids))) # batch size for each GPU 33 | self.pad_bs = self.bs_per_gpu * len(opt.gpu_ids) - opt.batchSize 34 | 35 | def forward(self, *inputs, **kwargs): 36 | inputs = self.add_dummy_to_tensor(inputs, self.pad_bs) 37 | outputs = self.model(*inputs, **kwargs, dummy_bs=self.pad_bs) 38 | if self.pad_bs == self.bs_per_gpu: # gpu 0 does 0 batch but still returns 1 batch 39 | return self.remove_dummy_from_tensor(outputs, 1) 40 | return outputs 41 | 42 | def add_dummy_to_tensor(self, tensors, add_size=0): 43 | if add_size == 0 or tensors is None: return tensors 44 | if type(tensors) == list or type(tensors) == tuple: 45 | return [self.add_dummy_to_tensor(tensor, add_size) for tensor in tensors] 46 | 47 | if isinstance(tensors, torch.Tensor): 48 | dummy = torch.zeros_like(tensors)[:add_size] 49 | tensors = torch.cat([dummy, tensors]) 50 | return tensors 51 | 52 | def remove_dummy_from_tensor(self, tensors, remove_size=0): 53 | if remove_size == 0 or tensors is None: return tensors 54 | if type(tensors) == list or type(tensors) == tuple: 55 | return [self.remove_dummy_from_tensor(tensor, remove_size) for tensor in tensors] 56 | 57 | if isinstance(tensors, torch.Tensor): 58 | tensors = tensors[remove_size:] 59 | return tensors 60 | 61 | def create_model(opt): 62 | print(opt.model) 63 | if opt.model == 'vid2vid': 64 | from .vid2vid_model_G import Vid2VidModelG 65 | modelG = Vid2VidModelG() 66 | if opt.isTrain: 67 | from .vid2vid_model_D import Vid2VidModelD 68 | 
modelD = Vid2VidModelD() 69 | else: 70 | raise ValueError("Model [%s] not recognized." % opt.model) 71 | 72 | if opt.isTrain: 73 | from .flownet import FlowNet 74 | flowNet = FlowNet() 75 | 76 | modelG.initialize(opt) 77 | if opt.isTrain: 78 | modelD.initialize(opt) 79 | flowNet.initialize(opt) 80 | if not opt.fp16: 81 | modelG, modelD, flownet = wrap_model(opt, modelG, modelD, flowNet) 82 | return [modelG, modelD, flowNet] 83 | else: 84 | return modelG 85 | 86 | def create_optimizer(opt, models): 87 | modelG, modelD, flowNet = models 88 | optimizer_D_T = [] 89 | if opt.fp16: 90 | from apex import amp 91 | for s in range(opt.n_scales_temporal): 92 | optimizer_D_T.append(getattr(modelD, 'optimizer_D_T'+str(s))) 93 | modelG, optimizer_G = amp.initialize(modelG, modelG.optimizer_G, opt_level='O1') 94 | modelD, optimizers_D = amp.initialize(modelD, [modelD.optimizer_D] + optimizer_D_T, opt_level='O1') 95 | optimizer_D, optimizer_D_T = optimizers_D[0], optimizers_D[1:] 96 | modelG, modelD, flownet = wrap_model(opt, modelG, modelD, flowNet) 97 | else: 98 | optimizer_G = modelG.module.optimizer_G 99 | optimizer_D = modelD.module.optimizer_D 100 | for s in range(opt.n_scales_temporal): 101 | optimizer_D_T.append(getattr(modelD.module, 'optimizer_D_T'+str(s))) 102 | return modelG, modelD, flowNet, optimizer_G, optimizer_D, optimizer_D_T 103 | 104 | def init_params(opt, modelG, modelD, data_loader): 105 | iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt') 106 | start_epoch, epoch_iter = 1, 0 107 | ### if continue training, recover previous states 108 | if opt.continue_train: 109 | if os.path.exists(iter_path): 110 | start_epoch, epoch_iter = np.loadtxt(iter_path , delimiter=',', dtype=int) 111 | print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter)) 112 | if start_epoch > opt.niter: 113 | modelG.module.update_learning_rate(start_epoch-1, 'G') 114 | modelD.module.update_learning_rate(start_epoch-1, 'D') 115 | if (opt.n_scales_spatial > 1) and (opt.niter_fix_global != 0) and (start_epoch > opt.niter_fix_global): 116 | modelG.module.update_fixed_params() 117 | if start_epoch > opt.niter_step: 118 | data_loader.dataset.update_training_batch((start_epoch-1)//opt.niter_step) 119 | modelG.module.update_training_batch((start_epoch-1)//opt.niter_step) 120 | 121 | n_gpus = opt.n_gpus_gen if opt.batchSize == 1 else 1 # number of gpus used for generator for each batch 122 | tG, tD = opt.n_frames_G, opt.n_frames_D 123 | tDB = tD * opt.output_nc 124 | s_scales = opt.n_scales_spatial 125 | t_scales = opt.n_scales_temporal 126 | input_nc = 1 if opt.label_nc != 0 else opt.input_nc 127 | output_nc = opt.output_nc 128 | 129 | print_freq = lcm(opt.print_freq, opt.batchSize) 130 | total_steps = (start_epoch-1) * len(data_loader) + epoch_iter 131 | total_steps = total_steps // print_freq * print_freq 132 | 133 | return n_gpus, tG, tD, tDB, s_scales, t_scales, input_nc, output_nc, start_epoch, epoch_iter, print_freq, total_steps, iter_path 134 | 135 | def save_models(opt, epoch, epoch_iter, total_steps, visualizer, iter_path, modelG, modelD, end_of_epoch=False): 136 | if not end_of_epoch: 137 | if total_steps % opt.save_latest_freq == 0: 138 | visualizer.vis_print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps)) 139 | modelG.module.save('latest') 140 | modelD.module.save('latest') 141 | np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d') 142 | else: 143 | if epoch % opt.save_epoch_freq == 0: 144 | visualizer.vis_print('saving the model at 
the end of epoch %d, iters %d' % (epoch, total_steps)) 145 | modelG.module.save('latest') 146 | modelD.module.save('latest') 147 | modelG.module.save(epoch) 148 | modelD.module.save(epoch) 149 | np.savetxt(iter_path, (epoch+1, 0), delimiter=',', fmt='%d') 150 | 151 | def update_models(opt, epoch, modelG, modelD, data_loader): 152 | ### linearly decay learning rate after certain iterations 153 | if epoch > opt.niter: 154 | modelG.module.update_learning_rate(epoch, 'G') 155 | modelD.module.update_learning_rate(epoch, 'D') 156 | 157 | ### gradually grow training sequence length 158 | if (epoch % opt.niter_step) == 0: 159 | data_loader.dataset.update_training_batch(epoch//opt.niter_step) 160 | modelG.module.update_training_batch(epoch//opt.niter_step) 161 | 162 | ### finetune all scales 163 | if (opt.n_scales_spatial > 1) and (opt.niter_fix_global != 0) and (epoch == opt.niter_fix_global): 164 | modelG.module.update_fixed_params() -------------------------------------------------------------------------------- /options/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/options/__init__.py -------------------------------------------------------------------------------- /options/base_options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from util import util 4 | import torch 5 | 6 | class BaseOptions(): 7 | def __init__(self): 8 | self.parser = argparse.ArgumentParser() 9 | self.initialized = False 10 | 11 | def initialize(self): 12 | self.parser.add_argument('--dataroot', type=str, default='datasets/Cityscapes/') 13 | self.parser.add_argument('--batchSize', type=int, default=1, help='input batch size') 14 | self.parser.add_argument('--loadSize', type=int, default=512, help='scale images to this size') 15 | self.parser.add_argument('--fineSize', type=int, default=512, help='then crop to this size') 16 | self.parser.add_argument('--input_nc', type=int, default=3, help='# of input image channels') 17 | self.parser.add_argument('--label_nc', type=int, default=0, help='number of labels') 18 | self.parser.add_argument('--output_nc', type=int, default=3, help='# of output image channels') 19 | 20 | # network arch 21 | self.parser.add_argument('--netG', type=str, default='composite', help='selects model to use for netG') 22 | self.parser.add_argument('--ngf', type=int, default=128, help='# of gen filters in first conv layer') 23 | self.parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in first conv layer') 24 | self.parser.add_argument('--n_blocks', type=int, default=9, help='number of resnet blocks in generator') 25 | self.parser.add_argument('--n_downsample_G', type=int, default=3, help='number of downsampling layers in netG') 26 | 27 | self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 28 | self.parser.add_argument('--n_gpus_gen', type=int, default=-1, help='how many gpus are used for generator (the rest are used for discriminator). -1 means use all gpus') 29 | self.parser.add_argument('--name', type=str, default='experiment_name', help='name of the experiment. It decides where to store samples and models') 30 | self.parser.add_argument('--dataset_mode', type=str, default='temporal', help='chooses how datasets are loaded. 
[unaligned | aligned | single]') 31 | self.parser.add_argument('--model', type=str, default='vid2vid', help='chooses which model to use. vid2vid, test') 32 | self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data') 33 | self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') 34 | self.parser.add_argument('--norm', type=str, default='batch', help='instance normalization or batch normalization') 35 | self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') 36 | self.parser.add_argument('--display_winsize', type=int, default=512, help='display window size') 37 | self.parser.add_argument('--display_id', type=int, default=0, help='window id of the web display') 38 | self.parser.add_argument('--tf_log', action='store_true', help='if specified, use tensorboard logging. Requires tensorflow installed') 39 | 40 | self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.') 41 | self.parser.add_argument('--resize_or_crop', type=str, default='scaleWidth', help='scaling and cropping of images at load time [resize_and_crop|crop|scaledCrop|scaleWidth|scaleWidth_and_crop|scaleWidth_and_scaledCrop|scaleHeight|scaleHeight_and_crop] etc') 42 | self.parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data argumentation') 43 | 44 | # more features as input 45 | self.parser.add_argument('--use_instance', action='store_true', help='if specified, add instance map as feature for class A') 46 | self.parser.add_argument('--label_feat', action='store_true', help='if specified, encode label features as input') 47 | self.parser.add_argument('--feat_num', type=int, default=3, help='number of encoded features') 48 | self.parser.add_argument('--nef', type=int, default=32, help='# of encoder filters in first conv layer') 49 | self.parser.add_argument('--load_features', action='store_true', help='if specified, load precomputed feature maps') 50 | self.parser.add_argument('--netE', type=str, default='simple', help='which model to use for encoder') 51 | self.parser.add_argument('--n_downsample_E', type=int, default=3, help='number of downsampling layers in netE') 52 | 53 | # for cascaded resnet 54 | self.parser.add_argument('--n_blocks_local', type=int, default=3, help='number of resnet blocks in outmost multiscale resnet') 55 | self.parser.add_argument('--n_local_enhancers', type=int, default=1, help='number of cascaded layers') 56 | 57 | # temporal 58 | self.parser.add_argument('--n_frames_G', type=int, default=3, help='number of input frames to feed into generator, i.e., n_frames_G-1 is the number of frames we look into past') 59 | self.parser.add_argument('--n_scales_spatial', type=int, default=1, help='number of spatial scales in the coarse-to-fine generator') 60 | self.parser.add_argument('--no_first_img', action='store_true', help='if specified, generator also tries to synthesize first image') 61 | self.parser.add_argument('--use_single_G', action='store_true', help='if specified, use single frame generator for the first frame') 62 | self.parser.add_argument('--fg', action='store_true', help='if specified, use foreground-background seperation model') 63 | self.parser.add_argument('--fg_labels', type=str, default='26', help='label 
indices for foreground objects') 64 | self.parser.add_argument('--no_flow', action='store_true', help='if specified, do not use flow warping and directly synthesize frames') 65 | 66 | # face specific 67 | self.parser.add_argument('--no_canny_edge', action='store_true', help='do *not* use canny edge as input') 68 | self.parser.add_argument('--no_dist_map', action='store_true', help='do *not* use distance transform map as input') 69 | self.parser.add_argument('--random_scale_points', action='store_true', help='randomly scale face keypoints a bit to create different results') 70 | 71 | # pose specific 72 | self.parser.add_argument('--densepose_only', action='store_true', help='use only densepose as input') 73 | self.parser.add_argument('--openpose_only', action='store_true', help='use only openpose as input') 74 | self.parser.add_argument('--add_face_disc', action='store_true', help='add face discriminator') 75 | self.parser.add_argument('--remove_face_labels', action='store_true', help='remove face labels to better adapt to different face shapes') 76 | self.parser.add_argument('--random_drop_prob', type=float, default=0.05, help='the probability to randomly drop each pose segment during training') 77 | self.parser.add_argument('--basic_point_only', action='store_true', help='only use basic joint keypoints for openpose, without hand or face keypoints') 78 | 79 | # miscellaneous 80 | self.parser.add_argument('--load_pretrain', type=str, default='', help='if specified, load the pretrained model') 81 | self.parser.add_argument('--debug', action='store_true', help='if specified, use small dataset for debug') 82 | self.parser.add_argument('--fp16', action='store_true', default=False, help='train with AMP') 83 | self.parser.add_argument('--local_rank', type=int, default=0, help='local rank for distributed training') 84 | 85 | self.initialized = True 86 | 87 | def parse_str(self, ids): 88 | str_ids = ids.split(',') 89 | ids_list = [] 90 | for str_id in str_ids: 91 | id = int(str_id) 92 | if id >= 0: 93 | ids_list.append(id) 94 | return ids_list 95 | 96 | def parse(self, save=True): 97 | if not self.initialized: 98 | self.initialize() 99 | self.opt = self.parser.parse_args() 100 | self.opt.isTrain = self.isTrain # train or test 101 | 102 | self.opt.fg_labels = self.parse_str(self.opt.fg_labels) 103 | self.opt.gpu_ids = self.parse_str(self.opt.gpu_ids) 104 | if self.opt.n_gpus_gen == -1: 105 | self.opt.n_gpus_gen = len(self.opt.gpu_ids) 106 | 107 | # set gpu ids 108 | if len(self.opt.gpu_ids) > 0: 109 | torch.cuda.set_device(self.opt.gpu_ids[0]) 110 | 111 | args = vars(self.opt) 112 | 113 | print('------------ Options -------------') 114 | for k, v in sorted(args.items()): 115 | print('%s: %s' % (str(k), str(v))) 116 | print('-------------- End ----------------') 117 | 118 | # save to the disk 119 | expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name) 120 | util.mkdirs(expr_dir) 121 | if save: 122 | file_name = os.path.join(expr_dir, 'opt.txt') 123 | with open(file_name, 'wt') as opt_file: 124 | opt_file.write('------------ Options -------------\n') 125 | for k, v in sorted(args.items()): 126 | opt_file.write('%s: %s\n' % (str(k), str(v))) 127 | opt_file.write('-------------- End ----------------\n') 128 | return self.opt 129 | -------------------------------------------------------------------------------- /options/test_options.py: -------------------------------------------------------------------------------- 1 | from .base_options import BaseOptions 2 | 3 | 4 | class 
TestOptions(BaseOptions): 5 | def initialize(self): 6 | BaseOptions.initialize(self) 7 | self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.') 8 | self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.') 9 | self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images') 10 | self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 11 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') 12 | self.parser.add_argument('--how_many', type=int, default=300, help='how many test images to run') 13 | self.parser.add_argument('--use_real_img', action='store_true', help='use real image for first frame') 14 | self.parser.add_argument('--start_frame', type=int, default=0, help='frame index to start inference on') 15 | self.isTrain = False 16 | -------------------------------------------------------------------------------- /options/train_options.py: -------------------------------------------------------------------------------- 1 | from .base_options import BaseOptions 2 | 3 | 4 | class TrainOptions(BaseOptions): 5 | def initialize(self): 6 | BaseOptions.initialize(self) 7 | self.parser.add_argument('--display_freq', type=int, default=100, help='frequency of showing training results on screen') 8 | self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') 9 | self.parser.add_argument('--save_latest_freq', type=int, default=1000, help='frequency of saving the latest results') 10 | self.parser.add_argument('--save_epoch_freq', type=int, default=1, help='frequency of saving checkpoints at the end of epochs') 11 | self.parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') 12 | self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') 13 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? 
set to latest to use latest cached model') 14 | self.parser.add_argument('--niter', type=int, default=10, help='# of iter at starting learning rate') 15 | self.parser.add_argument('--niter_decay', type=int, default=10, help='# of iter to linearly decay learning rate to zero') 16 | self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') 17 | self.parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam') 18 | self.parser.add_argument('--TTUR', action='store_true', help='Use TTUR training scheme') 19 | self.parser.add_argument('--gan_mode', type=str, default='ls', help='(ls|original|hinge)') 20 | self.parser.add_argument('--pool_size', type=int, default=1, help='the size of image buffer that stores previously generated images') 21 | self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/') 22 | 23 | # for discriminators 24 | self.parser.add_argument('--num_D', type=int, default=2, help='number of patch scales in each discriminator') 25 | self.parser.add_argument('--n_layers_D', type=int, default=3, help='number of layers in discriminator') 26 | self.parser.add_argument('--no_vgg', action='store_true', help='do not use VGG feature matching loss') 27 | self.parser.add_argument('--no_ganFeat', action='store_true', help='do not match discriminator features') 28 | self.parser.add_argument('--lambda_feat', type=float, default=10.0, help='weight for feature matching') 29 | self.parser.add_argument('--sparse_D', action='store_true', help='use sparse temporal discriminators to save memory') 30 | 31 | # for temporal 32 | self.parser.add_argument('--lambda_T', type=float, default=10.0, help='weight for temporal loss') 33 | self.parser.add_argument('--lambda_F', type=float, default=10.0, help='weight for flow loss') 34 | self.parser.add_argument('--n_frames_D', type=int, default=3, help='number of frames to feed into temporal discriminator') 35 | self.parser.add_argument('--n_scales_temporal', type=int, default=2, help='number of temporal scales in the temporal discriminator') 36 | self.parser.add_argument('--max_frames_per_gpu', type=int, default=1, help='max number of frames to load into one GPU at a time') 37 | self.parser.add_argument('--max_frames_backpropagate', type=int, default=1, help='max number of frames to backpropagate') 38 | self.parser.add_argument('--max_t_step', type=int, default=1, help='max spacing between neighboring sampled frames. 
If greater than 1, the network may randomly skip frames during training.') 39 | self.parser.add_argument('--n_frames_total', type=int, default=30, help='the overall number of frames in a sequence to train with') 40 | self.parser.add_argument('--niter_step', type=int, default=5, help='how many epochs do we change training batch size again') 41 | self.parser.add_argument('--niter_fix_global', type=int, default=0, help='if specified, only train the finest spatial layer for the given iterations') 42 | 43 | self.isTrain = True 44 | -------------------------------------------------------------------------------- /scripts/download_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive import * 3 | 4 | file_id = '1rPcbnanuApZeo2uc7h55OneBkbcFCnnf' 5 | chpt_path = './datasets/' 6 | if not os.path.isdir(chpt_path): 7 | os.makedirs(chpt_path) 8 | destination = os.path.join(chpt_path, 'datasets.zip') 9 | download_file_from_google_drive(file_id, destination) 10 | unzip_file(destination, chpt_path) -------------------------------------------------------------------------------- /scripts/download_flownet2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive import * 3 | import torch 4 | 5 | """if torch.__version__ == '0.4.1': 6 | file_id = '1gKwE1Ad41TwtAzwDcN3dYa_S6DcVyiSl' 7 | file_name = 'flownet2_pytorch_041.zip' 8 | else: 9 | file_id = '1F2h_6e8gyTqxnbmFFW72zsxx_JX0dKFo' 10 | file_name = 'flownet2_pytorch_040.zip'""" 11 | 12 | chpt_path = './models/' 13 | if not os.path.isdir(chpt_path): 14 | os.makedirs(chpt_path) 15 | """destination = os.path.join(chpt_path, file_name) 16 | download_file_from_google_drive(file_id, destination) 17 | unzip_file(destination, chpt_path)""" 18 | os.system('cd %s/flownet2_pytorch/; bash install.sh; cd ../../' % chpt_path) -------------------------------------------------------------------------------- /scripts/download_gdrive.py: -------------------------------------------------------------------------------- 1 | # Download code taken from Code taken from https://stackoverflow.com questions/25010369/wget-curl-large-file-from-google-drive/39225039#39225039 2 | import requests, zipfile, os 3 | def download_file_from_google_drive(id, destination): 4 | URL = "https://docs.google.com/uc?export=download" 5 | session = requests.Session() 6 | response = session.get(URL, params = { 'id' : id }, stream = True) 7 | token = get_confirm_token(response) 8 | if token: 9 | params = { 'id' : id, 'confirm' : token } 10 | response = session.get(URL, params = params, stream = True) 11 | save_response_content(response, destination) 12 | def get_confirm_token(response): 13 | for key, value in response.cookies.items(): 14 | if key.startswith('download_warning'): 15 | return value 16 | return None 17 | def save_response_content(response, destination): 18 | CHUNK_SIZE = 32768 19 | with open(destination, "wb") as f: 20 | for chunk in response.iter_content(CHUNK_SIZE): 21 | if chunk: # filter out keep-alive new chunks 22 | f.write(chunk) 23 | 24 | def unzip_file(file_name, unzip_path): 25 | zip_ref = zipfile.ZipFile(file_name, 'r') 26 | zip_ref.extractall(unzip_path) 27 | zip_ref.close() 28 | os.remove(file_name) -------------------------------------------------------------------------------- /scripts/download_models_flownet2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive 
import * 3 | 4 | file_id = '1E8re-b6csNuo-abg1vJKCDjCzlIam50F' 5 | chpt_path = './models/flownet2_pytorch/' 6 | destination = os.path.join(chpt_path, 'FlowNet2_checkpoint.pth.tar') 7 | download_file_from_google_drive(file_id, destination) -------------------------------------------------------------------------------- /scripts/face/download_gdrive.py: -------------------------------------------------------------------------------- 1 | # Download code taken from Code taken from https://stackoverflow.com questions/25010369/wget-curl-large-file-from-google-drive/39225039#39225039 2 | import requests, zipfile, os 3 | def download_file_from_google_drive(id, destination): 4 | URL = "https://docs.google.com/uc?export=download" 5 | session = requests.Session() 6 | response = session.get(URL, params = { 'id' : id }, stream = True) 7 | token = get_confirm_token(response) 8 | if token: 9 | params = { 'id' : id, 'confirm' : token } 10 | response = session.get(URL, params = params, stream = True) 11 | save_response_content(response, destination) 12 | def get_confirm_token(response): 13 | for key, value in response.cookies.items(): 14 | if key.startswith('download_warning'): 15 | return value 16 | return None 17 | def save_response_content(response, destination): 18 | CHUNK_SIZE = 32768 19 | with open(destination, "wb") as f: 20 | for chunk in response.iter_content(CHUNK_SIZE): 21 | if chunk: # filter out keep-alive new chunks 22 | f.write(chunk) 23 | 24 | def unzip_file(file_name, unzip_path): 25 | zip_ref = zipfile.ZipFile(file_name, 'r') 26 | zip_ref.extractall(unzip_path) 27 | zip_ref.close() 28 | os.remove(file_name) -------------------------------------------------------------------------------- /scripts/face/download_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive import * 3 | 4 | file_id = '10LvNw-2lrh-6sPGkWbQDfHspkqz5AKxb' 5 | chpt_path = './checkpoints/' 6 | if not os.path.isdir(chpt_path): 7 | os.makedirs(chpt_path) 8 | destination = os.path.join(chpt_path, 'models_face.zip') 9 | download_file_from_google_drive(file_id, destination) 10 | unzip_file(destination, chpt_path) -------------------------------------------------------------------------------- /scripts/face/test_512.sh: -------------------------------------------------------------------------------- 1 | python test.py --name edge2face_512 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --input_nc 15 --loadSize 512 --use_single_G -------------------------------------------------------------------------------- /scripts/face/test_g1_256.sh: -------------------------------------------------------------------------------- 1 | python test.py --name edge2face_256_g1 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --input_nc 15 --loadSize 256 --ngf 64 --use_single_G 4 | -------------------------------------------------------------------------------- /scripts/face/test_g1_512.sh: -------------------------------------------------------------------------------- 1 | python test.py --name edge2face_512_g1 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --n_scales_spatial 2 --input_nc 15 --loadSize 512 --ngf 64 \ 4 | --use_single_G 5 | -------------------------------------------------------------------------------- /scripts/face/train_512.sh: -------------------------------------------------------------------------------- 1 | python train.py --name edge2face_512 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --input_nc 15 
--loadSize 512 --num_D 3 \ 4 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 6 \ 5 | --niter 20 --niter_decay 20 --n_frames_total 12 -------------------------------------------------------------------------------- /scripts/face/train_512_bs7.sh: -------------------------------------------------------------------------------- 1 | python train.py --name edge2face_512 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --input_nc 15 --loadSize 512 --num_D 3 \ 4 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 8 --batchSize 7 \ 5 | --niter 20 --niter_decay 20 --n_frames_total 12 -------------------------------------------------------------------------------- /scripts/face/train_g1_256.sh: -------------------------------------------------------------------------------- 1 | python train.py --name edge2face_256_g1 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --input_nc 15 --loadSize 256 --ngf 64 \ 4 | --max_frames_per_gpu 6 --n_frames_total 12 \ 5 | --niter 20 --niter_decay 20 6 | -------------------------------------------------------------------------------- /scripts/face/train_g1_512.sh: -------------------------------------------------------------------------------- 1 | python train.py --name edge2face_512_g1 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --n_scales_spatial 2 --num_D 3 \ 4 | --input_nc 15 --loadSize 512 --ngf 64 \ 5 | --n_frames_total 6 --niter_step 2 --niter_fix_global 5 \ 6 | --load_pretrain checkpoints/edge2face_256_g1 7 | -------------------------------------------------------------------------------- /scripts/pose/test_1024p.sh: -------------------------------------------------------------------------------- 1 | python test.py --name pose2body_1024p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 3 \ 4 | --resize_or_crop scaleHeight --loadSize 1024 --no_first_img -------------------------------------------------------------------------------- /scripts/pose/test_256p.sh: -------------------------------------------------------------------------------- 1 | python test.py --name pose2body_256p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --resize_or_crop scaleHeight --loadSize 256 --no_first_img -------------------------------------------------------------------------------- /scripts/pose/test_512p.sh: -------------------------------------------------------------------------------- 1 | python test.py --name pose2body_512p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 2 \ 4 | --resize_or_crop scaleHeight --loadSize 512 --no_first_img -------------------------------------------------------------------------------- /scripts/pose/test_g1_1024p.sh: -------------------------------------------------------------------------------- 1 | python test.py --name pose2body_1024p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 3 --ngf 64 \ 4 | --resize_or_crop scaleHeight --loadSize 1024 --no_first_img 5 | -------------------------------------------------------------------------------- /scripts/pose/test_g1_256p.sh: -------------------------------------------------------------------------------- 1 | python test.py --name pose2body_256p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose --ngf 64 \ 3 | --input_nc 6 --resize_or_crop scaleHeight --loadSize 256 --no_first_img 4 | -------------------------------------------------------------------------------- /scripts/pose/test_g1_512p.sh: 
-------------------------------------------------------------------------------- 1 | python test.py --name pose2body_512p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 2 --ngf 64 \ 4 | --resize_or_crop scaleHeight --loadSize 512 --no_first_img 5 | -------------------------------------------------------------------------------- /scripts/pose/train_1024p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name pose2body_1024p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 3 --num_D 4 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 1536 --fineSize 1024 \ 5 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \ 6 | --no_first_img --n_frames_total 12 --max_t_step 4 --add_face_disc \ 7 | --niter_fix_global 3 --niter 5 --niter_decay 5 \ 8 | --lr 0.00005 --load_pretrain checkpoints/pose2body_512p -------------------------------------------------------------------------------- /scripts/pose/train_256p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name pose2body_256p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --num_D 2 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 384 --fineSize 256 \ 5 | --gpu_ids 0,1,2,3,4,5,6,7 --batchSize 8 --max_frames_per_gpu 3 \ 6 | --niter 5 --niter_decay 5 \ 7 | --no_first_img --n_frames_total 12 --max_t_step 4 -------------------------------------------------------------------------------- /scripts/pose/train_512p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name pose2body_512p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 2 --num_D 3 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 768 --fineSize 512 \ 5 | --gpu_ids 0,1,2,3,4,5,6,7 --batchSize 8 \ 6 | --no_first_img --n_frames_total 12 --max_t_step 4 --add_face_disc \ 7 | --niter_fix_global 3 --niter 5 --niter_decay 5 \ 8 | --lr 0.0001 --load_pretrain checkpoints/pose2body_256p -------------------------------------------------------------------------------- /scripts/pose/train_g1_1024p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name pose2body_1024p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 3 --num_D 4 --ngf 64 --ndf 32 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 1536 --fineSize 1024 \ 5 | --no_first_img --n_frames_total 12 --max_t_step 4 --add_face_disc \ 6 | --niter_fix_global 3 --niter 5 --niter_decay 5 \ 7 | --lr 0.00005 --load_pretrain checkpoints/pose2body_512p_g1 8 | -------------------------------------------------------------------------------- /scripts/pose/train_g1_256p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name pose2body_256p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --ngf 64 --num_D 2 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 384 --fineSize 256 \ 5 | --niter 5 --niter_decay 5 \ 6 | --no_first_img --n_frames_total 12 --max_frames_per_gpu 4 --max_t_step 4 7 | -------------------------------------------------------------------------------- /scripts/pose/train_g1_512p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name 
pose2body_512p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 2 --ngf 64 --num_D 3 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 768 --fineSize 512 \ 5 | --no_first_img --n_frames_total 12 --max_frames_per_gpu 2 --max_t_step 4 --add_face_disc \ 6 | --niter_fix_global 3 --niter 5 --niter_decay 5 \ 7 | --lr 0.0001 --load_pretrain checkpoints/pose2body_256p_g1 8 | -------------------------------------------------------------------------------- /scripts/street/download_gdrive.py: -------------------------------------------------------------------------------- 1 | # Download code taken from https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039#39225039 2 | import requests, zipfile, os 3 | def download_file_from_google_drive(id, destination): 4 | URL = "https://docs.google.com/uc?export=download" 5 | session = requests.Session() 6 | response = session.get(URL, params = { 'id' : id }, stream = True) 7 | token = get_confirm_token(response) 8 | if token: 9 | params = { 'id' : id, 'confirm' : token } 10 | response = session.get(URL, params = params, stream = True) 11 | save_response_content(response, destination) 12 | def get_confirm_token(response): 13 | for key, value in response.cookies.items(): 14 | if key.startswith('download_warning'): 15 | return value 16 | return None 17 | def save_response_content(response, destination): 18 | CHUNK_SIZE = 32768 19 | with open(destination, "wb") as f: 20 | for chunk in response.iter_content(CHUNK_SIZE): 21 | if chunk: # filter out keep-alive new chunks 22 | f.write(chunk) 23 | 24 | def unzip_file(file_name, unzip_path): 25 | zip_ref = zipfile.ZipFile(file_name, 'r') 26 | zip_ref.extractall(unzip_path) 27 | zip_ref.close() 28 | os.remove(file_name) -------------------------------------------------------------------------------- /scripts/street/download_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive import * 3 | 4 | file_id = '1MKtImgtnGC28EPU7Nh9DfFpHW6okNVkl' 5 | chpt_path = './checkpoints/' 6 | if not os.path.isdir(chpt_path): 7 | os.makedirs(chpt_path) 8 | destination = os.path.join(chpt_path, 'models.zip') 9 | download_file_from_google_drive(file_id, destination) 10 | unzip_file(destination, chpt_path) -------------------------------------------------------------------------------- /scripts/street/download_models_g1.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive import * 3 | 4 | file_id = '1QoE1p3QikxNVbbTBWWRDtIspg-RcLE8y' 5 | chpt_path = './checkpoints/' 6 | if not os.path.isdir(chpt_path): 7 | os.makedirs(chpt_path) 8 | destination = os.path.join(chpt_path, 'models_g1.zip') 9 | download_file_from_google_drive(file_id, destination) 10 | unzip_file(destination, chpt_path) 11 | -------------------------------------------------------------------------------- /scripts/street/test_2048.sh: -------------------------------------------------------------------------------- 1 | python test.py --name label2city_2048 --label_nc 35 --loadSize 2048 --n_scales_spatial 3 --use_instance --fg --use_single_G 2 | -------------------------------------------------------------------------------- /scripts/street/test_g1_1024.sh: -------------------------------------------------------------------------------- 1 | python test.py --name label2city_1024_g1 --label_nc 35 --loadSize 1024 --n_scales_spatial
3 --use_instance --fg --n_downsample_G 2 --use_single_G 2 | -------------------------------------------------------------------------------- /scripts/street/train_1024.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_1024 \ 2 | --label_nc 35 --loadSize 1024 --n_scales_spatial 2 --num_D 3 --use_instance --fg \ 3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \ 4 | --n_frames_total 4 --niter_step 2 \ 5 | --niter_fix_global 10 --load_pretrain checkpoints/label2city_512 --lr 0.0001 6 | -------------------------------------------------------------------------------- /scripts/street/train_2048.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_2048 \ 2 | --label_nc 35 --loadSize 2048 --n_scales_spatial 3 --num_D 4 --use_instance --fg \ 3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \ 4 | --n_frames_total 4 --niter_step 1 \ 5 | --niter 5 --niter_decay 5 \ 6 | --niter_fix_global 5 --load_pretrain checkpoints/label2city_1024 --lr 0.00005 -------------------------------------------------------------------------------- /scripts/street/train_2048_crop.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_2048_crop \ 2 | --label_nc 35 --loadSize 2048 --fineSize 1024 --resize_or_crop crop \ 3 | --n_scales_spatial 3 --num_D 4 --use_instance --fg \ 4 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \ 5 | --n_frames_total 4 --niter_step 1 \ 6 | --niter 5 --niter_decay 5 \ 7 | --niter_fix_global 5 --load_pretrain checkpoints/label2city_1024 --lr 0.00005 8 | -------------------------------------------------------------------------------- /scripts/street/train_512.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_512 \ 2 | --label_nc 35 --loadSize 512 --use_instance --fg \ 3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 6 \ 4 | --n_frames_total 6 --max_frames_per_gpu 2 5 | -------------------------------------------------------------------------------- /scripts/street/train_512_bs.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_512_bs \ 2 | --label_nc 35 --loadSize 512 --use_instance --fg \ 3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen -1 \ 4 | --n_frames_total 6 --batchSize 15 5 | -------------------------------------------------------------------------------- /scripts/street/train_512_no_fg.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_512_no_fg \ 2 | --label_nc 35 --loadSize 512 --use_instance \ 3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 6 \ 4 | --n_frames_total 6 --max_frames_per_gpu 2 5 | -------------------------------------------------------------------------------- /scripts/street/train_g1_1024.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_1024_g1 \ 2 | --label_nc 35 --loadSize 896 --n_scales_spatial 3 --n_frames_D 2 \ 3 | --use_instance --fg --n_downsample_G 2 --num_D 3 \ 4 | --max_frames_per_gpu 1 --n_frames_total 4 \ 5 | --niter_step 2 --niter_fix_global 8 --niter_decay 5 \ 6 | --load_pretrain checkpoints/label2city_512_g1 --lr 0.0001 7 | -------------------------------------------------------------------------------- /scripts/street/train_g1_256.sh: 
-------------------------------------------------------------------------------- 1 | python train.py --name label2city_256_g1 \ 2 | --label_nc 35 --loadSize 256 --use_instance --fg \ 3 | --n_downsample_G 2 --num_D 1 \ 4 | --max_frames_per_gpu 6 --n_frames_total 6 5 | -------------------------------------------------------------------------------- /scripts/street/train_g1_512.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_512_g1 \ 2 | --label_nc 35 --loadSize 512 --n_scales_spatial 2 \ 3 | --use_instance --fg --n_downsample_G 2 \ 4 | --max_frames_per_gpu 2 --n_frames_total 4 \ 5 | --niter_step 2 --niter_fix_global 8 --niter_decay 5 \ 6 | --load_pretrain checkpoints/label2city_256_g1 7 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | import time 4 | import os 5 | import numpy as np 6 | from collections import OrderedDict 7 | from torch.autograd import Variable 8 | from options.test_options import TestOptions 9 | from data.data_loader import CreateDataLoader 10 | from models.models import create_model 11 | import util.util as util 12 | from util.visualizer import Visualizer 13 | from util import html 14 | 15 | opt = TestOptions().parse(save=False) 16 | opt.nThreads = 1 # test code only supports nThreads = 1 17 | opt.batchSize = 1 # test code only supports batchSize = 1 18 | opt.serial_batches = True # no shuffle 19 | opt.no_flip = True # no flip 20 | if opt.dataset_mode == 'temporal': 21 | opt.dataset_mode = 'test' 22 | 23 | data_loader = CreateDataLoader(opt) 24 | dataset = data_loader.load_data() 25 | model = create_model(opt) 26 | visualizer = Visualizer(opt) 27 | input_nc = 1 if opt.label_nc != 0 else opt.input_nc 28 | 29 | save_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.which_epoch)) 30 | print('Doing %d frames' % len(dataset)) 31 | for i, data in enumerate(dataset): 32 | if i >= opt.how_many: 33 | break 34 | if data['change_seq']: 35 | model.fake_B_prev = None 36 | 37 | _, _, height, width = data['A'].size() 38 | A = Variable(data['A']).view(1, -1, input_nc, height, width) 39 | B = Variable(data['B']).view(1, -1, opt.output_nc, height, width) if len(data['B'].size()) > 2 else None 40 | inst = Variable(data['inst']).view(1, -1, 1, height, width) if len(data['inst'].size()) > 2 else None 41 | generated = model.inference(A, B, inst) 42 | 43 | if opt.label_nc != 0: 44 | real_A = util.tensor2label(generated[1], opt.label_nc) 45 | else: 46 | c = 3 if opt.input_nc == 3 else 1 47 | real_A = util.tensor2im(generated[1][:c], normalize=False) 48 | 49 | visual_list = [('real_A', real_A), 50 | ('fake_B', util.tensor2im(generated[0].data[0]))] 51 | visuals = OrderedDict(visual_list) 52 | img_path = data['A_path'] 53 | print('process image... %s' % img_path) 54 | visualizer.save_images(save_dir, visuals, img_path) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
3 | import time 4 | import os 5 | import torch 6 | from subprocess import call 7 | 8 | from options.train_options import TrainOptions 9 | from data.data_loader import CreateDataLoader 10 | from models.models import create_model, create_optimizer, init_params, save_models, update_models 11 | import util.util as util 12 | from util.visualizer import Visualizer 13 | 14 | def train(): 15 | opt = TrainOptions().parse() 16 | if opt.debug: 17 | opt.display_freq = 1 18 | opt.print_freq = 1 19 | opt.nThreads = 1 20 | 21 | ### initialize dataset 22 | data_loader = CreateDataLoader(opt) 23 | dataset = data_loader.load_data() 24 | dataset_size = len(data_loader) 25 | print('#training videos = %d' % dataset_size) 26 | 27 | ### initialize models 28 | models = create_model(opt) 29 | modelG, modelD, flowNet, optimizer_G, optimizer_D, optimizer_D_T = create_optimizer(opt, models) 30 | 31 | ### set parameters 32 | n_gpus, tG, tD, tDB, s_scales, t_scales, input_nc, output_nc, \ 33 | start_epoch, epoch_iter, print_freq, total_steps, iter_path = init_params(opt, modelG, modelD, data_loader) 34 | visualizer = Visualizer(opt) 35 | 36 | ### real training starts here 37 | for epoch in range(start_epoch, opt.niter + opt.niter_decay + 1): 38 | epoch_start_time = time.time() 39 | for idx, data in enumerate(dataset, start=epoch_iter): 40 | if total_steps % print_freq == 0: 41 | iter_start_time = time.time() 42 | total_steps += opt.batchSize 43 | epoch_iter += opt.batchSize 44 | 45 | # whether to collect output images 46 | save_fake = total_steps % opt.display_freq == 0 47 | n_frames_total, n_frames_load, t_len = data_loader.dataset.init_data_params(data, n_gpus, tG) 48 | fake_B_prev_last, frames_all = data_loader.dataset.init_data(t_scales) 49 | 50 | for i in range(0, n_frames_total, n_frames_load): 51 | input_A, input_B, inst_A = data_loader.dataset.prepare_data(data, i, input_nc, output_nc) 52 | 53 | ###################################### Forward Pass ########################## 54 | ####### generator 55 | fake_B, fake_B_raw, flow, weight, real_A, real_Bp, fake_B_last = modelG(input_A, input_B, inst_A, fake_B_prev_last) 56 | 57 | ####### discriminator 58 | ### individual frame discriminator 59 | real_B_prev, real_B = real_Bp[:, :-1], real_Bp[:, 1:] # the collection of previous and current real frames 60 | flow_ref, conf_ref = flowNet(real_B, real_B_prev) # reference flows and confidences 61 | fake_B_prev = modelG.module.compute_fake_B_prev(real_B_prev, fake_B_prev_last, fake_B) 62 | fake_B_prev_last = fake_B_last 63 | 64 | losses = modelD(0, reshape([real_B, fake_B, fake_B_raw, real_A, real_B_prev, fake_B_prev, flow, weight, flow_ref, conf_ref])) 65 | losses = [ torch.mean(x) if x is not None else 0 for x in losses ] 66 | loss_dict = dict(zip(modelD.module.loss_names, losses)) 67 | 68 | ### temporal discriminator 69 | # get skipped frames for each temporal scale 70 | frames_all, frames_skipped = modelD.module.get_all_skipped_frames(frames_all, \ 71 | real_B, fake_B, flow_ref, conf_ref, t_scales, tD, n_frames_load, i, flowNet) 72 | 73 | # run discriminator for each temporal scale 74 | loss_dict_T = [] 75 | for s in range(t_scales): 76 | if frames_skipped[0][s] is not None: 77 | losses = modelD(s+1, [frame_skipped[s] for frame_skipped in frames_skipped]) 78 | losses = [ torch.mean(x) if not isinstance(x, int) else x for x in losses ] 79 | loss_dict_T.append(dict(zip(modelD.module.loss_names_T, losses))) 80 | 81 | # collect losses 82 | loss_G, loss_D, loss_D_T, t_scales_act = modelD.module.get_losses(loss_dict, 
loss_dict_T, t_scales) 83 | 84 | ###################################### Backward Pass ################################# 85 | # update generator weights 86 | loss_backward(opt, loss_G, optimizer_G) 87 | 88 | # update individual discriminator weights 89 | loss_backward(opt, loss_D, optimizer_D) 90 | 91 | # update temporal discriminator weights 92 | for s in range(t_scales_act): 93 | loss_backward(opt, loss_D_T[s], optimizer_D_T[s]) 94 | 95 | if i == 0: fake_B_first = fake_B[0, 0] # the first generated image in this sequence 96 | 97 | if opt.debug: 98 | call(["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"]) 99 | 100 | ############## Display results and errors ########## 101 | ### print out errors 102 | if total_steps % print_freq == 0: 103 | t = (time.time() - iter_start_time) / print_freq 104 | errors = {k: v.data.item() if not isinstance(v, int) else v for k, v in loss_dict.items()} 105 | for s in range(len(loss_dict_T)): 106 | errors.update({k+str(s): v.data.item() if not isinstance(v, int) else v for k, v in loss_dict_T[s].items()}) 107 | visualizer.print_current_errors(epoch, epoch_iter, errors, t) 108 | visualizer.plot_current_errors(errors, total_steps) 109 | 110 | ### display output images 111 | if save_fake: 112 | visuals = util.save_all_tensors(opt, real_A, fake_B, fake_B_first, fake_B_raw, real_B, flow_ref, conf_ref, flow, weight, modelD) 113 | visualizer.display_current_results(visuals, epoch, total_steps) 114 | 115 | ### save latest model 116 | save_models(opt, epoch, epoch_iter, total_steps, visualizer, iter_path, modelG, modelD) 117 | if epoch_iter > dataset_size - opt.batchSize: 118 | epoch_iter = 0 119 | break 120 | 121 | # end of epoch 122 | iter_end_time = time.time() 123 | visualizer.vis_print('End of epoch %d / %d \t Time Taken: %d sec' % 124 | (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time)) 125 | 126 | ### save model for this epoch and update model params 127 | save_models(opt, epoch, epoch_iter, total_steps, visualizer, iter_path, modelG, modelD, end_of_epoch=True) 128 | update_models(opt, epoch, modelG, modelD, data_loader) 129 | 130 | def loss_backward(opt, loss, optimizer): 131 | optimizer.zero_grad() 132 | if opt.fp16: 133 | from apex import amp 134 | with amp.scale_loss(loss, optimizer) as scaled_loss: 135 | scaled_loss.backward() 136 | else: 137 | loss.backward() 138 | optimizer.step() 139 | 140 | def reshape(tensors): 141 | if tensors is None: return None 142 | if isinstance(tensors, list): 143 | return [reshape(tensor) for tensor in tensors] 144 | _, _, ch, h, w = tensors.size() 145 | return tensors.contiguous().view(-1, ch, h, w) 146 | 147 | if __name__ == "__main__": 148 | train() -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/util/__init__.py -------------------------------------------------------------------------------- /util/html.py: -------------------------------------------------------------------------------- 1 | import dominate 2 | from dominate.tags import * 3 | import os 4 | 5 | 6 | class HTML: 7 | def __init__(self, web_dir, title, reflesh=0): 8 | self.title = title 9 | self.web_dir = web_dir 10 | self.img_dir = os.path.join(self.web_dir, 'images') 11 | if not os.path.exists(self.web_dir): 12 | os.makedirs(self.web_dir) 13 | if not os.path.exists(self.img_dir): 14 | 
os.makedirs(self.img_dir) 15 | # print(self.img_dir) 16 | 17 | self.doc = dominate.document(title=title) 18 | if reflesh > 0: 19 | with self.doc.head: 20 | meta(http_equiv="refresh", content=str(reflesh)) 21 | 22 | def get_image_dir(self): 23 | return self.img_dir 24 | 25 | def add_header(self, str): 26 | with self.doc: 27 | h3(str) 28 | 29 | def add_table(self, border=1): 30 | self.t = table(border=border, style="table-layout: fixed;") 31 | self.doc.add(self.t) 32 | 33 | def add_images(self, ims, txts, links, width=400, height=0): 34 | self.add_table() 35 | with self.t: 36 | with tr(): 37 | for im, txt, link in zip(ims, txts, links): 38 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 39 | with p(): 40 | with a(href=os.path.join('images', link)): 41 | if height != 0: 42 | img(style="width:%dpx;height:%dpx" % (width, height), src=os.path.join('images', im)) 43 | else: 44 | img(style="width:%dpx" % (width), src=os.path.join('images', im)) 45 | br() 46 | p(txt) 47 | 48 | def save(self): 49 | html_file = '%s/index.html' % self.web_dir 50 | f = open(html_file, 'wt') 51 | f.write(self.doc.render()) 52 | f.close() 53 | 54 | 55 | if __name__ == '__main__': 56 | html = HTML('web/', 'test_html') 57 | html.add_header('hello world') 58 | 59 | ims = [] 60 | txts = [] 61 | links = [] 62 | for n in range(4): 63 | ims.append('image_%d.jpg' % n) 64 | txts.append('text_%d' % n) 65 | links.append('image_%d.jpg' % n) 66 | html.add_images(ims, txts, links) 67 | html.save() 68 | -------------------------------------------------------------------------------- /util/image_pool.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | class ImagePool(): 6 | def __init__(self, pool_size): 7 | self.pool_size = pool_size 8 | if self.pool_size > 0: 9 | self.num_imgs = 0 10 | self.images = [] 11 | 12 | def query(self, images): 13 | if self.pool_size == 0: 14 | return images 15 | return_images = [] 16 | for image in images.data: 17 | image = torch.unsqueeze(image, 0) 18 | if self.num_imgs < self.pool_size: 19 | self.num_imgs = self.num_imgs + 1 20 | self.images.append(image) 21 | return_images.append(image) 22 | else: 23 | p = random.uniform(0, 1) 24 | if p > 0.5: 25 | random_id = random.randint(0, self.pool_size-1) 26 | tmp = self.images[random_id].clone() 27 | self.images[random_id] = image 28 | return_images.append(tmp) 29 | else: 30 | return_images.append(image) 31 | return_images = Variable(torch.cat(return_images, 0)) 32 | return return_images 33 | -------------------------------------------------------------------------------- /util/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | import inspect, re 6 | import numpy as np 7 | import os 8 | import collections 9 | from PIL import Image 10 | import cv2 11 | from collections import OrderedDict 12 | 13 | def save_all_tensors(opt, real_A, fake_B, fake_B_first, fake_B_raw, real_B, flow_ref, conf_ref, flow, weight, modelD): 14 | if opt.label_nc != 0: 15 | input_image = tensor2label(real_A, opt.label_nc) 16 | elif opt.dataset_mode == 'pose': 17 | input_image = tensor2im(real_A) 18 | if real_A.size()[2] == 6: 19 | input_image2 = tensor2im(real_A[0, -1, 3:]) 20 | input_image[input_image2 != 0] = input_image2[input_image2 != 0] 21 | else: 22 | c = 3 if opt.input_nc >= 3
else 1 23 | input_image = tensor2im(real_A[0, -1, :c], normalize=False) 24 | if opt.use_instance: 25 | edges = tensor2im(real_A[0, -1, -1:], normalize=False) 26 | input_image += edges[:,:,np.newaxis] 27 | 28 | if opt.add_face_disc: 29 | ys, ye, xs, xe = modelD.module.get_face_region(real_A[0, -1:]) 30 | if ys is not None: 31 | input_image[ys, xs:xe, :] = input_image[ye, xs:xe, :] = input_image[ys:ye, xs, :] = input_image[ys:ye, xe, :] = 255 32 | 33 | visual_list = [('input_image', input_image), 34 | ('fake_image', tensor2im(fake_B)), 35 | ('fake_first_image', tensor2im(fake_B_first)), 36 | ('fake_raw_image', tensor2im(fake_B_raw)), 37 | ('real_image', tensor2im(real_B)), 38 | ('flow_ref', tensor2flow(flow_ref)), 39 | ('conf_ref', tensor2im(conf_ref, normalize=False))] 40 | if flow is not None: 41 | visual_list += [('flow', tensor2flow(flow)), 42 | ('weight', tensor2im(weight, normalize=False))] 43 | visuals = OrderedDict(visual_list) 44 | return visuals 45 | 46 | # Converts a Tensor into a Numpy array 47 | # |imtype|: the desired type of the converted numpy array 48 | def tensor2im(image_tensor, imtype=np.uint8, normalize=True): 49 | if isinstance(image_tensor, list): 50 | image_numpy = [] 51 | for i in range(len(image_tensor)): 52 | image_numpy.append(tensor2im(image_tensor[i], imtype, normalize)) 53 | return image_numpy 54 | 55 | if isinstance(image_tensor, torch.autograd.Variable): 56 | image_tensor = image_tensor.data 57 | if len(image_tensor.size()) == 5: 58 | image_tensor = image_tensor[0, -1] 59 | if len(image_tensor.size()) == 4: 60 | image_tensor = image_tensor[0] 61 | image_tensor = image_tensor[:3] 62 | image_numpy = image_tensor.cpu().float().numpy() 63 | if normalize: 64 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 65 | else: 66 | image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0 67 | #image_numpy = (np.transpose(image_numpy, (1, 2, 0)) * std + mean) * 255.0 68 | image_numpy = np.clip(image_numpy, 0, 255) 69 | if image_numpy.shape[2] == 1: 70 | image_numpy = image_numpy[:,:,0] 71 | return image_numpy.astype(imtype) 72 | 73 | def tensor2label(output, n_label, imtype=np.uint8): 74 | if isinstance(output, torch.autograd.Variable): 75 | output = output.data 76 | if len(output.size()) == 5: 77 | output = output[0, -1] 78 | if len(output.size()) == 4: 79 | output = output[0] 80 | output = output.cpu().float() 81 | if output.size()[0] > 1: 82 | output = output.max(0, keepdim=True)[1] 83 | #print(output.size()) 84 | output = Colorize(n_label)(output) 85 | output = np.transpose(output.numpy(), (1, 2, 0)) 86 | #img = Image.fromarray(output, "RGB") 87 | return output.astype(imtype) 88 | 89 | def tensor2flow(output, imtype=np.uint8): 90 | if isinstance(output, torch.autograd.Variable): 91 | output = output.data 92 | if len(output.size()) == 5: 93 | output = output[0, -1] 94 | if len(output.size()) == 4: 95 | output = output[0] 96 | output = output.cpu().float().numpy() 97 | output = np.transpose(output, (1, 2, 0)) 98 | #mag = np.max(np.sqrt(output[:,:,0]**2 + output[:,:,1]**2)) 99 | #print(mag) 100 | hsv = np.zeros((output.shape[0], output.shape[1], 3), dtype=np.uint8) 101 | hsv[:, :, 0] = 255 102 | hsv[:, :, 1] = 255 103 | mag, ang = cv2.cartToPolar(output[..., 0], output[..., 1]) 104 | hsv[..., 0] = ang * 180 / np.pi / 2 105 | hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX) 106 | rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) 107 | return rgb 108 | 109 | def add_dummy_to_tensor(tensors, add_size=0): 110 | if add_size == 0 or tensors is 
None: return tensors 111 | if isinstance(tensors, list): 112 | return [add_dummy_to_tensor(tensor, add_size) for tensor in tensors] 113 | 114 | if isinstance(tensors, torch.Tensor): 115 | dummy = torch.zeros_like(tensors)[:add_size] 116 | tensors = torch.cat([dummy, tensors]) 117 | return tensors 118 | 119 | def remove_dummy_from_tensor(tensors, remove_size=0): 120 | if remove_size == 0 or tensors is None: return tensors 121 | if isinstance(tensors, list): 122 | return [remove_dummy_from_tensor(tensor, remove_size) for tensor in tensors] 123 | 124 | if isinstance(tensors, torch.Tensor): 125 | tensors = tensors[remove_size:] 126 | return tensors 127 | 128 | def save_image(image_numpy, image_path): 129 | image_pil = Image.fromarray(image_numpy) 130 | image_pil.save(image_path) 131 | 132 | def print_numpy(x, val=True, shp=False): 133 | x = x.astype(np.float64) 134 | if shp: 135 | print('shape,', x.shape) 136 | if val: 137 | x = x.flatten() 138 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( 139 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) 140 | 141 | def mkdirs(paths): 142 | if isinstance(paths, list) and not isinstance(paths, str): 143 | for path in paths: 144 | mkdir(path) 145 | else: 146 | mkdir(paths) 147 | 148 | def mkdir(path): 149 | if not os.path.exists(path): 150 | os.makedirs(path) 151 | 152 | def uint82bin(n, count=8): 153 | """returns the binary of integer n, count refers to amount of bits""" 154 | return ''.join([str((n >> y) & 1) for y in range(count-1, -1, -1)]) 155 | 156 | def labelcolormap(N): 157 | if N == 35: # Cityscapes train 158 | cmap = np.array([( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), (111, 74, 0), ( 81, 0, 81), 159 | (128, 64,128), (244, 35,232), (250,170,160), (230,150,140), ( 70, 70, 70), (102,102,156), (190,153,153), 160 | (180,165,180), (150,100,100), (150,120, 90), (153,153,153), (153,153,153), (250,170, 30), (220,220, 0), 161 | (107,142, 35), (152,251,152), ( 70,130,180), (220, 20, 60), (255, 0, 0), ( 0, 0,142), ( 0, 0, 70), 162 | ( 0, 60,100), ( 0, 0, 90), ( 0, 0,110), ( 0, 80,100), ( 0, 0,230), (119, 11, 32), ( 0, 0,142)], 163 | dtype=np.uint8) 164 | elif N == 20: # Cityscapes eval 165 | cmap = np.array([(128, 64,128), (244, 35,232), ( 70, 70, 70), (102,102,156), (190,153,153), (153,153,153), (250,170, 30), 166 | (220,220, 0), (107,142, 35), (152,251,152), ( 70,130,180), (220, 20, 60), (255, 0, 0), ( 0, 0,142), 167 | ( 0, 0, 70), ( 0, 60,100), ( 0, 80,100), ( 0, 0,230), (119, 11, 32), ( 0, 0, 0)], 168 | dtype=np.uint8) 169 | else: 170 | cmap = np.zeros((N, 3), dtype=np.uint8) 171 | for i in range(N): 172 | r, g, b = 0, 0, 0 173 | id = i 174 | for j in range(7): 175 | str_id = uint82bin(id) 176 | r = r ^ (np.uint8(str_id[-1]) << (7-j)) 177 | g = g ^ (np.uint8(str_id[-2]) << (7-j)) 178 | b = b ^ (np.uint8(str_id[-3]) << (7-j)) 179 | id = id >> 3 180 | cmap[i, 0], cmap[i, 1], cmap[i, 2] = r, g, b 181 | return cmap 182 | 183 | def colormap(n): 184 | cmap = np.zeros([n, 3]).astype(np.uint8) 185 | for i in np.arange(n): 186 | r, g, b = np.zeros(3) 187 | 188 | for j in np.arange(8): 189 | r = r + (1 << (7-j))*((i & (1 << (3*j))) >> (3*j)) 190 | g = g + (1 << (7-j))*((i & (1 << (3*j+1))) >> (3*j+1)) 191 | b = b + (1 << (7-j))*((i & (1 << (3*j+2))) >> (3*j+2)) 192 | 193 | cmap[i, :] = np.array([r, g, b]) 194 | 195 | return cmap 196 | 197 | class Colorize(object): 198 | def __init__(self, n=35): 199 | self.cmap = labelcolormap(n) 200 | self.cmap = torch.from_numpy(self.cmap[:n]) 201 | 202 | def 
__call__(self, gray_image): 203 | size = gray_image.size() 204 | color_image = torch.ByteTensor(3, size[1], size[2]).fill_(0) 205 | 206 | for label in range(0, len(self.cmap)): 207 | mask = (label == gray_image[0]).cpu() 208 | color_image[0][mask] = self.cmap[label][0] 209 | color_image[1][mask] = self.cmap[label][1] 210 | color_image[2][mask] = self.cmap[label][2] 211 | 212 | return color_image -------------------------------------------------------------------------------- /util/visualizer.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | import numpy as np 4 | import os 5 | import time 6 | from . import util 7 | from . import html 8 | import scipy.misc 9 | try: 10 | from StringIO import StringIO # Python 2.7 11 | except ImportError: 12 | from io import BytesIO # Python 3.x 13 | 14 | class Visualizer(): 15 | def __init__(self, opt): 16 | self.opt = opt 17 | self.tf_log = opt.tf_log 18 | self.use_html = opt.isTrain and not opt.no_html 19 | self.win_size = opt.display_winsize 20 | self.name = opt.name 21 | if self.tf_log: 22 | import tensorflow as tf 23 | self.tf = tf 24 | self.log_dir = os.path.join(opt.checkpoints_dir, opt.name, 'logs') 25 | self.writer = tf.summary.FileWriter(self.log_dir) 26 | 27 | if self.use_html: 28 | self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web') 29 | self.img_dir = os.path.join(self.web_dir, 'images') 30 | print('create web directory %s...' % self.web_dir) 31 | util.mkdirs([self.web_dir, self.img_dir]) 32 | self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt') 33 | with open(self.log_name, "a") as log_file: 34 | now = time.strftime("%c") 35 | log_file.write('================ Training Loss (%s) ================\n' % now) 36 | 37 | # |visuals|: dictionary of images to display or save 38 | def display_current_results(self, visuals, epoch, step): 39 | if self.tf_log: # show images in tensorboard output 40 | img_summaries = [] 41 | for label, image_numpy in visuals.items(): 42 | # Write the image to a string 43 | try: 44 | s = StringIO() 45 | except: 46 | s = BytesIO() 47 | scipy.misc.toimage(image_numpy).save(s, format="jpeg") 48 | # Create an Image object 49 | img_sum = self.tf.Summary.Image(encoded_image_string=s.getvalue(), height=image_numpy.shape[0], width=image_numpy.shape[1]) 50 | # Create a Summary value 51 | img_summaries.append(self.tf.Summary.Value(tag=label, image=img_sum)) 52 | 53 | # Create and write Summary 54 | summary = self.tf.Summary(value=img_summaries) 55 | self.writer.add_summary(summary, step) 56 | 57 | if self.use_html: # save images to a html file 58 | for label, image_numpy in visuals.items(): 59 | if isinstance(image_numpy, list): 60 | for i in range(len(image_numpy)): 61 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s_%d.jpg' % (epoch, label, i)) 62 | util.save_image(image_numpy[i], img_path) 63 | else: 64 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.jpg' % (epoch, label)) 65 | util.save_image(image_numpy, img_path) 66 | 67 | # update website 68 | webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1) 69 | for n in range(epoch, 0, -1): 70 | webpage.add_header('epoch [%d]' % n) 71 | ims = [] 72 | txts = [] 73 | links = [] 74 | 75 | for label, image_numpy in visuals.items(): 76 | if isinstance(image_numpy, list): 77 | for i in 
range(len(image_numpy)): 78 | img_path = 'epoch%.3d_%s_%d.jpg' % (n, label, i) 79 | ims.append(img_path) 80 | txts.append(label+str(i)) 81 | links.append(img_path) 82 | else: 83 | img_path = 'epoch%.3d_%s.jpg' % (n, label) 84 | ims.append(img_path) 85 | txts.append(label) 86 | links.append(img_path) 87 | if len(ims) < 6: 88 | webpage.add_images(ims, txts, links, width=self.win_size) 89 | else: 90 | num = int(round(len(ims)/2.0)) 91 | webpage.add_images(ims[:num], txts[:num], links[:num], width=self.win_size) 92 | webpage.add_images(ims[num:], txts[num:], links[num:], width=self.win_size) 93 | webpage.save() 94 | 95 | # errors: dictionary of error labels and values 96 | def plot_current_errors(self, errors, step): 97 | if self.tf_log: 98 | for tag, value in errors.items(): 99 | summary = self.tf.Summary(value=[self.tf.Summary.Value(tag=tag, simple_value=value)]) 100 | self.writer.add_summary(summary, step) 101 | 102 | # errors: same format as |errors| of plotCurrentErrors 103 | def print_current_errors(self, epoch, i, errors, t): 104 | message = '(epoch: %d, iters: %d, time: %.3f) ' % (epoch, i, t) 105 | for k, v in sorted(errors.items()): 106 | if v != 0: 107 | message += '%s: %.3f ' % (k, v) 108 | 109 | print(message) 110 | with open(self.log_name, "a") as log_file: 111 | log_file.write('%s\n' % message) 112 | 113 | # save image to the disk 114 | def save_images(self, image_dir, visuals, image_path, webpage=None): 115 | dirname = os.path.basename(os.path.dirname(image_path[0])) 116 | image_dir = os.path.join(image_dir, dirname) 117 | util.mkdir(image_dir) 118 | name = os.path.basename(image_path[0]) 119 | name = os.path.splitext(name)[0] 120 | 121 | if webpage is not None: 122 | webpage.add_header(name) 123 | ims, txts, links = [], [], [] 124 | 125 | for label, image_numpy in visuals.items(): 126 | save_ext = 'png' if 'real_A' in label and self.opt.label_nc != 0 else 'jpg' 127 | image_name = '%s_%s.%s' % (label, name, save_ext) 128 | save_path = os.path.join(image_dir, image_name) 129 | util.save_image(image_numpy, save_path) 130 | 131 | if webpage is not None: 132 | ims.append(image_name) 133 | txts.append(label) 134 | links.append(image_name) 135 | if webpage is not None: 136 | webpage.add_images(ims, txts, links, width=self.win_size) 137 | 138 | def vis_print(self, message): 139 | print(message) 140 | with open(self.log_name, "a") as log_file: 141 | log_file.write('%s\n' % message) 142 | 143 | --------------------------------------------------------------------------------