├── .gitignore
├── LICENSE.txt
├── README.md
├── data
│   ├── __init__.py
│   ├── base_data_loader.py
│   ├── base_dataset.py
│   ├── custom_dataset_data_loader.py
│   ├── data_loader.py
│   ├── face_dataset.py
│   ├── face_landmark_detection.py
│   ├── image_folder.py
│   ├── keypoint2img.py
│   ├── pose_dataset.py
│   ├── temporal_dataset.py
│   └── test_dataset.py
├── docker
│   ├── Dockerfile
│   ├── launch_docker.sh
│   └── pre_docker_install.sh
├── imgs
│   ├── city_change_labels.gif
│   ├── city_change_styles.gif
│   ├── face.gif
│   ├── face_multiple.gif
│   ├── framePredict.gif
│   ├── pose.gif
│   └── teaser.gif
├── models
│   ├── __init__.py
│   ├── base_model.py
│   ├── flownet.py
│   ├── flownet2_pytorch
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── convert.py
│   │   ├── datasets.py
│   │   ├── download_caffe_models.sh
│   │   ├── install.sh
│   │   ├── launch_docker.sh
│   │   ├── losses.py
│   │   ├── main.py
│   │   ├── models.py
│   │   ├── networks
│   │   │   ├── FlowNetC.py
│   │   │   ├── FlowNetFusion.py
│   │   │   ├── FlowNetS.py
│   │   │   ├── FlowNetSD.py
│   │   │   ├── __init__.py
│   │   │   ├── channelnorm_package
│   │   │   │   ├── __init__.py
│   │   │   │   ├── channelnorm.py
│   │   │   │   ├── channelnorm_cuda.cc
│   │   │   │   ├── channelnorm_kernel.cu
│   │   │   │   ├── channelnorm_kernel.cuh
│   │   │   │   └── setup.py
│   │   │   ├── correlation_package
│   │   │   │   ├── __init__.py
│   │   │   │   ├── correlation.py
│   │   │   │   ├── correlation_cuda.cc
│   │   │   │   ├── correlation_cuda_kernel.cu
│   │   │   │   ├── correlation_cuda_kernel.cuh
│   │   │   │   └── setup.py
│   │   │   ├── resample2d_package
│   │   │   │   ├── __init__.py
│   │   │   │   ├── resample2d.py
│   │   │   │   ├── resample2d_cuda.cc
│   │   │   │   ├── resample2d_kernel.cu
│   │   │   │   ├── resample2d_kernel.cuh
│   │   │   │   └── setup.py
│   │   │   └── submodules.py
│   │   ├── run-caffe2pytorch.sh
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── flow_utils.py
│   │       ├── frame_utils.py
│   │       ├── param_utils.py
│   │       └── tools.py
│   ├── models.py
│   ├── networks.py
│   ├── vid2vid_model_D.py
│   └── vid2vid_model_G.py
├── options
│   ├── __init__.py
│   ├── base_options.py
│   ├── test_options.py
│   └── train_options.py
├── scripts
│   ├── download_datasets.py
│   ├── download_flownet2.py
│   ├── download_gdrive.py
│   ├── download_models_flownet2.py
│   ├── face
│   │   ├── download_gdrive.py
│   │   ├── download_models.py
│   │   ├── test_512.sh
│   │   ├── test_g1_256.sh
│   │   ├── test_g1_512.sh
│   │   ├── train_512.sh
│   │   ├── train_512_bs7.sh
│   │   ├── train_g1_256.sh
│   │   └── train_g1_512.sh
│   ├── pose
│   │   ├── test_1024p.sh
│   │   ├── test_256p.sh
│   │   ├── test_512p.sh
│   │   ├── test_g1_1024p.sh
│   │   ├── test_g1_256p.sh
│   │   ├── test_g1_512p.sh
│   │   ├── train_1024p.sh
│   │   ├── train_256p.sh
│   │   ├── train_512p.sh
│   │   ├── train_g1_1024p.sh
│   │   ├── train_g1_256p.sh
│   │   └── train_g1_512p.sh
│   └── street
│       ├── download_gdrive.py
│       ├── download_models.py
│       ├── download_models_g1.py
│       ├── test_2048.sh
│       ├── test_g1_1024.sh
│       ├── train_1024.sh
│       ├── train_2048.sh
│       ├── train_2048_crop.sh
│       ├── train_512.sh
│       ├── train_512_bs.sh
│       ├── train_512_no_fg.sh
│       ├── train_g1_1024.sh
│       ├── train_g1_256.sh
│       └── train_g1_512.sh
├── test.py
├── train.py
└── util
    ├── __init__.py
    ├── html.py
    ├── image_pool.py
    ├── util.py
    └── visualizer.py

/.gitignore:
--------------------------------------------------------------------------------
 1 | debug*
 2 | checkpoints/
 3 | datasets/
 4 | models/debug*
 5 | models/flownet2*/networks/*/*egg-info
 6 | models/flownet2*/networks/*/build
 7 | models/flownet2*/networks/*/__pycache__
 8 | models/flownet2*/networks/*/dist
 9 | results/
10 | build/
11 | */Thumbs.db
12 | */**/__pycache__
13 | */*.pyc
14 | */**/*.pyc
15 | */**/**/*.pyc
16 | */**/**/**/*.pyc
17 | */**/**/**/**/*.pyc
18 | */*.so*
19 | */**/*.so*
20 | */**/*.dylib*
21 | *.DS_Store
22 | *~
23 |
--------------------------------------------------------------------------------
/LICENSE.txt:
-------------------------------------------------------------------------------- 1 | Copyright (C) 2017 NVIDIA Corporation. Ting-Chun Wang, Ming-Yu Liu, Jun-Yan Zhu. 2 | All rights reserved. 3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 4 | 5 | Permission to use, copy, modify, and distribute this software and its documentation 6 | for any non-commercial purpose is hereby granted without fee, provided that the above 7 | copyright notice appear in all copies and that both that copyright notice and this 8 | permission notice appear in supporting documentation, and that the name of the author 9 | not be used in advertising or publicity pertaining to distribution of the software 10 | without specific, written prior permission. 11 | 12 | THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL 13 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY PARTICULAR PURPOSE. 14 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL 15 | DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 16 | WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 17 | OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 | 19 | 20 | --------------------------- LICENSE FOR pytorch-CycleGAN-and-pix2pix ---------------- 21 | Copyright (c) 2017, Jun-Yan Zhu and Taesung Park 22 | All rights reserved. 23 | 24 | Redistribution and use in source and binary forms, with or without 25 | modification, are permitted provided that the following conditions are met: 26 | 27 | * Redistributions of source code must retain the above copyright notice, this 28 | list of conditions and the following disclaimer. 29 | 30 | * Redistributions in binary form must reproduce the above copyright notice, 31 | this list of conditions and the following disclaimer in the documentation 32 | and/or other materials provided with the distribution. 33 | 34 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 35 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 37 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 38 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 39 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 40 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 41 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 42 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 43 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
44 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/data/__init__.py -------------------------------------------------------------------------------- /data/base_data_loader.py: -------------------------------------------------------------------------------- 1 | 2 | class BaseDataLoader(): 3 | def __init__(self): 4 | pass 5 | 6 | def initialize(self, opt): 7 | self.opt = opt 8 | pass 9 | 10 | def load_data(): 11 | return None 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /data/base_dataset.py: -------------------------------------------------------------------------------- 1 | from util.util import add_dummy_to_tensor 2 | import torch.utils.data as data 3 | import torch 4 | from PIL import Image 5 | import torchvision.transforms as transforms 6 | import numpy as np 7 | import random 8 | 9 | class BaseDataset(data.Dataset): 10 | def __init__(self): 11 | super(BaseDataset, self).__init__() 12 | 13 | def name(self): 14 | return 'BaseDataset' 15 | 16 | def initialize(self, opt): 17 | pass 18 | 19 | def update_training_batch(self, ratio): # update the training sequence length to be longer 20 | seq_len_max = min(128, self.seq_len_max) - (self.opt.n_frames_G - 1) 21 | if self.n_frames_total < seq_len_max: 22 | self.n_frames_total = min(seq_len_max, self.opt.n_frames_total * (2**ratio)) 23 | #self.n_frames_total = min(seq_len_max, self.opt.n_frames_total * (ratio + 1)) 24 | print('--------- Updating training sequence length to %d ---------' % self.n_frames_total) 25 | 26 | def init_frame_idx(self, A_paths): 27 | self.n_of_seqs = min(len(A_paths), self.opt.max_dataset_size) # number of sequences to train 28 | self.seq_len_max = max([len(A) for A in A_paths]) # max number of frames in the training sequences 29 | 30 | self.seq_idx = 0 # index for current sequence 31 | self.frame_idx = self.opt.start_frame if not self.opt.isTrain else 0 # index for current frame in the sequence 32 | self.frames_count = [] # number of frames in each sequence 33 | for path in A_paths: 34 | self.frames_count.append(len(path) - self.opt.n_frames_G + 1) 35 | 36 | self.folder_prob = [count / sum(self.frames_count) for count in self.frames_count] 37 | self.n_frames_total = self.opt.n_frames_total if self.opt.isTrain else 1 38 | self.A, self.B, self.I = None, None, None 39 | 40 | def update_frame_idx(self, A_paths, index): 41 | if self.opt.isTrain: 42 | if self.opt.dataset_mode == 'pose': 43 | seq_idx = np.random.choice(len(A_paths), p=self.folder_prob) # randomly pick sequence to train 44 | self.frame_idx = index 45 | else: 46 | seq_idx = index % self.n_of_seqs 47 | return None, None, None, seq_idx 48 | else: 49 | self.change_seq = self.frame_idx >= self.frames_count[self.seq_idx] 50 | if self.change_seq: 51 | self.seq_idx += 1 52 | self.frame_idx = 0 53 | self.A, self.B, self.I = None, None, None 54 | return self.A, self.B, self.I, self.seq_idx 55 | 56 | def init_data_params(self, data, n_gpus, tG): 57 | opt = self.opt 58 | _, n_frames_total, self.height, self.width = data['B'].size() # n_frames_total = n_frames_load * n_loadings + tG - 1 59 | n_frames_total = n_frames_total // opt.output_nc 60 | n_frames_load = opt.max_frames_per_gpu * n_gpus # number of total frames loaded into GPU at a time for each batch 61 | n_frames_load = min(n_frames_load, 
n_frames_total - tG + 1) 62 | self.t_len = n_frames_load + tG - 1 # number of loaded frames plus previous frames 63 | return n_frames_total-self.t_len+1, n_frames_load, self.t_len 64 | 65 | def init_data(self, t_scales): 66 | fake_B_last = None # the last generated frame from previous training batch (which becomes input to the next batch) 67 | real_B_all, fake_B_all, flow_ref_all, conf_ref_all = None, None, None, None # all real/generated frames so far 68 | if self.opt.sparse_D: 69 | real_B_all, fake_B_all, flow_ref_all, conf_ref_all = [None]*t_scales, [None]*t_scales, [None]*t_scales, [None]*t_scales 70 | 71 | frames_all = real_B_all, fake_B_all, flow_ref_all, conf_ref_all 72 | return fake_B_last, frames_all 73 | 74 | def prepare_data(self, data, i, input_nc, output_nc): 75 | t_len, height, width = self.t_len, self.height, self.width 76 | # 5D tensor: batchSize, # of frames, # of channels, height, width 77 | input_A = (data['A'][:, i*input_nc:(i+t_len)*input_nc, ...]).view(-1, t_len, input_nc, height, width) 78 | input_B = (data['B'][:, i*output_nc:(i+t_len)*output_nc, ...]).view(-1, t_len, output_nc, height, width) 79 | inst_A = (data['inst'][:, i:i+t_len, ...]).view(-1, t_len, 1, height, width) if len(data['inst'].size()) > 2 else None 80 | return [input_A, input_B, inst_A] 81 | 82 | def make_power_2(n, base=32.0): 83 | return int(round(n / base) * base) 84 | 85 | def get_img_params(opt, size): 86 | w, h = size 87 | new_h, new_w = h, w 88 | if 'resize' in opt.resize_or_crop: # resize image to be loadSize x loadSize 89 | new_h = new_w = opt.loadSize 90 | elif 'scaleWidth' in opt.resize_or_crop: # scale image width to be loadSize 91 | new_w = opt.loadSize 92 | new_h = opt.loadSize * h // w 93 | elif 'scaleHeight' in opt.resize_or_crop: # scale image height to be loadSize 94 | new_h = opt.loadSize 95 | new_w = opt.loadSize * w // h 96 | elif 'randomScaleWidth' in opt.resize_or_crop: # randomly scale image width to be somewhere between loadSize and fineSize 97 | new_w = random.randint(opt.fineSize, opt.loadSize + 1) 98 | new_h = new_w * h // w 99 | elif 'randomScaleHeight' in opt.resize_or_crop: # randomly scale image height to be somewhere between loadSize and fineSize 100 | new_h = random.randint(opt.fineSize, opt.loadSize + 1) 101 | new_w = new_h * w // h 102 | new_w = int(round(new_w / 4)) * 4 103 | new_h = int(round(new_h / 4)) * 4 104 | 105 | crop_x = crop_y = 0 106 | crop_w = crop_h = 0 107 | if 'crop' in opt.resize_or_crop or 'scaledCrop' in opt.resize_or_crop: 108 | if 'crop' in opt.resize_or_crop: # crop patches of size fineSize x fineSize 109 | crop_w = crop_h = opt.fineSize 110 | else: 111 | if 'Width' in opt.resize_or_crop: # crop patches of width fineSize 112 | crop_w = opt.fineSize 113 | crop_h = opt.fineSize * h // w 114 | else: # crop patches of height fineSize 115 | crop_h = opt.fineSize 116 | crop_w = opt.fineSize * w // h 117 | 118 | crop_w, crop_h = make_power_2(crop_w), make_power_2(crop_h) 119 | x_span = (new_w - crop_w) // 2 120 | crop_x = np.maximum(0, np.minimum(x_span*2, int(np.random.randn() * x_span/3 + x_span))) 121 | crop_y = random.randint(0, np.minimum(np.maximum(0, new_h - crop_h), new_h // 8)) 122 | #crop_x = random.randint(0, np.maximum(0, new_w - crop_w)) 123 | #crop_y = random.randint(0, np.maximum(0, new_h - crop_h)) 124 | else: 125 | new_w, new_h = make_power_2(new_w), make_power_2(new_h) 126 | 127 | flip = (random.random() > 0.5) and (opt.dataset_mode != 'pose') 128 | return {'new_size': (new_w, new_h), 'crop_size': (crop_w, crop_h), 'crop_pos': 
(crop_x, crop_y), 'flip': flip} 129 | 130 | def get_transform(opt, params, method=Image.BICUBIC, normalize=True, toTensor=True): 131 | transform_list = [] 132 | ### resize input image 133 | if 'resize' in opt.resize_or_crop: 134 | osize = [opt.loadSize, opt.loadSize] 135 | transform_list.append(transforms.Scale(osize, method)) 136 | else: 137 | transform_list.append(transforms.Lambda(lambda img: __scale_image(img, params['new_size'], method))) 138 | 139 | ### crop patches from image 140 | if 'crop' in opt.resize_or_crop or 'scaledCrop' in opt.resize_or_crop: 141 | transform_list.append(transforms.Lambda(lambda img: __crop(img, params['crop_size'], params['crop_pos']))) 142 | 143 | ### random flip 144 | if opt.isTrain and not opt.no_flip: 145 | transform_list.append(transforms.Lambda(lambda img: __flip(img, params['flip']))) 146 | 147 | if toTensor: 148 | transform_list += [transforms.ToTensor()] 149 | if normalize: 150 | transform_list += [transforms.Normalize((0.5, 0.5, 0.5), 151 | (0.5, 0.5, 0.5))] 152 | return transforms.Compose(transform_list) 153 | 154 | def toTensor_normalize(): 155 | transform_list = [transforms.ToTensor()] 156 | transform_list += [transforms.Normalize((0.5, 0.5, 0.5), 157 | (0.5, 0.5, 0.5))] 158 | return transforms.Compose(transform_list) 159 | 160 | def __scale_image(img, size, method=Image.BICUBIC): 161 | w, h = size 162 | return img.resize((w, h), method) 163 | 164 | def __crop(img, size, pos): 165 | ow, oh = img.size 166 | tw, th = size 167 | x1, y1 = pos 168 | if (ow > tw or oh > th): 169 | return img.crop((x1, y1, min(ow, x1 + tw), min(oh, y1 + th))) 170 | return img 171 | 172 | def __flip(img, flip): 173 | if flip: 174 | return img.transpose(Image.FLIP_LEFT_RIGHT) 175 | return img 176 | 177 | def get_video_params(opt, n_frames_total, cur_seq_len, index): 178 | tG = opt.n_frames_G 179 | if opt.isTrain: 180 | n_frames_total = min(n_frames_total, cur_seq_len - tG + 1) 181 | 182 | n_gpus = opt.n_gpus_gen if opt.batchSize == 1 else 1 # number of generator GPUs for each batch 183 | n_frames_per_load = opt.max_frames_per_gpu * n_gpus # number of frames to load into GPUs at one time (for each batch) 184 | n_frames_per_load = min(n_frames_total, n_frames_per_load) 185 | n_loadings = n_frames_total // n_frames_per_load # how many times are needed to load entire sequence into GPUs 186 | n_frames_total = n_frames_per_load * n_loadings + tG - 1 # rounded overall number of frames to read from the sequence 187 | 188 | max_t_step = min(opt.max_t_step, (cur_seq_len-1) // (n_frames_total-1)) 189 | t_step = np.random.randint(max_t_step) + 1 # spacing between neighboring sampled frames 190 | offset_max = max(1, cur_seq_len - (n_frames_total-1)*t_step) # maximum possible index for the first frame 191 | if opt.dataset_mode == 'pose': 192 | start_idx = index % offset_max 193 | else: 194 | start_idx = np.random.randint(offset_max) # offset for the first frame to load 195 | if opt.debug: 196 | print("loading %d frames in total, first frame starting at index %d, space between neighboring frames is %d" 197 | % (n_frames_total, start_idx, t_step)) 198 | else: 199 | n_frames_total = tG 200 | start_idx = index 201 | t_step = 1 202 | return n_frames_total, start_idx, t_step 203 | 204 | def concat_frame(A, Ai, nF): 205 | if A is None: 206 | A = Ai 207 | else: 208 | c = Ai.size()[0] 209 | if A.size()[0] == nF * c: 210 | A = A[c:] 211 | A = torch.cat([A, Ai]) 212 | return A -------------------------------------------------------------------------------- /data/custom_dataset_data_loader.py: 
-------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from data.base_data_loader import BaseDataLoader 3 | 4 | 5 | def CreateDataset(opt): 6 | dataset = None 7 | if opt.dataset_mode == 'temporal': 8 | from data.temporal_dataset import TemporalDataset 9 | dataset = TemporalDataset() 10 | elif opt.dataset_mode == 'face': 11 | from data.face_dataset import FaceDataset 12 | dataset = FaceDataset() 13 | elif opt.dataset_mode == 'pose': 14 | from data.pose_dataset import PoseDataset 15 | dataset = PoseDataset() 16 | elif opt.dataset_mode == 'test': 17 | from data.test_dataset import TestDataset 18 | dataset = TestDataset() 19 | else: 20 | raise ValueError("Dataset [%s] not recognized." % opt.dataset_mode) 21 | 22 | print("dataset [%s] was created" % (dataset.name())) 23 | dataset.initialize(opt) 24 | return dataset 25 | 26 | 27 | class CustomDatasetDataLoader(BaseDataLoader): 28 | def name(self): 29 | return 'CustomDatasetDataLoader' 30 | 31 | def initialize(self, opt): 32 | BaseDataLoader.initialize(self, opt) 33 | self.dataset = CreateDataset(opt) 34 | self.dataloader = torch.utils.data.DataLoader( 35 | self.dataset, 36 | batch_size=opt.batchSize, 37 | shuffle=not opt.serial_batches, 38 | num_workers=int(opt.nThreads)) 39 | 40 | def load_data(self): 41 | return self.dataloader 42 | 43 | def __len__(self): 44 | return min(len(self.dataset), self.opt.max_dataset_size) 45 | -------------------------------------------------------------------------------- /data/data_loader.py: -------------------------------------------------------------------------------- 1 | 2 | def CreateDataLoader(opt): 3 | from data.custom_dataset_data_loader import CustomDatasetDataLoader 4 | data_loader = CustomDatasetDataLoader() 5 | print(data_loader.name()) 6 | data_loader.initialize(opt) 7 | return data_loader 8 | -------------------------------------------------------------------------------- /data/face_landmark_detection.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | from skimage import io 4 | import numpy as np 5 | import dlib 6 | import sys 7 | 8 | if len(sys.argv) < 2 or (sys.argv[1] != 'train' and sys.argv[1] != 'test'): 9 | raise ValueError('usage: python data/face_landmark_detection.py [train|test]') 10 | 11 | phase = sys.argv[1] 12 | dataset_path = 'datasets/face/' 13 | faces_folder_path = os.path.join(dataset_path, phase + '_img/') 14 | predictor_path = os.path.join(dataset_path, 'shape_predictor_68_face_landmarks.dat') 15 | detector = dlib.get_frontal_face_detector() 16 | predictor = dlib.shape_predictor(predictor_path) 17 | 18 | img_paths = sorted(glob.glob(faces_folder_path + '*')) 19 | for i in range(len(img_paths)): 20 | f = img_paths[i] 21 | print("Processing video: {}".format(f)) 22 | save_path = os.path.join(dataset_path, phase + '_keypoints', os.path.basename(f)) 23 | if not os.path.isdir(save_path): 24 | os.makedirs(save_path) 25 | 26 | for img_name in sorted(glob.glob(os.path.join(f, '*.jpg'))): 27 | img = io.imread(img_name) 28 | dets = detector(img, 1) 29 | if len(dets) > 0: 30 | shape = predictor(img, dets[0]) 31 | points = np.empty([68, 2], dtype=int) 32 | for b in range(68): 33 | points[b,0] = shape.part(b).x 34 | points[b,1] = shape.part(b).y 35 | 36 | save_name = os.path.join(save_path, os.path.basename(img_name)[:-4] + '.txt') 37 | np.savetxt(save_name, points, fmt='%d', delimiter=',') 38 | 
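Note: a minimal sketch (not part of the repository) of reading back one of the keypoint files written by data/face_landmark_detection.py above; the file path below is illustrative and depends on your dataset layout.

import numpy as np
# Each saved file holds the 68 dlib landmarks as comma-separated integer (x, y) rows,
# matching the np.savetxt(..., fmt='%d', delimiter=',') call in the script above.
points = np.loadtxt('datasets/face/train_keypoints/some_video/some_frame.txt', delimiter=',')
assert points.shape == (68, 2)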
-------------------------------------------------------------------------------- /data/image_folder.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Code from 3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py 4 | # Modified the original code so that it also loads images from the current 5 | # directory as well as the subdirectories 6 | ############################################################################### 7 | 8 | import torch.utils.data as data 9 | 10 | from PIL import Image 11 | import os 12 | import os.path 13 | 14 | IMG_EXTENSIONS = [ 15 | '.jpg', '.JPG', '.jpeg', '.JPEG', '.pgm', '.PGM', 16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', '.tiff', 17 | '.txt', '.json' 18 | ] 19 | 20 | 21 | def is_image_file(filename): 22 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 23 | 24 | 25 | def make_dataset(dir): 26 | images = [] 27 | assert os.path.isdir(dir), '%s is not a valid directory' % dir 28 | 29 | for root, _, fnames in sorted(os.walk(dir)): 30 | for fname in fnames: 31 | if is_image_file(fname): 32 | path = os.path.join(root, fname) 33 | images.append(path) 34 | return images 35 | 36 | def make_grouped_dataset(dir): 37 | images = [] 38 | assert os.path.isdir(dir), '%s is not a valid directory' % dir 39 | fnames = sorted(os.walk(dir)) 40 | for fname in sorted(fnames): 41 | paths = [] 42 | root = fname[0] 43 | for f in sorted(fname[2]): 44 | if is_image_file(f): 45 | paths.append(os.path.join(root, f)) 46 | if len(paths) > 0: 47 | images.append(paths) 48 | return images 49 | 50 | def check_path_valid(A_paths, B_paths): 51 | assert(len(A_paths) == len(B_paths)) 52 | for a, b in zip(A_paths, B_paths): 53 | assert(len(a) == len(b)) 54 | 55 | def default_loader(path): 56 | return Image.open(path).convert('RGB') 57 | 58 | 59 | class ImageFolder(data.Dataset): 60 | 61 | def __init__(self, root, transform=None, return_paths=False, 62 | loader=default_loader): 63 | imgs = make_dataset(root) 64 | if len(imgs) == 0: 65 | raise(RuntimeError("Found 0 images in: " + root + "\n" 66 | "Supported image extensions are: " + 67 | ",".join(IMG_EXTENSIONS))) 68 | 69 | self.root = root 70 | self.imgs = imgs 71 | self.transform = transform 72 | self.return_paths = return_paths 73 | self.loader = loader 74 | 75 | def __getitem__(self, index): 76 | path = self.imgs[index] 77 | img = self.loader(path) 78 | if self.transform is not None: 79 | img = self.transform(img) 80 | if self.return_paths: 81 | return img, path 82 | else: 83 | return img 84 | 85 | def __len__(self): 86 | return len(self.imgs) 87 | -------------------------------------------------------------------------------- /data/keypoint2img.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from PIL import Image 3 | import numpy as np 4 | import json 5 | import glob 6 | from scipy.optimize import curve_fit 7 | import warnings 8 | 9 | def func(x, a, b, c): 10 | return a * x**2 + b * x + c 11 | 12 | def linear(x, a, b): 13 | return a * x + b 14 | 15 | def setColor(im, yy, xx, color): 16 | if len(im.shape) == 3: 17 | if (im[yy, xx] == 0).all(): 18 | im[yy, xx, 0], im[yy, xx, 1], im[yy, xx, 2] = color[0], color[1], color[2] 19 | else: 20 | im[yy, xx, 0] = ((im[yy, xx, 0].astype(float) + color[0]) / 2).astype(np.uint8) 21 | im[yy, xx, 1] = ((im[yy, xx, 1].astype(float) + color[1]) / 2).astype(np.uint8) 22 | 
im[yy, xx, 2] = ((im[yy, xx, 2].astype(float) + color[2]) / 2).astype(np.uint8) 23 | else: 24 | im[yy, xx] = color[0] 25 | 26 | def drawEdge(im, x, y, bw=1, color=(255,255,255), draw_end_points=False): 27 | if x is not None and x.size: 28 | h, w = im.shape[0], im.shape[1] 29 | # edge 30 | for i in range(-bw, bw): 31 | for j in range(-bw, bw): 32 | yy = np.maximum(0, np.minimum(h-1, y+i)) 33 | xx = np.maximum(0, np.minimum(w-1, x+j)) 34 | setColor(im, yy, xx, color) 35 | 36 | # edge endpoints 37 | if draw_end_points: 38 | for i in range(-bw*2, bw*2): 39 | for j in range(-bw*2, bw*2): 40 | if (i**2) + (j**2) < (4 * bw**2): 41 | yy = np.maximum(0, np.minimum(h-1, np.array([y[0], y[-1]])+i)) 42 | xx = np.maximum(0, np.minimum(w-1, np.array([x[0], x[-1]])+j)) 43 | setColor(im, yy, xx, color) 44 | 45 | def interpPoints(x, y): 46 | if abs(x[:-1] - x[1:]).max() < abs(y[:-1] - y[1:]).max(): 47 | curve_y, curve_x = interpPoints(y, x) 48 | if curve_y is None: 49 | return None, None 50 | else: 51 | with warnings.catch_warnings(): 52 | warnings.simplefilter("ignore") 53 | if len(x) < 3: 54 | popt, _ = curve_fit(linear, x, y) 55 | else: 56 | popt, _ = curve_fit(func, x, y) 57 | if abs(popt[0]) > 1: 58 | return None, None 59 | if x[0] > x[-1]: 60 | x = list(reversed(x)) 61 | y = list(reversed(y)) 62 | curve_x = np.linspace(x[0], x[-1], (x[-1]-x[0])) 63 | if len(x) < 3: 64 | curve_y = linear(curve_x, *popt) 65 | else: 66 | curve_y = func(curve_x, *popt) 67 | return curve_x.astype(int), curve_y.astype(int) 68 | 69 | def read_keypoints(json_input, size, random_drop_prob=0, remove_face_labels=False, basic_point_only=False): 70 | with open(json_input, encoding='utf-8') as f: 71 | keypoint_dicts = json.loads(f.read())["people"] 72 | 73 | edge_lists = define_edge_lists(basic_point_only) 74 | w, h = size 75 | pose_img = np.zeros((h, w, 3), np.uint8) 76 | for keypoint_dict in keypoint_dicts: 77 | pose_pts = np.array(keypoint_dict["pose_keypoints_2d"]).reshape(25, 3) 78 | face_pts = np.array(keypoint_dict["face_keypoints_2d"]).reshape(70, 3) 79 | hand_pts_l = np.array(keypoint_dict["hand_left_keypoints_2d"]).reshape(21, 3) 80 | hand_pts_r = np.array(keypoint_dict["hand_right_keypoints_2d"]).reshape(21, 3) 81 | pts = [extract_valid_keypoints(pts, edge_lists) for pts in [pose_pts, face_pts, hand_pts_l, hand_pts_r]] 82 | pose_img += connect_keypoints(pts, edge_lists, size, random_drop_prob, remove_face_labels, basic_point_only) 83 | return pose_img 84 | 85 | def extract_valid_keypoints(pts, edge_lists): 86 | pose_edge_list, _, hand_edge_list, _, face_list = edge_lists 87 | p = pts.shape[0] 88 | thre = 0.1 if p == 70 else 0.01 89 | output = np.zeros((p, 2)) 90 | 91 | if p == 70: # face 92 | for edge_list in face_list: 93 | for edge in edge_list: 94 | if (pts[edge, 2] > thre).all(): 95 | output[edge, :] = pts[edge, :2] 96 | elif p == 21: # hand 97 | for edge in hand_edge_list: 98 | if (pts[edge, 2] > thre).all(): 99 | output[edge, :] = pts[edge, :2] 100 | else: # pose 101 | valid = (pts[:, 2] > thre) 102 | output[valid, :] = pts[valid, :2] 103 | 104 | return output 105 | 106 | def connect_keypoints(pts, edge_lists, size, random_drop_prob, remove_face_labels, basic_point_only): 107 | pose_pts, face_pts, hand_pts_l, hand_pts_r = pts 108 | w, h = size 109 | output_edges = np.zeros((h, w, 3), np.uint8) 110 | pose_edge_list, pose_color_list, hand_edge_list, hand_color_list, face_list = edge_lists 111 | 112 | if random_drop_prob > 0 and remove_face_labels: 113 | # add random noise to keypoints 114 | pose_pts[[0,15,16,17,18], 
:] += 5 * np.random.randn(5,2) 115 | face_pts[:,0] += 2 * np.random.randn() 116 | face_pts[:,1] += 2 * np.random.randn() 117 | 118 | ### pose 119 | for i, edge in enumerate(pose_edge_list): 120 | x, y = pose_pts[edge, 0], pose_pts[edge, 1] 121 | if (np.random.rand() > random_drop_prob) and (0 not in x): 122 | curve_x, curve_y = interpPoints(x, y) 123 | drawEdge(output_edges, curve_x, curve_y, bw=3, color=pose_color_list[i], draw_end_points=True) 124 | 125 | if not basic_point_only: 126 | ### hand 127 | for hand_pts in [hand_pts_l, hand_pts_r]: # for left and right hand 128 | if np.random.rand() > random_drop_prob: 129 | for i, edge in enumerate(hand_edge_list): # for each finger 130 | for j in range(0, len(edge)-1): # for each part of the finger 131 | sub_edge = edge[j:j+2] 132 | x, y = hand_pts[sub_edge, 0], hand_pts[sub_edge, 1] 133 | if 0 not in x: 134 | line_x, line_y = interpPoints(x, y) 135 | drawEdge(output_edges, line_x, line_y, bw=1, color=hand_color_list[i], draw_end_points=True) 136 | 137 | ### face 138 | edge_len = 2 139 | if (np.random.rand() > random_drop_prob): 140 | for edge_list in face_list: 141 | for edge in edge_list: 142 | for i in range(0, max(1, len(edge)-1), edge_len-1): 143 | sub_edge = edge[i:i+edge_len] 144 | x, y = face_pts[sub_edge, 0], face_pts[sub_edge, 1] 145 | if 0 not in x: 146 | curve_x, curve_y = interpPoints(x, y) 147 | drawEdge(output_edges, curve_x, curve_y, draw_end_points=True) 148 | 149 | return output_edges 150 | 151 | def define_edge_lists(basic_point_only): 152 | ### pose 153 | pose_edge_list = [] 154 | pose_color_list = [] 155 | if not basic_point_only: 156 | pose_edge_list += [[17, 15], [15, 0], [ 0, 16], [16, 18]] # head 157 | pose_color_list += [[153, 0,153], [153, 0,102], [102, 0,153], [ 51, 0,153]] 158 | 159 | pose_edge_list += [ 160 | [ 0, 1], [ 1, 8], # body 161 | [ 1, 2], [ 2, 3], [ 3, 4], # right arm 162 | [ 1, 5], [ 5, 6], [ 6, 7], # left arm 163 | [ 8, 9], [ 9, 10], [10, 11], [11, 24], [11, 22], [22, 23], # right leg 164 | [ 8, 12], [12, 13], [13, 14], [14, 21], [14, 19], [19, 20] # left leg 165 | ] 166 | pose_color_list += [ 167 | [153, 0, 51], [153, 0, 0], 168 | [153, 51, 0], [153,102, 0], [153,153, 0], 169 | [102,153, 0], [ 51,153, 0], [ 0,153, 0], 170 | [ 0,153, 51], [ 0,153,102], [ 0,153,153], [ 0,153,153], [ 0,153,153], [ 0,153,153], 171 | [ 0,102,153], [ 0, 51,153], [ 0, 0,153], [ 0, 0,153], [ 0, 0,153], [ 0, 0,153] 172 | ] 173 | 174 | ### hand 175 | hand_edge_list = [ 176 | [0, 1, 2, 3, 4], 177 | [0, 5, 6, 7, 8], 178 | [0, 9, 10, 11, 12], 179 | [0, 13, 14, 15, 16], 180 | [0, 17, 18, 19, 20] 181 | ] 182 | hand_color_list = [ 183 | [204,0,0], [163,204,0], [0,204,82], [0,82,204], [163,0,204] 184 | ] 185 | 186 | ### face 187 | face_list = [ 188 | #[range(0, 17)], # face 189 | [range(17, 22)], # left eyebrow 190 | [range(22, 27)], # right eyebrow 191 | [range(27, 31), range(31, 36)], # nose 192 | [[36,37,38,39], [39,40,41,36]], # left eye 193 | [[42,43,44,45], [45,46,47,42]], # right eye 194 | [range(48, 55), [54,55,56,57,58,59,48]], # mouth 195 | ] 196 | return pose_edge_list, pose_color_list, hand_edge_list, hand_color_list, face_list -------------------------------------------------------------------------------- /data/pose_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torchvision.transforms as transforms 3 | import torch 4 | from PIL import Image 5 | import numpy as np 6 | 7 | from data.base_dataset import BaseDataset, get_img_params, get_transform, 
get_video_params, concat_frame 8 | from data.image_folder import make_grouped_dataset, check_path_valid 9 | from data.keypoint2img import read_keypoints 10 | 11 | class PoseDataset(BaseDataset): 12 | def initialize(self, opt): 13 | self.opt = opt 14 | self.root = opt.dataroot 15 | 16 | self.dir_dp = os.path.join(opt.dataroot, opt.phase + '_densepose') 17 | self.dir_op = os.path.join(opt.dataroot, opt.phase + '_openpose') 18 | self.dir_img = os.path.join(opt.dataroot, opt.phase + '_img') 19 | self.img_paths = sorted(make_grouped_dataset(self.dir_img)) 20 | if not opt.openpose_only: 21 | self.dp_paths = sorted(make_grouped_dataset(self.dir_dp)) 22 | check_path_valid(self.dp_paths, self.img_paths) 23 | if not opt.densepose_only: 24 | self.op_paths = sorted(make_grouped_dataset(self.dir_op)) 25 | check_path_valid(self.op_paths, self.img_paths) 26 | 27 | self.init_frame_idx(self.img_paths) 28 | 29 | def __getitem__(self, index): 30 | A, B, _, seq_idx = self.update_frame_idx(self.img_paths, index) 31 | img_paths = self.img_paths[seq_idx] 32 | n_frames_total, start_idx, t_step = get_video_params(self.opt, self.n_frames_total, len(img_paths), self.frame_idx) 33 | 34 | img = Image.open(img_paths[start_idx]).convert('RGB') 35 | size = img.size 36 | params = get_img_params(self.opt, size) 37 | 38 | frame_range = list(range(n_frames_total)) if (self.opt.isTrain or self.A is None) else [self.opt.n_frames_G-1] 39 | for i in frame_range: 40 | img_path = img_paths[start_idx + i * t_step] 41 | if not self.opt.openpose_only: 42 | dp_path = self.dp_paths[seq_idx][start_idx + i * t_step] 43 | Di = self.get_image(dp_path, size, params, input_type='densepose') 44 | Di[2,:,:] = ((Di[2,:,:] * 0.5 + 0.5) * 255 / 24 - 0.5) / 0.5 45 | if not self.opt.densepose_only: 46 | op_path = self.op_paths[seq_idx][start_idx + i * t_step] 47 | Oi = self.get_image(op_path, size, params, input_type='openpose') 48 | 49 | if self.opt.openpose_only: 50 | Ai = Oi 51 | elif self.opt.densepose_only: 52 | Ai = Di 53 | else: 54 | Ai = torch.cat([Di, Oi]) 55 | Bi = self.get_image(img_path, size, params, input_type='img') 56 | 57 | Ai, Bi = self.crop(Ai), self.crop(Bi) # only crop the central half region to save time 58 | A = concat_frame(A, Ai, n_frames_total) 59 | B = concat_frame(B, Bi, n_frames_total) 60 | 61 | if not self.opt.isTrain: 62 | self.A, self.B = A, B 63 | self.frame_idx += 1 64 | change_seq = False if self.opt.isTrain else self.change_seq 65 | return_list = {'A': A, 'B': B, 'inst': 0, 'A_path': img_path, 'change_seq': change_seq} 66 | 67 | return return_list 68 | 69 | def get_image(self, A_path, size, params, input_type): 70 | if input_type != 'openpose': 71 | A_img = Image.open(A_path).convert('RGB') 72 | else: 73 | random_drop_prob = self.opt.random_drop_prob if self.opt.isTrain else 0 74 | A_img = Image.fromarray(read_keypoints(A_path, size, random_drop_prob, self.opt.remove_face_labels, self.opt.basic_point_only)) 75 | 76 | if input_type == 'densepose' and self.opt.isTrain: 77 | # randomly remove labels 78 | A_np = np.array(A_img) 79 | part_labels = A_np[:,:,2] 80 | for part_id in range(1, 25): 81 | if (np.random.rand() < self.opt.random_drop_prob): 82 | A_np[(part_labels == part_id), :] = 0 83 | if self.opt.remove_face_labels: 84 | A_np[(part_labels == 23) | (part_labels == 24), :] = 0 85 | A_img = Image.fromarray(A_np) 86 | 87 | is_img = input_type == 'img' 88 | method = Image.BICUBIC if is_img else Image.NEAREST 89 | transform_scaleA = get_transform(self.opt, params, method=method) 90 | A_scaled = 
transform_scaleA(A_img) 91 | return A_scaled 92 | 93 | def crop(self, Ai): 94 | w = Ai.size()[2] 95 | base = 32 96 | x_cen = w // 2 97 | bs = int(w * 0.25) // base * base 98 | return Ai[:,:,(x_cen-bs):(x_cen+bs)] 99 | 100 | def normalize_pose(self, A_img, target_yc, target_len, first=False): 101 | w, h = A_img.size 102 | A_np = np.array(A_img) 103 | 104 | if first == True: 105 | part_labels = A_np[:,:,2] 106 | part_coords = np.nonzero((part_labels == 1) | (part_labels == 2)) 107 | y, x = part_coords[0], part_coords[1] 108 | 109 | ys, ye = y.min(), y.max() 110 | min_i, max_i = np.argmin(y), np.argmax(y) 111 | v_min = A_np[y[min_i], x[min_i], 1] / 255 112 | v_max = A_np[y[max_i], x[max_i], 1] / 255 113 | ylen = (ye-ys) / (v_max-v_min) 114 | yc = (0.5-v_min) / (v_max-v_min) * (ye-ys) + ys 115 | 116 | ratio = target_len / ylen 117 | offset_y = int(yc - (target_yc / ratio)) 118 | offset_x = int(w * (1 - 1/ratio) / 2) 119 | 120 | padding = int(max(0, max(-offset_y, int(offset_y + h/ratio) - h))) 121 | padding = int(max(padding, max(-offset_x, int(offset_x + w/ratio) - w))) 122 | offset_y += padding 123 | offset_x += padding 124 | self.offset_y, self.offset_x = offset_y, offset_x 125 | self.ratio, self.padding = ratio, padding 126 | 127 | p = self.padding 128 | A_np = np.pad(A_np, ((p,p),(p,p),(0,0)), 'constant', constant_values=0) 129 | A_np = A_np[self.offset_y:int(self.offset_y + h/self.ratio), self.offset_x:int(self.offset_x + w/self.ratio):, :] 130 | A_img = Image.fromarray(A_np) 131 | A_img = A_img.resize((w, h)) 132 | return A_img 133 | 134 | def __len__(self): 135 | return sum(self.frames_count) 136 | 137 | def name(self): 138 | return 'PoseDataset' 139 | 140 | """ 141 | DensePose label 142 | 0 = Background 143 | 1, 2 = Torso 144 | 3 = Right Hand 145 | 4 = Left Hand 146 | 5 = Right Foot 147 | 6 = Left Foot 148 | 7, 9 = Upper Leg Right 149 | 8, 10 = Upper Leg Left 150 | 11, 13 = Lower Leg Right 151 | 12, 14 = Lower Leg Left 152 | 15, 17 = Upper Arm Left 153 | 16, 18 = Upper Arm Right 154 | 19, 21 = Lower Arm Left 155 | 20, 22 = Lower Arm Right 156 | 23, 24 = Head """ 157 | -------------------------------------------------------------------------------- /data/temporal_dataset.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | import os.path 4 | import random 5 | import torch 6 | from data.base_dataset import BaseDataset, get_img_params, get_transform, get_video_params 7 | from data.image_folder import make_grouped_dataset, check_path_valid 8 | from PIL import Image 9 | import numpy as np 10 | 11 | class TemporalDataset(BaseDataset): 12 | def initialize(self, opt): 13 | self.opt = opt 14 | self.root = opt.dataroot 15 | self.dir_A = os.path.join(opt.dataroot, opt.phase + '_A') 16 | self.dir_B = os.path.join(opt.dataroot, opt.phase + '_B') 17 | self.A_is_label = self.opt.label_nc != 0 18 | 19 | self.A_paths = sorted(make_grouped_dataset(self.dir_A)) 20 | self.B_paths = sorted(make_grouped_dataset(self.dir_B)) 21 | check_path_valid(self.A_paths, self.B_paths) 22 | if opt.use_instance: 23 | self.dir_inst = os.path.join(opt.dataroot, opt.phase + '_inst') 24 | self.I_paths = sorted(make_grouped_dataset(self.dir_inst)) 25 | check_path_valid(self.A_paths, self.I_paths) 26 | 27 | self.n_of_seqs = len(self.A_paths) # number of sequences to train 28 | self.seq_len_max = max([len(A) for A in self.A_paths]) 29 | self.n_frames_total = self.opt.n_frames_total # current number of frames to train in a single iteration 30 | 31 | def __getitem__(self, index): 32 | tG = self.opt.n_frames_G 33 | A_paths = self.A_paths[index % self.n_of_seqs] 34 | B_paths = self.B_paths[index % self.n_of_seqs] 35 | if self.opt.use_instance: 36 | I_paths = self.I_paths[index % self.n_of_seqs] 37 | 38 | # setting parameters 39 | n_frames_total, start_idx, t_step = get_video_params(self.opt, self.n_frames_total, len(A_paths), index) 40 | 41 | # setting transformers 42 | B_img = Image.open(B_paths[start_idx]).convert('RGB') 43 | params = get_img_params(self.opt, B_img.size) 44 | transform_scaleB = get_transform(self.opt, params) 45 | transform_scaleA = get_transform(self.opt, params, method=Image.NEAREST, normalize=False) if self.A_is_label else transform_scaleB 46 | 47 | # read in images 48 | A = B = inst = 0 49 | for i in range(n_frames_total): 50 | A_path = A_paths[start_idx + i * t_step] 51 | B_path = B_paths[start_idx + i * t_step] 52 | Ai = self.get_image(A_path, transform_scaleA, is_label=self.A_is_label) 53 | Bi = self.get_image(B_path, transform_scaleB) 54 | 55 | A = Ai if i == 0 else torch.cat([A, Ai], dim=0) 56 | B = Bi if i == 0 else torch.cat([B, Bi], dim=0) 57 | 58 | if self.opt.use_instance: 59 | I_path = I_paths[start_idx + i * t_step] 60 | Ii = self.get_image(I_path, transform_scaleA) * 255.0 61 | inst = Ii if i == 0 else torch.cat([inst, Ii], dim=0) 62 | 63 | return_list = {'A': A, 'B': B, 'inst': inst, 'A_path': A_path, 'B_paths': B_path} 64 | return return_list 65 | 66 | def get_image(self, A_path, transform_scaleA, is_label=False): 67 | A_img = Image.open(A_path) 68 | A_scaled = transform_scaleA(A_img) 69 | if is_label: 70 | A_scaled *= 255.0 71 | return A_scaled 72 | 73 | def __len__(self): 74 | return len(self.A_paths) 75 | 76 | def name(self): 77 | return 'TemporalDataset' -------------------------------------------------------------------------------- /data/test_dataset.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 
3 | import os.path 4 | import torch 5 | from data.base_dataset import BaseDataset, get_img_params, get_transform, concat_frame 6 | from data.image_folder import make_grouped_dataset, check_path_valid 7 | from PIL import Image 8 | import numpy as np 9 | 10 | class TestDataset(BaseDataset): 11 | def initialize(self, opt): 12 | self.opt = opt 13 | self.root = opt.dataroot 14 | self.dir_A = os.path.join(opt.dataroot, opt.phase + '_A') 15 | self.dir_B = os.path.join(opt.dataroot, opt.phase + '_B') 16 | self.use_real = opt.use_real_img 17 | self.A_is_label = self.opt.label_nc != 0 18 | 19 | self.A_paths = sorted(make_grouped_dataset(self.dir_A)) 20 | if self.use_real: 21 | self.B_paths = sorted(make_grouped_dataset(self.dir_B)) 22 | check_path_valid(self.A_paths, self.B_paths) 23 | if self.opt.use_instance: 24 | self.dir_inst = os.path.join(opt.dataroot, opt.phase + '_inst') 25 | self.I_paths = sorted(make_grouped_dataset(self.dir_inst)) 26 | check_path_valid(self.A_paths, self.I_paths) 27 | 28 | self.init_frame_idx(self.A_paths) 29 | 30 | def __getitem__(self, index): 31 | self.A, self.B, self.I, seq_idx = self.update_frame_idx(self.A_paths, index) 32 | tG = self.opt.n_frames_G 33 | 34 | A_img = Image.open(self.A_paths[seq_idx][0]).convert('RGB') 35 | params = get_img_params(self.opt, A_img.size) 36 | transform_scaleB = get_transform(self.opt, params) 37 | transform_scaleA = get_transform(self.opt, params, method=Image.NEAREST, normalize=False) if self.A_is_label else transform_scaleB 38 | frame_range = list(range(tG)) if self.A is None else [tG-1] 39 | 40 | for i in frame_range: 41 | A_path = self.A_paths[seq_idx][self.frame_idx + i] 42 | Ai = self.get_image(A_path, transform_scaleA, is_label=self.A_is_label) 43 | self.A = concat_frame(self.A, Ai, tG) 44 | 45 | if self.use_real: 46 | B_path = self.B_paths[seq_idx][self.frame_idx + i] 47 | Bi = self.get_image(B_path, transform_scaleB) 48 | self.B = concat_frame(self.B, Bi, tG) 49 | else: 50 | self.B = 0 51 | 52 | if self.opt.use_instance: 53 | I_path = self.I_paths[seq_idx][self.frame_idx + i] 54 | Ii = self.get_image(I_path, transform_scaleA) * 255.0 55 | self.I = concat_frame(self.I, Ii, tG) 56 | else: 57 | self.I = 0 58 | 59 | self.frame_idx += 1 60 | return_list = {'A': self.A, 'B': self.B, 'inst': self.I, 'A_path': A_path, 'change_seq': self.change_seq} 61 | return return_list 62 | 63 | def get_image(self, A_path, transform_scaleA, is_label=False): 64 | A_img = Image.open(A_path) 65 | A_scaled = transform_scaleA(A_img) 66 | if is_label: 67 | A_scaled *= 255.0 68 | return A_scaled 69 | 70 | def __len__(self): 71 | return sum(self.frames_count) 72 | 73 | def n_of_seqs(self): 74 | return len(self.A_paths) 75 | 76 | def name(self): 77 | return 'TestDataset' -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 2 | 3 | RUN apt-get update && apt-get install -y rsync htop git openssh-server 4 | 5 | RUN apt-get install python3-pip -y 6 | RUN ln -s /usr/bin/python3 /usr/bin/python 7 | RUN pip3 install --upgrade pip 8 | 9 | #Torch and dependencies: 10 | RUN pip install http://download.pytorch.org/whl/cu80/torch-0.4.0-cp35-cp35m-linux_x86_64.whl 11 | RUN pip install torchvision cffi tensorboardX 12 | RUN pip install tqdm scipy scikit-image colorama==0.3.7 13 | RUN pip install setproctitle pytz ipython 14 | 15 | #vid2vid dependencies 16 | RUN apt-get install libglib2.0-0 
libsm6 libxrender1 -y 17 | RUN pip install dominate requests opencv-python 18 | 19 | #pix2pixHD, required for initializing training 20 | RUN git clone https://github.com/NVIDIA/pix2pixHD /pix2pixHD 21 | 22 | #vid2vid install 23 | RUN git clone https://github.com/NVIDIA/vid2vid /vid2vid 24 | WORKDIR /vid2vid 25 | #download flownet2 model dependencies 26 | #WARNING: we had an instance where these scripts needed to be re-run after the docker instance was launched 27 | RUN python scripts/download_flownet2.py 28 | RUN python scripts/download_models_flownet2.py 29 | 30 | 31 | -------------------------------------------------------------------------------- /docker/launch_docker.sh: -------------------------------------------------------------------------------- 1 | # Thanks @dustinfreeman for providing the script 2 | #!/bin/bash 3 | sudo nvidia-docker build -t vid2vid:CUDA9-py35 . 4 | 5 | sudo nvidia-docker run --rm -ti --ipc=host --shm-size 8G -v $(pwd):/vid2vid --workdir=/vid2vid vid2vid:CUDA9-py35 /bin/bash 6 | -------------------------------------------------------------------------------- /docker/pre_docker_install.sh: -------------------------------------------------------------------------------- 1 | #Thanks @dustinfreeman for providing the script 2 | 3 | #Install docker-ce https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository 4 | sudo apt-get install -y \ 5 | apt-transport-https \ 6 | ca-certificates \ 7 | curl \ 8 | software-properties-common 9 | 10 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 11 | 12 | sudo add-apt-repository \ 13 | "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ 14 | $(lsb_release -cs) \ 15 | stable" 16 | sudo apt-get update 17 | sudo apt-get install -y docker-ce 18 | 19 | 20 | #Install nvidia-docker2 https://github.com/NVIDIA/nvidia-docker 21 | # Add the package repositories 22 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | \ 23 | sudo apt-key add - 24 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID) 25 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ 26 | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 27 | sudo apt-get update 28 | 29 | sudo apt-get install -y nvidia-docker2 30 | sudo pkill -SIGHUP dockerd 31 | 32 | 33 | #NVIDIA drivers 34 | #This triggers an interactive request to the user. 35 | #Would love an alternative! 
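#Note (untested sketch, not part of the original script): the plain assignment on the
#next line is not exported and will not reach the sudo'd apt-get processes; a common
#alternative is to pass the setting through sudo on each call, e.g.:
#  sudo DEBIAN_FRONTEND=noninteractive apt-get install -y keyboard-configuration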
36 | DEBIAN_FRONTEND=noninteractive 37 | sudo apt-get install -y keyboard-configuration 38 | sudo apt install -y ubuntu-drivers-common 39 | 40 | apt-get install -y nvidia-384 41 | 42 | #Reboot so the nvidia driver finishes install 43 | sudo reboot 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /imgs/city_change_labels.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/city_change_labels.gif -------------------------------------------------------------------------------- /imgs/city_change_styles.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/city_change_styles.gif -------------------------------------------------------------------------------- /imgs/face.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/face.gif -------------------------------------------------------------------------------- /imgs/face_multiple.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/face_multiple.gif -------------------------------------------------------------------------------- /imgs/framePredict.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/framePredict.gif -------------------------------------------------------------------------------- /imgs/pose.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/pose.gif -------------------------------------------------------------------------------- /imgs/teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/teaser.gif -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/__init__.py -------------------------------------------------------------------------------- /models/base_model.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import numpy as np 3 | import torch 4 | from .networks import get_grid 5 | 6 | class BaseModel(torch.nn.Module): 7 | def name(self): 8 | return 'BaseModel' 9 | 10 | def initialize(self, opt): 11 | self.opt = opt 12 | self.gpu_ids = opt.gpu_ids 13 | self.isTrain = opt.isTrain 14 | self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor 15 | self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) 16 | 17 | def set_input(self, input): 18 | self.input = input 19 | 20 | def forward(self): 21 | pass 22 | 23 | # used in test time, no backprop 24 | def test(self): 25 | pass 26 | 27 | def get_image_paths(self): 28 | pass 29 | 30 | def optimize_parameters(self): 31 | pass 32 | 33 | def 
get_current_visuals(self): 34 | return self.input 35 | 36 | def get_current_errors(self): 37 | return {} 38 | 39 | def save(self, label): 40 | pass 41 | 42 | # helper saving function that can be used by subclasses 43 | def save_network(self, network, network_label, epoch_label, gpu_ids): 44 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 45 | save_path = os.path.join(self.save_dir, save_filename) 46 | torch.save(network.cpu().state_dict(), save_path) 47 | if len(gpu_ids) and torch.cuda.is_available(): 48 | network.cuda(gpu_ids[0]) 49 | 50 | def resolve_version(self): 51 | import torch._utils 52 | try: 53 | torch._utils._rebuild_tensor_v2 54 | except AttributeError: 55 | def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks): 56 | tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride) 57 | tensor.requires_grad = requires_grad 58 | tensor._backward_hooks = backward_hooks 59 | return tensor 60 | torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2 61 | 62 | # helper loading function that can be used by subclasses 63 | def load_network(self, network, network_label, epoch_label, save_dir=''): 64 | self.resolve_version() 65 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 66 | if not save_dir: 67 | save_dir = self.save_dir 68 | save_path = os.path.join(save_dir, save_filename) 69 | if not os.path.isfile(save_path): 70 | print('%s not exists yet!' % save_path) 71 | if 'G0' in network_label: 72 | raise('Generator must exist!') 73 | else: 74 | #network.load_state_dict(torch.load(save_path)) 75 | try: 76 | network.load_state_dict(torch.load(save_path)) 77 | except: 78 | pretrained_dict = torch.load(save_path) 79 | model_dict = network.state_dict() 80 | 81 | ### printout layers in pretrained model 82 | initialized = set() 83 | for k, v in pretrained_dict.items(): 84 | initialized.add(k.split('.')[0]) 85 | #print('pretrained model has following layers: ') 86 | #print(sorted(initialized)) 87 | 88 | try: 89 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 90 | network.load_state_dict(pretrained_dict) 91 | print('Pretrained network %s has excessive layers; Only loading layers that are used' % network_label) 92 | except: 93 | print('Pretrained network %s has fewer layers; The following are not initialized:' % network_label) 94 | if sys.version_info >= (3,0): 95 | not_initialized = set() 96 | else: 97 | from sets import Set 98 | not_initialized = Set() 99 | for k, v in pretrained_dict.items(): 100 | if v.size() == model_dict[k].size(): 101 | model_dict[k] = v 102 | 103 | for k, v in model_dict.items(): 104 | if k not in pretrained_dict or v.size() != pretrained_dict[k].size(): 105 | not_initialized.add(k.split('.')[0]) 106 | print(sorted(not_initialized)) 107 | network.load_state_dict(model_dict) 108 | 109 | def concat(self, tensors, dim=0): 110 | if tensors[0] is not None and tensors[1] is not None: 111 | if isinstance(tensors[0], list): 112 | tensors_cat = [] 113 | for i in range(len(tensors[0])): 114 | tensors_cat.append(self.concat([tensors[0][i], tensors[1][i]], dim=dim)) 115 | return tensors_cat 116 | return torch.cat([tensors[0], tensors[1]], dim=dim) 117 | elif tensors[0] is not None: 118 | return tensors[0] 119 | else: 120 | return tensors[1] 121 | 122 | def build_pyr(self, tensor, nearest=False): # build image pyramid from a single image 123 | if tensor is None: 124 | return [None] * self.n_scales 125 | tensor = [tensor] 126 | if nearest: 127 | downsample = 
torch.nn.AvgPool2d(1, stride=2) 128 | else: 129 | downsample = torch.nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False) 130 | for s in range(1, self.n_scales): 131 | b, t, c, h, w = tensor[-1].size() 132 | down = downsample(tensor[-1].view(-1, h, w)).view(b, t, c, h//2, w//2) 133 | tensor.append(down) 134 | return tensor 135 | 136 | def dists_min(self, a, b, num=1): 137 | dists = torch.sum(torch.sum((a-b)*(a-b), dim=0), dim=0) 138 | if num == 1: 139 | val, idx = torch.min(dists, dim=0) 140 | #idx = [idx] 141 | else: 142 | val, idx = torch.sort(dists, dim=0) 143 | idx = idx[:num] 144 | return idx.cpu().numpy().astype(int) 145 | 146 | def get_edges(self, t): 147 | edge = torch.cuda.ByteTensor(t.size()).zero_() 148 | edge[:,:,:,:,1:] = edge[:,:,:,:,1:] | (t[:,:,:,:,1:] != t[:,:,:,:,:-1]) 149 | edge[:,:,:,:,:-1] = edge[:,:,:,:,:-1] | (t[:,:,:,:,1:] != t[:,:,:,:,:-1]) 150 | edge[:,:,:,1:,:] = edge[:,:,:,1:,:] | (t[:,:,:,1:,:] != t[:,:,:,:-1,:]) 151 | edge[:,:,:,:-1,:] = edge[:,:,:,:-1,:] | (t[:,:,:,1:,:] != t[:,:,:,:-1,:]) 152 | return edge.float() 153 | 154 | def update_learning_rate(self, epoch, model): 155 | lr = self.opt.lr * (1 - (epoch - self.opt.niter) / self.opt.niter_decay) 156 | for param_group in getattr(self, 'optimizer_' + model).param_groups: 157 | param_group['lr'] = lr 158 | print('update learning rate: %f -> %f' % (self.old_lr, lr)) 159 | self.old_lr = lr 160 | 161 | def update_fixed_params(self): # finetune all scales instead of just finest scale 162 | params = [] 163 | for s in range(self.n_scales): 164 | params += list(getattr(self, 'netG'+str(s)).parameters()) 165 | self.optimizer_G = torch.optim.Adam(params, lr=self.old_lr, betas=(self.opt.beta1, 0.999)) 166 | self.finetune_all = True 167 | print('------------ Now finetuning all scales -----------') 168 | 169 | def update_training_batch(self, ratio): # increase number of backpropagated frames and number of frames in each GPU 170 | nfb = self.n_frames_bp 171 | nfl = self.n_frames_load 172 | if nfb < nfl: 173 | nfb = min(self.opt.max_frames_backpropagate, 2**ratio) 174 | self.n_frames_bp = nfl // int(np.ceil(float(nfl) / nfb)) 175 | print('-------- Updating number of backpropagated frames to %d ----------' % self.n_frames_bp) 176 | 177 | if self.n_frames_per_gpu < self.opt.max_frames_per_gpu: 178 | self.n_frames_per_gpu = min(self.n_frames_per_gpu*2, self.opt.max_frames_per_gpu) 179 | self.n_frames_load = self.n_gpus * self.n_frames_per_gpu 180 | print('-------- Updating number of frames per gpu to %d ----------' % self.n_frames_per_gpu) 181 | 182 | 183 | def grid_sample(self, input1, input2): 184 | if self.opt.fp16: # not sure if it's necessary 185 | return torch.nn.functional.grid_sample(input1.float(), input2.float(), mode='bilinear', padding_mode='border').half() 186 | else: 187 | return torch.nn.functional.grid_sample(input1, input2, mode='bilinear', padding_mode='border') 188 | 189 | def resample(self, image, flow): 190 | b, c, h, w = image.size() 191 | if not hasattr(self, 'grid') or self.grid.size() != flow.size(): 192 | self.grid = get_grid(b, h, w, gpu_id=flow.get_device(), dtype=flow.dtype) 193 | flow = torch.cat([flow[:, 0:1, :, :] / ((w - 1.0) / 2.0), flow[:, 1:2, :, :] / ((h - 1.0) / 2.0)], dim=1) 194 | final_grid = (self.grid + flow).permute(0, 2, 3, 1).cuda(image.get_device()) 195 | output = self.grid_sample(image, final_grid) 196 | return output -------------------------------------------------------------------------------- /models/flownet.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import sys 4 | from .base_model import BaseModel 5 | 6 | class FlowNet(BaseModel): 7 | def name(self): 8 | return 'FlowNet' 9 | 10 | def initialize(self, opt): 11 | BaseModel.initialize(self, opt) 12 | 13 | # flownet 2 14 | from .flownet2_pytorch import models as flownet2_models 15 | from .flownet2_pytorch.utils import tools as flownet2_tools 16 | from .flownet2_pytorch.networks.resample2d_package.resample2d import Resample2d 17 | 18 | self.flowNet = flownet2_tools.module_to_dict(flownet2_models)['FlowNet2'](fp16=opt.fp16).cuda(self.gpu_ids[0]) 19 | checkpoint = torch.load('models/flownet2_pytorch/FlowNet2_checkpoint.pth.tar') 20 | self.flowNet.load_state_dict(checkpoint['state_dict']) 21 | self.flowNet.eval() 22 | self.resample = Resample2d() 23 | self.downsample = torch.nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False) 24 | 25 | def forward(self, input_A, input_B, dummy_bs=0): 26 | with torch.no_grad(): 27 | if input_A.get_device() == self.gpu_ids[0]: 28 | input_A, input_B = input_A[dummy_bs:], input_B[dummy_bs:] 29 | if input_A.size(0) == 0: 30 | b, n, c, h, w = input_A.size() 31 | return self.Tensor(1, n, 2, h, w), self.Tensor(1, n, 1, h, w) 32 | size = input_A.size() 33 | assert(len(size) == 4 or len(size) == 5) 34 | if len(size) == 5: 35 | b, n, c, h, w = size 36 | input_A = input_A.contiguous().view(-1, c, h, w) 37 | input_B = input_B.contiguous().view(-1, c, h, w) 38 | flow, conf = self.compute_flow_and_conf(input_A, input_B) 39 | return flow.view(b, n, 2, h, w), conf.view(b, n, 1, h, w) 40 | else: 41 | return self.compute_flow_and_conf(input_A, input_B) 42 | 43 | def compute_flow_and_conf(self, im1, im2): 44 | assert(im1.size()[1] == 3) 45 | assert(im1.size() == im2.size()) 46 | old_h, old_w = im1.size()[2], im1.size()[3] 47 | new_h, new_w = old_h//64*64, old_w//64*64 48 | if old_h != new_h: 49 | downsample = torch.nn.Upsample(size=(new_h, new_w), mode='bilinear') 50 | upsample = torch.nn.Upsample(size=(old_h, old_w), mode='bilinear') 51 | im1 = downsample(im1) 52 | im2 = downsample(im2) 53 | data1 = torch.cat([im1.unsqueeze(2), im2.unsqueeze(2)], dim=2) 54 | flow1 = self.flowNet(data1) 55 | conf = (self.norm(im1 - self.resample(im2, flow1)) < 0.02).float() 56 | if old_h != new_h: 57 | flow1 = upsample(flow1) * old_h / new_h 58 | conf = upsample(conf) 59 | return flow1.detach(), conf.detach() 60 | 61 | def norm(self, t): 62 | return torch.sum(t*t, dim=1, keepdim=True) 63 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 NVIDIA CORPORATION 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
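The confidence map in `compute_flow_and_conf` (models/flownet.py above) is simply a threshold on the squared photometric error between `im1` and `im2` warped by the estimated flow. Below is a minimal sketch of the same idea in plain PyTorch, using `grid_sample` in place of the custom `Resample2d` layer; the 0.02 threshold comes from the code above, everything else is illustrative:

```python
import torch
import torch.nn.functional as F

def flow_warp(img, flow):
    # Bilinearly warp img (B,C,H,W) by flow (B,2,H,W) given in pixels,
    # mimicking what Resample2d does inside compute_flow_and_conf.
    b, _, h, w = img.size()
    xs = torch.arange(w, dtype=img.dtype, device=img.device).view(1, 1, w).expand(1, h, w)
    ys = torch.arange(h, dtype=img.dtype, device=img.device).view(1, h, 1).expand(1, h, w)
    x = (xs + flow[:, 0]) / (w - 1) * 2 - 1          # normalize to [-1, 1] for grid_sample
    y = (ys + flow[:, 1]) / (h - 1) * 2 - 1
    grid = torch.stack((x, y), dim=3)                # (B, H, W, 2)
    # on PyTorch >= 1.3 you may want align_corners=True to match this normalization
    return F.grid_sample(img, grid, mode='bilinear', padding_mode='border')

def conf_map(im1, im2, flow, thresh=0.02):
    # 1 where the per-pixel squared error after warping is small, 0 otherwise.
    err = ((im1 - flow_warp(im2, flow)) ** 2).sum(dim=1, keepdim=True)
    return (err < thresh).float()
```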
-------------------------------------------------------------------------------- /models/flownet2_pytorch/README.md: -------------------------------------------------------------------------------- 1 | # flownet2-pytorch 2 | 3 | Pytorch implementation of [FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks](https://arxiv.org/abs/1612.01925). 4 | 5 | Multiple GPU training is supported, and the code provides examples for training or inference on [MPI-Sintel](http://sintel.is.tue.mpg.de/) clean and final datasets. The same commands can be used for training or inference with other datasets. See below for more detail. 6 | 7 | Inference using fp16 (half-precision) is also supported. 8 | 9 | For more help, type
10 | 11 | python main.py --help 12 | 13 | ## Network architectures 14 | Below are the different flownet neural network architectures that are provided.
15 | A batchnorm version for each network is also available. 16 | 17 | - **FlowNet2S** 18 | - **FlowNet2C** 19 | - **FlowNet2CS** 20 | - **FlowNet2CSS** 21 | - **FlowNet2SD** 22 | - **FlowNet2** 23 | 24 | ## Custom layers 25 | 26 | `FlowNet2` or `FlowNet2C*` architectures rely on custom layers `Resample2d` or `Correlation`.
27 | A Pytorch implementation of these layers with CUDA kernels is available at [./networks](./networks).
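A minimal usage sketch (not from the original README), assuming the extensions from `install.sh` have been built and a GPU is available; `Resample2d` bilinearly samples an image at the positions given by a per-pixel flow field:

```python
import torch
from resample2d_package.resample2d import Resample2d  # import path assumes you run from ./networks

warp = Resample2d()
image = torch.randn(1, 3, 64, 64, device='cuda')   # (B, C, H, W)
flow  = torch.zeros(1, 2, 64, 64, device='cuda')   # zero flow -> identity warp
warped = warp(image, flow)                          # same shape as `image`
```

This is the layer that `models/flownet.py` above uses to warp frames when computing its flow confidence map.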
28 | Note: Currently, half-precision kernels are not available for these layers. 29 | 30 | ## Data Loaders 31 | 32 | Dataloaders for FlyingChairs, FlyingThings, ChairsSDHom and ImagesFromFolder are available in [datasets.py](./datasets.py).
33 | 34 | ## Loss Functions 35 | 36 | L1 and L2 losses with multi-scale support are available in [losses.py](./losses.py).
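A short sketch of calling the `MultiScale` loss from `losses.py` directly, outside of `main.py`; the `args` namespace is only stored by the loss, and the per-scale shapes assume the default `startScale=4`, `numScales=5`:

```python
import argparse
import torch
from losses import MultiScale   # losses.py in this directory

args = argparse.Namespace()                       # only kept as a reference by the loss
criterion = MultiScale(args, startScale=4, numScales=5, norm='L1')

target = torch.randn(8, 2, 384, 512)              # ground-truth flow at full resolution
# In training mode the networks return one flow prediction per scale (finest first).
output = tuple(torch.randn(8, 2, 384 // (4 * 2 ** s), 512 // (4 * 2 ** s)) for s in range(5))
loss, epe = criterion(output, target)             # forward() returns [loss, EPE]
```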
37 | 38 | ## Installation 39 | 40 | # get flownet2-pytorch source 41 | git clone https://github.com/NVIDIA/flownet2-pytorch.git 42 | cd flownet2-pytorch 43 | 44 | # install custom layers 45 | bash install.sh 46 | 47 | ### Python requirements 48 | Currently, the code supports python 3 49 | * numpy 50 | * PyTorch ( == 0.4.1, for <= 0.4.0 see branch [python36-PyTorch0.4](https://github.com/NVIDIA/flownet2-pytorch/tree/python36-PyTorch0.4)) 51 | * scipy 52 | * scikit-image 53 | * tensorboardX 54 | * colorama, tqdm, setproctitle 55 | 56 | ## Converted Caffe Pre-trained Models 57 | We've included caffe pre-trained models. Should you use these pre-trained weights, please adhere to the [license agreements](https://drive.google.com/file/d/1TVv0BnNFh3rpHZvD-easMb9jYrPE2Eqd/view?usp=sharing). 58 | 59 | * [FlowNet2](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view?usp=sharing)[620MB] 60 | * [FlowNet2-C](https://drive.google.com/file/d/1BFT6b7KgKJC8rA59RmOVAXRM_S7aSfKE/view?usp=sharing)[149MB] 61 | * [FlowNet2-CS](https://drive.google.com/file/d/1iBJ1_o7PloaINpa8m7u_7TsLCX0Dt_jS/view?usp=sharing)[297MB] 62 | * [FlowNet2-CSS](https://drive.google.com/file/d/157zuzVf4YMN6ABAQgZc8rRmR5cgWzSu8/view?usp=sharing)[445MB] 63 | * [FlowNet2-CSS-ft-sd](https://drive.google.com/file/d/1R5xafCIzJCXc8ia4TGfC65irmTNiMg6u/view?usp=sharing)[445MB] 64 | * [FlowNet2-S](https://drive.google.com/file/d/1V61dZjFomwlynwlYklJHC-TLfdFom3Lg/view?usp=sharing)[148MB] 65 | * [FlowNet2-SD](https://drive.google.com/file/d/1QW03eyYG_vD-dT-Mx4wopYvtPu_msTKn/view?usp=sharing)[173MB] 66 | 67 | ## Inference 68 | # Example on MPISintel Clean 69 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset MpiSintelClean \ 70 | --inference_dataset_root /path/to/mpi-sintel/clean/dataset \ 71 | --resume /path/to/checkpoints 72 | 73 | ## Training and validation 74 | 75 | # Example on MPISintel Final and Clean, with L1Loss on FlowNet2 model 76 | python main.py --batch_size 8 --model FlowNet2 --loss=L1Loss --optimizer=Adam --optimizer_lr=1e-4 \ 77 | --training_dataset MpiSintelFinal --training_dataset_root /path/to/mpi-sintel/final/dataset \ 78 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset 79 | 80 | # Example on MPISintel Final and Clean, with MultiScale loss on FlowNet2C model 81 | python main.py --batch_size 8 --model FlowNet2C --optimizer=Adam --optimizer_lr=1e-4 --loss=MultiScale --loss_norm=L1 \ 82 | --loss_numScales=5 --loss_startScale=4 --optimizer_lr=1e-4 --crop_size 384 512 \ 83 | --training_dataset FlyingChairs --training_dataset_root /path/to/flying-chairs/dataset \ 84 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset 85 | 86 | ## Results on MPI-Sintel 87 | [![Predicted flows on MPI-Sintel](./image.png)](https://www.youtube.com/watch?v=HtBmabY8aeU "Predicted flows on MPI-Sintel") 88 | 89 | ## Reference 90 | If you find this implementation useful in your work, please acknowledge it appropriately and cite the paper: 91 | ```` 92 | @InProceedings{IMKDB17, 93 | author = "E. Ilg and N. Mayer and T. Saikia and M. Keuper and A. Dosovitskiy and T. 
Brox", 94 | title = "FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks", 95 | booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)", 96 | month = "Jul", 97 | year = "2017", 98 | url = "http://lmb.informatik.uni-freiburg.de//Publications/2017/IMKDB17" 99 | } 100 | ```` 101 | ``` 102 | @misc{flownet2-pytorch, 103 | author = {Fitsum Reda and Robert Pottorff and Jon Barker and Bryan Catanzaro}, 104 | title = {flownet2-pytorch: Pytorch implementation of FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks}, 105 | year = {2017}, 106 | publisher = {GitHub}, 107 | journal = {GitHub repository}, 108 | howpublished = {\url{https://github.com/NVIDIA/flownet2-pytorch}} 109 | } 110 | ``` 111 | ## Related Optical Flow Work from Nvidia 112 | Code (in Caffe and Pytorch): [PWC-Net](https://github.com/NVlabs/PWC-Net)
113 | Paper : [PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume](https://arxiv.org/abs/1709.02371). 114 | 115 | ## Acknowledgments 116 | Parts of this code were derived, as noted in the code, from [ClementPinard/FlowNetPytorch](https://github.com/ClementPinard/FlowNetPytorch). 117 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/convert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | 3 | import caffe 4 | from caffe.proto import caffe_pb2 5 | import sys, os 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | import argparse, tempfile 11 | import numpy as np 12 | 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('caffe_model', help='input model in hdf5 or caffemodel format') 15 | parser.add_argument('prototxt_template',help='prototxt template') 16 | parser.add_argument('flownet2_pytorch', help='path to flownet2-pytorch') 17 | 18 | args = parser.parse_args() 19 | 20 | args.rgb_max = 255 21 | args.fp16 = False 22 | args.grads = {} 23 | 24 | # load models 25 | sys.path.append(args.flownet2_pytorch) 26 | 27 | import models 28 | from utils.param_utils import * 29 | 30 | width = 256 31 | height = 256 32 | keys = {'TARGET_WIDTH': width, 33 | 'TARGET_HEIGHT': height, 34 | 'ADAPTED_WIDTH':width, 35 | 'ADAPTED_HEIGHT':height, 36 | 'SCALE_WIDTH':1., 37 | 'SCALE_HEIGHT':1.,} 38 | 39 | template = '\n'.join(np.loadtxt(args.prototxt_template, dtype=str, delimiter='\n')) 40 | for k in keys: 41 | template = template.replace('$%s$'%(k),str(keys[k])) 42 | 43 | prototxt = tempfile.NamedTemporaryFile(mode='w', delete=True) 44 | prototxt.write(template) 45 | prototxt.flush() 46 | 47 | net = caffe.Net(prototxt.name, args.caffe_model, caffe.TEST) 48 | 49 | weights = {} 50 | biases = {} 51 | 52 | for k, v in list(net.params.items()): 53 | weights[k] = np.array(v[0].data).reshape(v[0].data.shape) 54 | biases[k] = np.array(v[1].data).reshape(v[1].data.shape) 55 | print((k, weights[k].shape, biases[k].shape)) 56 | 57 | if 'FlowNet2/' in args.caffe_model: 58 | model = models.FlowNet2(args) 59 | 60 | parse_flownetc(model.flownetc.modules(), weights, biases) 61 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 62 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') 63 | parse_flownetsd(model.flownets_d.modules(), weights, biases, param_prefix='netsd_') 64 | parse_flownetfusion(model.flownetfusion.modules(), weights, biases, param_prefix='fuse_') 65 | 66 | state = {'epoch': 0, 67 | 'state_dict': model.state_dict(), 68 | 'best_EPE': 1e10} 69 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2_checkpoint.pth.tar')) 70 | 71 | elif 'FlowNet2-C/' in args.caffe_model: 72 | model = models.FlowNet2C(args) 73 | 74 | parse_flownetc(model.modules(), weights, biases) 75 | state = {'epoch': 0, 76 | 'state_dict': model.state_dict(), 77 | 'best_EPE': 1e10} 78 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-C_checkpoint.pth.tar')) 79 | 80 | elif 'FlowNet2-CS/' in args.caffe_model: 81 | model = models.FlowNet2CS(args) 82 | 83 | 
parse_flownetc(model.flownetc.modules(), weights, biases) 84 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 85 | 86 | state = {'epoch': 0, 87 | 'state_dict': model.state_dict(), 88 | 'best_EPE': 1e10} 89 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CS_checkpoint.pth.tar')) 90 | 91 | elif 'FlowNet2-CSS/' in args.caffe_model: 92 | model = models.FlowNet2CSS(args) 93 | 94 | parse_flownetc(model.flownetc.modules(), weights, biases) 95 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 96 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') 97 | 98 | state = {'epoch': 0, 99 | 'state_dict': model.state_dict(), 100 | 'best_EPE': 1e10} 101 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS_checkpoint.pth.tar')) 102 | 103 | elif 'FlowNet2-CSS-ft-sd/' in args.caffe_model: 104 | model = models.FlowNet2CSS(args) 105 | 106 | parse_flownetc(model.flownetc.modules(), weights, biases) 107 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') 108 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') 109 | 110 | state = {'epoch': 0, 111 | 'state_dict': model.state_dict(), 112 | 'best_EPE': 1e10} 113 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS-ft-sd_checkpoint.pth.tar')) 114 | 115 | elif 'FlowNet2-S/' in args.caffe_model: 116 | model = models.FlowNet2S(args) 117 | 118 | parse_flownetsonly(model.modules(), weights, biases, param_prefix='') 119 | state = {'epoch': 0, 120 | 'state_dict': model.state_dict(), 121 | 'best_EPE': 1e10} 122 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-S_checkpoint.pth.tar')) 123 | 124 | elif 'FlowNet2-SD/' in args.caffe_model: 125 | model = models.FlowNet2SD(args) 126 | 127 | parse_flownetsd(model.modules(), weights, biases, param_prefix='') 128 | 129 | state = {'epoch': 0, 130 | 'state_dict': model.state_dict(), 131 | 'best_EPE': 1e10} 132 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-SD_checkpoint.pth.tar')) 133 | 134 | else: 135 | print(('model type cound not be determined from input caffe model %s'%(args.caffe_model))) 136 | quit() 137 | print(("done converting ", args.caffe_model)) -------------------------------------------------------------------------------- /models/flownet2_pytorch/download_caffe_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sudo rm -rf flownet2-docker 3 | sudo git clone https://github.com/lmb-freiburg/flownet2-docker 4 | cd flownet2-docker 5 | 6 | sudo sed -i '$ a RUN apt-get update && apt-get install -y python-pip \ 7 | RUN pip install --upgrade pip \ 8 | RUN pip install numpy -I \ 9 | RUN pip install http://download.pytorch.org/whl/cu80/torch-0.2.0.post3-cp27-cp27mu-manylinux1_x86_64.whl \ 10 | RUN pip install cffi ipython' Dockerfile 11 | 12 | sudo make 13 | 14 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd ./networks/correlation_package 3 | python setup.py install --user 4 | cd ../resample2d_package 5 | python setup.py install --user 6 | cd ../channelnorm_package 7 | python setup.py install --user 8 | cd .. 
9 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/launch_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sudo nvidia-docker build -t $USER/pytorch:CUDA8-py27 . 3 | sudo nvidia-docker run --rm -ti --volume=$(pwd):/flownet2-pytorch:rw --workdir=/flownet2-pytorch --ipc=host $USER/pytorch:CUDA8-py27 /bin/bash 4 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/losses.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Portions of this code copyright 2017, Clement Pinard 3 | ''' 4 | 5 | # freda (todo) : adversarial loss 6 | 7 | import torch 8 | import torch.nn as nn 9 | import math 10 | 11 | def EPE(input_flow, target_flow): 12 | return torch.norm(target_flow-input_flow,p=2,dim=1).mean() 13 | 14 | class L1(nn.Module): 15 | def __init__(self): 16 | super(L1, self).__init__() 17 | def forward(self, output, target): 18 | lossvalue = torch.abs(output - target).mean() 19 | return lossvalue 20 | 21 | class L2(nn.Module): 22 | def __init__(self): 23 | super(L2, self).__init__() 24 | def forward(self, output, target): 25 | lossvalue = torch.norm(output-target,p=2,dim=1).mean() 26 | return lossvalue 27 | 28 | class L1Loss(nn.Module): 29 | def __init__(self, args): 30 | super(L1Loss, self).__init__() 31 | self.args = args 32 | self.loss = L1() 33 | self.loss_labels = ['L1', 'EPE'] 34 | 35 | def forward(self, output, target): 36 | lossvalue = self.loss(output, target) 37 | epevalue = EPE(output, target) 38 | return [lossvalue, epevalue] 39 | 40 | class L2Loss(nn.Module): 41 | def __init__(self, args): 42 | super(L2Loss, self).__init__() 43 | self.args = args 44 | self.loss = L2() 45 | self.loss_labels = ['L2', 'EPE'] 46 | 47 | def forward(self, output, target): 48 | lossvalue = self.loss(output, target) 49 | epevalue = EPE(output, target) 50 | return [lossvalue, epevalue] 51 | 52 | class MultiScale(nn.Module): 53 | def __init__(self, args, startScale = 4, numScales = 5, l_weight= 0.32, norm= 'L1'): 54 | super(MultiScale,self).__init__() 55 | 56 | self.startScale = startScale 57 | self.numScales = numScales 58 | self.loss_weights = torch.FloatTensor([(l_weight / 2 ** scale) for scale in range(self.numScales)]) 59 | self.args = args 60 | self.l_type = norm 61 | self.div_flow = 0.05 62 | assert(len(self.loss_weights) == self.numScales) 63 | 64 | if self.l_type == 'L1': 65 | self.loss = L1() 66 | else: 67 | self.loss = L2() 68 | 69 | self.multiScales = [nn.AvgPool2d(self.startScale * (2**scale), self.startScale * (2**scale)) for scale in range(self.numScales)] 70 | self.loss_labels = ['MultiScale-'+self.l_type, 'EPE'], 71 | 72 | def forward(self, output, target): 73 | lossvalue = 0 74 | epevalue = 0 75 | 76 | if type(output) is tuple: 77 | target = self.div_flow * target 78 | for i, output_ in enumerate(output): 79 | target_ = self.multiScales[i](target) 80 | epevalue += self.loss_weights[i]*EPE(output_, target_) 81 | lossvalue += self.loss_weights[i]*self.loss(output_, target_) 82 | return [lossvalue, epevalue] 83 | else: 84 | epevalue += EPE(output, target) 85 | lossvalue += self.loss(output, target) 86 | return [lossvalue, epevalue] 87 | 88 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/FlowNetC.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 
as nn 3 | from torch.nn import init 4 | 5 | import math 6 | import numpy as np 7 | 8 | from .correlation_package.correlation import Correlation 9 | 10 | from .submodules import * 11 | 'Parameter count , 39,175,298 ' 12 | 13 | class FlowNetC(nn.Module): 14 | def __init__(self, args, batchNorm=True, div_flow = 20): 15 | super(FlowNetC,self).__init__() 16 | self.fp16 = args.fp16 17 | self.batchNorm = batchNorm 18 | self.div_flow = div_flow 19 | 20 | self.conv1 = conv(self.batchNorm, 3, 64, kernel_size=7, stride=2) 21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2) 22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2) 23 | self.conv_redir = conv(self.batchNorm, 256, 32, kernel_size=1, stride=1) 24 | 25 | """if args.fp16: 26 | self.corr = nn.Sequential( 27 | tofp32(), 28 | Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1), 29 | tofp16()) 30 | else:""" 31 | self.corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1) 32 | 33 | self.corr_activation = nn.LeakyReLU(0.1,inplace=True) 34 | self.conv3_1 = conv(self.batchNorm, 473, 256) 35 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2) 36 | self.conv4_1 = conv(self.batchNorm, 512, 512) 37 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2) 38 | self.conv5_1 = conv(self.batchNorm, 512, 512) 39 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2) 40 | self.conv6_1 = conv(self.batchNorm,1024, 1024) 41 | 42 | self.deconv5 = deconv(1024,512) 43 | self.deconv4 = deconv(1026,256) 44 | self.deconv3 = deconv(770,128) 45 | self.deconv2 = deconv(386,64) 46 | 47 | self.predict_flow6 = predict_flow(1024) 48 | self.predict_flow5 = predict_flow(1026) 49 | self.predict_flow4 = predict_flow(770) 50 | self.predict_flow3 = predict_flow(386) 51 | self.predict_flow2 = predict_flow(194) 52 | 53 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 54 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 55 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 56 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True) 57 | 58 | for m in self.modules(): 59 | if isinstance(m, nn.Conv2d): 60 | if m.bias is not None: 61 | init.uniform_(m.bias) 62 | init.xavier_uniform_(m.weight) 63 | 64 | if isinstance(m, nn.ConvTranspose2d): 65 | if m.bias is not None: 66 | init.uniform_(m.bias) 67 | init.xavier_uniform_(m.weight) 68 | # init_deconv_bilinear(m.weight) 69 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear') 70 | 71 | def forward(self, x): 72 | x1 = x[:,0:3,:,:] 73 | x2 = x[:,3::,:,:] 74 | 75 | out_conv1a = self.conv1(x1) 76 | out_conv2a = self.conv2(out_conv1a) 77 | out_conv3a = self.conv3(out_conv2a) 78 | 79 | # FlownetC bottom input stream 80 | out_conv1b = self.conv1(x2) 81 | 82 | out_conv2b = self.conv2(out_conv1b) 83 | out_conv3b = self.conv3(out_conv2b) 84 | 85 | # Merge streams 86 | if self.fp16: 87 | out_corr = self.corr(out_conv3a.float(), out_conv3b.float()).half() # False 88 | else: 89 | out_corr = self.corr(out_conv3a, out_conv3b) # False 90 | out_corr = self.corr_activation(out_corr) 91 | 92 | # Redirect top input stream and concatenate 93 | out_conv_redir = self.conv_redir(out_conv3a) 94 | 95 | in_conv3_1 = torch.cat((out_conv_redir, out_corr), 1) 96 | 97 | # Merged conv layers 98 | out_conv3_1 = self.conv3_1(in_conv3_1) 99 | 100 | out_conv4 = self.conv4_1(self.conv4(out_conv3_1)) 101 | 102 | out_conv5 = 
self.conv5_1(self.conv5(out_conv4)) 103 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 104 | 105 | flow6 = self.predict_flow6(out_conv6) 106 | flow6_up = self.upsampled_flow6_to_5(flow6) 107 | out_deconv5 = self.deconv5(out_conv6) 108 | 109 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1) 110 | 111 | flow5 = self.predict_flow5(concat5) 112 | flow5_up = self.upsampled_flow5_to_4(flow5) 113 | out_deconv4 = self.deconv4(concat5) 114 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1) 115 | 116 | flow4 = self.predict_flow4(concat4) 117 | flow4_up = self.upsampled_flow4_to_3(flow4) 118 | out_deconv3 = self.deconv3(concat4) 119 | concat3 = torch.cat((out_conv3_1,out_deconv3,flow4_up),1) 120 | 121 | flow3 = self.predict_flow3(concat3) 122 | flow3_up = self.upsampled_flow3_to_2(flow3) 123 | out_deconv2 = self.deconv2(concat3) 124 | concat2 = torch.cat((out_conv2a,out_deconv2,flow3_up),1) 125 | 126 | flow2 = self.predict_flow2(concat2) 127 | 128 | if self.training: 129 | return flow2,flow3,flow4,flow5,flow6 130 | else: 131 | return flow2, 132 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/FlowNetFusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | 5 | import math 6 | import numpy as np 7 | 8 | from .submodules import * 9 | 'Parameter count = 581,226' 10 | 11 | class FlowNetFusion(nn.Module): 12 | def __init__(self,args, batchNorm=True): 13 | super(FlowNetFusion,self).__init__() 14 | 15 | self.batchNorm = batchNorm 16 | self.conv0 = conv(self.batchNorm, 11, 64) 17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2) 18 | self.conv1_1 = conv(self.batchNorm, 64, 128) 19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2) 20 | self.conv2_1 = conv(self.batchNorm, 128, 128) 21 | 22 | self.deconv1 = deconv(128,32) 23 | self.deconv0 = deconv(162,16) 24 | 25 | self.inter_conv1 = i_conv(self.batchNorm, 162, 32) 26 | self.inter_conv0 = i_conv(self.batchNorm, 82, 16) 27 | 28 | self.predict_flow2 = predict_flow(128) 29 | self.predict_flow1 = predict_flow(32) 30 | self.predict_flow0 = predict_flow(16) 31 | 32 | self.upsampled_flow2_to_1 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 33 | self.upsampled_flow1_to_0 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 34 | 35 | for m in self.modules(): 36 | if isinstance(m, nn.Conv2d): 37 | if m.bias is not None: 38 | init.uniform_(m.bias) 39 | init.xavier_uniform_(m.weight) 40 | 41 | if isinstance(m, nn.ConvTranspose2d): 42 | if m.bias is not None: 43 | init.uniform_(m.bias) 44 | init.xavier_uniform_(m.weight) 45 | # init_deconv_bilinear(m.weight) 46 | 47 | def forward(self, x): 48 | out_conv0 = self.conv0(x) 49 | out_conv1 = self.conv1_1(self.conv1(out_conv0)) 50 | out_conv2 = self.conv2_1(self.conv2(out_conv1)) 51 | 52 | flow2 = self.predict_flow2(out_conv2) 53 | flow2_up = self.upsampled_flow2_to_1(flow2) 54 | out_deconv1 = self.deconv1(out_conv2) 55 | 56 | concat1 = torch.cat((out_conv1,out_deconv1,flow2_up),1) 57 | out_interconv1 = self.inter_conv1(concat1) 58 | flow1 = self.predict_flow1(out_interconv1) 59 | flow1_up = self.upsampled_flow1_to_0(flow1) 60 | out_deconv0 = self.deconv0(concat1) 61 | 62 | concat0 = torch.cat((out_conv0,out_deconv0,flow1_up),1) 63 | out_interconv0 = self.inter_conv0(concat0) 64 | flow0 = self.predict_flow0(out_interconv0) 65 | 66 | return flow0 67 | 68 | -------------------------------------------------------------------------------- 
/models/flownet2_pytorch/networks/FlowNetS.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Portions of this code copyright 2017, Clement Pinard 3 | ''' 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.nn import init 8 | 9 | import math 10 | import numpy as np 11 | 12 | from .submodules import * 13 | 'Parameter count : 38,676,504 ' 14 | 15 | class FlowNetS(nn.Module): 16 | def __init__(self, args, input_channels = 12, batchNorm=True): 17 | super(FlowNetS,self).__init__() 18 | 19 | self.batchNorm = batchNorm 20 | self.conv1 = conv(self.batchNorm, input_channels, 64, kernel_size=7, stride=2) 21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2) 22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2) 23 | self.conv3_1 = conv(self.batchNorm, 256, 256) 24 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2) 25 | self.conv4_1 = conv(self.batchNorm, 512, 512) 26 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2) 27 | self.conv5_1 = conv(self.batchNorm, 512, 512) 28 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2) 29 | self.conv6_1 = conv(self.batchNorm,1024, 1024) 30 | 31 | self.deconv5 = deconv(1024,512) 32 | self.deconv4 = deconv(1026,256) 33 | self.deconv3 = deconv(770,128) 34 | self.deconv2 = deconv(386,64) 35 | 36 | self.predict_flow6 = predict_flow(1024) 37 | self.predict_flow5 = predict_flow(1026) 38 | self.predict_flow4 = predict_flow(770) 39 | self.predict_flow3 = predict_flow(386) 40 | self.predict_flow2 = predict_flow(194) 41 | 42 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 43 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 44 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 45 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False) 46 | 47 | for m in self.modules(): 48 | if isinstance(m, nn.Conv2d): 49 | if m.bias is not None: 50 | init.uniform_(m.bias) 51 | init.xavier_uniform_(m.weight) 52 | 53 | if isinstance(m, nn.ConvTranspose2d): 54 | if m.bias is not None: 55 | init.uniform_(m.bias) 56 | init.xavier_uniform_(m.weight) 57 | # init_deconv_bilinear(m.weight) 58 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear') 59 | 60 | def forward(self, x): 61 | out_conv1 = self.conv1(x) 62 | 63 | out_conv2 = self.conv2(out_conv1) 64 | out_conv3 = self.conv3_1(self.conv3(out_conv2)) 65 | out_conv4 = self.conv4_1(self.conv4(out_conv3)) 66 | out_conv5 = self.conv5_1(self.conv5(out_conv4)) 67 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 68 | 69 | flow6 = self.predict_flow6(out_conv6) 70 | flow6_up = self.upsampled_flow6_to_5(flow6) 71 | out_deconv5 = self.deconv5(out_conv6) 72 | 73 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1) 74 | flow5 = self.predict_flow5(concat5) 75 | flow5_up = self.upsampled_flow5_to_4(flow5) 76 | out_deconv4 = self.deconv4(concat5) 77 | 78 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1) 79 | flow4 = self.predict_flow4(concat4) 80 | flow4_up = self.upsampled_flow4_to_3(flow4) 81 | out_deconv3 = self.deconv3(concat4) 82 | 83 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1) 84 | flow3 = self.predict_flow3(concat3) 85 | flow3_up = self.upsampled_flow3_to_2(flow3) 86 | out_deconv2 = self.deconv2(concat3) 87 | 88 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1) 89 | flow2 = self.predict_flow2(concat2) 90 | 91 | if self.training: 92 | return flow2,flow3,flow4,flow5,flow6 93 | else: 94 | return flow2, 95 | 96 
| -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/FlowNetSD.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | 5 | import math 6 | import numpy as np 7 | 8 | from .submodules import * 9 | 'Parameter count = 45,371,666' 10 | 11 | class FlowNetSD(nn.Module): 12 | def __init__(self, args, batchNorm=True): 13 | super(FlowNetSD,self).__init__() 14 | 15 | self.batchNorm = batchNorm 16 | self.conv0 = conv(self.batchNorm, 6, 64) 17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2) 18 | self.conv1_1 = conv(self.batchNorm, 64, 128) 19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2) 20 | self.conv2_1 = conv(self.batchNorm, 128, 128) 21 | self.conv3 = conv(self.batchNorm, 128, 256, stride=2) 22 | self.conv3_1 = conv(self.batchNorm, 256, 256) 23 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2) 24 | self.conv4_1 = conv(self.batchNorm, 512, 512) 25 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2) 26 | self.conv5_1 = conv(self.batchNorm, 512, 512) 27 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2) 28 | self.conv6_1 = conv(self.batchNorm,1024, 1024) 29 | 30 | self.deconv5 = deconv(1024,512) 31 | self.deconv4 = deconv(1026,256) 32 | self.deconv3 = deconv(770,128) 33 | self.deconv2 = deconv(386,64) 34 | 35 | self.inter_conv5 = i_conv(self.batchNorm, 1026, 512) 36 | self.inter_conv4 = i_conv(self.batchNorm, 770, 256) 37 | self.inter_conv3 = i_conv(self.batchNorm, 386, 128) 38 | self.inter_conv2 = i_conv(self.batchNorm, 194, 64) 39 | 40 | self.predict_flow6 = predict_flow(1024) 41 | self.predict_flow5 = predict_flow(512) 42 | self.predict_flow4 = predict_flow(256) 43 | self.predict_flow3 = predict_flow(128) 44 | self.predict_flow2 = predict_flow(64) 45 | 46 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 47 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 48 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 49 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1) 50 | 51 | for m in self.modules(): 52 | if isinstance(m, nn.Conv2d): 53 | if m.bias is not None: 54 | init.uniform_(m.bias) 55 | init.xavier_uniform_(m.weight) 56 | 57 | if isinstance(m, nn.ConvTranspose2d): 58 | if m.bias is not None: 59 | init.uniform_(m.bias) 60 | init.xavier_uniform_(m.weight) 61 | # init_deconv_bilinear(m.weight) 62 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear') 63 | 64 | 65 | 66 | def forward(self, x): 67 | out_conv0 = self.conv0(x) 68 | out_conv1 = self.conv1_1(self.conv1(out_conv0)) 69 | out_conv2 = self.conv2_1(self.conv2(out_conv1)) 70 | 71 | out_conv3 = self.conv3_1(self.conv3(out_conv2)) 72 | out_conv4 = self.conv4_1(self.conv4(out_conv3)) 73 | out_conv5 = self.conv5_1(self.conv5(out_conv4)) 74 | out_conv6 = self.conv6_1(self.conv6(out_conv5)) 75 | 76 | flow6 = self.predict_flow6(out_conv6) 77 | flow6_up = self.upsampled_flow6_to_5(flow6) 78 | out_deconv5 = self.deconv5(out_conv6) 79 | 80 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1) 81 | out_interconv5 = self.inter_conv5(concat5) 82 | flow5 = self.predict_flow5(out_interconv5) 83 | 84 | flow5_up = self.upsampled_flow5_to_4(flow5) 85 | out_deconv4 = self.deconv4(concat5) 86 | 87 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1) 88 | out_interconv4 = self.inter_conv4(concat4) 89 | flow4 = self.predict_flow4(out_interconv4) 90 | flow4_up = self.upsampled_flow4_to_3(flow4) 91 
| out_deconv3 = self.deconv3(concat4) 92 | 93 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1) 94 | out_interconv3 = self.inter_conv3(concat3) 95 | flow3 = self.predict_flow3(out_interconv3) 96 | flow3_up = self.upsampled_flow3_to_2(flow3) 97 | out_deconv2 = self.deconv2(concat3) 98 | 99 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1) 100 | out_interconv2 = self.inter_conv2(concat2) 101 | flow2 = self.predict_flow2(out_interconv2) 102 | 103 | if self.training: 104 | return flow2,flow3,flow4,flow5,flow6 105 | else: 106 | return flow2, 107 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/channelnorm_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/channelnorm_package/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/channelnorm_package/channelnorm.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function, Variable 2 | from torch.nn.modules.module import Module 3 | import channelnorm_cuda 4 | 5 | class ChannelNormFunction(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, input1, norm_deg=2): 9 | assert input1.is_contiguous() 10 | b, _, h, w = input1.size() 11 | output = input1.new(b, 1, h, w).zero_() 12 | 13 | channelnorm_cuda.forward(input1, output, norm_deg) 14 | ctx.save_for_backward(input1, output) 15 | ctx.norm_deg = norm_deg 16 | 17 | return output 18 | 19 | @staticmethod 20 | def backward(ctx, grad_output): 21 | input1, output = ctx.saved_tensors 22 | 23 | grad_input1 = Variable(input1.new(input1.size()).zero_()) 24 | 25 | channelnorm.backward(input1, output, grad_output.data, 26 | grad_input1.data, ctx.norm_deg) 27 | 28 | return grad_input1, None 29 | 30 | 31 | class ChannelNorm(Module): 32 | 33 | def __init__(self, norm_deg=2): 34 | super(ChannelNorm, self).__init__() 35 | self.norm_deg = norm_deg 36 | 37 | def forward(self, input1): 38 | return ChannelNormFunction.apply(input1, self.norm_deg) 39 | 40 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/channelnorm_package/channelnorm_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "channelnorm_kernel.cuh" 5 | 6 | int channelnorm_cuda_forward( 7 | at::Tensor& input1, 8 | at::Tensor& output, 9 | int norm_deg) { 10 | 11 | channelnorm_kernel_forward(input1, output, norm_deg); 12 | return 1; 13 | } 14 | 15 | 16 | int channelnorm_cuda_backward( 17 | at::Tensor& input1, 18 | at::Tensor& output, 19 | at::Tensor& gradOutput, 20 | at::Tensor& gradInput1, 21 | int norm_deg) { 22 | 23 | channelnorm_kernel_backward(input1, output, gradOutput, gradInput1, norm_deg); 24 | return 1; 25 | } 26 | 27 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 28 | m.def("forward", &channelnorm_cuda_forward, "Channel norm forward (CUDA)"); 29 | m.def("backward", 
&channelnorm_cuda_backward, "Channel norm backward (CUDA)"); 30 | } 31 | 32 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/channelnorm_package/channelnorm_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "channelnorm_kernel.cuh" 6 | 7 | #define CUDA_NUM_THREADS 512 8 | 9 | #define DIM0(TENSOR) ((TENSOR).x) 10 | #define DIM1(TENSOR) ((TENSOR).y) 11 | #define DIM2(TENSOR) ((TENSOR).z) 12 | #define DIM3(TENSOR) ((TENSOR).w) 13 | 14 | #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))]) 15 | 16 | using at::Half; 17 | 18 | template 19 | __global__ void kernel_channelnorm_update_output( 20 | const int n, 21 | const scalar_t* __restrict__ input1, 22 | const long4 input1_size, 23 | const long4 input1_stride, 24 | scalar_t* __restrict__ output, 25 | const long4 output_size, 26 | const long4 output_stride, 27 | int norm_deg) { 28 | 29 | int index = blockIdx.x * blockDim.x + threadIdx.x; 30 | 31 | if (index >= n) { 32 | return; 33 | } 34 | 35 | int dim_b = DIM0(output_size); 36 | int dim_c = DIM1(output_size); 37 | int dim_h = DIM2(output_size); 38 | int dim_w = DIM3(output_size); 39 | int dim_chw = dim_c * dim_h * dim_w; 40 | 41 | int b = ( index / dim_chw ) % dim_b; 42 | int y = ( index / dim_w ) % dim_h; 43 | int x = ( index ) % dim_w; 44 | 45 | int i1dim_c = DIM1(input1_size); 46 | int i1dim_h = DIM2(input1_size); 47 | int i1dim_w = DIM3(input1_size); 48 | int i1dim_chw = i1dim_c * i1dim_h * i1dim_w; 49 | int i1dim_hw = i1dim_h * i1dim_w; 50 | 51 | float result = 0.0; 52 | 53 | for (int c = 0; c < i1dim_c; ++c) { 54 | int i1Index = b * i1dim_chw + c * i1dim_hw + y * i1dim_w + x; 55 | scalar_t val = input1[i1Index]; 56 | result += static_cast(val * val); 57 | } 58 | result = sqrt(result); 59 | output[index] = static_cast(result); 60 | } 61 | 62 | 63 | template 64 | __global__ void kernel_channelnorm_backward_input1( 65 | const int n, 66 | const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride, 67 | const scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride, 68 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride, 69 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, 70 | int norm_deg) { 71 | 72 | int index = blockIdx.x * blockDim.x + threadIdx.x; 73 | 74 | if (index >= n) { 75 | return; 76 | } 77 | 78 | float val = 0.0; 79 | 80 | int dim_b = DIM0(gradInput_size); 81 | int dim_c = DIM1(gradInput_size); 82 | int dim_h = DIM2(gradInput_size); 83 | int dim_w = DIM3(gradInput_size); 84 | int dim_chw = dim_c * dim_h * dim_w; 85 | int dim_hw = dim_h * dim_w; 86 | 87 | int b = ( index / dim_chw ) % dim_b; 88 | int y = ( index / dim_w ) % dim_h; 89 | int x = ( index ) % dim_w; 90 | 91 | 92 | int outIndex = b * dim_hw + y * dim_w + x; 93 | val = static_cast(gradOutput[outIndex]) * static_cast(input1[index]) / (static_cast(output[outIndex])+1e-9); 94 | gradInput[index] = static_cast(val); 95 | 96 | } 97 | 98 | void channelnorm_kernel_forward( 99 | at::Tensor& input1, 100 | at::Tensor& output, 101 | int norm_deg) { 102 | 103 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3)); 104 | const long4 input1_stride = 
make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3)); 105 | 106 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3)); 107 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3)); 108 | 109 | int n = output.numel(); 110 | 111 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_forward", ([&] { 112 | 113 | kernel_channelnorm_update_output<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 114 | //at::globalContext().getCurrentCUDAStream() >>>( 115 | n, 116 | input1.data(), 117 | input1_size, 118 | input1_stride, 119 | output.data(), 120 | output_size, 121 | output_stride, 122 | norm_deg); 123 | 124 | })); 125 | 126 | // TODO: ATen-equivalent check 127 | 128 | // THCudaCheck(cudaGetLastError()); 129 | } 130 | 131 | void channelnorm_kernel_backward( 132 | at::Tensor& input1, 133 | at::Tensor& output, 134 | at::Tensor& gradOutput, 135 | at::Tensor& gradInput1, 136 | int norm_deg) { 137 | 138 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3)); 139 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3)); 140 | 141 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3)); 142 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3)); 143 | 144 | const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3)); 145 | const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3)); 146 | 147 | const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3)); 148 | const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3)); 149 | 150 | int n = gradInput1.numel(); 151 | 152 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_backward_input1", ([&] { 153 | 154 | kernel_channelnorm_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 155 | //at::globalContext().getCurrentCUDAStream() >>>( 156 | n, 157 | input1.data(), 158 | input1_size, 159 | input1_stride, 160 | output.data(), 161 | output_size, 162 | output_stride, 163 | gradOutput.data(), 164 | gradOutput_size, 165 | gradOutput_stride, 166 | gradInput1.data(), 167 | gradInput1_size, 168 | gradInput1_stride, 169 | norm_deg 170 | ); 171 | 172 | })); 173 | 174 | // TODO: Add ATen-equivalent check 175 | 176 | // THCudaCheck(cudaGetLastError()); 177 | } 178 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/channelnorm_package/channelnorm_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | void channelnorm_kernel_forward( 6 | at::Tensor& input1, 7 | at::Tensor& output, 8 | int norm_deg); 9 | 10 | 11 | void channelnorm_kernel_backward( 12 | at::Tensor& input1, 13 | at::Tensor& output, 14 | at::Tensor& gradOutput, 15 | at::Tensor& gradInput1, 16 | int norm_deg); 17 | -------------------------------------------------------------------------------- 
/models/flownet2_pytorch/networks/channelnorm_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_52,code=sm_52', 12 | '-gencode', 'arch=compute_60,code=sm_60', 13 | '-gencode', 'arch=compute_61,code=sm_61', 14 | '-gencode', 'arch=compute_70,code=sm_70', 15 | '-gencode', 'arch=compute_70,code=compute_70' 16 | ] 17 | 18 | setup( 19 | name='channelnorm_cuda', 20 | ext_modules=[ 21 | CUDAExtension('channelnorm_cuda', [ 22 | 'channelnorm_cuda.cc', 23 | 'channelnorm_kernel.cu' 24 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 25 | ], 26 | cmdclass={ 27 | 'build_ext': BuildExtension 28 | }) 29 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/correlation_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/correlation_package/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/correlation_package/correlation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.module import Module 3 | from torch.autograd import Function 4 | import correlation_cuda 5 | 6 | class CorrelationFunction(Function): 7 | 8 | def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1): 9 | super(CorrelationFunction, self).__init__() 10 | self.pad_size = pad_size 11 | self.kernel_size = kernel_size 12 | self.max_displacement = max_displacement 13 | self.stride1 = stride1 14 | self.stride2 = stride2 15 | self.corr_multiply = corr_multiply 16 | # self.out_channel = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1) 17 | 18 | def forward(self, input1, input2): 19 | self.save_for_backward(input1, input2) 20 | 21 | with torch.cuda.device_of(input1): 22 | rbot1 = input1.new() 23 | rbot2 = input2.new() 24 | output = input1.new() 25 | 26 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 27 | self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply) 28 | 29 | return output 30 | 31 | def backward(self, grad_output): 32 | input1, input2 = self.saved_tensors 33 | 34 | with torch.cuda.device_of(input1): 35 | rbot1 = input1.new() 36 | rbot2 = input2.new() 37 | 38 | grad_input1 = input1.new() 39 | grad_input2 = input2.new() 40 | 41 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2, 42 | self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply) 43 | 44 | return grad_input1, grad_input2 45 | 46 | 47 | class Correlation(Module): 48 | def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1): 49 | super(Correlation, self).__init__() 50 | self.pad_size = pad_size 51 | self.kernel_size = kernel_size 52 | self.max_displacement = max_displacement 53 | self.stride1 = stride1 54 | self.stride2 = stride2 55 | self.corr_multiply = corr_multiply 56 | 57 | def forward(self, input1, input2): 58 | 59 
| result = CorrelationFunction(self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)(input1, input2) 60 | 61 | return result 62 | 63 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/correlation_package/correlation_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "correlation_cuda_kernel.cuh" 9 | 10 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output, 11 | int pad_size, 12 | int kernel_size, 13 | int max_displacement, 14 | int stride1, 15 | int stride2, 16 | int corr_type_multiply) 17 | { 18 | 19 | int batchSize = input1.size(0); 20 | 21 | int nInputChannels = input1.size(1); 22 | int inputHeight = input1.size(2); 23 | int inputWidth = input1.size(3); 24 | 25 | int kernel_radius = (kernel_size - 1) / 2; 26 | int border_radius = kernel_radius + max_displacement; 27 | 28 | int paddedInputHeight = inputHeight + 2 * pad_size; 29 | int paddedInputWidth = inputWidth + 2 * pad_size; 30 | 31 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1); 32 | 33 | int outputHeight = ceil(static_cast(paddedInputHeight - 2 * border_radius) / static_cast(stride1)); 34 | int outputwidth = ceil(static_cast(paddedInputWidth - 2 * border_radius) / static_cast(stride1)); 35 | 36 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 37 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 38 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth}); 39 | 40 | rInput1.fill_(0); 41 | rInput2.fill_(0); 42 | output.fill_(0); 43 | 44 | int success = correlation_forward_cuda_kernel( 45 | output, 46 | output.size(0), 47 | output.size(1), 48 | output.size(2), 49 | output.size(3), 50 | output.stride(0), 51 | output.stride(1), 52 | output.stride(2), 53 | output.stride(3), 54 | input1, 55 | input1.size(1), 56 | input1.size(2), 57 | input1.size(3), 58 | input1.stride(0), 59 | input1.stride(1), 60 | input1.stride(2), 61 | input1.stride(3), 62 | input2, 63 | input2.size(1), 64 | input2.stride(0), 65 | input2.stride(1), 66 | input2.stride(2), 67 | input2.stride(3), 68 | rInput1, 69 | rInput2, 70 | pad_size, 71 | kernel_size, 72 | max_displacement, 73 | stride1, 74 | stride2, 75 | corr_type_multiply, 76 | at::cuda::getCurrentCUDAStream() 77 | //at::globalContext().getCurrentCUDAStream() 78 | ); 79 | 80 | //check for errors 81 | if (!success) { 82 | AT_ERROR("CUDA call failed"); 83 | } 84 | 85 | return 1; 86 | 87 | } 88 | 89 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput, 90 | at::Tensor& gradInput1, at::Tensor& gradInput2, 91 | int pad_size, 92 | int kernel_size, 93 | int max_displacement, 94 | int stride1, 95 | int stride2, 96 | int corr_type_multiply) 97 | { 98 | 99 | int batchSize = input1.size(0); 100 | int nInputChannels = input1.size(1); 101 | int paddedInputHeight = input1.size(2)+ 2 * pad_size; 102 | int paddedInputWidth = input1.size(3)+ 2 * pad_size; 103 | 104 | int height = input1.size(2); 105 | int width = input1.size(3); 106 | 107 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 108 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, 
nInputChannels}); 109 | gradInput1.resize_({batchSize, nInputChannels, height, width}); 110 | gradInput2.resize_({batchSize, nInputChannels, height, width}); 111 | 112 | rInput1.fill_(0); 113 | rInput2.fill_(0); 114 | gradInput1.fill_(0); 115 | gradInput2.fill_(0); 116 | 117 | int success = correlation_backward_cuda_kernel(gradOutput, 118 | gradOutput.size(0), 119 | gradOutput.size(1), 120 | gradOutput.size(2), 121 | gradOutput.size(3), 122 | gradOutput.stride(0), 123 | gradOutput.stride(1), 124 | gradOutput.stride(2), 125 | gradOutput.stride(3), 126 | input1, 127 | input1.size(1), 128 | input1.size(2), 129 | input1.size(3), 130 | input1.stride(0), 131 | input1.stride(1), 132 | input1.stride(2), 133 | input1.stride(3), 134 | input2, 135 | input2.stride(0), 136 | input2.stride(1), 137 | input2.stride(2), 138 | input2.stride(3), 139 | gradInput1, 140 | gradInput1.stride(0), 141 | gradInput1.stride(1), 142 | gradInput1.stride(2), 143 | gradInput1.stride(3), 144 | gradInput2, 145 | gradInput2.size(1), 146 | gradInput2.stride(0), 147 | gradInput2.stride(1), 148 | gradInput2.stride(2), 149 | gradInput2.stride(3), 150 | rInput1, 151 | rInput2, 152 | pad_size, 153 | kernel_size, 154 | max_displacement, 155 | stride1, 156 | stride2, 157 | corr_type_multiply, 158 | at::cuda::getCurrentCUDAStream() 159 | //at::globalContext().getCurrentCUDAStream() 160 | ); 161 | 162 | if (!success) { 163 | AT_ERROR("CUDA call failed"); 164 | } 165 | 166 | return 1; 167 | } 168 | 169 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 170 | m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)"); 171 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)"); 172 | } 173 | 174 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/correlation_package/correlation_cuda_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int correlation_forward_cuda_kernel(at::Tensor& output, 8 | int ob, 9 | int oc, 10 | int oh, 11 | int ow, 12 | int osb, 13 | int osc, 14 | int osh, 15 | int osw, 16 | 17 | at::Tensor& input1, 18 | int ic, 19 | int ih, 20 | int iw, 21 | int isb, 22 | int isc, 23 | int ish, 24 | int isw, 25 | 26 | at::Tensor& input2, 27 | int gc, 28 | int gsb, 29 | int gsc, 30 | int gsh, 31 | int gsw, 32 | 33 | at::Tensor& rInput1, 34 | at::Tensor& rInput2, 35 | int pad_size, 36 | int kernel_size, 37 | int max_displacement, 38 | int stride1, 39 | int stride2, 40 | int corr_type_multiply, 41 | cudaStream_t stream); 42 | 43 | 44 | int correlation_backward_cuda_kernel( 45 | at::Tensor& gradOutput, 46 | int gob, 47 | int goc, 48 | int goh, 49 | int gow, 50 | int gosb, 51 | int gosc, 52 | int gosh, 53 | int gosw, 54 | 55 | at::Tensor& input1, 56 | int ic, 57 | int ih, 58 | int iw, 59 | int isb, 60 | int isc, 61 | int ish, 62 | int isw, 63 | 64 | at::Tensor& input2, 65 | int gsb, 66 | int gsc, 67 | int gsh, 68 | int gsw, 69 | 70 | at::Tensor& gradInput1, 71 | int gisb, 72 | int gisc, 73 | int gish, 74 | int gisw, 75 | 76 | at::Tensor& gradInput2, 77 | int ggc, 78 | int ggsb, 79 | int ggsc, 80 | int ggsh, 81 | int ggsw, 82 | 83 | at::Tensor& rInput1, 84 | at::Tensor& rInput2, 85 | int pad_size, 86 | int kernel_size, 87 | int max_displacement, 88 | int stride1, 89 | int stride2, 90 | int corr_type_multiply, 91 | cudaStream_t stream); 92 | -------------------------------------------------------------------------------- 
/models/flownet2_pytorch/networks/correlation_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup, find_packages 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70' 17 | ] 18 | 19 | setup( 20 | name='correlation_cuda', 21 | ext_modules=[ 22 | CUDAExtension('correlation_cuda', [ 23 | 'correlation_cuda.cc', 24 | 'correlation_cuda_kernel.cu' 25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 26 | ], 27 | cmdclass={ 28 | 'build_ext': BuildExtension 29 | }) 30 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/resample2d_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/resample2d_package/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/resample2d_package/resample2d.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.autograd import Function, Variable 3 | import resample2d_cuda 4 | 5 | class Resample2dFunction(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, input1, input2, kernel_size=1): 9 | assert input1.is_contiguous() 10 | assert input2.is_contiguous() 11 | 12 | ctx.save_for_backward(input1, input2) 13 | ctx.kernel_size = kernel_size 14 | 15 | _, d, _, _ = input1.size() 16 | b, _, h, w = input2.size() 17 | output = input1.new(b, d, h, w).zero_() 18 | 19 | resample2d_cuda.forward(input1, input2, output, kernel_size) 20 | 21 | return output 22 | 23 | @staticmethod 24 | def backward(ctx, grad_output): 25 | assert grad_output.is_contiguous() 26 | 27 | input1, input2 = ctx.saved_tensors 28 | 29 | grad_input1 = Variable(input1.new(input1.size()).zero_()) 30 | grad_input2 = Variable(input1.new(input2.size()).zero_()) 31 | 32 | resample2d_cuda.backward(input1, input2, grad_output.data, 33 | grad_input1.data, grad_input2.data, 34 | ctx.kernel_size) 35 | 36 | return grad_input1, grad_input2, None 37 | 38 | class Resample2d(Module): 39 | 40 | def __init__(self, kernel_size=1): 41 | super(Resample2d, self).__init__() 42 | self.kernel_size = kernel_size 43 | 44 | def forward(self, input1, input2): 45 | input1_c = input1.contiguous() 46 | return Resample2dFunction.apply(input1_c, input2, self.kernel_size) 47 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/resample2d_package/resample2d_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "resample2d_kernel.cuh" 5 | 6 | int resample2d_cuda_forward( 7 | at::Tensor& input1, 8 | at::Tensor& input2, 9 | at::Tensor& output, 10 | int kernel_size) { 11 | resample2d_kernel_forward(input1, input2, output, kernel_size); 12 | return 1; 13 | } 14 | 15 | int resample2d_cuda_backward( 16 | at::Tensor& 
input1, 17 | at::Tensor& input2, 18 | at::Tensor& gradOutput, 19 | at::Tensor& gradInput1, 20 | at::Tensor& gradInput2, 21 | int kernel_size) { 22 | resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size); 23 | return 1; 24 | } 25 | 26 | 27 | 28 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 29 | m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)"); 30 | m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)"); 31 | } 32 | 33 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/resample2d_package/resample2d_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | void resample2d_kernel_forward( 6 | at::Tensor& input1, 7 | at::Tensor& input2, 8 | at::Tensor& output, 9 | int kernel_size); 10 | 11 | void resample2d_kernel_backward( 12 | at::Tensor& input1, 13 | at::Tensor& input2, 14 | at::Tensor& gradOutput, 15 | at::Tensor& gradInput1, 16 | at::Tensor& gradInput2, 17 | int kernel_size); 18 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/resample2d_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70' 17 | ] 18 | 19 | setup( 20 | name='resample2d_cuda', 21 | ext_modules=[ 22 | CUDAExtension('resample2d_cuda', [ 23 | 'resample2d_cuda.cc', 24 | 'resample2d_kernel.cu' 25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 26 | ], 27 | cmdclass={ 28 | 'build_ext': BuildExtension 29 | }) 30 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/networks/submodules.py: -------------------------------------------------------------------------------- 1 | # freda (todo) : 2 | 3 | import torch.nn as nn 4 | import torch 5 | import numpy as np 6 | 7 | def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1): 8 | if batchNorm: 9 | return nn.Sequential( 10 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=False), 11 | nn.BatchNorm2d(out_planes), 12 | nn.LeakyReLU(0.1,inplace=True) 13 | ) 14 | else: 15 | return nn.Sequential( 16 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 17 | nn.LeakyReLU(0.1,inplace=True) 18 | ) 19 | 20 | def i_conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, bias = True): 21 | if batchNorm: 22 | return nn.Sequential( 23 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias), 24 | nn.BatchNorm2d(out_planes), 25 | ) 26 | else: 27 | return nn.Sequential( 28 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias), 29 | ) 30 | 31 | def predict_flow(in_planes): 32 | return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True) 33 | 
34 | def deconv(in_planes, out_planes): 35 | return nn.Sequential( 36 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True), 37 | nn.LeakyReLU(0.1,inplace=True) 38 | ) 39 | 40 | class tofp16(nn.Module): 41 | def __init__(self): 42 | super(tofp16, self).__init__() 43 | 44 | def forward(self, input): 45 | return input.half() 46 | 47 | 48 | class tofp32(nn.Module): 49 | def __init__(self): 50 | super(tofp32, self).__init__() 51 | 52 | def forward(self, input): 53 | return input.float() 54 | 55 | 56 | def init_deconv_bilinear(weight): 57 | f_shape = weight.size() 58 | heigh, width = f_shape[-2], f_shape[-1] 59 | f = np.ceil(width/2.0) 60 | c = (2 * f - 1 - f % 2) / (2.0 * f) 61 | bilinear = np.zeros([heigh, width]) 62 | for x in range(width): 63 | for y in range(heigh): 64 | value = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) 65 | bilinear[x, y] = value 66 | weight.data.fill_(0.) 67 | for i in range(f_shape[0]): 68 | for j in range(f_shape[1]): 69 | weight.data[i,j,:,:] = torch.from_numpy(bilinear) 70 | 71 | 72 | def save_grad(grads, name): 73 | def hook(grad): 74 | grads[name] = grad 75 | return hook 76 | 77 | ''' 78 | def save_grad(grads, name): 79 | def hook(grad): 80 | grads[name] = grad 81 | return hook 82 | import torch 83 | from channelnorm_package.modules.channelnorm import ChannelNorm 84 | model = ChannelNorm().cuda() 85 | grads = {} 86 | a = 100*torch.autograd.Variable(torch.randn((1,3,5,5)).cuda(), requires_grad=True) 87 | a.register_hook(save_grad(grads, 'a')) 88 | b = model(a) 89 | y = torch.mean(b) 90 | y.backward() 91 | 92 | ''' 93 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/run-caffe2pytorch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FN2PYTORCH=${1:-/} 4 | 5 | # install custom layers 6 | sudo nvidia-docker build -t $USER/pytorch:CUDA8-py27 . 
7 | sudo nvidia-docker run --rm -ti --volume=${FN2PYTORCH}:/flownet2-pytorch:rw --workdir=/flownet2-pytorch $USER/pytorch:CUDA8-py27 /bin/bash -c "./install.sh" 8 | 9 | # convert FlowNet2-C, CS, CSS, CSS-ft-sd, SD, S and 2 to PyTorch 10 | sudo nvidia-docker run -ti --volume=${FN2PYTORCH}:/fn2pytorch:rw flownet2:latest /bin/bash -c "source /flownet2/flownet2/set-env.sh && cd /flownet2/flownet2/models && \ 11 | python /fn2pytorch/convert.py ./FlowNet2-C/FlowNet2-C_weights.caffemodel ./FlowNet2-C/FlowNet2-C_deploy.prototxt.template /fn2pytorch && 12 | python /fn2pytorch/convert.py ./FlowNet2-CS/FlowNet2-CS_weights.caffemodel ./FlowNet2-CS/FlowNet2-CS_deploy.prototxt.template /fn2pytorch && \ 13 | python /fn2pytorch/convert.py ./FlowNet2-CSS/FlowNet2-CSS_weights.caffemodel.h5 ./FlowNet2-CSS/FlowNet2-CSS_deploy.prototxt.template /fn2pytorch && \ 14 | python /fn2pytorch/convert.py ./FlowNet2-CSS-ft-sd/FlowNet2-CSS-ft-sd_weights.caffemodel.h5 ./FlowNet2-CSS-ft-sd/FlowNet2-CSS-ft-sd_deploy.prototxt.template /fn2pytorch && \ 15 | python /fn2pytorch/convert.py ./FlowNet2-SD/FlowNet2-SD_weights.caffemodel.h5 ./FlowNet2-SD/FlowNet2-SD_deploy.prototxt.template /fn2pytorch && \ 16 | python /fn2pytorch/convert.py ./FlowNet2-S/FlowNet2-S_weights.caffemodel.h5 ./FlowNet2-S/FlowNet2-S_deploy.prototxt.template /fn2pytorch && \ 17 | python /fn2pytorch/convert.py ./FlowNet2/FlowNet2_weights.caffemodel.h5 ./FlowNet2/FlowNet2_deploy.prototxt.template /fn2pytorch" -------------------------------------------------------------------------------- /models/flownet2_pytorch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/utils/__init__.py -------------------------------------------------------------------------------- /models/flownet2_pytorch/utils/flow_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | TAG_CHAR = np.array([202021.25], np.float32) 4 | 5 | def readFlow(fn): 6 | """ Read .flo file in Middlebury format""" 7 | # Code adapted from: 8 | # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy 9 | 10 | # WARNING: this will work on little-endian architectures (eg Intel x86) only! 11 | # print 'fn = %s'%(fn) 12 | with open(fn, 'rb') as f: 13 | magic = np.fromfile(f, np.float32, count=1) 14 | if 202021.25 != magic: 15 | print('Magic number incorrect. Invalid .flo file') 16 | return None 17 | else: 18 | w = np.fromfile(f, np.int32, count=1) 19 | h = np.fromfile(f, np.int32, count=1) 20 | # print 'Reading %d x %d flo file\n' % (w, h) 21 | data = np.fromfile(f, np.float32, count=2*int(w)*int(h)) 22 | # Reshape data into 3D array (columns, rows, bands) 23 | # The reshape here is for visualization, the original code is (w,h,2) 24 | return np.resize(data, (int(h), int(w), 2)) 25 | 26 | def writeFlow(filename,uv,v=None): 27 | """ Write optical flow to file. 28 | 29 | If v is None, uv is assumed to contain both u and v channels, 30 | stacked in depth. 31 | Original code by Deqing Sun, adapted from Daniel Scharstein. 
32 | """ 33 | nBands = 2 34 | 35 | if v is None: 36 | assert(uv.ndim == 3) 37 | assert(uv.shape[2] == 2) 38 | u = uv[:,:,0] 39 | v = uv[:,:,1] 40 | else: 41 | u = uv 42 | 43 | assert(u.shape == v.shape) 44 | height,width = u.shape 45 | f = open(filename,'wb') 46 | # write the header 47 | f.write(TAG_CHAR) 48 | np.array(width).astype(np.int32).tofile(f) 49 | np.array(height).astype(np.int32).tofile(f) 50 | # arrange into matrix form 51 | tmp = np.zeros((height, width*nBands)) 52 | tmp[:,np.arange(width)*2] = u 53 | tmp[:,np.arange(width)*2 + 1] = v 54 | tmp.astype(np.float32).tofile(f) 55 | f.close() 56 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/utils/frame_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from os.path import * 3 | from scipy.misc import imread 4 | from . import flow_utils 5 | 6 | def read_gen(file_name): 7 | ext = splitext(file_name)[-1] 8 | if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg': 9 | im = imread(file_name) 10 | if im.shape[2] > 3: 11 | return im[:,:,:3] 12 | else: 13 | return im 14 | elif ext == '.bin' or ext == '.raw': 15 | return np.load(file_name) 16 | elif ext == '.flo': 17 | return flow_utils.readFlow(file_name).astype(np.float32) 18 | return [] 19 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/utils/param_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | def parse_flownetc(modules, weights, biases): 6 | keys = [ 7 | 'conv1', 8 | 'conv2', 9 | 'conv3', 10 | 'conv_redir', 11 | 'conv3_1', 12 | 'conv4', 13 | 'conv4_1', 14 | 'conv5', 15 | 'conv5_1', 16 | 'conv6', 17 | 'conv6_1', 18 | 19 | 'deconv5', 20 | 'deconv4', 21 | 'deconv3', 22 | 'deconv2', 23 | 24 | 'Convolution1', 25 | 'Convolution2', 26 | 'Convolution3', 27 | 'Convolution4', 28 | 'Convolution5', 29 | 30 | 'upsample_flow6to5', 31 | 'upsample_flow5to4', 32 | 'upsample_flow4to3', 33 | 'upsample_flow3to2', 34 | 35 | ] 36 | i = 0 37 | for m in modules: 38 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 39 | weight = weights[keys[i]].copy() 40 | bias = biases[keys[i]].copy() 41 | if keys[i] == 'conv1': 42 | m.weight.data[:,:,:,:] = torch.from_numpy(np.flip(weight, axis=1).copy()) 43 | m.bias.data[:] = torch.from_numpy(bias) 44 | else: 45 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 46 | m.bias.data[:] = torch.from_numpy(bias) 47 | 48 | i = i + 1 49 | return 50 | 51 | def parse_flownets(modules, weights, biases, param_prefix='net2_'): 52 | keys = [ 53 | 'conv1', 54 | 'conv2', 55 | 'conv3', 56 | 'conv3_1', 57 | 'conv4', 58 | 'conv4_1', 59 | 'conv5', 60 | 'conv5_1', 61 | 'conv6', 62 | 'conv6_1', 63 | 64 | 'deconv5', 65 | 'deconv4', 66 | 'deconv3', 67 | 'deconv2', 68 | 69 | 'predict_conv6', 70 | 'predict_conv5', 71 | 'predict_conv4', 72 | 'predict_conv3', 73 | 'predict_conv2', 74 | 75 | 'upsample_flow6to5', 76 | 'upsample_flow5to4', 77 | 'upsample_flow4to3', 78 | 'upsample_flow3to2', 79 | ] 80 | for i, k in enumerate(keys): 81 | if 'upsample' in k: 82 | keys[i] = param_prefix + param_prefix + k 83 | else: 84 | keys[i] = param_prefix + k 85 | i = 0 86 | for m in modules: 87 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 88 | weight = weights[keys[i]].copy() 89 | bias = biases[keys[i]].copy() 90 | if keys[i] == param_prefix+'conv1': 91 | 
m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 92 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy()) 93 | m.weight.data[:,6:9,:,:] = torch.from_numpy(np.flip(weight[:,6:9,:,:], axis=1).copy()) 94 | m.weight.data[:,9::,:,:] = torch.from_numpy(weight[:,9:,:,:].copy()) 95 | if m.bias is not None: 96 | m.bias.data[:] = torch.from_numpy(bias) 97 | else: 98 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 99 | if m.bias is not None: 100 | m.bias.data[:] = torch.from_numpy(bias) 101 | i = i + 1 102 | return 103 | 104 | def parse_flownetsonly(modules, weights, biases, param_prefix=''): 105 | keys = [ 106 | 'conv1', 107 | 'conv2', 108 | 'conv3', 109 | 'conv3_1', 110 | 'conv4', 111 | 'conv4_1', 112 | 'conv5', 113 | 'conv5_1', 114 | 'conv6', 115 | 'conv6_1', 116 | 117 | 'deconv5', 118 | 'deconv4', 119 | 'deconv3', 120 | 'deconv2', 121 | 122 | 'Convolution1', 123 | 'Convolution2', 124 | 'Convolution3', 125 | 'Convolution4', 126 | 'Convolution5', 127 | 128 | 'upsample_flow6to5', 129 | 'upsample_flow5to4', 130 | 'upsample_flow4to3', 131 | 'upsample_flow3to2', 132 | ] 133 | for i, k in enumerate(keys): 134 | if 'upsample' in k: 135 | keys[i] = param_prefix + param_prefix + k 136 | else: 137 | keys[i] = param_prefix + k 138 | i = 0 139 | for m in modules: 140 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 141 | weight = weights[keys[i]].copy() 142 | bias = biases[keys[i]].copy() 143 | if keys[i] == param_prefix+'conv1': 144 | # print ("%s :"%(keys[i]), m.weight.size(), m.bias.size(), tf_w[keys[i]].shape[::-1]) 145 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 146 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy()) 147 | if m.bias is not None: 148 | m.bias.data[:] = torch.from_numpy(bias) 149 | else: 150 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 151 | if m.bias is not None: 152 | m.bias.data[:] = torch.from_numpy(bias) 153 | i = i + 1 154 | return 155 | 156 | def parse_flownetsd(modules, weights, biases, param_prefix='netsd_'): 157 | keys = [ 158 | 'conv0', 159 | 'conv1', 160 | 'conv1_1', 161 | 'conv2', 162 | 'conv2_1', 163 | 'conv3', 164 | 'conv3_1', 165 | 'conv4', 166 | 'conv4_1', 167 | 'conv5', 168 | 'conv5_1', 169 | 'conv6', 170 | 'conv6_1', 171 | 172 | 'deconv5', 173 | 'deconv4', 174 | 'deconv3', 175 | 'deconv2', 176 | 177 | 'interconv5', 178 | 'interconv4', 179 | 'interconv3', 180 | 'interconv2', 181 | 182 | 'Convolution1', 183 | 'Convolution2', 184 | 'Convolution3', 185 | 'Convolution4', 186 | 'Convolution5', 187 | 188 | 'upsample_flow6to5', 189 | 'upsample_flow5to4', 190 | 'upsample_flow4to3', 191 | 'upsample_flow3to2', 192 | ] 193 | for i, k in enumerate(keys): 194 | keys[i] = param_prefix + k 195 | 196 | i = 0 197 | for m in modules: 198 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 199 | weight = weights[keys[i]].copy() 200 | bias = biases[keys[i]].copy() 201 | if keys[i] == param_prefix+'conv0': 202 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 203 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy()) 204 | if m.bias is not None: 205 | m.bias.data[:] = torch.from_numpy(bias) 206 | else: 207 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 208 | if m.bias is not None: 209 | m.bias.data[:] = torch.from_numpy(bias) 210 | i = i + 1 211 | 212 | return 213 | 214 | def parse_flownetfusion(modules, weights, 
biases, param_prefix='fuse_'): 215 | keys = [ 216 | 'conv0', 217 | 'conv1', 218 | 'conv1_1', 219 | 'conv2', 220 | 'conv2_1', 221 | 222 | 'deconv1', 223 | 'deconv0', 224 | 225 | 'interconv1', 226 | 'interconv0', 227 | 228 | '_Convolution5', 229 | '_Convolution6', 230 | '_Convolution7', 231 | 232 | 'upsample_flow2to1', 233 | 'upsample_flow1to0', 234 | ] 235 | for i, k in enumerate(keys): 236 | keys[i] = param_prefix + k 237 | 238 | i = 0 239 | for m in modules: 240 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 241 | weight = weights[keys[i]].copy() 242 | bias = biases[keys[i]].copy() 243 | if keys[i] == param_prefix+'conv0': 244 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) 245 | m.weight.data[:,3::,:,:] = torch.from_numpy(weight[:,3:,:,:].copy()) 246 | if m.bias is not None: 247 | m.bias.data[:] = torch.from_numpy(bias) 248 | else: 249 | m.weight.data[:,:,:,:] = torch.from_numpy(weight) 250 | if m.bias is not None: 251 | m.bias.data[:] = torch.from_numpy(bias) 252 | i = i + 1 253 | 254 | return 255 | -------------------------------------------------------------------------------- /models/flownet2_pytorch/utils/tools.py: -------------------------------------------------------------------------------- 1 | # freda (todo) : 2 | 3 | import os, time, sys, math 4 | import subprocess, shutil 5 | from os.path import * 6 | import numpy as np 7 | from inspect import isclass 8 | from pytz import timezone 9 | from datetime import datetime 10 | import inspect 11 | import torch 12 | 13 | def datestr(): 14 | pacific = timezone('US/Pacific') 15 | now = datetime.now(pacific) 16 | return '{}{:02}{:02}_{:02}{:02}'.format(now.year, now.month, now.day, now.hour, now.minute) 17 | 18 | def module_to_dict(module, exclude=[]): 19 | return dict([(x, getattr(module, x)) for x in dir(module) 20 | if isclass(getattr(module, x)) 21 | and x not in exclude 22 | and getattr(module, x) not in exclude]) 23 | 24 | class TimerBlock: 25 | def __init__(self, title): 26 | print(("{}".format(title))) 27 | 28 | def __enter__(self): 29 | self.start = time.clock() 30 | return self 31 | 32 | def __exit__(self, exc_type, exc_value, traceback): 33 | self.end = time.clock() 34 | self.interval = self.end - self.start 35 | 36 | if exc_type is not None: 37 | self.log("Operation failed\n") 38 | else: 39 | self.log("Operation finished\n") 40 | 41 | 42 | def log(self, string): 43 | duration = time.clock() - self.start 44 | units = 's' 45 | if duration > 60: 46 | duration = duration / 60. 
47 | units = 'm' 48 | print((" [{:.3f}{}] {}".format(duration, units, string))) 49 | 50 | def log2file(self, fid, string): 51 | fid = open(fid, 'a') 52 | fid.write("%s\n"%(string)) 53 | fid.close() 54 | 55 | def add_arguments_for_module(parser, module, argument_for_class, default, skip_params=[], parameter_defaults={}): 56 | argument_group = parser.add_argument_group(argument_for_class.capitalize()) 57 | 58 | module_dict = module_to_dict(module) 59 | argument_group.add_argument('--' + argument_for_class, type=str, default=default, choices=list(module_dict.keys())) 60 | 61 | args, unknown_args = parser.parse_known_args() 62 | class_obj = module_dict[vars(args)[argument_for_class]] 63 | 64 | argspec = inspect.getargspec(class_obj.__init__) 65 | 66 | defaults = argspec.defaults[::-1] if argspec.defaults else None 67 | 68 | args = argspec.args[::-1] 69 | for i, arg in enumerate(args): 70 | cmd_arg = '{}_{}'.format(argument_for_class, arg) 71 | if arg not in skip_params + ['self', 'args']: 72 | if arg in list(parameter_defaults.keys()): 73 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(parameter_defaults[arg]), default=parameter_defaults[arg]) 74 | elif (defaults is not None and i < len(defaults)): 75 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(defaults[i]), default=defaults[i]) 76 | else: 77 | print(("[Warning]: non-default argument '{}' detected on class '{}'. This argument cannot be modified via the command line" 78 | .format(arg, module.__class__.__name__))) 79 | # We don't have a good way of dealing with inferring the type of the argument 80 | # TODO: try creating a custom action and using ast's infer type? 81 | # else: 82 | # argument_group.add_argument('--{}'.format(cmd_arg), required=True) 83 | 84 | def kwargs_from_args(args, argument_for_class): 85 | argument_for_class = argument_for_class + '_' 86 | return {key[len(argument_for_class):]: value for key, value in list(vars(args).items()) if argument_for_class in key and key != argument_for_class + 'class'} 87 | 88 | def format_dictionary_of_losses(labels, values): 89 | try: 90 | string = ', '.join([('{}: {:' + ('.3f' if value >= 0.001 else '.1e') +'}').format(name, value) for name, value in zip(labels, values)]) 91 | except (TypeError, ValueError) as e: 92 | print((list(zip(labels, values)))) 93 | string = '[Log Error] ' + str(e) 94 | 95 | return string 96 | 97 | 98 | class IteratorTimer(): 99 | def __init__(self, iterable): 100 | self.iterable = iterable 101 | self.iterator = self.iterable.__iter__() 102 | 103 | def __iter__(self): 104 | return self 105 | 106 | def __len__(self): 107 | return len(self.iterable) 108 | 109 | def __next__(self): 110 | start = time.time() 111 | n = next(self.iterator) 112 | self.last_duration = (time.time() - start) 113 | return n 114 | 115 | next = __next__ 116 | 117 | def gpumemusage(): 118 | gpu_mem = subprocess.check_output("nvidia-smi | grep MiB | cut -f 3 -d '|'", shell=True).replace(' ', '').replace('\n', '').replace('i', '') 119 | all_stat = [float(a) for a in gpu_mem.replace('/','').split('MB')[:-1]] 120 | 121 | gpu_mem = '' 122 | for i in range(len(all_stat)/2): 123 | curr, tot = all_stat[2*i], all_stat[2*i+1] 124 | util = "%1.2f"%(100*curr/tot)+'%' 125 | cmem = str(int(math.ceil(curr/1024.)))+'GB' 126 | gmem = str(int(math.ceil(tot/1024.)))+'GB' 127 | gpu_mem += util + '--' + join(cmem, gmem) + ' ' 128 | return gpu_mem 129 | 130 | 131 | def update_hyperparameter_schedule(args, epoch, global_iteration, optimizer): 132 | if args.schedule_lr_frequency > 0: 133 | 
for param_group in optimizer.param_groups: 134 | if (global_iteration + 1) % args.schedule_lr_frequency == 0: 135 | param_group['lr'] /= float(args.schedule_lr_fraction) 136 | param_group['lr'] = float(np.maximum(param_group['lr'], 0.000001)) 137 | 138 | def save_checkpoint(state, is_best, path, prefix, filename='checkpoint.pth.tar'): 139 | prefix_save = os.path.join(path, prefix) 140 | name = prefix_save + '_' + filename 141 | torch.save(state, name) 142 | if is_best: 143 | shutil.copyfile(name, prefix_save + '_model_best.pth.tar') 144 | 145 | -------------------------------------------------------------------------------- /models/models.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | import numpy as np 7 | import fractions 8 | def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0 9 | 10 | def wrap_model(opt, modelG, modelD, flowNet): 11 | if opt.n_gpus_gen == len(opt.gpu_ids): 12 | modelG = myModel(opt, modelG) 13 | modelD = myModel(opt, modelD) 14 | flowNet = myModel(opt, flowNet) 15 | else: 16 | if opt.batchSize == 1: 17 | gpu_split_id = opt.n_gpus_gen + 1 18 | modelG = nn.DataParallel(modelG, device_ids=opt.gpu_ids[0:1]) 19 | else: 20 | gpu_split_id = opt.n_gpus_gen 21 | modelG = nn.DataParallel(modelG, device_ids=opt.gpu_ids[:gpu_split_id]) 22 | modelD = nn.DataParallel(modelD, device_ids=[opt.gpu_ids[0]] + opt.gpu_ids[gpu_split_id:]) 23 | flowNet = nn.DataParallel(flowNet, device_ids=[opt.gpu_ids[0]] + opt.gpu_ids[gpu_split_id:]) 24 | return modelG, modelD, flowNet 25 | 26 | class myModel(nn.Module): 27 | def __init__(self, opt, model): 28 | super(myModel, self).__init__() 29 | self.opt = opt 30 | self.module = model 31 | self.model = nn.DataParallel(model, device_ids=opt.gpu_ids) 32 | self.bs_per_gpu = int(np.ceil(float(opt.batchSize) / len(opt.gpu_ids))) # batch size for each GPU 33 | self.pad_bs = self.bs_per_gpu * len(opt.gpu_ids) - opt.batchSize 34 | 35 | def forward(self, *inputs, **kwargs): 36 | inputs = self.add_dummy_to_tensor(inputs, self.pad_bs) 37 | outputs = self.model(*inputs, **kwargs, dummy_bs=self.pad_bs) 38 | if self.pad_bs == self.bs_per_gpu: # gpu 0 does 0 batch but still returns 1 batch 39 | return self.remove_dummy_from_tensor(outputs, 1) 40 | return outputs 41 | 42 | def add_dummy_to_tensor(self, tensors, add_size=0): 43 | if add_size == 0 or tensors is None: return tensors 44 | if type(tensors) == list or type(tensors) == tuple: 45 | return [self.add_dummy_to_tensor(tensor, add_size) for tensor in tensors] 46 | 47 | if isinstance(tensors, torch.Tensor): 48 | dummy = torch.zeros_like(tensors)[:add_size] 49 | tensors = torch.cat([dummy, tensors]) 50 | return tensors 51 | 52 | def remove_dummy_from_tensor(self, tensors, remove_size=0): 53 | if remove_size == 0 or tensors is None: return tensors 54 | if type(tensors) == list or type(tensors) == tuple: 55 | return [self.remove_dummy_from_tensor(tensor, remove_size) for tensor in tensors] 56 | 57 | if isinstance(tensors, torch.Tensor): 58 | tensors = tensors[remove_size:] 59 | return tensors 60 | 61 | def create_model(opt): 62 | print(opt.model) 63 | if opt.model == 'vid2vid': 64 | from .vid2vid_model_G import Vid2VidModelG 65 | modelG = Vid2VidModelG() 66 | if opt.isTrain: 67 | from .vid2vid_model_D import Vid2VidModelD 68 | 
modelD = Vid2VidModelD() 69 | else: 70 | raise ValueError("Model [%s] not recognized." % opt.model) 71 | 72 | if opt.isTrain: 73 | from .flownet import FlowNet 74 | flowNet = FlowNet() 75 | 76 | modelG.initialize(opt) 77 | if opt.isTrain: 78 | modelD.initialize(opt) 79 | flowNet.initialize(opt) 80 | if not opt.fp16: 81 | modelG, modelD, flownet = wrap_model(opt, modelG, modelD, flowNet) 82 | return [modelG, modelD, flowNet] 83 | else: 84 | return modelG 85 | 86 | def create_optimizer(opt, models): 87 | modelG, modelD, flowNet = models 88 | optimizer_D_T = [] 89 | if opt.fp16: 90 | from apex import amp 91 | for s in range(opt.n_scales_temporal): 92 | optimizer_D_T.append(getattr(modelD, 'optimizer_D_T'+str(s))) 93 | modelG, optimizer_G = amp.initialize(modelG, modelG.optimizer_G, opt_level='O1') 94 | modelD, optimizers_D = amp.initialize(modelD, [modelD.optimizer_D] + optimizer_D_T, opt_level='O1') 95 | optimizer_D, optimizer_D_T = optimizers_D[0], optimizers_D[1:] 96 | modelG, modelD, flownet = wrap_model(opt, modelG, modelD, flowNet) 97 | else: 98 | optimizer_G = modelG.module.optimizer_G 99 | optimizer_D = modelD.module.optimizer_D 100 | for s in range(opt.n_scales_temporal): 101 | optimizer_D_T.append(getattr(modelD.module, 'optimizer_D_T'+str(s))) 102 | return modelG, modelD, flowNet, optimizer_G, optimizer_D, optimizer_D_T 103 | 104 | def init_params(opt, modelG, modelD, data_loader): 105 | iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt') 106 | start_epoch, epoch_iter = 1, 0 107 | ### if continue training, recover previous states 108 | if opt.continue_train: 109 | if os.path.exists(iter_path): 110 | start_epoch, epoch_iter = np.loadtxt(iter_path , delimiter=',', dtype=int) 111 | print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter)) 112 | if start_epoch > opt.niter: 113 | modelG.module.update_learning_rate(start_epoch-1, 'G') 114 | modelD.module.update_learning_rate(start_epoch-1, 'D') 115 | if (opt.n_scales_spatial > 1) and (opt.niter_fix_global != 0) and (start_epoch > opt.niter_fix_global): 116 | modelG.module.update_fixed_params() 117 | if start_epoch > opt.niter_step: 118 | data_loader.dataset.update_training_batch((start_epoch-1)//opt.niter_step) 119 | modelG.module.update_training_batch((start_epoch-1)//opt.niter_step) 120 | 121 | n_gpus = opt.n_gpus_gen if opt.batchSize == 1 else 1 # number of gpus used for generator for each batch 122 | tG, tD = opt.n_frames_G, opt.n_frames_D 123 | tDB = tD * opt.output_nc 124 | s_scales = opt.n_scales_spatial 125 | t_scales = opt.n_scales_temporal 126 | input_nc = 1 if opt.label_nc != 0 else opt.input_nc 127 | output_nc = opt.output_nc 128 | 129 | print_freq = lcm(opt.print_freq, opt.batchSize) 130 | total_steps = (start_epoch-1) * len(data_loader) + epoch_iter 131 | total_steps = total_steps // print_freq * print_freq 132 | 133 | return n_gpus, tG, tD, tDB, s_scales, t_scales, input_nc, output_nc, start_epoch, epoch_iter, print_freq, total_steps, iter_path 134 | 135 | def save_models(opt, epoch, epoch_iter, total_steps, visualizer, iter_path, modelG, modelD, end_of_epoch=False): 136 | if not end_of_epoch: 137 | if total_steps % opt.save_latest_freq == 0: 138 | visualizer.vis_print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps)) 139 | modelG.module.save('latest') 140 | modelD.module.save('latest') 141 | np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d') 142 | else: 143 | if epoch % opt.save_epoch_freq == 0: 144 | visualizer.vis_print('saving the model at 
the end of epoch %d, iters %d' % (epoch, total_steps)) 145 | modelG.module.save('latest') 146 | modelD.module.save('latest') 147 | modelG.module.save(epoch) 148 | modelD.module.save(epoch) 149 | np.savetxt(iter_path, (epoch+1, 0), delimiter=',', fmt='%d') 150 | 151 | def update_models(opt, epoch, modelG, modelD, data_loader): 152 | ### linearly decay learning rate after certain iterations 153 | if epoch > opt.niter: 154 | modelG.module.update_learning_rate(epoch, 'G') 155 | modelD.module.update_learning_rate(epoch, 'D') 156 | 157 | ### gradually grow training sequence length 158 | if (epoch % opt.niter_step) == 0: 159 | data_loader.dataset.update_training_batch(epoch//opt.niter_step) 160 | modelG.module.update_training_batch(epoch//opt.niter_step) 161 | 162 | ### finetune all scales 163 | if (opt.n_scales_spatial > 1) and (opt.niter_fix_global != 0) and (epoch == opt.niter_fix_global): 164 | modelG.module.update_fixed_params() -------------------------------------------------------------------------------- /options/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/options/__init__.py -------------------------------------------------------------------------------- /options/base_options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from util import util 4 | import torch 5 | 6 | class BaseOptions(): 7 | def __init__(self): 8 | self.parser = argparse.ArgumentParser() 9 | self.initialized = False 10 | 11 | def initialize(self): 12 | self.parser.add_argument('--dataroot', type=str, default='datasets/Cityscapes/') 13 | self.parser.add_argument('--batchSize', type=int, default=1, help='input batch size') 14 | self.parser.add_argument('--loadSize', type=int, default=512, help='scale images to this size') 15 | self.parser.add_argument('--fineSize', type=int, default=512, help='then crop to this size') 16 | self.parser.add_argument('--input_nc', type=int, default=3, help='# of input image channels') 17 | self.parser.add_argument('--label_nc', type=int, default=0, help='number of labels') 18 | self.parser.add_argument('--output_nc', type=int, default=3, help='# of output image channels') 19 | 20 | # network arch 21 | self.parser.add_argument('--netG', type=str, default='composite', help='selects model to use for netG') 22 | self.parser.add_argument('--ngf', type=int, default=128, help='# of gen filters in first conv layer') 23 | self.parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in first conv layer') 24 | self.parser.add_argument('--n_blocks', type=int, default=9, help='number of resnet blocks in generator') 25 | self.parser.add_argument('--n_downsample_G', type=int, default=3, help='number of downsampling layers in netG') 26 | 27 | self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 28 | self.parser.add_argument('--n_gpus_gen', type=int, default=-1, help='how many gpus are used for generator (the rest are used for discriminator). -1 means use all gpus') 29 | self.parser.add_argument('--name', type=str, default='experiment_name', help='name of the experiment. It decides where to store samples and models') 30 | self.parser.add_argument('--dataset_mode', type=str, default='temporal', help='chooses how datasets are loaded. 
[unaligned | aligned | single]') 31 | self.parser.add_argument('--model', type=str, default='vid2vid', help='chooses which model to use. vid2vid, test') 32 | self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data') 33 | self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here') 34 | self.parser.add_argument('--norm', type=str, default='batch', help='instance normalization or batch normalization') 35 | self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') 36 | self.parser.add_argument('--display_winsize', type=int, default=512, help='display window size') 37 | self.parser.add_argument('--display_id', type=int, default=0, help='window id of the web display') 38 | self.parser.add_argument('--tf_log', action='store_true', help='if specified, use tensorboard logging. Requires tensorflow installed') 39 | 40 | self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.') 41 | self.parser.add_argument('--resize_or_crop', type=str, default='scaleWidth', help='scaling and cropping of images at load time [resize_and_crop|crop|scaledCrop|scaleWidth|scaleWidth_and_crop|scaleWidth_and_scaledCrop|scaleHeight|scaleHeight_and_crop] etc') 42 | self.parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data argumentation') 43 | 44 | # more features as input 45 | self.parser.add_argument('--use_instance', action='store_true', help='if specified, add instance map as feature for class A') 46 | self.parser.add_argument('--label_feat', action='store_true', help='if specified, encode label features as input') 47 | self.parser.add_argument('--feat_num', type=int, default=3, help='number of encoded features') 48 | self.parser.add_argument('--nef', type=int, default=32, help='# of encoder filters in first conv layer') 49 | self.parser.add_argument('--load_features', action='store_true', help='if specified, load precomputed feature maps') 50 | self.parser.add_argument('--netE', type=str, default='simple', help='which model to use for encoder') 51 | self.parser.add_argument('--n_downsample_E', type=int, default=3, help='number of downsampling layers in netE') 52 | 53 | # for cascaded resnet 54 | self.parser.add_argument('--n_blocks_local', type=int, default=3, help='number of resnet blocks in outmost multiscale resnet') 55 | self.parser.add_argument('--n_local_enhancers', type=int, default=1, help='number of cascaded layers') 56 | 57 | # temporal 58 | self.parser.add_argument('--n_frames_G', type=int, default=3, help='number of input frames to feed into generator, i.e., n_frames_G-1 is the number of frames we look into past') 59 | self.parser.add_argument('--n_scales_spatial', type=int, default=1, help='number of spatial scales in the coarse-to-fine generator') 60 | self.parser.add_argument('--no_first_img', action='store_true', help='if specified, generator also tries to synthesize first image') 61 | self.parser.add_argument('--use_single_G', action='store_true', help='if specified, use single frame generator for the first frame') 62 | self.parser.add_argument('--fg', action='store_true', help='if specified, use foreground-background seperation model') 63 | self.parser.add_argument('--fg_labels', type=str, default='26', help='label 
indices for foreground objects') 64 | self.parser.add_argument('--no_flow', action='store_true', help='if specified, do not use flow warping and directly synthesize frames') 65 | 66 | # face specific 67 | self.parser.add_argument('--no_canny_edge', action='store_true', help='do *not* use canny edge as input') 68 | self.parser.add_argument('--no_dist_map', action='store_true', help='do *not* use distance transform map as input') 69 | self.parser.add_argument('--random_scale_points', action='store_true', help='randomly scale face keypoints a bit to create different results') 70 | 71 | # pose specific 72 | self.parser.add_argument('--densepose_only', action='store_true', help='use only densepose as input') 73 | self.parser.add_argument('--openpose_only', action='store_true', help='use only openpose as input') 74 | self.parser.add_argument('--add_face_disc', action='store_true', help='add face discriminator') 75 | self.parser.add_argument('--remove_face_labels', action='store_true', help='remove face labels to better adapt to different face shapes') 76 | self.parser.add_argument('--random_drop_prob', type=float, default=0.05, help='the probability to randomly drop each pose segment during training') 77 | self.parser.add_argument('--basic_point_only', action='store_true', help='only use basic joint keypoints for openpose, without hand or face keypoints') 78 | 79 | # miscellaneous 80 | self.parser.add_argument('--load_pretrain', type=str, default='', help='if specified, load the pretrained model') 81 | self.parser.add_argument('--debug', action='store_true', help='if specified, use small dataset for debug') 82 | self.parser.add_argument('--fp16', action='store_true', default=False, help='train with AMP') 83 | self.parser.add_argument('--local_rank', type=int, default=0, help='local rank for distributed training') 84 | 85 | self.initialized = True 86 | 87 | def parse_str(self, ids): 88 | str_ids = ids.split(',') 89 | ids_list = [] 90 | for str_id in str_ids: 91 | id = int(str_id) 92 | if id >= 0: 93 | ids_list.append(id) 94 | return ids_list 95 | 96 | def parse(self, save=True): 97 | if not self.initialized: 98 | self.initialize() 99 | self.opt = self.parser.parse_args() 100 | self.opt.isTrain = self.isTrain # train or test 101 | 102 | self.opt.fg_labels = self.parse_str(self.opt.fg_labels) 103 | self.opt.gpu_ids = self.parse_str(self.opt.gpu_ids) 104 | if self.opt.n_gpus_gen == -1: 105 | self.opt.n_gpus_gen = len(self.opt.gpu_ids) 106 | 107 | # set gpu ids 108 | if len(self.opt.gpu_ids) > 0: 109 | torch.cuda.set_device(self.opt.gpu_ids[0]) 110 | 111 | args = vars(self.opt) 112 | 113 | print('------------ Options -------------') 114 | for k, v in sorted(args.items()): 115 | print('%s: %s' % (str(k), str(v))) 116 | print('-------------- End ----------------') 117 | 118 | # save to the disk 119 | expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name) 120 | util.mkdirs(expr_dir) 121 | if save: 122 | file_name = os.path.join(expr_dir, 'opt.txt') 123 | with open(file_name, 'wt') as opt_file: 124 | opt_file.write('------------ Options -------------\n') 125 | for k, v in sorted(args.items()): 126 | opt_file.write('%s: %s\n' % (str(k), str(v))) 127 | opt_file.write('-------------- End ----------------\n') 128 | return self.opt 129 | -------------------------------------------------------------------------------- /options/test_options.py: -------------------------------------------------------------------------------- 1 | from .base_options import BaseOptions 2 | 3 | 4 | class 
TestOptions(BaseOptions): 5 | def initialize(self): 6 | BaseOptions.initialize(self) 7 | self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.') 8 | self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.') 9 | self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images') 10 | self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 11 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model') 12 | self.parser.add_argument('--how_many', type=int, default=300, help='how many test images to run') 13 | self.parser.add_argument('--use_real_img', action='store_true', help='use real image for first frame') 14 | self.parser.add_argument('--start_frame', type=int, default=0, help='frame index to start inference on') 15 | self.isTrain = False 16 | -------------------------------------------------------------------------------- /options/train_options.py: -------------------------------------------------------------------------------- 1 | from .base_options import BaseOptions 2 | 3 | 4 | class TrainOptions(BaseOptions): 5 | def initialize(self): 6 | BaseOptions.initialize(self) 7 | self.parser.add_argument('--display_freq', type=int, default=100, help='frequency of showing training results on screen') 8 | self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') 9 | self.parser.add_argument('--save_latest_freq', type=int, default=1000, help='frequency of saving the latest results') 10 | self.parser.add_argument('--save_epoch_freq', type=int, default=1, help='frequency of saving checkpoints at the end of epochs') 11 | self.parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') 12 | self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') 13 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? 
set to latest to use latest cached model') 14 | self.parser.add_argument('--niter', type=int, default=10, help='# of iter at starting learning rate') 15 | self.parser.add_argument('--niter_decay', type=int, default=10, help='# of iter to linearly decay learning rate to zero') 16 | self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') 17 | self.parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam') 18 | self.parser.add_argument('--TTUR', action='store_true', help='Use TTUR training scheme') 19 | self.parser.add_argument('--gan_mode', type=str, default='ls', help='(ls|original|hinge)') 20 | self.parser.add_argument('--pool_size', type=int, default=1, help='the size of image buffer that stores previously generated images') 21 | self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/') 22 | 23 | # for discriminators 24 | self.parser.add_argument('--num_D', type=int, default=2, help='number of patch scales in each discriminator') 25 | self.parser.add_argument('--n_layers_D', type=int, default=3, help='number of layers in discriminator') 26 | self.parser.add_argument('--no_vgg', action='store_true', help='do not use VGG feature matching loss') 27 | self.parser.add_argument('--no_ganFeat', action='store_true', help='do not match discriminator features') 28 | self.parser.add_argument('--lambda_feat', type=float, default=10.0, help='weight for feature matching') 29 | self.parser.add_argument('--sparse_D', action='store_true', help='use sparse temporal discriminators to save memory') 30 | 31 | # for temporal 32 | self.parser.add_argument('--lambda_T', type=float, default=10.0, help='weight for temporal loss') 33 | self.parser.add_argument('--lambda_F', type=float, default=10.0, help='weight for flow loss') 34 | self.parser.add_argument('--n_frames_D', type=int, default=3, help='number of frames to feed into temporal discriminator') 35 | self.parser.add_argument('--n_scales_temporal', type=int, default=2, help='number of temporal scales in the temporal discriminator') 36 | self.parser.add_argument('--max_frames_per_gpu', type=int, default=1, help='max number of frames to load into one GPU at a time') 37 | self.parser.add_argument('--max_frames_backpropagate', type=int, default=1, help='max number of frames to backpropagate') 38 | self.parser.add_argument('--max_t_step', type=int, default=1, help='max spacing between neighboring sampled frames. 
If greater than 1, the network may randomly skip frames during training.') 39 | self.parser.add_argument('--n_frames_total', type=int, default=30, help='the overall number of frames in a sequence to train with') 40 | self.parser.add_argument('--niter_step', type=int, default=5, help='how many epochs do we change training batch size again') 41 | self.parser.add_argument('--niter_fix_global', type=int, default=0, help='if specified, only train the finest spatial layer for the given iterations') 42 | 43 | self.isTrain = True 44 | -------------------------------------------------------------------------------- /scripts/download_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive import * 3 | 4 | file_id = '1rPcbnanuApZeo2uc7h55OneBkbcFCnnf' 5 | chpt_path = './datasets/' 6 | if not os.path.isdir(chpt_path): 7 | os.makedirs(chpt_path) 8 | destination = os.path.join(chpt_path, 'datasets.zip') 9 | download_file_from_google_drive(file_id, destination) 10 | unzip_file(destination, chpt_path) -------------------------------------------------------------------------------- /scripts/download_flownet2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive import * 3 | import torch 4 | 5 | """if torch.__version__ == '0.4.1': 6 | file_id = '1gKwE1Ad41TwtAzwDcN3dYa_S6DcVyiSl' 7 | file_name = 'flownet2_pytorch_041.zip' 8 | else: 9 | file_id = '1F2h_6e8gyTqxnbmFFW72zsxx_JX0dKFo' 10 | file_name = 'flownet2_pytorch_040.zip'""" 11 | 12 | chpt_path = './models/' 13 | if not os.path.isdir(chpt_path): 14 | os.makedirs(chpt_path) 15 | """destination = os.path.join(chpt_path, file_name) 16 | download_file_from_google_drive(file_id, destination) 17 | unzip_file(destination, chpt_path)""" 18 | os.system('cd %s/flownet2_pytorch/; bash install.sh; cd ../../' % chpt_path) -------------------------------------------------------------------------------- /scripts/download_gdrive.py: -------------------------------------------------------------------------------- 1 | # Download code taken from Code taken from https://stackoverflow.com questions/25010369/wget-curl-large-file-from-google-drive/39225039#39225039 2 | import requests, zipfile, os 3 | def download_file_from_google_drive(id, destination): 4 | URL = "https://docs.google.com/uc?export=download" 5 | session = requests.Session() 6 | response = session.get(URL, params = { 'id' : id }, stream = True) 7 | token = get_confirm_token(response) 8 | if token: 9 | params = { 'id' : id, 'confirm' : token } 10 | response = session.get(URL, params = params, stream = True) 11 | save_response_content(response, destination) 12 | def get_confirm_token(response): 13 | for key, value in response.cookies.items(): 14 | if key.startswith('download_warning'): 15 | return value 16 | return None 17 | def save_response_content(response, destination): 18 | CHUNK_SIZE = 32768 19 | with open(destination, "wb") as f: 20 | for chunk in response.iter_content(CHUNK_SIZE): 21 | if chunk: # filter out keep-alive new chunks 22 | f.write(chunk) 23 | 24 | def unzip_file(file_name, unzip_path): 25 | zip_ref = zipfile.ZipFile(file_name, 'r') 26 | zip_ref.extractall(unzip_path) 27 | zip_ref.close() 28 | os.remove(file_name) -------------------------------------------------------------------------------- /scripts/download_models_flownet2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive 
import * 3 | 4 | file_id = '1E8re-b6csNuo-abg1vJKCDjCzlIam50F' 5 | chpt_path = './models/flownet2_pytorch/' 6 | destination = os.path.join(chpt_path, 'FlowNet2_checkpoint.pth.tar') 7 | download_file_from_google_drive(file_id, destination) -------------------------------------------------------------------------------- /scripts/face/download_gdrive.py: -------------------------------------------------------------------------------- 1 | # Download code taken from Code taken from https://stackoverflow.com questions/25010369/wget-curl-large-file-from-google-drive/39225039#39225039 2 | import requests, zipfile, os 3 | def download_file_from_google_drive(id, destination): 4 | URL = "https://docs.google.com/uc?export=download" 5 | session = requests.Session() 6 | response = session.get(URL, params = { 'id' : id }, stream = True) 7 | token = get_confirm_token(response) 8 | if token: 9 | params = { 'id' : id, 'confirm' : token } 10 | response = session.get(URL, params = params, stream = True) 11 | save_response_content(response, destination) 12 | def get_confirm_token(response): 13 | for key, value in response.cookies.items(): 14 | if key.startswith('download_warning'): 15 | return value 16 | return None 17 | def save_response_content(response, destination): 18 | CHUNK_SIZE = 32768 19 | with open(destination, "wb") as f: 20 | for chunk in response.iter_content(CHUNK_SIZE): 21 | if chunk: # filter out keep-alive new chunks 22 | f.write(chunk) 23 | 24 | def unzip_file(file_name, unzip_path): 25 | zip_ref = zipfile.ZipFile(file_name, 'r') 26 | zip_ref.extractall(unzip_path) 27 | zip_ref.close() 28 | os.remove(file_name) -------------------------------------------------------------------------------- /scripts/face/download_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive import * 3 | 4 | file_id = '10LvNw-2lrh-6sPGkWbQDfHspkqz5AKxb' 5 | chpt_path = './checkpoints/' 6 | if not os.path.isdir(chpt_path): 7 | os.makedirs(chpt_path) 8 | destination = os.path.join(chpt_path, 'models_face.zip') 9 | download_file_from_google_drive(file_id, destination) 10 | unzip_file(destination, chpt_path) -------------------------------------------------------------------------------- /scripts/face/test_512.sh: -------------------------------------------------------------------------------- 1 | python test.py --name edge2face_512 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --input_nc 15 --loadSize 512 --use_single_G -------------------------------------------------------------------------------- /scripts/face/test_g1_256.sh: -------------------------------------------------------------------------------- 1 | python test.py --name edge2face_256_g1 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --input_nc 15 --loadSize 256 --ngf 64 --use_single_G 4 | -------------------------------------------------------------------------------- /scripts/face/test_g1_512.sh: -------------------------------------------------------------------------------- 1 | python test.py --name edge2face_512_g1 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --n_scales_spatial 2 --input_nc 15 --loadSize 512 --ngf 64 \ 4 | --use_single_G 5 | -------------------------------------------------------------------------------- /scripts/face/train_512.sh: -------------------------------------------------------------------------------- 1 | python train.py --name edge2face_512 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --input_nc 15 
--loadSize 512 --num_D 3 \ 4 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 6 \ 5 | --niter 20 --niter_decay 20 --n_frames_total 12 -------------------------------------------------------------------------------- /scripts/face/train_512_bs7.sh: -------------------------------------------------------------------------------- 1 | python train.py --name edge2face_512 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --input_nc 15 --loadSize 512 --num_D 3 \ 4 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 8 --batchSize 7 \ 5 | --niter 20 --niter_decay 20 --n_frames_total 12 -------------------------------------------------------------------------------- /scripts/face/train_g1_256.sh: -------------------------------------------------------------------------------- 1 | python train.py --name edge2face_256_g1 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --input_nc 15 --loadSize 256 --ngf 64 \ 4 | --max_frames_per_gpu 6 --n_frames_total 12 \ 5 | --niter 20 --niter_decay 20 6 | -------------------------------------------------------------------------------- /scripts/face/train_g1_512.sh: -------------------------------------------------------------------------------- 1 | python train.py --name edge2face_512_g1 \ 2 | --dataroot datasets/face/ --dataset_mode face \ 3 | --n_scales_spatial 2 --num_D 3 \ 4 | --input_nc 15 --loadSize 512 --ngf 64 \ 5 | --n_frames_total 6 --niter_step 2 --niter_fix_global 5 \ 6 | --load_pretrain checkpoints/edge2face_256_g1 7 | -------------------------------------------------------------------------------- /scripts/pose/test_1024p.sh: -------------------------------------------------------------------------------- 1 | python test.py --name pose2body_1024p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 3 \ 4 | --resize_or_crop scaleHeight --loadSize 1024 --no_first_img -------------------------------------------------------------------------------- /scripts/pose/test_256p.sh: -------------------------------------------------------------------------------- 1 | python test.py --name pose2body_256p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --resize_or_crop scaleHeight --loadSize 256 --no_first_img -------------------------------------------------------------------------------- /scripts/pose/test_512p.sh: -------------------------------------------------------------------------------- 1 | python test.py --name pose2body_512p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 2 \ 4 | --resize_or_crop scaleHeight --loadSize 512 --no_first_img -------------------------------------------------------------------------------- /scripts/pose/test_g1_1024p.sh: -------------------------------------------------------------------------------- 1 | python test.py --name pose2body_1024p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 3 --ngf 64 \ 4 | --resize_or_crop scaleHeight --loadSize 1024 --no_first_img 5 | -------------------------------------------------------------------------------- /scripts/pose/test_g1_256p.sh: -------------------------------------------------------------------------------- 1 | python test.py --name pose2body_256p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose --ngf 64 \ 3 | --input_nc 6 --resize_or_crop scaleHeight --loadSize 256 --no_first_img 4 | -------------------------------------------------------------------------------- /scripts/pose/test_g1_512p.sh: 
-------------------------------------------------------------------------------- 1 | python test.py --name pose2body_512p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 2 --ngf 64 \ 4 | --resize_or_crop scaleHeight --loadSize 512 --no_first_img 5 | -------------------------------------------------------------------------------- /scripts/pose/train_1024p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name pose2body_1024p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 3 --num_D 4 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 1536 --fineSize 1024 \ 5 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \ 6 | --no_first_img --n_frames_total 12 --max_t_step 4 --add_face_disc \ 7 | --niter_fix_global 3 --niter 5 --niter_decay 5 \ 8 | --lr 0.00005 --load_pretrain checkpoints/pose2body_512p -------------------------------------------------------------------------------- /scripts/pose/train_256p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name pose2body_256p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --num_D 2 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 384 --fineSize 256 \ 5 | --gpu_ids 0,1,2,3,4,5,6,7 --batchSize 8 --max_frames_per_gpu 3 \ 6 | --niter 5 --niter_decay 5 \ 7 | --no_first_img --n_frames_total 12 --max_t_step 4 -------------------------------------------------------------------------------- /scripts/pose/train_512p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name pose2body_512p \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 2 --num_D 3 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 768 --fineSize 512 \ 5 | --gpu_ids 0,1,2,3,4,5,6,7 --batchSize 8 \ 6 | --no_first_img --n_frames_total 12 --max_t_step 4 --add_face_disc \ 7 | --niter_fix_global 3 --niter 5 --niter_decay 5 \ 8 | --lr 0.0001 --load_pretrain checkpoints/pose2body_256p -------------------------------------------------------------------------------- /scripts/pose/train_g1_1024p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name pose2body_1024p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 3 --num_D 4 --ngf 64 --ndf 32 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 1536 --fineSize 1024 \ 5 | --no_first_img --n_frames_total 12 --max_t_step 4 --add_face_disc \ 6 | --niter_fix_global 3 --niter 5 --niter_decay 5 \ 7 | --lr 0.00005 --load_pretrain checkpoints/pose2body_512p_g1 8 | -------------------------------------------------------------------------------- /scripts/pose/train_g1_256p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name pose2body_256p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --ngf 64 --num_D 2 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 384 --fineSize 256 \ 5 | --niter 5 --niter_decay 5 \ 6 | --no_first_img --n_frames_total 12 --max_frames_per_gpu 4 --max_t_step 4 7 | -------------------------------------------------------------------------------- /scripts/pose/train_g1_512p.sh: -------------------------------------------------------------------------------- 1 | python train.py --name 
pose2body_512p_g1 \ 2 | --dataroot datasets/pose --dataset_mode pose \ 3 | --input_nc 6 --n_scales_spatial 2 --ngf 64 --num_D 3 \ 4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 768 --fineSize 512 \ 5 | --no_first_img --n_frames_total 12 --max_frames_per_gpu 2 --max_t_step 4 --add_face_disc \ 6 | --niter_fix_global 3 --niter 5 --niter_decay 5 \ 7 | --lr 0.0001 --load_pretrain checkpoints/pose2body_256p_g1 8 | -------------------------------------------------------------------------------- /scripts/street/download_gdrive.py: -------------------------------------------------------------------------------- 1 | # Download code taken from https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039#39225039 2 | import requests, zipfile, os 3 | def download_file_from_google_drive(id, destination): 4 | URL = "https://docs.google.com/uc?export=download" 5 | session = requests.Session() 6 | response = session.get(URL, params = { 'id' : id }, stream = True) 7 | token = get_confirm_token(response) 8 | if token: 9 | params = { 'id' : id, 'confirm' : token } 10 | response = session.get(URL, params = params, stream = True) 11 | save_response_content(response, destination) 12 | def get_confirm_token(response): 13 | for key, value in response.cookies.items(): 14 | if key.startswith('download_warning'): 15 | return value 16 | return None 17 | def save_response_content(response, destination): 18 | CHUNK_SIZE = 32768 19 | with open(destination, "wb") as f: 20 | for chunk in response.iter_content(CHUNK_SIZE): 21 | if chunk: # filter out keep-alive new chunks 22 | f.write(chunk) 23 | 24 | def unzip_file(file_name, unzip_path): 25 | zip_ref = zipfile.ZipFile(file_name, 'r') 26 | zip_ref.extractall(unzip_path) 27 | zip_ref.close() 28 | os.remove(file_name) -------------------------------------------------------------------------------- /scripts/street/download_models.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive import * 3 | 4 | file_id = '1MKtImgtnGC28EPU7Nh9DfFpHW6okNVkl' 5 | chpt_path = './checkpoints/' 6 | if not os.path.isdir(chpt_path): 7 | os.makedirs(chpt_path) 8 | destination = os.path.join(chpt_path, 'models.zip') 9 | download_file_from_google_drive(file_id, destination) 10 | unzip_file(destination, chpt_path) -------------------------------------------------------------------------------- /scripts/street/download_models_g1.py: -------------------------------------------------------------------------------- 1 | import os 2 | from download_gdrive import * 3 | 4 | file_id = '1QoE1p3QikxNVbbTBWWRDtIspg-RcLE8y' 5 | chpt_path = './checkpoints/' 6 | if not os.path.isdir(chpt_path): 7 | os.makedirs(chpt_path) 8 | destination = os.path.join(chpt_path, 'models_g1.zip') 9 | download_file_from_google_drive(file_id, destination) 10 | unzip_file(destination, chpt_path) 11 | -------------------------------------------------------------------------------- /scripts/street/test_2048.sh: -------------------------------------------------------------------------------- 1 | python test.py --name label2city_2048 --label_nc 35 --loadSize 2048 --n_scales_spatial 3 --use_instance --fg --use_single_G 2 | -------------------------------------------------------------------------------- /scripts/street/test_g1_1024.sh: -------------------------------------------------------------------------------- 1 | python test.py --name label2city_1024_g1 --label_nc 35 --loadSize 1024 --n_scales_spatial
3 --use_instance --fg --n_downsample_G 2 --use_single_G 2 | -------------------------------------------------------------------------------- /scripts/street/train_1024.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_1024 \ 2 | --label_nc 35 --loadSize 1024 --n_scales_spatial 2 --num_D 3 --use_instance --fg \ 3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \ 4 | --n_frames_total 4 --niter_step 2 \ 5 | --niter_fix_global 10 --load_pretrain checkpoints/label2city_512 --lr 0.0001 6 | -------------------------------------------------------------------------------- /scripts/street/train_2048.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_2048 \ 2 | --label_nc 35 --loadSize 2048 --n_scales_spatial 3 --num_D 4 --use_instance --fg \ 3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \ 4 | --n_frames_total 4 --niter_step 1 \ 5 | --niter 5 --niter_decay 5 \ 6 | --niter_fix_global 5 --load_pretrain checkpoints/label2city_1024 --lr 0.00005 -------------------------------------------------------------------------------- /scripts/street/train_2048_crop.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_2048_crop \ 2 | --label_nc 35 --loadSize 2048 --fineSize 1024 --resize_or_crop crop \ 3 | --n_scales_spatial 3 --num_D 4 --use_instance --fg \ 4 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \ 5 | --n_frames_total 4 --niter_step 1 \ 6 | --niter 5 --niter_decay 5 \ 7 | --niter_fix_global 5 --load_pretrain checkpoints/label2city_1024 --lr 0.00005 8 | -------------------------------------------------------------------------------- /scripts/street/train_512.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_512 \ 2 | --label_nc 35 --loadSize 512 --use_instance --fg \ 3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 6 \ 4 | --n_frames_total 6 --max_frames_per_gpu 2 5 | -------------------------------------------------------------------------------- /scripts/street/train_512_bs.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_512_bs \ 2 | --label_nc 35 --loadSize 512 --use_instance --fg \ 3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen -1 \ 4 | --n_frames_total 6 --batchSize 15 5 | -------------------------------------------------------------------------------- /scripts/street/train_512_no_fg.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_512_no_fg \ 2 | --label_nc 35 --loadSize 512 --use_instance \ 3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 6 \ 4 | --n_frames_total 6 --max_frames_per_gpu 2 5 | -------------------------------------------------------------------------------- /scripts/street/train_g1_1024.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_1024_g1 \ 2 | --label_nc 35 --loadSize 896 --n_scales_spatial 3 --n_frames_D 2 \ 3 | --use_instance --fg --n_downsample_G 2 --num_D 3 \ 4 | --max_frames_per_gpu 1 --n_frames_total 4 \ 5 | --niter_step 2 --niter_fix_global 8 --niter_decay 5 \ 6 | --load_pretrain checkpoints/label2city_512_g1 --lr 0.0001 7 | -------------------------------------------------------------------------------- /scripts/street/train_g1_256.sh: 
-------------------------------------------------------------------------------- 1 | python train.py --name label2city_256_g1 \ 2 | --label_nc 35 --loadSize 256 --use_instance --fg \ 3 | --n_downsample_G 2 --num_D 1 \ 4 | --max_frames_per_gpu 6 --n_frames_total 6 5 | -------------------------------------------------------------------------------- /scripts/street/train_g1_512.sh: -------------------------------------------------------------------------------- 1 | python train.py --name label2city_512_g1 \ 2 | --label_nc 35 --loadSize 512 --n_scales_spatial 2 \ 3 | --use_instance --fg --n_downsample_G 2 \ 4 | --max_frames_per_gpu 2 --n_frames_total 4 \ 5 | --niter_step 2 --niter_fix_global 8 --niter_decay 5 \ 6 | --load_pretrain checkpoints/label2city_256_g1 7 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | import time 4 | import os 5 | import numpy as np 6 | from collections import OrderedDict 7 | from torch.autograd import Variable 8 | from options.test_options import TestOptions 9 | from data.data_loader import CreateDataLoader 10 | from models.models import create_model 11 | import util.util as util 12 | from util.visualizer import Visualizer 13 | from util import html 14 | 15 | opt = TestOptions().parse(save=False) 16 | opt.nThreads = 1 # test code only supports nThreads = 1 17 | opt.batchSize = 1 # test code only supports batchSize = 1 18 | opt.serial_batches = True # no shuffle 19 | opt.no_flip = True # no flip 20 | if opt.dataset_mode == 'temporal': 21 | opt.dataset_mode = 'test' 22 | 23 | data_loader = CreateDataLoader(opt) 24 | dataset = data_loader.load_data() 25 | model = create_model(opt) 26 | visualizer = Visualizer(opt) 27 | input_nc = 1 if opt.label_nc != 0 else opt.input_nc 28 | 29 | save_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.which_epoch)) 30 | print('Doing %d frames' % len(dataset)) 31 | for i, data in enumerate(dataset): 32 | if i >= opt.how_many: 33 | break 34 | if data['change_seq']: 35 | model.fake_B_prev = None 36 | 37 | _, _, height, width = data['A'].size() 38 | A = Variable(data['A']).view(1, -1, input_nc, height, width) 39 | B = Variable(data['B']).view(1, -1, opt.output_nc, height, width) if len(data['B'].size()) > 2 else None 40 | inst = Variable(data['inst']).view(1, -1, 1, height, width) if len(data['inst'].size()) > 2 else None 41 | generated = model.inference(A, B, inst) 42 | 43 | if opt.label_nc != 0: 44 | real_A = util.tensor2label(generated[1], opt.label_nc) 45 | else: 46 | c = 3 if opt.input_nc == 3 else 1 47 | real_A = util.tensor2im(generated[1][:c], normalize=False) 48 | 49 | visual_list = [('real_A', real_A), 50 | ('fake_B', util.tensor2im(generated[0].data[0]))] 51 | visuals = OrderedDict(visual_list) 52 | img_path = data['A_path'] 53 | print('process image... %s' % img_path) 54 | visualizer.save_images(save_dir, visuals, img_path) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
3 | import time 4 | import os 5 | import torch 6 | from subprocess import call 7 | 8 | from options.train_options import TrainOptions 9 | from data.data_loader import CreateDataLoader 10 | from models.models import create_model, create_optimizer, init_params, save_models, update_models 11 | import util.util as util 12 | from util.visualizer import Visualizer 13 | 14 | def train(): 15 | opt = TrainOptions().parse() 16 | if opt.debug: 17 | opt.display_freq = 1 18 | opt.print_freq = 1 19 | opt.nThreads = 1 20 | 21 | ### initialize dataset 22 | data_loader = CreateDataLoader(opt) 23 | dataset = data_loader.load_data() 24 | dataset_size = len(data_loader) 25 | print('#training videos = %d' % dataset_size) 26 | 27 | ### initialize models 28 | models = create_model(opt) 29 | modelG, modelD, flowNet, optimizer_G, optimizer_D, optimizer_D_T = create_optimizer(opt, models) 30 | 31 | ### set parameters 32 | n_gpus, tG, tD, tDB, s_scales, t_scales, input_nc, output_nc, \ 33 | start_epoch, epoch_iter, print_freq, total_steps, iter_path = init_params(opt, modelG, modelD, data_loader) 34 | visualizer = Visualizer(opt) 35 | 36 | ### real training starts here 37 | for epoch in range(start_epoch, opt.niter + opt.niter_decay + 1): 38 | epoch_start_time = time.time() 39 | for idx, data in enumerate(dataset, start=epoch_iter): 40 | if total_steps % print_freq == 0: 41 | iter_start_time = time.time() 42 | total_steps += opt.batchSize 43 | epoch_iter += opt.batchSize 44 | 45 | # whether to collect output images 46 | save_fake = total_steps % opt.display_freq == 0 47 | n_frames_total, n_frames_load, t_len = data_loader.dataset.init_data_params(data, n_gpus, tG) 48 | fake_B_prev_last, frames_all = data_loader.dataset.init_data(t_scales) 49 | 50 | for i in range(0, n_frames_total, n_frames_load): 51 | input_A, input_B, inst_A = data_loader.dataset.prepare_data(data, i, input_nc, output_nc) 52 | 53 | ###################################### Forward Pass ########################## 54 | ####### generator 55 | fake_B, fake_B_raw, flow, weight, real_A, real_Bp, fake_B_last = modelG(input_A, input_B, inst_A, fake_B_prev_last) 56 | 57 | ####### discriminator 58 | ### individual frame discriminator 59 | real_B_prev, real_B = real_Bp[:, :-1], real_Bp[:, 1:] # the collection of previous and current real frames 60 | flow_ref, conf_ref = flowNet(real_B, real_B_prev) # reference flows and confidences 61 | fake_B_prev = modelG.module.compute_fake_B_prev(real_B_prev, fake_B_prev_last, fake_B) 62 | fake_B_prev_last = fake_B_last 63 | 64 | losses = modelD(0, reshape([real_B, fake_B, fake_B_raw, real_A, real_B_prev, fake_B_prev, flow, weight, flow_ref, conf_ref])) 65 | losses = [ torch.mean(x) if x is not None else 0 for x in losses ] 66 | loss_dict = dict(zip(modelD.module.loss_names, losses)) 67 | 68 | ### temporal discriminator 69 | # get skipped frames for each temporal scale 70 | frames_all, frames_skipped = modelD.module.get_all_skipped_frames(frames_all, \ 71 | real_B, fake_B, flow_ref, conf_ref, t_scales, tD, n_frames_load, i, flowNet) 72 | 73 | # run discriminator for each temporal scale 74 | loss_dict_T = [] 75 | for s in range(t_scales): 76 | if frames_skipped[0][s] is not None: 77 | losses = modelD(s+1, [frame_skipped[s] for frame_skipped in frames_skipped]) 78 | losses = [ torch.mean(x) if not isinstance(x, int) else x for x in losses ] 79 | loss_dict_T.append(dict(zip(modelD.module.loss_names_T, losses))) 80 | 81 | # collect losses 82 | loss_G, loss_D, loss_D_T, t_scales_act = modelD.module.get_losses(loss_dict, 
loss_dict_T, t_scales) 83 | 84 | ###################################### Backward Pass ################################# 85 | # update generator weights 86 | loss_backward(opt, loss_G, optimizer_G) 87 | 88 | # update individual discriminator weights 89 | loss_backward(opt, loss_D, optimizer_D) 90 | 91 | # update temporal discriminator weights 92 | for s in range(t_scales_act): 93 | loss_backward(opt, loss_D_T[s], optimizer_D_T[s]) 94 | 95 | if i == 0: fake_B_first = fake_B[0, 0] # the first generated image in this sequence 96 | 97 | if opt.debug: 98 | call(["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"]) 99 | 100 | ############## Display results and errors ########## 101 | ### print out errors 102 | if total_steps % print_freq == 0: 103 | t = (time.time() - iter_start_time) / print_freq 104 | errors = {k: v.data.item() if not isinstance(v, int) else v for k, v in loss_dict.items()} 105 | for s in range(len(loss_dict_T)): 106 | errors.update({k+str(s): v.data.item() if not isinstance(v, int) else v for k, v in loss_dict_T[s].items()}) 107 | visualizer.print_current_errors(epoch, epoch_iter, errors, t) 108 | visualizer.plot_current_errors(errors, total_steps) 109 | 110 | ### display output images 111 | if save_fake: 112 | visuals = util.save_all_tensors(opt, real_A, fake_B, fake_B_first, fake_B_raw, real_B, flow_ref, conf_ref, flow, weight, modelD) 113 | visualizer.display_current_results(visuals, epoch, total_steps) 114 | 115 | ### save latest model 116 | save_models(opt, epoch, epoch_iter, total_steps, visualizer, iter_path, modelG, modelD) 117 | if epoch_iter > dataset_size - opt.batchSize: 118 | epoch_iter = 0 119 | break 120 | 121 | # end of epoch 122 | iter_end_time = time.time() 123 | visualizer.vis_print('End of epoch %d / %d \t Time Taken: %d sec' % 124 | (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time)) 125 | 126 | ### save model for this epoch and update model params 127 | save_models(opt, epoch, epoch_iter, total_steps, visualizer, iter_path, modelG, modelD, end_of_epoch=True) 128 | update_models(opt, epoch, modelG, modelD, data_loader) 129 | 130 | def loss_backward(opt, loss, optimizer): 131 | optimizer.zero_grad() 132 | if opt.fp16: 133 | from apex import amp 134 | with amp.scale_loss(loss, optimizer) as scaled_loss: 135 | scaled_loss.backward() 136 | else: 137 | loss.backward() 138 | optimizer.step() 139 | 140 | def reshape(tensors): 141 | if tensors is None: return None 142 | if isinstance(tensors, list): 143 | return [reshape(tensor) for tensor in tensors] 144 | _, _, ch, h, w = tensors.size() 145 | return tensors.contiguous().view(-1, ch, h, w) 146 | 147 | if __name__ == "__main__": 148 | train() -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/util/__init__.py -------------------------------------------------------------------------------- /util/html.py: -------------------------------------------------------------------------------- 1 | import dominate 2 | from dominate.tags import * 3 | import os 4 | 5 | 6 | class HTML: 7 | def __init__(self, web_dir, title, reflesh=0): 8 | self.title = title 9 | self.web_dir = web_dir 10 | self.img_dir = os.path.join(self.web_dir, 'images') 11 | if not os.path.exists(self.web_dir): 12 | os.makedirs(self.web_dir) 13 | if not os.path.exists(self.img_dir): 14 | 
os.makedirs(self.img_dir) 15 | # print(self.img_dir) 16 | 17 | self.doc = dominate.document(title=title) 18 | if reflesh > 0: 19 | with self.doc.head: 20 | meta(http_equiv="refresh", content=str(reflesh)) 21 | 22 | def get_image_dir(self): 23 | return self.img_dir 24 | 25 | def add_header(self, str): 26 | with self.doc: 27 | h3(str) 28 | 29 | def add_table(self, border=1): 30 | self.t = table(border=border, style="table-layout: fixed;") 31 | self.doc.add(self.t) 32 | 33 | def add_images(self, ims, txts, links, width=400, height=0): 34 | self.add_table() 35 | with self.t: 36 | with tr(): 37 | for im, txt, link in zip(ims, txts, links): 38 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 39 | with p(): 40 | with a(href=os.path.join('images', link)): 41 | if height != 0: 42 | img(style="width:%dpx;height:%dpx" % (width, height), src=os.path.join('images', im)) 43 | else: 44 | img(style="width:%dpx" % (width), src=os.path.join('images', im)) 45 | br() 46 | p(txt) 47 | 48 | def save(self): 49 | html_file = '%s/index.html' % self.web_dir 50 | f = open(html_file, 'wt') 51 | f.write(self.doc.render()) 52 | f.close() 53 | 54 | 55 | if __name__ == '__main__': 56 | html = HTML('web/', 'test_html') 57 | html.add_header('hello world') 58 | 59 | ims = [] 60 | txts = [] 61 | links = [] 62 | for n in range(4): 63 | ims.append('image_%d.jpg' % n) 64 | txts.append('text_%d' % n) 65 | links.append('image_%d.jpg' % n) 66 | html.add_images(ims, txts, links) 67 | html.save() 68 | -------------------------------------------------------------------------------- /util/image_pool.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | from torch.autograd import Variable 5 | class ImagePool(): 6 | def __init__(self, pool_size): 7 | self.pool_size = pool_size 8 | if self.pool_size > 0: 9 | self.num_imgs = 0 10 | self.images = [] 11 | 12 | def query(self, images): 13 | if self.pool_size == 0: 14 | return images 15 | return_images = [] 16 | for image in images.data: 17 | image = torch.unsqueeze(image, 0) 18 | if self.num_imgs < self.pool_size: 19 | self.num_imgs = self.num_imgs + 1 20 | self.images.append(image) 21 | return_images.append(image) 22 | else: 23 | p = random.uniform(0, 1) 24 | if p > 0.5: 25 | random_id = random.randint(0, self.pool_size-1) 26 | tmp = self.images[random_id].clone() 27 | self.images[random_id] = image 28 | return_images.append(tmp) 29 | else: 30 | return_images.append(image) 31 | return_images = Variable(torch.cat(return_images, 0)) 32 | return return_images 33 | -------------------------------------------------------------------------------- /util/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import numpy as np 4 | from PIL import Image 5 | import inspect, re 6 | import numpy as np 7 | import os 8 | import collections 9 | from PIL import Image 10 | import cv2 11 | from collections import OrderedDict 12 | 13 | def save_all_tensors(opt, real_A, fake_B, fake_B_first, fake_B_raw, real_B, flow_ref, conf_ref, flow, weight, modelD): 14 | if opt.label_nc != 0: 15 | input_image = tensor2label(real_A, opt.label_nc) 16 | elif opt.dataset_mode == 'pose': 17 | input_image = tensor2im(real_A) 18 | if real_A.size()[2] == 6: 19 | input_image2 = tensor2im(real_A[0, -1, 3:]) 20 | input_image[input_image2 != 0] = input_image2[input_image2 != 0] 21 | else: 22 | c = 3 if opt.input_nc >= 3
else 1 23 | input_image = tensor2im(real_A[0, -1, :c], normalize=False) 24 | if opt.use_instance: 25 | edges = tensor2im(real_A[0, -1, -1:], normalize=False) 26 | input_image += edges[:,:,np.newaxis] 27 | 28 | if opt.add_face_disc: 29 | ys, ye, xs, xe = modelD.module.get_face_region(real_A[0, -1:]) 30 | if ys is not None: 31 | input_image[ys, xs:xe, :] = input_image[ye, xs:xe, :] = input_image[ys:ye, xs, :] = input_image[ys:ye, xe, :] = 255 32 | 33 | visual_list = [('input_image', input_image), 34 | ('fake_image', tensor2im(fake_B)), 35 | ('fake_first_image', tensor2im(fake_B_first)), 36 | ('fake_raw_image', tensor2im(fake_B_raw)), 37 | ('real_image', tensor2im(real_B)), 38 | ('flow_ref', tensor2flow(flow_ref)), 39 | ('conf_ref', tensor2im(conf_ref, normalize=False))] 40 | if flow is not None: 41 | visual_list += [('flow', tensor2flow(flow)), 42 | ('weight', tensor2im(weight, normalize=False))] 43 | visuals = OrderedDict(visual_list) 44 | return visuals 45 | 46 | # Converts a Tensor into a Numpy array 47 | # |imtype|: the desired type of the converted numpy array 48 | def tensor2im(image_tensor, imtype=np.uint8, normalize=True): 49 | if isinstance(image_tensor, list): 50 | image_numpy = [] 51 | for i in range(len(image_tensor)): 52 | image_numpy.append(tensor2im(image_tensor[i], imtype, normalize)) 53 | return image_numpy 54 | 55 | if isinstance(image_tensor, torch.autograd.Variable): 56 | image_tensor = image_tensor.data 57 | if len(image_tensor.size()) == 5: 58 | image_tensor = image_tensor[0, -1] 59 | if len(image_tensor.size()) == 4: 60 | image_tensor = image_tensor[0] 61 | image_tensor = image_tensor[:3] 62 | image_numpy = image_tensor.cpu().float().numpy() 63 | if normalize: 64 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 65 | else: 66 | image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0 67 | #image_numpy = (np.transpose(image_numpy, (1, 2, 0)) * std + mean) * 255.0 68 | image_numpy = np.clip(image_numpy, 0, 255) 69 | if image_numpy.shape[2] == 1: 70 | image_numpy = image_numpy[:,:,0] 71 | return image_numpy.astype(imtype) 72 | 73 | def tensor2label(output, n_label, imtype=np.uint8): 74 | if isinstance(output, torch.autograd.Variable): 75 | output = output.data 76 | if len(output.size()) == 5: 77 | output = output[0, -1] 78 | if len(output.size()) == 4: 79 | output = output[0] 80 | output = output.cpu().float() 81 | if output.size()[0] > 1: 82 | output = output.max(0, keepdim=True)[1] 83 | #print(output.size()) 84 | output = Colorize(n_label)(output) 85 | output = np.transpose(output.numpy(), (1, 2, 0)) 86 | #img = Image.fromarray(output, "RGB") 87 | return output.astype(imtype) 88 | 89 | def tensor2flow(output, imtype=np.uint8): 90 | if isinstance(output, torch.autograd.Variable): 91 | output = output.data 92 | if len(output.size()) == 5: 93 | output = output[0, -1] 94 | if len(output.size()) == 4: 95 | output = output[0] 96 | output = output.cpu().float().numpy() 97 | output = np.transpose(output, (1, 2, 0)) 98 | #mag = np.max(np.sqrt(output[:,:,0]**2 + output[:,:,1]**2)) 99 | #print(mag) 100 | hsv = np.zeros((output.shape[0], output.shape[1], 3), dtype=np.uint8) 101 | hsv[:, :, 0] = 255 102 | hsv[:, :, 1] = 255 103 | mag, ang = cv2.cartToPolar(output[..., 0], output[..., 1]) 104 | hsv[..., 0] = ang * 180 / np.pi / 2 105 | hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX) 106 | rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) 107 | return rgb 108 | 109 | def add_dummy_to_tensor(tensors, add_size=0): 110 | if add_size == 0 or tensors is 
None: return tensors 111 | if isinstance(tensors, list): 112 | return [add_dummy_to_tensor(tensor, add_size) for tensor in tensors] 113 | 114 | if isinstance(tensors, torch.Tensor): 115 | dummy = torch.zeros_like(tensors)[:add_size] 116 | tensors = torch.cat([dummy, tensors]) 117 | return tensors 118 | 119 | def remove_dummy_from_tensor(tensors, remove_size=0): 120 | if remove_size == 0 or tensors is None: return tensors 121 | if isinstance(tensors, list): 122 | return [remove_dummy_from_tensor(tensor, remove_size) for tensor in tensors] 123 | 124 | if isinstance(tensors, torch.Tensor): 125 | tensors = tensors[remove_size:] 126 | return tensors 127 | 128 | def save_image(image_numpy, image_path): 129 | image_pil = Image.fromarray(image_numpy) 130 | image_pil.save(image_path) 131 | 132 | def print_numpy(x, val=True, shp=False): 133 | x = x.astype(np.float64) 134 | if shp: 135 | print('shape,', x.shape) 136 | if val: 137 | x = x.flatten() 138 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( 139 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) 140 | 141 | def mkdirs(paths): 142 | if isinstance(paths, list) and not isinstance(paths, str): 143 | for path in paths: 144 | mkdir(path) 145 | else: 146 | mkdir(paths) 147 | 148 | def mkdir(path): 149 | if not os.path.exists(path): 150 | os.makedirs(path) 151 | 152 | def uint82bin(n, count=8): 153 | """returns the binary of integer n, count refers to amount of bits""" 154 | return ''.join([str((n >> y) & 1) for y in range(count-1, -1, -1)]) 155 | 156 | def labelcolormap(N): 157 | if N == 35: # Cityscapes train 158 | cmap = np.array([( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), (111, 74, 0), ( 81, 0, 81), 159 | (128, 64,128), (244, 35,232), (250,170,160), (230,150,140), ( 70, 70, 70), (102,102,156), (190,153,153), 160 | (180,165,180), (150,100,100), (150,120, 90), (153,153,153), (153,153,153), (250,170, 30), (220,220, 0), 161 | (107,142, 35), (152,251,152), ( 70,130,180), (220, 20, 60), (255, 0, 0), ( 0, 0,142), ( 0, 0, 70), 162 | ( 0, 60,100), ( 0, 0, 90), ( 0, 0,110), ( 0, 80,100), ( 0, 0,230), (119, 11, 32), ( 0, 0,142)], 163 | dtype=np.uint8) 164 | elif N == 20: # Cityscapes eval 165 | cmap = np.array([(128, 64,128), (244, 35,232), ( 70, 70, 70), (102,102,156), (190,153,153), (153,153,153), (250,170, 30), 166 | (220,220, 0), (107,142, 35), (152,251,152), ( 70,130,180), (220, 20, 60), (255, 0, 0), ( 0, 0,142), 167 | ( 0, 0, 70), ( 0, 60,100), ( 0, 80,100), ( 0, 0,230), (119, 11, 32), ( 0, 0, 0)], 168 | dtype=np.uint8) 169 | else: 170 | cmap = np.zeros((N, 3), dtype=np.uint8) 171 | for i in range(N): 172 | r, g, b = 0, 0, 0 173 | id = i 174 | for j in range(7): 175 | str_id = uint82bin(id) 176 | r = r ^ (np.uint8(str_id[-1]) << (7-j)) 177 | g = g ^ (np.uint8(str_id[-2]) << (7-j)) 178 | b = b ^ (np.uint8(str_id[-3]) << (7-j)) 179 | id = id >> 3 180 | cmap[i, 0], cmap[i, 1], cmap[i, 2] = r, g, b 181 | return cmap 182 | 183 | def colormap(n): 184 | cmap = np.zeros([n, 3]).astype(np.uint8) 185 | for i in np.arange(n): 186 | r, g, b = np.zeros(3) 187 | 188 | for j in np.arange(8): 189 | r = r + (1 << (7-j))*((i & (1 << (3*j))) >> (3*j)) 190 | g = g + (1 << (7-j))*((i & (1 << (3*j+1))) >> (3*j+1)) 191 | b = b + (1 << (7-j))*((i & (1 << (3*j+2))) >> (3*j+2)) 192 | 193 | cmap[i, :] = np.array([r, g, b]) 194 | 195 | return cmap 196 | 197 | class Colorize(object): 198 | def __init__(self, n=35): 199 | self.cmap = labelcolormap(n) 200 | self.cmap = torch.from_numpy(self.cmap[:n]) 201 | 202 | def 
__call__(self, gray_image): 203 | size = gray_image.size() 204 | color_image = torch.ByteTensor(3, size[1], size[2]).fill_(0) 205 | 206 | for label in range(0, len(self.cmap)): 207 | mask = (label == gray_image[0]).cpu() 208 | color_image[0][mask] = self.cmap[label][0] 209 | color_image[1][mask] = self.cmap[label][1] 210 | color_image[2][mask] = self.cmap[label][2] 211 | 212 | return color_image -------------------------------------------------------------------------------- /util/visualizer.py: -------------------------------------------------------------------------------- 1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved. 2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 3 | import numpy as np 4 | import os 5 | import time 6 | from . import util 7 | from . import html 8 | import scipy.misc 9 | try: 10 | from StringIO import StringIO # Python 2.7 11 | except ImportError: 12 | from io import BytesIO # Python 3.x 13 | 14 | class Visualizer(): 15 | def __init__(self, opt): 16 | self.opt = opt 17 | self.tf_log = opt.tf_log 18 | self.use_html = opt.isTrain and not opt.no_html 19 | self.win_size = opt.display_winsize 20 | self.name = opt.name 21 | if self.tf_log: 22 | import tensorflow as tf 23 | self.tf = tf 24 | self.log_dir = os.path.join(opt.checkpoints_dir, opt.name, 'logs') 25 | self.writer = tf.summary.FileWriter(self.log_dir) 26 | 27 | if self.use_html: 28 | self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web') 29 | self.img_dir = os.path.join(self.web_dir, 'images') 30 | print('create web directory %s...' % self.web_dir) 31 | util.mkdirs([self.web_dir, self.img_dir]) 32 | self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt') 33 | with open(self.log_name, "a") as log_file: 34 | now = time.strftime("%c") 35 | log_file.write('================ Training Loss (%s) ================\n' % now) 36 | 37 | # |visuals|: dictionary of images to display or save 38 | def display_current_results(self, visuals, epoch, step): 39 | if self.tf_log: # show images in tensorboard output 40 | img_summaries = [] 41 | for label, image_numpy in visuals.items(): 42 | # Write the image to a string 43 | try: 44 | s = StringIO() 45 | except: 46 | s = BytesIO() 47 | scipy.misc.toimage(image_numpy).save(s, format="jpeg") 48 | # Create an Image object 49 | img_sum = self.tf.Summary.Image(encoded_image_string=s.getvalue(), height=image_numpy.shape[0], width=image_numpy.shape[1]) 50 | # Create a Summary value 51 | img_summaries.append(self.tf.Summary.Value(tag=label, image=img_sum)) 52 | 53 | # Create and write Summary 54 | summary = self.tf.Summary(value=img_summaries) 55 | self.writer.add_summary(summary, step) 56 | 57 | if self.use_html: # save images to a html file 58 | for label, image_numpy in visuals.items(): 59 | if isinstance(image_numpy, list): 60 | for i in range(len(image_numpy)): 61 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s_%d.jpg' % (epoch, label, i)) 62 | util.save_image(image_numpy[i], img_path) 63 | else: 64 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.jpg' % (epoch, label)) 65 | util.save_image(image_numpy, img_path) 66 | 67 | # update website 68 | webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1) 69 | for n in range(epoch, 0, -1): 70 | webpage.add_header('epoch [%d]' % n) 71 | ims = [] 72 | txts = [] 73 | links = [] 74 | 75 | for label, image_numpy in visuals.items(): 76 | if isinstance(image_numpy, list): 77 | for i in 
range(len(image_numpy)): 78 | img_path = 'epoch%.3d_%s_%d.jpg' % (n, label, i) 79 | ims.append(img_path) 80 | txts.append(label+str(i)) 81 | links.append(img_path) 82 | else: 83 | img_path = 'epoch%.3d_%s.jpg' % (n, label) 84 | ims.append(img_path) 85 | txts.append(label) 86 | links.append(img_path) 87 | if len(ims) < 6: 88 | webpage.add_images(ims, txts, links, width=self.win_size) 89 | else: 90 | num = int(round(len(ims)/2.0)) 91 | webpage.add_images(ims[:num], txts[:num], links[:num], width=self.win_size) 92 | webpage.add_images(ims[num:], txts[num:], links[num:], width=self.win_size) 93 | webpage.save() 94 | 95 | # errors: dictionary of error labels and values 96 | def plot_current_errors(self, errors, step): 97 | if self.tf_log: 98 | for tag, value in errors.items(): 99 | summary = self.tf.Summary(value=[self.tf.Summary.Value(tag=tag, simple_value=value)]) 100 | self.writer.add_summary(summary, step) 101 | 102 | # errors: same format as |errors| of plotCurrentErrors 103 | def print_current_errors(self, epoch, i, errors, t): 104 | message = '(epoch: %d, iters: %d, time: %.3f) ' % (epoch, i, t) 105 | for k, v in sorted(errors.items()): 106 | if v != 0: 107 | message += '%s: %.3f ' % (k, v) 108 | 109 | print(message) 110 | with open(self.log_name, "a") as log_file: 111 | log_file.write('%s\n' % message) 112 | 113 | # save image to the disk 114 | def save_images(self, image_dir, visuals, image_path, webpage=None): 115 | dirname = os.path.basename(os.path.dirname(image_path[0])) 116 | image_dir = os.path.join(image_dir, dirname) 117 | util.mkdir(image_dir) 118 | name = os.path.basename(image_path[0]) 119 | name = os.path.splitext(name)[0] 120 | 121 | if webpage is not None: 122 | webpage.add_header(name) 123 | ims, txts, links = [], [], [] 124 | 125 | for label, image_numpy in visuals.items(): 126 | save_ext = 'png' if 'real_A' in label and self.opt.label_nc != 0 else 'jpg' 127 | image_name = '%s_%s.%s' % (label, name, save_ext) 128 | save_path = os.path.join(image_dir, image_name) 129 | util.save_image(image_numpy, save_path) 130 | 131 | if webpage is not None: 132 | ims.append(image_name) 133 | txts.append(label) 134 | links.append(image_name) 135 | if webpage is not None: 136 | webpage.add_images(ims, txts, links, width=self.win_size) 137 | 138 | def vis_print(self, message): 139 | print(message) 140 | with open(self.log_name, "a") as log_file: 141 | log_file.write('%s\n' % message) 142 | 143 | --------------------------------------------------------------------------------