├── .gitignore
├── LICENSE.txt
├── README.md
├── data
│   ├── __init__.py
│   ├── base_data_loader.py
│   ├── base_dataset.py
│   ├── custom_dataset_data_loader.py
│   ├── data_loader.py
│   ├── face_dataset.py
│   ├── face_landmark_detection.py
│   ├── image_folder.py
│   ├── keypoint2img.py
│   ├── pose_dataset.py
│   ├── temporal_dataset.py
│   └── test_dataset.py
├── docker
│   ├── Dockerfile
│   ├── launch_docker.sh
│   └── pre_docker_install.sh
├── imgs
│   ├── city_change_labels.gif
│   ├── city_change_styles.gif
│   ├── face.gif
│   ├── face_multiple.gif
│   ├── framePredict.gif
│   ├── pose.gif
│   └── teaser.gif
├── models
│   ├── __init__.py
│   ├── base_model.py
│   ├── flownet.py
│   ├── flownet2_pytorch
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── convert.py
│   │   ├── datasets.py
│   │   ├── download_caffe_models.sh
│   │   ├── install.sh
│   │   ├── launch_docker.sh
│   │   ├── losses.py
│   │   ├── main.py
│   │   ├── models.py
│   │   ├── networks
│   │   │   ├── FlowNetC.py
│   │   │   ├── FlowNetFusion.py
│   │   │   ├── FlowNetS.py
│   │   │   ├── FlowNetSD.py
│   │   │   ├── __init__.py
│   │   │   ├── channelnorm_package
│   │   │   │   ├── __init__.py
│   │   │   │   ├── channelnorm.py
│   │   │   │   ├── channelnorm_cuda.cc
│   │   │   │   ├── channelnorm_kernel.cu
│   │   │   │   ├── channelnorm_kernel.cuh
│   │   │   │   └── setup.py
│   │   │   ├── correlation_package
│   │   │   │   ├── __init__.py
│   │   │   │   ├── correlation.py
│   │   │   │   ├── correlation_cuda.cc
│   │   │   │   ├── correlation_cuda_kernel.cu
│   │   │   │   ├── correlation_cuda_kernel.cuh
│   │   │   │   └── setup.py
│   │   │   ├── resample2d_package
│   │   │   │   ├── __init__.py
│   │   │   │   ├── resample2d.py
│   │   │   │   ├── resample2d_cuda.cc
│   │   │   │   ├── resample2d_kernel.cu
│   │   │   │   ├── resample2d_kernel.cuh
│   │   │   │   └── setup.py
│   │   │   └── submodules.py
│   │   ├── run-caffe2pytorch.sh
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── flow_utils.py
│   │       ├── frame_utils.py
│   │       ├── param_utils.py
│   │       └── tools.py
│   ├── models.py
│   ├── networks.py
│   ├── vid2vid_model_D.py
│   └── vid2vid_model_G.py
├── options
│   ├── __init__.py
│   ├── base_options.py
│   ├── test_options.py
│   └── train_options.py
├── scripts
│   ├── download_datasets.py
│   ├── download_flownet2.py
│   ├── download_gdrive.py
│   ├── download_models_flownet2.py
│   ├── face
│   │   ├── download_gdrive.py
│   │   ├── download_models.py
│   │   ├── test_512.sh
│   │   ├── test_g1_256.sh
│   │   ├── test_g1_512.sh
│   │   ├── train_512.sh
│   │   ├── train_512_bs7.sh
│   │   ├── train_g1_256.sh
│   │   └── train_g1_512.sh
│   ├── pose
│   │   ├── test_1024p.sh
│   │   ├── test_256p.sh
│   │   ├── test_512p.sh
│   │   ├── test_g1_1024p.sh
│   │   ├── test_g1_256p.sh
│   │   ├── test_g1_512p.sh
│   │   ├── train_1024p.sh
│   │   ├── train_256p.sh
│   │   ├── train_512p.sh
│   │   ├── train_g1_1024p.sh
│   │   ├── train_g1_256p.sh
│   │   └── train_g1_512p.sh
│   └── street
│       ├── download_gdrive.py
│       ├── download_models.py
│       ├── download_models_g1.py
│       ├── test_2048.sh
│       ├── test_g1_1024.sh
│       ├── train_1024.sh
│       ├── train_2048.sh
│       ├── train_2048_crop.sh
│       ├── train_512.sh
│       ├── train_512_bs.sh
│       ├── train_512_no_fg.sh
│       ├── train_g1_1024.sh
│       ├── train_g1_256.sh
│       └── train_g1_512.sh
├── test.py
├── train.py
└── util
    ├── __init__.py
    ├── html.py
    ├── image_pool.py
    ├── util.py
    └── visualizer.py
/.gitignore:
--------------------------------------------------------------------------------
1 | debug*
2 | checkpoints/
3 | datasets/
4 | models/debug*
5 | models/flownet2*/networks/*/*egg-info
6 | models/flownet2*/networks/*/build
7 | models/flownet2*/networks/*/__pycache__
8 | models/flownet2*/networks/*/dist
9 | results/
10 | build/
11 | */Thumbs.db
12 | */**/__pycache__
13 | */*.pyc
14 | */**/*.pyc
15 | */**/**/*.pyc
16 | */**/**/**/*.pyc
17 | */**/**/**/**/*.pyc
18 | */*.so*
19 | */**/*.so*
20 | */**/*.dylib*
21 | *.DS_Store
22 | *~
23 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (C) 2017 NVIDIA Corporation. Ting-Chun Wang, Ming-Yu Liu, Jun-Yan Zhu.
2 | All rights reserved.
3 | Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
4 |
5 | Permission to use, copy, modify, and distribute this software and its documentation
6 | for any non-commercial purpose is hereby granted without fee, provided that the above
7 | copyright notice appear in all copies and that both that copyright notice and this
8 | permission notice appear in supporting documentation, and that the name of the author
9 | not be used in advertising or publicity pertaining to distribution of the software
10 | without specific, written prior permission.
11 |
12 | THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
13 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY PARTICULAR PURPOSE.
14 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
15 | DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
16 | WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
17 | OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 |
19 |
20 | --------------------------- LICENSE FOR pytorch-CycleGAN-and-pix2pix ----------------
21 | Copyright (c) 2017, Jun-Yan Zhu and Taesung Park
22 | All rights reserved.
23 |
24 | Redistribution and use in source and binary forms, with or without
25 | modification, are permitted provided that the following conditions are met:
26 |
27 | * Redistributions of source code must retain the above copyright notice, this
28 | list of conditions and the following disclaimer.
29 |
30 | * Redistributions in binary form must reproduce the above copyright notice,
31 | this list of conditions and the following disclaimer in the documentation
32 | and/or other materials provided with the distribution.
33 |
34 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
35 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
37 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
38 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
40 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
41 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
42 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
43 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/data/__init__.py
--------------------------------------------------------------------------------
/data/base_data_loader.py:
--------------------------------------------------------------------------------
1 |
2 | class BaseDataLoader():
3 | def __init__(self):
4 | pass
5 |
6 | def initialize(self, opt):
7 | self.opt = opt
8 | pass
9 |
10 | def load_data(self):
11 | return None
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/data/base_dataset.py:
--------------------------------------------------------------------------------
1 | from util.util import add_dummy_to_tensor
2 | import torch.utils.data as data
3 | import torch
4 | from PIL import Image
5 | import torchvision.transforms as transforms
6 | import numpy as np
7 | import random
8 |
9 | class BaseDataset(data.Dataset):
10 | def __init__(self):
11 | super(BaseDataset, self).__init__()
12 |
13 | def name(self):
14 | return 'BaseDataset'
15 |
16 | def initialize(self, opt):
17 | pass
18 |
19 | def update_training_batch(self, ratio): # update the training sequence length to be longer
20 | seq_len_max = min(128, self.seq_len_max) - (self.opt.n_frames_G - 1)
21 | if self.n_frames_total < seq_len_max:
22 | self.n_frames_total = min(seq_len_max, self.opt.n_frames_total * (2**ratio))
23 | #self.n_frames_total = min(seq_len_max, self.opt.n_frames_total * (ratio + 1))
24 | print('--------- Updating training sequence length to %d ---------' % self.n_frames_total)
25 |
26 | def init_frame_idx(self, A_paths):
27 | self.n_of_seqs = min(len(A_paths), self.opt.max_dataset_size) # number of sequences to train
28 | self.seq_len_max = max([len(A) for A in A_paths]) # max number of frames in the training sequences
29 |
30 | self.seq_idx = 0 # index for current sequence
31 | self.frame_idx = self.opt.start_frame if not self.opt.isTrain else 0 # index for current frame in the sequence
32 | self.frames_count = [] # number of frames in each sequence
33 | for path in A_paths:
34 | self.frames_count.append(len(path) - self.opt.n_frames_G + 1)
35 |
36 | self.folder_prob = [count / sum(self.frames_count) for count in self.frames_count]
37 | self.n_frames_total = self.opt.n_frames_total if self.opt.isTrain else 1
38 | self.A, self.B, self.I = None, None, None
39 |
40 | def update_frame_idx(self, A_paths, index):
41 | if self.opt.isTrain:
42 | if self.opt.dataset_mode == 'pose':
43 | seq_idx = np.random.choice(len(A_paths), p=self.folder_prob) # randomly pick sequence to train
44 | self.frame_idx = index
45 | else:
46 | seq_idx = index % self.n_of_seqs
47 | return None, None, None, seq_idx
48 | else:
49 | self.change_seq = self.frame_idx >= self.frames_count[self.seq_idx]
50 | if self.change_seq:
51 | self.seq_idx += 1
52 | self.frame_idx = 0
53 | self.A, self.B, self.I = None, None, None
54 | return self.A, self.B, self.I, self.seq_idx
55 |
56 | def init_data_params(self, data, n_gpus, tG):
57 | opt = self.opt
58 | _, n_frames_total, self.height, self.width = data['B'].size() # n_frames_total = n_frames_load * n_loadings + tG - 1
59 | n_frames_total = n_frames_total // opt.output_nc
60 | n_frames_load = opt.max_frames_per_gpu * n_gpus # number of total frames loaded into GPU at a time for each batch
61 | n_frames_load = min(n_frames_load, n_frames_total - tG + 1)
62 | self.t_len = n_frames_load + tG - 1 # number of loaded frames plus previous frames
63 | return n_frames_total-self.t_len+1, n_frames_load, self.t_len
64 |
65 | def init_data(self, t_scales):
66 | fake_B_last = None # the last generated frame from previous training batch (which becomes input to the next batch)
67 | real_B_all, fake_B_all, flow_ref_all, conf_ref_all = None, None, None, None # all real/generated frames so far
68 | if self.opt.sparse_D:
69 | real_B_all, fake_B_all, flow_ref_all, conf_ref_all = [None]*t_scales, [None]*t_scales, [None]*t_scales, [None]*t_scales
70 |
71 | frames_all = real_B_all, fake_B_all, flow_ref_all, conf_ref_all
72 | return fake_B_last, frames_all
73 |
74 | def prepare_data(self, data, i, input_nc, output_nc):
75 | t_len, height, width = self.t_len, self.height, self.width
76 | # 5D tensor: batchSize, # of frames, # of channels, height, width
77 | input_A = (data['A'][:, i*input_nc:(i+t_len)*input_nc, ...]).view(-1, t_len, input_nc, height, width)
78 | input_B = (data['B'][:, i*output_nc:(i+t_len)*output_nc, ...]).view(-1, t_len, output_nc, height, width)
79 | inst_A = (data['inst'][:, i:i+t_len, ...]).view(-1, t_len, 1, height, width) if len(data['inst'].size()) > 2 else None
80 | return [input_A, input_B, inst_A]
81 |
82 | def make_power_2(n, base=32.0):
83 | return int(round(n / base) * base)
84 |
85 | def get_img_params(opt, size):
86 | w, h = size
87 | new_h, new_w = h, w
88 | if 'resize' in opt.resize_or_crop: # resize image to be loadSize x loadSize
89 | new_h = new_w = opt.loadSize
90 | elif 'scaleWidth' in opt.resize_or_crop: # scale image width to be loadSize
91 | new_w = opt.loadSize
92 | new_h = opt.loadSize * h // w
93 | elif 'scaleHeight' in opt.resize_or_crop: # scale image height to be loadSize
94 | new_h = opt.loadSize
95 | new_w = opt.loadSize * w // h
96 | elif 'randomScaleWidth' in opt.resize_or_crop: # randomly scale image width to be somewhere between loadSize and fineSize
97 | new_w = random.randint(opt.fineSize, opt.loadSize + 1)
98 | new_h = new_w * h // w
99 | elif 'randomScaleHeight' in opt.resize_or_crop: # randomly scale image height to be somewhere between loadSize and fineSize
100 | new_h = random.randint(opt.fineSize, opt.loadSize + 1)
101 | new_w = new_h * w // h
102 | new_w = int(round(new_w / 4)) * 4
103 | new_h = int(round(new_h / 4)) * 4
104 |
105 | crop_x = crop_y = 0
106 | crop_w = crop_h = 0
107 | if 'crop' in opt.resize_or_crop or 'scaledCrop' in opt.resize_or_crop:
108 | if 'crop' in opt.resize_or_crop: # crop patches of size fineSize x fineSize
109 | crop_w = crop_h = opt.fineSize
110 | else:
111 | if 'Width' in opt.resize_or_crop: # crop patches of width fineSize
112 | crop_w = opt.fineSize
113 | crop_h = opt.fineSize * h // w
114 | else: # crop patches of height fineSize
115 | crop_h = opt.fineSize
116 | crop_w = opt.fineSize * w // h
117 |
118 | crop_w, crop_h = make_power_2(crop_w), make_power_2(crop_h)
119 | x_span = (new_w - crop_w) // 2
120 | crop_x = np.maximum(0, np.minimum(x_span*2, int(np.random.randn() * x_span/3 + x_span)))
121 | crop_y = random.randint(0, np.minimum(np.maximum(0, new_h - crop_h), new_h // 8))
122 | #crop_x = random.randint(0, np.maximum(0, new_w - crop_w))
123 | #crop_y = random.randint(0, np.maximum(0, new_h - crop_h))
124 | else:
125 | new_w, new_h = make_power_2(new_w), make_power_2(new_h)
126 |
127 | flip = (random.random() > 0.5) and (opt.dataset_mode != 'pose')
128 | return {'new_size': (new_w, new_h), 'crop_size': (crop_w, crop_h), 'crop_pos': (crop_x, crop_y), 'flip': flip}
129 |
130 | def get_transform(opt, params, method=Image.BICUBIC, normalize=True, toTensor=True):
131 | transform_list = []
132 | ### resize input image
133 | if 'resize' in opt.resize_or_crop:
134 | osize = [opt.loadSize, opt.loadSize]
135 | transform_list.append(transforms.Scale(osize, method))
136 | else:
137 | transform_list.append(transforms.Lambda(lambda img: __scale_image(img, params['new_size'], method)))
138 |
139 | ### crop patches from image
140 | if 'crop' in opt.resize_or_crop or 'scaledCrop' in opt.resize_or_crop:
141 | transform_list.append(transforms.Lambda(lambda img: __crop(img, params['crop_size'], params['crop_pos'])))
142 |
143 | ### random flip
144 | if opt.isTrain and not opt.no_flip:
145 | transform_list.append(transforms.Lambda(lambda img: __flip(img, params['flip'])))
146 |
147 | if toTensor:
148 | transform_list += [transforms.ToTensor()]
149 | if normalize:
150 | transform_list += [transforms.Normalize((0.5, 0.5, 0.5),
151 | (0.5, 0.5, 0.5))]
152 | return transforms.Compose(transform_list)
153 |
154 | def toTensor_normalize():
155 | transform_list = [transforms.ToTensor()]
156 | transform_list += [transforms.Normalize((0.5, 0.5, 0.5),
157 | (0.5, 0.5, 0.5))]
158 | return transforms.Compose(transform_list)
159 |
160 | def __scale_image(img, size, method=Image.BICUBIC):
161 | w, h = size
162 | return img.resize((w, h), method)
163 |
164 | def __crop(img, size, pos):
165 | ow, oh = img.size
166 | tw, th = size
167 | x1, y1 = pos
168 | if (ow > tw or oh > th):
169 | return img.crop((x1, y1, min(ow, x1 + tw), min(oh, y1 + th)))
170 | return img
171 |
172 | def __flip(img, flip):
173 | if flip:
174 | return img.transpose(Image.FLIP_LEFT_RIGHT)
175 | return img
176 |
177 | def get_video_params(opt, n_frames_total, cur_seq_len, index):
178 | tG = opt.n_frames_G
179 | if opt.isTrain:
180 | n_frames_total = min(n_frames_total, cur_seq_len - tG + 1)
181 |
182 | n_gpus = opt.n_gpus_gen if opt.batchSize == 1 else 1 # number of generator GPUs for each batch
183 | n_frames_per_load = opt.max_frames_per_gpu * n_gpus # number of frames to load into GPUs at one time (for each batch)
184 | n_frames_per_load = min(n_frames_total, n_frames_per_load)
185 | n_loadings = n_frames_total // n_frames_per_load # how many times are needed to load entire sequence into GPUs
186 | n_frames_total = n_frames_per_load * n_loadings + tG - 1 # rounded overall number of frames to read from the sequence
187 |
188 | max_t_step = min(opt.max_t_step, (cur_seq_len-1) // (n_frames_total-1))
189 | t_step = np.random.randint(max_t_step) + 1 # spacing between neighboring sampled frames
190 | offset_max = max(1, cur_seq_len - (n_frames_total-1)*t_step) # maximum possible index for the first frame
191 | if opt.dataset_mode == 'pose':
192 | start_idx = index % offset_max
193 | else:
194 | start_idx = np.random.randint(offset_max) # offset for the first frame to load
195 | if opt.debug:
196 | print("loading %d frames in total, first frame starting at index %d, space between neighboring frames is %d"
197 | % (n_frames_total, start_idx, t_step))
198 | else:
199 | n_frames_total = tG
200 | start_idx = index
201 | t_step = 1
202 | return n_frames_total, start_idx, t_step
203 |
204 | def concat_frame(A, Ai, nF):
205 | if A is None:
206 | A = Ai
207 | else:
208 | c = Ai.size()[0]
209 | if A.size()[0] == nF * c:
210 | A = A[c:]
211 | A = torch.cat([A, Ai])
212 | return A
--------------------------------------------------------------------------------
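
Usage note: get_img_params draws one set of random scale/crop/flip parameters per clip, and get_transform turns them into a torchvision pipeline, so every frame of a clip (and both the label and image streams) is augmented identically. A minimal usage sketch, assuming a hypothetical opt namespace carrying the fields these helpers read (real values come from options/base_options.py):

    from argparse import Namespace
    from PIL import Image
    from data.base_dataset import get_img_params, get_transform

    # Hypothetical option values for illustration only.
    opt = Namespace(resize_or_crop='scaleWidth_crop', loadSize=512, fineSize=256,
                    isTrain=True, no_flip=False, dataset_mode='temporal')

    img = Image.open('frame_0000.jpg').convert('RGB')     # any RGB frame
    params = get_img_params(opt, img.size)                # one random draw per clip
    transform = get_transform(opt, params)                # reused for every frame in the clip
    tensor = transform(img)                               # roughly 3 x fineSize x fineSize, values in [-1, 1]
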
/data/custom_dataset_data_loader.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data
2 | from data.base_data_loader import BaseDataLoader
3 |
4 |
5 | def CreateDataset(opt):
6 | dataset = None
7 | if opt.dataset_mode == 'temporal':
8 | from data.temporal_dataset import TemporalDataset
9 | dataset = TemporalDataset()
10 | elif opt.dataset_mode == 'face':
11 | from data.face_dataset import FaceDataset
12 | dataset = FaceDataset()
13 | elif opt.dataset_mode == 'pose':
14 | from data.pose_dataset import PoseDataset
15 | dataset = PoseDataset()
16 | elif opt.dataset_mode == 'test':
17 | from data.test_dataset import TestDataset
18 | dataset = TestDataset()
19 | else:
20 | raise ValueError("Dataset [%s] not recognized." % opt.dataset_mode)
21 |
22 | print("dataset [%s] was created" % (dataset.name()))
23 | dataset.initialize(opt)
24 | return dataset
25 |
26 |
27 | class CustomDatasetDataLoader(BaseDataLoader):
28 | def name(self):
29 | return 'CustomDatasetDataLoader'
30 |
31 | def initialize(self, opt):
32 | BaseDataLoader.initialize(self, opt)
33 | self.dataset = CreateDataset(opt)
34 | self.dataloader = torch.utils.data.DataLoader(
35 | self.dataset,
36 | batch_size=opt.batchSize,
37 | shuffle=not opt.serial_batches,
38 | num_workers=int(opt.nThreads))
39 |
40 | def load_data(self):
41 | return self.dataloader
42 |
43 | def __len__(self):
44 | return min(len(self.dataset), self.opt.max_dataset_size)
45 |
--------------------------------------------------------------------------------
/data/data_loader.py:
--------------------------------------------------------------------------------
1 |
2 | def CreateDataLoader(opt):
3 | from data.custom_dataset_data_loader import CustomDatasetDataLoader
4 | data_loader = CustomDatasetDataLoader()
5 | print(data_loader.name())
6 | data_loader.initialize(opt)
7 | return data_loader
8 |
--------------------------------------------------------------------------------
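
Usage note: CreateDataLoader is the single entry point the training and test scripts go through; it instantiates the dataset selected by opt.dataset_mode and wraps it in a torch DataLoader. A sketch of the typical call pattern (paraphrased, not verbatim repo code):

    from options.train_options import TrainOptions
    from data.data_loader import CreateDataLoader

    opt = TrainOptions().parse()               # parses --dataset_mode, --dataroot, --batchSize, ...
    data_loader = CreateDataLoader(opt)        # builds TemporalDataset / FaceDataset / PoseDataset / TestDataset
    dataset = data_loader.load_data()          # the underlying torch.utils.data.DataLoader
    print('#training sequences = %d' % len(data_loader))

    for i, data in enumerate(dataset):
        # each item is a dict with keys 'A', 'B', 'inst', 'A_path' (see the dataset classes above)
        real_A, real_B = data['A'], data['B']
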
/data/face_landmark_detection.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 | from skimage import io
4 | import numpy as np
5 | import dlib
6 | import sys
7 |
8 | if len(sys.argv) < 2 or (sys.argv[1] != 'train' and sys.argv[1] != 'test'):
9 | raise ValueError('usage: python data/face_landmark_detection.py [train|test]')
10 |
11 | phase = sys.argv[1]
12 | dataset_path = 'datasets/face/'
13 | faces_folder_path = os.path.join(dataset_path, phase + '_img/')
14 | predictor_path = os.path.join(dataset_path, 'shape_predictor_68_face_landmarks.dat')
15 | detector = dlib.get_frontal_face_detector()
16 | predictor = dlib.shape_predictor(predictor_path)
17 |
18 | img_paths = sorted(glob.glob(faces_folder_path + '*'))
19 | for i in range(len(img_paths)):
20 | f = img_paths[i]
21 | print("Processing video: {}".format(f))
22 | save_path = os.path.join(dataset_path, phase + '_keypoints', os.path.basename(f))
23 | if not os.path.isdir(save_path):
24 | os.makedirs(save_path)
25 |
26 | for img_name in sorted(glob.glob(os.path.join(f, '*.jpg'))):
27 | img = io.imread(img_name)
28 | dets = detector(img, 1)
29 | if len(dets) > 0:
30 | shape = predictor(img, dets[0])
31 | points = np.empty([68, 2], dtype=int)
32 | for b in range(68):
33 | points[b,0] = shape.part(b).x
34 | points[b,1] = shape.part(b).y
35 |
36 | save_name = os.path.join(save_path, os.path.basename(img_name)[:-4] + '.txt')
37 | np.savetxt(save_name, points, fmt='%d', delimiter=',')
38 |
--------------------------------------------------------------------------------
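
Note on the output format: each processed frame yields a 68x2 integer landmark array saved with np.savetxt(..., fmt='%d', delimiter=','). A small sketch of reading one of these files back (the file path is hypothetical):

    import numpy as np

    # one row per landmark, "x,y" per row, exactly as written by face_landmark_detection.py
    points = np.loadtxt('datasets/face/train_keypoints/0001/frame0000.txt', delimiter=',')
    assert points.shape == (68, 2)
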
/data/image_folder.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Code from
3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
4 | # Modified the original code so that it loads images from the current
5 | # directory as well as from its subdirectories
6 | ###############################################################################
7 |
8 | import torch.utils.data as data
9 |
10 | from PIL import Image
11 | import os
12 | import os.path
13 |
14 | IMG_EXTENSIONS = [
15 | '.jpg', '.JPG', '.jpeg', '.JPEG', '.pgm', '.PGM',
16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', '.tiff',
17 | '.txt', '.json'
18 | ]
19 |
20 |
21 | def is_image_file(filename):
22 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
23 |
24 |
25 | def make_dataset(dir):
26 | images = []
27 | assert os.path.isdir(dir), '%s is not a valid directory' % dir
28 |
29 | for root, _, fnames in sorted(os.walk(dir)):
30 | for fname in fnames:
31 | if is_image_file(fname):
32 | path = os.path.join(root, fname)
33 | images.append(path)
34 | return images
35 |
36 | def make_grouped_dataset(dir):
37 | images = []
38 | assert os.path.isdir(dir), '%s is not a valid directory' % dir
39 | fnames = sorted(os.walk(dir))
40 | for fname in sorted(fnames):
41 | paths = []
42 | root = fname[0]
43 | for f in sorted(fname[2]):
44 | if is_image_file(f):
45 | paths.append(os.path.join(root, f))
46 | if len(paths) > 0:
47 | images.append(paths)
48 | return images
49 |
50 | def check_path_valid(A_paths, B_paths):
51 | assert(len(A_paths) == len(B_paths))
52 | for a, b in zip(A_paths, B_paths):
53 | assert(len(a) == len(b))
54 |
55 | def default_loader(path):
56 | return Image.open(path).convert('RGB')
57 |
58 |
59 | class ImageFolder(data.Dataset):
60 |
61 | def __init__(self, root, transform=None, return_paths=False,
62 | loader=default_loader):
63 | imgs = make_dataset(root)
64 | if len(imgs) == 0:
65 | raise(RuntimeError("Found 0 images in: " + root + "\n"
66 | "Supported image extensions are: " +
67 | ",".join(IMG_EXTENSIONS)))
68 |
69 | self.root = root
70 | self.imgs = imgs
71 | self.transform = transform
72 | self.return_paths = return_paths
73 | self.loader = loader
74 |
75 | def __getitem__(self, index):
76 | path = self.imgs[index]
77 | img = self.loader(path)
78 | if self.transform is not None:
79 | img = self.transform(img)
80 | if self.return_paths:
81 | return img, path
82 | else:
83 | return img
84 |
85 | def __len__(self):
86 | return len(self.imgs)
87 |
--------------------------------------------------------------------------------
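
Usage note: make_dataset flattens a directory into a single list of file paths, while make_grouped_dataset keeps one inner list per sub-folder, which is how every video dataset above represents a sequence of frames. A small sketch, assuming the usual datasets/<name>/train_A layout (paths hypothetical):

    from data.image_folder import make_grouped_dataset, check_path_valid

    A_paths = make_grouped_dataset('datasets/Cityscapes/train_A')  # [[seq0_frame0, seq0_frame1, ...], [seq1_frame0, ...], ...]
    B_paths = make_grouped_dataset('datasets/Cityscapes/train_B')
    check_path_valid(A_paths, B_paths)   # same number of sequences, same number of frames per sequence

    print(len(A_paths), 'sequences;', len(A_paths[0]), 'frames in the first sequence')
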
/data/keypoint2img.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | from PIL import Image
3 | import numpy as np
4 | import json
5 | import glob
6 | from scipy.optimize import curve_fit
7 | import warnings
8 |
9 | def func(x, a, b, c):
10 | return a * x**2 + b * x + c
11 |
12 | def linear(x, a, b):
13 | return a * x + b
14 |
15 | def setColor(im, yy, xx, color):
16 | if len(im.shape) == 3:
17 | if (im[yy, xx] == 0).all():
18 | im[yy, xx, 0], im[yy, xx, 1], im[yy, xx, 2] = color[0], color[1], color[2]
19 | else:
20 | im[yy, xx, 0] = ((im[yy, xx, 0].astype(float) + color[0]) / 2).astype(np.uint8)
21 | im[yy, xx, 1] = ((im[yy, xx, 1].astype(float) + color[1]) / 2).astype(np.uint8)
22 | im[yy, xx, 2] = ((im[yy, xx, 2].astype(float) + color[2]) / 2).astype(np.uint8)
23 | else:
24 | im[yy, xx] = color[0]
25 |
26 | def drawEdge(im, x, y, bw=1, color=(255,255,255), draw_end_points=False):
27 | if x is not None and x.size:
28 | h, w = im.shape[0], im.shape[1]
29 | # edge
30 | for i in range(-bw, bw):
31 | for j in range(-bw, bw):
32 | yy = np.maximum(0, np.minimum(h-1, y+i))
33 | xx = np.maximum(0, np.minimum(w-1, x+j))
34 | setColor(im, yy, xx, color)
35 |
36 | # edge endpoints
37 | if draw_end_points:
38 | for i in range(-bw*2, bw*2):
39 | for j in range(-bw*2, bw*2):
40 | if (i**2) + (j**2) < (4 * bw**2):
41 | yy = np.maximum(0, np.minimum(h-1, np.array([y[0], y[-1]])+i))
42 | xx = np.maximum(0, np.minimum(w-1, np.array([x[0], x[-1]])+j))
43 | setColor(im, yy, xx, color)
44 |
45 | def interpPoints(x, y):
46 | if abs(x[:-1] - x[1:]).max() < abs(y[:-1] - y[1:]).max():
47 | curve_y, curve_x = interpPoints(y, x)
48 | if curve_y is None:
49 | return None, None
50 | else:
51 | with warnings.catch_warnings():
52 | warnings.simplefilter("ignore")
53 | if len(x) < 3:
54 | popt, _ = curve_fit(linear, x, y)
55 | else:
56 | popt, _ = curve_fit(func, x, y)
57 | if abs(popt[0]) > 1:
58 | return None, None
59 | if x[0] > x[-1]:
60 | x = list(reversed(x))
61 | y = list(reversed(y))
62 | curve_x = np.linspace(x[0], x[-1], int(x[-1] - x[0]))
63 | if len(x) < 3:
64 | curve_y = linear(curve_x, *popt)
65 | else:
66 | curve_y = func(curve_x, *popt)
67 | return curve_x.astype(int), curve_y.astype(int)
68 |
69 | def read_keypoints(json_input, size, random_drop_prob=0, remove_face_labels=False, basic_point_only=False):
70 | with open(json_input, encoding='utf-8') as f:
71 | keypoint_dicts = json.loads(f.read())["people"]
72 |
73 | edge_lists = define_edge_lists(basic_point_only)
74 | w, h = size
75 | pose_img = np.zeros((h, w, 3), np.uint8)
76 | for keypoint_dict in keypoint_dicts:
77 | pose_pts = np.array(keypoint_dict["pose_keypoints_2d"]).reshape(25, 3)
78 | face_pts = np.array(keypoint_dict["face_keypoints_2d"]).reshape(70, 3)
79 | hand_pts_l = np.array(keypoint_dict["hand_left_keypoints_2d"]).reshape(21, 3)
80 | hand_pts_r = np.array(keypoint_dict["hand_right_keypoints_2d"]).reshape(21, 3)
81 | pts = [extract_valid_keypoints(pts, edge_lists) for pts in [pose_pts, face_pts, hand_pts_l, hand_pts_r]]
82 | pose_img += connect_keypoints(pts, edge_lists, size, random_drop_prob, remove_face_labels, basic_point_only)
83 | return pose_img
84 |
85 | def extract_valid_keypoints(pts, edge_lists):
86 | pose_edge_list, _, hand_edge_list, _, face_list = edge_lists
87 | p = pts.shape[0]
88 | thre = 0.1 if p == 70 else 0.01
89 | output = np.zeros((p, 2))
90 |
91 | if p == 70: # face
92 | for edge_list in face_list:
93 | for edge in edge_list:
94 | if (pts[edge, 2] > thre).all():
95 | output[edge, :] = pts[edge, :2]
96 | elif p == 21: # hand
97 | for edge in hand_edge_list:
98 | if (pts[edge, 2] > thre).all():
99 | output[edge, :] = pts[edge, :2]
100 | else: # pose
101 | valid = (pts[:, 2] > thre)
102 | output[valid, :] = pts[valid, :2]
103 |
104 | return output
105 |
106 | def connect_keypoints(pts, edge_lists, size, random_drop_prob, remove_face_labels, basic_point_only):
107 | pose_pts, face_pts, hand_pts_l, hand_pts_r = pts
108 | w, h = size
109 | output_edges = np.zeros((h, w, 3), np.uint8)
110 | pose_edge_list, pose_color_list, hand_edge_list, hand_color_list, face_list = edge_lists
111 |
112 | if random_drop_prob > 0 and remove_face_labels:
113 | # add random noise to keypoints
114 | pose_pts[[0,15,16,17,18], :] += 5 * np.random.randn(5,2)
115 | face_pts[:,0] += 2 * np.random.randn()
116 | face_pts[:,1] += 2 * np.random.randn()
117 |
118 | ### pose
119 | for i, edge in enumerate(pose_edge_list):
120 | x, y = pose_pts[edge, 0], pose_pts[edge, 1]
121 | if (np.random.rand() > random_drop_prob) and (0 not in x):
122 | curve_x, curve_y = interpPoints(x, y)
123 | drawEdge(output_edges, curve_x, curve_y, bw=3, color=pose_color_list[i], draw_end_points=True)
124 |
125 | if not basic_point_only:
126 | ### hand
127 | for hand_pts in [hand_pts_l, hand_pts_r]: # for left and right hand
128 | if np.random.rand() > random_drop_prob:
129 | for i, edge in enumerate(hand_edge_list): # for each finger
130 | for j in range(0, len(edge)-1): # for each part of the finger
131 | sub_edge = edge[j:j+2]
132 | x, y = hand_pts[sub_edge, 0], hand_pts[sub_edge, 1]
133 | if 0 not in x:
134 | line_x, line_y = interpPoints(x, y)
135 | drawEdge(output_edges, line_x, line_y, bw=1, color=hand_color_list[i], draw_end_points=True)
136 |
137 | ### face
138 | edge_len = 2
139 | if (np.random.rand() > random_drop_prob):
140 | for edge_list in face_list:
141 | for edge in edge_list:
142 | for i in range(0, max(1, len(edge)-1), edge_len-1):
143 | sub_edge = edge[i:i+edge_len]
144 | x, y = face_pts[sub_edge, 0], face_pts[sub_edge, 1]
145 | if 0 not in x:
146 | curve_x, curve_y = interpPoints(x, y)
147 | drawEdge(output_edges, curve_x, curve_y, draw_end_points=True)
148 |
149 | return output_edges
150 |
151 | def define_edge_lists(basic_point_only):
152 | ### pose
153 | pose_edge_list = []
154 | pose_color_list = []
155 | if not basic_point_only:
156 | pose_edge_list += [[17, 15], [15, 0], [ 0, 16], [16, 18]] # head
157 | pose_color_list += [[153, 0,153], [153, 0,102], [102, 0,153], [ 51, 0,153]]
158 |
159 | pose_edge_list += [
160 | [ 0, 1], [ 1, 8], # body
161 | [ 1, 2], [ 2, 3], [ 3, 4], # right arm
162 | [ 1, 5], [ 5, 6], [ 6, 7], # left arm
163 | [ 8, 9], [ 9, 10], [10, 11], [11, 24], [11, 22], [22, 23], # right leg
164 | [ 8, 12], [12, 13], [13, 14], [14, 21], [14, 19], [19, 20] # left leg
165 | ]
166 | pose_color_list += [
167 | [153, 0, 51], [153, 0, 0],
168 | [153, 51, 0], [153,102, 0], [153,153, 0],
169 | [102,153, 0], [ 51,153, 0], [ 0,153, 0],
170 | [ 0,153, 51], [ 0,153,102], [ 0,153,153], [ 0,153,153], [ 0,153,153], [ 0,153,153],
171 | [ 0,102,153], [ 0, 51,153], [ 0, 0,153], [ 0, 0,153], [ 0, 0,153], [ 0, 0,153]
172 | ]
173 |
174 | ### hand
175 | hand_edge_list = [
176 | [0, 1, 2, 3, 4],
177 | [0, 5, 6, 7, 8],
178 | [0, 9, 10, 11, 12],
179 | [0, 13, 14, 15, 16],
180 | [0, 17, 18, 19, 20]
181 | ]
182 | hand_color_list = [
183 | [204,0,0], [163,204,0], [0,204,82], [0,82,204], [163,0,204]
184 | ]
185 |
186 | ### face
187 | face_list = [
188 | #[range(0, 17)], # face
189 | [range(17, 22)], # left eyebrow
190 | [range(22, 27)], # right eyebrow
191 | [range(27, 31), range(31, 36)], # nose
192 | [[36,37,38,39], [39,40,41,36]], # left eye
193 | [[42,43,44,45], [45,46,47,42]], # right eye
194 | [range(48, 55), [54,55,56,57,58,59,48]], # mouth
195 | ]
196 | return pose_edge_list, pose_color_list, hand_edge_list, hand_color_list, face_list
--------------------------------------------------------------------------------
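
Usage note: read_keypoints rasterizes one OpenPose JSON (pose, face and hand keypoints) into an RGB label map of the requested size, which PoseDataset then feeds through the same transforms as the real frames. A minimal sketch with hypothetical paths:

    from PIL import Image
    from data.keypoint2img import read_keypoints

    json_path = 'datasets/pose/train_openpose/video0/frame0000_keypoints.json'   # standard OpenPose --write_json output
    pose_img = read_keypoints(json_path, size=(512, 256),    # (w, h) of the output label map
                              random_drop_prob=0,            # keypoint dropout is only used during training
                              remove_face_labels=False,
                              basic_point_only=False)
    Image.fromarray(pose_img).save('pose_label.png')          # colored skeleton drawn on a black background
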
/data/pose_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import torchvision.transforms as transforms
3 | import torch
4 | from PIL import Image
5 | import numpy as np
6 |
7 | from data.base_dataset import BaseDataset, get_img_params, get_transform, get_video_params, concat_frame
8 | from data.image_folder import make_grouped_dataset, check_path_valid
9 | from data.keypoint2img import read_keypoints
10 |
11 | class PoseDataset(BaseDataset):
12 | def initialize(self, opt):
13 | self.opt = opt
14 | self.root = opt.dataroot
15 |
16 | self.dir_dp = os.path.join(opt.dataroot, opt.phase + '_densepose')
17 | self.dir_op = os.path.join(opt.dataroot, opt.phase + '_openpose')
18 | self.dir_img = os.path.join(opt.dataroot, opt.phase + '_img')
19 | self.img_paths = sorted(make_grouped_dataset(self.dir_img))
20 | if not opt.openpose_only:
21 | self.dp_paths = sorted(make_grouped_dataset(self.dir_dp))
22 | check_path_valid(self.dp_paths, self.img_paths)
23 | if not opt.densepose_only:
24 | self.op_paths = sorted(make_grouped_dataset(self.dir_op))
25 | check_path_valid(self.op_paths, self.img_paths)
26 |
27 | self.init_frame_idx(self.img_paths)
28 |
29 | def __getitem__(self, index):
30 | A, B, _, seq_idx = self.update_frame_idx(self.img_paths, index)
31 | img_paths = self.img_paths[seq_idx]
32 | n_frames_total, start_idx, t_step = get_video_params(self.opt, self.n_frames_total, len(img_paths), self.frame_idx)
33 |
34 | img = Image.open(img_paths[start_idx]).convert('RGB')
35 | size = img.size
36 | params = get_img_params(self.opt, size)
37 |
38 | frame_range = list(range(n_frames_total)) if (self.opt.isTrain or self.A is None) else [self.opt.n_frames_G-1]
39 | for i in frame_range:
40 | img_path = img_paths[start_idx + i * t_step]
41 | if not self.opt.openpose_only:
42 | dp_path = self.dp_paths[seq_idx][start_idx + i * t_step]
43 | Di = self.get_image(dp_path, size, params, input_type='densepose')
44 | Di[2,:,:] = ((Di[2,:,:] * 0.5 + 0.5) * 255 / 24 - 0.5) / 0.5
45 | if not self.opt.densepose_only:
46 | op_path = self.op_paths[seq_idx][start_idx + i * t_step]
47 | Oi = self.get_image(op_path, size, params, input_type='openpose')
48 |
49 | if self.opt.openpose_only:
50 | Ai = Oi
51 | elif self.opt.densepose_only:
52 | Ai = Di
53 | else:
54 | Ai = torch.cat([Di, Oi])
55 | Bi = self.get_image(img_path, size, params, input_type='img')
56 |
57 | Ai, Bi = self.crop(Ai), self.crop(Bi) # only crop the central half region to save time
58 | A = concat_frame(A, Ai, n_frames_total)
59 | B = concat_frame(B, Bi, n_frames_total)
60 |
61 | if not self.opt.isTrain:
62 | self.A, self.B = A, B
63 | self.frame_idx += 1
64 | change_seq = False if self.opt.isTrain else self.change_seq
65 | return_list = {'A': A, 'B': B, 'inst': 0, 'A_path': img_path, 'change_seq': change_seq}
66 |
67 | return return_list
68 |
69 | def get_image(self, A_path, size, params, input_type):
70 | if input_type != 'openpose':
71 | A_img = Image.open(A_path).convert('RGB')
72 | else:
73 | random_drop_prob = self.opt.random_drop_prob if self.opt.isTrain else 0
74 | A_img = Image.fromarray(read_keypoints(A_path, size, random_drop_prob, self.opt.remove_face_labels, self.opt.basic_point_only))
75 |
76 | if input_type == 'densepose' and self.opt.isTrain:
77 | # randomly remove labels
78 | A_np = np.array(A_img)
79 | part_labels = A_np[:,:,2]
80 | for part_id in range(1, 25):
81 | if (np.random.rand() < self.opt.random_drop_prob):
82 | A_np[(part_labels == part_id), :] = 0
83 | if self.opt.remove_face_labels:
84 | A_np[(part_labels == 23) | (part_labels == 24), :] = 0
85 | A_img = Image.fromarray(A_np)
86 |
87 | is_img = input_type == 'img'
88 | method = Image.BICUBIC if is_img else Image.NEAREST
89 | transform_scaleA = get_transform(self.opt, params, method=method)
90 | A_scaled = transform_scaleA(A_img)
91 | return A_scaled
92 |
93 | def crop(self, Ai):
94 | w = Ai.size()[2]
95 | base = 32
96 | x_cen = w // 2
97 | bs = int(w * 0.25) // base * base
98 | return Ai[:,:,(x_cen-bs):(x_cen+bs)]
99 |
100 | def normalize_pose(self, A_img, target_yc, target_len, first=False):
101 | w, h = A_img.size
102 | A_np = np.array(A_img)
103 |
104 | if first == True:
105 | part_labels = A_np[:,:,2]
106 | part_coords = np.nonzero((part_labels == 1) | (part_labels == 2))
107 | y, x = part_coords[0], part_coords[1]
108 |
109 | ys, ye = y.min(), y.max()
110 | min_i, max_i = np.argmin(y), np.argmax(y)
111 | v_min = A_np[y[min_i], x[min_i], 1] / 255
112 | v_max = A_np[y[max_i], x[max_i], 1] / 255
113 | ylen = (ye-ys) / (v_max-v_min)
114 | yc = (0.5-v_min) / (v_max-v_min) * (ye-ys) + ys
115 |
116 | ratio = target_len / ylen
117 | offset_y = int(yc - (target_yc / ratio))
118 | offset_x = int(w * (1 - 1/ratio) / 2)
119 |
120 | padding = int(max(0, max(-offset_y, int(offset_y + h/ratio) - h)))
121 | padding = int(max(padding, max(-offset_x, int(offset_x + w/ratio) - w)))
122 | offset_y += padding
123 | offset_x += padding
124 | self.offset_y, self.offset_x = offset_y, offset_x
125 | self.ratio, self.padding = ratio, padding
126 |
127 | p = self.padding
128 | A_np = np.pad(A_np, ((p,p),(p,p),(0,0)), 'constant', constant_values=0)
129 | A_np = A_np[self.offset_y:int(self.offset_y + h/self.ratio), self.offset_x:int(self.offset_x + w/self.ratio), :]
130 | A_img = Image.fromarray(A_np)
131 | A_img = A_img.resize((w, h))
132 | return A_img
133 |
134 | def __len__(self):
135 | return sum(self.frames_count)
136 |
137 | def name(self):
138 | return 'PoseDataset'
139 |
140 | """
141 | DensePose label
142 | 0 = Background
143 | 1, 2 = Torso
144 | 3 = Right Hand
145 | 4 = Left Hand
146 | 5 = Right Foot
147 | 6 = Left Foot
148 | 7, 9 = Upper Leg Right
149 | 8, 10 = Upper Leg Left
150 | 11, 13 = Lower Leg Right
151 | 12, 14 = Lower Leg Left
152 | 15, 17 = Upper Arm Left
153 | 16, 18 = Upper Arm Right
154 | 19, 21 = Lower Arm Left
155 | 20, 22 = Lower Arm Right
156 | 23, 24 = Head """
157 |
--------------------------------------------------------------------------------
/data/temporal_dataset.py:
--------------------------------------------------------------------------------
1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
3 | import os.path
4 | import random
5 | import torch
6 | from data.base_dataset import BaseDataset, get_img_params, get_transform, get_video_params
7 | from data.image_folder import make_grouped_dataset, check_path_valid
8 | from PIL import Image
9 | import numpy as np
10 |
11 | class TemporalDataset(BaseDataset):
12 | def initialize(self, opt):
13 | self.opt = opt
14 | self.root = opt.dataroot
15 | self.dir_A = os.path.join(opt.dataroot, opt.phase + '_A')
16 | self.dir_B = os.path.join(opt.dataroot, opt.phase + '_B')
17 | self.A_is_label = self.opt.label_nc != 0
18 |
19 | self.A_paths = sorted(make_grouped_dataset(self.dir_A))
20 | self.B_paths = sorted(make_grouped_dataset(self.dir_B))
21 | check_path_valid(self.A_paths, self.B_paths)
22 | if opt.use_instance:
23 | self.dir_inst = os.path.join(opt.dataroot, opt.phase + '_inst')
24 | self.I_paths = sorted(make_grouped_dataset(self.dir_inst))
25 | check_path_valid(self.A_paths, self.I_paths)
26 |
27 | self.n_of_seqs = len(self.A_paths) # number of sequences to train
28 | self.seq_len_max = max([len(A) for A in self.A_paths])
29 | self.n_frames_total = self.opt.n_frames_total # current number of frames to train in a single iteration
30 |
31 | def __getitem__(self, index):
32 | tG = self.opt.n_frames_G
33 | A_paths = self.A_paths[index % self.n_of_seqs]
34 | B_paths = self.B_paths[index % self.n_of_seqs]
35 | if self.opt.use_instance:
36 | I_paths = self.I_paths[index % self.n_of_seqs]
37 |
38 | # setting parameters
39 | n_frames_total, start_idx, t_step = get_video_params(self.opt, self.n_frames_total, len(A_paths), index)
40 |
41 | # setting transformers
42 | B_img = Image.open(B_paths[start_idx]).convert('RGB')
43 | params = get_img_params(self.opt, B_img.size)
44 | transform_scaleB = get_transform(self.opt, params)
45 | transform_scaleA = get_transform(self.opt, params, method=Image.NEAREST, normalize=False) if self.A_is_label else transform_scaleB
46 |
47 | # read in images
48 | A = B = inst = 0
49 | for i in range(n_frames_total):
50 | A_path = A_paths[start_idx + i * t_step]
51 | B_path = B_paths[start_idx + i * t_step]
52 | Ai = self.get_image(A_path, transform_scaleA, is_label=self.A_is_label)
53 | Bi = self.get_image(B_path, transform_scaleB)
54 |
55 | A = Ai if i == 0 else torch.cat([A, Ai], dim=0)
56 | B = Bi if i == 0 else torch.cat([B, Bi], dim=0)
57 |
58 | if self.opt.use_instance:
59 | I_path = I_paths[start_idx + i * t_step]
60 | Ii = self.get_image(I_path, transform_scaleA) * 255.0
61 | inst = Ii if i == 0 else torch.cat([inst, Ii], dim=0)
62 |
63 | return_list = {'A': A, 'B': B, 'inst': inst, 'A_path': A_path, 'B_paths': B_path}
64 | return return_list
65 |
66 | def get_image(self, A_path, transform_scaleA, is_label=False):
67 | A_img = Image.open(A_path)
68 | A_scaled = transform_scaleA(A_img)
69 | if is_label:
70 | A_scaled *= 255.0
71 | return A_scaled
72 |
73 | def __len__(self):
74 | return len(self.A_paths)
75 |
76 | def name(self):
77 | return 'TemporalDataset'
--------------------------------------------------------------------------------
/data/test_dataset.py:
--------------------------------------------------------------------------------
1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
3 | import os.path
4 | import torch
5 | from data.base_dataset import BaseDataset, get_img_params, get_transform, concat_frame
6 | from data.image_folder import make_grouped_dataset, check_path_valid
7 | from PIL import Image
8 | import numpy as np
9 |
10 | class TestDataset(BaseDataset):
11 | def initialize(self, opt):
12 | self.opt = opt
13 | self.root = opt.dataroot
14 | self.dir_A = os.path.join(opt.dataroot, opt.phase + '_A')
15 | self.dir_B = os.path.join(opt.dataroot, opt.phase + '_B')
16 | self.use_real = opt.use_real_img
17 | self.A_is_label = self.opt.label_nc != 0
18 |
19 | self.A_paths = sorted(make_grouped_dataset(self.dir_A))
20 | if self.use_real:
21 | self.B_paths = sorted(make_grouped_dataset(self.dir_B))
22 | check_path_valid(self.A_paths, self.B_paths)
23 | if self.opt.use_instance:
24 | self.dir_inst = os.path.join(opt.dataroot, opt.phase + '_inst')
25 | self.I_paths = sorted(make_grouped_dataset(self.dir_inst))
26 | check_path_valid(self.A_paths, self.I_paths)
27 |
28 | self.init_frame_idx(self.A_paths)
29 |
30 | def __getitem__(self, index):
31 | self.A, self.B, self.I, seq_idx = self.update_frame_idx(self.A_paths, index)
32 | tG = self.opt.n_frames_G
33 |
34 | A_img = Image.open(self.A_paths[seq_idx][0]).convert('RGB')
35 | params = get_img_params(self.opt, A_img.size)
36 | transform_scaleB = get_transform(self.opt, params)
37 | transform_scaleA = get_transform(self.opt, params, method=Image.NEAREST, normalize=False) if self.A_is_label else transform_scaleB
38 | frame_range = list(range(tG)) if self.A is None else [tG-1]
39 |
40 | for i in frame_range:
41 | A_path = self.A_paths[seq_idx][self.frame_idx + i]
42 | Ai = self.get_image(A_path, transform_scaleA, is_label=self.A_is_label)
43 | self.A = concat_frame(self.A, Ai, tG)
44 |
45 | if self.use_real:
46 | B_path = self.B_paths[seq_idx][self.frame_idx + i]
47 | Bi = self.get_image(B_path, transform_scaleB)
48 | self.B = concat_frame(self.B, Bi, tG)
49 | else:
50 | self.B = 0
51 |
52 | if self.opt.use_instance:
53 | I_path = self.I_paths[seq_idx][self.frame_idx + i]
54 | Ii = self.get_image(I_path, transform_scaleA) * 255.0
55 | self.I = concat_frame(self.I, Ii, tG)
56 | else:
57 | self.I = 0
58 |
59 | self.frame_idx += 1
60 | return_list = {'A': self.A, 'B': self.B, 'inst': self.I, 'A_path': A_path, 'change_seq': self.change_seq}
61 | return return_list
62 |
63 | def get_image(self, A_path, transform_scaleA, is_label=False):
64 | A_img = Image.open(A_path)
65 | A_scaled = transform_scaleA(A_img)
66 | if is_label:
67 | A_scaled *= 255.0
68 | return A_scaled
69 |
70 | def __len__(self):
71 | return sum(self.frames_count)
72 |
73 | def n_of_seqs(self):
74 | return len(self.A_paths)
75 |
76 | def name(self):
77 | return 'TestDataset'
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
2 |
3 | RUN apt-get update && apt-get install -y rsync htop git openssh-server
4 |
5 | RUN apt-get install python3-pip -y
6 | RUN ln -s /usr/bin/python3 /usr/bin/python
7 | RUN pip3 install --upgrade pip
8 |
9 | #Torch and dependencies:
10 | RUN pip install http://download.pytorch.org/whl/cu80/torch-0.4.0-cp35-cp35m-linux_x86_64.whl
11 | RUN pip install torchvision cffi tensorboardX
12 | RUN pip install tqdm scipy scikit-image colorama==0.3.7
13 | RUN pip install setproctitle pytz ipython
14 |
15 | #vid2vid dependencies
16 | RUN apt-get install libglib2.0-0 libsm6 libxrender1 -y
17 | RUN pip install dominate requests opencv-python
18 |
19 | #pix2pixHD, required for initializing training
20 | RUN git clone https://github.com/NVIDIA/pix2pixHD /pix2pixHD
21 |
22 | #vid2vid install
23 | RUN git clone https://github.com/NVIDIA/vid2vid /vid2vid
24 | WORKDIR /vid2vid
25 | #download flownet2 model dependencies
26 | #WARNING: we had an instance where these scripts needed to be re-run after the docker instance was launched
27 | RUN python scripts/download_flownet2.py
28 | RUN python scripts/download_models_flownet2.py
29 |
30 |
31 |
--------------------------------------------------------------------------------
/docker/launch_docker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Thanks @dustinfreeman for providing the script
3 | sudo nvidia-docker build -t vid2vid:CUDA9-py35 .
4 |
5 | sudo nvidia-docker run --rm -ti --ipc=host --shm-size 8G -v $(pwd):/vid2vid --workdir=/vid2vid vid2vid:CUDA9-py35 /bin/bash
6 |
--------------------------------------------------------------------------------
/docker/pre_docker_install.sh:
--------------------------------------------------------------------------------
1 | #Thanks @dustinfreeman for providing the script
2 |
3 | #Install docker-ce https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository
4 | sudo apt-get install -y \
5 | apt-transport-https \
6 | ca-certificates \
7 | curl \
8 | software-properties-common
9 |
10 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
11 |
12 | sudo add-apt-repository \
13 | "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
14 | $(lsb_release -cs) \
15 | stable"
16 | sudo apt-get update
17 | sudo apt-get install -y docker-ce
18 |
19 |
20 | #Install nvidia-docker2 https://github.com/NVIDIA/nvidia-docker
21 | # Add the package repositories
22 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | \
23 | sudo apt-key add -
24 | distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
25 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \
26 | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
27 | sudo apt-get update
28 |
29 | sudo apt-get install -y nvidia-docker2
30 | sudo pkill -SIGHUP dockerd
31 |
32 |
33 | #NVIDIA drivers
34 | #This triggers an interactive request to the user.
35 | #Would love an alternative!
36 | DEBIAN_FRONTEND=noninteractive
37 | sudo apt-get install -y keyboard-configuration
38 | sudo apt install -y ubuntu-drivers-common
39 |
40 | sudo apt-get install -y nvidia-384
41 |
42 | #Reboot so the nvidia driver finishes install
43 | sudo reboot
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/imgs/city_change_labels.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/city_change_labels.gif
--------------------------------------------------------------------------------
/imgs/city_change_styles.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/city_change_styles.gif
--------------------------------------------------------------------------------
/imgs/face.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/face.gif
--------------------------------------------------------------------------------
/imgs/face_multiple.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/face_multiple.gif
--------------------------------------------------------------------------------
/imgs/framePredict.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/framePredict.gif
--------------------------------------------------------------------------------
/imgs/pose.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/pose.gif
--------------------------------------------------------------------------------
/imgs/teaser.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/imgs/teaser.gif
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/__init__.py
--------------------------------------------------------------------------------
/models/base_model.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import numpy as np
3 | import torch
4 | from .networks import get_grid
5 |
6 | class BaseModel(torch.nn.Module):
7 | def name(self):
8 | return 'BaseModel'
9 |
10 | def initialize(self, opt):
11 | self.opt = opt
12 | self.gpu_ids = opt.gpu_ids
13 | self.isTrain = opt.isTrain
14 | self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor
15 | self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)
16 |
17 | def set_input(self, input):
18 | self.input = input
19 |
20 | def forward(self):
21 | pass
22 |
23 | # used in test time, no backprop
24 | def test(self):
25 | pass
26 |
27 | def get_image_paths(self):
28 | pass
29 |
30 | def optimize_parameters(self):
31 | pass
32 |
33 | def get_current_visuals(self):
34 | return self.input
35 |
36 | def get_current_errors(self):
37 | return {}
38 |
39 | def save(self, label):
40 | pass
41 |
42 | # helper saving function that can be used by subclasses
43 | def save_network(self, network, network_label, epoch_label, gpu_ids):
44 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
45 | save_path = os.path.join(self.save_dir, save_filename)
46 | torch.save(network.cpu().state_dict(), save_path)
47 | if len(gpu_ids) and torch.cuda.is_available():
48 | network.cuda(gpu_ids[0])
49 |
50 | def resolve_version(self):
51 | import torch._utils
52 | try:
53 | torch._utils._rebuild_tensor_v2
54 | except AttributeError:
55 | def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
56 | tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
57 | tensor.requires_grad = requires_grad
58 | tensor._backward_hooks = backward_hooks
59 | return tensor
60 | torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
61 |
62 | # helper loading function that can be used by subclasses
63 | def load_network(self, network, network_label, epoch_label, save_dir=''):
64 | self.resolve_version()
65 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
66 | if not save_dir:
67 | save_dir = self.save_dir
68 | save_path = os.path.join(save_dir, save_filename)
69 | if not os.path.isfile(save_path):
70 | print('%s does not exist yet!' % save_path)
71 | if 'G0' in network_label:
72 | raise Exception('Generator must exist!')
73 | else:
74 | #network.load_state_dict(torch.load(save_path))
75 | try:
76 | network.load_state_dict(torch.load(save_path))
77 | except:
78 | pretrained_dict = torch.load(save_path)
79 | model_dict = network.state_dict()
80 |
81 | ### printout layers in pretrained model
82 | initialized = set()
83 | for k, v in pretrained_dict.items():
84 | initialized.add(k.split('.')[0])
85 | #print('pretrained model has following layers: ')
86 | #print(sorted(initialized))
87 |
88 | try:
89 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
90 | network.load_state_dict(pretrained_dict)
91 | print('Pretrained network %s has excessive layers; Only loading layers that are used' % network_label)
92 | except:
93 | print('Pretrained network %s has fewer layers; The following are not initialized:' % network_label)
94 | if sys.version_info >= (3,0):
95 | not_initialized = set()
96 | else:
97 | from sets import Set
98 | not_initialized = Set()
99 | for k, v in pretrained_dict.items():
100 | if v.size() == model_dict[k].size():
101 | model_dict[k] = v
102 |
103 | for k, v in model_dict.items():
104 | if k not in pretrained_dict or v.size() != pretrained_dict[k].size():
105 | not_initialized.add(k.split('.')[0])
106 | print(sorted(not_initialized))
107 | network.load_state_dict(model_dict)
108 |
109 | def concat(self, tensors, dim=0):
110 | if tensors[0] is not None and tensors[1] is not None:
111 | if isinstance(tensors[0], list):
112 | tensors_cat = []
113 | for i in range(len(tensors[0])):
114 | tensors_cat.append(self.concat([tensors[0][i], tensors[1][i]], dim=dim))
115 | return tensors_cat
116 | return torch.cat([tensors[0], tensors[1]], dim=dim)
117 | elif tensors[0] is not None:
118 | return tensors[0]
119 | else:
120 | return tensors[1]
121 |
122 | def build_pyr(self, tensor, nearest=False): # build image pyramid from a single image
123 | if tensor is None:
124 | return [None] * self.n_scales
125 | tensor = [tensor]
126 | if nearest:
127 | downsample = torch.nn.AvgPool2d(1, stride=2)
128 | else:
129 | downsample = torch.nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False)
130 | for s in range(1, self.n_scales):
131 | b, t, c, h, w = tensor[-1].size()
132 | down = downsample(tensor[-1].view(-1, h, w)).view(b, t, c, h//2, w//2)
133 | tensor.append(down)
134 | return tensor
135 |
136 | def dists_min(self, a, b, num=1):
137 | dists = torch.sum(torch.sum((a-b)*(a-b), dim=0), dim=0)
138 | if num == 1:
139 | val, idx = torch.min(dists, dim=0)
140 | #idx = [idx]
141 | else:
142 | val, idx = torch.sort(dists, dim=0)
143 | idx = idx[:num]
144 | return idx.cpu().numpy().astype(int)
145 |
146 | def get_edges(self, t):
147 | edge = torch.cuda.ByteTensor(t.size()).zero_()
148 | edge[:,:,:,:,1:] = edge[:,:,:,:,1:] | (t[:,:,:,:,1:] != t[:,:,:,:,:-1])
149 | edge[:,:,:,:,:-1] = edge[:,:,:,:,:-1] | (t[:,:,:,:,1:] != t[:,:,:,:,:-1])
150 | edge[:,:,:,1:,:] = edge[:,:,:,1:,:] | (t[:,:,:,1:,:] != t[:,:,:,:-1,:])
151 | edge[:,:,:,:-1,:] = edge[:,:,:,:-1,:] | (t[:,:,:,1:,:] != t[:,:,:,:-1,:])
152 | return edge.float()
153 |
154 | def update_learning_rate(self, epoch, model):
155 | lr = self.opt.lr * (1 - (epoch - self.opt.niter) / self.opt.niter_decay)
156 | for param_group in getattr(self, 'optimizer_' + model).param_groups:
157 | param_group['lr'] = lr
158 | print('update learning rate: %f -> %f' % (self.old_lr, lr))
159 | self.old_lr = lr
160 |
161 | def update_fixed_params(self): # finetune all scales instead of just finest scale
162 | params = []
163 | for s in range(self.n_scales):
164 | params += list(getattr(self, 'netG'+str(s)).parameters())
165 | self.optimizer_G = torch.optim.Adam(params, lr=self.old_lr, betas=(self.opt.beta1, 0.999))
166 | self.finetune_all = True
167 | print('------------ Now finetuning all scales -----------')
168 |
169 | def update_training_batch(self, ratio): # increase number of backpropagated frames and number of frames in each GPU
170 | nfb = self.n_frames_bp
171 | nfl = self.n_frames_load
172 | if nfb < nfl:
173 | nfb = min(self.opt.max_frames_backpropagate, 2**ratio)
174 | self.n_frames_bp = nfl // int(np.ceil(float(nfl) / nfb))
175 | print('-------- Updating number of backpropagated frames to %d ----------' % self.n_frames_bp)
176 |
177 | if self.n_frames_per_gpu < self.opt.max_frames_per_gpu:
178 | self.n_frames_per_gpu = min(self.n_frames_per_gpu*2, self.opt.max_frames_per_gpu)
179 | self.n_frames_load = self.n_gpus * self.n_frames_per_gpu
180 | print('-------- Updating number of frames per gpu to %d ----------' % self.n_frames_per_gpu)
181 |
182 |
183 | def grid_sample(self, input1, input2):
184 | if self.opt.fp16: # not sure if it's necessary
185 | return torch.nn.functional.grid_sample(input1.float(), input2.float(), mode='bilinear', padding_mode='border').half()
186 | else:
187 | return torch.nn.functional.grid_sample(input1, input2, mode='bilinear', padding_mode='border')
188 |
189 | def resample(self, image, flow):
190 | b, c, h, w = image.size()
191 | if not hasattr(self, 'grid') or self.grid.size() != flow.size():
192 | self.grid = get_grid(b, h, w, gpu_id=flow.get_device(), dtype=flow.dtype)
193 | flow = torch.cat([flow[:, 0:1, :, :] / ((w - 1.0) / 2.0), flow[:, 1:2, :, :] / ((h - 1.0) / 2.0)], dim=1)
194 | final_grid = (self.grid + flow).permute(0, 2, 3, 1).cuda(image.get_device())
195 | output = self.grid_sample(image, final_grid)
196 | return output
--------------------------------------------------------------------------------
/models/flownet.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import sys
4 | from .base_model import BaseModel
5 |
6 | class FlowNet(BaseModel):
7 | def name(self):
8 | return 'FlowNet'
9 |
10 | def initialize(self, opt):
11 | BaseModel.initialize(self, opt)
12 |
13 | # flownet 2
14 | from .flownet2_pytorch import models as flownet2_models
15 | from .flownet2_pytorch.utils import tools as flownet2_tools
16 | from .flownet2_pytorch.networks.resample2d_package.resample2d import Resample2d
17 |
18 | self.flowNet = flownet2_tools.module_to_dict(flownet2_models)['FlowNet2'](fp16=opt.fp16).cuda(self.gpu_ids[0])
19 | checkpoint = torch.load('models/flownet2_pytorch/FlowNet2_checkpoint.pth.tar')
20 | self.flowNet.load_state_dict(checkpoint['state_dict'])
21 | self.flowNet.eval()
22 | self.resample = Resample2d()
23 | self.downsample = torch.nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False)
24 |
25 | def forward(self, input_A, input_B, dummy_bs=0):
26 | with torch.no_grad():
27 | if input_A.get_device() == self.gpu_ids[0]:
28 | input_A, input_B = input_A[dummy_bs:], input_B[dummy_bs:]
29 | if input_A.size(0) == 0:
30 | b, n, c, h, w = input_A.size()
31 | return self.Tensor(1, n, 2, h, w), self.Tensor(1, n, 1, h, w)
32 | size = input_A.size()
33 | assert(len(size) == 4 or len(size) == 5)
34 | if len(size) == 5:
35 | b, n, c, h, w = size
36 | input_A = input_A.contiguous().view(-1, c, h, w)
37 | input_B = input_B.contiguous().view(-1, c, h, w)
38 | flow, conf = self.compute_flow_and_conf(input_A, input_B)
39 | return flow.view(b, n, 2, h, w), conf.view(b, n, 1, h, w)
40 | else:
41 | return self.compute_flow_and_conf(input_A, input_B)
42 |
43 | def compute_flow_and_conf(self, im1, im2):
44 | assert(im1.size()[1] == 3)
45 | assert(im1.size() == im2.size())
46 | old_h, old_w = im1.size()[2], im1.size()[3]
47 | new_h, new_w = old_h//64*64, old_w//64*64 # FlowNet2 requires spatial dims divisible by 64
48 | if old_h != new_h:
49 | downsample = torch.nn.Upsample(size=(new_h, new_w), mode='bilinear')
50 | upsample = torch.nn.Upsample(size=(old_h, old_w), mode='bilinear')
51 | im1 = downsample(im1)
52 | im2 = downsample(im2)
53 | data1 = torch.cat([im1.unsqueeze(2), im2.unsqueeze(2)], dim=2)
54 | flow1 = self.flowNet(data1)
55 | conf = (self.norm(im1 - self.resample(im2, flow1)) < 0.02).float() # confidence map: 1 where the flow-warped frame matches im1
56 | if old_h != new_h:
57 | flow1 = upsample(flow1) * old_h / new_h
58 | conf = upsample(conf)
59 | return flow1.detach(), conf.detach()
60 |
61 | def norm(self, t):
62 | return torch.sum(t*t, dim=1, keepdim=True)
63 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2017 NVIDIA CORPORATION
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
--------------------------------------------------------------------------------
/models/flownet2_pytorch/README.md:
--------------------------------------------------------------------------------
1 | # flownet2-pytorch
2 |
3 | Pytorch implementation of [FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks](https://arxiv.org/abs/1612.01925).
4 |
5 | Multiple GPU training is supported, and the code provides examples for training or inference on [MPI-Sintel](http://sintel.is.tue.mpg.de/) clean and final datasets. The same commands can be used for training or inference with other datasets. See below for more detail.
6 |
7 | Inference using fp16 (half-precision) is also supported.
8 |
9 | For more help, type
10 |
11 | python main.py --help
12 |
13 | ## Network architectures
14 | Below are the different FlowNet neural network architectures that are provided; a short loading sketch follows the list.
15 | A batchnorm version of each network is also available.
16 |
17 | - **FlowNet2S**
18 | - **FlowNet2C**
19 | - **FlowNet2CS**
20 | - **FlowNet2CSS**
21 | - **FlowNet2SD**
22 | - **FlowNet2**
23 |
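A minimal loading sketch, assuming the custom layers below are installed and a converted `FlowNet2_checkpoint.pth.tar` is available; the `args` fields mirror what `convert.py` sets up, and paths and input sizes are placeholders:

```python
# Rough sketch: construct FlowNet2 and estimate flow for one frame pair.
# Assumes the custom CUDA layers are installed; run from the flownet2-pytorch root.
import argparse
import torch
import models  # flownet2-pytorch models.py

args = argparse.Namespace(rgb_max=255.0, fp16=False)  # fields mirrored from convert.py
net = models.FlowNet2(args).cuda().eval()

checkpoint = torch.load('FlowNet2_checkpoint.pth.tar')  # produced by convert.py
net.load_state_dict(checkpoint['state_dict'])

# FlowNet2 takes a frame pair stacked along an extra dimension: (B, 3, 2, H, W),
# with H and W divisible by 64.
im1 = torch.rand(1, 3, 384, 512).cuda() * 255
im2 = torch.rand(1, 3, 384, 512).cuda() * 255
pair = torch.cat([im1.unsqueeze(2), im2.unsqueeze(2)], dim=2)

with torch.no_grad():
    flow = net(pair)  # predicted flow, shape (B, 2, H, W)
print(flow.shape)
```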
24 | ## Custom layers
25 |
26 | `FlowNet2` or `FlowNet2C*` architectures rely on the custom layers `Resample2d` or `Correlation`.
27 | A PyTorch implementation of these layers with CUDA kernels is available at [./networks](./networks).
28 | Note: currently, half-precision kernels are not available for these layers.
29 |
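A minimal sketch of warping with `Resample2d`, assuming the `resample2d_cuda` extension from `install.sh` is on the Python path and a GPU is available; this mirrors how `models/flownet.py` builds its warping-error confidence map:

```python
# Hedged sketch: warp frame2 towards frame1 using a dense flow field.
import torch
from networks.resample2d_package.resample2d import Resample2d

resample = Resample2d()                     # kernel_size=1 by default
frame2 = torch.rand(1, 3, 256, 320).cuda()  # image to be warped
flow = torch.zeros(1, 2, 256, 320).cuda()   # per-pixel (dx, dy) displacements

warped = resample(frame2, flow)             # zero flow leaves frame2 unchanged
print(warped.shape)                         # torch.Size([1, 3, 256, 320])
```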
30 | ## Data Loaders
31 |
32 | Dataloaders for FlyingChairs, FlyingThings, ChairsSDHom and ImagesFromFolder are available in [datasets.py](./datasets.py).
33 |
34 | ## Loss Functions
35 |
36 | L1 and L2 losses with multi-scale support are available in [losses.py](./losses.py).
37 |
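A minimal sketch of the loss interface, assuming it is run from this directory: each loss returns both its own value and the end-point error (EPE), so both can be logged. The `args` argument is stored but not otherwise used by `L1Loss`.

```python
# Hedged sketch: compute L1 loss and end-point error for a predicted flow field.
import torch
from losses import L1Loss

criterion = L1Loss(args=None)
pred_flow = torch.rand(4, 2, 96, 128)  # (batch, 2, H, W) predicted flow
gt_flow = torch.rand(4, 2, 96, 128)    # ground-truth flow

loss, epe = criterion(pred_flow, gt_flow)
print(loss.item(), epe.item())
```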
38 | ## Installation
39 |
40 | # get flownet2-pytorch source
41 | git clone https://github.com/NVIDIA/flownet2-pytorch.git
42 | cd flownet2-pytorch
43 |
44 | # install custom layers
45 | bash install.sh
46 |
47 | ### Python requirements
48 | Currently, the code supports Python 3 with the following packages:
49 | * numpy
50 | * PyTorch ( == 0.4.1, for <= 0.4.0 see branch [python36-PyTorch0.4](https://github.com/NVIDIA/flownet2-pytorch/tree/python36-PyTorch0.4))
51 | * scipy
52 | * scikit-image
53 | * tensorboardX
54 | * colorama, tqdm, setproctitle
55 |
56 | ## Converted Caffe Pre-trained Models
57 | We've included caffe pre-trained models. Should you use these pre-trained weights, please adhere to the [license agreements](https://drive.google.com/file/d/1TVv0BnNFh3rpHZvD-easMb9jYrPE2Eqd/view?usp=sharing).
58 |
59 | * [FlowNet2](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view?usp=sharing)[620MB]
60 | * [FlowNet2-C](https://drive.google.com/file/d/1BFT6b7KgKJC8rA59RmOVAXRM_S7aSfKE/view?usp=sharing)[149MB]
61 | * [FlowNet2-CS](https://drive.google.com/file/d/1iBJ1_o7PloaINpa8m7u_7TsLCX0Dt_jS/view?usp=sharing)[297MB]
62 | * [FlowNet2-CSS](https://drive.google.com/file/d/157zuzVf4YMN6ABAQgZc8rRmR5cgWzSu8/view?usp=sharing)[445MB]
63 | * [FlowNet2-CSS-ft-sd](https://drive.google.com/file/d/1R5xafCIzJCXc8ia4TGfC65irmTNiMg6u/view?usp=sharing)[445MB]
64 | * [FlowNet2-S](https://drive.google.com/file/d/1V61dZjFomwlynwlYklJHC-TLfdFom3Lg/view?usp=sharing)[148MB]
65 | * [FlowNet2-SD](https://drive.google.com/file/d/1QW03eyYG_vD-dT-Mx4wopYvtPu_msTKn/view?usp=sharing)[173MB]
66 |
67 | ## Inference
68 | # Example on MPISintel Clean
69 | python main.py --inference --model FlowNet2 --save_flow --inference_dataset MpiSintelClean \
70 | --inference_dataset_root /path/to/mpi-sintel/clean/dataset \
71 | --resume /path/to/checkpoints
72 |
73 | ## Training and validation
74 |
75 | # Example on MPISintel Final and Clean, with L1Loss on FlowNet2 model
76 | python main.py --batch_size 8 --model FlowNet2 --loss=L1Loss --optimizer=Adam --optimizer_lr=1e-4 \
77 | --training_dataset MpiSintelFinal --training_dataset_root /path/to/mpi-sintel/final/dataset \
78 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset
79 |
80 | # Example on MPISintel Final and Clean, with MultiScale loss on FlowNet2C model
81 | python main.py --batch_size 8 --model FlowNet2C --optimizer=Adam --optimizer_lr=1e-4 --loss=MultiScale --loss_norm=L1 \
82 | --loss_numScales=5 --loss_startScale=4 --crop_size 384 512 \
83 | --training_dataset FlyingChairs --training_dataset_root /path/to/flying-chairs/dataset \
84 | --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset
85 |
86 | ## Results on MPI-Sintel
87 | [](https://www.youtube.com/watch?v=HtBmabY8aeU "Predicted flows on MPI-Sintel")
88 |
89 | ## Reference
90 | If you find this implementation useful in your work, please acknowledge it appropriately and cite the paper:
91 | ````
92 | @InProceedings{IMKDB17,
93 | author = "E. Ilg and N. Mayer and T. Saikia and M. Keuper and A. Dosovitskiy and T. Brox",
94 | title = "FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks",
95 | booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)",
96 | month = "Jul",
97 | year = "2017",
98 | url = "http://lmb.informatik.uni-freiburg.de//Publications/2017/IMKDB17"
99 | }
100 | ````
101 | ```
102 | @misc{flownet2-pytorch,
103 | author = {Fitsum Reda and Robert Pottorff and Jon Barker and Bryan Catanzaro},
104 | title = {flownet2-pytorch: Pytorch implementation of FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks},
105 | year = {2017},
106 | publisher = {GitHub},
107 | journal = {GitHub repository},
108 | howpublished = {\url{https://github.com/NVIDIA/flownet2-pytorch}}
109 | }
110 | ```
111 | ## Related Optical Flow Work from Nvidia
112 | Code (in Caffe and Pytorch): [PWC-Net](https://github.com/NVlabs/PWC-Net)
113 | Paper : [PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume](https://arxiv.org/abs/1709.02371).
114 |
115 | ## Acknowledgments
116 | Parts of this code were derived, as noted in the code, from [ClementPinard/FlowNetPytorch](https://github.com/ClementPinard/FlowNetPytorch).
117 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/__init__.py
--------------------------------------------------------------------------------
/models/flownet2_pytorch/convert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2.7
2 |
3 | import caffe
4 | from caffe.proto import caffe_pb2
5 | import sys, os
6 |
7 | import torch
8 | import torch.nn as nn
9 |
10 | import argparse, tempfile
11 | import numpy as np
12 |
13 | parser = argparse.ArgumentParser()
14 | parser.add_argument('caffe_model', help='input model in hdf5 or caffemodel format')
15 | parser.add_argument('prototxt_template',help='prototxt template')
16 | parser.add_argument('flownet2_pytorch', help='path to flownet2-pytorch')
17 |
18 | args = parser.parse_args()
19 |
20 | args.rgb_max = 255
21 | args.fp16 = False
22 | args.grads = {}
23 |
24 | # load models
25 | sys.path.append(args.flownet2_pytorch)
26 |
27 | import models
28 | from utils.param_utils import *
29 |
30 | width = 256
31 | height = 256
32 | keys = {'TARGET_WIDTH': width,
33 | 'TARGET_HEIGHT': height,
34 | 'ADAPTED_WIDTH':width,
35 | 'ADAPTED_HEIGHT':height,
36 | 'SCALE_WIDTH':1.,
37 | 'SCALE_HEIGHT':1.,}
38 |
39 | template = '\n'.join(np.loadtxt(args.prototxt_template, dtype=str, delimiter='\n'))
40 | for k in keys:
41 | template = template.replace('$%s$'%(k),str(keys[k]))
42 |
43 | prototxt = tempfile.NamedTemporaryFile(mode='w', delete=True)
44 | prototxt.write(template)
45 | prototxt.flush()
46 |
47 | net = caffe.Net(prototxt.name, args.caffe_model, caffe.TEST)
48 |
49 | weights = {}
50 | biases = {}
51 |
52 | for k, v in list(net.params.items()):
53 | weights[k] = np.array(v[0].data).reshape(v[0].data.shape)
54 | biases[k] = np.array(v[1].data).reshape(v[1].data.shape)
55 | print((k, weights[k].shape, biases[k].shape))
56 |
57 | if 'FlowNet2/' in args.caffe_model:
58 | model = models.FlowNet2(args)
59 |
60 | parse_flownetc(model.flownetc.modules(), weights, biases)
61 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
62 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_')
63 | parse_flownetsd(model.flownets_d.modules(), weights, biases, param_prefix='netsd_')
64 | parse_flownetfusion(model.flownetfusion.modules(), weights, biases, param_prefix='fuse_')
65 |
66 | state = {'epoch': 0,
67 | 'state_dict': model.state_dict(),
68 | 'best_EPE': 1e10}
69 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2_checkpoint.pth.tar'))
70 |
71 | elif 'FlowNet2-C/' in args.caffe_model:
72 | model = models.FlowNet2C(args)
73 |
74 | parse_flownetc(model.modules(), weights, biases)
75 | state = {'epoch': 0,
76 | 'state_dict': model.state_dict(),
77 | 'best_EPE': 1e10}
78 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-C_checkpoint.pth.tar'))
79 |
80 | elif 'FlowNet2-CS/' in args.caffe_model:
81 | model = models.FlowNet2CS(args)
82 |
83 | parse_flownetc(model.flownetc.modules(), weights, biases)
84 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
85 |
86 | state = {'epoch': 0,
87 | 'state_dict': model.state_dict(),
88 | 'best_EPE': 1e10}
89 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CS_checkpoint.pth.tar'))
90 |
91 | elif 'FlowNet2-CSS/' in args.caffe_model:
92 | model = models.FlowNet2CSS(args)
93 |
94 | parse_flownetc(model.flownetc.modules(), weights, biases)
95 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
96 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_')
97 |
98 | state = {'epoch': 0,
99 | 'state_dict': model.state_dict(),
100 | 'best_EPE': 1e10}
101 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS_checkpoint.pth.tar'))
102 |
103 | elif 'FlowNet2-CSS-ft-sd/' in args.caffe_model:
104 | model = models.FlowNet2CSS(args)
105 |
106 | parse_flownetc(model.flownetc.modules(), weights, biases)
107 | parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_')
108 | parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_')
109 |
110 | state = {'epoch': 0,
111 | 'state_dict': model.state_dict(),
112 | 'best_EPE': 1e10}
113 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS-ft-sd_checkpoint.pth.tar'))
114 |
115 | elif 'FlowNet2-S/' in args.caffe_model:
116 | model = models.FlowNet2S(args)
117 |
118 | parse_flownetsonly(model.modules(), weights, biases, param_prefix='')
119 | state = {'epoch': 0,
120 | 'state_dict': model.state_dict(),
121 | 'best_EPE': 1e10}
122 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-S_checkpoint.pth.tar'))
123 |
124 | elif 'FlowNet2-SD/' in args.caffe_model:
125 | model = models.FlowNet2SD(args)
126 |
127 | parse_flownetsd(model.modules(), weights, biases, param_prefix='')
128 |
129 | state = {'epoch': 0,
130 | 'state_dict': model.state_dict(),
131 | 'best_EPE': 1e10}
132 | torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-SD_checkpoint.pth.tar'))
133 |
134 | else:
135 | print(('model type could not be determined from input caffe model %s'%(args.caffe_model)))
136 | quit()
137 | print(("done converting ", args.caffe_model))
--------------------------------------------------------------------------------
/models/flownet2_pytorch/download_caffe_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | sudo rm -rf flownet2-docker
3 | sudo git clone https://github.com/lmb-freiburg/flownet2-docker
4 | cd flownet2-docker
5 |
6 | sudo sed -i '$ a RUN apt-get update && apt-get install -y python-pip \
7 | RUN pip install --upgrade pip \
8 | RUN pip install numpy -I \
9 | RUN pip install http://download.pytorch.org/whl/cu80/torch-0.2.0.post3-cp27-cp27mu-manylinux1_x86_64.whl \
10 | RUN pip install cffi ipython' Dockerfile
11 |
12 | sudo make
13 |
14 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cd ./networks/correlation_package
3 | python setup.py install --user
4 | cd ../resample2d_package
5 | python setup.py install --user
6 | cd ../channelnorm_package
7 | python setup.py install --user
8 | cd ..
9 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/launch_docker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | sudo nvidia-docker build -t $USER/pytorch:CUDA8-py27 .
3 | sudo nvidia-docker run --rm -ti --volume=$(pwd):/flownet2-pytorch:rw --workdir=/flownet2-pytorch --ipc=host $USER/pytorch:CUDA8-py27 /bin/bash
4 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/losses.py:
--------------------------------------------------------------------------------
1 | '''
2 | Portions of this code copyright 2017, Clement Pinard
3 | '''
4 |
5 | # freda (todo) : adversarial loss
6 |
7 | import torch
8 | import torch.nn as nn
9 | import math
10 |
11 | def EPE(input_flow, target_flow):
12 | return torch.norm(target_flow-input_flow,p=2,dim=1).mean()
13 |
14 | class L1(nn.Module):
15 | def __init__(self):
16 | super(L1, self).__init__()
17 | def forward(self, output, target):
18 | lossvalue = torch.abs(output - target).mean()
19 | return lossvalue
20 |
21 | class L2(nn.Module):
22 | def __init__(self):
23 | super(L2, self).__init__()
24 | def forward(self, output, target):
25 | lossvalue = torch.norm(output-target,p=2,dim=1).mean()
26 | return lossvalue
27 |
28 | class L1Loss(nn.Module):
29 | def __init__(self, args):
30 | super(L1Loss, self).__init__()
31 | self.args = args
32 | self.loss = L1()
33 | self.loss_labels = ['L1', 'EPE']
34 |
35 | def forward(self, output, target):
36 | lossvalue = self.loss(output, target)
37 | epevalue = EPE(output, target)
38 | return [lossvalue, epevalue]
39 |
40 | class L2Loss(nn.Module):
41 | def __init__(self, args):
42 | super(L2Loss, self).__init__()
43 | self.args = args
44 | self.loss = L2()
45 | self.loss_labels = ['L2', 'EPE']
46 |
47 | def forward(self, output, target):
48 | lossvalue = self.loss(output, target)
49 | epevalue = EPE(output, target)
50 | return [lossvalue, epevalue]
51 |
52 | class MultiScale(nn.Module):
53 | def __init__(self, args, startScale = 4, numScales = 5, l_weight= 0.32, norm= 'L1'):
54 | super(MultiScale,self).__init__()
55 |
56 | self.startScale = startScale
57 | self.numScales = numScales
58 | self.loss_weights = torch.FloatTensor([(l_weight / 2 ** scale) for scale in range(self.numScales)])
59 | self.args = args
60 | self.l_type = norm
61 | self.div_flow = 0.05
62 | assert(len(self.loss_weights) == self.numScales)
63 |
64 | if self.l_type == 'L1':
65 | self.loss = L1()
66 | else:
67 | self.loss = L2()
68 |
69 | self.multiScales = [nn.AvgPool2d(self.startScale * (2**scale), self.startScale * (2**scale)) for scale in range(self.numScales)]
70 | self.loss_labels = ['MultiScale-'+self.l_type, 'EPE']
71 |
72 | def forward(self, output, target):
73 | lossvalue = 0
74 | epevalue = 0
75 |
76 | if type(output) is tuple:
77 | target = self.div_flow * target
78 | for i, output_ in enumerate(output):
79 | target_ = self.multiScales[i](target)
80 | epevalue += self.loss_weights[i]*EPE(output_, target_)
81 | lossvalue += self.loss_weights[i]*self.loss(output_, target_)
82 | return [lossvalue, epevalue]
83 | else:
84 | epevalue += EPE(output, target)
85 | lossvalue += self.loss(output, target)
86 | return [lossvalue, epevalue]
87 |
88 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/FlowNetC.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import init
4 |
5 | import math
6 | import numpy as np
7 |
8 | from .correlation_package.correlation import Correlation
9 |
10 | from .submodules import *
11 | 'Parameter count , 39,175,298 '
12 |
13 | class FlowNetC(nn.Module):
14 | def __init__(self, args, batchNorm=True, div_flow = 20):
15 | super(FlowNetC,self).__init__()
16 | self.fp16 = args.fp16
17 | self.batchNorm = batchNorm
18 | self.div_flow = div_flow
19 |
20 | self.conv1 = conv(self.batchNorm, 3, 64, kernel_size=7, stride=2)
21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
23 | self.conv_redir = conv(self.batchNorm, 256, 32, kernel_size=1, stride=1)
24 |
25 | """if args.fp16:
26 | self.corr = nn.Sequential(
27 | tofp32(),
28 | Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1),
29 | tofp16())
30 | else:"""
31 | self.corr = Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1)
32 |
33 | self.corr_activation = nn.LeakyReLU(0.1,inplace=True)
34 | self.conv3_1 = conv(self.batchNorm, 473, 256)
35 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
36 | self.conv4_1 = conv(self.batchNorm, 512, 512)
37 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
38 | self.conv5_1 = conv(self.batchNorm, 512, 512)
39 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
40 | self.conv6_1 = conv(self.batchNorm,1024, 1024)
41 |
42 | self.deconv5 = deconv(1024,512)
43 | self.deconv4 = deconv(1026,256)
44 | self.deconv3 = deconv(770,128)
45 | self.deconv2 = deconv(386,64)
46 |
47 | self.predict_flow6 = predict_flow(1024)
48 | self.predict_flow5 = predict_flow(1026)
49 | self.predict_flow4 = predict_flow(770)
50 | self.predict_flow3 = predict_flow(386)
51 | self.predict_flow2 = predict_flow(194)
52 |
53 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
54 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
55 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
56 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
57 |
58 | for m in self.modules():
59 | if isinstance(m, nn.Conv2d):
60 | if m.bias is not None:
61 | init.uniform_(m.bias)
62 | init.xavier_uniform_(m.weight)
63 |
64 | if isinstance(m, nn.ConvTranspose2d):
65 | if m.bias is not None:
66 | init.uniform_(m.bias)
67 | init.xavier_uniform_(m.weight)
68 | # init_deconv_bilinear(m.weight)
69 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
70 |
71 | def forward(self, x):
72 | x1 = x[:,0:3,:,:]
73 | x2 = x[:,3::,:,:]
74 |
75 | out_conv1a = self.conv1(x1)
76 | out_conv2a = self.conv2(out_conv1a)
77 | out_conv3a = self.conv3(out_conv2a)
78 |
79 | # FlownetC bottom input stream
80 | out_conv1b = self.conv1(x2)
81 |
82 | out_conv2b = self.conv2(out_conv1b)
83 | out_conv3b = self.conv3(out_conv2b)
84 |
85 | # Merge streams
86 | if self.fp16:
87 | out_corr = self.corr(out_conv3a.float(), out_conv3b.float()).half() # False
88 | else:
89 | out_corr = self.corr(out_conv3a, out_conv3b) # False
90 | out_corr = self.corr_activation(out_corr)
91 |
92 | # Redirect top input stream and concatenate
93 | out_conv_redir = self.conv_redir(out_conv3a)
94 |
95 | in_conv3_1 = torch.cat((out_conv_redir, out_corr), 1)
96 |
97 | # Merged conv layers
98 | out_conv3_1 = self.conv3_1(in_conv3_1)
99 |
100 | out_conv4 = self.conv4_1(self.conv4(out_conv3_1))
101 |
102 | out_conv5 = self.conv5_1(self.conv5(out_conv4))
103 | out_conv6 = self.conv6_1(self.conv6(out_conv5))
104 |
105 | flow6 = self.predict_flow6(out_conv6)
106 | flow6_up = self.upsampled_flow6_to_5(flow6)
107 | out_deconv5 = self.deconv5(out_conv6)
108 |
109 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)
110 |
111 | flow5 = self.predict_flow5(concat5)
112 | flow5_up = self.upsampled_flow5_to_4(flow5)
113 | out_deconv4 = self.deconv4(concat5)
114 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)
115 |
116 | flow4 = self.predict_flow4(concat4)
117 | flow4_up = self.upsampled_flow4_to_3(flow4)
118 | out_deconv3 = self.deconv3(concat4)
119 | concat3 = torch.cat((out_conv3_1,out_deconv3,flow4_up),1)
120 |
121 | flow3 = self.predict_flow3(concat3)
122 | flow3_up = self.upsampled_flow3_to_2(flow3)
123 | out_deconv2 = self.deconv2(concat3)
124 | concat2 = torch.cat((out_conv2a,out_deconv2,flow3_up),1)
125 |
126 | flow2 = self.predict_flow2(concat2)
127 |
128 | if self.training:
129 | return flow2,flow3,flow4,flow5,flow6
130 | else:
131 | return flow2,
132 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/FlowNetFusion.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import init
4 |
5 | import math
6 | import numpy as np
7 |
8 | from .submodules import *
9 | 'Parameter count = 581,226'
10 |
11 | class FlowNetFusion(nn.Module):
12 | def __init__(self,args, batchNorm=True):
13 | super(FlowNetFusion,self).__init__()
14 |
15 | self.batchNorm = batchNorm
16 | self.conv0 = conv(self.batchNorm, 11, 64)
17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2)
18 | self.conv1_1 = conv(self.batchNorm, 64, 128)
19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2)
20 | self.conv2_1 = conv(self.batchNorm, 128, 128)
21 |
22 | self.deconv1 = deconv(128,32)
23 | self.deconv0 = deconv(162,16)
24 |
25 | self.inter_conv1 = i_conv(self.batchNorm, 162, 32)
26 | self.inter_conv0 = i_conv(self.batchNorm, 82, 16)
27 |
28 | self.predict_flow2 = predict_flow(128)
29 | self.predict_flow1 = predict_flow(32)
30 | self.predict_flow0 = predict_flow(16)
31 |
32 | self.upsampled_flow2_to_1 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
33 | self.upsampled_flow1_to_0 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
34 |
35 | for m in self.modules():
36 | if isinstance(m, nn.Conv2d):
37 | if m.bias is not None:
38 | init.uniform_(m.bias)
39 | init.xavier_uniform_(m.weight)
40 |
41 | if isinstance(m, nn.ConvTranspose2d):
42 | if m.bias is not None:
43 | init.uniform_(m.bias)
44 | init.xavier_uniform_(m.weight)
45 | # init_deconv_bilinear(m.weight)
46 |
47 | def forward(self, x):
48 | out_conv0 = self.conv0(x)
49 | out_conv1 = self.conv1_1(self.conv1(out_conv0))
50 | out_conv2 = self.conv2_1(self.conv2(out_conv1))
51 |
52 | flow2 = self.predict_flow2(out_conv2)
53 | flow2_up = self.upsampled_flow2_to_1(flow2)
54 | out_deconv1 = self.deconv1(out_conv2)
55 |
56 | concat1 = torch.cat((out_conv1,out_deconv1,flow2_up),1)
57 | out_interconv1 = self.inter_conv1(concat1)
58 | flow1 = self.predict_flow1(out_interconv1)
59 | flow1_up = self.upsampled_flow1_to_0(flow1)
60 | out_deconv0 = self.deconv0(concat1)
61 |
62 | concat0 = torch.cat((out_conv0,out_deconv0,flow1_up),1)
63 | out_interconv0 = self.inter_conv0(concat0)
64 | flow0 = self.predict_flow0(out_interconv0)
65 |
66 | return flow0
67 |
68 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/FlowNetS.py:
--------------------------------------------------------------------------------
1 | '''
2 | Portions of this code copyright 2017, Clement Pinard
3 | '''
4 |
5 | import torch
6 | import torch.nn as nn
7 | from torch.nn import init
8 |
9 | import math
10 | import numpy as np
11 |
12 | from .submodules import *
13 | 'Parameter count : 38,676,504 '
14 |
15 | class FlowNetS(nn.Module):
16 | def __init__(self, args, input_channels = 12, batchNorm=True):
17 | super(FlowNetS,self).__init__()
18 |
19 | self.batchNorm = batchNorm
20 | self.conv1 = conv(self.batchNorm, input_channels, 64, kernel_size=7, stride=2)
21 | self.conv2 = conv(self.batchNorm, 64, 128, kernel_size=5, stride=2)
22 | self.conv3 = conv(self.batchNorm, 128, 256, kernel_size=5, stride=2)
23 | self.conv3_1 = conv(self.batchNorm, 256, 256)
24 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
25 | self.conv4_1 = conv(self.batchNorm, 512, 512)
26 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
27 | self.conv5_1 = conv(self.batchNorm, 512, 512)
28 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
29 | self.conv6_1 = conv(self.batchNorm,1024, 1024)
30 |
31 | self.deconv5 = deconv(1024,512)
32 | self.deconv4 = deconv(1026,256)
33 | self.deconv3 = deconv(770,128)
34 | self.deconv2 = deconv(386,64)
35 |
36 | self.predict_flow6 = predict_flow(1024)
37 | self.predict_flow5 = predict_flow(1026)
38 | self.predict_flow4 = predict_flow(770)
39 | self.predict_flow3 = predict_flow(386)
40 | self.predict_flow2 = predict_flow(194)
41 |
42 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
43 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
44 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
45 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
46 |
47 | for m in self.modules():
48 | if isinstance(m, nn.Conv2d):
49 | if m.bias is not None:
50 | init.uniform_(m.bias)
51 | init.xavier_uniform_(m.weight)
52 |
53 | if isinstance(m, nn.ConvTranspose2d):
54 | if m.bias is not None:
55 | init.uniform_(m.bias)
56 | init.xavier_uniform_(m.weight)
57 | # init_deconv_bilinear(m.weight)
58 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
59 |
60 | def forward(self, x):
61 | out_conv1 = self.conv1(x)
62 |
63 | out_conv2 = self.conv2(out_conv1)
64 | out_conv3 = self.conv3_1(self.conv3(out_conv2))
65 | out_conv4 = self.conv4_1(self.conv4(out_conv3))
66 | out_conv5 = self.conv5_1(self.conv5(out_conv4))
67 | out_conv6 = self.conv6_1(self.conv6(out_conv5))
68 |
69 | flow6 = self.predict_flow6(out_conv6)
70 | flow6_up = self.upsampled_flow6_to_5(flow6)
71 | out_deconv5 = self.deconv5(out_conv6)
72 |
73 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)
74 | flow5 = self.predict_flow5(concat5)
75 | flow5_up = self.upsampled_flow5_to_4(flow5)
76 | out_deconv4 = self.deconv4(concat5)
77 |
78 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)
79 | flow4 = self.predict_flow4(concat4)
80 | flow4_up = self.upsampled_flow4_to_3(flow4)
81 | out_deconv3 = self.deconv3(concat4)
82 |
83 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1)
84 | flow3 = self.predict_flow3(concat3)
85 | flow3_up = self.upsampled_flow3_to_2(flow3)
86 | out_deconv2 = self.deconv2(concat3)
87 |
88 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1)
89 | flow2 = self.predict_flow2(concat2)
90 |
91 | if self.training:
92 | return flow2,flow3,flow4,flow5,flow6
93 | else:
94 | return flow2,
95 |
96 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/FlowNetSD.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import init
4 |
5 | import math
6 | import numpy as np
7 |
8 | from .submodules import *
9 | 'Parameter count = 45,371,666'
10 |
11 | class FlowNetSD(nn.Module):
12 | def __init__(self, args, batchNorm=True):
13 | super(FlowNetSD,self).__init__()
14 |
15 | self.batchNorm = batchNorm
16 | self.conv0 = conv(self.batchNorm, 6, 64)
17 | self.conv1 = conv(self.batchNorm, 64, 64, stride=2)
18 | self.conv1_1 = conv(self.batchNorm, 64, 128)
19 | self.conv2 = conv(self.batchNorm, 128, 128, stride=2)
20 | self.conv2_1 = conv(self.batchNorm, 128, 128)
21 | self.conv3 = conv(self.batchNorm, 128, 256, stride=2)
22 | self.conv3_1 = conv(self.batchNorm, 256, 256)
23 | self.conv4 = conv(self.batchNorm, 256, 512, stride=2)
24 | self.conv4_1 = conv(self.batchNorm, 512, 512)
25 | self.conv5 = conv(self.batchNorm, 512, 512, stride=2)
26 | self.conv5_1 = conv(self.batchNorm, 512, 512)
27 | self.conv6 = conv(self.batchNorm, 512, 1024, stride=2)
28 | self.conv6_1 = conv(self.batchNorm,1024, 1024)
29 |
30 | self.deconv5 = deconv(1024,512)
31 | self.deconv4 = deconv(1026,256)
32 | self.deconv3 = deconv(770,128)
33 | self.deconv2 = deconv(386,64)
34 |
35 | self.inter_conv5 = i_conv(self.batchNorm, 1026, 512)
36 | self.inter_conv4 = i_conv(self.batchNorm, 770, 256)
37 | self.inter_conv3 = i_conv(self.batchNorm, 386, 128)
38 | self.inter_conv2 = i_conv(self.batchNorm, 194, 64)
39 |
40 | self.predict_flow6 = predict_flow(1024)
41 | self.predict_flow5 = predict_flow(512)
42 | self.predict_flow4 = predict_flow(256)
43 | self.predict_flow3 = predict_flow(128)
44 | self.predict_flow2 = predict_flow(64)
45 |
46 | self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
47 | self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
48 | self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
49 | self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
50 |
51 | for m in self.modules():
52 | if isinstance(m, nn.Conv2d):
53 | if m.bias is not None:
54 | init.uniform_(m.bias)
55 | init.xavier_uniform_(m.weight)
56 |
57 | if isinstance(m, nn.ConvTranspose2d):
58 | if m.bias is not None:
59 | init.uniform_(m.bias)
60 | init.xavier_uniform_(m.weight)
61 | # init_deconv_bilinear(m.weight)
62 | self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
63 |
64 |
65 |
66 | def forward(self, x):
67 | out_conv0 = self.conv0(x)
68 | out_conv1 = self.conv1_1(self.conv1(out_conv0))
69 | out_conv2 = self.conv2_1(self.conv2(out_conv1))
70 |
71 | out_conv3 = self.conv3_1(self.conv3(out_conv2))
72 | out_conv4 = self.conv4_1(self.conv4(out_conv3))
73 | out_conv5 = self.conv5_1(self.conv5(out_conv4))
74 | out_conv6 = self.conv6_1(self.conv6(out_conv5))
75 |
76 | flow6 = self.predict_flow6(out_conv6)
77 | flow6_up = self.upsampled_flow6_to_5(flow6)
78 | out_deconv5 = self.deconv5(out_conv6)
79 |
80 | concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)
81 | out_interconv5 = self.inter_conv5(concat5)
82 | flow5 = self.predict_flow5(out_interconv5)
83 |
84 | flow5_up = self.upsampled_flow5_to_4(flow5)
85 | out_deconv4 = self.deconv4(concat5)
86 |
87 | concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)
88 | out_interconv4 = self.inter_conv4(concat4)
89 | flow4 = self.predict_flow4(out_interconv4)
90 | flow4_up = self.upsampled_flow4_to_3(flow4)
91 | out_deconv3 = self.deconv3(concat4)
92 |
93 | concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1)
94 | out_interconv3 = self.inter_conv3(concat3)
95 | flow3 = self.predict_flow3(out_interconv3)
96 | flow3_up = self.upsampled_flow3_to_2(flow3)
97 | out_deconv2 = self.deconv2(concat3)
98 |
99 | concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1)
100 | out_interconv2 = self.inter_conv2(concat2)
101 | flow2 = self.predict_flow2(out_interconv2)
102 |
103 | if self.training:
104 | return flow2,flow3,flow4,flow5,flow6
105 | else:
106 | return flow2,
107 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/__init__.py
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/channelnorm_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/channelnorm_package/__init__.py
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/channelnorm_package/channelnorm.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Function, Variable
2 | from torch.nn.modules.module import Module
3 | import channelnorm_cuda
4 |
5 | class ChannelNormFunction(Function):
6 |
7 | @staticmethod
8 | def forward(ctx, input1, norm_deg=2):
9 | assert input1.is_contiguous()
10 | b, _, h, w = input1.size()
11 | output = input1.new(b, 1, h, w).zero_()
12 |
13 | channelnorm_cuda.forward(input1, output, norm_deg)
14 | ctx.save_for_backward(input1, output)
15 | ctx.norm_deg = norm_deg
16 |
17 | return output
18 |
19 | @staticmethod
20 | def backward(ctx, grad_output):
21 | input1, output = ctx.saved_tensors
22 |
23 | grad_input1 = Variable(input1.new(input1.size()).zero_())
24 |
25 | channelnorm_cuda.backward(input1, output, grad_output.data,
26 | grad_input1.data, ctx.norm_deg)
27 |
28 | return grad_input1, None
29 |
30 |
31 | class ChannelNorm(Module):
32 |
33 | def __init__(self, norm_deg=2):
34 | super(ChannelNorm, self).__init__()
35 | self.norm_deg = norm_deg
36 |
37 | def forward(self, input1):
38 | return ChannelNormFunction.apply(input1, self.norm_deg)
39 |
40 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/channelnorm_package/channelnorm_cuda.cc:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | #include <ATen/ATen.h>
3 |
4 | #include "channelnorm_kernel.cuh"
5 |
6 | int channelnorm_cuda_forward(
7 | at::Tensor& input1,
8 | at::Tensor& output,
9 | int norm_deg) {
10 |
11 | channelnorm_kernel_forward(input1, output, norm_deg);
12 | return 1;
13 | }
14 |
15 |
16 | int channelnorm_cuda_backward(
17 | at::Tensor& input1,
18 | at::Tensor& output,
19 | at::Tensor& gradOutput,
20 | at::Tensor& gradInput1,
21 | int norm_deg) {
22 |
23 | channelnorm_kernel_backward(input1, output, gradOutput, gradInput1, norm_deg);
24 | return 1;
25 | }
26 |
27 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
28 | m.def("forward", &channelnorm_cuda_forward, "Channel norm forward (CUDA)");
29 | m.def("backward", &channelnorm_cuda_backward, "Channel norm backward (CUDA)");
30 | }
31 |
32 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/channelnorm_package/channelnorm_kernel.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <ATen/Context.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 |
5 | #include "channelnorm_kernel.cuh"
6 |
7 | #define CUDA_NUM_THREADS 512
8 |
9 | #define DIM0(TENSOR) ((TENSOR).x)
10 | #define DIM1(TENSOR) ((TENSOR).y)
11 | #define DIM2(TENSOR) ((TENSOR).z)
12 | #define DIM3(TENSOR) ((TENSOR).w)
13 |
14 | #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))])
15 |
16 | using at::Half;
17 |
18 | template <typename scalar_t>
19 | __global__ void kernel_channelnorm_update_output(
20 | const int n,
21 | const scalar_t* __restrict__ input1,
22 | const long4 input1_size,
23 | const long4 input1_stride,
24 | scalar_t* __restrict__ output,
25 | const long4 output_size,
26 | const long4 output_stride,
27 | int norm_deg) {
28 |
29 | int index = blockIdx.x * blockDim.x + threadIdx.x;
30 |
31 | if (index >= n) {
32 | return;
33 | }
34 |
35 | int dim_b = DIM0(output_size);
36 | int dim_c = DIM1(output_size);
37 | int dim_h = DIM2(output_size);
38 | int dim_w = DIM3(output_size);
39 | int dim_chw = dim_c * dim_h * dim_w;
40 |
41 | int b = ( index / dim_chw ) % dim_b;
42 | int y = ( index / dim_w ) % dim_h;
43 | int x = ( index ) % dim_w;
44 |
45 | int i1dim_c = DIM1(input1_size);
46 | int i1dim_h = DIM2(input1_size);
47 | int i1dim_w = DIM3(input1_size);
48 | int i1dim_chw = i1dim_c * i1dim_h * i1dim_w;
49 | int i1dim_hw = i1dim_h * i1dim_w;
50 |
51 | float result = 0.0;
52 |
53 | for (int c = 0; c < i1dim_c; ++c) {
54 | int i1Index = b * i1dim_chw + c * i1dim_hw + y * i1dim_w + x;
55 | scalar_t val = input1[i1Index];
56 | result += static_cast<float>(val * val);
57 | }
58 | result = sqrt(result);
59 | output[index] = static_cast<scalar_t>(result);
60 | }
61 |
62 |
63 | template <typename scalar_t>
64 | __global__ void kernel_channelnorm_backward_input1(
65 | const int n,
66 | const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
67 | const scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride,
68 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
69 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride,
70 | int norm_deg) {
71 |
72 | int index = blockIdx.x * blockDim.x + threadIdx.x;
73 |
74 | if (index >= n) {
75 | return;
76 | }
77 |
78 | float val = 0.0;
79 |
80 | int dim_b = DIM0(gradInput_size);
81 | int dim_c = DIM1(gradInput_size);
82 | int dim_h = DIM2(gradInput_size);
83 | int dim_w = DIM3(gradInput_size);
84 | int dim_chw = dim_c * dim_h * dim_w;
85 | int dim_hw = dim_h * dim_w;
86 |
87 | int b = ( index / dim_chw ) % dim_b;
88 | int y = ( index / dim_w ) % dim_h;
89 | int x = ( index ) % dim_w;
90 |
91 |
92 | int outIndex = b * dim_hw + y * dim_w + x;
93 | val = static_cast<float>(gradOutput[outIndex]) * static_cast<float>(input1[index]) / (static_cast<float>(output[outIndex])+1e-9);
94 | gradInput[index] = static_cast<scalar_t>(val);
95 |
96 | }
97 |
98 | void channelnorm_kernel_forward(
99 | at::Tensor& input1,
100 | at::Tensor& output,
101 | int norm_deg) {
102 |
103 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
104 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
105 |
106 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
107 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
108 |
109 | int n = output.numel();
110 |
111 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_forward", ([&] {
112 |
113 | kernel_channelnorm_update_output<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
114 | //at::globalContext().getCurrentCUDAStream() >>>(
115 | n,
116 | input1.data<scalar_t>(),
117 | input1_size,
118 | input1_stride,
119 | output.data<scalar_t>(),
120 | output_size,
121 | output_stride,
122 | norm_deg);
123 |
124 | }));
125 |
126 | // TODO: ATen-equivalent check
127 |
128 | // THCudaCheck(cudaGetLastError());
129 | }
130 |
131 | void channelnorm_kernel_backward(
132 | at::Tensor& input1,
133 | at::Tensor& output,
134 | at::Tensor& gradOutput,
135 | at::Tensor& gradInput1,
136 | int norm_deg) {
137 |
138 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
139 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
140 |
141 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
142 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
143 |
144 | const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3));
145 | const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3));
146 |
147 | const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3));
148 | const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3));
149 |
150 | int n = gradInput1.numel();
151 |
152 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_backward_input1", ([&] {
153 |
154 | kernel_channelnorm_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
155 | //at::globalContext().getCurrentCUDAStream() >>>(
156 | n,
157 | input1.data<scalar_t>(),
158 | input1_size,
159 | input1_stride,
160 | output.data<scalar_t>(),
161 | output_size,
162 | output_stride,
163 | gradOutput.data<scalar_t>(),
164 | gradOutput_size,
165 | gradOutput_stride,
166 | gradInput1.data<scalar_t>(),
167 | gradInput1_size,
168 | gradInput1_stride,
169 | norm_deg
170 | );
171 |
172 | }));
173 |
174 | // TODO: Add ATen-equivalent check
175 |
176 | // THCudaCheck(cudaGetLastError());
177 | }
178 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/channelnorm_package/channelnorm_kernel.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 |
5 | void channelnorm_kernel_forward(
6 | at::Tensor& input1,
7 | at::Tensor& output,
8 | int norm_deg);
9 |
10 |
11 | void channelnorm_kernel_backward(
12 | at::Tensor& input1,
13 | at::Tensor& output,
14 | at::Tensor& gradOutput,
15 | at::Tensor& gradInput1,
16 | int norm_deg);
17 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/channelnorm_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++11']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_52,code=sm_52',
12 | '-gencode', 'arch=compute_60,code=sm_60',
13 | '-gencode', 'arch=compute_61,code=sm_61',
14 | '-gencode', 'arch=compute_70,code=sm_70',
15 | '-gencode', 'arch=compute_70,code=compute_70'
16 | ]
17 |
18 | setup(
19 | name='channelnorm_cuda',
20 | ext_modules=[
21 | CUDAExtension('channelnorm_cuda', [
22 | 'channelnorm_cuda.cc',
23 | 'channelnorm_kernel.cu'
24 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
25 | ],
26 | cmdclass={
27 | 'build_ext': BuildExtension
28 | })
29 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/correlation_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/correlation_package/__init__.py
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/correlation_package/correlation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules.module import Module
3 | from torch.autograd import Function
4 | import correlation_cuda
5 |
6 | class CorrelationFunction(Function):
7 |
8 | def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
9 | super(CorrelationFunction, self).__init__()
10 | self.pad_size = pad_size
11 | self.kernel_size = kernel_size
12 | self.max_displacement = max_displacement
13 | self.stride1 = stride1
14 | self.stride2 = stride2
15 | self.corr_multiply = corr_multiply
16 | # self.out_channel = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1)
17 |
18 | def forward(self, input1, input2):
19 | self.save_for_backward(input1, input2)
20 |
21 | with torch.cuda.device_of(input1):
22 | rbot1 = input1.new()
23 | rbot2 = input2.new()
24 | output = input1.new()
25 |
26 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output,
27 | self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)
28 |
29 | return output
30 |
31 | def backward(self, grad_output):
32 | input1, input2 = self.saved_tensors
33 |
34 | with torch.cuda.device_of(input1):
35 | rbot1 = input1.new()
36 | rbot2 = input2.new()
37 |
38 | grad_input1 = input1.new()
39 | grad_input2 = input2.new()
40 |
41 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
42 | self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)
43 |
44 | return grad_input1, grad_input2
45 |
46 |
47 | class Correlation(Module):
48 | def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
49 | super(Correlation, self).__init__()
50 | self.pad_size = pad_size
51 | self.kernel_size = kernel_size
52 | self.max_displacement = max_displacement
53 | self.stride1 = stride1
54 | self.stride2 = stride2
55 | self.corr_multiply = corr_multiply
56 |
57 | def forward(self, input1, input2):
58 |
59 | result = CorrelationFunction(self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)(input1, input2)
60 |
61 | return result
62 |
63 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/correlation_package/correlation_cuda.cc:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | #include <ATen/ATen.h>
3 | #include <ATen/Context.h>
4 | #include <ATen/cuda/CUDAContext.h>
5 | #include <stdio.h>
6 | #include <iostream>
7 |
8 | #include "correlation_cuda_kernel.cuh"
9 |
10 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output,
11 | int pad_size,
12 | int kernel_size,
13 | int max_displacement,
14 | int stride1,
15 | int stride2,
16 | int corr_type_multiply)
17 | {
18 |
19 | int batchSize = input1.size(0);
20 |
21 | int nInputChannels = input1.size(1);
22 | int inputHeight = input1.size(2);
23 | int inputWidth = input1.size(3);
24 |
25 | int kernel_radius = (kernel_size - 1) / 2;
26 | int border_radius = kernel_radius + max_displacement;
27 |
28 | int paddedInputHeight = inputHeight + 2 * pad_size;
29 | int paddedInputWidth = inputWidth + 2 * pad_size;
30 |
31 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1);
32 |
33 | int outputHeight = ceil(static_cast<float>(paddedInputHeight - 2 * border_radius) / static_cast<float>(stride1));
34 | int outputwidth = ceil(static_cast<float>(paddedInputWidth - 2 * border_radius) / static_cast<float>(stride1));
35 |
36 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
37 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
38 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth});
39 |
40 | rInput1.fill_(0);
41 | rInput2.fill_(0);
42 | output.fill_(0);
43 |
44 | int success = correlation_forward_cuda_kernel(
45 | output,
46 | output.size(0),
47 | output.size(1),
48 | output.size(2),
49 | output.size(3),
50 | output.stride(0),
51 | output.stride(1),
52 | output.stride(2),
53 | output.stride(3),
54 | input1,
55 | input1.size(1),
56 | input1.size(2),
57 | input1.size(3),
58 | input1.stride(0),
59 | input1.stride(1),
60 | input1.stride(2),
61 | input1.stride(3),
62 | input2,
63 | input2.size(1),
64 | input2.stride(0),
65 | input2.stride(1),
66 | input2.stride(2),
67 | input2.stride(3),
68 | rInput1,
69 | rInput2,
70 | pad_size,
71 | kernel_size,
72 | max_displacement,
73 | stride1,
74 | stride2,
75 | corr_type_multiply,
76 | at::cuda::getCurrentCUDAStream()
77 | //at::globalContext().getCurrentCUDAStream()
78 | );
79 |
80 | //check for errors
81 | if (!success) {
82 | AT_ERROR("CUDA call failed");
83 | }
84 |
85 | return 1;
86 |
87 | }
88 |
89 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput,
90 | at::Tensor& gradInput1, at::Tensor& gradInput2,
91 | int pad_size,
92 | int kernel_size,
93 | int max_displacement,
94 | int stride1,
95 | int stride2,
96 | int corr_type_multiply)
97 | {
98 |
99 | int batchSize = input1.size(0);
100 | int nInputChannels = input1.size(1);
101 | int paddedInputHeight = input1.size(2)+ 2 * pad_size;
102 | int paddedInputWidth = input1.size(3)+ 2 * pad_size;
103 |
104 | int height = input1.size(2);
105 | int width = input1.size(3);
106 |
107 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
108 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
109 | gradInput1.resize_({batchSize, nInputChannels, height, width});
110 | gradInput2.resize_({batchSize, nInputChannels, height, width});
111 |
112 | rInput1.fill_(0);
113 | rInput2.fill_(0);
114 | gradInput1.fill_(0);
115 | gradInput2.fill_(0);
116 |
117 | int success = correlation_backward_cuda_kernel(gradOutput,
118 | gradOutput.size(0),
119 | gradOutput.size(1),
120 | gradOutput.size(2),
121 | gradOutput.size(3),
122 | gradOutput.stride(0),
123 | gradOutput.stride(1),
124 | gradOutput.stride(2),
125 | gradOutput.stride(3),
126 | input1,
127 | input1.size(1),
128 | input1.size(2),
129 | input1.size(3),
130 | input1.stride(0),
131 | input1.stride(1),
132 | input1.stride(2),
133 | input1.stride(3),
134 | input2,
135 | input2.stride(0),
136 | input2.stride(1),
137 | input2.stride(2),
138 | input2.stride(3),
139 | gradInput1,
140 | gradInput1.stride(0),
141 | gradInput1.stride(1),
142 | gradInput1.stride(2),
143 | gradInput1.stride(3),
144 | gradInput2,
145 | gradInput2.size(1),
146 | gradInput2.stride(0),
147 | gradInput2.stride(1),
148 | gradInput2.stride(2),
149 | gradInput2.stride(3),
150 | rInput1,
151 | rInput2,
152 | pad_size,
153 | kernel_size,
154 | max_displacement,
155 | stride1,
156 | stride2,
157 | corr_type_multiply,
158 | at::cuda::getCurrentCUDAStream()
159 | //at::globalContext().getCurrentCUDAStream()
160 | );
161 |
162 | if (!success) {
163 | AT_ERROR("CUDA call failed");
164 | }
165 |
166 | return 1;
167 | }
168 |
169 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
170 | m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)");
171 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)");
172 | }
173 |
174 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/correlation_package/correlation_cuda_kernel.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 | #include <ATen/Context.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 |
7 | int correlation_forward_cuda_kernel(at::Tensor& output,
8 | int ob,
9 | int oc,
10 | int oh,
11 | int ow,
12 | int osb,
13 | int osc,
14 | int osh,
15 | int osw,
16 |
17 | at::Tensor& input1,
18 | int ic,
19 | int ih,
20 | int iw,
21 | int isb,
22 | int isc,
23 | int ish,
24 | int isw,
25 |
26 | at::Tensor& input2,
27 | int gc,
28 | int gsb,
29 | int gsc,
30 | int gsh,
31 | int gsw,
32 |
33 | at::Tensor& rInput1,
34 | at::Tensor& rInput2,
35 | int pad_size,
36 | int kernel_size,
37 | int max_displacement,
38 | int stride1,
39 | int stride2,
40 | int corr_type_multiply,
41 | cudaStream_t stream);
42 |
43 |
44 | int correlation_backward_cuda_kernel(
45 | at::Tensor& gradOutput,
46 | int gob,
47 | int goc,
48 | int goh,
49 | int gow,
50 | int gosb,
51 | int gosc,
52 | int gosh,
53 | int gosw,
54 |
55 | at::Tensor& input1,
56 | int ic,
57 | int ih,
58 | int iw,
59 | int isb,
60 | int isc,
61 | int ish,
62 | int isw,
63 |
64 | at::Tensor& input2,
65 | int gsb,
66 | int gsc,
67 | int gsh,
68 | int gsw,
69 |
70 | at::Tensor& gradInput1,
71 | int gisb,
72 | int gisc,
73 | int gish,
74 | int gisw,
75 |
76 | at::Tensor& gradInput2,
77 | int ggc,
78 | int ggsb,
79 | int ggsc,
80 | int ggsh,
81 | int ggsw,
82 |
83 | at::Tensor& rInput1,
84 | at::Tensor& rInput2,
85 | int pad_size,
86 | int kernel_size,
87 | int max_displacement,
88 | int stride1,
89 | int stride2,
90 | int corr_type_multiply,
91 | cudaStream_t stream);
92 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/correlation_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup, find_packages
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++11']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_50,code=sm_50',
12 | '-gencode', 'arch=compute_52,code=sm_52',
13 | '-gencode', 'arch=compute_60,code=sm_60',
14 | '-gencode', 'arch=compute_61,code=sm_61',
15 | '-gencode', 'arch=compute_70,code=sm_70',
16 | '-gencode', 'arch=compute_70,code=compute_70'
17 | ]
18 |
19 | setup(
20 | name='correlation_cuda',
21 | ext_modules=[
22 | CUDAExtension('correlation_cuda', [
23 | 'correlation_cuda.cc',
24 | 'correlation_cuda_kernel.cu'
25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
26 | ],
27 | cmdclass={
28 | 'build_ext': BuildExtension
29 | })
30 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/resample2d_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/networks/resample2d_package/__init__.py
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/resample2d_package/resample2d.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from torch.autograd import Function, Variable
3 | import resample2d_cuda
4 |
5 | class Resample2dFunction(Function):
6 |
7 | @staticmethod
8 | def forward(ctx, input1, input2, kernel_size=1):
9 | assert input1.is_contiguous()
10 | assert input2.is_contiguous()
11 |
12 | ctx.save_for_backward(input1, input2)
13 | ctx.kernel_size = kernel_size
14 |
15 | _, d, _, _ = input1.size()
16 | b, _, h, w = input2.size()
17 | output = input1.new(b, d, h, w).zero_()
18 |
19 | resample2d_cuda.forward(input1, input2, output, kernel_size)
20 |
21 | return output
22 |
23 | @staticmethod
24 | def backward(ctx, grad_output):
25 | assert grad_output.is_contiguous()
26 |
27 | input1, input2 = ctx.saved_tensors
28 |
29 | grad_input1 = Variable(input1.new(input1.size()).zero_())
30 | grad_input2 = Variable(input1.new(input2.size()).zero_())
31 |
32 | resample2d_cuda.backward(input1, input2, grad_output.data,
33 | grad_input1.data, grad_input2.data,
34 | ctx.kernel_size)
35 |
36 | return grad_input1, grad_input2, None
37 |
38 | class Resample2d(Module):
39 |
40 | def __init__(self, kernel_size=1):
41 | super(Resample2d, self).__init__()
42 | self.kernel_size = kernel_size
43 |
44 | def forward(self, input1, input2):
45 | input1_c = input1.contiguous()
46 | return Resample2dFunction.apply(input1_c, input2, self.kernel_size)
47 |
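
A minimal usage sketch of the module above, assuming the resample2d_cuda extension has been built (see the setup.py for this package further down in the listing) and a CUDA device is available; the tensor shapes are illustrative only:

import torch
from resample2d import Resample2d

warp = Resample2d()                                    # kernel_size defaults to 1
feat = torch.randn(1, 3, 64, 64).cuda()                # features/image to warp
flow = torch.randn(1, 2, 64, 64).cuda().contiguous()   # per-pixel (u, v) offsets
warped = warp(feat, flow)                              # spatial size follows `flow`
print(warped.shape)                                    # torch.Size([1, 3, 64, 64])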
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/resample2d_package/resample2d_cuda.cc:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <torch/torch.h>
3 |
4 | #include "resample2d_kernel.cuh"
5 |
6 | int resample2d_cuda_forward(
7 | at::Tensor& input1,
8 | at::Tensor& input2,
9 | at::Tensor& output,
10 | int kernel_size) {
11 | resample2d_kernel_forward(input1, input2, output, kernel_size);
12 | return 1;
13 | }
14 |
15 | int resample2d_cuda_backward(
16 | at::Tensor& input1,
17 | at::Tensor& input2,
18 | at::Tensor& gradOutput,
19 | at::Tensor& gradInput1,
20 | at::Tensor& gradInput2,
21 | int kernel_size) {
22 | resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size);
23 | return 1;
24 | }
25 |
26 |
27 |
28 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
29 | m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)");
30 | m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)");
31 | }
32 |
33 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/resample2d_package/resample2d_kernel.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 |
5 | void resample2d_kernel_forward(
6 | at::Tensor& input1,
7 | at::Tensor& input2,
8 | at::Tensor& output,
9 | int kernel_size);
10 |
11 | void resample2d_kernel_backward(
12 | at::Tensor& input1,
13 | at::Tensor& input2,
14 | at::Tensor& gradOutput,
15 | at::Tensor& gradInput1,
16 | at::Tensor& gradInput2,
17 | int kernel_size);
18 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/resample2d_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++11']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_50,code=sm_50',
12 | '-gencode', 'arch=compute_52,code=sm_52',
13 | '-gencode', 'arch=compute_60,code=sm_60',
14 | '-gencode', 'arch=compute_61,code=sm_61',
15 | '-gencode', 'arch=compute_70,code=sm_70',
16 | '-gencode', 'arch=compute_70,code=compute_70'
17 | ]
18 |
19 | setup(
20 | name='resample2d_cuda',
21 | ext_modules=[
22 | CUDAExtension('resample2d_cuda', [
23 | 'resample2d_cuda.cc',
24 | 'resample2d_kernel.cu'
25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
26 | ],
27 | cmdclass={
28 | 'build_ext': BuildExtension
29 | })
30 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/networks/submodules.py:
--------------------------------------------------------------------------------
1 | # freda (todo) :
2 |
3 | import torch.nn as nn
4 | import torch
5 | import numpy as np
6 |
7 | def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1):
8 | if batchNorm:
9 | return nn.Sequential(
10 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=False),
11 | nn.BatchNorm2d(out_planes),
12 | nn.LeakyReLU(0.1,inplace=True)
13 | )
14 | else:
15 | return nn.Sequential(
16 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True),
17 | nn.LeakyReLU(0.1,inplace=True)
18 | )
19 |
20 | def i_conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, bias = True):
21 | if batchNorm:
22 | return nn.Sequential(
23 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias),
24 | nn.BatchNorm2d(out_planes),
25 | )
26 | else:
27 | return nn.Sequential(
28 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=bias),
29 | )
30 |
31 | def predict_flow(in_planes):
32 | return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True)
33 |
34 | def deconv(in_planes, out_planes):
35 | return nn.Sequential(
36 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=True),
37 | nn.LeakyReLU(0.1,inplace=True)
38 | )
39 |
40 | class tofp16(nn.Module):
41 | def __init__(self):
42 | super(tofp16, self).__init__()
43 |
44 | def forward(self, input):
45 | return input.half()
46 |
47 |
48 | class tofp32(nn.Module):
49 | def __init__(self):
50 | super(tofp32, self).__init__()
51 |
52 | def forward(self, input):
53 | return input.float()
54 |
55 |
56 | def init_deconv_bilinear(weight):
57 | f_shape = weight.size()
58 | heigh, width = f_shape[-2], f_shape[-1]
59 | f = np.ceil(width/2.0)
60 | c = (2 * f - 1 - f % 2) / (2.0 * f)
61 | bilinear = np.zeros([heigh, width])
62 | for x in range(width):
63 | for y in range(heigh):
64 | value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
65 | bilinear[x, y] = value
66 | weight.data.fill_(0.)
67 | for i in range(f_shape[0]):
68 | for j in range(f_shape[1]):
69 | weight.data[i,j,:,:] = torch.from_numpy(bilinear)
70 |
71 |
72 | def save_grad(grads, name):
73 | def hook(grad):
74 | grads[name] = grad
75 | return hook
76 |
77 | '''
78 | def save_grad(grads, name):
79 | def hook(grad):
80 | grads[name] = grad
81 | return hook
82 | import torch
83 | from channelnorm_package.modules.channelnorm import ChannelNorm
84 | model = ChannelNorm().cuda()
85 | grads = {}
86 | a = 100*torch.autograd.Variable(torch.randn((1,3,5,5)).cuda(), requires_grad=True)
87 | a.register_hook(save_grad(grads, 'a'))
88 | b = model(a)
89 | y = torch.mean(b)
90 | y.backward()
91 |
92 | '''
93 |
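
A short sketch of how these helpers compose, using toy channel counts (an assumption; the real FlowNet variants in FlowNetS.py / FlowNetC.py chain many such blocks with skip connections):

import torch

# two downsampling conv blocks, one upsampling deconv, one flow head
enc1 = conv(batchNorm=False, in_planes=6, out_planes=64, kernel_size=7, stride=2)
enc2 = conv(batchNorm=False, in_planes=64, out_planes=128, kernel_size=5, stride=2)
up   = deconv(128, 64)
head = predict_flow(64)

x = torch.randn(1, 6, 64, 64)        # e.g. two stacked RGB frames
y = head(up(enc2(enc1(x))))          # coarse 2-channel flow at half resolution
print(y.shape)                       # torch.Size([1, 2, 32, 32])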
--------------------------------------------------------------------------------
/models/flownet2_pytorch/run-caffe2pytorch.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | FN2PYTORCH=${1:-/}
4 |
5 | # install custom layers
6 | sudo nvidia-docker build -t $USER/pytorch:CUDA8-py27 .
7 | sudo nvidia-docker run --rm -ti --volume=${FN2PYTORCH}:/flownet2-pytorch:rw --workdir=/flownet2-pytorch $USER/pytorch:CUDA8-py27 /bin/bash -c "./install.sh"
8 |
9 | # convert FlowNet2-C, CS, CSS, CSS-ft-sd, SD, S and 2 to PyTorch
10 | sudo nvidia-docker run -ti --volume=${FN2PYTORCH}:/fn2pytorch:rw flownet2:latest /bin/bash -c "source /flownet2/flownet2/set-env.sh && cd /flownet2/flownet2/models && \
11 | python /fn2pytorch/convert.py ./FlowNet2-C/FlowNet2-C_weights.caffemodel ./FlowNet2-C/FlowNet2-C_deploy.prototxt.template /fn2pytorch &&
12 | python /fn2pytorch/convert.py ./FlowNet2-CS/FlowNet2-CS_weights.caffemodel ./FlowNet2-CS/FlowNet2-CS_deploy.prototxt.template /fn2pytorch && \
13 | python /fn2pytorch/convert.py ./FlowNet2-CSS/FlowNet2-CSS_weights.caffemodel.h5 ./FlowNet2-CSS/FlowNet2-CSS_deploy.prototxt.template /fn2pytorch && \
14 | python /fn2pytorch/convert.py ./FlowNet2-CSS-ft-sd/FlowNet2-CSS-ft-sd_weights.caffemodel.h5 ./FlowNet2-CSS-ft-sd/FlowNet2-CSS-ft-sd_deploy.prototxt.template /fn2pytorch && \
15 | python /fn2pytorch/convert.py ./FlowNet2-SD/FlowNet2-SD_weights.caffemodel.h5 ./FlowNet2-SD/FlowNet2-SD_deploy.prototxt.template /fn2pytorch && \
16 | python /fn2pytorch/convert.py ./FlowNet2-S/FlowNet2-S_weights.caffemodel.h5 ./FlowNet2-S/FlowNet2-S_deploy.prototxt.template /fn2pytorch && \
17 | python /fn2pytorch/convert.py ./FlowNet2/FlowNet2_weights.caffemodel.h5 ./FlowNet2/FlowNet2_deploy.prototxt.template /fn2pytorch"
--------------------------------------------------------------------------------
/models/flownet2_pytorch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/models/flownet2_pytorch/utils/__init__.py
--------------------------------------------------------------------------------
/models/flownet2_pytorch/utils/flow_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | TAG_CHAR = np.array([202021.25], np.float32)
4 |
5 | def readFlow(fn):
6 | """ Read .flo file in Middlebury format"""
7 | # Code adapted from:
8 | # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
9 |
10 | # WARNING: this will work on little-endian architectures (eg Intel x86) only!
11 | # print 'fn = %s'%(fn)
12 | with open(fn, 'rb') as f:
13 | magic = np.fromfile(f, np.float32, count=1)
14 | if 202021.25 != magic:
15 | print('Magic number incorrect. Invalid .flo file')
16 | return None
17 | else:
18 | w = np.fromfile(f, np.int32, count=1)
19 | h = np.fromfile(f, np.int32, count=1)
20 | # print 'Reading %d x %d flo file\n' % (w, h)
21 | data = np.fromfile(f, np.float32, count=2*int(w)*int(h))
22 | # Reshape data into 3D array (columns, rows, bands)
23 | # The reshape here is for visualization, the original code is (w,h,2)
24 | return np.resize(data, (int(h), int(w), 2))
25 |
26 | def writeFlow(filename,uv,v=None):
27 | """ Write optical flow to file.
28 |
29 | If v is None, uv is assumed to contain both u and v channels,
30 | stacked in depth.
31 | Original code by Deqing Sun, adapted from Daniel Scharstein.
32 | """
33 | nBands = 2
34 |
35 | if v is None:
36 | assert(uv.ndim == 3)
37 | assert(uv.shape[2] == 2)
38 | u = uv[:,:,0]
39 | v = uv[:,:,1]
40 | else:
41 | u = uv
42 |
43 | assert(u.shape == v.shape)
44 | height,width = u.shape
45 | f = open(filename,'wb')
46 | # write the header
47 | f.write(TAG_CHAR)
48 | np.array(width).astype(np.int32).tofile(f)
49 | np.array(height).astype(np.int32).tofile(f)
50 | # arrange into matrix form
51 | tmp = np.zeros((height, width*nBands))
52 | tmp[:,np.arange(width)*2] = u
53 | tmp[:,np.arange(width)*2 + 1] = v
54 | tmp.astype(np.float32).tofile(f)
55 | f.close()
56 |
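
A small round-trip sketch using the two functions above (little-endian hosts only, per the warning in readFlow; the output path is illustrative):

import numpy as np

flow = np.random.randn(4, 5, 2).astype(np.float32)   # (H, W, 2) array of (u, v)
writeFlow('example.flo', flow)
assert np.allclose(readFlow('example.flo'), flow)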
--------------------------------------------------------------------------------
/models/flownet2_pytorch/utils/frame_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from os.path import *
3 | from scipy.misc import imread
4 | from . import flow_utils
5 |
6 | def read_gen(file_name):
7 | ext = splitext(file_name)[-1]
8 | if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg':
9 | im = imread(file_name)
10 | if im.shape[2] > 3:
11 | return im[:,:,:3]
12 | else:
13 | return im
14 | elif ext == '.bin' or ext == '.raw':
15 | return np.load(file_name)
16 | elif ext == '.flo':
17 | return flow_utils.readFlow(file_name).astype(np.float32)
18 | return []
19 |
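
read_gen dispatches purely on the file extension; a usage sketch with hypothetical paths:

rgb  = read_gen('frames/000000.png')   # HxWx3 image array (alpha stripped if present)
flow = read_gen('flow/000000.flo')     # HxWx2 float32 optical flow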
--------------------------------------------------------------------------------
/models/flownet2_pytorch/utils/param_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 |
5 | def parse_flownetc(modules, weights, biases):
6 | keys = [
7 | 'conv1',
8 | 'conv2',
9 | 'conv3',
10 | 'conv_redir',
11 | 'conv3_1',
12 | 'conv4',
13 | 'conv4_1',
14 | 'conv5',
15 | 'conv5_1',
16 | 'conv6',
17 | 'conv6_1',
18 |
19 | 'deconv5',
20 | 'deconv4',
21 | 'deconv3',
22 | 'deconv2',
23 |
24 | 'Convolution1',
25 | 'Convolution2',
26 | 'Convolution3',
27 | 'Convolution4',
28 | 'Convolution5',
29 |
30 | 'upsample_flow6to5',
31 | 'upsample_flow5to4',
32 | 'upsample_flow4to3',
33 | 'upsample_flow3to2',
34 |
35 | ]
36 | i = 0
37 | for m in modules:
38 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
39 | weight = weights[keys[i]].copy()
40 | bias = biases[keys[i]].copy()
41 | if keys[i] == 'conv1':
42 | m.weight.data[:,:,:,:] = torch.from_numpy(np.flip(weight, axis=1).copy())
43 | m.bias.data[:] = torch.from_numpy(bias)
44 | else:
45 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
46 | m.bias.data[:] = torch.from_numpy(bias)
47 |
48 | i = i + 1
49 | return
50 |
51 | def parse_flownets(modules, weights, biases, param_prefix='net2_'):
52 | keys = [
53 | 'conv1',
54 | 'conv2',
55 | 'conv3',
56 | 'conv3_1',
57 | 'conv4',
58 | 'conv4_1',
59 | 'conv5',
60 | 'conv5_1',
61 | 'conv6',
62 | 'conv6_1',
63 |
64 | 'deconv5',
65 | 'deconv4',
66 | 'deconv3',
67 | 'deconv2',
68 |
69 | 'predict_conv6',
70 | 'predict_conv5',
71 | 'predict_conv4',
72 | 'predict_conv3',
73 | 'predict_conv2',
74 |
75 | 'upsample_flow6to5',
76 | 'upsample_flow5to4',
77 | 'upsample_flow4to3',
78 | 'upsample_flow3to2',
79 | ]
80 | for i, k in enumerate(keys):
81 | if 'upsample' in k:
82 | keys[i] = param_prefix + param_prefix + k
83 | else:
84 | keys[i] = param_prefix + k
85 | i = 0
86 | for m in modules:
87 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
88 | weight = weights[keys[i]].copy()
89 | bias = biases[keys[i]].copy()
90 | if keys[i] == param_prefix+'conv1':
91 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
92 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy())
93 | m.weight.data[:,6:9,:,:] = torch.from_numpy(np.flip(weight[:,6:9,:,:], axis=1).copy())
94 | m.weight.data[:,9::,:,:] = torch.from_numpy(weight[:,9:,:,:].copy())
95 | if m.bias is not None:
96 | m.bias.data[:] = torch.from_numpy(bias)
97 | else:
98 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
99 | if m.bias is not None:
100 | m.bias.data[:] = torch.from_numpy(bias)
101 | i = i + 1
102 | return
103 |
104 | def parse_flownetsonly(modules, weights, biases, param_prefix=''):
105 | keys = [
106 | 'conv1',
107 | 'conv2',
108 | 'conv3',
109 | 'conv3_1',
110 | 'conv4',
111 | 'conv4_1',
112 | 'conv5',
113 | 'conv5_1',
114 | 'conv6',
115 | 'conv6_1',
116 |
117 | 'deconv5',
118 | 'deconv4',
119 | 'deconv3',
120 | 'deconv2',
121 |
122 | 'Convolution1',
123 | 'Convolution2',
124 | 'Convolution3',
125 | 'Convolution4',
126 | 'Convolution5',
127 |
128 | 'upsample_flow6to5',
129 | 'upsample_flow5to4',
130 | 'upsample_flow4to3',
131 | 'upsample_flow3to2',
132 | ]
133 | for i, k in enumerate(keys):
134 | if 'upsample' in k:
135 | keys[i] = param_prefix + param_prefix + k
136 | else:
137 | keys[i] = param_prefix + k
138 | i = 0
139 | for m in modules:
140 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
141 | weight = weights[keys[i]].copy()
142 | bias = biases[keys[i]].copy()
143 | if keys[i] == param_prefix+'conv1':
144 | # print ("%s :"%(keys[i]), m.weight.size(), m.bias.size(), tf_w[keys[i]].shape[::-1])
145 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
146 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy())
147 | if m.bias is not None:
148 | m.bias.data[:] = torch.from_numpy(bias)
149 | else:
150 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
151 | if m.bias is not None:
152 | m.bias.data[:] = torch.from_numpy(bias)
153 | i = i + 1
154 | return
155 |
156 | def parse_flownetsd(modules, weights, biases, param_prefix='netsd_'):
157 | keys = [
158 | 'conv0',
159 | 'conv1',
160 | 'conv1_1',
161 | 'conv2',
162 | 'conv2_1',
163 | 'conv3',
164 | 'conv3_1',
165 | 'conv4',
166 | 'conv4_1',
167 | 'conv5',
168 | 'conv5_1',
169 | 'conv6',
170 | 'conv6_1',
171 |
172 | 'deconv5',
173 | 'deconv4',
174 | 'deconv3',
175 | 'deconv2',
176 |
177 | 'interconv5',
178 | 'interconv4',
179 | 'interconv3',
180 | 'interconv2',
181 |
182 | 'Convolution1',
183 | 'Convolution2',
184 | 'Convolution3',
185 | 'Convolution4',
186 | 'Convolution5',
187 |
188 | 'upsample_flow6to5',
189 | 'upsample_flow5to4',
190 | 'upsample_flow4to3',
191 | 'upsample_flow3to2',
192 | ]
193 | for i, k in enumerate(keys):
194 | keys[i] = param_prefix + k
195 |
196 | i = 0
197 | for m in modules:
198 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
199 | weight = weights[keys[i]].copy()
200 | bias = biases[keys[i]].copy()
201 | if keys[i] == param_prefix+'conv0':
202 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
203 | m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy())
204 | if m.bias is not None:
205 | m.bias.data[:] = torch.from_numpy(bias)
206 | else:
207 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
208 | if m.bias is not None:
209 | m.bias.data[:] = torch.from_numpy(bias)
210 | i = i + 1
211 |
212 | return
213 |
214 | def parse_flownetfusion(modules, weights, biases, param_prefix='fuse_'):
215 | keys = [
216 | 'conv0',
217 | 'conv1',
218 | 'conv1_1',
219 | 'conv2',
220 | 'conv2_1',
221 |
222 | 'deconv1',
223 | 'deconv0',
224 |
225 | 'interconv1',
226 | 'interconv0',
227 |
228 | '_Convolution5',
229 | '_Convolution6',
230 | '_Convolution7',
231 |
232 | 'upsample_flow2to1',
233 | 'upsample_flow1to0',
234 | ]
235 | for i, k in enumerate(keys):
236 | keys[i] = param_prefix + k
237 |
238 | i = 0
239 | for m in modules:
240 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
241 | weight = weights[keys[i]].copy()
242 | bias = biases[keys[i]].copy()
243 | if keys[i] == param_prefix+'conv0':
244 | m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy())
245 | m.weight.data[:,3::,:,:] = torch.from_numpy(weight[:,3:,:,:].copy())
246 | if m.bias is not None:
247 | m.bias.data[:] = torch.from_numpy(bias)
248 | else:
249 | m.weight.data[:,:,:,:] = torch.from_numpy(weight)
250 | if m.bias is not None:
251 | m.bias.data[:] = torch.from_numpy(bias)
252 | i = i + 1
253 |
254 | return
255 |
--------------------------------------------------------------------------------
/models/flownet2_pytorch/utils/tools.py:
--------------------------------------------------------------------------------
1 | # freda (todo) :
2 |
3 | import os, time, sys, math
4 | import subprocess, shutil
5 | from os.path import *
6 | import numpy as np
7 | from inspect import isclass
8 | from pytz import timezone
9 | from datetime import datetime
10 | import inspect
11 | import torch
12 |
13 | def datestr():
14 | pacific = timezone('US/Pacific')
15 | now = datetime.now(pacific)
16 | return '{}{:02}{:02}_{:02}{:02}'.format(now.year, now.month, now.day, now.hour, now.minute)
17 |
18 | def module_to_dict(module, exclude=[]):
19 | return dict([(x, getattr(module, x)) for x in dir(module)
20 | if isclass(getattr(module, x))
21 | and x not in exclude
22 | and getattr(module, x) not in exclude])
23 |
24 | class TimerBlock:
25 | def __init__(self, title):
26 | print(("{}".format(title)))
27 |
28 | def __enter__(self):
29 | self.start = time.clock()
30 | return self
31 |
32 | def __exit__(self, exc_type, exc_value, traceback):
33 | self.end = time.clock()
34 | self.interval = self.end - self.start
35 |
36 | if exc_type is not None:
37 | self.log("Operation failed\n")
38 | else:
39 | self.log("Operation finished\n")
40 |
41 |
42 | def log(self, string):
43 | duration = time.clock() - self.start
44 | units = 's'
45 | if duration > 60:
46 | duration = duration / 60.
47 | units = 'm'
48 | print((" [{:.3f}{}] {}".format(duration, units, string)))
49 |
50 | def log2file(self, fid, string):
51 | fid = open(fid, 'a')
52 | fid.write("%s\n"%(string))
53 | fid.close()
54 |
55 | def add_arguments_for_module(parser, module, argument_for_class, default, skip_params=[], parameter_defaults={}):
56 | argument_group = parser.add_argument_group(argument_for_class.capitalize())
57 |
58 | module_dict = module_to_dict(module)
59 | argument_group.add_argument('--' + argument_for_class, type=str, default=default, choices=list(module_dict.keys()))
60 |
61 | args, unknown_args = parser.parse_known_args()
62 | class_obj = module_dict[vars(args)[argument_for_class]]
63 |
64 | argspec = inspect.getargspec(class_obj.__init__)
65 |
66 | defaults = argspec.defaults[::-1] if argspec.defaults else None
67 |
68 | args = argspec.args[::-1]
69 | for i, arg in enumerate(args):
70 | cmd_arg = '{}_{}'.format(argument_for_class, arg)
71 | if arg not in skip_params + ['self', 'args']:
72 | if arg in list(parameter_defaults.keys()):
73 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(parameter_defaults[arg]), default=parameter_defaults[arg])
74 | elif (defaults is not None and i < len(defaults)):
75 | argument_group.add_argument('--{}'.format(cmd_arg), type=type(defaults[i]), default=defaults[i])
76 | else:
77 | print(("[Warning]: non-default argument '{}' detected on class '{}'. This argument cannot be modified via the command line"
78 | .format(arg, module.__class__.__name__)))
79 | # We don't have a good way of dealing with inferring the type of the argument
80 | # TODO: try creating a custom action and using ast's infer type?
81 | # else:
82 | # argument_group.add_argument('--{}'.format(cmd_arg), required=True)
83 |
84 | def kwargs_from_args(args, argument_for_class):
85 | argument_for_class = argument_for_class + '_'
86 | return {key[len(argument_for_class):]: value for key, value in list(vars(args).items()) if argument_for_class in key and key != argument_for_class + 'class'}
87 |
88 | def format_dictionary_of_losses(labels, values):
89 | try:
90 | string = ', '.join([('{}: {:' + ('.3f' if value >= 0.001 else '.1e') +'}').format(name, value) for name, value in zip(labels, values)])
91 | except (TypeError, ValueError) as e:
92 | print((list(zip(labels, values))))
93 | string = '[Log Error] ' + str(e)
94 |
95 | return string
96 |
97 |
98 | class IteratorTimer():
99 | def __init__(self, iterable):
100 | self.iterable = iterable
101 | self.iterator = self.iterable.__iter__()
102 |
103 | def __iter__(self):
104 | return self
105 |
106 | def __len__(self):
107 | return len(self.iterable)
108 |
109 | def __next__(self):
110 | start = time.time()
111 | n = next(self.iterator)
112 | self.last_duration = (time.time() - start)
113 | return n
114 |
115 | next = __next__
116 |
117 | def gpumemusage():
118 |     gpu_mem = subprocess.check_output("nvidia-smi | grep MiB | cut -f 3 -d '|'", shell=True).decode().replace(' ', '').replace('\n', '').replace('i', '')
119 | all_stat = [float(a) for a in gpu_mem.replace('/','').split('MB')[:-1]]
120 |
121 | gpu_mem = ''
122 |     for i in range(len(all_stat)//2):
123 | curr, tot = all_stat[2*i], all_stat[2*i+1]
124 | util = "%1.2f"%(100*curr/tot)+'%'
125 | cmem = str(int(math.ceil(curr/1024.)))+'GB'
126 | gmem = str(int(math.ceil(tot/1024.)))+'GB'
127 | gpu_mem += util + '--' + join(cmem, gmem) + ' '
128 | return gpu_mem
129 |
130 |
131 | def update_hyperparameter_schedule(args, epoch, global_iteration, optimizer):
132 | if args.schedule_lr_frequency > 0:
133 | for param_group in optimizer.param_groups:
134 | if (global_iteration + 1) % args.schedule_lr_frequency == 0:
135 | param_group['lr'] /= float(args.schedule_lr_fraction)
136 | param_group['lr'] = float(np.maximum(param_group['lr'], 0.000001))
137 |
138 | def save_checkpoint(state, is_best, path, prefix, filename='checkpoint.pth.tar'):
139 | prefix_save = os.path.join(path, prefix)
140 | name = prefix_save + '_' + filename
141 | torch.save(state, name)
142 | if is_best:
143 | shutil.copyfile(name, prefix_save + '_model_best.pth.tar')
144 |
145 |
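
TimerBlock is meant to be used as a context manager whose log() calls print elapsed time; a minimal sketch:

with TimerBlock("Initializing") as block:
    block.log("building model")
    block.log("loading checkpoint")
# on exit, logs "Operation finished" (or "Operation failed" if an exception occurred)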
--------------------------------------------------------------------------------
/models/models.py:
--------------------------------------------------------------------------------
1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
3 | import os
4 | import torch
5 | import torch.nn as nn
6 | import numpy as np
7 | import fractions
8 | def lcm(a,b): return abs(a * b)/fractions.gcd(a,b) if a and b else 0
9 |
10 | def wrap_model(opt, modelG, modelD, flowNet):
11 | if opt.n_gpus_gen == len(opt.gpu_ids):
12 | modelG = myModel(opt, modelG)
13 | modelD = myModel(opt, modelD)
14 | flowNet = myModel(opt, flowNet)
15 | else:
16 | if opt.batchSize == 1:
17 | gpu_split_id = opt.n_gpus_gen + 1
18 | modelG = nn.DataParallel(modelG, device_ids=opt.gpu_ids[0:1])
19 | else:
20 | gpu_split_id = opt.n_gpus_gen
21 | modelG = nn.DataParallel(modelG, device_ids=opt.gpu_ids[:gpu_split_id])
22 | modelD = nn.DataParallel(modelD, device_ids=[opt.gpu_ids[0]] + opt.gpu_ids[gpu_split_id:])
23 | flowNet = nn.DataParallel(flowNet, device_ids=[opt.gpu_ids[0]] + opt.gpu_ids[gpu_split_id:])
24 | return modelG, modelD, flowNet
25 |
26 | class myModel(nn.Module):
27 | def __init__(self, opt, model):
28 | super(myModel, self).__init__()
29 | self.opt = opt
30 | self.module = model
31 | self.model = nn.DataParallel(model, device_ids=opt.gpu_ids)
32 | self.bs_per_gpu = int(np.ceil(float(opt.batchSize) / len(opt.gpu_ids))) # batch size for each GPU
33 | self.pad_bs = self.bs_per_gpu * len(opt.gpu_ids) - opt.batchSize
34 |
35 | def forward(self, *inputs, **kwargs):
36 | inputs = self.add_dummy_to_tensor(inputs, self.pad_bs)
37 | outputs = self.model(*inputs, **kwargs, dummy_bs=self.pad_bs)
38 | if self.pad_bs == self.bs_per_gpu: # gpu 0 does 0 batch but still returns 1 batch
39 | return self.remove_dummy_from_tensor(outputs, 1)
40 | return outputs
41 |
42 | def add_dummy_to_tensor(self, tensors, add_size=0):
43 | if add_size == 0 or tensors is None: return tensors
44 | if type(tensors) == list or type(tensors) == tuple:
45 | return [self.add_dummy_to_tensor(tensor, add_size) for tensor in tensors]
46 |
47 | if isinstance(tensors, torch.Tensor):
48 | dummy = torch.zeros_like(tensors)[:add_size]
49 | tensors = torch.cat([dummy, tensors])
50 | return tensors
51 |
52 | def remove_dummy_from_tensor(self, tensors, remove_size=0):
53 | if remove_size == 0 or tensors is None: return tensors
54 | if type(tensors) == list or type(tensors) == tuple:
55 | return [self.remove_dummy_from_tensor(tensor, remove_size) for tensor in tensors]
56 |
57 | if isinstance(tensors, torch.Tensor):
58 | tensors = tensors[remove_size:]
59 | return tensors
60 |
61 | def create_model(opt):
62 | print(opt.model)
63 | if opt.model == 'vid2vid':
64 | from .vid2vid_model_G import Vid2VidModelG
65 | modelG = Vid2VidModelG()
66 | if opt.isTrain:
67 | from .vid2vid_model_D import Vid2VidModelD
68 | modelD = Vid2VidModelD()
69 | else:
70 | raise ValueError("Model [%s] not recognized." % opt.model)
71 |
72 | if opt.isTrain:
73 | from .flownet import FlowNet
74 | flowNet = FlowNet()
75 |
76 | modelG.initialize(opt)
77 | if opt.isTrain:
78 | modelD.initialize(opt)
79 | flowNet.initialize(opt)
80 | if not opt.fp16:
81 |             modelG, modelD, flowNet = wrap_model(opt, modelG, modelD, flowNet)
82 | return [modelG, modelD, flowNet]
83 | else:
84 | return modelG
85 |
86 | def create_optimizer(opt, models):
87 | modelG, modelD, flowNet = models
88 | optimizer_D_T = []
89 | if opt.fp16:
90 | from apex import amp
91 | for s in range(opt.n_scales_temporal):
92 | optimizer_D_T.append(getattr(modelD, 'optimizer_D_T'+str(s)))
93 | modelG, optimizer_G = amp.initialize(modelG, modelG.optimizer_G, opt_level='O1')
94 | modelD, optimizers_D = amp.initialize(modelD, [modelD.optimizer_D] + optimizer_D_T, opt_level='O1')
95 | optimizer_D, optimizer_D_T = optimizers_D[0], optimizers_D[1:]
96 |         modelG, modelD, flowNet = wrap_model(opt, modelG, modelD, flowNet)
97 | else:
98 | optimizer_G = modelG.module.optimizer_G
99 | optimizer_D = modelD.module.optimizer_D
100 | for s in range(opt.n_scales_temporal):
101 | optimizer_D_T.append(getattr(modelD.module, 'optimizer_D_T'+str(s)))
102 | return modelG, modelD, flowNet, optimizer_G, optimizer_D, optimizer_D_T
103 |
104 | def init_params(opt, modelG, modelD, data_loader):
105 | iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt')
106 | start_epoch, epoch_iter = 1, 0
107 | ### if continue training, recover previous states
108 | if opt.continue_train:
109 | if os.path.exists(iter_path):
110 | start_epoch, epoch_iter = np.loadtxt(iter_path , delimiter=',', dtype=int)
111 | print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter))
112 | if start_epoch > opt.niter:
113 | modelG.module.update_learning_rate(start_epoch-1, 'G')
114 | modelD.module.update_learning_rate(start_epoch-1, 'D')
115 | if (opt.n_scales_spatial > 1) and (opt.niter_fix_global != 0) and (start_epoch > opt.niter_fix_global):
116 | modelG.module.update_fixed_params()
117 | if start_epoch > opt.niter_step:
118 | data_loader.dataset.update_training_batch((start_epoch-1)//opt.niter_step)
119 | modelG.module.update_training_batch((start_epoch-1)//opt.niter_step)
120 |
121 | n_gpus = opt.n_gpus_gen if opt.batchSize == 1 else 1 # number of gpus used for generator for each batch
122 | tG, tD = opt.n_frames_G, opt.n_frames_D
123 | tDB = tD * opt.output_nc
124 | s_scales = opt.n_scales_spatial
125 | t_scales = opt.n_scales_temporal
126 | input_nc = 1 if opt.label_nc != 0 else opt.input_nc
127 | output_nc = opt.output_nc
128 |
129 | print_freq = lcm(opt.print_freq, opt.batchSize)
130 | total_steps = (start_epoch-1) * len(data_loader) + epoch_iter
131 | total_steps = total_steps // print_freq * print_freq
132 |
133 | return n_gpus, tG, tD, tDB, s_scales, t_scales, input_nc, output_nc, start_epoch, epoch_iter, print_freq, total_steps, iter_path
134 |
135 | def save_models(opt, epoch, epoch_iter, total_steps, visualizer, iter_path, modelG, modelD, end_of_epoch=False):
136 | if not end_of_epoch:
137 | if total_steps % opt.save_latest_freq == 0:
138 | visualizer.vis_print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps))
139 | modelG.module.save('latest')
140 | modelD.module.save('latest')
141 | np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d')
142 | else:
143 | if epoch % opt.save_epoch_freq == 0:
144 | visualizer.vis_print('saving the model at the end of epoch %d, iters %d' % (epoch, total_steps))
145 | modelG.module.save('latest')
146 | modelD.module.save('latest')
147 | modelG.module.save(epoch)
148 | modelD.module.save(epoch)
149 | np.savetxt(iter_path, (epoch+1, 0), delimiter=',', fmt='%d')
150 |
151 | def update_models(opt, epoch, modelG, modelD, data_loader):
152 | ### linearly decay learning rate after certain iterations
153 | if epoch > opt.niter:
154 | modelG.module.update_learning_rate(epoch, 'G')
155 | modelD.module.update_learning_rate(epoch, 'D')
156 |
157 | ### gradually grow training sequence length
158 | if (epoch % opt.niter_step) == 0:
159 | data_loader.dataset.update_training_batch(epoch//opt.niter_step)
160 | modelG.module.update_training_batch(epoch//opt.niter_step)
161 |
162 | ### finetune all scales
163 | if (opt.n_scales_spatial > 1) and (opt.niter_fix_global != 0) and (epoch == opt.niter_fix_global):
164 | modelG.module.update_fixed_params()
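
The intended call pattern for the helpers above, as a sketch (train.py in this repository is the authoritative driver and also sets up the data loader and visualizer):

from options.train_options import TrainOptions
from models.models import create_model, create_optimizer

opt = TrainOptions().parse()
modelG, modelD, flowNet, optimizer_G, optimizer_D, optimizer_D_T = \
    create_optimizer(opt, create_model(opt))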
--------------------------------------------------------------------------------
/options/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/options/__init__.py
--------------------------------------------------------------------------------
/options/base_options.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | from util import util
4 | import torch
5 |
6 | class BaseOptions():
7 | def __init__(self):
8 | self.parser = argparse.ArgumentParser()
9 | self.initialized = False
10 |
11 | def initialize(self):
12 | self.parser.add_argument('--dataroot', type=str, default='datasets/Cityscapes/')
13 | self.parser.add_argument('--batchSize', type=int, default=1, help='input batch size')
14 | self.parser.add_argument('--loadSize', type=int, default=512, help='scale images to this size')
15 | self.parser.add_argument('--fineSize', type=int, default=512, help='then crop to this size')
16 | self.parser.add_argument('--input_nc', type=int, default=3, help='# of input image channels')
17 | self.parser.add_argument('--label_nc', type=int, default=0, help='number of labels')
18 | self.parser.add_argument('--output_nc', type=int, default=3, help='# of output image channels')
19 |
20 | # network arch
21 | self.parser.add_argument('--netG', type=str, default='composite', help='selects model to use for netG')
22 | self.parser.add_argument('--ngf', type=int, default=128, help='# of gen filters in first conv layer')
23 | self.parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in first conv layer')
24 | self.parser.add_argument('--n_blocks', type=int, default=9, help='number of resnet blocks in generator')
25 | self.parser.add_argument('--n_downsample_G', type=int, default=3, help='number of downsampling layers in netG')
26 |
27 |         self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids, e.g. 0 or 0,1,2 or 0,2; use -1 for CPU')
28 | self.parser.add_argument('--n_gpus_gen', type=int, default=-1, help='how many gpus are used for generator (the rest are used for discriminator). -1 means use all gpus')
29 | self.parser.add_argument('--name', type=str, default='experiment_name', help='name of the experiment. It decides where to store samples and models')
30 |         self.parser.add_argument('--dataset_mode', type=str, default='temporal', help='chooses how datasets are loaded. [temporal | face | pose | test]')
31 | self.parser.add_argument('--model', type=str, default='vid2vid', help='chooses which model to use. vid2vid, test')
32 | self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data')
33 | self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here')
34 | self.parser.add_argument('--norm', type=str, default='batch', help='instance normalization or batch normalization')
35 | self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly')
36 | self.parser.add_argument('--display_winsize', type=int, default=512, help='display window size')
37 | self.parser.add_argument('--display_id', type=int, default=0, help='window id of the web display')
38 | self.parser.add_argument('--tf_log', action='store_true', help='if specified, use tensorboard logging. Requires tensorflow installed')
39 |
40 | self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.')
41 | self.parser.add_argument('--resize_or_crop', type=str, default='scaleWidth', help='scaling and cropping of images at load time [resize_and_crop|crop|scaledCrop|scaleWidth|scaleWidth_and_crop|scaleWidth_and_scaledCrop|scaleHeight|scaleHeight_and_crop] etc')
42 |         self.parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation')
43 |
44 | # more features as input
45 | self.parser.add_argument('--use_instance', action='store_true', help='if specified, add instance map as feature for class A')
46 | self.parser.add_argument('--label_feat', action='store_true', help='if specified, encode label features as input')
47 | self.parser.add_argument('--feat_num', type=int, default=3, help='number of encoded features')
48 | self.parser.add_argument('--nef', type=int, default=32, help='# of encoder filters in first conv layer')
49 | self.parser.add_argument('--load_features', action='store_true', help='if specified, load precomputed feature maps')
50 | self.parser.add_argument('--netE', type=str, default='simple', help='which model to use for encoder')
51 | self.parser.add_argument('--n_downsample_E', type=int, default=3, help='number of downsampling layers in netE')
52 |
53 | # for cascaded resnet
54 |         self.parser.add_argument('--n_blocks_local', type=int, default=3, help='number of resnet blocks in the outermost multiscale resnet')
55 | self.parser.add_argument('--n_local_enhancers', type=int, default=1, help='number of cascaded layers')
56 |
57 | # temporal
58 |         self.parser.add_argument('--n_frames_G', type=int, default=3, help='number of input frames to feed into generator, i.e., n_frames_G-1 is the number of frames we look into the past')
59 | self.parser.add_argument('--n_scales_spatial', type=int, default=1, help='number of spatial scales in the coarse-to-fine generator')
60 | self.parser.add_argument('--no_first_img', action='store_true', help='if specified, generator also tries to synthesize first image')
61 | self.parser.add_argument('--use_single_G', action='store_true', help='if specified, use single frame generator for the first frame')
62 |         self.parser.add_argument('--fg', action='store_true', help='if specified, use foreground-background separation model')
63 | self.parser.add_argument('--fg_labels', type=str, default='26', help='label indices for foreground objects')
64 | self.parser.add_argument('--no_flow', action='store_true', help='if specified, do not use flow warping and directly synthesize frames')
65 |
66 | # face specific
67 | self.parser.add_argument('--no_canny_edge', action='store_true', help='do *not* use canny edge as input')
68 | self.parser.add_argument('--no_dist_map', action='store_true', help='do *not* use distance transform map as input')
69 | self.parser.add_argument('--random_scale_points', action='store_true', help='randomly scale face keypoints a bit to create different results')
70 |
71 | # pose specific
72 | self.parser.add_argument('--densepose_only', action='store_true', help='use only densepose as input')
73 | self.parser.add_argument('--openpose_only', action='store_true', help='use only openpose as input')
74 | self.parser.add_argument('--add_face_disc', action='store_true', help='add face discriminator')
75 | self.parser.add_argument('--remove_face_labels', action='store_true', help='remove face labels to better adapt to different face shapes')
76 | self.parser.add_argument('--random_drop_prob', type=float, default=0.05, help='the probability to randomly drop each pose segment during training')
77 | self.parser.add_argument('--basic_point_only', action='store_true', help='only use basic joint keypoints for openpose, without hand or face keypoints')
78 |
79 | # miscellaneous
80 | self.parser.add_argument('--load_pretrain', type=str, default='', help='if specified, load the pretrained model')
81 | self.parser.add_argument('--debug', action='store_true', help='if specified, use small dataset for debug')
82 | self.parser.add_argument('--fp16', action='store_true', default=False, help='train with AMP')
83 | self.parser.add_argument('--local_rank', type=int, default=0, help='local rank for distributed training')
84 |
85 | self.initialized = True
86 |
87 | def parse_str(self, ids):
88 | str_ids = ids.split(',')
89 | ids_list = []
90 | for str_id in str_ids:
91 | id = int(str_id)
92 | if id >= 0:
93 | ids_list.append(id)
94 | return ids_list
95 |
96 | def parse(self, save=True):
97 | if not self.initialized:
98 | self.initialize()
99 | self.opt = self.parser.parse_args()
100 | self.opt.isTrain = self.isTrain # train or test
101 |
102 | self.opt.fg_labels = self.parse_str(self.opt.fg_labels)
103 | self.opt.gpu_ids = self.parse_str(self.opt.gpu_ids)
104 | if self.opt.n_gpus_gen == -1:
105 | self.opt.n_gpus_gen = len(self.opt.gpu_ids)
106 |
107 | # set gpu ids
108 | if len(self.opt.gpu_ids) > 0:
109 | torch.cuda.set_device(self.opt.gpu_ids[0])
110 |
111 | args = vars(self.opt)
112 |
113 | print('------------ Options -------------')
114 | for k, v in sorted(args.items()):
115 | print('%s: %s' % (str(k), str(v)))
116 | print('-------------- End ----------------')
117 |
118 | # save to the disk
119 | expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name)
120 | util.mkdirs(expr_dir)
121 | if save:
122 | file_name = os.path.join(expr_dir, 'opt.txt')
123 | with open(file_name, 'wt') as opt_file:
124 | opt_file.write('------------ Options -------------\n')
125 | for k, v in sorted(args.items()):
126 | opt_file.write('%s: %s\n' % (str(k), str(v)))
127 | opt_file.write('-------------- End ----------------\n')
128 | return self.opt
129 |
--------------------------------------------------------------------------------
/options/test_options.py:
--------------------------------------------------------------------------------
1 | from .base_options import BaseOptions
2 |
3 |
4 | class TestOptions(BaseOptions):
5 | def initialize(self):
6 | BaseOptions.initialize(self)
7 | self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.')
8 | self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
9 | self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
10 | self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc')
11 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
12 | self.parser.add_argument('--how_many', type=int, default=300, help='how many test images to run')
13 | self.parser.add_argument('--use_real_img', action='store_true', help='use real image for first frame')
14 | self.parser.add_argument('--start_frame', type=int, default=0, help='frame index to start inference on')
15 | self.isTrain = False
16 |
--------------------------------------------------------------------------------
/options/train_options.py:
--------------------------------------------------------------------------------
1 | from .base_options import BaseOptions
2 |
3 |
4 | class TrainOptions(BaseOptions):
5 | def initialize(self):
6 | BaseOptions.initialize(self)
7 | self.parser.add_argument('--display_freq', type=int, default=100, help='frequency of showing training results on screen')
8 | self.parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console')
9 | self.parser.add_argument('--save_latest_freq', type=int, default=1000, help='frequency of saving the latest results')
10 | self.parser.add_argument('--save_epoch_freq', type=int, default=1, help='frequency of saving checkpoints at the end of epochs')
11 | self.parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model')
12 | self.parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc')
13 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
14 | self.parser.add_argument('--niter', type=int, default=10, help='# of iter at starting learning rate')
15 | self.parser.add_argument('--niter_decay', type=int, default=10, help='# of iter to linearly decay learning rate to zero')
16 | self.parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam')
17 | self.parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate for adam')
18 | self.parser.add_argument('--TTUR', action='store_true', help='Use TTUR training scheme')
19 | self.parser.add_argument('--gan_mode', type=str, default='ls', help='(ls|original|hinge)')
20 | self.parser.add_argument('--pool_size', type=int, default=1, help='the size of image buffer that stores previously generated images')
21 | self.parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/')
22 |
23 | # for discriminators
24 | self.parser.add_argument('--num_D', type=int, default=2, help='number of patch scales in each discriminator')
25 | self.parser.add_argument('--n_layers_D', type=int, default=3, help='number of layers in discriminator')
26 | self.parser.add_argument('--no_vgg', action='store_true', help='do not use VGG feature matching loss')
27 | self.parser.add_argument('--no_ganFeat', action='store_true', help='do not match discriminator features')
28 | self.parser.add_argument('--lambda_feat', type=float, default=10.0, help='weight for feature matching')
29 | self.parser.add_argument('--sparse_D', action='store_true', help='use sparse temporal discriminators to save memory')
30 |
31 | # for temporal
32 | self.parser.add_argument('--lambda_T', type=float, default=10.0, help='weight for temporal loss')
33 | self.parser.add_argument('--lambda_F', type=float, default=10.0, help='weight for flow loss')
34 | self.parser.add_argument('--n_frames_D', type=int, default=3, help='number of frames to feed into temporal discriminator')
35 | self.parser.add_argument('--n_scales_temporal', type=int, default=2, help='number of temporal scales in the temporal discriminator')
36 | self.parser.add_argument('--max_frames_per_gpu', type=int, default=1, help='max number of frames to load into one GPU at a time')
37 | self.parser.add_argument('--max_frames_backpropagate', type=int, default=1, help='max number of frames to backpropagate')
38 | self.parser.add_argument('--max_t_step', type=int, default=1, help='max spacing between neighboring sampled frames. If greater than 1, the network may randomly skip frames during training.')
39 | self.parser.add_argument('--n_frames_total', type=int, default=30, help='the overall number of frames in a sequence to train with')
40 |         self.parser.add_argument('--niter_step', type=int, default=5, help='number of epochs between each increase of the training sequence length')
41 | self.parser.add_argument('--niter_fix_global', type=int, default=0, help='if specified, only train the finest spatial layer for the given iterations')
42 |
43 | self.isTrain = True
44 |
--------------------------------------------------------------------------------
/scripts/download_datasets.py:
--------------------------------------------------------------------------------
1 | import os
2 | from download_gdrive import *
3 |
4 | file_id = '1rPcbnanuApZeo2uc7h55OneBkbcFCnnf'
5 | chpt_path = './datasets/'
6 | if not os.path.isdir(chpt_path):
7 | os.makedirs(chpt_path)
8 | destination = os.path.join(chpt_path, 'datasets.zip')
9 | download_file_from_google_drive(file_id, destination)
10 | unzip_file(destination, chpt_path)
--------------------------------------------------------------------------------
/scripts/download_flownet2.py:
--------------------------------------------------------------------------------
1 | import os
2 | from download_gdrive import *
3 | import torch
4 |
5 | """if torch.__version__ == '0.4.1':
6 | file_id = '1gKwE1Ad41TwtAzwDcN3dYa_S6DcVyiSl'
7 | file_name = 'flownet2_pytorch_041.zip'
8 | else:
9 | file_id = '1F2h_6e8gyTqxnbmFFW72zsxx_JX0dKFo'
10 | file_name = 'flownet2_pytorch_040.zip'"""
11 |
12 | chpt_path = './models/'
13 | if not os.path.isdir(chpt_path):
14 | os.makedirs(chpt_path)
15 | """destination = os.path.join(chpt_path, file_name)
16 | download_file_from_google_drive(file_id, destination)
17 | unzip_file(destination, chpt_path)"""
18 | os.system('cd %s/flownet2_pytorch/; bash install.sh; cd ../../' % chpt_path)
--------------------------------------------------------------------------------
/scripts/download_gdrive.py:
--------------------------------------------------------------------------------
1 | # Download code taken from https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039#39225039
2 | import requests, zipfile, os
3 | def download_file_from_google_drive(id, destination):
4 | URL = "https://docs.google.com/uc?export=download"
5 | session = requests.Session()
6 | response = session.get(URL, params = { 'id' : id }, stream = True)
7 | token = get_confirm_token(response)
8 | if token:
9 | params = { 'id' : id, 'confirm' : token }
10 | response = session.get(URL, params = params, stream = True)
11 | save_response_content(response, destination)
12 | def get_confirm_token(response):
13 | for key, value in response.cookies.items():
14 | if key.startswith('download_warning'):
15 | return value
16 | return None
17 | def save_response_content(response, destination):
18 | CHUNK_SIZE = 32768
19 | with open(destination, "wb") as f:
20 | for chunk in response.iter_content(CHUNK_SIZE):
21 | if chunk: # filter out keep-alive new chunks
22 | f.write(chunk)
23 |
24 | def unzip_file(file_name, unzip_path):
25 | zip_ref = zipfile.ZipFile(file_name, 'r')
26 | zip_ref.extractall(unzip_path)
27 | zip_ref.close()
28 | os.remove(file_name)
--------------------------------------------------------------------------------
/scripts/download_models_flownet2.py:
--------------------------------------------------------------------------------
1 | import os
2 | from download_gdrive import *
3 |
4 | file_id = '1E8re-b6csNuo-abg1vJKCDjCzlIam50F'
5 | chpt_path = './models/flownet2_pytorch/'
6 | destination = os.path.join(chpt_path, 'FlowNet2_checkpoint.pth.tar')
7 | download_file_from_google_drive(file_id, destination)
--------------------------------------------------------------------------------
/scripts/face/download_gdrive.py:
--------------------------------------------------------------------------------
1 | # Download code taken from https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039#39225039
2 | import requests, zipfile, os
3 | def download_file_from_google_drive(id, destination):
4 | URL = "https://docs.google.com/uc?export=download"
5 | session = requests.Session()
6 | response = session.get(URL, params = { 'id' : id }, stream = True)
7 | token = get_confirm_token(response)
8 | if token:
9 | params = { 'id' : id, 'confirm' : token }
10 | response = session.get(URL, params = params, stream = True)
11 | save_response_content(response, destination)
12 | def get_confirm_token(response):
13 | for key, value in response.cookies.items():
14 | if key.startswith('download_warning'):
15 | return value
16 | return None
17 | def save_response_content(response, destination):
18 | CHUNK_SIZE = 32768
19 | with open(destination, "wb") as f:
20 | for chunk in response.iter_content(CHUNK_SIZE):
21 | if chunk: # filter out keep-alive new chunks
22 | f.write(chunk)
23 |
24 | def unzip_file(file_name, unzip_path):
25 | zip_ref = zipfile.ZipFile(file_name, 'r')
26 | zip_ref.extractall(unzip_path)
27 | zip_ref.close()
28 | os.remove(file_name)
--------------------------------------------------------------------------------
/scripts/face/download_models.py:
--------------------------------------------------------------------------------
1 | import os
2 | from download_gdrive import *
3 |
4 | file_id = '10LvNw-2lrh-6sPGkWbQDfHspkqz5AKxb'
5 | chpt_path = './checkpoints/'
6 | if not os.path.isdir(chpt_path):
7 | os.makedirs(chpt_path)
8 | destination = os.path.join(chpt_path, 'models_face.zip')
9 | download_file_from_google_drive(file_id, destination)
10 | unzip_file(destination, chpt_path)
--------------------------------------------------------------------------------
/scripts/face/test_512.sh:
--------------------------------------------------------------------------------
1 | python test.py --name edge2face_512 \
2 | --dataroot datasets/face/ --dataset_mode face \
3 | --input_nc 15 --loadSize 512 --use_single_G
--------------------------------------------------------------------------------
/scripts/face/test_g1_256.sh:
--------------------------------------------------------------------------------
1 | python test.py --name edge2face_256_g1 \
2 | --dataroot datasets/face/ --dataset_mode face \
3 | --input_nc 15 --loadSize 256 --ngf 64 --use_single_G
4 |
--------------------------------------------------------------------------------
/scripts/face/test_g1_512.sh:
--------------------------------------------------------------------------------
1 | python test.py --name edge2face_512_g1 \
2 | --dataroot datasets/face/ --dataset_mode face \
3 | --n_scales_spatial 2 --input_nc 15 --loadSize 512 --ngf 64 \
4 | --use_single_G
5 |
--------------------------------------------------------------------------------
/scripts/face/train_512.sh:
--------------------------------------------------------------------------------
1 | python train.py --name edge2face_512 \
2 | --dataroot datasets/face/ --dataset_mode face \
3 | --input_nc 15 --loadSize 512 --num_D 3 \
4 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 6 \
5 | --niter 20 --niter_decay 20 --n_frames_total 12
--------------------------------------------------------------------------------
/scripts/face/train_512_bs7.sh:
--------------------------------------------------------------------------------
1 | python train.py --name edge2face_512 \
2 | --dataroot datasets/face/ --dataset_mode face \
3 | --input_nc 15 --loadSize 512 --num_D 3 \
4 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 8 --batchSize 7 \
5 | --niter 20 --niter_decay 20 --n_frames_total 12
--------------------------------------------------------------------------------
/scripts/face/train_g1_256.sh:
--------------------------------------------------------------------------------
1 | python train.py --name edge2face_256_g1 \
2 | --dataroot datasets/face/ --dataset_mode face \
3 | --input_nc 15 --loadSize 256 --ngf 64 \
4 | --max_frames_per_gpu 6 --n_frames_total 12 \
5 | --niter 20 --niter_decay 20
6 |
--------------------------------------------------------------------------------
/scripts/face/train_g1_512.sh:
--------------------------------------------------------------------------------
1 | python train.py --name edge2face_512_g1 \
2 | --dataroot datasets/face/ --dataset_mode face \
3 | --n_scales_spatial 2 --num_D 3 \
4 | --input_nc 15 --loadSize 512 --ngf 64 \
5 | --n_frames_total 6 --niter_step 2 --niter_fix_global 5 \
6 | --load_pretrain checkpoints/edge2face_256_g1
7 |
--------------------------------------------------------------------------------
/scripts/pose/test_1024p.sh:
--------------------------------------------------------------------------------
1 | python test.py --name pose2body_1024p \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --n_scales_spatial 3 \
4 | --resize_or_crop scaleHeight --loadSize 1024 --no_first_img
--------------------------------------------------------------------------------
/scripts/pose/test_256p.sh:
--------------------------------------------------------------------------------
1 | python test.py --name pose2body_256p \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --resize_or_crop scaleHeight --loadSize 256 --no_first_img
--------------------------------------------------------------------------------
/scripts/pose/test_512p.sh:
--------------------------------------------------------------------------------
1 | python test.py --name pose2body_512p \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --n_scales_spatial 2 \
4 | --resize_or_crop scaleHeight --loadSize 512 --no_first_img
--------------------------------------------------------------------------------
/scripts/pose/test_g1_1024p.sh:
--------------------------------------------------------------------------------
1 | python test.py --name pose2body_1024p_g1 \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --n_scales_spatial 3 --ngf 64 \
4 | --resize_or_crop scaleHeight --loadSize 1024 --no_first_img
5 |
--------------------------------------------------------------------------------
/scripts/pose/test_g1_256p.sh:
--------------------------------------------------------------------------------
1 | python test.py --name pose2body_256p_g1 \
2 | --dataroot datasets/pose --dataset_mode pose --ngf 64 \
3 | --input_nc 6 --resize_or_crop scaleHeight --loadSize 256 --no_first_img
4 |
--------------------------------------------------------------------------------
/scripts/pose/test_g1_512p.sh:
--------------------------------------------------------------------------------
1 | python test.py --name pose2body_512p_g1 \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --n_scales_spatial 2 --ngf 64 \
4 | --resize_or_crop scaleHeight --loadSize 512 --no_first_img
5 |
--------------------------------------------------------------------------------
/scripts/pose/train_1024p.sh:
--------------------------------------------------------------------------------
1 | python train.py --name pose2body_1024p \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --n_scales_spatial 3 --num_D 4 \
4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 1536 --fineSize 1024 \
5 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \
6 | --no_first_img --n_frames_total 12 --max_t_step 4 --add_face_disc \
7 | --niter_fix_global 3 --niter 5 --niter_decay 5 \
8 | --lr 0.00005 --load_pretrain checkpoints/pose2body_512p
--------------------------------------------------------------------------------
/scripts/pose/train_256p.sh:
--------------------------------------------------------------------------------
1 | python train.py --name pose2body_256p \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --num_D 2 \
4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 384 --fineSize 256 \
5 | --gpu_ids 0,1,2,3,4,5,6,7 --batchSize 8 --max_frames_per_gpu 3 \
6 | --niter 5 --niter_decay 5 \
7 | --no_first_img --n_frames_total 12 --max_t_step 4
--------------------------------------------------------------------------------
/scripts/pose/train_512p.sh:
--------------------------------------------------------------------------------
1 | python train.py --name pose2body_512p \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --n_scales_spatial 2 --num_D 3 \
4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 768 --fineSize 512 \
5 | --gpu_ids 0,1,2,3,4,5,6,7 --batchSize 8 \
6 | --no_first_img --n_frames_total 12 --max_t_step 4 --add_face_disc \
7 | --niter_fix_global 3 --niter 5 --niter_decay 5 \
8 | --lr 0.0001 --load_pretrain checkpoints/pose2body_256p
--------------------------------------------------------------------------------
/scripts/pose/train_g1_1024p.sh:
--------------------------------------------------------------------------------
1 | python train.py --name pose2body_1024p_g1 \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --n_scales_spatial 3 --num_D 4 --ngf 64 --ndf 32 \
4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 1536 --fineSize 1024 \
5 | --no_first_img --n_frames_total 12 --max_t_step 4 --add_face_disc \
6 | --niter_fix_global 3 --niter 5 --niter_decay 5 \
7 | --lr 0.00005 --load_pretrain checkpoints/pose2body_512p_g1
8 |
--------------------------------------------------------------------------------
/scripts/pose/train_g1_256p.sh:
--------------------------------------------------------------------------------
1 | python train.py --name pose2body_256p_g1 \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --ngf 64 --num_D 2 \
4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 384 --fineSize 256 \
5 | --niter 5 --niter_decay 5 \
6 | --no_first_img --n_frames_total 12 --max_frames_per_gpu 4 --max_t_step 4
7 |
--------------------------------------------------------------------------------
/scripts/pose/train_g1_512p.sh:
--------------------------------------------------------------------------------
1 | python train.py --name pose2body_512p_g1 \
2 | --dataroot datasets/pose --dataset_mode pose \
3 | --input_nc 6 --n_scales_spatial 2 --ngf 64 --num_D 3 \
4 | --resize_or_crop randomScaleHeight_and_scaledCrop --loadSize 768 --fineSize 512 \
5 | --no_first_img --n_frames_total 12 --max_frames_per_gpu 2 --max_t_step 4 --add_face_disc \
6 | --niter_fix_global 3 --niter 5 --niter_decay 5 \
7 | --lr 0.0001 --load_pretrain checkpoints/pose2body_256p_g1
8 |
--------------------------------------------------------------------------------
/scripts/street/download_gdrive.py:
--------------------------------------------------------------------------------
1 | # Download code taken from https://stackoverflow.com/questions/25010369/wget-curl-large-file-from-google-drive/39225039#39225039
2 | import requests, zipfile, os
3 | def download_file_from_google_drive(id, destination):
4 | URL = "https://docs.google.com/uc?export=download"
5 | session = requests.Session()
6 | response = session.get(URL, params = { 'id' : id }, stream = True)
7 | token = get_confirm_token(response)
8 | if token:
9 | params = { 'id' : id, 'confirm' : token }
10 | response = session.get(URL, params = params, stream = True)
11 | save_response_content(response, destination)
12 | def get_confirm_token(response):
13 | for key, value in response.cookies.items():
14 | if key.startswith('download_warning'):
15 | return value
16 | return None
17 | def save_response_content(response, destination):
18 | CHUNK_SIZE = 32768
19 | with open(destination, "wb") as f:
20 | for chunk in response.iter_content(CHUNK_SIZE):
21 | if chunk: # filter out keep-alive new chunks
22 | f.write(chunk)
23 |
24 | def unzip_file(file_name, unzip_path):
25 | zip_ref = zipfile.ZipFile(file_name, 'r')
26 | zip_ref.extractall(unzip_path)
27 | zip_ref.close()
28 | os.remove(file_name)
--------------------------------------------------------------------------------
/scripts/street/download_models.py:
--------------------------------------------------------------------------------
1 | import os
2 | from download_gdrive import *
3 |
4 | file_id = '1MKtImgtnGC28EPU7Nh9DfFpHW6okNVkl'
5 | chpt_path = './checkpoints/'
6 | if not os.path.isdir(chpt_path):
7 | os.makedirs(chpt_path)
8 | destination = os.path.join(chpt_path, 'models.zip')
9 | download_file_from_google_drive(file_id, destination)
10 | unzip_file(destination, chpt_path)
--------------------------------------------------------------------------------
/scripts/street/download_models_g1.py:
--------------------------------------------------------------------------------
1 | import os
2 | from download_gdrive import *
3 |
4 | file_id = '1QoE1p3QikxNVbbTBWWRDtIspg-RcLE8y'
5 | chpt_path = './checkpoints/'
6 | if not os.path.isdir(chpt_path):
7 | os.makedirs(chpt_path)
8 | destination = os.path.join(chpt_path, 'models_g1.zip')
9 | download_file_from_google_drive(file_id, destination)
10 | unzip_file(destination, chpt_path)
11 |
--------------------------------------------------------------------------------
/scripts/street/test_2048.sh:
--------------------------------------------------------------------------------
1 | python test.py --name label2city_2048 --label_nc 35 --loadSize 2048 --n_scales_spatial 3 --use_instance --fg --use_single_G
2 |
--------------------------------------------------------------------------------
/scripts/street/test_g1_1024.sh:
--------------------------------------------------------------------------------
1 | python test.py --name label2city_1024_g1 --label_nc 35 --loadSize 1024 --n_scales_spatial 3 --use_instance --fg --n_downsample_G 2 --use_single_G
2 |
--------------------------------------------------------------------------------
/scripts/street/train_1024.sh:
--------------------------------------------------------------------------------
1 | python train.py --name label2city_1024 \
2 | --label_nc 35 --loadSize 1024 --n_scales_spatial 2 --num_D 3 --use_instance --fg \
3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \
4 | --n_frames_total 4 --niter_step 2 \
5 | --niter_fix_global 10 --load_pretrain checkpoints/label2city_512 --lr 0.0001
6 |
--------------------------------------------------------------------------------
/scripts/street/train_2048.sh:
--------------------------------------------------------------------------------
1 | python train.py --name label2city_2048 \
2 | --label_nc 35 --loadSize 2048 --n_scales_spatial 3 --num_D 4 --use_instance --fg \
3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \
4 | --n_frames_total 4 --niter_step 1 \
5 | --niter 5 --niter_decay 5 \
6 | --niter_fix_global 5 --load_pretrain checkpoints/label2city_1024 --lr 0.00005
--------------------------------------------------------------------------------
/scripts/street/train_2048_crop.sh:
--------------------------------------------------------------------------------
1 | python train.py --name label2city_2048_crop \
2 | --label_nc 35 --loadSize 2048 --fineSize 1024 --resize_or_crop crop \
3 | --n_scales_spatial 3 --num_D 4 --use_instance --fg \
4 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 4 \
5 | --n_frames_total 4 --niter_step 1 \
6 | --niter 5 --niter_decay 5 \
7 | --niter_fix_global 5 --load_pretrain checkpoints/label2city_1024 --lr 0.00005
8 |
--------------------------------------------------------------------------------
/scripts/street/train_512.sh:
--------------------------------------------------------------------------------
1 | python train.py --name label2city_512 \
2 | --label_nc 35 --loadSize 512 --use_instance --fg \
3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 6 \
4 | --n_frames_total 6 --max_frames_per_gpu 2
5 |
--------------------------------------------------------------------------------
/scripts/street/train_512_bs.sh:
--------------------------------------------------------------------------------
1 | python train.py --name label2city_512_bs \
2 | --label_nc 35 --loadSize 512 --use_instance --fg \
3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen -1 \
4 | --n_frames_total 6 --batchSize 15
5 |
--------------------------------------------------------------------------------
/scripts/street/train_512_no_fg.sh:
--------------------------------------------------------------------------------
1 | python train.py --name label2city_512_no_fg \
2 | --label_nc 35 --loadSize 512 --use_instance \
3 | --gpu_ids 0,1,2,3,4,5,6,7 --n_gpus_gen 6 \
4 | --n_frames_total 6 --max_frames_per_gpu 2
5 |
--------------------------------------------------------------------------------
/scripts/street/train_g1_1024.sh:
--------------------------------------------------------------------------------
1 | python train.py --name label2city_1024_g1 \
2 | --label_nc 35 --loadSize 896 --n_scales_spatial 3 --n_frames_D 2 \
3 | --use_instance --fg --n_downsample_G 2 --num_D 3 \
4 | --max_frames_per_gpu 1 --n_frames_total 4 \
5 | --niter_step 2 --niter_fix_global 8 --niter_decay 5 \
6 | --load_pretrain checkpoints/label2city_512_g1 --lr 0.0001
7 |
--------------------------------------------------------------------------------
/scripts/street/train_g1_256.sh:
--------------------------------------------------------------------------------
1 | python train.py --name label2city_256_g1 \
2 | --label_nc 35 --loadSize 256 --use_instance --fg \
3 | --n_downsample_G 2 --num_D 1 \
4 | --max_frames_per_gpu 6 --n_frames_total 6
5 |
--------------------------------------------------------------------------------
/scripts/street/train_g1_512.sh:
--------------------------------------------------------------------------------
1 | python train.py --name label2city_512_g1 \
2 | --label_nc 35 --loadSize 512 --n_scales_spatial 2 \
3 | --use_instance --fg --n_downsample_G 2 \
4 | --max_frames_per_gpu 2 --n_frames_total 4 \
5 | --niter_step 2 --niter_fix_global 8 --niter_decay 5 \
6 | --load_pretrain checkpoints/label2city_256_g1
7 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
3 | import time
4 | import os
5 | import numpy as np
6 | from collections import OrderedDict
7 | from torch.autograd import Variable
8 | from options.test_options import TestOptions
9 | from data.data_loader import CreateDataLoader
10 | from models.models import create_model
11 | import util.util as util
12 | from util.visualizer import Visualizer
13 | from util import html
14 |
15 | opt = TestOptions().parse(save=False)
16 | opt.nThreads = 1 # test code only supports nThreads = 1
17 | opt.batchSize = 1 # test code only supports batchSize = 1
18 | opt.serial_batches = True # no shuffle
19 | opt.no_flip = True # no flip
20 | if opt.dataset_mode == 'temporal':
21 | opt.dataset_mode = 'test'
22 |
23 | data_loader = CreateDataLoader(opt)
24 | dataset = data_loader.load_data()
25 | model = create_model(opt)
26 | visualizer = Visualizer(opt)
27 | input_nc = 1 if opt.label_nc != 0 else opt.input_nc
28 |
29 | save_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.which_epoch))
30 | print('Doing %d frames' % len(dataset))
31 | for i, data in enumerate(dataset):
32 | if i >= opt.how_many:
33 | break
34 | if data['change_seq']:
35 | model.fake_B_prev = None
36 |
37 | _, _, height, width = data['A'].size()
38 | A = Variable(data['A']).view(1, -1, input_nc, height, width)
39 | B = Variable(data['B']).view(1, -1, opt.output_nc, height, width) if len(data['B'].size()) > 2 else None
40 | inst = Variable(data['inst']).view(1, -1, 1, height, width) if len(data['inst'].size()) > 2 else None
41 | generated = model.inference(A, B, inst)
42 |
43 | if opt.label_nc != 0:
44 | real_A = util.tensor2label(generated[1], opt.label_nc)
45 | else:
46 | c = 3 if opt.input_nc == 3 else 1
47 | real_A = util.tensor2im(generated[1][:c], normalize=False)
48 |
49 | visual_list = [('real_A', real_A),
50 | ('fake_B', util.tensor2im(generated[0].data[0]))]
51 | visuals = OrderedDict(visual_list)
52 | img_path = data['A_path']
53 | print('process image... %s' % img_path)
54 | visualizer.save_images(save_dir, visuals, img_path)
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
3 | import time
4 | import os
5 | import torch
6 | from subprocess import call
7 |
8 | from options.train_options import TrainOptions
9 | from data.data_loader import CreateDataLoader
10 | from models.models import create_model, create_optimizer, init_params, save_models, update_models
11 | import util.util as util
12 | from util.visualizer import Visualizer
13 |
14 | def train():
15 | opt = TrainOptions().parse()
16 | if opt.debug:
17 | opt.display_freq = 1
18 | opt.print_freq = 1
19 | opt.nThreads = 1
20 |
21 | ### initialize dataset
22 | data_loader = CreateDataLoader(opt)
23 | dataset = data_loader.load_data()
24 | dataset_size = len(data_loader)
25 | print('#training videos = %d' % dataset_size)
26 |
27 | ### initialize models
28 | models = create_model(opt)
29 | modelG, modelD, flowNet, optimizer_G, optimizer_D, optimizer_D_T = create_optimizer(opt, models)
30 |
31 | ### set parameters
32 | n_gpus, tG, tD, tDB, s_scales, t_scales, input_nc, output_nc, \
33 | start_epoch, epoch_iter, print_freq, total_steps, iter_path = init_params(opt, modelG, modelD, data_loader)
34 | visualizer = Visualizer(opt)
35 |
36 | ### real training starts here
37 | for epoch in range(start_epoch, opt.niter + opt.niter_decay + 1):
38 | epoch_start_time = time.time()
39 | for idx, data in enumerate(dataset, start=epoch_iter):
40 | if total_steps % print_freq == 0:
41 | iter_start_time = time.time()
42 | total_steps += opt.batchSize
43 | epoch_iter += opt.batchSize
44 |
45 | # whether to collect output images
46 | save_fake = total_steps % opt.display_freq == 0
47 | n_frames_total, n_frames_load, t_len = data_loader.dataset.init_data_params(data, n_gpus, tG)
48 | fake_B_prev_last, frames_all = data_loader.dataset.init_data(t_scales)
49 |
50 | for i in range(0, n_frames_total, n_frames_load):
51 | input_A, input_B, inst_A = data_loader.dataset.prepare_data(data, i, input_nc, output_nc)
52 |
53 | ###################################### Forward Pass ##########################
54 | ####### generator
55 | fake_B, fake_B_raw, flow, weight, real_A, real_Bp, fake_B_last = modelG(input_A, input_B, inst_A, fake_B_prev_last)
56 |
57 | ####### discriminator
58 | ### individual frame discriminator
59 | real_B_prev, real_B = real_Bp[:, :-1], real_Bp[:, 1:] # the collection of previous and current real frames
60 | flow_ref, conf_ref = flowNet(real_B, real_B_prev) # reference flows and confidences
61 | fake_B_prev = modelG.module.compute_fake_B_prev(real_B_prev, fake_B_prev_last, fake_B)
62 | fake_B_prev_last = fake_B_last
63 |
64 | losses = modelD(0, reshape([real_B, fake_B, fake_B_raw, real_A, real_B_prev, fake_B_prev, flow, weight, flow_ref, conf_ref]))
65 | losses = [ torch.mean(x) if x is not None else 0 for x in losses ]
66 | loss_dict = dict(zip(modelD.module.loss_names, losses))
67 |
68 | ### temporal discriminator
69 | # get skipped frames for each temporal scale
70 | frames_all, frames_skipped = modelD.module.get_all_skipped_frames(frames_all, \
71 | real_B, fake_B, flow_ref, conf_ref, t_scales, tD, n_frames_load, i, flowNet)
72 |
73 | # run discriminator for each temporal scale
74 | loss_dict_T = []
75 | for s in range(t_scales):
76 | if frames_skipped[0][s] is not None:
77 | losses = modelD(s+1, [frame_skipped[s] for frame_skipped in frames_skipped])
78 | losses = [ torch.mean(x) if not isinstance(x, int) else x for x in losses ]
79 | loss_dict_T.append(dict(zip(modelD.module.loss_names_T, losses)))
80 |
81 | # collect losses
82 | loss_G, loss_D, loss_D_T, t_scales_act = modelD.module.get_losses(loss_dict, loss_dict_T, t_scales)
83 |
84 | ###################################### Backward Pass #################################
85 | # update generator weights
86 | loss_backward(opt, loss_G, optimizer_G)
87 |
88 | # update individual discriminator weights
89 | loss_backward(opt, loss_D, optimizer_D)
90 |
91 | # update temporal discriminator weights
92 | for s in range(t_scales_act):
93 | loss_backward(opt, loss_D_T[s], optimizer_D_T[s])
94 |
95 | if i == 0: fake_B_first = fake_B[0, 0] # the first generated image in this sequence
96 |
97 | if opt.debug:
98 | call(["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"])
99 |
100 | ############## Display results and errors ##########
101 | ### print out errors
102 | if total_steps % print_freq == 0:
103 | t = (time.time() - iter_start_time) / print_freq
104 | errors = {k: v.data.item() if not isinstance(v, int) else v for k, v in loss_dict.items()}
105 | for s in range(len(loss_dict_T)):
106 | errors.update({k+str(s): v.data.item() if not isinstance(v, int) else v for k, v in loss_dict_T[s].items()})
107 | visualizer.print_current_errors(epoch, epoch_iter, errors, t)
108 | visualizer.plot_current_errors(errors, total_steps)
109 |
110 | ### display output images
111 | if save_fake:
112 | visuals = util.save_all_tensors(opt, real_A, fake_B, fake_B_first, fake_B_raw, real_B, flow_ref, conf_ref, flow, weight, modelD)
113 | visualizer.display_current_results(visuals, epoch, total_steps)
114 |
115 | ### save latest model
116 | save_models(opt, epoch, epoch_iter, total_steps, visualizer, iter_path, modelG, modelD)
117 | if epoch_iter > dataset_size - opt.batchSize:
118 | epoch_iter = 0
119 | break
120 |
121 | # end of epoch
122 | iter_end_time = time.time()
123 | visualizer.vis_print('End of epoch %d / %d \t Time Taken: %d sec' %
124 | (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time))
125 |
126 | ### save model for this epoch and update model params
127 | save_models(opt, epoch, epoch_iter, total_steps, visualizer, iter_path, modelG, modelD, end_of_epoch=True)
128 | update_models(opt, epoch, modelG, modelD, data_loader)
129 |
130 | def loss_backward(opt, loss, optimizer):
131 | optimizer.zero_grad()
132 | if opt.fp16:
133 | from apex import amp
134 | with amp.scale_loss(loss, optimizer) as scaled_loss:
135 | scaled_loss.backward()
136 | else:
137 | loss.backward()
138 | optimizer.step()
139 |
140 | def reshape(tensors):
141 | if tensors is None: return None
142 | if isinstance(tensors, list):
143 | return [reshape(tensor) for tensor in tensors]
144 | _, _, ch, h, w = tensors.size()
145 | return tensors.contiguous().view(-1, ch, h, w)
146 |
147 | if __name__ == "__main__":
148 | train()
--------------------------------------------------------------------------------
/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/vid2vid/2e6d13755fc2e33200e7d4c0c44f2692d6ab0898/util/__init__.py
--------------------------------------------------------------------------------
/util/html.py:
--------------------------------------------------------------------------------
1 | import dominate
2 | from dominate.tags import *
3 | import os
4 |
5 |
6 | class HTML:
7 | def __init__(self, web_dir, title, reflesh=0):
8 | self.title = title
9 | self.web_dir = web_dir
10 | self.img_dir = os.path.join(self.web_dir, 'images')
11 | if not os.path.exists(self.web_dir):
12 | os.makedirs(self.web_dir)
13 | if not os.path.exists(self.img_dir):
14 | os.makedirs(self.img_dir)
15 | # print(self.img_dir)
16 |
17 | self.doc = dominate.document(title=title)
18 | if reflesh > 0:
19 | with self.doc.head:
20 | meta(http_equiv="refresh", content=str(reflesh))
21 |
22 | def get_image_dir(self):
23 | return self.img_dir
24 |
25 | def add_header(self, str):
26 | with self.doc:
27 | h3(str)
28 |
29 | def add_table(self, border=1):
30 | self.t = table(border=border, style="table-layout: fixed;")
31 | self.doc.add(self.t)
32 |
33 | def add_images(self, ims, txts, links, width=400, height=0):
34 | self.add_table()
35 | with self.t:
36 | with tr():
37 | for im, txt, link in zip(ims, txts, links):
38 | with td(style="word-wrap: break-word;", halign="center", valign="top"):
39 | with p():
40 | with a(href=os.path.join('images', link)):
41 | if height != 0:
42 | img(style="width:%dpx;height:%dpx" % (width, height), src=os.path.join('images', im))
43 | else:
44 | img(style="width:%dpx" % (width), src=os.path.join('images', im))
45 | br()
46 | p(txt)
47 |
48 | def save(self):
49 | html_file = '%s/index.html' % self.web_dir
50 | f = open(html_file, 'wt')
51 | f.write(self.doc.render())
52 | f.close()
53 |
54 |
55 | if __name__ == '__main__':
56 | html = HTML('web/', 'test_html')
57 | html.add_header('hello world')
58 |
59 | ims = []
60 | txts = []
61 | links = []
62 | for n in range(4):
63 | ims.append('image_%d.jpg' % n)
64 | txts.append('text_%d' % n)
65 | links.append('image_%d.jpg' % n)
66 | html.add_images(ims, txts, links)
67 | html.save()
68 |
--------------------------------------------------------------------------------
/util/image_pool.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch
4 | from torch.autograd import Variable
5 | class ImagePool():
6 | def __init__(self, pool_size):
7 | self.pool_size = pool_size
8 | if self.pool_size > 0:
9 | self.num_imgs = 0
10 | self.images = []
11 |
12 | def query(self, images):
13 | if self.pool_size == 0:
14 | return images
15 | return_images = []
16 | for image in images.data:
17 | image = torch.unsqueeze(image, 0)
18 | if self.num_imgs < self.pool_size:
19 | self.num_imgs = self.num_imgs + 1
20 | self.images.append(image)
21 | return_images.append(image)
22 | else:
23 | p = random.uniform(0, 1)
24 | if p > 0.5:
25 | random_id = random.randint(0, self.pool_size-1)
26 | tmp = self.images[random_id].clone()
27 | self.images[random_id] = image
28 | return_images.append(tmp)
29 | else:
30 | return_images.append(image)
31 | return_images = Variable(torch.cat(return_images, 0))
32 | return return_images
33 |
--------------------------------------------------------------------------------
/util/util.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import torch
3 | import numpy as np
4 | from PIL import Image
5 | import inspect, re
6 | import numpy as np
7 | import os
8 | import collections
9 | from PIL import Image
10 | import cv2
11 | from collections import OrderedDict
12 |
13 | def save_all_tensors(opt, real_A, fake_B, fake_B_first, fake_B_raw, real_B, flow_ref, conf_ref, flow, weight, modelD):
14 | if opt.label_nc != 0:
15 | input_image = tensor2label(real_A, opt.label_nc)
16 | elif opt.dataset_mode == 'pose':
17 | input_image = tensor2im(real_A)
18 | if real_A.size()[2] == 6:
19 | input_image2 = tensor2im(real_A[0, -1, 3:])
20 | input_image[input_image2 != 0] = input_image2[input_image2 != 0]
21 | else:
22 | c = 3 if opt.input_nc >= 3 else 1
23 | input_image = tensor2im(real_A[0, -1, :c], normalize=False)
24 | if opt.use_instance:
25 | edges = tensor2im(real_A[0, -1, -1:], normalize=False)
26 | input_image += edges[:,:,np.newaxis]
27 |
28 | if opt.add_face_disc:
29 | ys, ye, xs, xe = modelD.module.get_face_region(real_A[0, -1:])
30 | if ys is not None:
31 | input_image[ys, xs:xe, :] = input_image[ye, xs:xe, :] = input_image[ys:ye, xs, :] = input_image[ys:ye, xe, :] = 255
32 |
33 | visual_list = [('input_image', input_image),
34 | ('fake_image', tensor2im(fake_B)),
35 | ('fake_first_image', tensor2im(fake_B_first)),
36 | ('fake_raw_image', tensor2im(fake_B_raw)),
37 | ('real_image', tensor2im(real_B)),
38 | ('flow_ref', tensor2flow(flow_ref)),
39 | ('conf_ref', tensor2im(conf_ref, normalize=False))]
40 | if flow is not None:
41 | visual_list += [('flow', tensor2flow(flow)),
42 | ('weight', tensor2im(weight, normalize=False))]
43 | visuals = OrderedDict(visual_list)
44 | return visuals
45 |
46 | # Converts a Tensor into a Numpy array
47 | # |imtype|: the desired type of the converted numpy array
48 | def tensor2im(image_tensor, imtype=np.uint8, normalize=True):
49 | if isinstance(image_tensor, list):
50 | image_numpy = []
51 | for i in range(len(image_tensor)):
52 | image_numpy.append(tensor2im(image_tensor[i], imtype, normalize))
53 | return image_numpy
54 |
55 | if isinstance(image_tensor, torch.autograd.Variable):
56 | image_tensor = image_tensor.data
57 | if len(image_tensor.size()) == 5:
58 | image_tensor = image_tensor[0, -1]
59 | if len(image_tensor.size()) == 4:
60 | image_tensor = image_tensor[0]
61 | image_tensor = image_tensor[:3]
62 | image_numpy = image_tensor.cpu().float().numpy()
63 | if normalize:
64 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
65 | else:
66 | image_numpy = np.transpose(image_numpy, (1, 2, 0)) * 255.0
67 | #image_numpy = (np.transpose(image_numpy, (1, 2, 0)) * std + mean) * 255.0
68 | image_numpy = np.clip(image_numpy, 0, 255)
69 | if image_numpy.shape[2] == 1:
70 | image_numpy = image_numpy[:,:,0]
71 | return image_numpy.astype(imtype)
72 |
73 | def tensor2label(output, n_label, imtype=np.uint8):
74 | if isinstance(output, torch.autograd.Variable):
75 | output = output.data
76 | if len(output.size()) == 5:
77 | output = output[0, -1]
78 | if len(output.size()) == 4:
79 | output = output[0]
80 | output = output.cpu().float()
81 | if output.size()[0] > 1:
82 | output = output.max(0, keepdim=True)[1]
83 | #print(output.size())
84 | output = Colorize(n_label)(output)
85 | output = np.transpose(output.numpy(), (1, 2, 0))
86 | #img = Image.fromarray(output, "RGB")
87 | return output.astype(imtype)
88 |
89 | def tensor2flow(output, imtype=np.uint8):
90 | if isinstance(output, torch.autograd.Variable):
91 | output = output.data
92 | if len(output.size()) == 5:
93 | output = output[0, -1]
94 | if len(output.size()) == 4:
95 | output = output[0]
96 | output = output.cpu().float().numpy()
97 | output = np.transpose(output, (1, 2, 0))
98 | #mag = np.max(np.sqrt(output[:,:,0]**2 + output[:,:,1]**2))
99 | #print(mag)
100 | hsv = np.zeros((output.shape[0], output.shape[1], 3), dtype=np.uint8)
101 | hsv[:, :, 0] = 255
102 | hsv[:, :, 1] = 255
103 | mag, ang = cv2.cartToPolar(output[..., 0], output[..., 1])
104 | hsv[..., 0] = ang * 180 / np.pi / 2
105 | hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
106 | rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
107 | return rgb
108 |
109 | def add_dummy_to_tensor(tensors, add_size=0):
110 | if add_size == 0 or tensors is None: return tensors
111 | if isinstance(tensors, list):
112 | return [add_dummy_to_tensor(tensor, add_size) for tensor in tensors]
113 |
114 | if isinstance(tensors, torch.Tensor):
115 | dummy = torch.zeros_like(tensors)[:add_size]
116 | tensors = torch.cat([dummy, tensors])
117 | return tensors
118 |
119 | def remove_dummy_from_tensor(tensors, remove_size=0):
120 | if remove_size == 0 or tensors is None: return tensors
121 | if isinstance(tensors, list):
122 | return [remove_dummy_from_tensor(tensor, remove_size) for tensor in tensors]
123 |
124 | if isinstance(tensors, torch.Tensor):
125 | tensors = tensors[remove_size:]
126 | return tensors
127 |
128 | def save_image(image_numpy, image_path):
129 | image_pil = Image.fromarray(image_numpy)
130 | image_pil.save(image_path)
131 |
132 | def print_numpy(x, val=True, shp=False):
133 | x = x.astype(np.float64)
134 | if shp:
135 | print('shape,', x.shape)
136 | if val:
137 | x = x.flatten()
138 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
139 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))
140 |
141 | def mkdirs(paths):
142 | if isinstance(paths, list) and not isinstance(paths, str):
143 | for path in paths:
144 | mkdir(path)
145 | else:
146 | mkdir(paths)
147 |
148 | def mkdir(path):
149 | if not os.path.exists(path):
150 | os.makedirs(path)
151 |
152 | def uint82bin(n, count=8):
153 | """returns the binary of integer n, count refers to amount of bits"""
154 | return ''.join([str((n >> y) & 1) for y in range(count-1, -1, -1)])
155 |
156 | def labelcolormap(N):
157 | if N == 35: # Cityscapes train
158 | cmap = np.array([( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), ( 0, 0, 0), (111, 74, 0), ( 81, 0, 81),
159 | (128, 64,128), (244, 35,232), (250,170,160), (230,150,140), ( 70, 70, 70), (102,102,156), (190,153,153),
160 | (180,165,180), (150,100,100), (150,120, 90), (153,153,153), (153,153,153), (250,170, 30), (220,220, 0),
161 | (107,142, 35), (152,251,152), ( 70,130,180), (220, 20, 60), (255, 0, 0), ( 0, 0,142), ( 0, 0, 70),
162 | ( 0, 60,100), ( 0, 0, 90), ( 0, 0,110), ( 0, 80,100), ( 0, 0,230), (119, 11, 32), ( 0, 0,142)],
163 | dtype=np.uint8)
164 | elif N == 20: # Cityscapes eval
165 | cmap = np.array([(128, 64,128), (244, 35,232), ( 70, 70, 70), (102,102,156), (190,153,153), (153,153,153), (250,170, 30),
166 | (220,220, 0), (107,142, 35), (152,251,152), ( 70,130,180), (220, 20, 60), (255, 0, 0), ( 0, 0,142),
167 | ( 0, 0, 70), ( 0, 60,100), ( 0, 80,100), ( 0, 0,230), (119, 11, 32), ( 0, 0, 0)],
168 | dtype=np.uint8)
169 | else:
170 | cmap = np.zeros((N, 3), dtype=np.uint8)
171 | for i in range(N):
172 | r, g, b = 0, 0, 0
173 | id = i
174 | for j in range(7):
175 | str_id = uint82bin(id)
176 | r = r ^ (np.uint8(str_id[-1]) << (7-j))
177 | g = g ^ (np.uint8(str_id[-2]) << (7-j))
178 | b = b ^ (np.uint8(str_id[-3]) << (7-j))
179 | id = id >> 3
180 | cmap[i, 0], cmap[i, 1], cmap[i, 2] = r, g, b
181 | return cmap
182 |
183 | def colormap(n):
184 | cmap = np.zeros([n, 3]).astype(np.uint8)
185 | for i in np.arange(n):
186 | r, g, b = np.zeros(3)
187 |
188 | for j in np.arange(8):
189 | r = r + (1 << (7-j))*((i & (1 << (3*j))) >> (3*j))
190 | g = g + (1 << (7-j))*((i & (1 << (3*j+1))) >> (3*j+1))
191 | b = b + (1 << (7-j))*((i & (1 << (3*j+2))) >> (3*j+2))
192 |
193 | cmap[i, :] = np.array([r, g, b])
194 |
195 | return cmap
196 |
197 | class Colorize(object):
198 | def __init__(self, n=35):
199 | self.cmap = labelcolormap(n)
200 | self.cmap = torch.from_numpy(self.cmap[:n])
201 |
202 | def __call__(self, gray_image):
203 | size = gray_image.size()
204 | color_image = torch.ByteTensor(3, size[1], size[2]).fill_(0)
205 |
206 | for label in range(0, len(self.cmap)):
207 | mask = (label == gray_image[0]).cpu()
208 | color_image[0][mask] = self.cmap[label][0]
209 | color_image[1][mask] = self.cmap[label][1]
210 | color_image[2][mask] = self.cmap[label][2]
211 |
212 | return color_image
--------------------------------------------------------------------------------
/util/visualizer.py:
--------------------------------------------------------------------------------
1 | ### Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
2 | ### Licensed under the CC BY-NC-SA 4.0 license (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
3 | import numpy as np
4 | import os
5 | import time
6 | from . import util
7 | from . import html
8 | import scipy.misc
9 | try:
10 | from StringIO import StringIO # Python 2.7
11 | except ImportError:
12 | from io import BytesIO # Python 3.x
13 |
14 | class Visualizer():
15 | def __init__(self, opt):
16 | self.opt = opt
17 | self.tf_log = opt.tf_log
18 | self.use_html = opt.isTrain and not opt.no_html
19 | self.win_size = opt.display_winsize
20 | self.name = opt.name
21 | if self.tf_log:
22 | import tensorflow as tf
23 | self.tf = tf
24 | self.log_dir = os.path.join(opt.checkpoints_dir, opt.name, 'logs')
25 | self.writer = tf.summary.FileWriter(self.log_dir)
26 |
27 | if self.use_html:
28 | self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web')
29 | self.img_dir = os.path.join(self.web_dir, 'images')
30 | print('create web directory %s...' % self.web_dir)
31 | util.mkdirs([self.web_dir, self.img_dir])
32 | self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt')
33 | with open(self.log_name, "a") as log_file:
34 | now = time.strftime("%c")
35 | log_file.write('================ Training Loss (%s) ================\n' % now)
36 |
37 | # |visuals|: dictionary of images to display or save
38 | def display_current_results(self, visuals, epoch, step):
39 | if self.tf_log: # show images in tensorboard output
40 | img_summaries = []
41 | for label, image_numpy in visuals.items():
42 | # Write the image to a string
43 | try:
44 | s = StringIO()
45 | except:
46 | s = BytesIO()
47 | scipy.misc.toimage(image_numpy).save(s, format="jpeg")
48 | # Create an Image object
49 | img_sum = self.tf.Summary.Image(encoded_image_string=s.getvalue(), height=image_numpy.shape[0], width=image_numpy.shape[1])
50 | # Create a Summary value
51 | img_summaries.append(self.tf.Summary.Value(tag=label, image=img_sum))
52 |
53 | # Create and write Summary
54 | summary = self.tf.Summary(value=img_summaries)
55 | self.writer.add_summary(summary, step)
56 |
57 | if self.use_html: # save images to a html file
58 | for label, image_numpy in visuals.items():
59 | if isinstance(image_numpy, list):
60 | for i in range(len(image_numpy)):
61 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s_%d.jpg' % (epoch, label, i))
62 | util.save_image(image_numpy[i], img_path)
63 | else:
64 | img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.jpg' % (epoch, label))
65 | util.save_image(image_numpy, img_path)
66 |
67 | # update website
68 | webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1)
69 | for n in range(epoch, 0, -1):
70 | webpage.add_header('epoch [%d]' % n)
71 | ims = []
72 | txts = []
73 | links = []
74 |
75 | for label, image_numpy in visuals.items():
76 | if isinstance(image_numpy, list):
77 | for i in range(len(image_numpy)):
78 | img_path = 'epoch%.3d_%s_%d.jpg' % (n, label, i)
79 | ims.append(img_path)
80 | txts.append(label+str(i))
81 | links.append(img_path)
82 | else:
83 | img_path = 'epoch%.3d_%s.jpg' % (n, label)
84 | ims.append(img_path)
85 | txts.append(label)
86 | links.append(img_path)
87 | if len(ims) < 6:
88 | webpage.add_images(ims, txts, links, width=self.win_size)
89 | else:
90 | num = int(round(len(ims)/2.0))
91 | webpage.add_images(ims[:num], txts[:num], links[:num], width=self.win_size)
92 | webpage.add_images(ims[num:], txts[num:], links[num:], width=self.win_size)
93 | webpage.save()
94 |
95 | # errors: dictionary of error labels and values
96 | def plot_current_errors(self, errors, step):
97 | if self.tf_log:
98 | for tag, value in errors.items():
99 | summary = self.tf.Summary(value=[self.tf.Summary.Value(tag=tag, simple_value=value)])
100 | self.writer.add_summary(summary, step)
101 |
102 | # errors: same format as |errors| of plot_current_errors
103 | def print_current_errors(self, epoch, i, errors, t):
104 | message = '(epoch: %d, iters: %d, time: %.3f) ' % (epoch, i, t)
105 | for k, v in sorted(errors.items()):
106 | if v != 0:
107 | message += '%s: %.3f ' % (k, v)
108 |
109 | print(message)
110 | with open(self.log_name, "a") as log_file:
111 | log_file.write('%s\n' % message)
112 |
113 | # save image to the disk
114 | def save_images(self, image_dir, visuals, image_path, webpage=None):
115 | dirname = os.path.basename(os.path.dirname(image_path[0]))
116 | image_dir = os.path.join(image_dir, dirname)
117 | util.mkdir(image_dir)
118 | name = os.path.basename(image_path[0])
119 | name = os.path.splitext(name)[0]
120 |
121 | if webpage is not None:
122 | webpage.add_header(name)
123 | ims, txts, links = [], [], []
124 |
125 | for label, image_numpy in visuals.items():
126 | save_ext = 'png' if 'real_A' in label and self.opt.label_nc != 0 else 'jpg'
127 | image_name = '%s_%s.%s' % (label, name, save_ext)
128 | save_path = os.path.join(image_dir, image_name)
129 | util.save_image(image_numpy, save_path)
130 |
131 | if webpage is not None:
132 | ims.append(image_name)
133 | txts.append(label)
134 | links.append(image_name)
135 | if webpage is not None:
136 | webpage.add_images(ims, txts, links, width=self.win_size)
137 |
138 | def vis_print(self, message):
139 | print(message)
140 | with open(self.log_name, "a") as log_file:
141 | log_file.write('%s\n' % message)
142 |
143 |
--------------------------------------------------------------------------------