├── flownet2
│   └── readme.txt
├── README.md
├── LICENSE
├── how_to_use.py
└── Flownet2Controller.py

/flownet2/readme.txt:
--------------------------------------------------------------------------------
Place NVIDIA's FlowNet2 project files in this directory.
Located at: https://github.com/NVIDIA/flownet2-pytorch
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# FlowNet2-Pytorch-EasyToUse-Wrapper
This repository provides an easy-to-use wrapper around NVIDIA's FlowNet2 PyTorch implementation.

# Test on Cyberpunk 2077 Cinematic
[![FlowNet2 on a Cyberpunk 2077 cinematic](http://img.youtube.com/vi/6GXBBtCxihM/0.jpg)](https://www.youtube.com/watch?v=6GXBBtCxihM "FlowNet2 on a Cyberpunk 2077 cinematic")

# Installation
* Step 1:
Follow NVIDIA's installation guide at https://github.com/NVIDIA/flownet2-pytorch

* Step 2:
Copy all files from flownet2-pytorch-master into the flownet2 folder. (Alternatively, simply rename the flownet2-pytorch-master directory to flownet2 and place it in the repository root.)

# Usage
Please refer to how_to_use.py for complete code examples.

```python
# Predict optical flow given 2 images
flow_controller.predict(im1, im2)

# Convert a flow matrix (the output of predict) into an image
flow_controller.convert_flow_to_image(flow)

# Convert a video on disk into its optical-flow counterpart
flow_controller.convert_video_to_flow("cp77cinematic.mp4", "output", downsample_res=(320, 320))
```
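A minimal end-to-end sketch, mirroring how_to_use.py (im1.png/im2.png are placeholder file names, and the checkpoint is assumed to sit at the default path flownet2/FlowNet2_checkpoint.pth.tar):

```python
import cv2
from Flownet2Controller import FlowController

# Uses the default checkpoint path; pass model_path for a custom location
flow_controller = FlowController()

im1 = cv2.imread("im1.png")  # placeholder file names
im2 = cv2.imread("im2.png")

flow = flow_controller.predict(im1, im2)                  # H x W x 2 flow field
flow_image = flow_controller.convert_flow_to_image(flow)  # H x W x 3 uint8 visualization

cv2.imshow("Flow image", flow_image)
cv2.waitKey()
cv2.destroyAllWindows()
```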
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Eren

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/how_to_use.py:
--------------------------------------------------------------------------------
from Flownet2Controller import FlowController
import cv2


def main():
    # This assumes FlowNet2_checkpoint.pth.tar is placed directly under the flownet2 folder.
    # Pass model_path to the constructor if the checkpoint is located elsewhere, e.g.:
    # flow_controller = FlowController("./flownet2/FlowNet2_checkpoint.pth.tar")
    flow_controller = FlowController()

    # Prediction given 2 images
    im1 = cv2.imread("im1.png")
    im2 = cv2.imread("im2.png")

    flow = flow_controller.predict(im1, im2)
    # Important note: all predictions are made at the maximum viable resolution to keep
    # prediction quality high, which comes at a massive performance cost. If you want
    # fast execution, downsample the images first.

    # Convert the flow to an image using the built-in method
    flow_image = flow_controller.convert_flow_to_image(flow)

    cv2.imshow("Flow image", flow_image)
    cv2.waitKey()
    cv2.destroyAllWindows()

    # Videos on disk can also be converted to their optical-flow variants.
    # Pass raw_save=True to save only the optical-flow video (instead of the
    # side-by-side view), and set downsample_res to process the video faster.
    flow_controller.convert_video_to_flow("cp77cinematic.mp4", "output", downsample_res=(320, 320))


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/Flownet2Controller.py:
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
import argparse
import numpy as np
from os.path import join

import cv2

from flownet2 import models, losses, datasets
from flownet2.utils import tools

parser = argparse.ArgumentParser()

parser.add_argument('--start_epoch', type=int, default=1)
parser.add_argument('--total_epochs', type=int, default=10000)
parser.add_argument('--batch_size', '-b', type=int, default=8, help="Batch size")
parser.add_argument('--train_n_batches', type=int, default=-1,
                    help='Number of mini-batches per epoch. If < 0, it will be determined by training_dataloader')
parser.add_argument('--crop_size', type=int, nargs='+', default=[256, 256],
                    help="Spatial dimension to crop training samples for training")
parser.add_argument('--gradient_clip', type=float, default=None)
parser.add_argument('--schedule_lr_frequency', type=int, default=0, help='in number of iterations (0 for no schedule)')
parser.add_argument('--schedule_lr_fraction', type=float, default=10)
parser.add_argument("--rgb_max", type=float, default=255.)
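# Note: this wrapper never reads most of these flags itself. The full argument set is
# carried over from NVIDIA's training script because models.FlowNet2(args) expects an
# args namespace carrying hyperparameters such as rgb_max. The arguments are parsed at
# import time, so importing this module from a script with its own CLI flags may clash.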
parser.add_argument('--number_workers', '-nw', '--num_workers', type=int, default=8)
parser.add_argument('--number_gpus', '-ng', type=int, default=-1, help='number of GPUs to use')
parser.add_argument('--no_cuda', action='store_true')

parser.add_argument('--seed', type=int, default=1)
parser.add_argument('--name', default='run', type=str, help='a name to append to the save directory')
parser.add_argument('--save', '-s', default='./work', type=str, help='directory for saving')

parser.add_argument('--validation_frequency', type=int, default=5, help='validate every n epochs')
parser.add_argument('--validation_n_batches', type=int, default=-1)
parser.add_argument('--render_validation', action='store_true',
                    help='run inference (save flows to file) every validation_frequency epochs')

parser.add_argument('--inference', action='store_true')
parser.add_argument('--inference_size', type=int, nargs='+', default=[-1, -1],
                    help='spatial size divisible by 64. default (-1,-1) - largest possible valid size would be used')
parser.add_argument('--inference_batch_size', type=int, default=1)
parser.add_argument('--inference_n_batches', type=int, default=-1)
parser.add_argument('--save_flow', action='store_true', help='save predicted flows to file')

parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
parser.add_argument('--log_frequency', '--summ_iter', type=int, default=1, help="Log every n batches")

parser.add_argument('--skip_training', action='store_true')
parser.add_argument('--skip_validation', action='store_true')

parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage, fp32 math).')
parser.add_argument('--fp16_scale', type=float, default=1024.,
                    help='Loss scaling; positive powers of 2 can improve fp16 convergence.')

tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2')

tools.add_arguments_for_module(parser, losses, argument_for_class='loss', default='L1Loss')

tools.add_arguments_for_module(parser, torch.optim, argument_for_class='optimizer', default='Adam',
                               skip_params=['params'])

tools.add_arguments_for_module(parser, datasets, argument_for_class='training_dataset', default='MpiSintelFinal',
                               skip_params=['is_cropped'],
                               parameter_defaults={'root': './MPI-Sintel/flow/training'})

tools.add_arguments_for_module(parser, datasets, argument_for_class='validation_dataset', default='MpiSintelClean',
                               skip_params=['is_cropped'],
                               parameter_defaults={'root': './MPI-Sintel/flow/training',
                                                   'replicates': 1})

tools.add_arguments_for_module(parser, datasets, argument_for_class='inference_dataset', default='MpiSintelClean',
                               skip_params=['is_cropped'],
                               parameter_defaults={'root': './MPI-Sintel/flow/training',
                                                   'replicates': 1})

args = parser.parse_args()
if args.number_gpus < 0:
    args.number_gpus = torch.cuda.device_count()
parser.add_argument('--IGNORE', action='store_true')
defaults = vars(parser.parse_args(['--IGNORE']))
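# The '--IGNORE' round trip above is carried over from NVIDIA's main.py: parsing the
# lone dummy flag produces a namespace of pure defaults, so `defaults` maps every
# argument to its default value. This wrapper keeps it for parity but never uses it.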
args.model_class = tools.module_to_dict(models)[args.model]
args.optimizer_class = tools.module_to_dict(torch.optim)[args.optimizer]
args.loss_class = tools.module_to_dict(losses)[args.loss]

args.training_dataset_class = tools.module_to_dict(datasets)[args.training_dataset]
args.validation_dataset_class = tools.module_to_dict(datasets)[args.validation_dataset]
args.inference_dataset_class = tools.module_to_dict(datasets)[args.inference_dataset]

args.cuda = not args.no_cuda and torch.cuda.is_available()
args.log_file = join(args.save, 'args.txt')

args.grads = {}

if args.inference:
    args.skip_validation = True
    args.skip_training = True
    args.total_epochs = 1
    args.inference_dir = "{}/inference".format(args.save)


args.effective_batch_size = args.batch_size * args.number_gpus
args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
args.effective_number_workers = args.number_workers * args.number_gpus
gpuargs = {'num_workers': args.effective_number_workers,
           'pin_memory': True,
           'drop_last': True} if args.cuda else {}
inf_gpuargs = gpuargs.copy()
inf_gpuargs['num_workers'] = args.number_workers


class FlowController:
    def __init__(self, model_path="flownet2/FlowNet2_checkpoint.pth.tar"):
        self.model = models.FlowNet2(args)
        # map_location lets a checkpoint saved on GPU load on a CPU-only machine
        checkpoint = torch.load(model_path, map_location='cpu')
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.eval()

        if args.cuda:
            self.model.cuda()

        self.is_cropped = False

    @staticmethod
    def convert_flow_to_image(flow):
        image_shape = flow.shape[0:2] + (3,)

        hsv = np.zeros(shape=image_shape, dtype=np.uint8)
        hsv[..., 1] = 255  # full saturation
        mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        hsv[..., 0] = ang * 180 / np.pi / 2  # flow direction -> hue (OpenCV hue range is 0-179)
        normalized_mag = np.asarray(np.clip(mag * 40, 0, 255), dtype=np.uint8)
        hsv[..., 2] = normalized_mag  # flow magnitude -> brightness
        # Convert to BGR so the result displays correctly with cv2.imshow / cv2.VideoWriter
        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        return bgr
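    # Visualization recipe above: direction of motion becomes hue and magnitude becomes
    # brightness. The factor of 40 is an arbitrary gain that makes typical motions
    # visible; larger motions simply saturate at 255.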
    def convert_video_to_flow(self, video_path, output_path="out", downsample_res=None, raw_save=False):
        video = cv2.VideoCapture(video_path)
        ret, prev_frame = video.read()
        if downsample_res is not None:
            prev_frame = cv2.resize(prev_frame, downsample_res)

        # cv2.VideoWriter expects the frame size as (width, height); the side-by-side
        # view doubles the width. Output is written at a fixed 24 fps.
        height, width = prev_frame.shape[0:2]
        if not raw_save:
            width *= 2

        out_video = cv2.VideoWriter(output_path + ".avi", cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 24,
                                    (width, height))

        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break

            if downsample_res is not None:
                frame = cv2.resize(frame, downsample_res)
            opt_flow = self.predict(frame, prev_frame)
            opt_flow_image = self.convert_flow_to_image(opt_flow)
            prev_frame = frame

            # Show the current frame and its flow visualization side by side
            joint_image = np.append(frame, opt_flow_image, axis=1)
            cv2.imshow("FlowNet2", joint_image)

            if raw_save:
                out_video.write(opt_flow_image)
            else:
                out_video.write(joint_image)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        video.release()
        out_video.release()
        cv2.destroyAllWindows()

    @staticmethod
    def preprocess_frames(frame1, frame2):
        assert frame1.shape == frame2.shape, "Shapes of both frames must be the same"

        # FlowNet2 requires spatial dimensions divisible by 64, so scale each dimension
        # down to the nearest multiple of 64 (or up to 64 if it is smaller than that).
        # cv2.resize expects (width, height), hence the reversed index order.
        process_resolution = tuple(max(64 * (frame1.shape[i] // 64), 64) for i in (1, 0))
        images = [cv2.resize(frame1, process_resolution), cv2.resize(frame2, process_resolution)]
        # Stack the pair into shape (1, 3, 2, H, W): batch, channels, image pair, height, width
        images = np.expand_dims(np.array(images).transpose(3, 0, 1, 2), axis=0)
        images = torch.from_numpy(images.astype(np.float32))

        # The second list is a dummy flow target kept to mirror the training API
        return [images], [torch.zeros(images.size()[0:1] + (2,) + images.size()[-2:])]

    def predict(self, image1, image2):
        (data, target) = self.preprocess_frames(image1, image2)
        if args.cuda:
            data, target = [d.cuda() for d in data], [t.cuda() for t in target]
        data, target = [Variable(d) for d in data], [Variable(t) for t in target]

        with torch.no_grad():
            output = self.model(data[0])

        # Resize the predicted flow back to the input resolution
        flow = cv2.resize(output.data.cpu().numpy()[0].transpose(1, 2, 0), (image1.shape[1], image1.shape[0]))
        return flow
--------------------------------------------------------------------------------