├── flownet2
│   └── readme.txt
├── README.md
├── LICENSE
├── how_to_use.py
└── Flownet2Controller.py

/flownet2/readme.txt:
--------------------------------------------------------------------------------
Place NVIDIA's FlowNet2 project files in this directory.
Located at: https://github.com/NVIDIA/flownet2-pytorch
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# FlowNet2-Pytorch-EasyToUse-Wrapper
This repository provides an easy-to-use wrapper around NVIDIA's FlowNet2 PyTorch implementation.

# Test on Cyberpunk 2077 Cinematic
[![FlowNet2 on a Cyberpunk 2077 cinematic](http://img.youtube.com/vi/6GXBBtCxihM/0.jpg)](https://www.youtube.com/watch?v=6GXBBtCxihM "FlowNet2 on a Cyberpunk 2077 cinematic")

# Installation
* Step 1:
Follow NVIDIA's installation guide at https://github.com/NVIDIA/flownet2-pytorch

* Step 2:
Copy all files from flownet2-pytorch-master into the flownet2 folder. (Alternatively, simply rename the flownet2-pytorch-master directory to flownet2 and place it in the repository root.)

# Usage
Please refer to how_to_use.py for complete code examples.

```python
# Predict optical flow given 2 images
flow_controller.predict(im1, im2)

# Convert a flow matrix (the output of predict) into an image
flow_controller.convert_flow_to_image(flow)

# Convert a video on disk into its optical-flow counterpart
flow_controller.convert_video_to_flow("cp77cinematic.mp4", "output", downsample_res=(320, 320))
```
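A minimal end-to-end sketch, mirroring how_to_use.py (im1.png/im2.png are placeholder file names, and the checkpoint is assumed to sit at the default path flownet2/FlowNet2_checkpoint.pth.tar):

```python
import cv2
from Flownet2Controller import FlowController

# Uses the default checkpoint path; pass model_path for a custom location
flow_controller = FlowController()

im1 = cv2.imread("im1.png")  # placeholder file names
im2 = cv2.imread("im2.png")

flow = flow_controller.predict(im1, im2)                  # H x W x 2 flow field
flow_image = flow_controller.convert_flow_to_image(flow)  # H x W x 3 uint8 visualization

cv2.imshow("Flow image", flow_image)
cv2.waitKey()
cv2.destroyAllWindows()
```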
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Eren

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/how_to_use.py:
--------------------------------------------------------------------------------
from Flownet2Controller import FlowController
import cv2


def main():
    # This assumes FlowNet2_checkpoint.pth.tar is placed directly under the flownet2 folder.
    # Pass model_path to the constructor if the checkpoint is located elsewhere, e.g.:
    # flow_controller = FlowController("./flownet2/FlowNet2_checkpoint.pth.tar")
    flow_controller = FlowController()

    # Prediction given 2 images
    im1 = cv2.imread("im1.png")
    im2 = cv2.imread("im2.png")

    flow = flow_controller.predict(im1, im2)
    # Important note: all predictions are made at the maximum viable resolution to keep
    # prediction quality high, which comes at a massive performance cost. If you want
    # fast execution, downsample the images first.

    # Convert the flow to an image using the built-in method
    flow_image = flow_controller.convert_flow_to_image(flow)

    cv2.imshow("Flow image", flow_image)
    cv2.waitKey()
    cv2.destroyAllWindows()

    # Videos on disk can also be converted to their optical-flow variants.
    # Pass raw_save=True to save only the optical-flow video (instead of the
    # side-by-side view), and set downsample_res to process the video faster.
    flow_controller.convert_video_to_flow("cp77cinematic.mp4", "output", downsample_res=(320, 320))


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/Flownet2Controller.py:
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
import argparse
import numpy as np
from os.path import join

import cv2

from flownet2 import models, losses, datasets
from flownet2.utils import tools

parser = argparse.ArgumentParser()

parser.add_argument('--start_epoch', type=int, default=1)
parser.add_argument('--total_epochs', type=int, default=10000)
parser.add_argument('--batch_size', '-b', type=int, default=8, help="Batch size")
parser.add_argument('--train_n_batches', type=int, default=-1,
                    help='Number of mini-batches per epoch. If < 0, it will be determined by training_dataloader')
parser.add_argument('--crop_size', type=int, nargs='+', default=[256, 256],
                    help="Spatial dimension to crop training samples for training")
parser.add_argument('--gradient_clip', type=float, default=None)
parser.add_argument('--schedule_lr_frequency', type=int, default=0, help='in number of iterations (0 for no schedule)')
parser.add_argument('--schedule_lr_fraction', type=float, default=10)
parser.add_argument("--rgb_max", type=float, default=255.)
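# Note: this wrapper never reads most of these flags itself. The full argument set is
# carried over from NVIDIA's training script because models.FlowNet2(args) expects an
# args namespace carrying hyperparameters such as rgb_max. The arguments are parsed at
# import time, so importing this module from a script with its own CLI flags may clash.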
parser.add_argument('--number_workers', '-nw', '--num_workers', type=int, default=8)
parser.add_argument('--number_gpus', '-ng', type=int, default=-1, help='number of GPUs to use')
parser.add_argument('--no_cuda', action='store_true')

parser.add_argument('--seed', type=int, default=1)
parser.add_argument('--name', default='run', type=str, help='a name to append to the save directory')
parser.add_argument('--save', '-s', default='./work', type=str, help='directory for saving')

parser.add_argument('--validation_frequency', type=int, default=5, help='validate every n epochs')
parser.add_argument('--validation_n_batches', type=int, default=-1)
parser.add_argument('--render_validation', action='store_true',
                    help='run inference (save flows to file) every validation_frequency epochs')

parser.add_argument('--inference', action='store_true')
parser.add_argument('--inference_size', type=int, nargs='+', default=[-1, -1],
                    help='spatial size divisible by 64. default (-1,-1) - largest possible valid size would be used')
parser.add_argument('--inference_batch_size', type=int, default=1)
parser.add_argument('--inference_n_batches', type=int, default=-1)
parser.add_argument('--save_flow', action='store_true', help='save predicted flows to file')

parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
parser.add_argument('--log_frequency', '--summ_iter', type=int, default=1, help="Log every n batches")

parser.add_argument('--skip_training', action='store_true')
parser.add_argument('--skip_validation', action='store_true')

parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage, fp32 math).')
parser.add_argument('--fp16_scale', type=float, default=1024.,
                    help='Loss scaling; positive powers of 2 can improve fp16 convergence.')

tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2')

tools.add_arguments_for_module(parser, losses, argument_for_class='loss', default='L1Loss')

tools.add_arguments_for_module(parser, torch.optim, argument_for_class='optimizer', default='Adam',
                               skip_params=['params'])

tools.add_arguments_for_module(parser, datasets, argument_for_class='training_dataset', default='MpiSintelFinal',
                               skip_params=['is_cropped'],
                               parameter_defaults={'root': './MPI-Sintel/flow/training'})

tools.add_arguments_for_module(parser, datasets, argument_for_class='validation_dataset', default='MpiSintelClean',
                               skip_params=['is_cropped'],
                               parameter_defaults={'root': './MPI-Sintel/flow/training',
                                                   'replicates': 1})

tools.add_arguments_for_module(parser, datasets, argument_for_class='inference_dataset', default='MpiSintelClean',
                               skip_params=['is_cropped'],
                               parameter_defaults={'root': './MPI-Sintel/flow/training',
                                                   'replicates': 1})

args = parser.parse_args()
if args.number_gpus < 0:
    args.number_gpus = torch.cuda.device_count()
parser.add_argument('--IGNORE', action='store_true')
defaults = vars(parser.parse_args(['--IGNORE']))
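# The '--IGNORE' round trip above is carried over from NVIDIA's main.py: parsing the
# lone dummy flag produces a namespace of pure defaults, so `defaults` maps every
# argument to its default value. This wrapper keeps it for parity but never uses it.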
args.model_class = tools.module_to_dict(models)[args.model]
args.optimizer_class = tools.module_to_dict(torch.optim)[args.optimizer]
args.loss_class = tools.module_to_dict(losses)[args.loss]

args.training_dataset_class = tools.module_to_dict(datasets)[args.training_dataset]
args.validation_dataset_class = tools.module_to_dict(datasets)[args.validation_dataset]
args.inference_dataset_class = tools.module_to_dict(datasets)[args.inference_dataset]

args.cuda = not args.no_cuda and torch.cuda.is_available()
args.log_file = join(args.save, 'args.txt')

args.grads = {}

if args.inference:
    args.skip_validation = True
    args.skip_training = True
    args.total_epochs = 1
    args.inference_dir = "{}/inference".format(args.save)


args.effective_batch_size = args.batch_size * args.number_gpus
args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
args.effective_number_workers = args.number_workers * args.number_gpus
gpuargs = {'num_workers': args.effective_number_workers,
           'pin_memory': True,
           'drop_last': True} if args.cuda else {}
inf_gpuargs = gpuargs.copy()
inf_gpuargs['num_workers'] = args.number_workers


class FlowController:
    def __init__(self, model_path="flownet2/FlowNet2_checkpoint.pth.tar"):
        self.model = models.FlowNet2(args)
        # map_location lets a checkpoint saved on GPU load on a CPU-only machine
        checkpoint = torch.load(model_path, map_location='cpu')
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.eval()

        if args.cuda:
            self.model.cuda()

        self.is_cropped = False

    @staticmethod
    def convert_flow_to_image(flow):
        image_shape = flow.shape[0:2] + (3,)

        hsv = np.zeros(shape=image_shape, dtype=np.uint8)
        hsv[..., 1] = 255  # full saturation
        mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        hsv[..., 0] = ang * 180 / np.pi / 2  # flow direction -> hue (OpenCV hue range is 0-179)
        normalized_mag = np.asarray(np.clip(mag * 40, 0, 255), dtype=np.uint8)
        hsv[..., 2] = normalized_mag  # flow magnitude -> brightness
        # Convert to BGR so the result displays correctly with cv2.imshow / cv2.VideoWriter
        bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
        return bgr
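    # Visualization recipe above: direction of motion becomes hue and magnitude becomes
    # brightness. The factor of 40 is an arbitrary gain that makes typical motions
    # visible; larger motions simply saturate at 255.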
    def convert_video_to_flow(self, video_path, output_path="out", downsample_res=None, raw_save=False):
        video = cv2.VideoCapture(video_path)
        ret, prev_frame = video.read()
        if downsample_res is not None:
            prev_frame = cv2.resize(prev_frame, downsample_res)

        # cv2.VideoWriter expects the frame size as (width, height); the side-by-side
        # view doubles the width. Output is written at a fixed 24 fps.
        height, width = prev_frame.shape[0:2]
        if not raw_save:
            width *= 2

        out_video = cv2.VideoWriter(output_path + ".avi", cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 24,
                                    (width, height))

        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break

            if downsample_res is not None:
                frame = cv2.resize(frame, downsample_res)
            opt_flow = self.predict(frame, prev_frame)
            opt_flow_image = self.convert_flow_to_image(opt_flow)
            prev_frame = frame

            # Show the current frame and its flow visualization side by side
            joint_image = np.append(frame, opt_flow_image, axis=1)
            cv2.imshow("FlowNet2", joint_image)

            if raw_save:
                out_video.write(opt_flow_image)
            else:
                out_video.write(joint_image)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        video.release()
        out_video.release()
        cv2.destroyAllWindows()

    @staticmethod
    def preprocess_frames(frame1, frame2):
        assert frame1.shape == frame2.shape, "Shapes of both frames must be the same"

        # FlowNet2 requires spatial dimensions divisible by 64, so scale each dimension
        # down to the nearest multiple of 64 (or up to 64 if it is smaller than that).
        # cv2.resize expects (width, height), hence the reversed index order.
        process_resolution = tuple(max(64 * (frame1.shape[i] // 64), 64) for i in (1, 0))
        images = [cv2.resize(frame1, process_resolution), cv2.resize(frame2, process_resolution)]
        # Stack the pair into shape (1, 3, 2, H, W): batch, channels, image pair, height, width
        images = np.expand_dims(np.array(images).transpose(3, 0, 1, 2), axis=0)
        images = torch.from_numpy(images.astype(np.float32))

        # The second list is a dummy flow target kept to mirror the training API
        return [images], [torch.zeros(images.size()[0:1] + (2,) + images.size()[-2:])]

    def predict(self, image1, image2):
        (data, target) = self.preprocess_frames(image1, image2)
        if args.cuda:
            data, target = [d.cuda() for d in data], [t.cuda() for t in target]
        data, target = [Variable(d) for d in data], [Variable(t) for t in target]

        with torch.no_grad():
            output = self.model(data[0])

        # Resize the predicted flow back to the input resolution
        flow = cv2.resize(output.data.cpu().numpy()[0].transpose(1, 2, 0), (image1.shape[1], image1.shape[0]))
        return flow
--------------------------------------------------------------------------------