├── figures
│   ├── result1.png
│   ├── result2.png
│   ├── teaser.png
│   └── teaser2.png
├── Dockerfile
├── datasets
│   ├── __init__.py
│   ├── VideoInterp.py
│   └── data_transforms.py
├── models
│   ├── __init__.py
│   ├── model_utils.py
│   ├── CycleHJSuperSloMo.py
│   └── HJSuperSloMo.py
├── LICENSE
├── utils.py
├── parser.py
├── eval.py
├── README.md
└── train.py
/figures/result1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/unsupervised-video-interpolation/HEAD/figures/result1.png
--------------------------------------------------------------------------------
/figures/result2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/unsupervised-video-interpolation/HEAD/figures/result2.png
--------------------------------------------------------------------------------
/figures/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/unsupervised-video-interpolation/HEAD/figures/teaser.png
--------------------------------------------------------------------------------
/figures/teaser2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/unsupervised-video-interpolation/HEAD/figures/teaser2.png
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # ===========
2 | # base images
3 | # ===========
4 | FROM nvcr.io/nvidia/pytorch:19.04-py3
5 |
6 |
7 | # ===============
8 | # system packages
9 | # ===============
10 | RUN apt-get update
11 | RUN apt-get install -y bash-completion \
12 | emacs \
13 | ffmpeg \
14 | git \
15 | graphviz \
16 | htop \
17 | libopenexr-dev \
18 | openssh-server \
19 | rsync \
20 | wget \
21 | curl
22 |
23 |
24 | # ===========
25 | # latest apex
26 | # ===========
27 | RUN pip uninstall -y apex
28 | RUN git clone https://github.com/NVIDIA/apex.git ~/apex && \
29 | cd ~/apex && \
30 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
31 |
32 |
33 | # ============
34 | # pip packages
35 | # ============
36 | RUN pip install --upgrade pip
37 | RUN pip install --upgrade ffmpeg==1.4
38 | RUN pip install --upgrade imageio==2.6.1
39 | RUN pip install --upgrade natsort==6.2.0
40 | RUN pip install --upgrade numpy==1.18.1
41 | RUN pip install --upgrade pillow==6.1
42 | RUN pip install --upgrade scikit-image==0.16.2
43 | RUN pip install --upgrade tensorboardX==2.0
44 | RUN pip install --upgrade torchvision==0.4.2
45 | RUN pip install --upgrade tqdm==4.41.1
46 |
--------------------------------------------------------------------------------
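
A note on usage (not part of the repository): the image builds and runs with
standard Docker commands. The image tag and mount path below are placeholders,
and newer Docker setups use --gpus all in place of --runtime=nvidia:

    docker build -t unsupervised-video-interpolation .
    docker run --runtime=nvidia -it --rm -v /path/to/data:/data unsupervised-video-interpolation
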
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # *****************************************************************************
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | # * Redistributions of source code must retain the above copyright
7 | # notice, this list of conditions and the following disclaimer.
8 | # * Redistributions in binary form must reproduce the above copyright
9 | # notice, this list of conditions and the following disclaimer in the
10 | # documentation and/or other materials provided with the distribution.
11 | # * Neither the name of the NVIDIA CORPORATION nor the
12 | # names of its contributors may be used to endorse or promote products
13 | # derived from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | #
26 | # *****************************************************************************
27 | from .VideoInterp import *
28 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | # *****************************************************************************
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | # * Redistributions of source code must retain the above copyright
7 | # notice, this list of conditions and the following disclaimer.
8 | # * Redistributions in binary form must reproduce the above copyright
9 | # notice, this list of conditions and the following disclaimer in the
10 | # documentation and/or other materials provided with the distribution.
11 | # * Neither the name of the NVIDIA CORPORATION nor the
12 | # names of its contributors may be used to endorse or promote products
13 | # derived from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | #
26 | # *****************************************************************************
27 | from .HJSuperSloMo import *
28 | from .CycleHJSuperSloMo import *
29 |
--------------------------------------------------------------------------------
/models/model_utils.py:
--------------------------------------------------------------------------------
1 | # *****************************************************************************
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | # * Redistributions of source code must retain the above copyright
7 | # notice, this list of conditions and the following disclaimer.
8 | # * Redistributions in binary form must reproduce the above copyright
9 | # notice, this list of conditions and the following disclaimer in the
10 | # documentation and/or other materials provided with the distribution.
11 | # * Neither the name of the NVIDIA CORPORATION nor the
12 | # names of its contributors may be used to endorse or promote products
13 | # derived from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | #
26 | # *****************************************************************************
27 | from __future__ import division
28 | from __future__ import print_function
29 |
30 | import numpy as np
31 | import torch
32 | import torch.nn as nn
33 | import torch.nn.functional as F
34 |
35 | # The baseline Super SloMo relies on torch.nn.functional.grid_sample to implement a warping module.
36 | # To ensure that our results replicate published accuracy numbers, we also implement a Resample2D layer
37 | # in a similar way, completely with torch tensors, as is done in:
38 | # https://github.com/avinashpaliwal/Super-SloMo/blob/master/model.py#L213
39 | #
40 | # However, for faster training, we suggest using our CUDA kernels for Resample2D, here:
41 | # https://github.com/NVIDIA/flownet2-pytorch/blob/master/networks/resample2d_package/resample2d.py
42 | #
43 | # from flownet2_pytorch.networks.resample2d_package.resample2d import Resample2d
44 | #
45 |
46 |
47 | class MyResample2D(nn.Module):
48 | def __init__(self, width, height):
49 | super(MyResample2D, self).__init__()
50 |
51 | self.width = width
52 | self.height = height
53 |
54 | # make grids for horizontal and vertical displacements
55 | grid_w, grid_h = np.meshgrid(np.arange(width), np.arange(height))
56 | grid_w, grid_h = grid_w.reshape((1,) + grid_w.shape), grid_h.reshape((1,) + grid_h.shape)
57 |
58 | self.register_buffer("grid_w", torch.tensor(grid_w, requires_grad=False, dtype=torch.float32))
59 | self.register_buffer("grid_h", torch.tensor(grid_h, requires_grad=False, dtype=torch.float32))
60 |
61 | def forward(self, im, uv):
62 |
63 | # Get relative displacement
64 | u = uv[:, 0, ...]
65 | v = uv[:, 1, ...]
66 |
67 | # Calculate absolute displacement along height and width axis -> (batch_size, height, width)
68 | ww = self.grid_w.expand_as(u) + u
69 | hh = self.grid_h.expand_as(v) + v
70 |
71 | # Normalize indices to [-1,1]
72 | ww = 2 * ww / (self.width - 1) - 1
73 | hh = 2 * hh / (self.height - 1) - 1
74 |
75 | # Form a grid of shape (batch_size, height, width, 2)
76 | norm_grid_wh = torch.stack((ww, hh), dim=-1)
77 |
78 | # Perform a resample
79 |         resampled_im = torch.nn.functional.grid_sample(im, norm_grid_wh)
80 | 
81 |         return resampled_im
82 |
83 |
84 | class DummyModel(nn.Module):
85 | def __init__(self):
86 | super(DummyModel, self).__init__()
87 |
88 | def forward(self, inputs, target_index):
89 | return {}, inputs['image'][1], inputs['image'][1]
90 |
--------------------------------------------------------------------------------
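
A minimal usage sketch for MyResample2D (illustrative, not part of the
repository): it warps an image tensor with a dense flow field via grid_sample;
with zero flow the warp is the identity, up to grid_sample's align_corners
convention in the installed PyTorch:

    import torch
    from models.model_utils import MyResample2D

    height, width = 64, 96
    resampler = MyResample2D(width, height)  # note: width first, then height

    im = torch.rand(2, 3, height, width)     # (batch, channels, H, W) image
    uv = torch.zeros(2, 2, height, width)    # (u, v) displacement per pixel
    warped = resampler(im, uv)               # zero flow ~ identity warp
    print(warped.shape)                      # torch.Size([2, 3, 64, 96])
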
/LICENSE:
--------------------------------------------------------------------------------
1 | Nvidia Source Code License (1-Way Commercial) – NVIDIA CONFIDENTIAL
2 |
3 | 1. Definitions
4 |
5 | “Licensor” means any person or entity that distributes its Work.
6 | “Software” means the original work of authorship made available under this License.
7 | “Work” means the Software and any additions to or derivative works of the Software that are made available under this License.
8 | “Nvidia Processors” means any central processing unit (CPU), graphics processing unit (GPU), field-programmable gate array (FPGA), application-specific integrated circuit (ASIC) or any combination thereof designed, made, sold, or provided by Nvidia or its affiliates.
9 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work.
10 | Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License.
11 |
12 | 2. License Grants
13 |
14 | 2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form.
15 |
16 | 3. Limitations
17 |
18 | 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work.
19 |
20 | 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself.
21 |
22 | 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. The Work or derivative works thereof may be used or intended for use by Nvidia or its affiliates commercially or non-commercially. As used herein, “non-commercially” means for research or evaluation purposes only.
23 |
24 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grants in Sections 2.1 and 2.2) will terminate immediately.
25 |
26 | 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License.
27 |
28 | 3.6 Termination. If you violate any term of this License, then your rights under this License (including the grants in Sections 2.1 and 2.2) will terminate immediately.
29 |
30 | 4. Disclaimer of Warranty.
31 |
32 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE.
33 |
34 | 5. Limitation of Liability.
35 |
36 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
37 |
38 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import subprocess
4 | import time
5 | from inspect import isclass
6 | import numpy as np
7 |
8 |
9 | class TimerBlock:
10 | def __init__(self, title):
11 |         print("{}".format(title))
12 |
13 | def __enter__(self):
14 |         self.start = time.perf_counter()  # wall-clock timer; time.clock was removed in Python 3.8
15 | return self
16 |
17 | def __exit__(self, exc_type, exc_value, traceback):
18 |         self.end = time.perf_counter()
19 | self.interval = self.end - self.start
20 |
21 | if exc_type is not None:
22 | self.log("Operation failed\n")
23 | else:
24 | self.log("Operation finished\n")
25 |
26 | def log(self, string):
27 |         duration = time.perf_counter() - self.start
28 | units = 's'
29 | if duration > 60:
30 | duration = duration / 60.
31 | units = 'm'
32 | print(" [{:.3f}{}] {}".format(duration, units, string), flush=True)
33 |
34 |
35 | def module_to_dict(module, exclude=[]):
36 | return dict([(x, getattr(module, x)) for x in dir(module)
37 | if isclass(getattr(module, x))
38 | and x not in exclude
39 | and getattr(module, x) not in exclude])
40 |
41 |
42 | # AverageMeter: adapted from https://github.com/pytorch/examples/blob/master/imagenet/main.py
43 | class AverageMeter(object):
44 | """Computes and stores the average and current value"""
45 | def __init__(self):
46 | self.reset()
47 |
48 | def reset(self):
49 | self.val = 0
50 | self.avg = 0
51 | self.sum = 0
52 | self.count = 0
53 |
54 | def update(self, val, n=1):
55 | self.val = val
56 | self.sum += val * n
57 | self.count += n
58 | self.avg = self.sum / self.count
59 |
60 |
61 | # create_pipe: adapted from https://stackoverflow.com/questions/23709893/popen-write-operation-on-closed-file-images-to-video-using-ffmpeg/23709937#23709937
62 | # start an ffmpeg pipe for creating RGB8 for color images or FFV1 for depth
63 | # NOTE: this is REALLY lossy and not optimal for HDR data. When it comes time to train
64 | # on HDR data, you'll need to figure out how to save to pix_fmt=rgb48 or something
65 | # similar
66 | def create_pipe(pipe_filename, width, height, frame_rate=60, quiet=True):
67 | # default extension and tonemapper
68 | pix_fmt = 'rgb24'
69 | out_fmt = 'yuv420p'
70 | codec = 'h264'
71 |
72 | command = ['ffmpeg',
73 | '-threads', '2', # number of threads to start
74 | '-y', # (optional) overwrite output file if it exists
75 | '-f', 'rawvideo', # input format
76 | '-vcodec', 'rawvideo', # input codec
77 | '-s', str(width) + 'x' + str(height), # size of one frame
78 | '-pix_fmt', pix_fmt, # input pixel format
79 | '-r', str(frame_rate), # frames per second
80 |                '-i', '-', # The input comes from a pipe
81 | '-an', # Tells FFMPEG not to expect any audio
82 | '-codec:v', codec, # output codec
83 | '-crf', '18',
84 | # compression quality for h264 (maybe h265 too?) - http://slhck.info/video/2017/02/24/crf-guide.html
85 | # '-compression_level', '10', # compression level for libjpeg if doing lossy depth
86 |                '-strict', '-2', # experimental 16 bit support necessary for gray16le
87 | '-pix_fmt', out_fmt, # output pixel format
88 | '-s', str(width) + 'x' + str(height), # output size
89 | pipe_filename]
90 | cmd = ' '.join(command)
91 |     if not quiet:
92 |         print('opening a pipe ...\n' + cmd + '\n')
93 |
94 | # open the pipe, and ignore stdout and stderr output
95 | DEVNULL = open(os.devnull, 'wb')
96 | return subprocess.Popen(command, stdin=subprocess.PIPE, stdout=DEVNULL, stderr=DEVNULL, close_fds=True)
97 |
98 |
99 |
100 | def get_pred_flag(height, width):
101 | pred_flag = np.ones((height, width, 3), dtype=np.uint8)
102 | pred_values = np.zeros((height, width, 3), dtype=np.uint8)
103 |
104 | hstart = int((192. / 1200) * height)
105 | wstart = int((224. / 1920) * width)
106 | h_step = int((24. / 1200) * height)
107 | w_step = int((32. / 1920) * width)
108 |
109 | pred_flag[hstart:hstart + h_step, -wstart + 0 * w_step:-wstart + 1 * w_step, :] = np.asarray([0, 0, 0])
110 | pred_flag[hstart:hstart + h_step, -wstart + 1 * w_step:-wstart + 2 * w_step, :] = np.asarray([0, 0, 0])
111 | pred_flag[hstart:hstart + h_step, -wstart + 2 * w_step:-wstart + 3 * w_step, :] = np.asarray([0, 0, 0])
112 |
113 | pred_values[hstart:hstart + h_step, -wstart + 0 * w_step:-wstart + 1 * w_step, :] = np.asarray([0, 0, 255])
114 | pred_values[hstart:hstart + h_step, -wstart + 1 * w_step:-wstart + 2 * w_step, :] = np.asarray([0, 255, 0])
115 | pred_values[hstart:hstart + h_step, -wstart + 2 * w_step:-wstart + 3 * w_step, :] = np.asarray([255, 0, 0])
116 | return pred_flag, pred_values
117 |
118 |
119 | def copy_arguments(main_dict, main_filepath='', save_dir='./'):
120 | pycmd = 'python3 ' + main_filepath + ' \\\n'
121 | _main_dict = main_dict.copy()
122 | _main_dict['--name'] = _main_dict['--name']+'_replicate'
123 | for k in _main_dict.keys():
124 | if 'batchNorm' in k:
125 | pycmd += ' ' + k + ' ' + str(_main_dict[k]) + ' \\\n'
126 | elif type(_main_dict[k]) == bool and _main_dict[k]:
127 | pycmd += ' ' + k + ' \\\n'
128 | elif type(_main_dict[k]) == list:
129 | pycmd += ' ' + k + ' ' + ' '.join([str(f) for f in _main_dict[k]]) + ' \\\n'
130 | elif type(_main_dict[k]) != bool:
131 | pycmd += ' ' + k + ' ' + str(_main_dict[k]) + ' \\\n'
132 | pycmd = '#!/bin/bash\n' + pycmd[:-2]
133 | job_script = os.path.join(save_dir, 'job.sh')
134 |
135 | file = open(job_script, 'w')
136 | file.write(pycmd)
137 | file.close()
138 |
139 | return
140 |
141 |
142 | def block_print():
143 | sys.stdout = open(os.devnull, 'w')
144 |
--------------------------------------------------------------------------------
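
A small sketch of how TimerBlock and AverageMeter compose (values are
illustrative; run from the repository root so that utils is importable):

    import utils

    meter = utils.AverageMeter()
    with utils.TimerBlock("Accumulating losses") as block:
        for step, loss in enumerate([0.9, 0.7, 0.6], start=1):
            meter.update(loss, n=4)  # n = mini-batch size
            block.log("step {}: avg={:.3f}".format(step, meter.avg))
    # meter.avg == (0.9 + 0.7 + 0.6) / 3, since every update used the same n
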
/datasets/VideoInterp.py:
--------------------------------------------------------------------------------
1 | # *****************************************************************************
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | # * Redistributions of source code must retain the above copyright
7 | # notice, this list of conditions and the following disclaimer.
8 | # * Redistributions in binary form must reproduce the above copyright
9 | # notice, this list of conditions and the following disclaimer in the
10 | # documentation and/or other materials provided with the distribution.
11 | # * Neither the name of the NVIDIA CORPORATION nor the
12 | # names of its contributors may be used to endorse or promote products
13 | # derived from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | #
26 | # *****************************************************************************
27 | from __future__ import division
28 | from __future__ import print_function
29 |
30 | import os
31 | import natsort
32 | import numpy as np
33 | from imageio import imread
34 | import torch
35 | from torch.utils import data
36 |
37 |
38 | class VideoInterp(data.Dataset):
39 | def __init__(self, args=None, root='', num_interp=7, sample_rate=1, step_size=1,
40 | is_training=False, transform=None):
41 |
42 | self.num_interp = num_interp
43 | self.sample_rate = sample_rate
44 | self.step_size = step_size
45 |         self.transform = transform
46 |         self.is_training = is_training
47 | 
48 |
49 | self.start_index = args.start_index
50 | self.stride = args.stride
51 | self.crop_size = args.crop_size
52 |
53 | # argument sanity check
54 | assert (os.path.exists(root)), "Invalid path to input dataset."
55 | assert self.num_interp > 0, "num_interp must be at least 1"
56 | assert self.step_size > 0, "step_size must be at least 1"
57 |
58 | if self.is_training:
59 | self.start_index = 0
60 |
61 |         # collect the list of image files for each video folder
62 | self.ref = self.collect_filelist(root)
63 |
64 | # calculate total number of unique sub-sequences
65 | def calc_subseq_len(n):
66 | return (n - max(1, (self.num_interp + 1) * self.sample_rate) - 1) // self.step_size + 1
67 | self.counts = [calc_subseq_len(len(el)) for el in self.ref]
68 |
69 | self.total = np.sum(self.counts)
70 | self.cum_sum = list(np.cumsum([0] + [el for el in self.counts]))
71 |
72 | def collect_filelist(self, root):
73 |         include_ext = [".png", ".jpg", ".jpeg", ".bmp"]
74 | # collect subfolders, excluding hidden files, but following symlinks
75 | dirs = [x[0] for x in os.walk(root, followlinks=True) if not x[0].startswith('.')]
76 |
77 | # naturally sort, both dirs and individual images, while skipping hidden files
78 | dirs = natsort.natsorted(dirs)
79 |
80 | datasets = [
81 | [os.path.join(fdir, el) for el in natsort.natsorted(os.listdir(fdir))
82 | if os.path.isfile(os.path.join(fdir, el))
83 | and not el.startswith('.')
84 | and any([el.endswith(ext) for ext in include_ext])]
85 | for fdir in dirs
86 | ]
87 |
88 | return [el for el in datasets if el]
89 |
90 | def get_sample_indices(self, index, tar_index=None):
91 | if self.is_training:
92 | sample_indices = [index, index + self.sample_rate * tar_index, index +
93 | self.sample_rate * (self.num_interp + 1)]
94 | else:
95 | sample_indices = [index + i * self.sample_rate for i in range(0, self.num_interp + 2)]
96 | if self.sample_rate == 0:
97 | sample_indices[-1] += 1
98 | return sample_indices
99 |
100 | def pad_images(self, images):
101 | height, width, _ = images[0].shape
102 | image_count = len(images)
103 | # Pad images with zeros if it is not evenly divisible by args.stride (property of model)
104 | if (height % self.stride) != 0:
105 | new_height = (height // self.stride + 1) * self.stride
106 | for i in range(image_count):
107 | images[i] = np.pad(images[i], ((0, new_height - height), (0, 0), (0, 0)), 'constant',
108 | constant_values=(0, 0))
109 |
110 | if (width % self.stride) != 0:
111 | new_width = (width // self.stride + 1) * self.stride
112 | for i in range(image_count):
113 | images[i] = np.pad(images[i], ((0, 0), (0, new_width - width), (0, 0)), 'constant',
114 | constant_values=(0, 0))
115 | return images
116 |
117 | def __len__(self):
118 | return self.total
119 |
120 | def __getitem__(self, index):
121 | # Adjust index
122 | index = len(self) + index if index < 0 else index
123 | index = index + self.start_index
124 |
125 | dataset_index = np.searchsorted(self.cum_sum, index + 1)
126 | index = self.step_size * (index - self.cum_sum[np.maximum(0, dataset_index - 1)])
127 |
128 | image_list = self.ref[dataset_index - 1]
129 |
130 |         # target index, drawn uniformly from range(1, num_interp + 1)
131 | tar_index = 1 + torch.randint(0, max(1, self.num_interp), (1,)).item()
132 | input_indices = self.get_sample_indices(index, tar_index)
133 |
134 | # reverse subsequence for augmentation with a probability of 0.5
135 | if self.is_training and torch.randint(0, 2, (1,)).item():
136 | input_indices = input_indices[::-1]
137 | tar_index = self.num_interp - tar_index + 1
138 |
139 | image_files = [image_list[i] for i in input_indices]
140 |
141 | # Read images from file
142 | images = [imread(image_file)[:, :, :3] for image_file in image_files]
143 | image_shape = images[0].shape
144 |
145 | # Apply data augmentation if defined.
146 | if self.transform:
147 | input_images, target_images = [images[0], images[-1]], images[1:-1]
148 | input_images, target_images = self.transform(input_images, target_images)
149 | images = [input_images[0]] + target_images + [input_images[-1]]
150 |
151 | # Pad images with zeros, so they fit evenly to model arch in forward pass.
152 | padded_images = self.pad_images(images)
153 |
154 | input_images = [torch.from_numpy(np.ascontiguousarray(tmp.transpose(2, 0, 1).astype(np.float32))).float() for
155 | tmp in padded_images]
156 |
157 | output_dict = {
158 | 'image': input_images, 'tindex': tar_index, 'ishape': image_shape[:2], 'input_files': image_files
159 | }
160 | # print (' '.join([os.path.basename(f) for f in image_files]))
161 | return output_dict
162 |
163 |
164 | class CycleVideoInterp(VideoInterp):
165 | def __init__(self, args=None, root='', num_interp=7, sample_rate=1, step_size=1,
166 | is_training=False, transform=None):
167 | super(CycleVideoInterp, self).__init__(args=args, root=root, num_interp=num_interp, sample_rate=sample_rate,
168 | step_size=step_size, is_training=is_training, transform=transform)
169 |
170 |         # Adjust counts: cycle training spans two consecutive windows, so each folder yields one fewer sample
171 | if self.is_training:
172 | self.counts = [el - 1 for el in self.counts]
173 | self.total = np.sum(self.counts)
174 | self.cum_sum = list(np.cumsum([0] + [el for el in self.counts]))
175 |
176 | def get_sample_indices(self, index, tar_index=None):
177 | if self.is_training:
178 | offset = max(1, self.sample_rate) + self.sample_rate * self.num_interp
179 | sample_indices = [index, index + offset, index + 2 * offset]
180 | else:
181 | sample_indices = [index + i * self.sample_rate for i in range(0, self.num_interp + 2)]
182 | if self.sample_rate == 0:
183 | sample_indices[-1] += 1
184 | return sample_indices
185 |
--------------------------------------------------------------------------------
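
To make the sub-sequence accounting concrete: at evaluation time with
num_interp=7, sample_rate=1, and step_size=8 (the num_interp + 1 default used
by eval.py), each window spans 9 frames (input1, 7 intermediates, input2) and
consecutive windows start 8 frames apart, so a folder of 100 frames yields 12
windows. A standalone check of the per-folder count:

    num_interp, sample_rate, step_size = 7, 1, 8

    def calc_subseq_len(n):
        # mirrors the count computed in VideoInterp.__init__
        return (n - max(1, (num_interp + 1) * sample_rate) - 1) // step_size + 1

    print(calc_subseq_len(100))  # 12
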
/parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # *****************************************************************************
3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | # * Redistributions of source code must retain the above copyright
8 | # notice, this list of conditions and the following disclaimer.
9 | # * Redistributions in binary form must reproduce the above copyright
10 | # notice, this list of conditions and the following disclaimer in the
11 | # documentation and/or other materials provided with the distribution.
12 | # * Neither the name of the NVIDIA CORPORATION nor the
13 | # names of its contributors may be used to endorse or promote products
14 | # derived from this software without specific prior written permission.
15 | #
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | #
27 | # *****************************************************************************
28 | import argparse
29 | import models
30 |
31 | # Collect all available model classes
32 | model_names = sorted(el for el in models.__dict__
33 | if not el.startswith("__") and callable(models.__dict__[el]))
34 |
35 | """
36 | Reda, Fitsum A., et al. "Unsupervised Video Interpolation Using Cycle Consistency."
37 | arXiv preprint arXiv:1906.05928 (2019).
38 |
39 | Jiang, Huaizu, et al. "Super slomo: High quality estimation of multiple
40 | intermediate frames for video interpolation." arXiv pre-print arXiv:1712.00080 (2017).
41 | """
42 |
43 | parser = argparse.ArgumentParser(description="A PyTorch Implementation of Unsupervised Video Interpolation Using "
44 | "Cycle Consistency")
45 |
46 | parser.add_argument('--model', metavar='MODEL', default='HJSuperSloMo',
47 | choices=model_names,
48 | help='model architecture: ' +
49 | ' | '.join(model_names) +
50 | ' (default: HJSuperSloMo)')
51 | parser.add_argument('-s', '--save', '--save_root',
52 | default='./result_folder', type=str,
53 | help='Path of the output folder',
54 | metavar='SAVE_PATH')
55 | parser.add_argument('--torch_home', default='./.torch', type=str,
56 | metavar='TORCH_HOME',
57 | help='Path to save pre-trained models from torchvision')
58 | parser.add_argument('-n', '--name', default='trial_0', type=str, metavar='EXPERIMENT_NAME',
59 | help='Name of experiment folder.')
60 | parser.add_argument('--dataset', default='VideoInterp', type=str, metavar='TRAINING_DATALOADER_CLASS',
61 | help='Specify training dataset class for loading (Default: VideoInterp)')
62 | parser.add_argument('--resume', default='', type=str, metavar='CHECKPOINT_PATH',
63 | help='path to checkpoint file (default: none)')
64 |
65 | # Resources
66 | parser.add_argument('--distributed_backend', default='nccl', type=str, metavar='DISTRIBUTED_BACKEND',
67 | help='backend used for communication between processes.')
68 | parser.add_argument('-j', '--workers', default=4, type=int,
69 |                     help='number of data loader workers (default: 4)')
70 | parser.add_argument('-g', '--gpus', type=int, default=-1,
71 | help='number of GPUs to use')
72 | parser.add_argument('--fp16', action='store_true', help='Enable mixed-precision training.')
73 |
74 | # Learning rate parameters.
75 | parser.add_argument('--lr', '--learning_rate', default=0.0001, type=float,
76 | metavar='LR', help='initial learning rate')
77 | parser.add_argument('--lr_scheduler', default='MultiStepLR', type=str,
78 |                     metavar='LR_SCHEDULER', help='Scheduler for learning' +
79 |                     ' rate (only ExponentialLR and MultiStepLR supported).')
80 | parser.add_argument('--lr_gamma', default=0.1, type=float,
81 | help='learning rate will be multiplied by this gamma')
82 | parser.add_argument('--lr_step', default=200, type=int,
83 | help='stepsize of changing the learning rate')
84 | parser.add_argument('--lr_milestones', type=int, nargs='+',
85 |                     default=[250, 450], help="epochs at which to decay " +
86 |                     "the learning rate by lr_gamma (default: [250, 450])")
87 | # Gradient.
88 | parser.add_argument('--clip_gradients', default=-1.0, type=float,
89 | help='If positive, clip the gradients by this value.')
90 |
91 | # Optimization hyper-parameters
92 | parser.add_argument('-b', '--batch_size', default=4, type=int, metavar='BATCH_SIZE',
93 |                     help='mini-batch size per GPU (default: 4)')
94 | parser.add_argument('--wd', '--weight_decay', default=0.001, type=float, metavar='WEIGHT_DECAY',
95 | help='weight_decay (default = 0.001)')
96 | parser.add_argument('--seed', default=1234, type=int, metavar="SEED",
97 | help='seed for initializing training. ')
98 | parser.add_argument('--optimizer', default='Adam', type=str, metavar='OPTIMIZER',
99 | help='Specify optimizer from torch.optim (Default: Adam)')
100 | parser.add_argument('--mean_pix', nargs='+', type=float, metavar="RGB_MEAN",
101 | default=[109.93, 109.167, 101.455],
102 | help='mean pixel values carried over from superslomo (default: [109.93, 109.167, 101.455])')
103 | parser.add_argument('--print_freq', default=100, type=int, metavar="PRINT_FREQ",
104 | help='frequency of printing training status (default: 100)')
105 | parser.add_argument('--save_freq', type=int, default=20, metavar="SAVE_FREQ",
106 |                     help='frequency of saving intermediate models, in epochs (default: 20)')
107 | parser.add_argument('--start_epoch', type=int, default=-1,
108 | help="Set epoch number during resuming")
109 | parser.add_argument('--epochs', default=500, type=int, metavar="EPOCHS",
110 | help='number of total epochs to run (default: 500)')
111 |
112 | # Training sequence, supports a single sequence for now
113 | parser.add_argument('--train_file', required=False, metavar="TRAINING_FILE",
114 |                     help='path to the training dataset (required for training)')
115 | parser.add_argument('--crop_size', type=int, nargs='+', default=[704, 704], metavar="CROP_SIZE",
116 | help="Spatial dimension to crop training samples for training (default : [704, 704])")
117 | parser.add_argument('--train_n_batches', default=-1, type=int, metavar="TRAIN_N_BATCHES",
118 |                     help="Limit the number of minibatch iterations per epoch. Used for debugging purposes. \
119 |                     (default: -1, means use all available mini-batches)")
120 | parser.add_argument('--sample_rate', type=int, default=1,
121 |                     help='number of frames to skip when sampling input1, {intermediate}, and input2 \
122 |                     (default: 1, i.e. input1, the intermediates, and input2 are consecutive frames)')
123 | parser.add_argument('--step_size', type=int, default=-1, metavar="STEP_INTERP",
124 |                     help='number of frames to skip from one mini-batch to the next mini-batch \
125 |                     (default: -1, means step_size = num_interp + 1)')
126 | parser.add_argument('--num_interp', default=7, type=int, metavar="NUM_INTERP",
127 |                     help='number of intermediate frames to interpolate (default: 7)')
128 |
129 |
130 | # Validation sequence, supports a single sequence for now
131 | parser.add_argument('--val_file', metavar="VALIDATION_FILE",
132 | help='validation file (default : None)')
133 | parser.add_argument('--val_batch_size', type=int, default=1,
134 | help="Batch size to use for validation.")
135 | parser.add_argument('--val_n_batches', default=-1, type=int,
136 | help="Limit the number of minibatch iterations per epoch. Used for debugging purposes.")
137 | parser.add_argument('--video_fps', type=int, default=30,
138 | help="Render predicted video with a specified frame rate")
139 | parser.add_argument('--initial_eval', action='store_true', help='Perform initial evaluation before training.')
140 | parser.add_argument("--start_index", type=int, default=0, metavar="VAL_START_INDEX",
141 | help="Index to start running validation (default : 0)")
142 | parser.add_argument("--val_sample_rate", type=int, default=1, metavar="VAL_SAMPLE_RATE",
143 |                     help='number of frames to skip when sampling input1, {intermediate}, and input2 (default: 1, \
144 |                     i.e. input1, the intermediates, and input2 are consecutive frames)')
145 | parser.add_argument('--val_step_size', type=int, default=-1, metavar="VAL_STEP_INTERP",
146 |                     help='number of frames to skip from one mini-batch to the next mini-batch \
147 |                     (default: -1, means step_size = num_interp + 1)')
148 | parser.add_argument('--val_num_interp', type=int, default=1,
149 | help='number of intermediate frames we want to interpolate for validation. (default: 1)')
150 |
151 | # Misc: undersample large sequences (--step_size), compute flow after downscale (--flow_scale)
152 | parser.add_argument('--flow_scale', type=float, default=1.,
153 | help="Flow scale (default: 1.) for robust interpolation in high resolution images.")
154 | parser.add_argument('--skip_aug', action='store_true', help='Skips expensive geometric or photometric augmentations.')
155 | parser.add_argument('--teacher_weight', type=float, default=-1.,
156 | help="Teacher or Pseudo Supervised Loss (PSL)'s weight of contribution to total loss.")
157 |
158 | parser.add_argument('--apply_vidflag', action='store_true', help='Apply the BRG flag to interpolated frames.')
159 |
160 | parser.add_argument('--write_video', action='store_true', help='save video to \'args.save/args.name.mp4\'.')
161 | parser.add_argument('--write_images', action='store_true',
162 | help='write to folder \'args.save/args.name\' prediction and ground-truth images.')
163 | parser.add_argument('--stride', type=int, default=64,
164 | help='the largest factor a model reduces spatial size of inputs during a forward pass.')
165 | parser.add_argument('--post_fix', default='Proposed', type=str,
166 |                     help='tag for predicted frames (default: \'Proposed\')')
167 |
168 | # Required for torch distributed launch
169 | parser.add_argument('--local_rank', default=None, type=int,
170 |                     help='process rank, set automatically by torch.distributed.launch')
171 |
--------------------------------------------------------------------------------
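
Taken together, a hypothetical single-GPU evaluation run (all paths below are
placeholders) combines these flags along the lines of:

    python3 eval.py \
        --model HJSuperSloMo \
        --val_file /path/to/validation/frames \
        --resume /path/to/checkpoint.pth \
        --num_interp 7 \
        --save ./result_folder --name trial_0 --write_images
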
/models/CycleHJSuperSloMo.py:
--------------------------------------------------------------------------------
1 | # *****************************************************************************
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | # * Redistributions of source code must retain the above copyright
7 | # notice, this list of conditions and the following disclaimer.
8 | # * Redistributions in binary form must reproduce the above copyright
9 | # notice, this list of conditions and the following disclaimer in the
10 | # documentation and/or other materials provided with the distribution.
11 | # * Neither the name of the NVIDIA CORPORATION nor the
12 | # names of its contributors may be used to endorse or promote products
13 | # derived from this software without specific prior written permission.
14 | #
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | #
26 | # *****************************************************************************
27 | from __future__ import division
28 | from __future__ import print_function
29 | import torch
30 | import torch.nn.functional as F
31 | from .model_utils import MyResample2D, DummyModel
32 | from .HJSuperSloMo import HJSuperSloMo
33 |
34 |
35 | class CycleHJSuperSloMo(HJSuperSloMo):
36 | def __init__(self, args, mean_pix=[109.93, 109.167, 101.455]):
37 | super(CycleHJSuperSloMo, self).__init__(args=args, mean_pix=mean_pix)
38 |
39 | if args.resume:
40 | self.teacher = HJSuperSloMo(args)
41 | checkpoint = torch.load(args.resume, map_location='cpu')
42 | self.teacher.load_state_dict(checkpoint['state_dict'], strict=False)
43 | for param in self.teacher.parameters():
44 | param.requires_grad = False
45 |
46 | self.teacher_weight = 0.8
47 | if 'teacher_weight' in args and args.teacher_weight >= 0:
48 | self.teacher_weight = args.teacher_weight
49 | else:
50 | self.teacher = DummyModel()
51 | self.teacher_weight = 0.
52 |
53 | def network_output(self, inputs, target_index):
54 |
55 | im1, im2 = inputs
56 |
57 | # Estimate bi-directional optical flows between input low FPS frame pairs
58 | # Downsample images for robust intermediate flow estimation
59 | ds_im1 = F.interpolate(im1, scale_factor=1./self.scale, mode='bilinear', align_corners=False)
60 | ds_im2 = F.interpolate(im2, scale_factor=1./self.scale, mode='bilinear', align_corners=False)
61 |
62 | uvf, bottleneck_out, uvb = self.make_flow_prediction(torch.cat((ds_im1, ds_im2), dim=1))
63 |
64 | uvf = self.scale * F.interpolate(uvf, scale_factor=self.scale, mode='bilinear', align_corners=False)
65 | uvb = self.scale * F.interpolate(uvb, scale_factor=self.scale, mode='bilinear', align_corners=False)
66 | bottleneck_out = F.interpolate(bottleneck_out, scale_factor=self.scale, mode='bilinear', align_corners=False)
67 |
68 | t = self.tlinespace[target_index]
69 | t = t.reshape(t.shape[0], 1, 1, 1)
70 |
71 | uvb_t_raw = - (1 - t) * t * uvf + t * t * uvb
72 | uvf_t_raw = (1 - t) * (1 - t) * uvf - (1 - t) * t * uvb
73 |
74 | im1w_raw = self.resample2d(im1, uvb_t_raw) # im1w_raw
75 | im2w_raw = self.resample2d(im2, uvf_t_raw) # im2w_raw
76 |
77 | # Perform intermediate bi-directional flow refinement
78 | uv_t_data = torch.cat((im1, im2, im1w_raw, uvb_t_raw, im2w_raw, uvf_t_raw), dim=1)
79 | uvf_t, uvb_t, t_vis_map = self.make_flow_interpolation(uv_t_data, bottleneck_out)
80 |
81 | uvb_t = uvb_t_raw + uvb_t # uvb_t
82 | uvf_t = uvf_t_raw + uvf_t # uvf_t
83 |
84 | im1w = self.resample2d(im1, uvb_t) # im1w
85 | im2w = self.resample2d(im2, uvf_t) # im2w
86 |
87 | # Compute final intermediate frame via weighted blending
88 | alpha1 = (1 - t) * t_vis_map
89 | alpha2 = t * (1 - t_vis_map)
90 | denorm = alpha1 + alpha2 + 1e-10
91 | im_t_out = (alpha1 * im1w + alpha2 * im2w) / denorm
92 |
93 | return im_t_out, uvb, uvf
94 |
95 | def network_eval(self, inputs, target_index):
96 | _, _, height, width = inputs[0].shape
97 | self.resample2d = MyResample2D(width, height).cuda()
98 |
99 | # Normalize inputs
100 | im1, im_target, im2 = [(im - self.mean_pix) for im in inputs]
101 |
102 | im_t_out, uvb, uvf = self.network_output([im1, im2], target_index)
103 |
104 | # Calculate losses
105 | losses = {}
106 | losses['pix_loss'] = self.L1_loss(im_t_out, im_target)
107 |
108 | im_t_out_features = self.vgg16_features(im_t_out / 255.)
109 | im_target_features = self.vgg16_features(im_target / 255.)
110 | losses['vgg16_loss'] = self.L2_loss(im_t_out_features, im_target_features)
111 |
112 | losses['warp_loss'] = self.L1_loss(self.resample2d(im1, uvb.contiguous()), im2) + \
113 | self.L1_loss(self.resample2d(im2, uvf.contiguous()), im1)
114 |
115 | smooth_bwd = self.L1_loss(uvb[:, :, :, :-1], uvb[:, :, :, 1:]) + \
116 | self.L1_loss(uvb[:, :, :-1, :], uvb[:, :, 1:, :])
117 | smooth_fwd = self.L1_loss(uvf[:, :, :, :-1], uvf[:, :, :, 1:]) + \
118 | self.L1_loss(uvf[:, :, :-1, :], uvf[:, :, 1:, :])
119 |
120 | losses['smooth_loss'] = smooth_bwd + smooth_fwd
121 |
122 | # Coefficients for total loss determined empirically using a validation set
123 | losses['tot'] = 0.8 * losses['pix_loss'] + 0.4 * losses['warp_loss'] + 0.005 * losses['vgg16_loss'] + losses[
124 | 'smooth_loss']
125 |
126 | # Converts back to (0, 255) range
127 | im_t_out = im_t_out + self.mean_pix
128 | im_target = im_target + self.mean_pix
129 |
130 | return losses, im_t_out, im_target
131 |
132 | def forward(self, inputs, target_index):
133 | if 'image' in inputs:
134 | inputs = inputs['image']
135 |
136 | if not self.training:
137 | return self.network_eval(inputs, target_index)
138 | self.resample2d = MyResample2D(inputs[0].shape[-1], inputs[0].shape[-2]).cuda()
139 |
140 | # Input frames
141 | im1, im2, im3 = inputs
142 |
143 |         # Calculate pseudo targets at target_index
144 |         with torch.no_grad():
145 |             _, pseudo_gt12, _ = self.teacher({'image': [im1, im1, im2]}, target_index)
146 |             _, pseudo_gt23, _ = self.teacher({'image': [im2, im3, im3]}, target_index)
147 |         pseudo_gt12, pseudo_gt23 = pseudo_gt12 - self.mean_pix, pseudo_gt23 - self.mean_pix
148 |
149 | im1, im2, im3 = im1 - self.mean_pix, im2 - self.mean_pix, im3 - self.mean_pix
150 |
151 | pred12, pred12_uvb, pred12_uvf = self.network_output([im1, im2], target_index)
152 | pred23, pred23_uvb, pred23_uvf = self.network_output([im2, im3], target_index)
153 |
154 | target_index = (self.args.num_interp + 1) - target_index
155 |
156 | ds_pred12 = F.interpolate(pred12, scale_factor=1./self.scale, mode='bilinear', align_corners=False)
157 | ds_pred23 = F.interpolate(pred23, scale_factor=1./self.scale, mode='bilinear', align_corners=False)
158 |
159 | uvf, bottleneck_out, uvb = self.make_flow_prediction(torch.cat((ds_pred12, ds_pred23), dim=1))
160 |
161 | uvf = self.scale * F.interpolate(uvf, scale_factor=self.scale, mode='bilinear', align_corners=False)
162 | uvb = self.scale * F.interpolate(uvb, scale_factor=self.scale, mode='bilinear', align_corners=False)
163 | bottleneck_out = F.interpolate(bottleneck_out, scale_factor=self.scale, mode='bilinear', align_corners=False)
164 |
165 | t = self.tlinespace[target_index]
166 | t = t.reshape(t.shape[0], 1, 1, 1)
167 |
168 | uvb_t_raw = - (1 - t) * t * uvf + t * t * uvb
169 | uvf_t_raw = (1 - t) * (1 - t) * uvf - (1 - t) * t * uvb
170 |
171 | im12w_raw = self.resample2d(pred12, uvb_t_raw) # im1w_raw
172 | im23w_raw = self.resample2d(pred23, uvf_t_raw) # im2w_raw
173 |
174 | # Perform intermediate bi-directional flow refinement
175 | uv_t_data = torch.cat((pred12, pred23, im12w_raw, uvb_t_raw, im23w_raw, uvf_t_raw), dim=1)
176 | uvf_t, uvb_t, t_vis_map = self.make_flow_interpolation(uv_t_data, bottleneck_out)
177 |
178 | uvb_t = uvb_t_raw + uvb_t # uvb_t
179 | uvf_t = uvf_t_raw + uvf_t # uvf_t
180 |
181 | im12w = self.resample2d(pred12, uvb_t) # im1w
182 | im23w = self.resample2d(pred23, uvf_t) # im2w
183 |
184 | # Compute final intermediate frame via weighted blending
185 | alpha1 = (1 - t) * t_vis_map
186 | alpha2 = t * (1 - t_vis_map)
187 | denorm = alpha1 + alpha2 + 1e-10
188 | im_t_out = (alpha1 * im12w + alpha2 * im23w) / denorm
189 |
190 | # Calculate training loss
191 | losses = {}
192 | losses['pix_loss'] = self.L1_loss(im_t_out, im2)
193 |
194 | im_t_out_features = self.vgg16_features(im_t_out/255.)
195 | im2_features = self.vgg16_features(im2/255.)
196 | losses['vgg16_loss'] = self.L2_loss(im_t_out_features, im2_features)
197 |
198 | losses['warp_loss'] = self.L1_loss(im12w_raw, im2) + self.L1_loss(im23w_raw, im2) + \
199 | self.L1_loss(self.resample2d(pred12, uvb), pred23) + \
200 | self.L1_loss(self.resample2d(pred23, uvf), pred12) + \
201 | self.L1_loss(self.resample2d(im1, pred12_uvb), im2) + \
202 | self.L1_loss(self.resample2d(im2, pred12_uvf), im1) + \
203 | self.L1_loss(self.resample2d(im2, pred23_uvb), im3) + \
204 | self.L1_loss(self.resample2d(im3, pred23_uvf), im2)
205 |
206 | smooth_bwd = self.L1_loss(uvb[:, :, :, :-1], uvb[:, :, :, 1:]) + \
207 | self.L1_loss(uvb[:, :, :-1, :], uvb[:, :, 1:, :]) + \
208 | self.L1_loss(pred12_uvb[:, :, :, :-1], pred12_uvb[:, :, :, 1:]) + \
209 | self.L1_loss(pred12_uvb[:, :, :-1, :], pred12_uvb[:, :, 1:, :]) + \
210 | self.L1_loss(pred23_uvb[:, :, :, :-1], pred23_uvb[:, :, :, 1:]) + \
211 | self.L1_loss(pred23_uvb[:, :, :-1, :], pred23_uvb[:, :, 1:, :])
212 |
213 | smooth_fwd = self.L1_loss(uvf[:, :, :, :-1], uvf[:, :, :, 1:]) + \
214 | self.L1_loss(uvf[:, :, :-1, :], uvf[:, :, 1:, :]) + \
215 | self.L1_loss(pred12_uvf[:, :, :, :-1], pred12_uvf[:, :, :, 1:]) + \
216 | self.L1_loss(pred12_uvf[:, :, :-1, :], pred12_uvf[:, :, 1:, :]) + \
217 | self.L1_loss(pred23_uvf[:, :, :, :-1], pred23_uvf[:, :, :, 1:]) + \
218 | self.L1_loss(pred23_uvf[:, :, :-1, :], pred23_uvf[:, :, 1:, :])
219 |
220 | losses['loss_smooth'] = smooth_bwd + smooth_fwd
221 |
222 |         losses['teacher'] = self.L1_loss(pseudo_gt12, pred12) + self.L1_loss(pseudo_gt23, pred23)
223 |
224 | # Coefficients for total loss determined empirically using a validation set
225 | losses['tot'] = self.pix_alpha * losses['pix_loss'] + self.warp_alpha * losses['warp_loss'] + \
226 | self.vgg16_alpha * losses['vgg16_loss'] + self.smooth_alpha * losses['loss_smooth'] + self.teacher_weight * losses['teacher']
227 |
228 | # Converts back to (0, 255) range
229 | im_t_out = im_t_out + self.mean_pix
230 | im_target = im2 + self.mean_pix
231 |
232 | return losses, im_t_out, im_target
233 |
--------------------------------------------------------------------------------
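
The intermediate-flow initialization above (uvb_t_raw, uvf_t_raw) follows the
quadratic time-blend of Super SloMo (Jiang et al., cited in parser.py): under
a locally linear motion assumption, the flows from time t back to frame 1 and
forward to frame 2 are approximated from the frame-to-frame flows uvf and uvb.
A toy numeric check of the formula at the midpoint t = 0.5:

    import torch

    t = 0.5
    uvf = torch.tensor([1.0])   # toy forward flow (frame 1 -> frame 2)
    uvb = torch.tensor([-1.0])  # toy backward flow (frame 2 -> frame 1)

    uvb_t_raw = -(1 - t) * t * uvf + t * t * uvb
    uvf_t_raw = (1 - t) ** 2 * uvf - (1 - t) * t * uvb
    print(uvb_t_raw.item(), uvf_t_raw.item())  # -0.5 0.5
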
/eval.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # *****************************************************************************
3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | # * Redistributions of source code must retain the above copyright
8 | # notice, this list of conditions and the following disclaimer.
9 | # * Redistributions in binary form must reproduce the above copyright
10 | # notice, this list of conditions and the following disclaimer in the
11 | # documentation and/or other materials provided with the distribution.
12 | # * Neither the name of the NVIDIA CORPORATION nor the
13 | # names of its contributors may be used to endorse or promote products
14 | # derived from this software without specific prior written permission.
15 | #
16 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | #
27 | # *****************************************************************************
28 | import os
29 | import sys
30 | import shutil
31 | import natsort
32 | import numpy as np
33 | from glob import glob
34 | from imageio import imsave
35 | from skimage.measure import compare_psnr, compare_ssim
36 | from tqdm import tqdm
37 | tqdm.monitor_interval = 0
38 |
39 | import torch
40 | import torch.backends.cudnn
41 | import torch.nn.parallel
42 | import torch.optim
43 | import torch.utils.data
44 |
45 | from parser import parser
46 | import datasets
47 | import models
48 | import utils
49 |
50 | """
51 | Reda, Fitsum A., et al. "Unsupervised Video Interpolation Using Cycle Consistency."
52 | arXiv preprint arXiv:1906.05928 (2019).
53 |
54 | Jiang, Huaizu, et al. "Super slomo: High quality estimation of multiple
55 | intermediate frames for video interpolation." arXiv pre-print arXiv:1712.00080 (2017).
56 | """
57 |
58 |
59 | def main():
60 | with utils.TimerBlock("\nParsing Arguments") as block:
61 | args = parser.parse_args()
62 |
63 | args.rank = int(os.getenv('RANK', 0))
64 |
65 | block.log("Creating save directory: {}".format(args.save))
66 | args.save_root = os.path.join(args.save, args.name)
67 | if args.write_images or args.write_video:
68 | os.makedirs(args.save_root, exist_ok=True)
69 | assert os.path.exists(args.save_root)
70 | else:
71 | os.makedirs(args.save, exist_ok=True)
72 | assert os.path.exists(args.save)
73 |
74 | os.makedirs(args.torch_home, exist_ok=True)
75 | os.environ['TORCH_HOME'] = args.torch_home
76 |
77 | args.gpus = torch.cuda.device_count() if args.gpus < 0 else args.gpus
78 | block.log('Number of gpus: {} | {}'.format(args.gpus, list(range(args.gpus))))
79 |
80 | args.network_class = utils.module_to_dict(models)[args.model]
81 | args.dataset_class = utils.module_to_dict(datasets)[args.dataset]
82 | block.log('save_root: {}'.format(args.save_root))
83 | block.log('val_file: {}'.format(args.val_file))
84 |
85 | with utils.TimerBlock("Building {} Dataset".format(args.dataset)) as block:
86 | vkwargs = {'batch_size': args.gpus * args.val_batch_size,
87 | 'num_workers': args.gpus * args.workers,
88 | 'pin_memory': True, 'drop_last': True}
89 | step_size = args.val_step_size if args.val_step_size > 0 else (args.num_interp + 1)
90 | val_dataset = args.dataset_class(args=args, root=args.val_file, num_interp=args.num_interp,
91 | sample_rate=args.val_sample_rate, step_size=step_size)
92 |
93 | val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False,
94 | **vkwargs)
95 |
96 | args.folder_list = natsort.natsorted(
97 | [os.path.basename(f) for f in sorted(glob(os.path.join(args.val_file, '*')))])
98 |
99 |         block.log('Number of Validation Images: {} ({} mini-batches)'.format(len(val_loader.dataset), len(val_loader)))
100 |
101 | with utils.TimerBlock("Building {} Model".format(args.model)) as block:
102 | model = args.network_class(args)
103 |
104 | block.log('Number of parameters: {val:,}'.format(val=
105 | sum([p.data.nelement() if p.requires_grad else 0 for p in model.parameters()])))
106 |
107 | block.log('Initializing CUDA')
108 | assert torch.cuda.is_available(), 'Code supported for GPUs only at the moment'
109 | model = model.cuda()
110 | model = torch.nn.DataParallel(model, device_ids=list(range(args.gpus)))
111 | torch.manual_seed(args.seed)
112 |
113 | block.log("Attempting to Load checkpoint '{}'".format(args.resume))
114 | if args.resume and os.path.isfile(args.resume):
115 | checkpoint = torch.load(args.resume)
116 |
117 | # Partial initialization
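118 |             # Drop checkpoint weights whose shapes disagree with the current model;
119 |             # load_state_dict(strict=False) below tolerates any remaining key
120 |             # mismatches in either direction.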
118 | input_dict = checkpoint['state_dict']
119 | curr_dict = model.module.state_dict()
120 | state_dict = input_dict.copy()
121 | for key in input_dict:
122 | if key not in curr_dict:
123 | continue
124 | if curr_dict[key].shape != input_dict[key].shape:
125 | state_dict.pop(key)
126 | print("key {} skipped because of size mismatch.".format(key))
127 | model.module.load_state_dict(state_dict, strict=False)
128 |
129 | epoch = checkpoint['epoch']
130 | block.log("Successfully loaded checkpoint (at epoch {})".format(epoch))
131 | elif args.resume:
132 | block.log("No checkpoint found at '{}'.\nAborted.".format(args.resume))
133 | sys.exit(0)
134 |         else:
135 |             epoch = 0  # no checkpoint loaded: define epoch for the evaluate() call below
136 |             block.log("Random initialization, checkpoint not provided.")
136 |
137 |     with utils.TimerBlock("Inference started") as block:
138 | evaluate(args, val_loader, model, args.num_interp, epoch, block)
139 |
140 |
141 | def evaluate(args, val_loader, model, num_interp, epoch, block):
142 | in_height, in_width = val_loader.dataset[0]['ishape']
143 | pred_flag, pred_values = utils.get_pred_flag(in_height, in_width)
144 |
145 |     if not args.apply_vidflag:
146 |         pred_flag = 0 * pred_flag + 1  # all-ones mask: leaves predicted pixels unchanged
147 |         pred_values = 0 * pred_values  # zero overlay: disables the frame stamp in the output video
148 |
149 | if args.rank == 0 and args.write_video:
150 | video_file = os.path.join(args.save_root, '__epoch_%03d.mp4' % epoch)
151 | _pipe = utils.create_pipe(video_file, in_width, in_height, frame_rate=args.video_fps)
152 |
153 | model.eval()
154 |
155 | loss_values = utils.AverageMeter()
156 | avg_metrics = np.zeros((0, 3), dtype=float)
157 | num_batches = len(val_loader) if args.val_n_batches < 0 else args.val_n_batches
158 |
159 | with torch.no_grad():
160 | for i, batch in enumerate(tqdm(val_loader, total=num_batches)):
161 |
162 | inputs = [b.cuda() for b in batch['image']]
163 |
164 | input_images = [inputs[0], inputs[len(inputs) // 2], inputs[-1]]
165 | inputs_dict = {'image': input_images}
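166 |             # only the first, middle, and last frames of the window are fed to the
167 |             # model; the frames in between (inputs[1:-1]) are ground-truth targets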
166 |
167 | target_images = inputs[1:-1]
168 | tar_indices = batch['tindex'].cuda()
169 |
170 | # compute loss at mid-way
171 | tar_indices[:] = (num_interp + 1) // 2
172 | loss, outputs, _ = model(inputs_dict, tar_indices)
173 | loss_values.update(loss['tot'].data.item(), outputs.size(0))
174 |
175 | # compute output for each intermediate timepoint
176 | output_image = inputs[0]
177 |             for tar_index in range(1, num_interp + 1):
178 |                 tar_indices[:] = tar_index
179 | _, outputs, _ = model(inputs_dict, tar_indices)
180 | output_image = torch.cat((output_image, outputs), dim=1)
181 | output_image = torch.split(output_image, 3, dim=1)[1:]
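182 |             # output_image now holds all predictions concatenated along the channel
183 |             # axis; splitting into 3-channel chunks and dropping the seed inputs[0]
184 |             # leaves a tuple of num_interp predicted RGB frames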
182 |
183 | batch_size, _, _, _ = inputs[0].shape
184 | input_filenames = batch['input_files'][1:-1]
185 | in_height, in_width = batch['ishape']
186 |
187 | for b in range(batch_size):
188 | first_target = (input_images[0][b].data.cpu().numpy().transpose(1, 2, 0)).astype(np.uint8)
189 | first_target = first_target[:in_height, :in_width, :]
190 | second_target = (input_images[-1][b].data.cpu().numpy().transpose(1, 2, 0)).astype(np.uint8)
191 | second_target = second_target[:in_height, :in_width, :]
192 |
193 |                 gt_image = first_target
194 |                 # fallback name, used only when no ground-truth frames were loaded
195 |                 gt_filename = '/'.join(batch['input_files'][0][b].split(os.sep)[-2:])
194 | for index in range(num_interp):
195 | pred_image = (output_image[index][b].data.cpu().numpy().transpose(1, 2, 0)).astype(np.uint8)
196 | pred_image = pred_image[:in_height, :in_width, :]
197 |
198 | # if ground-truth not loaded, treat low FPS frames as targets
199 | if index < len(target_images):
200 | gt_image = (target_images[index][b].data.cpu().numpy().transpose(1, 2, 0)).astype(np.uint8)
201 | gt_filename = '/'.join(input_filenames[index][b].split(os.sep)[-2:])
202 | gt_image = gt_image[:in_height, :in_width, :]
203 |
204 |                     # PSNR/SSIM via skimage; IE is the mean per-pixel RMS color error
205 | psnr = compare_psnr(pred_image, gt_image)
206 | ssim = compare_ssim(pred_image, gt_image, multichannel=True, gaussian_weights=True)
207 | err = pred_image.astype(np.float32) - gt_image.astype(np.float32)
208 | ie = np.mean(np.sqrt(np.sum(err * err, axis=2)))
209 |
210 | avg_metrics = np.vstack((avg_metrics, np.array([psnr, ssim, ie])))
211 |
212 | # write_images
213 | if args.write_images:
214 | tmp_filename = os.path.join(args.save_root, "%s-%02d-%s.png" % (gt_filename[:-4], (index + 1), args.post_fix))
215 | os.makedirs(os.path.dirname(tmp_filename), exist_ok=True)
216 | imsave(tmp_filename, pred_image)
217 |
218 | # write video
219 | if args.rank == 0 and args.write_video:
220 | if index == 0:
221 | _pipe.stdin.write(first_target.tobytes())
222 | try:
223 | _pipe.stdin.write((pred_image * pred_flag + pred_values).tobytes())
224 | except AttributeError:
225 | raise AttributeError("Error in ffmpeg video creation. Inconsistent image size.")
226 | if args.write_images:
227 | tmp_filename = os.path.join(args.save_root, "%s-%02d-%s.png" % (gt_filename[:-4], 0, "ground_truth"))
228 | os.makedirs(os.path.dirname(tmp_filename), exist_ok=True)
229 | imsave(tmp_filename, first_target)
230 | tmp_filename = os.path.join(args.save_root, "%s-%02d-%s.png" % (gt_filename[:-4], num_interp+1, "ground_truth"))
231 | imsave(tmp_filename, second_target)
232 | if (i + 1) >= num_batches:
233 | break
234 |
235 |     if args.rank == 0 and args.write_video:
236 | _pipe.stdin.close()
237 | _pipe.wait()
238 |
239 |     """
240 |     Print final accuracy statistics. If intermediate ground-truth frames are not available in the
241 |     input sequence, each intermediate prediction is compared against the first low-FPS frame instead,
242 |     in which case the reported metrics should not be trusted.
243 |     """
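244 |     # rows of avg_metrics were appended in (sample, timepoint) order, so the
245 |     # strided slice avg_metrics[i::num_interp] gathers the [PSNR, SSIM, IE]
246 |     # rows of intermediate frame i+1 across all evaluated samples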
247 |     for i in range(num_interp):
248 |         psnr, ssim, ie = np.nanmean(avg_metrics[i::num_interp], axis=0)
249 |         result2print = 'interm {:02d} PSNR: {:.2f}, SSIM: {:.3f}, IE: {:.2f}'.format(i + 1, psnr, ssim, ie)
250 |         block.log(result2print)
250 |
251 | avg_metrics = np.nanmean(avg_metrics, axis=0)
252 |     v_psnr, v_ssim, v_ie = avg_metrics
253 |     result2print = 'Overall PSNR: {:.2f}, SSIM: {:.3f}, IE: {:.2f}'.format(v_psnr, v_ssim, v_ie)
254 |     block.log(result2print)
256 |
257 | # re-name video with psnr
258 | if args.rank == 0 and args.write_video:
259 | shutil.move(os.path.join(args.save_root, '__epoch_%03d.mp4' % epoch),
260 | os.path.join(args.save_root, '__epoch_%03d_psnr_%1.2f.mp4' % (epoch, avg_metrics[0])))
261 |
262 | # Move back the model to train mode.
263 | model.train()
264 |
265 | torch.cuda.empty_cache()
266 |     block.log('max memory allocated (GB): {:.3f}'.format(
267 |         torch.cuda.max_memory_allocated() / (1024 * 1024 * 1024)))
268 |
269 | return v_psnr, v_ssim, v_ie, loss_values.val
270 |
271 |
272 | if __name__ == '__main__':
273 | main()
274 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Unsupervised Video Interpolation using Cycle Consistency
2 | ### [Project](https://nv-adlr.github.io/publication/2019-UnsupervisedVideoInterpolation) | [Paper](https://arxiv.org/abs/1906.05928) | [YouTube](https://drive.google.com/uc?export=view&id=1DgF-0r1agSy9Y77Bthm_w135qOABc3Xd)
3 | [Unsupervised Video Interpolation using Cycle Consistency](https://nv-adlr.github.io/publication/2019-UnsupervisedVideoInterpolation)
4 | [Fitsum A. Reda](https://scholar.google.com/citations?user=quZ_qLYAAAAJ&hl=en), [Deqing Sun](https://scholar.google.com/citations?user=t4rgICIAAAAJ&hl=en)*, Aysegul Dundar, Mohammad Shoeybi, [Guilin Liu](https://liuguilin1225.github.io/), Kevin J. Shih, Andrew Tao, [Jan Kautz](http://jankautz.com/), [Bryan Catanzaro](http://catanzaro.name/)
5 | NVIDIA Corporation
6 | In International Conference on Computer Vision (**ICCV**) 2019.
7 | ( * Currently affiliated with Google. )
8 |
9 |
10 |
11 |
12 |
102 |
103 |
104 |