├── Datasets ├── __init__.py ├── tartanTrajFlowDataset.py ├── cowmask.py ├── util_flow.py └── segmask_gt.py ├── Network ├── __init__.py ├── rigidmask │ ├── __init__.py │ ├── .gitignore │ ├── networks │ │ ├── DCNv2 │ │ │ ├── DCN │ │ │ │ ├── __init__.py │ │ │ │ ├── src │ │ │ │ │ ├── vision.cpp │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── vision.h │ │ │ │ │ │ ├── dcn_v2_im2col_cpu.h │ │ │ │ │ │ └── dcn_v2_cpu.cpp │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── vision.h │ │ │ │ │ │ └── dcn_v2_im2col_cuda.h │ │ │ │ │ └── dcn_v2.h │ │ │ │ ├── testcpu.py │ │ │ │ └── testcuda.py │ │ │ ├── make.sh │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── LICENSE │ │ │ └── setup.py │ │ ├── large_hourglass.py │ │ ├── resnet_dcn.py │ │ └── msra_resnet.py │ ├── det_utils.py │ ├── det.py │ └── det_losses.py ├── PWC │ ├── __init__.py │ └── PWCNet.py ├── VONet.py └── VOFlowNet.py ├── evaluator ├── __init__.py ├── tartanair_evaluator.py ├── evaluator_base.py ├── evaluate_kitti.py ├── evaluate_ate_scale.py ├── evaluate_rpe.py ├── trajectory_transform.py └── transformation.py ├── .gitignore ├── LICENSE ├── environment.yml ├── vo_trajectory_from_folder.py └── README.md /Datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Network/rigidmask/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Network/rigidmask/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /Network/PWC/__init__.py: -------------------------------------------------------------------------------- 1 | from .PWCNet import * 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | models/ 3 | data/ 4 | __pycache__/ 5 | .DS_Store -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcn_v2 import * 2 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py build develop 3 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext 7 | build 8 | DCNv2.egg-info 9 | dist -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/vision.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dcn_v2.h" 3 | 4 | 
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward"); 6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward"); 7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward"); 8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward"); 9 | } 10 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with Pytorch 1.X 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python testcpu.py # run examples and gradient check on cpu 7 | python testcuda.py # run examples and gradient check on gpu 8 | ``` 9 | ### Note 10 | Now the master branch is for pytorch 1.x, you can switch back to pytorch 0.4 with, 11 | ```bash 12 | git checkout pytorch_0.4 13 | ``` 14 | 15 | ### Known Issues: 16 | 17 | - [x] Gradient check w.r.t offset (solved) 18 | - [ ] Backward is not reentrant (minor) 19 | 20 | This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 21 | 22 | Update: all gradient check passes with **double** precision. 23 | 24 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 25 | float `<1e-15` for double), 26 | so it may not be a serious problem (?) 27 | 28 | Please post an issue or PR if you have any comments. 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Air Lab Stacks 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Network/rigidmask/det_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _gather_feat(feat, ind, mask=None): 13 | dim = feat.size(2) 14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 15 | feat = feat.gather(1, ind) 16 | if mask is not None: 17 | mask = mask.unsqueeze(2).expand_as(feat) 18 | feat = feat[mask] 19 | feat = feat.view(-1, dim) 20 | return feat 21 | 22 | def _transpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] 
= \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) -------------------------------------------------------------------------------- /Datasets/tartanTrajFlowDataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | # ============================== 3 | # tartanTrajFlowDataset.py 4 | # library for DytanVO data I/O 5 | # Author: Wenshan Wang, Shihao Shen 6 | # Date: 3rd Jan 2023 7 | # ============================== 8 | """ 9 | import numpy as np 10 | import cv2 11 | from torch.utils.data import Dataset, DataLoader 12 | from os import listdir 13 | from evaluator.transformation import pos_quats2SEs, pose2motion, SEs2ses 14 | from .utils import make_intrinsics_layer 15 | 16 | class TrajFolderDataset(Dataset): 17 | """scene flow synthetic dataset. """ 18 | 19 | def __init__(self, imgfolder, transform = None, 20 | focalx = 320.0, focaly = 320.0, centerx = 320.0, centery = 240.0): 21 | 22 | files = listdir(imgfolder) 23 | self.rgbfiles = [(imgfolder +'/'+ ff) for ff in files if (ff.endswith('.png') or ff.endswith('.jpg'))] 24 | self.rgbfiles.sort() 25 | self.imgfolder = imgfolder 26 | 27 | print('Find {} image files in {}'.format(len(self.rgbfiles), imgfolder)) 28 | 29 | self.N = len(self.rgbfiles) - 1 30 | 31 | # self.N = len(self.lines) 32 | self.transform = transform 33 | self.focalx = focalx 34 | self.focaly = focaly 35 | self.centerx = centerx 36 | self.centery = centery 37 | 38 | def __len__(self): 39 | return self.N 40 | 41 | def __getitem__(self, idx): 42 | imgfile1 = self.rgbfiles[idx].strip() 43 | imgfile2 = self.rgbfiles[idx+1].strip() 44 | img1 = cv2.imread(imgfile1) 45 | img2 = cv2.imread(imgfile2) 46 | 47 | res = {'img1': img1, 'img2': img2} 48 | 49 | h, w, _ = img1.shape 50 | intrinsicLayer = make_intrinsics_layer(w, h, self.focalx, self.focaly, self.centerx, self.centery) 51 | res['intrinsic'] = intrinsicLayer 52 | 53 | if self.transform: 54 | res = self.transform(res) 55 | 56 | res['img1_raw'] = img1 57 | res['img2_raw'] = img2 58 | 59 | return res -------------------------------------------------------------------------------- /evaluator/tartanair_evaluator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang 2 | # For License information please see the LICENSE file in the root directory. 
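# Note on input format: the ground-truth and estimated pose files consumed by
# this evaluator are plain-text N x 7 arrays, one pose per line; judging from
# the shape checks in evaluate_one_trajectory() and the pos_quats2SE* helpers,
# the layout is position followed by quaternion, i.e. [x y z qx qy qz qw].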
3 | 4 | import numpy as np 5 | from .evaluator_base import ATEEvaluator, RPEEvaluator, KittiEvaluator, transform_trajs, quats2SEs 6 | from os.path import isdir, isfile 7 | 8 | # from trajectory_transform import timestamp_associate 9 | 10 | class TartanAirEvaluator: 11 | def __init__(self, scale = False, round=1): 12 | self.ate_eval = ATEEvaluator() 13 | self.rpe_eval = RPEEvaluator() 14 | self.kitti_eval = KittiEvaluator() 15 | 16 | def evaluate_one_trajectory(self, gt_traj, est_traj, scale=False, kittitype=True): 17 | """ 18 | scale = True: calculate a global scale 19 | """ 20 | # load trajectories 21 | try: 22 | gt_traj = np.loadtxt(gt_traj) 23 | est_traj = np.loadtxt(est_traj) 24 | except: 25 | pass 26 | 27 | if gt_traj.shape[0] != est_traj.shape[0]: 28 | raise Exception("POSEFILE_LENGTH_ILLEGAL") 29 | if gt_traj.shape[1] != 7 or est_traj.shape[1] != 7: 30 | raise Exception("POSEFILE_FORMAT_ILLEGAL") 31 | 32 | # transform and scale 33 | gt_traj_trans, est_traj_trans, s = transform_trajs(gt_traj, est_traj, scale) 34 | gt_SEs, est_SEs = quats2SEs(gt_traj_trans, est_traj_trans) 35 | 36 | ate_score, gt_ate_aligned, est_ate_aligned = self.ate_eval.evaluate(gt_traj, est_traj, scale) 37 | rpe_score = self.rpe_eval.evaluate(gt_SEs, est_SEs) 38 | kitti_score = self.kitti_eval.evaluate(gt_SEs, est_SEs, kittitype=kittitype) 39 | 40 | return {'ate_score': ate_score, 41 | 'rpe_score': rpe_score, 42 | 'kitti_score': kitti_score, 43 | 'gt_aligned': gt_ate_aligned, 44 | 'est_aligned': est_ate_aligned} 45 | 46 | if __name__ == "__main__": 47 | 48 | # scale = True for monocular track, scale = False for stereo track 49 | aicrowd_evaluator = TartanAirEvaluator() 50 | result = aicrowd_evaluator.evaluate_one_trajectory('pose_gt.txt', 'pose_est.txt', scale=True) 51 | print(result) 52 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import glob 5 | 6 | import torch 7 | 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | from torch.utils.cpp_extension import CppExtension 10 | from torch.utils.cpp_extension import CUDAExtension 11 | 12 | from setuptools import find_packages 13 | from setuptools import setup 14 | 15 | requirements = ["torch", "torchvision"] 16 | 17 | 18 | def get_extensions(): 19 | this_dir = os.path.dirname(os.path.abspath(__file__)) 20 | extensions_dir = os.path.join(this_dir, "DCN", "src") 21 | 22 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 23 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 24 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 25 | 26 | #os.environ["CC"] = "g++" 27 | sources = main_file + source_cpu 28 | extension = CppExtension 29 | extra_compile_args = {'cxx': ['-std=c++14']} 30 | define_macros = [] 31 | 32 | 33 | #if torch.cuda.is_available() and CUDA_HOME is not None: 34 | if torch.cuda.is_available(): 35 | extension = CUDAExtension 36 | sources += source_cuda 37 | define_macros += [("WITH_CUDA", None)] 38 | extra_compile_args["nvcc"] = [ 39 | "-DCUDA_HAS_FP16=1", 40 | "-D__CUDA_NO_HALF_OPERATORS__", 41 | "-D__CUDA_NO_HALF_CONVERSIONS__", 42 | "-D__CUDA_NO_HALF2_OPERATORS__", 43 | ] 44 | else: 45 | #raise NotImplementedError('Cuda is not available') 46 | pass 47 | 48 | 49 | sources = [os.path.join(extensions_dir, s) for s in sources] 50 | include_dirs = [extensions_dir] 51 | ext_modules = [ 52 | 
extension( 53 | "_ext", 54 | sources, 55 | include_dirs=include_dirs, 56 | define_macros=define_macros, 57 | extra_compile_args=extra_compile_args, 58 | ) 59 | ] 60 | return ext_modules 61 | 62 | setup( 63 | name="DCNv2", 64 | version="0.1", 65 | author="charlesshang", 66 | url="https://github.com/charlesshang/DCNv2", 67 | description="deformable convolutional networks", 68 | packages=find_packages(exclude=("configs", "tests",)), 69 | # install_requires=requirements, 70 | ext_modules=get_extensions(), 71 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 72 | ) 73 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cpu_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cpu_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cuda_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cuda_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | 
dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /Network/VONet.py: -------------------------------------------------------------------------------- 1 | # Software License Agreement (BSD License) 2 | # 3 | # Copyright (c) 2020, Wenshan Wang, Yaoyu Hu, CMU 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions 8 | # are met: 9 | # 10 | # * Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # * Redistributions in binary form must reproduce the above 13 | # copyright notice, this list of conditions and the following 14 | # disclaimer in the documentation and/or other materials provided 15 | # with the distribution. 16 | # * Neither the name of CMU nor the names of its 17 | # contributors may be used to endorse or promote products derived 18 | # from this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 30 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | # POSSIBILITY OF SUCH DAMAGE. 
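# A minimal usage sketch for the network defined below (illustrative only; the
# tensor shapes are assumptions, not taken from this repository):
#
#   net = VONet()
#   # img1, img2 : (B, 3, H, W) RGB frames at t-1 and t
#   # intrinsics : (B, 2, H, W) intrinsics layer (see make_intrinsics_layer)
#   # segmask    : (B, 1, H, W) motion-segmentation mask, > 0 for dynamic pixels
#   flow, pose = net([img1, img2, intrinsics, None, segmask])
#
# With x[3] = None the flow predicted by the PWC network is used; a precomputed
# flow can be passed instead, and only_pose=True skips the flow network (in
# which case x[3] must hold that precomputed flow).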
32 | 33 | import torch 34 | import torch.nn as nn 35 | import torch.nn.functional as F 36 | from .PWC import PWCDCNet as FlowNet 37 | from .VOFlowNet import VOFlowRes as FlowPoseNet 38 | 39 | class VONet(nn.Module): 40 | def __init__(self): 41 | super(VONet, self).__init__() 42 | 43 | self.flowNet = FlowNet() 44 | self.flowPoseNet = FlowPoseNet() 45 | 46 | def forward(self, x, only_flow=False, only_pose=False): 47 | ''' 48 | x[0]: rgb frame t-1 49 | x[1]: rgb frame t 50 | x[2]: intrinsics 51 | x[3]: flow t-1 -> t (optional) 52 | x[4]: motion segmentation mask 53 | ''' 54 | # import ipdb;ipdb.set_trace() 55 | if not only_pose: 56 | flow_out = self.flowNet(torch.cat((x[0], x[1]), dim=1)) 57 | 58 | if only_flow: 59 | return flow_out, None 60 | 61 | flow = flow_out[0] 62 | 63 | else: 64 | assert(len(x) > 3) 65 | flow_out = None 66 | 67 | if len(x) > 3 and x[3] is not None: 68 | flow_input = x[3] 69 | else: 70 | flow_input = flow 71 | 72 | # Mask out input flow using the segmentation result 73 | assert(len(x) > 4) 74 | mask = torch.gt(x[4], 0) 75 | for i in range(flow_input.shape[0]): 76 | zeros = torch.cat([mask[i], ]*2, dim=0) 77 | flow_input[i][zeros] = 0 78 | 79 | flow_input = torch.cat((flow_input, 1 - x[4]), dim=1) # segmentation layer 80 | flow_input = torch.cat((flow_input, x[2]), dim=1) # intrinsics layer 81 | 82 | pose = self.flowPoseNet(flow_input) 83 | 84 | return flow_out, pose 85 | -------------------------------------------------------------------------------- /evaluator/evaluator_base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang 2 | # For License information please see the LICENSE file in the root directory. 3 | 4 | import numpy as np 5 | from .trajectory_transform import trajectory_transform, rescale 6 | from .transformation import pos_quats2SE_matrices, SE2pos_quat 7 | from .evaluate_ate_scale import align, plot_traj 8 | from .evaluate_rpe import evaluate_trajectory 9 | from .evaluate_kitti import evaluate as kittievaluate 10 | 11 | np.set_printoptions(suppress=True, precision=2, threshold=100000) 12 | 13 | def transform_trajs(gt_traj, est_traj, cal_scale): 14 | gt_traj, est_traj = trajectory_transform(gt_traj, est_traj) 15 | if cal_scale : 16 | est_traj, s = rescale(gt_traj, est_traj) 17 | print(' Scale, {}'.format(s)) 18 | else: 19 | s = 1.0 20 | return gt_traj, est_traj, s 21 | 22 | def quats2SEs(gt_traj, est_traj): 23 | gt_SEs = pos_quats2SE_matrices(gt_traj) 24 | est_SEs = pos_quats2SE_matrices(est_traj) 25 | return gt_SEs, est_SEs 26 | 27 | def per_frame_scale_alignment(gt_motions, est_motions): 28 | dist_gt = np.linalg.norm(gt_motions[:,:3], axis=1) 29 | # scale the output frame by frame 30 | motions_scale = est_motions.copy() 31 | dist = np.linalg.norm(motions_scale[:,:3],axis=1) 32 | scale_gt = dist_gt/dist 33 | motions_scale[:,:3] = est_motions[:,:3] * scale_gt.reshape(-1,1) 34 | 35 | return motions_scale 36 | 37 | class ATEEvaluator(object): 38 | def __init__(self): 39 | super(ATEEvaluator, self).__init__() 40 | 41 | 42 | def evaluate(self, gt_traj, est_traj, scale): 43 | gt_xyz = np.matrix(gt_traj[:,0:3].transpose()) 44 | est_xyz = np.matrix(est_traj[:, 0:3].transpose()) 45 | 46 | rot, trans, trans_error, s = align(gt_xyz, est_xyz, scale) 47 | print(' ATE scale: {}'.format(s)) 48 | error = np.sqrt(np.dot(trans_error,trans_error) / len(trans_error)) 49 | 50 | # align two trajs 51 | est_SEs = pos_quats2SE_matrices(est_traj) 52 | T = np.eye(4) 53 | T[:3,:3] = rot 
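        # T stacks the rotation (line above) and translation (line below) from
        # the Horn alignment; once inverted it maps the scale-corrected
        # estimated poses into the ground-truth frame.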
54 | T[:3,3:] = trans 55 | T = np.linalg.inv(T) 56 | est_traj_aligned = [] 57 | for se in est_SEs: 58 | se[:3,3] = se[:3,3] * s 59 | se_new = T.dot(se) 60 | se_new = SE2pos_quat(se_new) 61 | est_traj_aligned.append(se_new) 62 | 63 | est_traj_aligned = np.array(est_traj_aligned) 64 | return error, gt_traj, est_traj_aligned 65 | 66 | # ======================= 67 | 68 | 69 | class RPEEvaluator(object): 70 | def __init__(self): 71 | super(RPEEvaluator, self).__init__() 72 | 73 | 74 | def evaluate(self, gt_SEs, est_SEs): 75 | result = evaluate_trajectory(gt_SEs, est_SEs) 76 | 77 | trans_error = np.array(result)[:,2] 78 | rot_error = np.array(result)[:,3] 79 | 80 | trans_error_mean = np.mean(trans_error) 81 | rot_error_mean = np.mean(rot_error) 82 | 83 | # import ipdb;ipdb.set_trace() 84 | 85 | return (rot_error_mean, trans_error_mean) 86 | 87 | # ======================= 88 | 89 | 90 | class KittiEvaluator(object): 91 | def __init__(self): 92 | super(KittiEvaluator, self).__init__() 93 | 94 | # return rot_error, tra_error 95 | def evaluate(self, gt_SEs, est_SEs, kittitype): 96 | # trajectory_scale(est_SEs, 0.831984631412) 97 | error = kittievaluate(gt_SEs, est_SEs, kittitype=kittitype) 98 | return error 99 | -------------------------------------------------------------------------------- /Network/rigidmask/det.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | from .networks.msra_resnet import get_pose_net 11 | from .networks.dlav0 import get_pose_net as get_dlav0 12 | from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn 13 | from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn 14 | from .networks.large_hourglass import get_large_hourglass_net 15 | 16 | _model_factory = { 17 | 'res': get_pose_net, # default Resnet with deconv 18 | 'dlav0': get_dlav0, # default DLAup 19 | 'dla': get_dla_dcn, 20 | 'resdcn': get_pose_net_dcn, 21 | 'hourglass': get_large_hourglass_net, 22 | } 23 | 24 | def create_model(arch, heads, head_conv,num_input): 25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 26 | arch = arch[:arch.find('_')] if '_' in arch else arch 27 | get_model = _model_factory[arch] 28 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv,num_input=num_input) 29 | return model 30 | 31 | def load_model(model, model_path, optimizer=None, resume=False, 32 | lr=None, lr_step=None): 33 | start_epoch = 0 34 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 35 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 36 | state_dict_ = checkpoint['state_dict'] 37 | state_dict = {} 38 | 39 | # convert data_parallal to model 40 | for k in state_dict_: 41 | if k.startswith('module') and not k.startswith('module_list'): 42 | state_dict[k[7:]] = state_dict_[k] 43 | else: 44 | state_dict[k] = state_dict_[k] 45 | model_state_dict = model.state_dict() 46 | 47 | # check loaded parameters and created model parameters 48 | msg = 'If you see this, your model does not fully load the ' + \ 49 | 'pre-trained weight. Please make sure ' + \ 50 | 'you have correctly specified --arch xxx ' + \ 51 | 'or set the correct --num_classes for your own dataset.' 
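  # The checks below make checkpoint loading tolerant: parameters whose shapes
  # do not match, or that are missing on either side, fall back to the freshly
  # initialized values of the current model (with a warning) instead of raising.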
52 | for k in state_dict: 53 | if k in model_state_dict: 54 | if state_dict[k].shape != model_state_dict[k].shape: 55 | print('Skip loading parameter {}, required shape{}, '\ 56 | 'loaded shape{}. {}'.format( 57 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 58 | state_dict[k] = model_state_dict[k] 59 | else: 60 | print('Drop parameter {}.'.format(k) + msg) 61 | for k in model_state_dict: 62 | if not (k in state_dict): 63 | print('No param {}.'.format(k) + msg) 64 | state_dict[k] = model_state_dict[k] 65 | model.load_state_dict(state_dict, strict=False) 66 | 67 | # resume optimizer parameters 68 | if optimizer is not None and resume: 69 | if 'optimizer' in checkpoint: 70 | optimizer.load_state_dict(checkpoint['optimizer']) 71 | start_epoch = checkpoint['epoch'] 72 | start_lr = lr 73 | for step in lr_step: 74 | if start_epoch >= step: 75 | start_lr *= 0.1 76 | for param_group in optimizer.param_groups: 77 | param_group['lr'] = start_lr 78 | print('Resumed optimizer with start lr', start_lr) 79 | else: 80 | print('No optimizer parameters in checkpoint.') 81 | if optimizer is not None: 82 | return model, optimizer, start_epoch 83 | else: 84 | return model 85 | 86 | def save_model(path, epoch, model, optimizer=None): 87 | if isinstance(model, torch.nn.DataParallel): 88 | state_dict = model.module.state_dict() 89 | else: 90 | state_dict = model.state_dict() 91 | data = {'epoch': epoch, 92 | 'state_dict': state_dict} 93 | if not (optimizer is None): 94 | data['optimizer'] = optimizer.state_dict() 95 | torch.save(data, path) 96 | 97 | -------------------------------------------------------------------------------- /Datasets/cowmask.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=bad-indentation 2 | # coding=utf-8 3 | # Copyright 2022 The Google Research Authors. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """ 18 | Cow mask generation. 19 | https://github.com/google-research/google-research/blob/master/milking_cowmask/ 20 | Adapted from LAX implementation to NumPy due to PyTorch dataloader 21 | being incompatible with JAX 22 | Author: Shihao Shen 23 | Date: 29th Aug 2022 24 | """ 25 | import math 26 | import numpy as np 27 | from scipy import special 28 | from scipy.signal import convolve 29 | 30 | _ROOT_2 = math.sqrt(2.0) 31 | _ROOT_2_PI = math.sqrt(2.0 * math.pi) 32 | 33 | 34 | def gaussian_kernels(sigma, max_sigma): 35 | """Make Gaussian kernels for Gaussian blur. 36 | Args: 37 | sigma: kernel sigma 38 | max_sigma: sigma upper limit as a float (this is used to determine 39 | the size of kernel required to fit all kernels) 40 | Returns: 41 | a (1, kernel_width) numpy array 42 | """ 43 | size = round(max_sigma * 3) * 2 + 1 44 | x = np.arange(-size, size + 1)[None, :].astype(np.float32) 45 | y = np.exp(-0.5 * x ** 2 / sigma ** 2) 46 | return y / (sigma * _ROOT_2_PI) 47 | 48 | 49 | def cow_masks(mask_size, log_sigma_range, max_sigma, prop_range): 50 | """Generate Cow Mask. 
51 | Args: 52 | n_masks: number of masks to generate as an int 53 | mask_size: image size as a `(height, width)` tuple 54 | log_sigma_range: the range of the sigma (smoothing kernel) 55 | parameter in log-space`(log(sigma_min), log(sigma_max))` 56 | max_sigma: smoothing sigma upper limit 57 | prop_range: range from which to draw the proportion `p` that 58 | controls the proportion of pixel in a mask that are 1 vs 0 59 | Returns: 60 | Cow Masks as a [v, height, width, 1] numpy array 61 | """ 62 | 63 | # Draw the per-mask proportion p 64 | p = np.random.uniform(prop_range[0], prop_range[1]) 65 | # Compute threshold factors 66 | threshold_factor = special.erfinv(2 * p - 1) * _ROOT_2 67 | 68 | sigma = np.exp(np.random.uniform(log_sigma_range[0], log_sigma_range[1])) 69 | 70 | noise = np.random.normal(size=mask_size) 71 | 72 | # Generate a kernel for each sigma 73 | kernel = gaussian_kernels(sigma, max_sigma) 74 | kernel = kernel.squeeze() 75 | # kernels in y and x 76 | krn_y = kernel[None, :] 77 | krn_x = kernel[:, None] 78 | 79 | # Apply kernels in y and x separately 80 | smooth_noise = convolve(noise, krn_y, mode='same') 81 | smooth_noise = convolve(smooth_noise, krn_x, mode='same') 82 | 83 | # Compute mean and std-dev 84 | noise_mu = smooth_noise.mean(axis=(0,1)) 85 | noise_sigma = smooth_noise.std(axis=(0,1)) 86 | # Compute thresholds 87 | threshold = threshold_factor * noise_sigma + noise_mu 88 | # Apply threshold 89 | mask = (smooth_noise <= threshold).astype(bool) 90 | 91 | return mask 92 | 93 | 94 | if __name__=="__main__": 95 | import time 96 | import matplotlib.pyplot as plt 97 | 98 | cow_sigma_range = (20, 60) 99 | log_sigma_range = (math.log(cow_sigma_range[0]), math.log(cow_sigma_range[1])) 100 | cow_prop_range = (0.1, 0.5) 101 | s = time.time() 102 | max_iou = 0 103 | # for _ in range(1000): 104 | # mask = cow_masks((240, 360), log_sigma_range, cow_sigma_range[1], cow_prop_range) 105 | # max_iou = max(max_iou, np.sum(mask) / (240*360)) 106 | # print(time.time() - s) 107 | # print(max_iou) 108 | 109 | mask = cow_masks((240, 360), log_sigma_range, cow_sigma_range[1], cow_prop_range) 110 | print(np.sum(mask) / (240*360)) 111 | plt.imshow(mask * 255) 112 | plt.savefig('mask.png') -------------------------------------------------------------------------------- /evaluator/evaluate_kitti.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang 2 | # For License information please see the LICENSE file in the root directory. 
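# Output convention: evaluate() at the bottom of this file returns the pair
# (mean rotational drift, mean translational drift) averaged over the segment
# lengths, with rotation in degrees per meter and translation as a unitless
# ratio (meters of drift per meter traveled).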
3 | # This is a python reinplementation of the KITTI metric: http://www.cvlibs.net/datasets/kitti/eval_odometry.php 4 | # Cridit: Xiangwei Wang https://github.com/TimingSpace 5 | 6 | import numpy as np 7 | import sys 8 | 9 | def trajectory_distances(poses): 10 | distances = [] 11 | distances.append(0) 12 | for i in range(1,len(poses)): 13 | p1 = poses[i-1] 14 | p2 = poses[i] 15 | delta = p1[0:3,3] - p2[0:3,3] 16 | distances.append(distances[i-1]+np.linalg.norm(delta)) 17 | return distances 18 | 19 | def last_frame_from_segment_length(dist,first_frame,length): 20 | for i in range(first_frame,len(dist)): 21 | if dist[i]>dist[first_frame]+length: 22 | return i 23 | return -1 24 | 25 | def rotation_error(pose_error): 26 | a = pose_error[0,0] 27 | b = pose_error[1,1] 28 | c = pose_error[2,2] 29 | d = 0.5*(a+b+c-1) 30 | rot_error = np.arccos(max(min(d,1.0),-1.0)) 31 | return rot_error 32 | 33 | def translation_error(pose_error): 34 | dx = pose_error[0,3] 35 | dy = pose_error[1,3] 36 | dz = pose_error[2,3] 37 | return np.sqrt(dx*dx+dy*dy+dz*dz) 38 | 39 | # def line2matrix(pose_line): 40 | # pose_line = np.array(pose_line) 41 | # pose_m = np.matrix(np.eye(4)) 42 | # pose_m[0:3,:] = pose_line.reshape(3,4) 43 | # return pose_m 44 | 45 | def calculate_sequence_error(poses_gt,poses_result,lengths=[10,20,30,40,50,60,70,80]): 46 | # error_vetor 47 | errors = [] 48 | 49 | # paramet 50 | step_size = 1 #10; # every second 51 | num_lengths = len(lengths) 52 | 53 | # import ipdb;ipdb.set_trace() 54 | # pre-compute distances (from ground truth as reference) 55 | dist = trajectory_distances(poses_gt) 56 | # for all start positions do 57 | for first_frame in range(0, len(poses_gt), step_size): 58 | # for all segment lengths do 59 | for i in range(0,num_lengths): 60 | # current length 61 | length = lengths[i]; 62 | 63 | # compute last frame 64 | last_frame = last_frame_from_segment_length(dist,first_frame,length); 65 | # continue, if sequence not long enough 66 | if (last_frame==-1): 67 | continue; 68 | 69 | # compute rotational and translational errors 70 | pose_delta_gt = np.linalg.inv(poses_gt[first_frame]).dot(poses_gt[last_frame]) 71 | pose_delta_result = np.linalg.inv(poses_result[first_frame]).dot(poses_result[last_frame]) 72 | pose_error = np.linalg.inv(pose_delta_result).dot(pose_delta_gt) 73 | r_err = rotation_error(pose_error); 74 | t_err = translation_error(pose_error); 75 | 76 | # compute speed 77 | num_frames = (float)(last_frame-first_frame+1); 78 | speed = length/(0.1*num_frames); 79 | 80 | # write to file 81 | error = [first_frame,r_err/length,t_err/length,length,speed] 82 | errors.append(error) 83 | # return error vector 84 | return errors 85 | 86 | def calculate_ave_errors(errors,lengths=[10,20,30,40,50,60,70,80]): 87 | rot_errors=[] 88 | tra_errors=[] 89 | for length in lengths: 90 | rot_error_each_length =[] 91 | tra_error_each_length =[] 92 | for error in errors: 93 | if abs(error[3]-length)<0.1: 94 | rot_error_each_length.append(error[1]) 95 | tra_error_each_length.append(error[2]) 96 | 97 | if len(rot_error_each_length)==0: 98 | # import ipdb;ipdb.set_trace() 99 | continue 100 | else: 101 | rot_errors.append(sum(rot_error_each_length)/len(rot_error_each_length)) 102 | tra_errors.append(sum(tra_error_each_length)/len(tra_error_each_length)) 103 | return np.array(rot_errors)*180/np.pi, tra_errors 104 | 105 | def evaluate(gt, data,kittitype=True): 106 | if kittitype: 107 | lens = [100,200,300,400,500,600,700,800] # 108 | else: 109 | lens = [5,10,15,20,25,30,35,40] #[1,2,3,4,5,6] # 110 | 
errors = calculate_sequence_error(gt, data, lengths=lens) 111 | rot,tra = calculate_ave_errors(errors, lengths=lens) 112 | return np.mean(rot), np.mean(tra) 113 | 114 | def main(): 115 | # usage: python main.py path_to_ground_truth path_to_predict_pose 116 | # load and preprocess data 117 | ground_truth_data = np.loadtxt(sys.argv[1]) 118 | predict_pose__data = np.loadtxt(sys.argv[2]) 119 | errors = calculate_sequence_error(ground_truth_data,predict_pose__data) 120 | rot,tra = calculate_ave_errors(errors) 121 | print(rot,'\n',tra) 122 | #print(error) 123 | # evaluate the vo result 124 | # save and visualization the evaluatation result 125 | 126 | if __name__ == "__main__": 127 | main() 128 | 129 | 130 | -------------------------------------------------------------------------------- /evaluator/evaluate_ate_scale.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Modified by Wenshan Wang 4 | # Modified by Raul Mur-Artal 5 | # Automatically compute the optimal scale factor for monocular VO/SLAM. 6 | 7 | # Software License Agreement (BSD License) 8 | # 9 | # Copyright (c) 2013, Juergen Sturm, TUM 10 | # All rights reserved. 11 | # 12 | # Redistribution and use in source and binary forms, with or without 13 | # modification, are permitted provided that the following conditions 14 | # are met: 15 | # 16 | # * Redistributions of source code must retain the above copyright 17 | # notice, this list of conditions and the following disclaimer. 18 | # * Redistributions in binary form must reproduce the above 19 | # copyright notice, this list of conditions and the following 20 | # disclaimer in the documentation and/or other materials provided 21 | # with the distribution. 22 | # * Neither the name of TUM nor the names of its 23 | # contributors may be used to endorse or promote products derived 24 | # from this software without specific prior written permission. 25 | # 26 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 29 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 30 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 31 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 32 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 33 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 34 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 36 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | # POSSIBILITY OF SUCH DAMAGE. 38 | # 39 | # Requirements: 40 | # sudo apt-get install python-argparse 41 | 42 | """ 43 | This script computes the absolute trajectory error from the ground truth 44 | trajectory and the estimated trajectory. 45 | """ 46 | 47 | import numpy 48 | 49 | def align(model,data,calc_scale=False): 50 | """Align two trajectories using the method of Horn (closed-form). 
51 | 52 | Input: 53 | model -- first trajectory (3xn) 54 | data -- second trajectory (3xn) 55 | 56 | Output: 57 | rot -- rotation matrix (3x3) 58 | trans -- translation vector (3x1) 59 | trans_error -- translational error per point (1xn) 60 | 61 | """ 62 | numpy.set_printoptions(precision=3,suppress=True) 63 | model_zerocentered = model - model.mean(1) 64 | data_zerocentered = data - data.mean(1) 65 | 66 | W = numpy.zeros( (3,3) ) 67 | for column in range(model.shape[1]): 68 | W += numpy.outer(model_zerocentered[:,column],data_zerocentered[:,column]) 69 | U,d,Vh = numpy.linalg.linalg.svd(W.transpose()) 70 | S = numpy.matrix(numpy.identity( 3 )) 71 | if(numpy.linalg.det(U) * numpy.linalg.det(Vh)<0): 72 | S[2,2] = -1 73 | rot = U*S*Vh 74 | 75 | if calc_scale: 76 | rotmodel = rot*model_zerocentered 77 | dots = 0.0 78 | norms = 0.0 79 | for column in range(data_zerocentered.shape[1]): 80 | dots += numpy.dot(data_zerocentered[:,column].transpose(),rotmodel[:,column]) 81 | normi = numpy.linalg.norm(model_zerocentered[:,column]) 82 | norms += normi*normi 83 | # s = float(dots/norms) 84 | s = float(norms/dots) 85 | else: 86 | s = 1.0 87 | 88 | # trans = data.mean(1) - s*rot * model.mean(1) 89 | # model_aligned = s*rot * model + trans 90 | # alignment_error = model_aligned - data 91 | 92 | # scale the est to the gt, otherwise the ATE could be very small if the est scale is small 93 | trans = s*data.mean(1) - rot * model.mean(1) 94 | model_aligned = rot * model + trans 95 | data_alingned = s * data 96 | alignment_error = model_aligned - data_alingned 97 | 98 | trans_error = numpy.sqrt(numpy.sum(numpy.multiply(alignment_error,alignment_error),0)).A[0] 99 | 100 | return rot,trans,trans_error, s 101 | 102 | def plot_traj(ax,stamps,traj,style,color,label): 103 | """ 104 | Plot a trajectory using matplotlib. 105 | 106 | Input: 107 | ax -- the plot 108 | stamps -- time stamps (1xn) 109 | traj -- trajectory (3xn) 110 | style -- line style 111 | color -- line color 112 | label -- plot legend 113 | 114 | """ 115 | stamps.sort() 116 | interval = numpy.median([s-t for s,t in zip(stamps[1:],stamps[:-1])]) 117 | x = [] 118 | y = [] 119 | last = stamps[0] 120 | for i in range(len(stamps)): 121 | if stamps[i]-last < 2*interval: 122 | x.append(traj[i][0]) 123 | y.append(traj[i][1]) 124 | elif len(x)>0: 125 | ax.plot(x,y,style,color=color,label=label) 126 | label="" 127 | x=[] 128 | y=[] 129 | last= stamps[i] 130 | if len(x)>0: 131 | ax.plot(x,y,style,color=color,label=label) 132 | 133 | 134 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. 
If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | // modified from the CUDA version for CPU use by Daniel K. 
Suhendro 64 | 65 | #ifndef DCN_V2_IM2COL_CPU 66 | #define DCN_V2_IM2COL_CPU 67 | 68 | #ifdef __cplusplus 69 | extern "C" 70 | { 71 | #endif 72 | 73 | void modulated_deformable_im2col_cpu(const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cpu(const float *data_col, const float *data_offset, const float *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, float *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cpu(const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 88 | const int batch_size, const int channels, const int height_im, const int width_im, 89 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 90 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 91 | const int dilation_h, const int dilation_w, 92 | const int deformable_group, 93 | float *grad_offset, float *grad_mask); 94 | 95 | #ifdef __cplusplus 96 | } 97 | #endif 98 | 99 | #endif -------------------------------------------------------------------------------- /evaluator/evaluate_rpe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Software License Agreement (BSD License) 3 | # 4 | # Modified by Wenshan Wang 5 | # Copyright (c) 2013, Juergen Sturm, TUM 6 | # All rights reserved. 7 | # 8 | # Redistribution and use in source and binary forms, with or without 9 | # modification, are permitted provided that the following conditions 10 | # are met: 11 | # 12 | # * Redistributions of source code must retain the above copyright 13 | # notice, this list of conditions and the following disclaimer. 14 | # * Redistributions in binary form must reproduce the above 15 | # copyright notice, this list of conditions and the following 16 | # disclaimer in the documentation and/or other materials provided 17 | # with the distribution. 18 | # * Neither the name of TUM nor the names of its 19 | # contributors may be used to endorse or promote products derived 20 | # from this software without specific prior written permission. 21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 26 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 | # POSSIBILITY OF SUCH DAMAGE. 34 | 35 | """ 36 | This script computes the relative pose error from the ground truth trajectory 37 | and the estimated trajectory. 38 | """ 39 | 40 | import random 41 | import numpy as np 42 | import sys 43 | 44 | def ominus(a,b): 45 | """ 46 | Compute the relative 3D transformation between a and b. 47 | 48 | Input: 49 | a -- first pose (homogeneous 4x4 matrix) 50 | b -- second pose (homogeneous 4x4 matrix) 51 | 52 | Output: 53 | Relative 3D transformation from a to b. 54 | """ 55 | return np.dot(np.linalg.inv(a),b) 56 | 57 | def compute_distance(transform): 58 | """ 59 | Compute the distance of the translational component of a 4x4 homogeneous matrix. 60 | """ 61 | return np.linalg.norm(transform[0:3,3]) 62 | 63 | def compute_angle(transform): 64 | """ 65 | Compute the rotation angle from a 4x4 homogeneous matrix. 66 | """ 67 | # an invitation to 3-d vision, p 27 68 | return np.arccos( min(1,max(-1, (np.trace(transform[0:3,0:3]) - 1)/2) )) 69 | 70 | def distances_along_trajectory(traj): 71 | """ 72 | Compute the translational distances along a trajectory. 73 | """ 74 | motion = [ominus(traj[i+1],traj[i]) for i in range(len(traj)-1)] 75 | distances = [0] 76 | sum = 0 77 | for t in motion: 78 | sum += compute_distance(t) 79 | distances.append(sum) 80 | return distances 81 | 82 | 83 | def evaluate_trajectory(traj_gt, traj_est, param_max_pairs=10000, param_fixed_delta=False, 84 | param_delta=1.00): 85 | """ 86 | Compute the relative pose error between two trajectories. 
87 | 88 | Input: 89 | traj_gt -- the first trajectory (ground truth) 90 | traj_est -- the second trajectory (estimated trajectory) 91 | param_max_pairs -- number of relative poses to be evaluated 92 | param_fixed_delta -- false: evaluate over all possible pairs 93 | true: only evaluate over pairs with a given distance (delta) 94 | param_delta -- distance between the evaluated pairs 95 | param_delta_unit -- unit for comparison: 96 | "s": seconds 97 | "m": meters 98 | "rad": radians 99 | "deg": degrees 100 | "f": frames 101 | param_offset -- time offset between two trajectories (to model the delay) 102 | param_scale -- scale to be applied to the second trajectory 103 | 104 | Output: 105 | list of compared poses and the resulting translation and rotation error 106 | """ 107 | 108 | if not param_fixed_delta: 109 | if(param_max_pairs==0 or len(traj_est)param_max_pairs): 120 | pairs = random.sample(pairs,param_max_pairs) 121 | 122 | result = [] 123 | for i,j in pairs: 124 | 125 | error44 = ominus( ominus( traj_est[j], traj_est[i] ), 126 | ominus( traj_gt[j], traj_gt[i] ) ) 127 | 128 | trans = compute_distance(error44) 129 | rot = compute_angle(error44) 130 | 131 | result.append([i,j,trans,rot]) 132 | 133 | if len(result)<2: 134 | raise Exception("Couldn't find pairs between groundtruth and estimated trajectory!") 135 | 136 | return result 137 | 138 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | 64 | #ifndef DCN_V2_IM2COL_CUDA 65 | #define DCN_V2_IM2COL_CUDA 66 | 67 | #ifdef __cplusplus 68 | extern "C" 69 | { 70 | #endif 71 | 72 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 73 | const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 81 | const float *data_col, const float *data_offset, const float *data_mask, 82 | const int batch_size, const int channels, const int height_im, const int width_im, 83 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 85 | const int dilation_h, const int dilation_w, 86 | const int deformable_group, float *grad_im); 87 | 88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 90 | const int batch_size, const int channels, const int height_im, const int width_im, 91 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 93 | const int dilation_h, const int dilation_w, 94 | const int deformable_group, 95 | float *grad_offset, float *grad_mask); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | 101 | #endif -------------------------------------------------------------------------------- /evaluator/trajectory_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang 2 | # For License information please see the LICENSE file in the root directory. 
3 | 4 | import numpy as np 5 | from .transformation import pos_quats2SE_matrices, SE2pos_quat, pose2motion, motion2pose 6 | 7 | def shift0(traj): 8 | ''' 9 | Traj: a list of [t + quat] 10 | Return: translate and rotate the traj 11 | ''' 12 | traj_ses = pos_quats2SE_matrices(np.array(traj)) 13 | traj_init = traj_ses[0] 14 | traj_init_inv = np.linalg.inv(traj_init) 15 | new_traj = [] 16 | for tt in traj_ses: 17 | ttt=traj_init_inv.dot(tt) 18 | new_traj.append(SE2pos_quat(ttt)) 19 | return np.array(new_traj) 20 | 21 | def ned2cam(traj): 22 | ''' 23 | transfer a ned traj to camera frame traj 24 | ''' 25 | T = np.array([[0,1,0,0], 26 | [0,0,1,0], 27 | [1,0,0,0], 28 | [0,0,0,1]], dtype=np.float32) 29 | T_inv = np.linalg.inv(T) 30 | new_traj = [] 31 | traj_ses = pos_quats2SE_matrices(np.array(traj)) 32 | 33 | for tt in traj_ses: 34 | ttt=T.dot(tt).dot(T_inv) 35 | new_traj.append(SE2pos_quat(ttt)) 36 | 37 | return np.array(new_traj) 38 | 39 | def cam2ned(traj): 40 | ''' 41 | transfer a camera traj to ned frame traj 42 | ''' 43 | T = np.array([[0,0,1,0], 44 | [1,0,0,0], 45 | [0,1,0,0], 46 | [0,0,0,1]], dtype=np.float32) 47 | T_inv = np.linalg.inv(T) 48 | new_traj = [] 49 | traj_ses = pos_quats2SE_matrices(np.array(traj)) 50 | 51 | for tt in traj_ses: 52 | ttt=T.dot(tt).dot(T_inv) 53 | new_traj.append(SE2pos_quat(ttt)) 54 | 55 | return np.array(new_traj) 56 | 57 | 58 | def trajectory_transform(gt_traj, est_traj): 59 | ''' 60 | 1. center the start frame to the axis origin 61 | 2. align the GT frame (NED) with estimation frame (camera) 62 | ''' 63 | gt_traj_trans = shift0(gt_traj) 64 | est_traj_trans = shift0(est_traj) 65 | 66 | # gt_traj_trans = ned2cam(gt_traj_trans) 67 | # est_traj_trans = cam2ned(est_traj_trans) 68 | 69 | return gt_traj_trans, est_traj_trans 70 | 71 | def rescale_bk(poses_gt, poses): 72 | motion_gt = pose2motion(poses_gt) 73 | motion = pose2motion(poses) 74 | 75 | speed_square_gt = np.sum(motion_gt[:,0:3,3]*motion_gt[:,0:3,3],1) 76 | speed_gt = np.sqrt(speed_square_gt) 77 | speed_square = np.sum(motion[:,0:3,3]*motion[:,0:3,3],1) 78 | speed = np.sqrt(speed_square) 79 | # when the speed is small, the scale could become very large 80 | # import ipdb;ipdb.set_trace() 81 | mask = (speed_gt>0.0001) # * (speed>0.00001) 82 | scale = np.mean((speed[mask])/speed_gt[mask]) 83 | scale = 1.0/scale 84 | motion[:,0:3,3] = motion[:,0:3,3]*scale 85 | pose_update = motion2pose(motion) 86 | return pose_update, scale 87 | 88 | def pose2trans(pose_data): 89 | data_size = len(pose_data) 90 | trans = [] 91 | for i in range(0,data_size-1): 92 | tran = np.array(pose_data[i+1][:3]) - np.array(pose_data[i][:3]) # np.linalg.inv(data[i]).dot(data[i+1]) 93 | trans.append(tran) 94 | 95 | return np.array(trans) # N x 3 96 | 97 | 98 | def rescale(poses_gt, poses): 99 | ''' 100 | similar to rescale 101 | poses_gt/poses: N x 7 poselist in quaternion format 102 | ''' 103 | trans_gt = pose2trans(poses_gt) 104 | trans = pose2trans(poses) 105 | 106 | speed_square_gt = np.sum(trans_gt*trans_gt,1) 107 | speed_gt = np.sqrt(speed_square_gt) 108 | speed_square = np.sum(trans*trans,1) 109 | speed = np.sqrt(speed_square) 110 | # when the speed is small, the scale could become very large 111 | # import ipdb;ipdb.set_trace() 112 | mask = (speed_gt>0.0001) # * (speed>0.00001) 113 | scale = np.mean((speed[mask])/speed_gt[mask]) 114 | scale = 1.0/scale 115 | poses[:,0:3] = poses[:,0:3]*scale 116 | return poses, scale 117 | 118 | def trajectory_scale(traj, scale): 119 | for ttt in traj: 120 | ttt[0:3,3] = ttt[0:3,3]*scale 121 | 
return traj 122 | 123 | def timestamp_associate(first_list, second_list, max_difference): 124 | """ 125 | Associate two trajectory of [stamp,data]. As the time stamps never match exactly, we aim 126 | to find the closest match for every input tuple. 127 | 128 | Input: 129 | first_list -- first list of (stamp,data) 130 | second_list -- second list of (stamp,data) 131 | max_difference -- search radius for candidate generation 132 | 133 | Output: 134 | first_res: matched data from the first list 135 | second_res: matched data from the second list 136 | 137 | """ 138 | first_dict = dict([(l[0],l[1:]) for l in first_list if len(l)>1]) 139 | second_dict = dict([(l[0],l[1:]) for l in second_list if len(l)>1]) 140 | 141 | first_keys = first_dict.keys() 142 | second_keys = second_dict.keys() 143 | potential_matches = [(abs(a - b ), a, b) 144 | for a in first_keys 145 | for b in second_keys 146 | if abs(a - b) < max_difference] 147 | potential_matches.sort() 148 | matches = [] 149 | for diff, a, b in potential_matches: 150 | if a in first_keys and b in second_keys: 151 | first_keys.remove(a) 152 | second_keys.remove(b) 153 | matches.append((a, b)) 154 | 155 | matches.sort() 156 | 157 | first_res = [] 158 | second_res = [] 159 | for t1, t2 in matches: 160 | first_res.append(first_dict[t1]) 161 | second_res.append(second_dict[t2]) 162 | return np.array(first_res), np.array(second_res) 163 | -------------------------------------------------------------------------------- /Network/VOFlowNet.py: -------------------------------------------------------------------------------- 1 | # Software License Agreement (BSD License) 2 | # 3 | # Copyright (c) 2020, Wenshan Wang, CMU 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions 8 | # are met: 9 | # 10 | # * Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # * Redistributions in binary form must reproduce the above 13 | # copyright notice, this list of conditions and the following 14 | # disclaimer in the documentation and/or other materials provided 15 | # with the distribution. 16 | # * Neither the name of CMU nor the names of its 17 | # contributors may be used to endorse or promote products derived 18 | # from this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 30 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | # POSSIBILITY OF SUCH DAMAGE. 
32 | 33 | import torch 34 | import torch.nn as nn 35 | import torch.nn.functional as F 36 | import math 37 | 38 | def conv(in_planes, out_planes, kernel_size=3, stride=2, padding=1, dilation=1, bn_layer=False, bias=True): 39 | if bn_layer: 40 | return nn.Sequential( 41 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation, bias=bias), 42 | nn.BatchNorm2d(out_planes), 43 | nn.ReLU(inplace=True) 44 | ) 45 | else: 46 | return nn.Sequential( 47 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation), 48 | nn.ReLU(inplace=True) 49 | ) 50 | 51 | def linear(in_planes, out_planes): 52 | return nn.Sequential( 53 | nn.Linear(in_planes, out_planes), 54 | nn.ReLU(inplace=True) 55 | ) 56 | 57 | class BasicBlock(nn.Module): 58 | expansion = 1 59 | def __init__(self, inplanes, planes, stride, downsample, pad, dilation): 60 | super(BasicBlock, self).__init__() 61 | 62 | self.conv1 = conv(inplanes, planes, 3, stride, pad, dilation) 63 | self.conv2 = nn.Conv2d(planes, planes, 3, 1, pad, dilation) 64 | 65 | self.downsample = downsample 66 | self.stride = stride 67 | 68 | def forward(self, x): 69 | out = self.conv1(x) 70 | out = self.conv2(out) 71 | 72 | if self.downsample is not None: 73 | x = self.downsample(x) 74 | out += x 75 | 76 | return F.relu(out, inplace=True) 77 | 78 | class VOFlowRes(nn.Module): 79 | def __init__(self): 80 | super(VOFlowRes, self).__init__() 81 | inputnum = 5 82 | blocknums = [2,2,3,4,6,7,3] 83 | outputnums = [32,64,64,128,128,256,256] 84 | 85 | self.firstconv = nn.Sequential(conv(inputnum, 32, 3, 2, 1, 1, False), 86 | conv(32, 32, 3, 1, 1, 1), 87 | conv(32, 32, 3, 1, 1, 1)) 88 | 89 | self.inplanes = 32 90 | 91 | self.layer1 = self._make_layer(BasicBlock, outputnums[2], blocknums[2], 2, 1, 1) # 40 x 28 92 | self.layer2 = self._make_layer(BasicBlock, outputnums[3], blocknums[3], 2, 1, 1) # 20 x 14 93 | self.layer3 = self._make_layer(BasicBlock, outputnums[4], blocknums[4], 2, 1, 1) # 10 x 7 94 | self.layer4 = self._make_layer(BasicBlock, outputnums[5], blocknums[5], 2, 1, 1) # 5 x 4 95 | self.layer5 = self._make_layer(BasicBlock, outputnums[6], blocknums[6], 2, 1, 1) # 3 x 2 96 | fcnum = outputnums[6] * 6 97 | 98 | fc1_trans = linear(fcnum, 128) 99 | fc2_trans = linear(128,32) 100 | fc3_trans = nn.Linear(32,3) 101 | 102 | fc1_rot = linear(fcnum, 128) 103 | fc2_rot = linear(128,32) 104 | fc3_rot = nn.Linear(32,3) 105 | 106 | 107 | self.voflow_trans = nn.Sequential(fc1_trans, fc2_trans, fc3_trans) 108 | self.voflow_rot = nn.Sequential(fc1_rot, fc2_rot, fc3_rot) 109 | 110 | 111 | def _make_layer(self, block, planes, blocks, stride, pad, dilation): 112 | downsample = None 113 | if stride != 1 or self.inplanes != planes * block.expansion: 114 | downsample = nn.Conv2d(self.inplanes, planes * block.expansion, 115 | kernel_size=1, stride=stride) 116 | 117 | layers = [] 118 | layers.append(block(self.inplanes, planes, stride, downsample, pad, dilation)) 119 | self.inplanes = planes * block.expansion 120 | for i in range(1, blocks): 121 | layers.append(block(self.inplanes, planes,1,None,pad,dilation)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.firstconv(x) 127 | x = self.layer1(x) 128 | x = self.layer2(x) 129 | x = self.layer3(x) 130 | x = self.layer4(x) 131 | x = self.layer5(x) 132 | 133 | x = x.view(x.shape[0], -1) 134 | x_trans = self.voflow_trans(x) 135 | x_rot = self.voflow_rot(x) 136 | return torch.cat((x_trans, x_rot), dim=1) 
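# ----------------------------------------------------------------------------
# Example usage (illustrative sketch, not part of the original file). It
# assumes the default 448x640 input images, which the dataloader downscales
# by 4 before the flow enters this network, and it assumes the 5 input
# channels are optical flow (2), an intrinsics layer (2) and a
# motion-segmentation mask (1); the exact channel layout is an assumption.
if __name__ == '__main__':
    model = VOFlowRes()
    dummy_input = torch.randn(1, 5, 112, 160)   # B x C x H x W = 1 x 5 x (448/4) x (640/4)
    motion = model(dummy_input)                  # 3 translation + 3 rotation components
    print(motion.shape)                          # torch.Size([1, 6])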
-------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: dytanvo 2 | channels: 3 | - pytorch3d 4 | - pytorch 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=main 9 | - blas=1.0=mkl 10 | - bzip2=1.0.8=h516909a_3 11 | - ca-certificates=2022.5.18.1=ha878542_0 12 | - cairo=1.14.12=h8948797_3 13 | - certifi=2022.5.18.1=py38h578d9bd_0 14 | - colorama=0.4.4=pyh9f0ad1d_0 15 | - cudatoolkit=10.2.89=hfd86e86_1 16 | - cudatoolkit-dev=11.4.0=py38h497a2fe_1 17 | - cycler=0.10.0=py38_0 18 | - dbus=1.13.18=hb2f20db_0 19 | - expat=2.2.10=he6710b0_2 20 | - ffmpeg=4.0.2=ha0c5888_2 21 | - fontconfig=2.13.0=h9420a91_0 22 | - freeglut=3.2.1=h58526e2_0 23 | - freetype=2.10.4=h5ab3b9f_0 24 | - glib=2.63.1=h5a9c865_0 25 | - gmp=6.1.2=hf484d3e_1000 26 | - gnutls=3.5.19=h2a4e5f8_1 27 | - graphite2=1.3.13=h58526e2_1001 28 | - gst-plugins-base=1.14.0=hbbd80ab_1 29 | - gstreamer=1.14.0=hb453b48_1 30 | - harfbuzz=1.8.8=hffaf4a1_0 31 | - hdf5=1.10.2=hc401514_3 32 | - icu=58.2=he6710b0_3 33 | - intel-openmp=2020.2=254 34 | - jasper=2.0.14=h07fcdf6_1 35 | - jpeg=9b=h024ee3a_2 36 | - kiwisolver=1.3.0=py38h2531618_0 37 | - kornia=0.5.3=pyhd8ed1ab_0 38 | - lcms2=2.11=h396b838_0 39 | - ld_impl_linux-64=2.33.1=h53a641e_7 40 | - libedit=3.1.20191231=h14c3975_1 41 | - libffi=3.2.1=hf484d3e_1007 42 | - libgcc-ng=9.1.0=hdf63c60_0 43 | - libgfortran=3.0.0=1 44 | - libgfortran-ng=7.3.0=hdf63c60_0 45 | - libglu=9.0.0=he1b5a44_1001 46 | - libiconv=1.16=h516909a_0 47 | - libopencv=3.4.2=hb342d67_1 48 | - libpng=1.6.37=hbc83047_0 49 | - libstdcxx-ng=9.1.0=hdf63c60_0 50 | - libtiff=4.1.0=h2733197_1 51 | - libuuid=1.0.3=h1bed415_2 52 | - libuv=1.40.0=h7b6447c_0 53 | - libxcb=1.14=h7b6447c_0 54 | - libxml2=2.9.10=hb55368b_3 55 | - lz4-c=1.9.2=heb0550a_3 56 | - matplotlib=3.3.2=0 57 | - matplotlib-base=3.3.2=py38h817c723_0 58 | - mkl=2020.2=256 59 | - mkl-service=2.3.0=py38he904b0f_0 60 | - mkl_fft=1.2.0=py38h23d657b_0 61 | - mkl_random=1.1.1=py38h0573a6f_0 62 | - ncurses=6.2=he6710b0_1 63 | - nettle=3.3=0 64 | - ninja=1.10.2=py38hff7bd54_0 65 | - olefile=0.46=py_0 66 | - openh264=1.8.0=hdbcaa40_1000 67 | - openssl=1.1.1h=h516909a_0 68 | - pcre=8.44=he6710b0_0 69 | - pillow=8.0.1=py38he98fc37_0 70 | - pip=20.3=py38h06a4308_0 71 | - pixman=0.40.0=h36c2ea0_0 72 | - pyparsing=2.4.7=py_0 73 | - pyqt=5.9.2=py38h05f1152_4 74 | - python=3.8.3=cpython_he5300dc_0 75 | - python-dateutil=2.8.2=pyhd8ed1ab_0 76 | - python_abi=3.8=1_cp38 77 | - pytorch=1.7.0=py3.8_cuda10.2.89_cudnn7.6.5_0 78 | - pytorch3d=0.3.0=py38_cu102_pyt170 79 | - pyyaml=5.3.1=py38h8df0ef7_1 80 | - qt=5.9.7=h5867ecd_1 81 | - readline=8.0=h7b6447c_0 82 | - scipy=1.5.2=py38h0b6359f_0 83 | - setuptools=50.3.2=py38h06a4308_2 84 | - sip=4.19.13=py38he6710b0_0 85 | - six=1.15.0=py38h06a4308_0 86 | - sqlite=3.33.0=h62c20be_0 87 | - termcolor=1.1.0=py_2 88 | - tk=8.6.10=hbc83047_0 89 | - torchaudio=0.7.0=py38 90 | - torchvision=0.8.1=py38_cu102 91 | - tornado=6.1=py38h27cfd23_0 92 | - typing_extensions=4.3.0=pyha770c72_0 93 | - wheel=0.36.0=pyhd3eb1b0_0 94 | - x264=1!152.20180806=h14c3975_0 95 | - xorg-fixesproto=5.0=h14c3975_1002 96 | - xorg-inputproto=2.3.2=h14c3975_1002 97 | - xorg-kbproto=1.0.7=h14c3975_1002 98 | - xorg-libx11=1.6.12=h516909a_0 99 | - xorg-libxau=1.0.9=h14c3975_0 100 | - xorg-libxext=1.3.4=h516909a_0 101 | - xorg-libxfixes=5.0.3=h516909a_1004 102 | - xorg-libxi=1.7.10=h516909a_0 103 | - 
xorg-xextproto=7.3.0=h14c3975_1002 104 | - xorg-xproto=7.0.31=h14c3975_1007 105 | - xz=5.2.5=h7b6447c_0 106 | - yacs=0.1.8=pyhd8ed1ab_0 107 | - yaml=0.2.5=h516909a_0 108 | - zlib=1.2.11=h7b6447c_3 109 | - zstd=1.4.5=h9ceee32_0 110 | - pip: 111 | - absl-py==0.11.0 112 | - antlr4-python3-runtime==4.9.3 113 | - appdirs==1.4.4 114 | - beautifulsoup4==4.11.1 115 | - black==21.4b2 116 | - cachetools==4.1.1 117 | - chardet==3.0.4 118 | - charset-normalizer==2.1.1 119 | - cloudpickle==1.6.0 120 | - cupy-cuda102==11.1.0 121 | - cython==0.29.21 122 | - data==0.4 123 | - dataclasses==0.6 124 | - dcnv2==0.1 125 | - decorator==5.1.1 126 | - detectron2==0.5+cu102 127 | - fastrlock==0.8 128 | - filelock==3.8.0 129 | - funcsigs==1.0.2 130 | - future==0.18.2 131 | - fvcore==0.1.2.post20201122 132 | - gdown==4.5.1 133 | - google-auth==1.23.0 134 | - google-auth-oauthlib==0.4.2 135 | - grpcio==1.34.0 136 | - hydra-core==1.2.0 137 | - idna==2.10 138 | - imageio==2.9.0 139 | - importlib-resources==5.9.0 140 | - iopath==0.1.8 141 | - joblib==0.17.0 142 | - jsonpatch==1.32 143 | - jsonpointer==2.3 144 | - latex==0.7.0 145 | - lxml==4.9.1 146 | - markdown==3.3.3 147 | - mypy-extensions==0.4.3 148 | - ngransac==0.0.0 149 | - numpy==1.23.2 150 | - oauthlib==3.1.0 151 | - omegaconf==2.2.3 152 | - opencv-python==4.4.0.46 153 | - packaging==21.3 154 | - pathspec==0.10.1 155 | - portalocker==2.0.0 156 | - protobuf==3.14.0 157 | - pyasn1==0.4.8 158 | - pyasn1-modules==0.2.8 159 | - pycocotools==2.0.4 160 | - pydot==1.4.1 161 | - pypng==0.0.20 162 | - pysocks==1.7.1 163 | - pytransform3d==1.14.0 164 | - pyzmq==23.2.1 165 | - regex==2022.8.17 166 | - requests==2.25.0 167 | - requests-oauthlib==1.3.0 168 | - rsa==4.6 169 | - shutilwhich==1.1.0 170 | - soupsieve==2.3.2.post1 171 | - splines==0.2.0 172 | - tabulate==0.8.7 173 | - tempdir==0.7.1 174 | - tensorboard==2.4.0 175 | - tensorboard-data-server==0.6.1 176 | - tensorboard-plugin-wit==1.7.0 177 | - timm==0.6.7 178 | - toml==0.10.2 179 | - torchfile==0.1.0 180 | - tqdm==4.54.0 181 | - trimesh==3.9.3 182 | - urllib3==1.26.2 183 | - visdom==0.1.8.9 184 | - websocket-client==1.4.0 185 | - werkzeug==1.0.1 186 | - workflow==1.0 187 | - zipp==3.8.1 188 | prefix: /home/shihao/miniconda3/envs/rigidmask_v0 189 | -------------------------------------------------------------------------------- /vo_trajectory_from_folder.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | from Datasets.utils import ToTensor, Compose, CropCenter, ResizeData, dataset_intrinsics, DownscaleFlow 3 | from Datasets.utils import plot_traj, visflow, load_kiiti_intrinsics, load_sceneflow_extrinsics 4 | from Datasets.tartanTrajFlowDataset import TrajFolderDataset 5 | from evaluator.transformation import pose_quats2motion_ses, motion_ses2pose_quats 6 | from evaluator.tartanair_evaluator import TartanAirEvaluator 7 | from evaluator.evaluator_base import per_frame_scale_alignment 8 | from DytanVO import DytanVO 9 | 10 | import argparse 11 | import numpy as np 12 | import cv2 13 | from os import mkdir 14 | from os.path import isdir 15 | 16 | def get_args(): 17 | parser = argparse.ArgumentParser(description='Inference code of DytanVO') 18 | 19 | parser.add_argument('--batch-size', type=int, default=1, 20 | help='batch size (default: 1)') 21 | parser.add_argument('--worker-num', type=int, default=1, 22 | help='data loader worker number (default: 1)') 23 | parser.add_argument('--image-width', type=int, default=640, 24 | help='image 
width (default: 640)') 25 | parser.add_argument('--image-height', type=int, default=448, 26 | help='image height (default: 448)') 27 | parser.add_argument('--vo-model-name', default='', 28 | help='name of pretrained VO model (default: "")') 29 | parser.add_argument('--flow-model-name', default='', 30 | help='name of pretrained flow model (default: "")') 31 | parser.add_argument('--pose-model-name', default='', 32 | help='name of pretrained pose model (default: "")') 33 | parser.add_argument('--seg-model-name', default='', 34 | help='name of pretrained segmentation model (default: "")') 35 | parser.add_argument('--airdos', action='store_true', default=False, 36 | help='airdos test (default: False)') 37 | parser.add_argument('--rs_d435', action='store_true', default=False, 38 | help='realsense d435i test (default: False)') 39 | parser.add_argument('--sceneflow', action='store_true', default=False, 40 | help='sceneflow test (default: False)') 41 | parser.add_argument('--kitti', action='store_true', default=False, 42 | help='kitti test (default: False)') 43 | parser.add_argument('--commaai', action='store_true', default=False, 44 | help='commaai test (default: False)') 45 | parser.add_argument('--kitti-intrinsics-file', default='', 46 | help='kitti intrinsics file calib.txt (default: )') 47 | parser.add_argument('--test-dir', default='', 48 | help='test trajectory folder where the RGB images are (default: "")') 49 | parser.add_argument('--pose-file', default='', 50 | help='test trajectory gt pose file, used for scale calculation, and visualization (default: "")') 51 | parser.add_argument('--save-flow', action='store_true', default=False, 52 | help='save optical flow (default: False)') 53 | parser.add_argument('--seg-thresh', type=float, default=0.7, 54 | help='threshold for motion segmentation') 55 | parser.add_argument('--iter-num', type=int, default=2, 56 | help='number of iterations') 57 | 58 | args = parser.parse_args() 59 | 60 | return args 61 | 62 | 63 | if __name__ == '__main__': 64 | args = get_args() 65 | 66 | testvo = DytanVO(args.vo_model_name, args.seg_model_name, args.image_height, args.image_width, 67 | args.kitti, args.flow_model_name, args.pose_model_name) 68 | 69 | # load trajectory data from a folder 70 | if args.kitti: 71 | datastr = 'kitti' 72 | elif args.airdos: 73 | datastr = 'airdos' 74 | elif args.rs_d435: 75 | datastr = 'rs_d435' 76 | elif args.sceneflow: 77 | datastr = 'sceneflow' 78 | elif args.commaai: 79 | datastr = 'commaai' 80 | else: 81 | datastr = 'tartanair' 82 | focalx, focaly, centerx, centery, baseline = dataset_intrinsics(datastr, '15mm' in args.test_dir) 83 | if args.kitti_intrinsics_file.endswith('.txt') and datastr == 'kitti': 84 | focalx, focaly, centerx, centery, baseline = load_kiiti_intrinsics(args.kitti_intrinsics_file) 85 | 86 | if datastr == 'kitti': 87 | transform = Compose([ResizeData((args.image_height, 1226)), CropCenter((args.image_height, args.image_width)), DownscaleFlow(), ToTensor()]) 88 | else: 89 | transform = Compose([CropCenter((args.image_height, args.image_width)), DownscaleFlow(), ToTensor()]) 90 | 91 | testDataset = TrajFolderDataset(args.test_dir, transform=transform, 92 | focalx=focalx, focaly=focaly, centerx=centerx, centery=centery) 93 | testDataloader = DataLoader(testDataset, batch_size=args.batch_size, 94 | shuffle=False, num_workers=args.worker_num) 95 | testDataiter = iter(testDataloader) 96 | 97 | motionlist = [] 98 | testname = datastr + '_' + args.vo_model_name.split('.')[0] + '_' + args.test_dir.split('/')[-1] 99 | 
if args.save_flow: 100 | flowdir = 'results/'+testname+'_flow' 101 | if not isdir(flowdir): 102 | mkdir(flowdir) 103 | flowcount = 0 104 | while True: 105 | try: 106 | sample = testDataiter.next() 107 | except StopIteration: 108 | break 109 | 110 | motion, flow = testvo.test_batch(sample, [focalx, centerx, centery, baseline], args.seg_thresh, args.iter_num) 111 | motionlist.append(motion) 112 | 113 | if args.save_flow: 114 | for k in range(flow.shape[0]): 115 | flowk = flow[k].transpose(1,2,0) 116 | np.save(flowdir+'/'+str(flowcount).zfill(6)+'.npy',flowk) 117 | flow_vis = visflow(flowk) 118 | cv2.imwrite(flowdir+'/'+str(flowcount).zfill(6)+'.png',flow_vis) 119 | flowcount += 1 120 | 121 | motions = np.array(motionlist) 122 | 123 | # calculate ATE, RPE, KITTI-RPE 124 | if args.pose_file.endswith('.txt'): 125 | if datastr == 'sceneflow': 126 | gtposes = load_sceneflow_extrinsics(args.pose_file) 127 | else: 128 | gtposes = np.loadtxt(args.pose_file) 129 | if datastr == 'airdos': 130 | gtposes = gtposes[:,1:] # remove the first column of timestamps 131 | 132 | gtmotions = pose_quats2motion_ses(gtposes) 133 | estmotion_scale = per_frame_scale_alignment(gtmotions, motions) 134 | estposes = motion_ses2pose_quats(estmotion_scale) 135 | 136 | evaluator = TartanAirEvaluator() 137 | results = evaluator.evaluate_one_trajectory(gtposes, estposes, scale=True, kittitype=(datastr=='kitti')) 138 | 139 | print("==> ATE: %.4f,\t KITTI-R/t: %.4f, %.4f" %(results['ate_score'], results['kitti_score'][0], results['kitti_score'][1])) 140 | 141 | # save results and visualization 142 | plot_traj(results['gt_aligned'], results['est_aligned'], vis=False, savefigname='results/'+testname+'.png', title='ATE %.4f' %(results['ate_score'])) 143 | np.savetxt('results/'+testname+'.txt',results['est_aligned']) 144 | else: 145 | np.savetxt('results/'+testname+'.txt', motion_ses2pose_quats(motions)) -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | at::Tensor 10 | dcn_v2_forward(const at::Tensor &input, 11 | const at::Tensor &weight, 12 | const at::Tensor &bias, 13 | const at::Tensor &offset, 14 | const at::Tensor &mask, 15 | const int kernel_h, 16 | const int kernel_w, 17 | const int stride_h, 18 | const int stride_w, 19 | const int pad_h, 20 | const int pad_w, 21 | const int dilation_h, 22 | const int dilation_w, 23 | const int deformable_group) 24 | { 25 | if (input.type().is_cuda()) 26 | { 27 | #ifdef WITH_CUDA 28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask, 29 | kernel_h, kernel_w, 30 | stride_h, stride_w, 31 | pad_h, pad_w, 32 | dilation_h, dilation_w, 33 | deformable_group); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | else{ 39 | return dcn_v2_cpu_forward(input, weight, bias, offset, mask, 40 | kernel_h, kernel_w, 41 | stride_h, stride_w, 42 | pad_h, pad_w, 43 | dilation_h, dilation_w, 44 | deformable_group); 45 | } 46 | } 47 | 48 | std::vector 49 | dcn_v2_backward(const at::Tensor &input, 50 | const at::Tensor &weight, 51 | const at::Tensor &bias, 52 | const at::Tensor &offset, 53 | const at::Tensor &mask, 54 | const at::Tensor &grad_output, 55 | int kernel_h, int kernel_w, 56 | int stride_h, int stride_w, 57 | int pad_h, int pad_w, 58 | int dilation_h, int dilation_w, 59 | int 
deformable_group) 60 | { 61 | if (input.type().is_cuda()) 62 | { 63 | #ifdef WITH_CUDA 64 | return dcn_v2_cuda_backward(input, 65 | weight, 66 | bias, 67 | offset, 68 | mask, 69 | grad_output, 70 | kernel_h, kernel_w, 71 | stride_h, stride_w, 72 | pad_h, pad_w, 73 | dilation_h, dilation_w, 74 | deformable_group); 75 | #else 76 | AT_ERROR("Not compiled with GPU support"); 77 | #endif 78 | } 79 | else{ 80 | return dcn_v2_cpu_backward(input, 81 | weight, 82 | bias, 83 | offset, 84 | mask, 85 | grad_output, 86 | kernel_h, kernel_w, 87 | stride_h, stride_w, 88 | pad_h, pad_w, 89 | dilation_h, dilation_w, 90 | deformable_group); 91 | } 92 | } 93 | 94 | std::tuple 95 | dcn_v2_psroi_pooling_forward(const at::Tensor &input, 96 | const at::Tensor &bbox, 97 | const at::Tensor &trans, 98 | const int no_trans, 99 | const float spatial_scale, 100 | const int output_dim, 101 | const int group_size, 102 | const int pooled_size, 103 | const int part_size, 104 | const int sample_per_part, 105 | const float trans_std) 106 | { 107 | if (input.type().is_cuda()) 108 | { 109 | #ifdef WITH_CUDA 110 | return dcn_v2_psroi_pooling_cuda_forward(input, 111 | bbox, 112 | trans, 113 | no_trans, 114 | spatial_scale, 115 | output_dim, 116 | group_size, 117 | pooled_size, 118 | part_size, 119 | sample_per_part, 120 | trans_std); 121 | #else 122 | AT_ERROR("Not compiled with GPU support"); 123 | #endif 124 | } 125 | else{ 126 | return dcn_v2_psroi_pooling_cpu_forward(input, 127 | bbox, 128 | trans, 129 | no_trans, 130 | spatial_scale, 131 | output_dim, 132 | group_size, 133 | pooled_size, 134 | part_size, 135 | sample_per_part, 136 | trans_std); 137 | } 138 | } 139 | 140 | std::tuple 141 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad, 142 | const at::Tensor &input, 143 | const at::Tensor &bbox, 144 | const at::Tensor &trans, 145 | const at::Tensor &top_count, 146 | const int no_trans, 147 | const float spatial_scale, 148 | const int output_dim, 149 | const int group_size, 150 | const int pooled_size, 151 | const int part_size, 152 | const int sample_per_part, 153 | const float trans_std) 154 | { 155 | if (input.type().is_cuda()) 156 | { 157 | #ifdef WITH_CUDA 158 | return dcn_v2_psroi_pooling_cuda_backward(out_grad, 159 | input, 160 | bbox, 161 | trans, 162 | top_count, 163 | no_trans, 164 | spatial_scale, 165 | output_dim, 166 | group_size, 167 | pooled_size, 168 | part_size, 169 | sample_per_part, 170 | trans_std); 171 | #else 172 | AT_ERROR("Not compiled with GPU support"); 173 | #endif 174 | } 175 | else{ 176 | return dcn_v2_psroi_pooling_cpu_backward(out_grad, 177 | input, 178 | bbox, 179 | trans, 180 | top_count, 181 | no_trans, 182 | spatial_scale, 183 | output_dim, 184 | group_size, 185 | pooled_size, 186 | part_size, 187 | sample_per_part, 188 | trans_std); 189 | } 190 | } -------------------------------------------------------------------------------- /Network/rigidmask/det_losses.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Portions of this code are from 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import pdb 12 | import torch 13 | import torch.nn as nn 
14 | from .det_utils import _transpose_and_gather_feat 15 | import torch.nn.functional as F 16 | 17 | 18 | def _slow_neg_loss(pred, gt): 19 | '''focal loss from CornerNet''' 20 | pos_inds = gt.eq(1) 21 | neg_inds = gt.lt(1) 22 | 23 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 24 | 25 | loss = 0 26 | pos_pred = pred[pos_inds] 27 | neg_pred = pred[neg_inds] 28 | 29 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 30 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 31 | 32 | num_pos = pos_inds.float().sum() 33 | pos_loss = pos_loss.sum() 34 | neg_loss = neg_loss.sum() 35 | 36 | if pos_pred.nelement() == 0: 37 | loss = loss - neg_loss 38 | else: 39 | loss = loss - (pos_loss + neg_loss) / num_pos 40 | return loss 41 | 42 | 43 | def _neg_loss(pred, gt, heat_logits): 44 | ''' Modified focal loss. Exactly the same as CornerNet. 45 | Runs faster and costs a little bit more memory 46 | Arguments: 47 | pred (batch x c x h x w) 48 | gt_regr (batch x c x h x w) 49 | ''' 50 | pos_inds = gt.eq(1).float() 51 | neg_inds = gt.lt(1).float() 52 | 53 | neg_weights = torch.pow(1 - gt, 4) 54 | 55 | loss = 0 56 | 57 | logpred = torch.nn.functional.log_softmax(heat_logits,1) 58 | pos_loss = logpred[:,0:1] * torch.pow(1 - pred, 2) * pos_inds 59 | neg_loss = logpred[:,1:2] * torch.pow(pred, 2) * neg_weights * neg_inds 60 | 61 | num_pos = pos_inds.float().sum() 62 | pos_loss = pos_loss.sum() 63 | neg_loss = neg_loss.sum() 64 | 65 | if num_pos == 0: 66 | loss = loss - neg_loss 67 | else: 68 | loss = loss - (pos_loss + neg_loss) / num_pos 69 | return loss 70 | 71 | def _not_faster_neg_loss(pred, gt): 72 | pos_inds = gt.eq(1).float() 73 | neg_inds = gt.lt(1).float() 74 | num_pos = pos_inds.float().sum() 75 | neg_weights = torch.pow(1 - gt, 4) 76 | 77 | loss = 0 78 | trans_pred = pred * neg_inds + (1 - pred) * pos_inds 79 | weight = neg_weights * neg_inds + pos_inds 80 | all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight 81 | all_loss = all_loss.sum() 82 | 83 | if num_pos > 0: 84 | all_loss /= num_pos 85 | loss -= all_loss 86 | return loss 87 | 88 | def _slow_reg_loss(regr, gt_regr, mask): 89 | num = mask.float().sum() 90 | mask = mask.unsqueeze(2).expand_as(gt_regr) 91 | 92 | regr = regr[mask] 93 | gt_regr = gt_regr[mask] 94 | 95 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False) 96 | regr_loss = regr_loss / (num + 1e-4) 97 | return regr_loss 98 | 99 | def _reg_loss(regr, gt_regr, mask): 100 | ''' L1 regression loss 101 | Arguments: 102 | regr (batch x max_objects x dim) 103 | gt_regr (batch x max_objects x dim) 104 | mask (batch x max_objects) 105 | ''' 106 | num = mask.float().sum() 107 | mask = mask.unsqueeze(2).expand_as(gt_regr).float() 108 | 109 | regr = regr * mask 110 | gt_regr = gt_regr * mask 111 | 112 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False) 113 | regr_loss = regr_loss / (num + 1e-4) 114 | return regr_loss 115 | 116 | class FocalLoss(nn.Module): 117 | '''nn.Module warpper for focal loss''' 118 | def __init__(self): 119 | super(FocalLoss, self).__init__() 120 | self.neg_loss = _neg_loss 121 | 122 | def forward(self, out, target, logits): 123 | return self.neg_loss(out, target, logits) 124 | 125 | class RegLoss(nn.Module): 126 | '''Regression loss for an output tensor 127 | Arguments: 128 | output (batch x dim x h x w) 129 | mask (batch x max_objects) 130 | ind (batch x max_objects) 131 | target (batch x max_objects x dim) 132 | ''' 133 | def __init__(self): 134 | super(RegLoss, 
self).__init__() 135 | 136 | def forward(self, output, mask, ind, target): 137 | pred = _transpose_and_gather_feat(output, ind) 138 | loss = _reg_loss(pred, target, mask) 139 | return loss 140 | 141 | class RegL1Loss(nn.Module): 142 | def __init__(self): 143 | super(RegL1Loss, self).__init__() 144 | 145 | def forward(self, output, mask, ind, target): 146 | pred = _transpose_and_gather_feat(output, ind) 147 | mask = mask.unsqueeze(2).expand_as(pred).float() 148 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 149 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 150 | loss = loss / (mask.sum() + 1e-4) 151 | return loss 152 | 153 | class NormRegL1Loss(nn.Module): 154 | def __init__(self): 155 | super(NormRegL1Loss, self).__init__() 156 | 157 | def forward(self, output, mask, ind, target): 158 | pred = _transpose_and_gather_feat(output, ind) 159 | mask = mask.unsqueeze(2).expand_as(pred).float() 160 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 161 | pred = pred / (target + 1e-4) 162 | target = target * 0 + 1 163 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 164 | loss = loss / (mask.sum() + 1e-4) 165 | return loss 166 | 167 | class RegWeightedL1Loss(nn.Module): 168 | def __init__(self): 169 | super(RegWeightedL1Loss, self).__init__() 170 | 171 | def forward(self, output, mask, ind, target): 172 | pred = _transpose_and_gather_feat(output, ind) 173 | mask = mask.float() 174 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 175 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 176 | loss = loss / (mask.sum() + 1e-4) 177 | return loss 178 | 179 | class L1Loss(nn.Module): 180 | def __init__(self): 181 | super(L1Loss, self).__init__() 182 | 183 | def forward(self, output, mask, ind, target): 184 | pred = _transpose_and_gather_feat(output, ind) 185 | mask = mask.unsqueeze(2).expand_as(pred).float() 186 | loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 187 | return loss 188 | 189 | class BinRotLoss(nn.Module): 190 | def __init__(self): 191 | super(BinRotLoss, self).__init__() 192 | 193 | def forward(self, output, mask, ind, rotbin, rotres): 194 | pred = _transpose_and_gather_feat(output, ind) 195 | loss = compute_rot_loss(pred, rotbin, rotres, mask) 196 | return loss 197 | 198 | def compute_res_loss(output, target): 199 | return F.smooth_l1_loss(output, target, reduction='elementwise_mean') 200 | 201 | # TODO: weight 202 | def compute_bin_loss(output, target, mask): 203 | mask = mask.expand_as(output) 204 | output = output * mask.float() 205 | return F.cross_entropy(output, target, reduction='elementwise_mean') 206 | 207 | def compute_rot_loss(output, target_bin, target_res, mask): 208 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 209 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 210 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls] 211 | # target_res: (B, 128, 2) [bin1_res, bin2_res] 212 | # mask: (B, 128, 1) 213 | # import pdb; pdb.set_trace() 214 | output = output.view(-1, 8) 215 | target_bin = target_bin.view(-1, 2) 216 | target_res = target_res.view(-1, 2) 217 | mask = mask.view(-1, 1) 218 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask) 219 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask) 220 | loss_res = torch.zeros_like(loss_bin1) 221 | if target_bin[:, 0].nonzero().shape[0] > 0: 222 | idx1 = target_bin[:, 0].nonzero()[:, 0] 223 | valid_output1 = 
torch.index_select(output, 0, idx1.long()) 224 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long()) 225 | loss_sin1 = compute_res_loss( 226 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0])) 227 | loss_cos1 = compute_res_loss( 228 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0])) 229 | loss_res += loss_sin1 + loss_cos1 230 | if target_bin[:, 1].nonzero().shape[0] > 0: 231 | idx2 = target_bin[:, 1].nonzero()[:, 0] 232 | valid_output2 = torch.index_select(output, 0, idx2.long()) 233 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long()) 234 | loss_sin2 = compute_res_loss( 235 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1])) 236 | loss_cos2 = compute_res_loss( 237 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1])) 238 | loss_res += loss_sin2 + loss_cos2 239 | return loss_bin1 + loss_bin2 + loss_res 240 | -------------------------------------------------------------------------------- /Datasets/util_flow.py: -------------------------------------------------------------------------------- 1 | """ 2 | # ============================== 3 | # util_flow.py 4 | # library for optical flow processing 5 | # Author: Gengshan Yang 6 | # Date: 10th Feb 2021 7 | # ============================== 8 | """ 9 | import math 10 | import png 11 | import struct 12 | import array 13 | import numpy as np 14 | import cv2 15 | import pdb 16 | 17 | from io import * 18 | 19 | UNKNOWN_FLOW_THRESH = 1e9; 20 | UNKNOWN_FLOW = 1e10; 21 | 22 | # Middlebury checks 23 | TAG_STRING = 'PIEH' # use this when WRITING the file 24 | TAG_FLOAT = 202021.25 # check for this when READING the file 25 | 26 | def readPFM(file): 27 | import re 28 | file = open(file, 'rb') 29 | 30 | color = None 31 | width = None 32 | height = None 33 | scale = None 34 | endian = None 35 | 36 | header = file.readline().rstrip() 37 | if header == b'PF': 38 | color = True 39 | elif header == b'Pf': 40 | color = False 41 | else: 42 | raise Exception('Not a PFM file.') 43 | 44 | dim_match = re.match(b'^(\d+)\s(\d+)\s$', file.readline()) 45 | if dim_match: 46 | width, height = map(int, dim_match.groups()) 47 | else: 48 | raise Exception('Malformed PFM header.') 49 | 50 | scale = float(file.readline().rstrip()) 51 | if scale < 0: # little-endian 52 | endian = '<' 53 | scale = -scale 54 | else: 55 | endian = '>' # big-endian 56 | 57 | data = np.fromfile(file, endian + 'f') 58 | shape = (height, width, 3) if color else (height, width) 59 | 60 | data = np.reshape(data, shape) 61 | data = np.flipud(data) 62 | return data, scale 63 | 64 | 65 | def save_pfm(file, image, scale = 1): 66 | import sys 67 | color = None 68 | 69 | if image.dtype.name != 'float32': 70 | raise Exception('Image dtype must be float32.') 71 | 72 | if len(image.shape) == 3 and image.shape[2] == 3: # color image 73 | color = True 74 | elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1: # greyscale 75 | color = False 76 | else: 77 | raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.') 78 | 79 | file.write('PF\n' if color else 'Pf\n') 80 | file.write('%d %d\n' % (image.shape[1], image.shape[0])) 81 | 82 | endian = image.dtype.byteorder 83 | 84 | if endian == '<' or endian == '=' and sys.byteorder == 'little': 85 | scale = -scale 86 | 87 | file.write('%f\n' % scale) 88 | 89 | image.tofile(file) 90 | 91 | 92 | def ReadMiddleburyFloFile(path): 93 | """ Read .FLO file as specified by Middlebury. 94 | 95 | Returns tuple (width, height, u, v, mask), where u, v, mask are flat 96 | arrays of values. 
97 | """ 98 | 99 | with open(path, 'rb') as fil: 100 | tag = struct.unpack('f', fil.read(4))[0] 101 | width = struct.unpack('i', fil.read(4))[0] 102 | height = struct.unpack('i', fil.read(4))[0] 103 | 104 | assert tag == TAG_FLOAT 105 | 106 | #data = np.fromfile(path, dtype=np.float, count=-1) 107 | #data = data[3:] 108 | 109 | fmt = 'f' * width*height*2 110 | data = struct.unpack(fmt, fil.read(4*width*height*2)) 111 | 112 | u = data[::2] 113 | v = data[1::2] 114 | 115 | mask = map(lambda x,y: abs(x) 0: 152 | # print(u[ind], v[ind], mask[ind], row[3*x], row[3*x+1], row[3*x+2]) 153 | 154 | #png_reader.close() 155 | 156 | return (width, height, u, v, mask) 157 | 158 | 159 | def WriteMiddleburyFloFile(path, width, height, u, v, mask=None): 160 | """ Write .FLO file as specified by Middlebury. 161 | """ 162 | 163 | if mask is not None: 164 | u_masked = map(lambda x,y: x if y else UNKNOWN_FLOW, u, mask) 165 | v_masked = map(lambda x,y: x if y else UNKNOWN_FLOW, v, mask) 166 | else: 167 | u_masked = u 168 | v_masked = v 169 | 170 | fmt = 'f' * width*height*2 171 | # Interleave lists 172 | data = [x for t in zip(u_masked,v_masked) for x in t] 173 | 174 | with open(path, 'wb') as fil: 175 | fil.write(str.encode(TAG_STRING)) 176 | fil.write(struct.pack('i', width)) 177 | fil.write(struct.pack('i', height)) 178 | fil.write(struct.pack(fmt, *data)) 179 | 180 | 181 | def write_flow(path,flow): 182 | 183 | invalid_idx = (flow[:, :, 2] == 0) 184 | flow[:, :, 0:2] = flow[:, :, 0:2]*64.+ 2 ** 15 185 | flow[invalid_idx, 0] = 0 186 | flow[invalid_idx, 1] = 0 187 | 188 | flow = flow.astype(np.uint16) 189 | flow = cv2.imwrite(path, flow[:,:,::-1]) 190 | 191 | #WriteKittiPngFile(path, 192 | # flow.shape[1], flow.shape[0], flow[:,:,0].flatten(), 193 | # flow[:,:,1].flatten(), flow[:,:,2].flatten()) 194 | 195 | 196 | 197 | def WriteKittiPngFile(path, width, height, u, v, mask=None): 198 | """ Write 16-bit .PNG file as specified by KITTI-2015 (flow). 199 | 200 | u, v are lists of float values 201 | mask is a list of floats, denoting the *valid* pixels. 202 | """ 203 | 204 | data = array.array('H',[0])*width*height*3 205 | 206 | for i,(u_,v_,mask_) in enumerate(zip(u,v,mask)): 207 | data[3*i] = int(u_*64.0+2**15) 208 | data[3*i+1] = int(v_*64.0+2**15) 209 | data[3*i+2] = int(mask_) 210 | 211 | # if mask_ > 0: 212 | # print(data[3*i], data[3*i+1],data[3*i+2]) 213 | 214 | with open(path, 'wb') as png_file: 215 | png_writer = png.Writer(width=width, height=height, bitdepth=16, compression=3, greyscale=False) 216 | png_writer.write_array(png_file, data) 217 | 218 | 219 | def ConvertMiddleburyFloToKittiPng(src_path, dest_path): 220 | width, height, u, v, mask = ReadMiddleburyFloFile(src_path) 221 | WriteKittiPngFile(dest_path, width, height, u, v, mask=mask) 222 | 223 | def ConvertKittiPngToMiddleburyFlo(src_path, dest_path): 224 | width, height, u, v, mask = ReadKittiPngFile(src_path) 225 | WriteMiddleburyFloFile(dest_path, width, height, u, v, mask=mask) 226 | 227 | 228 | def ParseFilenameKitti(filename): 229 | # Parse kitti filename (seq_frameno.xx), 230 | # return seq, frameno, ext. 
231 | # Be aware that seq might contain the dataset name (if contained as prefix) 232 | ext = filename[filename.rfind('.'):] 233 | frameno = filename[filename.rfind('_')+1:filename.rfind('.')] 234 | frameno = int(frameno) 235 | seq = filename[:filename.rfind('_')] 236 | return seq, frameno, ext 237 | 238 | 239 | def read_calib_file(filepath): 240 | """Read in a calibration file and parse into a dictionary.""" 241 | data = {} 242 | 243 | with open(filepath, 'r') as f: 244 | for line in f.readlines(): 245 | key, value = line.split(':', 1) 246 | # The only non-float values in these files are dates, which 247 | # we don't care about anyway 248 | try: 249 | data[key] = np.array([float(x) for x in value.split()]) 250 | except ValueError: 251 | pass 252 | 253 | return data 254 | 255 | def load_calib_cam_to_cam(cam_to_cam_file): 256 | # We'll return the camera calibration as a dictionary 257 | data = {} 258 | 259 | # Load and parse the cam-to-cam calibration data 260 | filedata = read_calib_file(cam_to_cam_file) 261 | 262 | # Create 3x4 projection matrices 263 | P_rect_00 = np.reshape(filedata['P_rect_00'], (3, 4)) 264 | P_rect_10 = np.reshape(filedata['P_rect_01'], (3, 4)) 265 | P_rect_20 = np.reshape(filedata['P_rect_02'], (3, 4)) 266 | P_rect_30 = np.reshape(filedata['P_rect_03'], (3, 4)) 267 | 268 | # Compute the camera intrinsics 269 | data['K_cam0'] = P_rect_00[0:3, 0:3] 270 | data['K_cam1'] = P_rect_10[0:3, 0:3] 271 | data['K_cam2'] = P_rect_20[0:3, 0:3] 272 | data['K_cam3'] = P_rect_30[0:3, 0:3] 273 | 274 | data['b00'] = P_rect_00[0, 3] / P_rect_00[0, 0] 275 | data['b10'] = P_rect_10[0, 3] / P_rect_10[0, 0] 276 | data['b20'] = P_rect_20[0, 3] / P_rect_20[0, 0] 277 | data['b30'] = P_rect_30[0, 3] / P_rect_30[0, 0] 278 | 279 | return data -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DytanVO: Joint Refinement of Visual Odometry and Motion Segmentation in Dynamic Environments 2 | 3 |

4 |
10 | DytanVO: Joint Refinement of Visual Odometry and Motion Segmentation in Dynamic Environments (ICRA 2023)
11 | By Shihao Shen, Yilin Cai, Wenshan Wang, and Sebastian Scherer.
16 |

17 | 18 | ### What's new. 19 | 20 | - 01-17-2023: Our paper has been accepted to ICRA 2023! 21 | 22 | - 01-05-2023: Clean up and upload the codebase for _DytanVO_. Pretrained weights and datasets are also ready. 23 | 24 | - 09-20-2022: Remove _Dynamic Dense RGB-D SLAM with Learning-Based Visual Odometry_. The repo will be used to release codebase for the most recent ICRA 2023 submission. 25 | 26 | 27 | ## Introduction 28 | DytanVO is a learning-based visual odometry (VO) based on its precursor, [TartanVO](https://github.com/castacks/tartanvo). It is the first supervised learning-based VO method that deals with dynamic environments. It takes two consecutive monocular frames in real-time and predicts camera ego-motion in an iterative fashion. It achieves an average improvement of 27.7% over state-of-the-art VO solutions in real-world dynamic environments, and even performs competitively among dynamic visual SLAM systems which optimize the trajectory on the backend. Experiments on plentiful unseen environments also demonstrate its generalizability. 29 | 30 | 31 | ## Installation 32 | We provide an environment file using [anaconda](https://www.anaconda.com/). The code has been tested on an RTX 2080Ti with CUDA 11.4. 33 | ```bash 34 | conda env create -f environment.yml 35 | conda activate dytanvo 36 | ``` 37 | 38 | Compile [DCNv2](https://github.com/MatthewHowe/DCNv2). 39 | ``` 40 | cd Network/rigidmask/networks/DCNv2/; python setup.py install; cd - 41 | ``` 42 | 43 | 44 | ## Models and Data 45 | 46 | ### Pretrained weights 47 | Download [here](https://drive.google.com/file/d/1ujYmKv5FHXYe1KETabTnSs-R2OE0KJV3/view?usp=share_link) and unzip it to the `models` folder. 48 | 49 | ### KITTI dynamic sequences 50 | Original sequences in [KITTI Odometry](https://www.cvlibs.net/datasets/kitti/eval_odometry.php) are trimmed into sub-sequences which contain moving pedestrians, vehicles and cyclists so that VO's robustness to dynamic objects can be explicitly evaluated. Download [DynaKITTI](https://drive.google.com/file/d/1BDnraRWzNf938UsfprWIkcqCSfOUyGt9/view?usp=share_link) and unzip it to the `data` folder. Please cite this paper if you find it useful in your work. 51 | 52 | ### AirDOS-Shibuya 53 | Follow [tartanair-shibuya](https://github.com/haleqiu/tartanair-shibuya) and download it to the `data` folder. 54 | 55 | ### (Optional) Scene Flow 56 | One can also test the model on [Scene Flow datasets](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html), which was used to train both the VO and the segmentation networks. Scene Flow datasets have very challenging sequences with large areas of dynamic objects in image frames. 57 | 58 | You can create symbolic links to wherever the datasets were downloaded in the `data` folder. 59 | 60 | ```Shell 61 | ├── data 62 | ├── AirDOS_shibuya 63 | ├── RoadCrossing03 64 | ├── image_0 65 | ├── ... 66 | ├── gt_pose.txt 67 | ├── RoadCrossing04 68 | ├── ... 69 | ├── DynaKITTI 70 | ├── 00_1 71 | ├── image_2 72 | ├── ... 73 | ├── pose_left.txt 74 | ├── calib.txt 75 | ├── 01_0 76 | ├── ... 77 | ├── SceneFlow 78 | ├── FlyThings3D 79 | ├── frames_cleanpass 80 | ├── frames_finalpass 81 | ├── optical_flow 82 | ├── camera_data 83 | ├── Driving 84 | ├── Monkaa 85 | ├── ... 86 | ``` 87 | 88 | 89 | ## Evaluation 90 | Create a folder to save output flow, segmentation, or poses. 
91 | ```bash 92 | mkdir results 93 | ``` 94 | 95 | ### Dynamic sequences in KITTI (loading the finetuned VO model at once) 96 | ```bash 97 | traj=00_1 98 | python -W ignore::UserWarning vo_trajectory_from_folder.py --vo-model-name vonet_ft.pkl \ 99 | --seg-model-name segnet-kitti.pth \ 100 | --kitti --kitti-intrinsics-file data/DynaKITTI/$traj/calib.txt \ 101 | --test-dir data/DynaKITTI/$traj/image_2 \ 102 | --pose-file data/DynaKITTI/$traj/pose_left.txt 103 | ``` 104 | 105 | ### AirDOS-Shibuya (loading FlowNet and PoseNet separately) 106 | ```bash 107 | traj=RoadCrossing03 108 | python -W ignore::UserWarning vo_trajectory_from_folder.py --flow-model-name flownet.pkl \ 109 | --pose-model-name posenet.pkl \ 110 | --seg-model segnet-sf.pth \ 111 | --airdos \ 112 | --test-dir data/AirDOS_shibuya/$traj/image_0 \ 113 | --pose-file data/AirDOS_shibuya/$traj/gt_pose.txt 114 | ``` 115 | 116 | ### Scene Flow 117 | ```bash 118 | img=Driving/frames_finalpass/15mm_focallength/scene_forwards/fast/left 119 | pose=Driving/camera_data/15mm_focallength/scene_forwards/fast/camera_data.txt 120 | python -W ignore::UserWarning vo_trajectory_from_folder.py --flow-model-name flownet.pkl \ 121 | --pose-model-name posenet.pkl \ 122 | --seg-model segnet-sf.pth \ 123 | --sceneflow \ 124 | --test-dir data/SceneFlow/$img \ 125 | --pose-file data/SceneFlow/$pose 126 | ``` 127 | 128 | Add `--save-flow` tag to save intermediate optical flow outputs into the `results` folder. 129 | 130 | Adjust the batch size and the worker number by `--batch-size 10`, `--worker-num 5`. 131 | 132 | 133 | ## (Optional) Segmentation Mask Ground Truth 134 | If your dataset has ground truth for camera motion, optical flow and disparity change across consecutive frames, we provide an example script to automatically generate ground truth of segmentation mask given these two modalities based on the pure geometry for the [Scene Flow datasets](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html). 135 | 136 | ```bash 137 | python Datasets/segmask_gt.py --database data/SceneFlow --frames_pass clean --dataset FlyingThings3D 138 | ``` 139 | 140 | Add `--debug` flag to save visualizations of the generated masks. 141 | 142 | ## Citation 143 | If you find our code, paper or dataset useful, please cite 144 | ```bibtex 145 | @inproceedings{shen2023dytanvo, 146 | title={Dytanvo: Joint refinement of visual odometry and motion segmentation in dynamic environments}, 147 | author={Shen, Shihao and Cai, Yilin and Wang, Wenshan and Scherer, Sebastian}, 148 | booktitle={2023 IEEE International Conference on Robotics and Automation (ICRA)}, 149 | pages={4048--4055}, 150 | year={2023}, 151 | organization={IEEE} 152 | } 153 | ``` 154 | 155 | ## Acknowledgement 156 | We built DytanVO on top of [TartanVO](https://github.com/castacks/tartanvo). We implemented the segmentation network by adapting [rigidmask](https://github.com/gengshan-y/rigidmask). We thank [Gengshan Yang](https://gengshan-y.github.io/) for his code and suggestions. 157 | 158 | ## License 159 | This software is BSD licensed. 160 | 161 | Copyright (c) 2020, Carnegie Mellon University All rights reserved. 162 | 163 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 164 | 165 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
166 | 167 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 168 | 169 | Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 170 | 171 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 172 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/testcpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from torch.autograd import gradcheck 10 | 11 | from dcn_v2 import dcn_v2_conv, DCNv2, DCN 12 | from dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling 13 | 14 | deformable_groups = 1 15 | N, inC, inH, inW = 2, 2, 4, 4 16 | outC = 2 17 | kH, kW = 3, 3 18 | 19 | 20 | def conv_identify(weight, bias): 21 | weight.data.zero_() 22 | bias.data.zero_() 23 | o, i, h, w = weight.shape 24 | y = h//2 25 | x = w//2 26 | for p in range(i): 27 | for q in range(o): 28 | if p == q: 29 | weight.data[q, p, y, x] = 1.0 30 | 31 | 32 | def check_zero_offset(): 33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW, 34 | kernel_size=(kH, kW), 35 | stride=(1, 1), 36 | padding=(1, 1), 37 | bias=True) 38 | 39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW, 40 | kernel_size=(kH, kW), 41 | stride=(1, 1), 42 | padding=(1, 1), 43 | bias=True) 44 | 45 | dcn_v2 = DCNv2(inC, outC, (kH, kW), 46 | stride=1, padding=1, dilation=1, 47 | deformable_groups=deformable_groups) 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW) 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print('Zero offset passed') 64 | else: 65 | print('Zero offset failed') 66 | print(input) 67 | print(output) 68 | 69 | def check_gradient_dconv(): 70 | 71 | input = torch.rand(N, inC, inH, inW) * 0.01 72 | input.requires_grad = True 73 | 74 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW) * 2 75 | # offset.data.zero_() 76 | # offset.data -= 0.5 77 | offset.requires_grad = True 78 | 79 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW) 80 | # mask.data.zero_() 81 | 
mask.requires_grad = True 82 | mask = torch.sigmoid(mask) 83 | 84 | weight = torch.randn(outC, inC, kH, kW) 85 | weight.requires_grad = True 86 | 87 | bias = torch.rand(outC) 88 | bias.requires_grad = True 89 | 90 | stride = 1 91 | padding = 1 92 | dilation = 1 93 | 94 | print('check_gradient_dconv: ', 95 | gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias, 96 | stride, padding, dilation, deformable_groups), 97 | eps=1e-3, atol=1e-4, rtol=1e-2)) 98 | 99 | 100 | def check_pooling_zero_offset(): 101 | 102 | input = torch.randn(2, 16, 64, 64).zero_() 103 | input[0, :, 16:26, 16:26] = 1. 104 | input[1, :, 10:20, 20:30] = 2. 105 | rois = torch.tensor([ 106 | [0, 65, 65, 103, 103], 107 | [1, 81, 41, 119, 79], 108 | ]).float() 109 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 110 | pooled_size=7, 111 | output_dim=16, 112 | no_trans=True, 113 | group_size=1, 114 | trans_std=0.0) 115 | 116 | out = pooling(input, rois, input.new()) 117 | s = ', '.join(['%f' % out[i, :, :, :].mean().item() 118 | for i in range(rois.shape[0])]) 119 | print(s) 120 | 121 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 122 | pooled_size=7, 123 | output_dim=16, 124 | no_trans=False, 125 | group_size=1, 126 | trans_std=0.0) 127 | offset = torch.randn(20, 2, 7, 7).zero_() 128 | dout = dpooling(input, rois, offset) 129 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item() 130 | for i in range(rois.shape[0])]) 131 | print(s) 132 | 133 | 134 | def check_gradient_dpooling(): 135 | input = torch.randn(2, 3, 5, 5) * 0.01 136 | N = 4 137 | batch_inds = torch.randint(2, (N, 1)).float() 138 | x = torch.rand((N, 1)).float() * 15 139 | y = torch.rand((N, 1)).float() * 15 140 | w = torch.rand((N, 1)).float() * 10 141 | h = torch.rand((N, 1)).float() * 10 142 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 143 | offset = torch.randn(N, 2, 3, 3) 144 | input.requires_grad = True 145 | offset.requires_grad = True 146 | 147 | spatial_scale = 1.0 / 4 148 | pooled_size = 3 149 | output_dim = 3 150 | no_trans = 0 151 | group_size = 1 152 | trans_std = 0.0 153 | sample_per_part = 4 154 | part_size = pooled_size 155 | 156 | print('check_gradient_dpooling:', 157 | gradcheck(dcn_v2_pooling, (input, rois, offset, 158 | spatial_scale, 159 | pooled_size, 160 | output_dim, 161 | no_trans, 162 | group_size, 163 | part_size, 164 | sample_per_part, 165 | trans_std), 166 | eps=1e-4)) 167 | 168 | 169 | def example_dconv(): 170 | input = torch.randn(2, 64, 128, 128) 171 | # wrap all things (offset and mask) in DCN 172 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, 173 | padding=1, deformable_groups=2) 174 | # print(dcn.weight.shape, input.shape) 175 | output = dcn(input) 176 | targert = output.new(*output.size()) 177 | targert.data.uniform_(-0.01, 0.01) 178 | error = (targert - output).mean() 179 | error.backward() 180 | print(output.shape) 181 | 182 | 183 | def example_dpooling(): 184 | input = torch.randn(2, 32, 64, 64) 185 | batch_inds = torch.randint(2, (20, 1)).float() 186 | x = torch.randint(256, (20, 1)).float() 187 | y = torch.randint(256, (20, 1)).float() 188 | w = torch.randint(64, (20, 1)).float() 189 | h = torch.randint(64, (20, 1)).float() 190 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 191 | offset = torch.randn(20, 2, 7, 7) 192 | input.requires_grad = True 193 | offset.requires_grad = True 194 | 195 | # normal roi_align 196 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 197 | pooled_size=7, 198 | output_dim=32, 199 | no_trans=True, 200 | group_size=1, 201 | trans_std=0.1) 202 | 203 | # deformable 
pooling 204 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 205 | pooled_size=7, 206 | output_dim=32, 207 | no_trans=False, 208 | group_size=1, 209 | trans_std=0.1) 210 | 211 | out = pooling(input, rois, offset) 212 | dout = dpooling(input, rois, offset) 213 | print(out.shape) 214 | print(dout.shape) 215 | 216 | target_out = out.new(*out.size()) 217 | target_out.data.uniform_(-0.01, 0.01) 218 | target_dout = dout.new(*dout.size()) 219 | target_dout.data.uniform_(-0.01, 0.01) 220 | e = (target_out - out).mean() 221 | e.backward() 222 | e = (target_dout - dout).mean() 223 | e.backward() 224 | 225 | 226 | def example_mdpooling(): 227 | input = torch.randn(2, 32, 64, 64) 228 | input.requires_grad = True 229 | batch_inds = torch.randint(2, (20, 1)).float() 230 | x = torch.randint(256, (20, 1)).float() 231 | y = torch.randint(256, (20, 1)).float() 232 | w = torch.randint(64, (20, 1)).float() 233 | h = torch.randint(64, (20, 1)).float() 234 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 235 | 236 | # mdformable pooling (V2) 237 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 238 | pooled_size=7, 239 | output_dim=32, 240 | no_trans=False, 241 | group_size=1, 242 | trans_std=0.1, 243 | deform_fc_dim=1024) 244 | 245 | dout = dpooling(input, rois) 246 | target = dout.new(*dout.size()) 247 | target.data.uniform_(-0.1, 0.1) 248 | error = (target - dout).mean() 249 | error.backward() 250 | print(dout.shape) 251 | 252 | 253 | if __name__ == '__main__': 254 | 255 | example_dconv() 256 | example_dpooling() 257 | example_mdpooling() 258 | 259 | check_pooling_zero_offset() 260 | # zero offset check 261 | if inC == outC: 262 | check_zero_offset() 263 | 264 | check_gradient_dpooling() 265 | check_gradient_dconv() 266 | # """ 267 | # ****** Note: backward is not reentrant error may not be a serious problem, 268 | # ****** since the max error is less than 1e-7, 269 | # ****** Still looking for what trigger this problem 270 | # """ 271 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/testcuda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from torch.autograd import gradcheck 10 | 11 | from dcn_v2 import dcn_v2_conv, DCNv2, DCN 12 | from dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling 13 | 14 | deformable_groups = 1 15 | N, inC, inH, inW = 2, 2, 4, 4 16 | outC = 2 17 | kH, kW = 3, 3 18 | 19 | 20 | def conv_identify(weight, bias): 21 | weight.data.zero_() 22 | bias.data.zero_() 23 | o, i, h, w = weight.shape 24 | y = h//2 25 | x = w//2 26 | for p in range(i): 27 | for q in range(o): 28 | if p == q: 29 | weight.data[q, p, y, x] = 1.0 30 | 31 | 32 | def check_zero_offset(): 33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW, 34 | kernel_size=(kH, kW), 35 | stride=(1, 1), 36 | padding=(1, 1), 37 | bias=True).cuda() 38 | 39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW, 40 | kernel_size=(kH, kW), 41 | stride=(1, 1), 42 | padding=(1, 1), 43 | bias=True).cuda() 44 | 45 | dcn_v2 = DCNv2(inC, outC, (kH, kW), 46 | stride=1, padding=1, dilation=1, 47 | deformable_groups=deformable_groups).cuda() 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | 
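# With the offset and mask convolutions zero-initialised, the sampling offsets
# are zero and the mask is sigmoid(0) = 0.5 everywhere; conv_identify then
# turns dcn_v2 into an identity convolution, so its output equals 0.5 * input
# and is doubled further down before being compared against the input
# (this check only runs when inC == outC).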
conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW).cuda() 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print('Zero offset passed') 64 | else: 65 | print('Zero offset failed') 66 | print(input) 67 | print(output) 68 | 69 | def check_gradient_dconv(): 70 | 71 | input = torch.rand(N, inC, inH, inW).cuda() * 0.01 72 | input.requires_grad = True 73 | 74 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2 75 | # offset.data.zero_() 76 | # offset.data -= 0.5 77 | offset.requires_grad = True 78 | 79 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda() 80 | # mask.data.zero_() 81 | mask.requires_grad = True 82 | mask = torch.sigmoid(mask) 83 | 84 | weight = torch.randn(outC, inC, kH, kW).cuda() 85 | weight.requires_grad = True 86 | 87 | bias = torch.rand(outC).cuda() 88 | bias.requires_grad = True 89 | 90 | stride = 1 91 | padding = 1 92 | dilation = 1 93 | 94 | print('check_gradient_dconv: ', 95 | gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias, 96 | stride, padding, dilation, deformable_groups), 97 | eps=1e-3, atol=1e-4, rtol=1e-2)) 98 | 99 | 100 | def check_pooling_zero_offset(): 101 | 102 | input = torch.randn(2, 16, 64, 64).cuda().zero_() 103 | input[0, :, 16:26, 16:26] = 1. 104 | input[1, :, 10:20, 20:30] = 2. 105 | rois = torch.tensor([ 106 | [0, 65, 65, 103, 103], 107 | [1, 81, 41, 119, 79], 108 | ]).cuda().float() 109 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 110 | pooled_size=7, 111 | output_dim=16, 112 | no_trans=True, 113 | group_size=1, 114 | trans_std=0.0).cuda() 115 | 116 | out = pooling(input, rois, input.new()) 117 | s = ', '.join(['%f' % out[i, :, :, :].mean().item() 118 | for i in range(rois.shape[0])]) 119 | print(s) 120 | 121 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 122 | pooled_size=7, 123 | output_dim=16, 124 | no_trans=False, 125 | group_size=1, 126 | trans_std=0.0).cuda() 127 | offset = torch.randn(20, 2, 7, 7).cuda().zero_() 128 | dout = dpooling(input, rois, offset) 129 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item() 130 | for i in range(rois.shape[0])]) 131 | print(s) 132 | 133 | 134 | def check_gradient_dpooling(): 135 | input = torch.randn(2, 3, 5, 5).cuda().float() * 0.01 136 | N = 4 137 | batch_inds = torch.randint(2, (N, 1)).cuda().float() 138 | x = torch.rand((N, 1)).cuda().float() * 15 139 | y = torch.rand((N, 1)).cuda().float() * 15 140 | w = torch.rand((N, 1)).cuda().float() * 10 141 | h = torch.rand((N, 1)).cuda().float() * 10 142 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 143 | offset = torch.randn(N, 2, 3, 3).cuda() 144 | input.requires_grad = True 145 | offset.requires_grad = True 146 | 147 | spatial_scale = 1.0 / 4 148 | pooled_size = 3 149 | output_dim = 3 150 | no_trans = 0 151 | group_size = 1 152 | trans_std = 0.0 153 | sample_per_part = 4 154 | part_size = pooled_size 155 | 156 | print('check_gradient_dpooling:', 157 | gradcheck(dcn_v2_pooling, (input, rois, offset, 158 | spatial_scale, 159 | pooled_size, 160 | output_dim, 161 | no_trans, 162 | group_size, 163 | part_size, 164 | sample_per_part, 165 | trans_std), 166 | eps=1e-4)) 167 | 168 | 169 | def example_dconv(): 170 | input = torch.randn(2, 64, 128, 128).cuda() 171 | # wrap all things (offset and mask) in DCN 172 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, 173 | padding=1, 
deformable_groups=2).cuda() 174 | # print(dcn.weight.shape, input.shape) 175 | output = dcn(input) 176 | targert = output.new(*output.size()) 177 | targert.data.uniform_(-0.01, 0.01) 178 | error = (targert - output).mean() 179 | error.backward() 180 | print(output.shape) 181 | 182 | 183 | def example_dpooling(): 184 | input = torch.randn(2, 32, 64, 64).cuda() 185 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 186 | x = torch.randint(256, (20, 1)).cuda().float() 187 | y = torch.randint(256, (20, 1)).cuda().float() 188 | w = torch.randint(64, (20, 1)).cuda().float() 189 | h = torch.randint(64, (20, 1)).cuda().float() 190 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 191 | offset = torch.randn(20, 2, 7, 7).cuda() 192 | input.requires_grad = True 193 | offset.requires_grad = True 194 | 195 | # normal roi_align 196 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 197 | pooled_size=7, 198 | output_dim=32, 199 | no_trans=True, 200 | group_size=1, 201 | trans_std=0.1).cuda() 202 | 203 | # deformable pooling 204 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 205 | pooled_size=7, 206 | output_dim=32, 207 | no_trans=False, 208 | group_size=1, 209 | trans_std=0.1).cuda() 210 | 211 | out = pooling(input, rois, offset) 212 | dout = dpooling(input, rois, offset) 213 | print(out.shape) 214 | print(dout.shape) 215 | 216 | target_out = out.new(*out.size()) 217 | target_out.data.uniform_(-0.01, 0.01) 218 | target_dout = dout.new(*dout.size()) 219 | target_dout.data.uniform_(-0.01, 0.01) 220 | e = (target_out - out).mean() 221 | e.backward() 222 | e = (target_dout - dout).mean() 223 | e.backward() 224 | 225 | 226 | def example_mdpooling(): 227 | input = torch.randn(2, 32, 64, 64).cuda() 228 | input.requires_grad = True 229 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 230 | x = torch.randint(256, (20, 1)).cuda().float() 231 | y = torch.randint(256, (20, 1)).cuda().float() 232 | w = torch.randint(64, (20, 1)).cuda().float() 233 | h = torch.randint(64, (20, 1)).cuda().float() 234 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 235 | 236 | # mdformable pooling (V2) 237 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 238 | pooled_size=7, 239 | output_dim=32, 240 | no_trans=False, 241 | group_size=1, 242 | trans_std=0.1, 243 | deform_fc_dim=1024).cuda() 244 | 245 | dout = dpooling(input, rois) 246 | target = dout.new(*dout.size()) 247 | target.data.uniform_(-0.1, 0.1) 248 | error = (target - dout).mean() 249 | error.backward() 250 | print(dout.shape) 251 | 252 | 253 | if __name__ == '__main__': 254 | 255 | example_dconv() 256 | example_dpooling() 257 | example_mdpooling() 258 | 259 | check_pooling_zero_offset() 260 | # zero offset check 261 | if inC == outC: 262 | check_zero_offset() 263 | 264 | check_gradient_dpooling() 265 | check_gradient_dconv() 266 | # """ 267 | # ****** Note: backward is not reentrant error may not be a serious problem, 268 | # ****** since the max error is less than 1e-7, 269 | # ****** Still looking for what trigger this problem 270 | # """ 271 | -------------------------------------------------------------------------------- /evaluator/transformation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang 2 | # For License information please see the LICENSE file in the root directory. 
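# The helpers in this file convert between three pose parameterisations used by
# the evaluator: a flattened 3x4 transform (12 values), an se(3) vector
# (3 translation + 3 rotation-vector components, 6 values), and a position plus
# quaternion (7 values, quaternion in scipy's x, y, z, w order).
# A minimal round-trip sketch (illustrative only; assumes this module is
# importable under the path shown):
#
#   import numpy as np
#   from evaluator import transformation as tf
#
#   pose_line = np.hstack([np.eye(3), np.zeros((3, 1))]).reshape(12)  # identity pose, 12 values
#   se = tf.SE2se(tf.line2mat(pose_line))   # 12 -> 6
#   back = tf.mat2line(tf.se2SE(se))        # 6 -> 12
#   assert np.allclose(pose_line, back)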
3 | # Credit: Xiangwei Wang https://github.com/TimingSpace 4 | 5 | import numpy as np 6 | from scipy.spatial.transform import Rotation as R 7 | 8 | def line2mat(line_data): 9 | ''' 10 | 12 -> 4 x 4 11 | ''' 12 | mat = np.eye(4) 13 | mat[0:3,:] = line_data.reshape(3,4) 14 | return np.matrix(mat) 15 | 16 | def mat2line(mat_data): 17 | ''' 18 | 4 x 4 -> 12 19 | ''' 20 | line_data = np.zeros(12) 21 | line_data[:]=mat_data[:3,:].reshape((12)) 22 | return line_data 23 | 24 | def motion2pose(data): 25 | ''' 26 | data: N x 12 27 | all_pose: (N+1) x 12 28 | ''' 29 | data_size = data.shape[0] 30 | all_pose = np.zeros((data_size+1,12)) 31 | temp = np.eye(4,4).reshape(1,16) 32 | all_pose[0,:] = temp[0,0:12] 33 | pose = np.matrix(np.eye(4,4)) 34 | for i in range(0,data_size): 35 | data_mat = line2mat(data[i,:]) 36 | pose = pose*data_mat 37 | pose_line = np.array(pose[0:3,:]).reshape(1,12) 38 | all_pose[i+1,:] = pose_line 39 | return all_pose 40 | 41 | def pose2motion(data, skip=0): 42 | ''' 43 | data: N x 12 44 | all_motion (N-1-skip) x 12 45 | ''' 46 | data_size = data.shape[0] 47 | all_motion = np.zeros((data_size-1,12)) 48 | for i in range(0,data_size-1-skip): 49 | pose_curr = line2mat(data[i,:]) 50 | pose_next = line2mat(data[i+1+skip,:]) 51 | motion = pose_curr.I*pose_next 52 | motion_line = np.array(motion[0:3,:]).reshape(1,12) 53 | all_motion[i,:] = motion_line 54 | return all_motion 55 | 56 | def SE2se(SE_data): 57 | result = np.zeros((6)) 58 | result[0:3] = np.array(SE_data[0:3,3].T) 59 | result[3:6] = SO2so(SE_data[0:3,0:3]).T 60 | return result 61 | 62 | def SO2so(SO_data): 63 | return R.from_matrix(SO_data).as_rotvec() 64 | 65 | def so2SO(so_data): 66 | return R.from_rotvec(so_data).as_matrix() 67 | 68 | def se2SE(se_data): 69 | result_mat = np.matrix(np.eye(4)) 70 | result_mat[0:3,0:3] = so2SO(se_data[3:6]) 71 | result_mat[0:3,3] = np.matrix(se_data[0:3]).T 72 | return result_mat 73 | ### can get wrong result 74 | def se_mean(se_datas): 75 | all_SE = np.matrix(np.eye(4)) 76 | for i in range(se_datas.shape[0]): 77 | se = se_datas[i,:] 78 | SE = se2SE(se) 79 | all_SE = all_SE*SE 80 | all_se = SE2se(all_SE) 81 | mean_se = all_se/se_datas.shape[0] 82 | return mean_se 83 | 84 | def ses_mean(se_datas): 85 | se_datas = np.array(se_datas) 86 | se_datas = np.transpose(se_datas.reshape(se_datas.shape[0],se_datas.shape[1],se_datas.shape[2]*se_datas.shape[3]),(0,2,1)) 87 | se_result = np.zeros((se_datas.shape[0],se_datas.shape[2])) 88 | for i in range(0,se_datas.shape[0]): 89 | mean_se = se_mean(se_datas[i,:,:]) 90 | se_result[i,:] = mean_se 91 | return se_result 92 | 93 | def ses2poses(data): 94 | data_size = data.shape[0] 95 | all_pose = np.zeros((data_size+1,12)) 96 | temp = np.eye(4,4).reshape(1,16) 97 | all_pose[0,:] = temp[0,0:12] 98 | pose = np.matrix(np.eye(4,4)) 99 | for i in range(0,data_size): 100 | data_mat = se2SE(data[i,:]) 101 | pose = pose*data_mat 102 | pose_line = np.array(pose[0:3,:]).reshape(1,12) 103 | all_pose[i+1,:] = pose_line 104 | return all_pose 105 | 106 | def ses2poses_quat(data): 107 | ''' 108 | ses: N x 6 109 | ''' 110 | data_size = data.shape[0] 111 | all_pose_quat = np.zeros((data_size+1,7)) 112 | all_pose_quat[0,:] = np.array([0., 0., 0., 0., 0., 0., 1.]) 113 | pose = np.matrix(np.eye(4,4)) 114 | for i in range(0,data_size): 115 | data_mat = se2SE(data[i,:]) 116 | pose = pose*data_mat 117 | quat = SO2quat(pose[0:3,0:3]) 118 | all_pose_quat[i+1,:3] = np.array([pose[0,3], pose[1,3], pose[2,3]]) 119 | all_pose_quat[i+1,3:] = quat 120 | return all_pose_quat 121 | 122 | 
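# motion2pose and pose2motion above are inverses of one another when skip=0:
# integrating N relative motions yields N+1 absolute poses, and differencing
# those poses recovers the original motions. Illustrative check (small random
# rotations, built with the helpers defined above in this file):
#
#   import numpy as np
#   motions = np.array([mat2line(se2SE(0.01 * np.random.randn(6))) for _ in range(5)])
#   poses = motion2pose(motions)      # 5 x 12 -> 6 x 12
#   recovered = pose2motion(poses)    # 6 x 12 -> 5 x 12
#   assert np.allclose(motions, recovered)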
def SEs2ses(motion_data): 123 | data_size = motion_data.shape[0] 124 | ses = np.zeros((data_size,6)) 125 | for i in range(0,data_size): 126 | SE = np.matrix(np.eye(4)) 127 | SE[0:3,:] = motion_data[i,:].reshape(3,4) 128 | ses[i,:] = SE2se(SE) 129 | return ses 130 | 131 | def so2quat(so_data): 132 | so_data = np.array(so_data) 133 | theta = np.sqrt(np.sum(so_data*so_data)) 134 | axis = so_data/theta 135 | quat=np.zeros(4) 136 | quat[0:3] = np.sin(theta/2)*axis 137 | quat[3] = np.cos(theta/2) 138 | return quat 139 | 140 | def quat2so(quat_data): 141 | quat_data = np.array(quat_data) 142 | sin_half_theta = np.sqrt(np.sum(quat_data[0:3]*quat_data[0:3])) 143 | axis = quat_data[0:3]/sin_half_theta 144 | cos_half_theta = quat_data[3] 145 | theta = 2*np.arctan2(sin_half_theta,cos_half_theta) 146 | so = theta*axis 147 | return so 148 | 149 | # input so_datas batch*channel*height*width 150 | # return quat_datas batch*numner*channel 151 | def sos2quats(so_datas,mean_std=[[1],[1]]): 152 | so_datas = np.array(so_datas) 153 | so_datas = so_datas.reshape(so_datas.shape[0],so_datas.shape[1],so_datas.shape[2]*so_datas.shape[3]) 154 | so_datas = np.transpose(so_datas,(0,2,1)) 155 | quat_datas = np.zeros((so_datas.shape[0],so_datas.shape[1],4)) 156 | for i_b in range(0,so_datas.shape[0]): 157 | for i_p in range(0,so_datas.shape[1]): 158 | so_data = so_datas[i_b,i_p,:] 159 | quat_data = so2quat(so_data) 160 | quat_datas[i_b,i_p,:] = quat_data 161 | return quat_datas 162 | 163 | def SO2quat(SO_data): 164 | rr = R.from_matrix(SO_data) 165 | return rr.as_quat() 166 | 167 | def quat2SO(quat_data): 168 | return R.from_quat(quat_data).as_matrix() 169 | 170 | 171 | def pos_quat2SE(quat_data): 172 | SO = R.from_quat(quat_data[3:7]).as_matrix() 173 | SE = np.matrix(np.eye(4)) 174 | SE[0:3,0:3] = np.matrix(SO) 175 | SE[0:3,3] = np.matrix(quat_data[0:3]).T 176 | SE = np.array(SE[0:3,:]).reshape(1,12) 177 | return SE 178 | 179 | 180 | def pos_quats2SEs(quat_datas): 181 | data_len = quat_datas.shape[0] 182 | SEs = np.zeros((data_len,12)) 183 | for i_data in range(0,data_len): 184 | SE = pos_quat2SE(quat_datas[i_data,:]) 185 | SEs[i_data,:] = SE 186 | return SEs 187 | 188 | 189 | def pos_quats2SE_matrices(quat_datas): 190 | data_len = quat_datas.shape[0] 191 | SEs = [] 192 | for quat in quat_datas: 193 | SO = R.from_quat(quat[3:7]).as_matrix() 194 | SE = np.eye(4) 195 | SE[0:3,0:3] = SO 196 | SE[0:3,3] = quat[0:3] 197 | SEs.append(SE) 198 | return SEs 199 | 200 | def SE2pos_quat(SE_data): 201 | pos_quat = np.zeros(7) 202 | pos_quat[3:] = SO2quat(SE_data[0:3,0:3]) 203 | pos_quat[:3] = SE_data[0:3,3].T 204 | return pos_quat 205 | 206 | def SEs2ses(data): 207 | ''' 208 | data: N x 12 209 | ses: N x 6 210 | ''' 211 | data_size = data.shape[0] 212 | ses = np.zeros((data_size,6)) 213 | for i in range(0,data_size): 214 | ses[i,:] = SE2se(line2mat(data[i])) 215 | return ses 216 | 217 | def ses2SEs(data): 218 | ''' 219 | data: N x 6 220 | SEs: N x 12 221 | ''' 222 | data_size = data.shape[0] 223 | SEs = np.zeros((data_size,12)) 224 | for i in range(0,data_size): 225 | SEs[i,:] = mat2line(se2SE(data[i])) 226 | return SEs 227 | 228 | def SE2quat(SE_data): 229 | ''' 230 | SE_data: 4 x 4 231 | quat: 7 232 | ''' 233 | pos_quat = np.zeros(7) 234 | pos_quat[3:] = SO2quat(SE_data[0:3,0:3]) 235 | pos_quat[:3] = SE_data[0:3,3].T 236 | return pos_quat 237 | 238 | def quat2SE(quat_data): 239 | ''' 240 | quat_data: 7 241 | SE: 4 x 4 242 | ''' 243 | SO = R.from_quat(quat_data[3:7]).as_matrix() 244 | SE = np.matrix(np.eye(4)) 245 | SE[0:3,0:3] = 
np.matrix(SO) 246 | SE[0:3,3] = np.matrix(quat_data[0:3]).T 247 | return SE 248 | 249 | def SEs2quats(SEs_data): 250 | ''' 251 | SE_data: N x 12 252 | quat: N x 7 253 | ''' 254 | data_len = SEs_data.shape[0] 255 | all_quats = np.zeros((data_len,7)) 256 | for i in range(0,data_len): 257 | SE = line2mat(SEs_data[i]) 258 | all_quats[i] = SE2quat(SE) 259 | return all_quats 260 | 261 | def quats2SEs(quat_datas): 262 | ''' 263 | pos_quats: N x 7 264 | SEs: N x 12 265 | ''' 266 | data_len = quat_datas.shape[0] 267 | SEs = np.zeros((data_len,12)) 268 | for i_data in range(0,data_len): 269 | SE = quat2SE(quat_datas[i_data,:]) 270 | SEs[i_data,:] = mat2line(SE) 271 | return SEs 272 | 273 | def motion_ses2pose_quats(data): 274 | ''' 275 | data: N x 6 motion data 276 | poses_quat: (N+1) x 7 pose data 277 | ''' 278 | motions_SEs = ses2SEs(data) # N x 6 -> N x 12 279 | poses_SEs = motion2pose(motions_SEs) # N x 12 -> (N + 1) x 12 280 | poses_quat = SEs2quats(poses_SEs) # (N + 1) x 12 -> (N+1) x 7 281 | return poses_quat 282 | 283 | def pose_quats2motion_ses(data): 284 | ''' 285 | data: N x 7 pose list 286 | motions: (N-1-skip) x 6 se3 list 287 | ''' 288 | poses_SEs = quats2SEs(data) # N x 7 -> N x 12 289 | matrix = pose2motion(poses_SEs) # N x 12 -> (N-1-skip) x 12 290 | motions = SEs2ses(matrix).astype(np.float32) # (N-1-skip) x 12 -> (N-1-skip) x 6 291 | return motions 292 | 293 | def kitti2tartan(traj): 294 | ''' 295 | traj: in kitti style, N x 12 numpy array, in camera frame 296 | output: in TartanAir style, N x 7 numpy array, in NED frame 297 | ''' 298 | T = np.array([[0,0,1,0], 299 | [1,0,0,0], 300 | [0,1,0,0], 301 | [0,0,0,1]], dtype=np.float32) 302 | T_inv = np.linalg.inv(T) 303 | new_traj = [] 304 | 305 | for pose in traj: 306 | tt = np.eye(4) 307 | tt[:3,:] = pose.reshape(3,4) 308 | ttt=T.dot(tt).dot(T_inv) 309 | new_traj.append(SE2pos_quat(ttt)) 310 | 311 | return np.array(new_traj) 312 | 313 | def tartan2kitti(traj): 314 | T = np.array([[0,1,0,0], 315 | [0,0,1,0], 316 | [1,0,0,0], 317 | [0,0,0,1]], dtype=np.float32) 318 | T_inv = np.linalg.inv(T) 319 | new_traj = [] 320 | 321 | for pose in traj: 322 | tt = np.eye(4) 323 | tt[:3,:] = pos_quat2SE(pose).reshape(3,4) 324 | ttt=T.dot(tt).dot(T_inv) 325 | new_traj.append(ttt[:3,:].reshape(12)) 326 | 327 | return np.array(new_traj) -------------------------------------------------------------------------------- /Network/rigidmask/networks/large_hourglass.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # This code is base on 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import torch 15 | import torch.nn as nn 16 | 17 | class convolution(nn.Module): 18 | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True): 19 | super(convolution, self).__init__() 20 | 21 | pad = (k - 1) // 2 22 | self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn) 23 | self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential() 24 | self.relu = nn.ReLU(inplace=True) 25 | 26 | def forward(self, x): 27 | conv = self.conv(x) 28 | bn = 
self.bn(conv) 29 | relu = self.relu(bn) 30 | return relu 31 | 32 | class fully_connected(nn.Module): 33 | def __init__(self, inp_dim, out_dim, with_bn=True): 34 | super(fully_connected, self).__init__() 35 | self.with_bn = with_bn 36 | 37 | self.linear = nn.Linear(inp_dim, out_dim) 38 | if self.with_bn: 39 | self.bn = nn.BatchNorm1d(out_dim) 40 | self.relu = nn.ReLU(inplace=True) 41 | 42 | def forward(self, x): 43 | linear = self.linear(x) 44 | bn = self.bn(linear) if self.with_bn else linear 45 | relu = self.relu(bn) 46 | return relu 47 | 48 | class residual(nn.Module): 49 | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True): 50 | super(residual, self).__init__() 51 | 52 | self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False) 53 | self.bn1 = nn.BatchNorm2d(out_dim) 54 | self.relu1 = nn.ReLU(inplace=True) 55 | 56 | self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False) 57 | self.bn2 = nn.BatchNorm2d(out_dim) 58 | 59 | self.skip = nn.Sequential( 60 | nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False), 61 | nn.BatchNorm2d(out_dim) 62 | ) if stride != 1 or inp_dim != out_dim else nn.Sequential() 63 | self.relu = nn.ReLU(inplace=True) 64 | 65 | def forward(self, x): 66 | conv1 = self.conv1(x) 67 | bn1 = self.bn1(conv1) 68 | relu1 = self.relu1(bn1) 69 | 70 | conv2 = self.conv2(relu1) 71 | bn2 = self.bn2(conv2) 72 | 73 | skip = self.skip(x) 74 | return self.relu(bn2 + skip) 75 | 76 | def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs): 77 | layers = [layer(k, inp_dim, out_dim, **kwargs)] 78 | for _ in range(1, modules): 79 | layers.append(layer(k, out_dim, out_dim, **kwargs)) 80 | return nn.Sequential(*layers) 81 | 82 | def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs): 83 | layers = [] 84 | for _ in range(modules - 1): 85 | layers.append(layer(k, inp_dim, inp_dim, **kwargs)) 86 | layers.append(layer(k, inp_dim, out_dim, **kwargs)) 87 | return nn.Sequential(*layers) 88 | 89 | class MergeUp(nn.Module): 90 | def forward(self, up1, up2): 91 | return up1 + up2 92 | 93 | def make_merge_layer(dim): 94 | return MergeUp() 95 | 96 | # def make_pool_layer(dim): 97 | # return nn.MaxPool2d(kernel_size=2, stride=2) 98 | 99 | def make_pool_layer(dim): 100 | return nn.Sequential() 101 | 102 | def make_unpool_layer(dim): 103 | return nn.Upsample(scale_factor=2) 104 | 105 | def make_kp_layer(cnv_dim, curr_dim, out_dim): 106 | return nn.Sequential( 107 | convolution(3, cnv_dim, curr_dim, with_bn=False), 108 | nn.Conv2d(curr_dim, out_dim, (1, 1)) 109 | ) 110 | 111 | def make_inter_layer(dim): 112 | return residual(3, dim, dim) 113 | 114 | def make_cnv_layer(inp_dim, out_dim): 115 | return convolution(3, inp_dim, out_dim) 116 | 117 | class kp_module(nn.Module): 118 | def __init__( 119 | self, n, dims, modules, layer=residual, 120 | make_up_layer=make_layer, make_low_layer=make_layer, 121 | make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr, 122 | make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer, 123 | make_merge_layer=make_merge_layer, **kwargs 124 | ): 125 | super(kp_module, self).__init__() 126 | 127 | self.n = n 128 | 129 | curr_mod = modules[0] 130 | next_mod = modules[1] 131 | 132 | curr_dim = dims[0] 133 | next_dim = dims[1] 134 | 135 | self.up1 = make_up_layer( 136 | 3, curr_dim, curr_dim, curr_mod, 137 | layer=layer, **kwargs 138 | ) 139 | self.max1 = make_pool_layer(curr_dim) 140 | self.low1 = make_hg_layer( 141 | 3, 
curr_dim, next_dim, curr_mod, 142 | layer=layer, **kwargs 143 | ) 144 | self.low2 = kp_module( 145 | n - 1, dims[1:], modules[1:], layer=layer, 146 | make_up_layer=make_up_layer, 147 | make_low_layer=make_low_layer, 148 | make_hg_layer=make_hg_layer, 149 | make_hg_layer_revr=make_hg_layer_revr, 150 | make_pool_layer=make_pool_layer, 151 | make_unpool_layer=make_unpool_layer, 152 | make_merge_layer=make_merge_layer, 153 | **kwargs 154 | ) if self.n > 1 else \ 155 | make_low_layer( 156 | 3, next_dim, next_dim, next_mod, 157 | layer=layer, **kwargs 158 | ) 159 | self.low3 = make_hg_layer_revr( 160 | 3, next_dim, curr_dim, curr_mod, 161 | layer=layer, **kwargs 162 | ) 163 | self.up2 = make_unpool_layer(curr_dim) 164 | 165 | self.merge = make_merge_layer(curr_dim) 166 | 167 | def forward(self, x): 168 | up1 = self.up1(x) 169 | max1 = self.max1(x) 170 | low1 = self.low1(max1) 171 | low2 = self.low2(low1) 172 | low3 = self.low3(low2) 173 | up2 = self.up2(low3) 174 | return self.merge(up1, up2) 175 | 176 | class exkp(nn.Module): 177 | def __init__( 178 | self, n, nstack, dims, modules, heads, pre=None, cnv_dim=256, 179 | make_tl_layer=None, make_br_layer=None, 180 | make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer, 181 | make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer, 182 | make_up_layer=make_layer, make_low_layer=make_layer, 183 | make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr, 184 | make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer, 185 | make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer, 186 | kp_layer=residual 187 | ): 188 | super(exkp, self).__init__() 189 | 190 | self.nstack = nstack 191 | self.heads = heads 192 | 193 | curr_dim = dims[0] 194 | 195 | self.pre = nn.Sequential( 196 | convolution(7, 3, 128, stride=2), 197 | residual(3, 128, 256, stride=2) 198 | ) if pre is None else pre 199 | 200 | self.kps = nn.ModuleList([ 201 | kp_module( 202 | n, dims, modules, layer=kp_layer, 203 | make_up_layer=make_up_layer, 204 | make_low_layer=make_low_layer, 205 | make_hg_layer=make_hg_layer, 206 | make_hg_layer_revr=make_hg_layer_revr, 207 | make_pool_layer=make_pool_layer, 208 | make_unpool_layer=make_unpool_layer, 209 | make_merge_layer=make_merge_layer 210 | ) for _ in range(nstack) 211 | ]) 212 | self.cnvs = nn.ModuleList([ 213 | make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack) 214 | ]) 215 | 216 | self.inters = nn.ModuleList([ 217 | make_inter_layer(curr_dim) for _ in range(nstack - 1) 218 | ]) 219 | 220 | self.inters_ = nn.ModuleList([ 221 | nn.Sequential( 222 | nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False), 223 | nn.BatchNorm2d(curr_dim) 224 | ) for _ in range(nstack - 1) 225 | ]) 226 | self.cnvs_ = nn.ModuleList([ 227 | nn.Sequential( 228 | nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False), 229 | nn.BatchNorm2d(curr_dim) 230 | ) for _ in range(nstack - 1) 231 | ]) 232 | 233 | ## keypoint heatmaps 234 | for head in heads.keys(): 235 | if 'hm' in head: 236 | module = nn.ModuleList([ 237 | make_heat_layer( 238 | cnv_dim, curr_dim, heads[head]) for _ in range(nstack) 239 | ]) 240 | self.__setattr__(head, module) 241 | for heat in self.__getattr__(head): 242 | heat[-1].bias.data.fill_(-2.19) 243 | else: 244 | module = nn.ModuleList([ 245 | make_regr_layer( 246 | cnv_dim, curr_dim, heads[head]) for _ in range(nstack) 247 | ]) 248 | self.__setattr__(head, module) 249 | 250 | 251 | self.relu = nn.ReLU(inplace=True) 252 | 253 | def forward(self, image): 254 | # print('image shape', image.shape) 255 | inter = 
self.pre(image) 256 | outs = [] 257 | 258 | for ind in range(self.nstack): 259 | kp_, cnv_ = self.kps[ind], self.cnvs[ind] 260 | kp = kp_(inter) 261 | cnv = cnv_(kp) 262 | 263 | out = {} 264 | for head in self.heads: 265 | layer = self.__getattr__(head)[ind] 266 | y = layer(cnv) 267 | out[head] = y 268 | 269 | outs.append(out) 270 | if ind < self.nstack - 1: 271 | inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv) 272 | inter = self.relu(inter) 273 | inter = self.inters[ind](inter) 274 | return outs 275 | 276 | 277 | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs): 278 | layers = [layer(kernel, dim0, dim1, stride=2)] 279 | layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)] 280 | return nn.Sequential(*layers) 281 | 282 | 283 | class HourglassNet(exkp): 284 | def __init__(self, heads, num_stacks=2): 285 | n = 5 286 | dims = [256, 256, 384, 384, 384, 512] 287 | modules = [2, 2, 2, 2, 2, 4] 288 | 289 | super(HourglassNet, self).__init__( 290 | n, num_stacks, dims, modules, heads, 291 | make_tl_layer=None, 292 | make_br_layer=None, 293 | make_pool_layer=make_pool_layer, 294 | make_hg_layer=make_hg_layer, 295 | kp_layer=residual, cnv_dim=256 296 | ) 297 | 298 | def get_large_hourglass_net(num_layers, heads, head_conv): 299 | model = HourglassNet(heads, 2) 300 | return model 301 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/resnet_dcn.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Dequan Wang and Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import math 14 | import logging 15 | 16 | import torch 17 | import torch.nn as nn 18 | from .DCNv2.DCN.dcn_v2 import DCN 19 | import torch.utils.model_zoo as model_zoo 20 | 21 | BN_MOMENTUM = 0.1 22 | logger = logging.getLogger(__name__) 23 | 24 | model_urls = { 25 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 26 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 27 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 28 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 29 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 30 | } 31 | 32 | def conv3x3(in_planes, out_planes, stride=1): 33 | """3x3 convolution with padding""" 34 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 35 | padding=1, bias=False) 36 | 37 | 38 | class BasicBlock(nn.Module): 39 | expansion = 1 40 | 41 | def __init__(self, inplanes, planes, stride=1, downsample=None): 42 | super(BasicBlock, self).__init__() 43 | self.conv1 = conv3x3(inplanes, planes, stride) 44 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 45 | self.relu = nn.ReLU(inplace=True) 46 | self.conv2 = conv3x3(planes, planes) 47 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 48 | self.downsample = downsample 49 | self.stride = stride 50 | 51 | def forward(self, x): 52 | residual = x 53 | 54 | out = self.conv1(x) 55 | out = self.bn1(out) 56 | out = self.relu(out) 57 | 58 | out = self.conv2(out) 59 | out 
= self.bn2(out) 60 | 61 | if self.downsample is not None: 62 | residual = self.downsample(x) 63 | 64 | out += residual 65 | out = self.relu(out) 66 | 67 | return out 68 | 69 | 70 | class Bottleneck(nn.Module): 71 | expansion = 4 72 | 73 | def __init__(self, inplanes, planes, stride=1, downsample=None): 74 | super(Bottleneck, self).__init__() 75 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 76 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 77 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 78 | padding=1, bias=False) 79 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 80 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 81 | bias=False) 82 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 83 | momentum=BN_MOMENTUM) 84 | self.relu = nn.ReLU(inplace=True) 85 | self.downsample = downsample 86 | self.stride = stride 87 | 88 | def forward(self, x): 89 | residual = x 90 | 91 | out = self.conv1(x) 92 | out = self.bn1(out) 93 | out = self.relu(out) 94 | 95 | out = self.conv2(out) 96 | out = self.bn2(out) 97 | out = self.relu(out) 98 | 99 | out = self.conv3(out) 100 | out = self.bn3(out) 101 | 102 | if self.downsample is not None: 103 | residual = self.downsample(x) 104 | 105 | out += residual 106 | out = self.relu(out) 107 | 108 | return out 109 | 110 | def fill_up_weights(up): 111 | w = up.weight.data 112 | f = math.ceil(w.size(2) / 2) 113 | c = (2 * f - 1 - f % 2) / (2. * f) 114 | for i in range(w.size(2)): 115 | for j in range(w.size(3)): 116 | w[0, 0, i, j] = \ 117 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 118 | for c in range(1, w.size(0)): 119 | w[c, 0, :, :] = w[0, 0, :, :] 120 | 121 | def fill_fc_weights(layers): 122 | for m in layers.modules(): 123 | if isinstance(m, nn.Conv2d): 124 | nn.init.normal_(m.weight, std=0.001) 125 | # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu') 126 | # torch.nn.init.xavier_normal_(m.weight.data) 127 | if m.bias is not None: 128 | nn.init.constant_(m.bias, 0) 129 | 130 | class PoseResNet(nn.Module): 131 | 132 | def __init__(self, block, layers, heads, head_conv): 133 | self.inplanes = 64 134 | self.heads = heads 135 | self.deconv_with_bias = False 136 | 137 | super(PoseResNet, self).__init__() 138 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 139 | bias=False) 140 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 141 | self.relu = nn.ReLU(inplace=True) 142 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 143 | self.layer1 = self._make_layer(block, 64, layers[0]) 144 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 145 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 146 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 147 | 148 | # used for deconv layers 149 | self.deconv_layers = self._make_deconv_layer( 150 | 3, 151 | [256, 128, 64], 152 | [4, 4, 4], 153 | ) 154 | 155 | for head in self.heads: 156 | classes = self.heads[head] 157 | if head_conv > 0: 158 | fc = nn.Sequential( 159 | nn.Conv2d(64, head_conv, 160 | kernel_size=3, padding=1, bias=True), 161 | nn.ReLU(inplace=True), 162 | nn.Conv2d(head_conv, classes, 163 | kernel_size=1, stride=1, 164 | padding=0, bias=True)) 165 | if 'hm' in head: 166 | fc[-1].bias.data.fill_(-2.19) 167 | else: 168 | fill_fc_weights(fc) 169 | else: 170 | fc = nn.Conv2d(64, classes, 171 | kernel_size=1, stride=1, 172 | padding=0, bias=True) 173 | if 'hm' in head: 174 | fc.bias.data.fill_(-2.19) 175 | else: 176 | 
fill_fc_weights(fc) 177 | self.__setattr__(head, fc) 178 | 179 | def _make_layer(self, block, planes, blocks, stride=1): 180 | downsample = None 181 | if stride != 1 or self.inplanes != planes * block.expansion: 182 | downsample = nn.Sequential( 183 | nn.Conv2d(self.inplanes, planes * block.expansion, 184 | kernel_size=1, stride=stride, bias=False), 185 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 186 | ) 187 | 188 | layers = [] 189 | layers.append(block(self.inplanes, planes, stride, downsample)) 190 | self.inplanes = planes * block.expansion 191 | for i in range(1, blocks): 192 | layers.append(block(self.inplanes, planes)) 193 | 194 | return nn.Sequential(*layers) 195 | 196 | def _get_deconv_cfg(self, deconv_kernel, index): 197 | if deconv_kernel == 4: 198 | padding = 1 199 | output_padding = 0 200 | elif deconv_kernel == 3: 201 | padding = 1 202 | output_padding = 1 203 | elif deconv_kernel == 2: 204 | padding = 0 205 | output_padding = 0 206 | 207 | return deconv_kernel, padding, output_padding 208 | 209 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 210 | assert num_layers == len(num_filters), \ 211 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 212 | assert num_layers == len(num_kernels), \ 213 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 214 | 215 | layers = [] 216 | for i in range(num_layers): 217 | kernel, padding, output_padding = \ 218 | self._get_deconv_cfg(num_kernels[i], i) 219 | 220 | planes = num_filters[i] 221 | fc = DCN(self.inplanes, planes, 222 | kernel_size=(3,3), stride=1, 223 | padding=1, dilation=1, deformable_groups=1) 224 | # fc = nn.Conv2d(self.inplanes, planes, 225 | # kernel_size=3, stride=1, 226 | # padding=1, dilation=1, bias=False) 227 | # fill_fc_weights(fc) 228 | up = nn.ConvTranspose2d( 229 | in_channels=planes, 230 | out_channels=planes, 231 | kernel_size=kernel, 232 | stride=2, 233 | padding=padding, 234 | output_padding=output_padding, 235 | bias=self.deconv_with_bias) 236 | fill_up_weights(up) 237 | 238 | layers.append(fc) 239 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 240 | layers.append(nn.ReLU(inplace=True)) 241 | layers.append(up) 242 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 243 | layers.append(nn.ReLU(inplace=True)) 244 | self.inplanes = planes 245 | 246 | return nn.Sequential(*layers) 247 | 248 | def forward(self, x): 249 | x = self.conv1(x) 250 | x = self.bn1(x) 251 | x = self.relu(x) 252 | x = self.maxpool(x) 253 | 254 | x = self.layer1(x) 255 | x = self.layer2(x) 256 | x = self.layer3(x) 257 | x = self.layer4(x) 258 | 259 | x = self.deconv_layers(x) 260 | ret = {} 261 | for head in self.heads: 262 | ret[head] = self.__getattr__(head)(x) 263 | return [ret] 264 | 265 | def init_weights(self, num_layers): 266 | if 1: 267 | url = model_urls['resnet{}'.format(num_layers)] 268 | pretrained_state_dict = model_zoo.load_url(url) 269 | print('=> loading pretrained model {}'.format(url)) 270 | self.load_state_dict(pretrained_state_dict, strict=False) 271 | print('=> init deconv weights from normal distribution') 272 | for name, m in self.deconv_layers.named_modules(): 273 | if isinstance(m, nn.BatchNorm2d): 274 | nn.init.constant_(m.weight, 1) 275 | nn.init.constant_(m.bias, 0) 276 | 277 | 278 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 279 | 34: (BasicBlock, [3, 4, 6, 3]), 280 | 50: (Bottleneck, [3, 4, 6, 3]), 281 | 101: (Bottleneck, [3, 4, 23, 3]), 282 | 152: (Bottleneck, [3, 8, 36, 3])} 283 | 284 | 285 | def 
get_pose_net(num_layers, heads, head_conv=256): 286 | block_class, layers = resnet_spec[num_layers] 287 | 288 | model = PoseResNet(block_class, layers, heads, head_conv=head_conv) 289 | model.init_weights(num_layers) 290 | return model 291 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/msra_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.utils.model_zoo as model_zoo 17 | 18 | BN_MOMENTUM = 0.1 19 | 20 | model_urls = { 21 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 22 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 23 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 24 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 25 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 26 | } 27 | 28 | def conv3x3(in_planes, out_planes, stride=1): 29 | """3x3 convolution with padding""" 30 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 31 | padding=1, bias=False) 32 | 33 | 34 | class BasicBlock(nn.Module): 35 | expansion = 1 36 | 37 | def __init__(self, inplanes, planes, stride=1, downsample=None): 38 | super(BasicBlock, self).__init__() 39 | self.conv1 = conv3x3(inplanes, planes, stride) 40 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 41 | self.relu = nn.ReLU(inplace=True) 42 | self.conv2 = conv3x3(planes, planes) 43 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 44 | self.downsample = downsample 45 | self.stride = stride 46 | 47 | def forward(self, x): 48 | residual = x 49 | 50 | out = self.conv1(x) 51 | out = self.bn1(out) 52 | out = self.relu(out) 53 | 54 | out = self.conv2(out) 55 | out = self.bn2(out) 56 | 57 | if self.downsample is not None: 58 | residual = self.downsample(x) 59 | 60 | out += residual 61 | out = self.relu(out) 62 | 63 | return out 64 | 65 | 66 | class Bottleneck(nn.Module): 67 | expansion = 4 68 | 69 | def __init__(self, inplanes, planes, stride=1, downsample=None): 70 | super(Bottleneck, self).__init__() 71 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 72 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 73 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 74 | padding=1, bias=False) 75 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 76 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 77 | bias=False) 78 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 79 | momentum=BN_MOMENTUM) 80 | self.relu = nn.ReLU(inplace=True) 81 | self.downsample = downsample 82 | self.stride = stride 83 | 84 | def forward(self, x): 85 | residual = x 86 | 87 | out = self.conv1(x) 88 | out = self.bn1(out) 89 | out = self.relu(out) 90 | 91 | out = self.conv2(out) 92 | out = self.bn2(out) 93 | out = self.relu(out) 94 | 95 | out = self.conv3(out) 96 | out = self.bn3(out) 97 | 98 | if self.downsample is not 
None: 99 | residual = self.downsample(x) 100 | 101 | out += residual 102 | out = self.relu(out) 103 | 104 | return out 105 | 106 | 107 | class PoseResNet(nn.Module): 108 | 109 | def __init__(self, block, layers, heads, head_conv, **kwargs): 110 | self.inplanes = 64 111 | self.deconv_with_bias = False 112 | self.heads = heads 113 | 114 | super(PoseResNet, self).__init__() 115 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 116 | bias=False) 117 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 118 | self.relu = nn.ReLU(inplace=True) 119 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 120 | self.layer1 = self._make_layer(block, 64, layers[0]) 121 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 122 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 123 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 124 | 125 | # used for deconv layers 126 | self.deconv_layers = self._make_deconv_layer( 127 | 3, 128 | [256, 256, 256], 129 | [4, 4, 4], 130 | ) 131 | # self.final_layer = [] 132 | 133 | for head in sorted(self.heads): 134 | num_output = self.heads[head] 135 | if head_conv > 0: 136 | fc = nn.Sequential( 137 | nn.Conv2d(256, head_conv, 138 | kernel_size=3, padding=1, bias=True), 139 | nn.ReLU(inplace=True), 140 | nn.Conv2d(head_conv, num_output, 141 | kernel_size=1, stride=1, padding=0)) 142 | else: 143 | fc = nn.Conv2d( 144 | in_channels=256, 145 | out_channels=num_output, 146 | kernel_size=1, 147 | stride=1, 148 | padding=0 149 | ) 150 | self.__setattr__(head, fc) 151 | 152 | # self.final_layer = nn.ModuleList(self.final_layer) 153 | 154 | def _make_layer(self, block, planes, blocks, stride=1): 155 | downsample = None 156 | if stride != 1 or self.inplanes != planes * block.expansion: 157 | downsample = nn.Sequential( 158 | nn.Conv2d(self.inplanes, planes * block.expansion, 159 | kernel_size=1, stride=stride, bias=False), 160 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 161 | ) 162 | 163 | layers = [] 164 | layers.append(block(self.inplanes, planes, stride, downsample)) 165 | self.inplanes = planes * block.expansion 166 | for i in range(1, blocks): 167 | layers.append(block(self.inplanes, planes)) 168 | 169 | return nn.Sequential(*layers) 170 | 171 | def _get_deconv_cfg(self, deconv_kernel, index): 172 | if deconv_kernel == 4: 173 | padding = 1 174 | output_padding = 0 175 | elif deconv_kernel == 3: 176 | padding = 1 177 | output_padding = 1 178 | elif deconv_kernel == 2: 179 | padding = 0 180 | output_padding = 0 181 | 182 | return deconv_kernel, padding, output_padding 183 | 184 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 185 | assert num_layers == len(num_filters), \ 186 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 187 | assert num_layers == len(num_kernels), \ 188 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 189 | 190 | layers = [] 191 | for i in range(num_layers): 192 | kernel, padding, output_padding = \ 193 | self._get_deconv_cfg(num_kernels[i], i) 194 | 195 | planes = num_filters[i] 196 | layers.append( 197 | nn.ConvTranspose2d( 198 | in_channels=self.inplanes, 199 | out_channels=planes, 200 | kernel_size=kernel, 201 | stride=2, 202 | padding=padding, 203 | output_padding=output_padding, 204 | bias=self.deconv_with_bias)) 205 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 206 | layers.append(nn.ReLU(inplace=True)) 207 | self.inplanes = planes 208 | 209 | return nn.Sequential(*layers) 210 | 211 | 
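# A minimal construction sketch (illustrative only; the head names and channel
# counts below are placeholders rather than the ones used elsewhere in this
# repo, and init_weights downloads ImageNet-pretrained ResNet weights):
#
#   heads = {'hm': 80, 'wh': 2, 'reg': 2}        # head name -> output channels
#   net = get_pose_net(num_layers=18, heads=heads, head_conv=64)
#   out = net(torch.randn(1, 3, 512, 512))[0]    # forward returns a one-element list of dicts
#   # each out[h] has shape (1, heads[h], 128, 128): the output maps are at stride 4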
def forward(self, x): 212 | x = self.conv1(x) 213 | x = self.bn1(x) 214 | x = self.relu(x) 215 | x = self.maxpool(x) 216 | 217 | x = self.layer1(x) 218 | x = self.layer2(x) 219 | x = self.layer3(x) 220 | x = self.layer4(x) 221 | 222 | x = self.deconv_layers(x) 223 | ret = {} 224 | for head in self.heads: 225 | ret[head] = self.__getattr__(head)(x) 226 | return [ret] 227 | 228 | def init_weights(self, num_layers, pretrained=True): 229 | if pretrained: 230 | # print('=> init resnet deconv weights from normal distribution') 231 | for _, m in self.deconv_layers.named_modules(): 232 | if isinstance(m, nn.ConvTranspose2d): 233 | # print('=> init {}.weight as normal(0, 0.001)'.format(name)) 234 | # print('=> init {}.bias as 0'.format(name)) 235 | nn.init.normal_(m.weight, std=0.001) 236 | if self.deconv_with_bias: 237 | nn.init.constant_(m.bias, 0) 238 | elif isinstance(m, nn.BatchNorm2d): 239 | # print('=> init {}.weight as 1'.format(name)) 240 | # print('=> init {}.bias as 0'.format(name)) 241 | nn.init.constant_(m.weight, 1) 242 | nn.init.constant_(m.bias, 0) 243 | # print('=> init final conv weights from normal distribution') 244 | for head in self.heads: 245 | final_layer = self.__getattr__(head) 246 | for i, m in enumerate(final_layer.modules()): 247 | if isinstance(m, nn.Conv2d): 248 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 249 | # print('=> init {}.weight as normal(0, 0.001)'.format(name)) 250 | # print('=> init {}.bias as 0'.format(name)) 251 | if m.weight.shape[0] == self.heads[head]: 252 | if 'hm' in head: 253 | nn.init.constant_(m.bias, -2.19) 254 | else: 255 | nn.init.normal_(m.weight, std=0.001) 256 | nn.init.constant_(m.bias, 0) 257 | #pretrained_state_dict = torch.load(pretrained) 258 | url = model_urls['resnet{}'.format(num_layers)] 259 | pretrained_state_dict = model_zoo.load_url(url) 260 | print('=> loading pretrained model {}'.format(url)) 261 | self.load_state_dict(pretrained_state_dict, strict=False) 262 | else: 263 | print('=> imagenet pretrained model dose not exist') 264 | print('=> please download it first') 265 | raise ValueError('imagenet pretrained model does not exist') 266 | 267 | 268 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 269 | 34: (BasicBlock, [3, 4, 6, 3]), 270 | 50: (Bottleneck, [3, 4, 6, 3]), 271 | 101: (Bottleneck, [3, 4, 23, 3]), 272 | 152: (Bottleneck, [3, 8, 36, 3])} 273 | 274 | 275 | def get_pose_net(num_layers, heads, head_conv): 276 | block_class, layers = resnet_spec[num_layers] 277 | 278 | model = PoseResNet(block_class, layers, heads, head_conv=head_conv) 279 | model.init_weights(num_layers, pretrained=True) 280 | return model 281 | -------------------------------------------------------------------------------- /Network/PWC/PWCNet.py: -------------------------------------------------------------------------------- 1 | """ 2 | implementation of the PWC-DC network for optical flow estimation by Sun et al., 2018 3 | 4 | Jinwei Gu and Zhile Ren 5 | 6 | """ 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import os 12 | import numpy as np 13 | from .correlation import FunctionCorrelation 14 | import cv2 # debug 15 | 16 | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1): 17 | return nn.Sequential( 18 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 19 | padding=padding, dilation=dilation, bias=True), 20 | nn.LeakyReLU(0.1)) 21 | 22 | def predict_flow(in_planes): 23 | return 
nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True) 24 | 25 | def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1): 26 | return nn.ConvTranspose2d(in_planes, out_planes, kernel_size, stride, padding, bias=True) 27 | 28 | 29 | 30 | class PWCDCNet(nn.Module): 31 | """ 32 | PWC-DC net. add dilation convolution and densenet connections 33 | 34 | """ 35 | def __init__(self, md=4, flow_norm=20.0): 36 | """ 37 | input: md --- maximum displacement (for correlation. default: 4), after warpping 38 | 39 | """ 40 | super(PWCDCNet,self).__init__() 41 | 42 | self.flow_norm = flow_norm 43 | 44 | self.conv1a = conv(3, 16, kernel_size=3, stride=2) 45 | self.conv1aa = conv(16, 16, kernel_size=3, stride=1) 46 | self.conv1b = conv(16, 16, kernel_size=3, stride=1) 47 | self.conv2a = conv(16, 32, kernel_size=3, stride=2) 48 | self.conv2aa = conv(32, 32, kernel_size=3, stride=1) 49 | self.conv2b = conv(32, 32, kernel_size=3, stride=1) 50 | self.conv3a = conv(32, 64, kernel_size=3, stride=2) 51 | self.conv3aa = conv(64, 64, kernel_size=3, stride=1) 52 | self.conv3b = conv(64, 64, kernel_size=3, stride=1) 53 | self.conv4a = conv(64, 96, kernel_size=3, stride=2) 54 | self.conv4aa = conv(96, 96, kernel_size=3, stride=1) 55 | self.conv4b = conv(96, 96, kernel_size=3, stride=1) 56 | self.conv5a = conv(96, 128, kernel_size=3, stride=2) 57 | self.conv5aa = conv(128,128, kernel_size=3, stride=1) 58 | self.conv5b = conv(128,128, kernel_size=3, stride=1) 59 | self.conv6aa = conv(128,196, kernel_size=3, stride=2) 60 | self.conv6a = conv(196,196, kernel_size=3, stride=1) 61 | self.conv6b = conv(196,196, kernel_size=3, stride=1) 62 | 63 | # self.corr = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1) 64 | self.leakyRELU = nn.LeakyReLU(0.1) 65 | 66 | nd = (2*md+1)**2 67 | dd = np.cumsum([128,128,96,64,32]) 68 | 69 | od = nd 70 | self.conv6_0 = conv(od, 128, kernel_size=3, stride=1) 71 | self.conv6_1 = conv(od+dd[0],128, kernel_size=3, stride=1) 72 | self.conv6_2 = conv(od+dd[1],96, kernel_size=3, stride=1) 73 | self.conv6_3 = conv(od+dd[2],64, kernel_size=3, stride=1) 74 | self.conv6_4 = conv(od+dd[3],32, kernel_size=3, stride=1) 75 | self.predict_flow6 = predict_flow(od+dd[4]) 76 | self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 77 | self.upfeat6 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 78 | 79 | od = nd+128+4 80 | self.conv5_0 = conv(od, 128, kernel_size=3, stride=1) 81 | self.conv5_1 = conv(od+dd[0],128, kernel_size=3, stride=1) 82 | self.conv5_2 = conv(od+dd[1],96, kernel_size=3, stride=1) 83 | self.conv5_3 = conv(od+dd[2],64, kernel_size=3, stride=1) 84 | self.conv5_4 = conv(od+dd[3],32, kernel_size=3, stride=1) 85 | self.predict_flow5 = predict_flow(od+dd[4]) 86 | self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 87 | self.upfeat5 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 88 | 89 | od = nd+96+4 90 | self.conv4_0 = conv(od, 128, kernel_size=3, stride=1) 91 | self.conv4_1 = conv(od+dd[0],128, kernel_size=3, stride=1) 92 | self.conv4_2 = conv(od+dd[1],96, kernel_size=3, stride=1) 93 | self.conv4_3 = conv(od+dd[2],64, kernel_size=3, stride=1) 94 | self.conv4_4 = conv(od+dd[3],32, kernel_size=3, stride=1) 95 | self.predict_flow4 = predict_flow(od+dd[4]) 96 | self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 97 | self.upfeat4 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 98 | 99 | od = nd+64+4 100 | self.conv3_0 = conv(od, 128, kernel_size=3, stride=1) 101 | 
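# Decoder channel bookkeeping (the same pattern repeats at every pyramid level
# in this constructor): with md=4 the correlation volume has
# nd = (2*md+1)**2 = 81 channels; each conv{l}_{i} concatenates its output with
# its input (DenseNet-style), so the running width grows by
# dd = cumsum([128,128,96,64,32]); and od adds the encoder feature channels of
# that level plus 4 (2 upsampled-flow + 2 upsampled-feature channels),
# e.g. od = nd + 64 + 4 at this level.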
self.conv3_1 = conv(od+dd[0],128, kernel_size=3, stride=1) 102 | self.conv3_2 = conv(od+dd[1],96, kernel_size=3, stride=1) 103 | self.conv3_3 = conv(od+dd[2],64, kernel_size=3, stride=1) 104 | self.conv3_4 = conv(od+dd[3],32, kernel_size=3, stride=1) 105 | self.predict_flow3 = predict_flow(od+dd[4]) 106 | self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 107 | self.upfeat3 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 108 | 109 | od = nd+32+4 110 | self.conv2_0 = conv(od, 128, kernel_size=3, stride=1) 111 | self.conv2_1 = conv(od+dd[0],128, kernel_size=3, stride=1) 112 | self.conv2_2 = conv(od+dd[1],96, kernel_size=3, stride=1) 113 | self.conv2_3 = conv(od+dd[2],64, kernel_size=3, stride=1) 114 | self.conv2_4 = conv(od+dd[3],32, kernel_size=3, stride=1) 115 | self.predict_flow2 = predict_flow(od+dd[4]) 116 | self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 117 | 118 | self.dc_conv1 = conv(od+dd[4], 128, kernel_size=3, stride=1, padding=1, dilation=1) 119 | self.dc_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2) 120 | self.dc_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4) 121 | self.dc_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8) 122 | self.dc_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16) 123 | self.dc_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1) 124 | self.dc_conv7 = predict_flow(32) 125 | 126 | for m in self.modules(): 127 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 128 | nn.init.kaiming_normal(m.weight.data, mode='fan_in') 129 | if m.bias is not None: 130 | m.bias.data.zero_() 131 | 132 | 133 | def warp(self, x, flo): 134 | """ 135 | warp an image/tensor (im2) back to im1, according to the optical flow 136 | 137 | x: [B, C, H, W] (im2) 138 | flo: [B, 2, H, W] flow 139 | 140 | """ 141 | B, C, H, W = x.size() 142 | # mesh grid 143 | xx = torch.arange(0, W).view(1,-1).repeat(H,1) 144 | yy = torch.arange(0, H).view(-1,1).repeat(1,W) 145 | xx = xx.view(1,1,H,W).repeat(B,1,1,1) 146 | yy = yy.view(1,1,H,W).repeat(B,1,1,1) 147 | grid = torch.cat((xx,yy),1).float() 148 | 149 | if x.is_cuda: 150 | grid = grid.cuda() 151 | vgrid = grid + flo 152 | 153 | # scale grid to [-1,1] 154 | vgrid[:,0,:,:] = 2.0*vgrid[:,0,:,:].clone() / max(W-1,1)-1.0 155 | vgrid[:,1,:,:] = 2.0*vgrid[:,1,:,:].clone() / max(H-1,1)-1.0 156 | 157 | vgrid = vgrid.permute(0,2,3,1) 158 | output = nn.functional.grid_sample(x, vgrid, align_corners=True) 159 | mask = torch.ones(x.size()).cuda() 160 | mask = nn.functional.grid_sample(mask, vgrid, align_corners=True) 161 | 162 | # if W==128: 163 | # np.save('mask.npy', mask.cpu().data.numpy()) 164 | # np.save('warp.npy', output.cpu().data.numpy()) 165 | 166 | mask[mask<0.9999] = 0 167 | mask[mask>0] = 1 168 | 169 | return output*mask 170 | 171 | def multi_scale_conv(self, conv0_func, conv1_func, conv2_func, conv3_func, conv4_func, input_feat): 172 | x = torch.cat((conv0_func(input_feat), input_feat),1) 173 | x = torch.cat((conv1_func(x), x),1) 174 | x = torch.cat((conv2_func(x), x),1) 175 | x = torch.cat((conv3_func(x), x),1) 176 | x = torch.cat((conv4_func(x), x),1) 177 | return x 178 | 179 | def concate_two_layers(self, pred_func, decon_func, upfeat_func, feat_high, feat_low1, feat_low2, scale): 180 | flow_high = pred_func(feat_high) 181 | up_flow_high = decon_func(flow_high) 182 | up_feat_high = upfeat_func(feat_high) 183 | 184 | warp_feat = self.warp(feat_low2, up_flow_high*scale) 185 | corr_low = 
FunctionCorrelation(tenFirst=feat_low1, tenSecond=warp_feat) 186 | corr_low = self.leakyRELU(corr_low) 187 | x = torch.cat((corr_low, feat_low1, up_flow_high, up_feat_high), 1) 188 | 189 | return x, flow_high 190 | 191 | def forward(self,x): 192 | im1 = x[:,0:3,...] 193 | im2 = x[:,3:6,...] 194 | 195 | c11 = self.conv1b(self.conv1aa(self.conv1a(im1))) 196 | c21 = self.conv1b(self.conv1aa(self.conv1a(im2))) 197 | c12 = self.conv2b(self.conv2aa(self.conv2a(c11))) 198 | c22 = self.conv2b(self.conv2aa(self.conv2a(c21))) 199 | c13 = self.conv3b(self.conv3aa(self.conv3a(c12))) 200 | c23 = self.conv3b(self.conv3aa(self.conv3a(c22))) 201 | c14 = self.conv4b(self.conv4aa(self.conv4a(c13))) 202 | c24 = self.conv4b(self.conv4aa(self.conv4a(c23))) 203 | c15 = self.conv5b(self.conv5aa(self.conv5a(c14))) 204 | c25 = self.conv5b(self.conv5aa(self.conv5a(c24))) 205 | c16 = self.conv6b(self.conv6a(self.conv6aa(c15))) 206 | c26 = self.conv6b(self.conv6a(self.conv6aa(c25))) 207 | 208 | 209 | # corr6 = self.corr(c16, c26) 210 | corr6 = FunctionCorrelation(tenFirst=c16, tenSecond=c26) 211 | corr6 = self.leakyRELU(corr6) 212 | 213 | x = self.multi_scale_conv(self.conv6_0, self.conv6_1, self.conv6_2, self.conv6_3, self.conv6_4, corr6) 214 | x, flow6 = self.concate_two_layers(self.predict_flow6, self.deconv6, self.upfeat6, x, c15, c25, 0.625) 215 | 216 | x = self.multi_scale_conv(self.conv5_0, self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4, x) 217 | x, flow5 = self.concate_two_layers(self.predict_flow5, self.deconv5, self.upfeat5, x, c14, c24, 1.25) 218 | 219 | x = self.multi_scale_conv(self.conv4_0, self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4, x) 220 | x, flow4 = self.concate_two_layers(self.predict_flow4, self.deconv4, self.upfeat4, x, c13, c23, 2.5) 221 | 222 | x = self.multi_scale_conv(self.conv3_0, self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4, x) 223 | x, flow3 = self.concate_two_layers(self.predict_flow3, self.deconv3, self.upfeat3, x, c12, c22, 5.0) 224 | 225 | x = self.multi_scale_conv(self.conv2_0, self.conv2_1, self.conv2_2, self.conv2_3, self.conv2_4, x) 226 | 227 | flow2 = self.predict_flow2(x) 228 | 229 | x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x)))) 230 | refine = self.dc_conv7(self.dc_conv6(self.dc_conv5(x))) 231 | flow2 = flow2 + refine 232 | 233 | return flow2 234 | 235 | 236 | def pwc_dc_net(path=None): 237 | 238 | model = PWCDCNet() 239 | if path is not None: 240 | data = torch.load(path) 241 | if 'state_dict' in data.keys(): 242 | model.load_state_dict(data['state_dict']) 243 | else: 244 | model.load_state_dict(data) 245 | return model 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include <vector> 2 | #include "cpu/dcn_v2_im2col_cpu.h" 3 | #include <iostream> 4 | 5 | #include <ATen/ATen.h> 6 | //#include <ATen/cuda/CUDAContext.h> 7 | 8 | #include <TH/TH.h> 9 | //#include <THC/THCAtomics.cuh> 10 | //#include <THC/THCDeviceUtils.cuh> 11 | 12 | //extern THCState *state; 13 | 14 | // author: Charles Shang 15 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 16 | 17 | // modified from the CUDA version for CPU use by Daniel K. 
Suhendro 18 | 19 | // edit by: James Bockman and Matthew Howe 20 | // modified for torch implementation to remove use of deprecated torch access to Blas 21 | 22 | at::Tensor 23 | dcn_v2_cpu_forward(const at::Tensor &input, 24 | const at::Tensor &weight, 25 | const at::Tensor &bias, 26 | const at::Tensor &offset, 27 | const at::Tensor &mask, 28 | const int kernel_h, 29 | const int kernel_w, 30 | const int stride_h, 31 | const int stride_w, 32 | const int pad_h, 33 | const int pad_w, 34 | const int dilation_h, 35 | const int dilation_w, 36 | const int deformable_group) 37 | { 38 | // THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask)); 39 | /*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); 40 | AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor"); 41 | AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor"); 42 | AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor"); 43 | AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");*/ 44 | 45 | const int batch = input.size(0); 46 | const int channels = input.size(1); 47 | const int height = input.size(2); 48 | const int width = input.size(3); 49 | 50 | const int channels_out = weight.size(0); 51 | const int channels_kernel = weight.size(1); 52 | const int kernel_h_ = weight.size(2); 53 | const int kernel_w_ = weight.size(3); 54 | 55 | // printf("Kernels: %d %d %d %d\n", kernel_h_, kernel_w_, kernel_w, kernel_h); 56 | // printf("Channels: %d %d\n", channels, channels_kernel); 57 | // printf("Channels: %d %d\n", channels_out, channels_kernel); 58 | 59 | AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w, 60 | "Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h, kernel_w, kernel_h_, kernel_w_); 61 | 62 | AT_ASSERTM(channels == channels_kernel, 63 | "Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel); 64 | 65 | const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; 66 | const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; 67 | 68 | // auto ones = at::ones({height_out, width_out}, input.options()); 69 | auto ones = at::ones({bias.sizes()[0], height_out, width_out}, input.options()); 70 | auto columns = at::empty({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options()); 71 | auto output = at::zeros({batch, channels_out, height_out, width_out}, input.options()); 72 | 73 | using scalar_t = float; 74 | for (int b = 0; b < batch; b++) 75 | { 76 | auto input_n = input.select(0, b); 77 | auto offset_n = offset.select(0, b); 78 | auto mask_n = mask.select(0, b); 79 | auto output_n = output.select(0, b); 80 | // std::cout << "output_n: " << output_n << "output.select(0,b): " << output.select(0,b) << "\n"; 81 | 82 | // Do Bias first: 83 | // M,N,K are dims of matrix A and B 84 | // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) 85 | // (N x 1) (1 x M) 86 | 87 | // torch implementation 88 | auto ones_T = at::transpose(ones.contiguous(), 2, 0); 89 | ones_T = at::mul(ones_T, bias.contiguous()); 90 | ones_T = at::transpose(ones_T, 2, 0); 91 | output_n = at::add(output_n, ones_T); 92 | 93 | modulated_deformable_im2col_cpu(input_n.data_ptr<scalar_t>(), 94 | offset_n.data_ptr<scalar_t>(), 95 | mask_n.data_ptr<scalar_t>(), 96 | 1, channels, height, width, 97 | height_out, width_out, kernel_h, kernel_w, 98 | pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, 99 | deformable_group, 100 | columns.data_ptr<scalar_t>()); 101 | 102 
| //(k * m) x (m * n) 103 | // Y = WC 104 | 105 | // torch implementation 106 | auto weight_flat = weight.view({channels_out, channels * kernel_h * kernel_w}); 107 | auto product = at::matmul(weight_flat, columns); 108 | output.select(0, b) = at::add(output_n, product.view({channels_out, height_out, width_out})); 109 | } 110 | return output; 111 | } 112 | 113 | std::vector<at::Tensor> dcn_v2_cpu_backward(const at::Tensor &input, 114 | const at::Tensor &weight, 115 | const at::Tensor &bias, 116 | const at::Tensor &offset, 117 | const at::Tensor &mask, 118 | const at::Tensor &grad_output, 119 | int kernel_h, int kernel_w, 120 | int stride_h, int stride_w, 121 | int pad_h, int pad_w, 122 | int dilation_h, int dilation_w, 123 | int deformable_group) 124 | { 125 | 126 | THArgCheck(input.is_contiguous(), 1, "input tensor has to be contiguous"); 127 | THArgCheck(weight.is_contiguous(), 2, "weight tensor has to be contiguous"); 128 | 129 | /*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); 130 | AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor"); 131 | AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor"); 132 | AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor"); 133 | AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");*/ 134 | 135 | const int batch = input.size(0); 136 | const int channels = input.size(1); 137 | const int height = input.size(2); 138 | const int width = input.size(3); 139 | 140 | const int channels_out = weight.size(0); 141 | const int channels_kernel = weight.size(1); 142 | const int kernel_h_ = weight.size(2); 143 | const int kernel_w_ = weight.size(3); 144 | 145 | AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w, 146 | "Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h, kernel_w, kernel_h_, kernel_w_); 147 | 148 | AT_ASSERTM(channels == channels_kernel, 149 | "Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel); 150 | 151 | const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; 152 | const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; 153 | 154 | auto ones = at::ones({height_out, width_out}, input.options()); 155 | auto columns = at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options()); 156 | auto output = at::empty({batch, channels_out, height_out, width_out}, input.options()); 157 | 158 | auto grad_input = at::zeros_like(input); 159 | auto grad_weight = at::zeros_like(weight); 160 | auto grad_bias = at::zeros_like(bias); 161 | auto grad_offset = at::zeros_like(offset); 162 | auto grad_mask = at::zeros_like(mask); 163 | 164 | using scalar_t = float; 165 | 166 | for (int b = 0; b < batch; b++) 167 | { 168 | auto input_n = input.select(0, b); 169 | auto offset_n = offset.select(0, b); 170 | auto mask_n = mask.select(0, b); 171 | auto grad_output_n = grad_output.select(0, b); 172 | auto grad_input_n = grad_input.select(0, b); 173 | auto grad_offset_n = grad_offset.select(0, b); 174 | auto grad_mask_n = grad_mask.select(0, b); 175 | 176 | 177 | 178 | // Torch implementation 179 | auto weight_flat = weight.view({channels_out, channels*kernel_h*kernel_w}); 180 | weight_flat = at::transpose(weight_flat, 1, 0); 181 | auto grad_output_n_flat = grad_output_n.view({channels_out, height_out*width_out}); 182 | columns = at::matmul(weight_flat, grad_output_n_flat); 183 | 184 | // gradient w.r.t. 
input coordinate data 185 | modulated_deformable_col2im_coord_cpu(columns.data_ptr<scalar_t>(), 186 | input_n.data_ptr<scalar_t>(), 187 | offset_n.data_ptr<scalar_t>(), 188 | mask_n.data_ptr<scalar_t>(), 189 | 1, channels, height, width, 190 | height_out, width_out, kernel_h, kernel_w, 191 | pad_h, pad_w, stride_h, stride_w, 192 | dilation_h, dilation_w, deformable_group, 193 | grad_offset_n.data_ptr<scalar_t>(), 194 | grad_mask_n.data_ptr<scalar_t>()); 195 | // gradient w.r.t. input data 196 | modulated_deformable_col2im_cpu(columns.data_ptr<scalar_t>(), 197 | offset_n.data_ptr<scalar_t>(), 198 | mask_n.data_ptr<scalar_t>(), 199 | 1, channels, height, width, 200 | height_out, width_out, kernel_h, kernel_w, 201 | pad_h, pad_w, stride_h, stride_w, 202 | dilation_h, dilation_w, deformable_group, 203 | grad_input_n.data_ptr<scalar_t>()); 204 | 205 | // gradient w.r.t. weight, dWeight should accumulate across the batch and group 206 | modulated_deformable_im2col_cpu(input_n.data_ptr<scalar_t>(), 207 | offset_n.data_ptr<scalar_t>(), 208 | mask_n.data_ptr<scalar_t>(), 209 | 1, channels, height, width, 210 | height_out, width_out, kernel_h, kernel_w, 211 | pad_h, pad_w, stride_h, stride_w, 212 | dilation_h, dilation_w, deformable_group, 213 | columns.data_ptr<scalar_t>()); 214 | 215 | // Torch implementation 216 | auto product = at::matmul(grad_output_n_flat, at::transpose(columns, 1, 0)); 217 | grad_weight = at::add(grad_weight, product.view({channels_out, channels, kernel_h, kernel_w})); 218 | 219 | 220 | // Torch implementation 221 | auto ones_flat = ones.view({height_out*width_out}); 222 | product = at::matmul(grad_output_n_flat, ones_flat); 223 | grad_bias = at::add(grad_bias, product); 224 | } 225 | 226 | return { 227 | grad_input, grad_offset, grad_mask, grad_weight, grad_bias 228 | }; 229 | } 230 | -------------------------------------------------------------------------------- /Datasets/segmask_gt.py: -------------------------------------------------------------------------------- 1 | """ 2 | # ============================== 3 | # segmask_gt.py 4 | # library to generate groundtruth 5 | # segmentation mask given flow and 6 | # disparity change 7 | # (Adapted from code for rigidmask) 8 | # Author: Shihao Shen 9 | # Date: 14th Sep 2022 10 | # ============================== 11 | """ 12 | 13 | import argparse 14 | import os 15 | import os.path 16 | import glob 17 | import numpy as np 18 | import cv2 19 | from PIL import Image 20 | from flowlib import read_flow, readPFM, flow_to_image 21 | 22 | def dataloader(filepath, fpass='frames_cleanpass', level=6): 23 | iml0 = [] 24 | iml1 = [] 25 | flowl0 = [] 26 | disp0 = [] 27 | dispc = [] 28 | calib = [] 29 | level_stars = '/*'*level 30 | candidate_pool = glob.glob('%s/optical_flow%s'%(filepath,level_stars)) 31 | for flow_path in sorted(candidate_pool): 32 | # if 'TEST' in flow_path: continue 33 | if 'flower_storm_x2/into_future/right/OpticalFlowIntoFuture_0023_R.pfm' in flow_path: 34 | print('Skipping %s' % flow_path) 35 | continue 36 | if 'flower_storm_x2/into_future/left/OpticalFlowIntoFuture_0023_L.pfm' in flow_path: 37 | print('Skipping %s' % flow_path) 38 | continue 39 | if 'flower_storm_augmented0_x2/into_future/right/OpticalFlowIntoFuture_0023_R.pfm' in flow_path: 40 | print('Skipping %s' % flow_path) 41 | continue 42 | if 'flower_storm_augmented0_x2/into_future/left/OpticalFlowIntoFuture_0023_L.pfm' in flow_path: 43 | print('Skipping %s' % flow_path) 44 | continue 45 | # if 'FlyingThings' in flow_path and '_0014_' in flow_path: 46 | # print('Skipping %s' % flow_path) 47 | # continue 48 | # if 'FlyingThings' in flow_path and '_0015_' in flow_path: 49 | # print('Skipping %s' % 
flow_path) 50 | # continue 51 | idd = flow_path.split('/')[-1].split('_')[-2] 52 | if 'into_future' in flow_path: 53 | idd_p1 = '%04d'%(int(idd)+1) 54 | else: 55 | idd_p1 = '%04d'%(int(idd)-1) 56 | if os.path.exists(flow_path.replace(idd,idd_p1)): 57 | d0_path = flow_path.replace('/into_future/','/').replace('/into_past/','/').replace('optical_flow','disparity') 58 | d0_path = '%s/%s.pfm'%(d0_path.rsplit('/',1)[0],idd) 59 | dc_path = flow_path.replace('optical_flow','disparity_change') 60 | dc_path = '%s/%s.pfm'%(dc_path.rsplit('/',1)[0],idd) 61 | im_path = flow_path.replace('/into_future/','/').replace('/into_past/','/').replace('optical_flow',fpass) 62 | im0_path = '%s/%s.png'%(im_path.rsplit('/',1)[0],idd) 63 | im1_path = '%s/%s.png'%(im_path.rsplit('/',1)[0],idd_p1) 64 | 65 | # Skip any FlyingThings3D sequence whose camera_data.txt does not contain exactly 10 poses (40 lines) 66 | with open('%s/camera_data.txt'%(im0_path.replace(fpass,'camera_data').rsplit('/',2)[0]),'r') as f: 67 | if 'FlyingThings' in flow_path and len(f.readlines())!=40: 68 | print('Skipping %s' % flow_path) 69 | continue 70 | 71 | iml0.append(im0_path) 72 | iml1.append(im1_path) 73 | flowl0.append(flow_path) 74 | disp0.append(d0_path) 75 | dispc.append(dc_path) 76 | calib.append('%s/camera_data.txt'%(im0_path.replace(fpass,'camera_data').rsplit('/',2)[0])) 77 | return iml0, iml1, flowl0, disp0, dispc, calib 78 | 79 | def default_loader(path): 80 | return Image.open(path).convert('RGB') 81 | 82 | def flow_loader(path): 83 | if '.pfm' in path: 84 | data = readPFM(path)[0] 85 | data[:,:,2] = 1 86 | return data 87 | else: 88 | return read_flow(path) 89 | 90 | def load_exts(cam_file): 91 | with open(cam_file, 'r') as f: 92 | lines = f.readlines() 93 | 94 | l_exts = [] 95 | r_exts = [] 96 | for l in lines: 97 | if 'L ' in l: 98 | l_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4)) 99 | if 'R ' in l: 100 | r_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4)) 101 | return l_exts,r_exts 102 | 103 | def disparity_loader(path): 104 | if '.png' in path: 105 | data = Image.open(path) 106 | data = np.ascontiguousarray(data,dtype=np.float32)/256 107 | return data 108 | else: 109 | return readPFM(path)[0] 110 | 111 | # triangulation 112 | def triangulation(disp, xcoord, ycoord, bl=1, fl = 450, cx = 479.5, cy = 269.5): 113 | depth = bl*fl / disp # 450px->15mm focal length 114 | X = (xcoord - cx) * depth / fl 115 | Y = (ycoord - cy) * depth / fl 116 | Z = depth 117 | P = np.concatenate((X[np.newaxis],Y[np.newaxis],Z[np.newaxis]),0).reshape(3,-1) 118 | P = np.concatenate((P,np.ones((1,P.shape[-1]))),0) 119 | return P 120 | 121 | def exp_loader(index, iml0s, iml1s, flowl0s, disp0s=None, dispcs=None, calibs=None): 122 | ''' 123 | index: index of the frame in the file lists below 124 | iml0s: a file list of the first frames 125 | iml1s: a file list of the second frames 126 | flowl0s: a file list of the optical flow w.r.t. iml0s 127 | disp0s: a file list of the disparity w.r.t. iml0s 128 | dispcs: a file list of the disparity change w.r.t. 
disp0s 129 | calibs: a file list of the camera extrinsics 130 | ''' 131 | iml0 = iml0s[index] 132 | iml1 = iml1s[index] 133 | flowl0 = flowl0s[index] 134 | 135 | iml0 = default_loader(iml0) 136 | iml1 = default_loader(iml1) 137 | 138 | flowl0 = flow_loader(flowl0) 139 | flowl0[:,:,-1][flowl0[:,:,0] == np.inf] = 0 140 | flowl0[:,:,0][~flowl0[:,:,2].astype(bool)] = 0 141 | flowl0[:,:,1][~flowl0[:,:,2].astype(bool)] = 0 142 | flowl0 = np.ascontiguousarray(flowl0, dtype=np.float32) 143 | flowl0[np.isnan(flowl0)] = 1e6 144 | 145 | bl = 1 146 | if '15mm_' in calibs[index]: 147 | fl = 450 148 | else: 149 | fl = 1050 150 | cx = 479.5 151 | cy = 269.5 152 | intr = [[fl],[cx],[cy],[bl],[1],[0],[0],[1],[0],[0]] 153 | 154 | d1 = np.abs(disparity_loader(disp0s[index])) 155 | d2 = np.abs(disparity_loader(dispcs[index]) + d1) 156 | 157 | flowl0[:,:,2] = np.logical_and(np.logical_and(flowl0[:,:,2] == 1, d1 != 0), d2 != 0).astype(float) 158 | 159 | shape = d1.shape 160 | mesh = np.meshgrid(range(shape[1]), range(shape[0])) 161 | xcoord = mesh[0].astype(float) 162 | ycoord = mesh[1].astype(float) 163 | 164 | # triangulation in two frames 165 | P0 = triangulation(d1, xcoord, ycoord, bl=bl, fl=fl, cx=cx, cy=cy) 166 | P1 = triangulation(d2, xcoord + flowl0[:,:,0], ycoord + flowl0[:,:,1], bl=bl, fl=fl, cx=cx, cy=cy) 167 | depth0 = P0[2] 168 | depth1 = P1[2] 169 | 170 | depth0 = depth0.reshape(shape).astype(np.float32) 171 | flow3d = (P1-P0)[:3].reshape((3,)+shape).transpose((1,2,0)) 172 | 173 | fid = int(flowl0s[index].split('/')[-1].split('_')[1]) 174 | with open(calibs[index], 'r') as f: 175 | fid = fid - int(f.readline().split(' ')[-1]) 176 | l_exts, r_exts= load_exts(calibs[index]) 177 | if '/right/' in iml0s[index]: 178 | exts = r_exts 179 | else: 180 | exts = l_exts 181 | 182 | if '/into_future/' in flowl0s[index]: 183 | if (fid + 1) > len(exts) - 1: print(flowl0s[index]) 184 | if (fid) > len(exts) - 1: print(flowl0s[index]) 185 | ext1 = exts[fid+1] 186 | ext0 = exts[fid] 187 | else: 188 | if (fid - 1) > len(exts) - 1: print(flowl0s[index]) 189 | if (fid) > len(exts) - 1: print(flowl0s[index]) 190 | ext1 = exts[fid-1] 191 | ext0 = exts[fid] 192 | camT = np.eye(4); camT[1,1] = -1; camT[2,2] = -1 # Sceneflow uses Blender's coordinate system 193 | RT01 = camT.dot(np.linalg.inv(ext0)).dot(ext1).dot(camT) # ext is from camera space to world space 194 | 195 | rect_flow3d = (RT01[:3,:3].dot(P1[:3])-P0[:3]).reshape((3,)+shape).transpose((1,2,0)) # rectified scene flow 196 | 197 | depthflow = np.concatenate((depth0[:,:,np.newaxis], rect_flow3d, flow3d), 2) 198 | RT01 = np.concatenate((cv2.Rodrigues(RT01[:3,:3])[0][:,0], RT01[:3,-1])).astype(np.float32) 199 | 200 | # object mask 201 | fnum = int(iml0s[index].split('/')[-1].split('.png')[0]) 202 | obj_fname = '%s/%04d.pfm'%(flowl0s[index].replace('/optical_flow','object_index').replace('into_past/','/').replace('into_future/','/').rsplit('/',1)[0],fnum) 203 | obj_idx = disparity_loader(obj_fname) 204 | 205 | depthflow = np.concatenate((depthflow, obj_idx[:,:,np.newaxis]), 2) 206 | # depthflow dimension: H x W x 8 (depth=1 + rectified_flow3d=3 + flow3d=3 + object_segmentation=1) 207 | 208 | iml1 = np.asarray(iml1) 209 | iml0 = np.asarray(iml0) 210 | 211 | return iml0, iml1, flowl0, depthflow, intr, RT01 212 | 213 | 214 | def motionmask(flowl0, depthflow, RT01): 215 | ''' 216 | flowl0: optical flow. [H, W, 3] 217 | depthflow: a concatenation of depth, rectified scene flow, scene flow, and object segmentation. 
[H, W, 8] 218 | RT01: camera motion from the future frame to the current frame. [6, ] 219 | ''' 220 | valid_mask = (flowl0[:,:,2] == 1) & (depthflow[:,:,0] < 100) & (depthflow[:,:,0] > 0.01) # valid flow & valid depth 221 | Tglobal_gt = -RT01[3:, np.newaxis, np.newaxis] # background translation 222 | Tlocal_gt = depthflow[:,:,1:4].transpose(2, 0, 1) # point translation (after removing rotation) 223 | m3d_gt = np.linalg.norm(Tlocal_gt - Tglobal_gt, 2, 0) # abs. motion 224 | fgmask_gt = m3d_gt * 100 > 1 225 | fgmask_gt[~valid_mask] = False 226 | 227 | return fgmask_gt 228 | 229 | 230 | if __name__ == '__main__': 231 | parser = argparse.ArgumentParser(description='segmask_gt_generation') 232 | parser.add_argument('--database', 233 | help='path to the database (required)') 234 | parser.add_argument('--debug', action='store_true', default=False, 235 | help='generate visualization') 236 | parser.add_argument('--frames_pass', default='frames_cleanpass', 237 | help='which pass to use, either clean or final') 238 | parser.add_argument('--dataset', 239 | help='choose from FlyingThings3D, Driving, Monkaa') 240 | args = parser.parse_args() 241 | 242 | if args.debug: 243 | os.makedirs('%s/%s/results_viz' % (args.database, args.dataset), exist_ok=True) 244 | 245 | if args.dataset == 'Monkaa': 246 | level = 4 247 | else: 248 | level = 6 249 | iml0s, iml1s, flowl0s, disp0s, dispcs, calibs = dataloader('%s/%s/' % (args.database, args.dataset), 250 | level=level, fpass=args.frames_pass) 251 | 252 | print("Generating %s masks..." % len(flowl0s)) 253 | for i in range(len(iml0s)): 254 | idd = flowl0s[i].split('/')[-1].split('_')[-2] 255 | mask_fn = '%s/%s.npy' % (os.path.dirname(flowl0s[i]).replace('optical_flow', 'rigidmask'), idd) 256 | if os.path.exists(mask_fn): 257 | print(i) 258 | continue 259 | os.makedirs(os.path.dirname(mask_fn), exist_ok=True) 260 | 261 | iml0, iml1, flowl0, depthflow, intr, RT01 = exp_loader(i, iml0s, iml1s, flowl0s, disp0s, dispcs, calibs) 262 | fgmask = motionmask(flowl0, depthflow, RT01) 263 | np.save(mask_fn, fgmask) 264 | 265 | if args.debug: 266 | if args.dataset == 'Driving' and 'rigidmask/15mm_focallength/scene_forwards/fast/left' not in mask_fn: 267 | continue 268 | elif args.dataset == 'Monkaa' and 'rigidmask/eating_camera2_x2/left' not in mask_fn: 269 | continue 270 | elif args.dataset == 'FlyingThings3D' and not ('rigidmask/TEST/A' in mask_fn and 'into_future/left' in mask_fn): 271 | continue 272 | print("Visualizing %s" % mask_fn) 273 | flowl0viz = flow_to_image(flowl0) 274 | maskviz = np.stack((fgmask * 255.0, )*3, axis=-1).astype(np.uint8) 275 | inputs = np.concatenate([iml0, flowl0viz, maskviz], axis=1) 276 | cv2.imwrite('%s/%s/results_viz/%s.png' % (args.database, args.dataset, str(i).zfill(5)), cv2.cvtColor(inputs, cv2.COLOR_RGB2BGR)) --------------------------------------------------------------------------------
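A minimal usage sketch, not part of the repository: it shows how the `.npy` masks written by segmask_gt.py could be read back and overlaid on the first frame for a quick sanity check. It relies only on conventions visible above (motionmask flags pixels whose residual 3D translation exceeds 0.01 scene units, i.e. m3d_gt * 100 > 1, and each boolean H x W mask is saved under a `rigidmask` directory that mirrors `optical_flow`, one file per flow frame). The helper names load_mask_for_flow and overlay_mask are invented for this illustration.

import os
import numpy as np
import cv2

def load_mask_for_flow(flow_path):
    # segmask_gt.py saves one boolean H x W array per optical-flow file:
    #   <.../rigidmask/...>/<frame id>.npy
    idd = flow_path.split('/')[-1].split('_')[-2]
    mask_fn = '%s/%s.npy' % (os.path.dirname(flow_path).replace('optical_flow', 'rigidmask'), idd)
    return np.load(mask_fn)

def overlay_mask(image_path, flow_path, out_path='overlay.png', alpha=0.5):
    img = cv2.imread(image_path)           # BGR, H x W x 3, same resolution as the mask
    mask = load_mask_for_flow(flow_path)    # bool, H x W
    red = np.zeros_like(img)
    red[..., 2] = 255                       # paint moving (non-camera-motion) pixels red
    blend = np.where(mask[..., None], (alpha * red + (1 - alpha) * img).astype(np.uint8), img)
    cv2.imwrite(out_path, blend)

Because the mask directory layout mirrors the optical_flow tree, the same flow path used during generation can be reused to locate the corresponding mask when building a training or debugging pipeline.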