├── Datasets ├── __init__.py ├── tartanTrajFlowDataset.py ├── cowmask.py ├── util_flow.py └── segmask_gt.py ├── Network ├── __init__.py ├── rigidmask │ ├── __init__.py │ ├── .gitignore │ ├── networks │ │ ├── DCNv2 │ │ │ ├── DCN │ │ │ │ ├── __init__.py │ │ │ │ ├── src │ │ │ │ │ ├── vision.cpp │ │ │ │ │ ├── cpu │ │ │ │ │ │ ├── vision.h │ │ │ │ │ │ ├── dcn_v2_im2col_cpu.h │ │ │ │ │ │ └── dcn_v2_cpu.cpp │ │ │ │ │ ├── cuda │ │ │ │ │ │ ├── vision.h │ │ │ │ │ │ └── dcn_v2_im2col_cuda.h │ │ │ │ │ └── dcn_v2.h │ │ │ │ ├── testcpu.py │ │ │ │ └── testcuda.py │ │ │ ├── make.sh │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── LICENSE │ │ │ └── setup.py │ │ ├── large_hourglass.py │ │ ├── resnet_dcn.py │ │ └── msra_resnet.py │ ├── det_utils.py │ ├── det.py │ └── det_losses.py ├── PWC │ ├── __init__.py │ └── PWCNet.py ├── VONet.py └── VOFlowNet.py ├── evaluator ├── __init__.py ├── tartanair_evaluator.py ├── evaluator_base.py ├── evaluate_kitti.py ├── evaluate_ate_scale.py ├── evaluate_rpe.py ├── trajectory_transform.py └── transformation.py ├── .gitignore ├── LICENSE ├── environment.yml ├── vo_trajectory_from_folder.py └── README.md /Datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Network/rigidmask/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Network/rigidmask/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /Network/PWC/__init__.py: -------------------------------------------------------------------------------- 1 | from .PWCNet import * 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | models/ 3 | data/ 4 | __pycache__/ 5 | .DS_Store -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcn_v2 import * 2 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py build develop 3 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext 7 | build 8 | DCNv2.egg-info 9 | dist -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/vision.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dcn_v2.h" 3 | 4 | 
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward"); 6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward"); 7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward"); 8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward"); 9 | } 10 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with Pytorch 1.X 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python testcpu.py # run examples and gradient check on cpu 7 | python testcuda.py # run examples and gradient check on gpu 8 | ``` 9 | ### Note 10 | Now the master branch is for pytorch 1.x, you can switch back to pytorch 0.4 with, 11 | ```bash 12 | git checkout pytorch_0.4 13 | ``` 14 | 15 | ### Known Issues: 16 | 17 | - [x] Gradient check w.r.t offset (solved) 18 | - [ ] Backward is not reentrant (minor) 19 | 20 | This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 21 | 22 | Update: all gradient check passes with **double** precision. 23 | 24 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 25 | float `<1e-15` for double), 26 | so it may not be a serious problem (?) 27 | 28 | Please post an issue or PR if you have any comments. 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, Air Lab Stacks 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Network/rigidmask/det_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _gather_feat(feat, ind, mask=None): 13 | dim = feat.size(2) 14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 15 | feat = feat.gather(1, ind) 16 | if mask is not None: 17 | mask = mask.unsqueeze(2).expand_as(feat) 18 | feat = feat[mask] 19 | feat = feat.view(-1, dim) 20 | return feat 21 | 22 | def _transpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] 
= \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) -------------------------------------------------------------------------------- /Datasets/tartanTrajFlowDataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | # ============================== 3 | # tartanTrajFlowDataset.py 4 | # library for DytanVO data I/O 5 | # Author: Wenshan Wang, Shihao Shen 6 | # Date: 3rd Jan 2023 7 | # ============================== 8 | """ 9 | import numpy as np 10 | import cv2 11 | from torch.utils.data import Dataset, DataLoader 12 | from os import listdir 13 | from evaluator.transformation import pos_quats2SEs, pose2motion, SEs2ses 14 | from .utils import make_intrinsics_layer 15 | 16 | class TrajFolderDataset(Dataset): 17 | """scene flow synthetic dataset. """ 18 | 19 | def __init__(self, imgfolder, transform = None, 20 | focalx = 320.0, focaly = 320.0, centerx = 320.0, centery = 240.0): 21 | 22 | files = listdir(imgfolder) 23 | self.rgbfiles = [(imgfolder +'/'+ ff) for ff in files if (ff.endswith('.png') or ff.endswith('.jpg'))] 24 | self.rgbfiles.sort() 25 | self.imgfolder = imgfolder 26 | 27 | print('Find {} image files in {}'.format(len(self.rgbfiles), imgfolder)) 28 | 29 | self.N = len(self.rgbfiles) - 1 30 | 31 | # self.N = len(self.lines) 32 | self.transform = transform 33 | self.focalx = focalx 34 | self.focaly = focaly 35 | self.centerx = centerx 36 | self.centery = centery 37 | 38 | def __len__(self): 39 | return self.N 40 | 41 | def __getitem__(self, idx): 42 | imgfile1 = self.rgbfiles[idx].strip() 43 | imgfile2 = self.rgbfiles[idx+1].strip() 44 | img1 = cv2.imread(imgfile1) 45 | img2 = cv2.imread(imgfile2) 46 | 47 | res = {'img1': img1, 'img2': img2} 48 | 49 | h, w, _ = img1.shape 50 | intrinsicLayer = make_intrinsics_layer(w, h, self.focalx, self.focaly, self.centerx, self.centery) 51 | res['intrinsic'] = intrinsicLayer 52 | 53 | if self.transform: 54 | res = self.transform(res) 55 | 56 | res['img1_raw'] = img1 57 | res['img2_raw'] = img2 58 | 59 | return res -------------------------------------------------------------------------------- /evaluator/tartanair_evaluator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang 2 | # For License information please see the LICENSE file in the root directory. 
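# Note on input format: the ground-truth and estimated pose files consumed by
# this evaluator are plain-text N x 7 arrays, one pose per line; judging from
# the shape checks in evaluate_one_trajectory() and the pos_quats2SE* helpers,
# the layout is position followed by quaternion, i.e. [x y z qx qy qz qw].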
3 | 4 | import numpy as np 5 | from .evaluator_base import ATEEvaluator, RPEEvaluator, KittiEvaluator, transform_trajs, quats2SEs 6 | from os.path import isdir, isfile 7 | 8 | # from trajectory_transform import timestamp_associate 9 | 10 | class TartanAirEvaluator: 11 | def __init__(self, scale = False, round=1): 12 | self.ate_eval = ATEEvaluator() 13 | self.rpe_eval = RPEEvaluator() 14 | self.kitti_eval = KittiEvaluator() 15 | 16 | def evaluate_one_trajectory(self, gt_traj, est_traj, scale=False, kittitype=True): 17 | """ 18 | scale = True: calculate a global scale 19 | """ 20 | # load trajectories 21 | try: 22 | gt_traj = np.loadtxt(gt_traj) 23 | est_traj = np.loadtxt(est_traj) 24 | except: 25 | pass 26 | 27 | if gt_traj.shape[0] != est_traj.shape[0]: 28 | raise Exception("POSEFILE_LENGTH_ILLEGAL") 29 | if gt_traj.shape[1] != 7 or est_traj.shape[1] != 7: 30 | raise Exception("POSEFILE_FORMAT_ILLEGAL") 31 | 32 | # transform and scale 33 | gt_traj_trans, est_traj_trans, s = transform_trajs(gt_traj, est_traj, scale) 34 | gt_SEs, est_SEs = quats2SEs(gt_traj_trans, est_traj_trans) 35 | 36 | ate_score, gt_ate_aligned, est_ate_aligned = self.ate_eval.evaluate(gt_traj, est_traj, scale) 37 | rpe_score = self.rpe_eval.evaluate(gt_SEs, est_SEs) 38 | kitti_score = self.kitti_eval.evaluate(gt_SEs, est_SEs, kittitype=kittitype) 39 | 40 | return {'ate_score': ate_score, 41 | 'rpe_score': rpe_score, 42 | 'kitti_score': kitti_score, 43 | 'gt_aligned': gt_ate_aligned, 44 | 'est_aligned': est_ate_aligned} 45 | 46 | if __name__ == "__main__": 47 | 48 | # scale = True for monocular track, scale = False for stereo track 49 | aicrowd_evaluator = TartanAirEvaluator() 50 | result = aicrowd_evaluator.evaluate_one_trajectory('pose_gt.txt', 'pose_est.txt', scale=True) 51 | print(result) 52 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import glob 5 | 6 | import torch 7 | 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | from torch.utils.cpp_extension import CppExtension 10 | from torch.utils.cpp_extension import CUDAExtension 11 | 12 | from setuptools import find_packages 13 | from setuptools import setup 14 | 15 | requirements = ["torch", "torchvision"] 16 | 17 | 18 | def get_extensions(): 19 | this_dir = os.path.dirname(os.path.abspath(__file__)) 20 | extensions_dir = os.path.join(this_dir, "DCN", "src") 21 | 22 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 23 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 24 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 25 | 26 | #os.environ["CC"] = "g++" 27 | sources = main_file + source_cpu 28 | extension = CppExtension 29 | extra_compile_args = {'cxx': ['-std=c++14']} 30 | define_macros = [] 31 | 32 | 33 | #if torch.cuda.is_available() and CUDA_HOME is not None: 34 | if torch.cuda.is_available(): 35 | extension = CUDAExtension 36 | sources += source_cuda 37 | define_macros += [("WITH_CUDA", None)] 38 | extra_compile_args["nvcc"] = [ 39 | "-DCUDA_HAS_FP16=1", 40 | "-D__CUDA_NO_HALF_OPERATORS__", 41 | "-D__CUDA_NO_HALF_CONVERSIONS__", 42 | "-D__CUDA_NO_HALF2_OPERATORS__", 43 | ] 44 | else: 45 | #raise NotImplementedError('Cuda is not available') 46 | pass 47 | 48 | 49 | sources = [os.path.join(extensions_dir, s) for s in sources] 50 | include_dirs = [extensions_dir] 51 | ext_modules = [ 52 | 
extension( 53 | "_ext", 54 | sources, 55 | include_dirs=include_dirs, 56 | define_macros=define_macros, 57 | extra_compile_args=extra_compile_args, 58 | ) 59 | ] 60 | return ext_modules 61 | 62 | setup( 63 | name="DCNv2", 64 | version="0.1", 65 | author="charlesshang", 66 | url="https://github.com/charlesshang/DCNv2", 67 | description="deformable convolutional networks", 68 | packages=find_packages(exclude=("configs", "tests",)), 69 | # install_requires=requirements, 70 | ext_modules=get_extensions(), 71 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 72 | ) 73 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cpu_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cpu_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cuda_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cuda_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | 
dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /Network/VONet.py: -------------------------------------------------------------------------------- 1 | # Software License Agreement (BSD License) 2 | # 3 | # Copyright (c) 2020, Wenshan Wang, Yaoyu Hu, CMU 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions 8 | # are met: 9 | # 10 | # * Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # * Redistributions in binary form must reproduce the above 13 | # copyright notice, this list of conditions and the following 14 | # disclaimer in the documentation and/or other materials provided 15 | # with the distribution. 16 | # * Neither the name of CMU nor the names of its 17 | # contributors may be used to endorse or promote products derived 18 | # from this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 30 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | # POSSIBILITY OF SUCH DAMAGE. 
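# A minimal usage sketch for the network defined below (illustrative only; the
# tensor shapes are assumptions, not taken from this repository):
#
#   net = VONet()
#   # img1, img2 : (B, 3, H, W) RGB frames at t-1 and t
#   # intrinsics : (B, 2, H, W) intrinsics layer (see make_intrinsics_layer)
#   # segmask    : (B, 1, H, W) motion-segmentation mask, > 0 for dynamic pixels
#   flow, pose = net([img1, img2, intrinsics, None, segmask])
#
# With x[3] = None the flow predicted by the PWC network is used; a precomputed
# flow can be passed instead, and only_pose=True skips the flow network (in
# which case x[3] must hold that precomputed flow).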
32 | 33 | import torch 34 | import torch.nn as nn 35 | import torch.nn.functional as F 36 | from .PWC import PWCDCNet as FlowNet 37 | from .VOFlowNet import VOFlowRes as FlowPoseNet 38 | 39 | class VONet(nn.Module): 40 | def __init__(self): 41 | super(VONet, self).__init__() 42 | 43 | self.flowNet = FlowNet() 44 | self.flowPoseNet = FlowPoseNet() 45 | 46 | def forward(self, x, only_flow=False, only_pose=False): 47 | ''' 48 | x[0]: rgb frame t-1 49 | x[1]: rgb frame t 50 | x[2]: intrinsics 51 | x[3]: flow t-1 -> t (optional) 52 | x[4]: motion segmentation mask 53 | ''' 54 | # import ipdb;ipdb.set_trace() 55 | if not only_pose: 56 | flow_out = self.flowNet(torch.cat((x[0], x[1]), dim=1)) 57 | 58 | if only_flow: 59 | return flow_out, None 60 | 61 | flow = flow_out[0] 62 | 63 | else: 64 | assert(len(x) > 3) 65 | flow_out = None 66 | 67 | if len(x) > 3 and x[3] is not None: 68 | flow_input = x[3] 69 | else: 70 | flow_input = flow 71 | 72 | # Mask out input flow using the segmentation result 73 | assert(len(x) > 4) 74 | mask = torch.gt(x[4], 0) 75 | for i in range(flow_input.shape[0]): 76 | zeros = torch.cat([mask[i], ]*2, dim=0) 77 | flow_input[i][zeros] = 0 78 | 79 | flow_input = torch.cat((flow_input, 1 - x[4]), dim=1) # segmentation layer 80 | flow_input = torch.cat((flow_input, x[2]), dim=1) # intrinsics layer 81 | 82 | pose = self.flowPoseNet(flow_input) 83 | 84 | return flow_out, pose 85 | -------------------------------------------------------------------------------- /evaluator/evaluator_base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang 2 | # For License information please see the LICENSE file in the root directory. 3 | 4 | import numpy as np 5 | from .trajectory_transform import trajectory_transform, rescale 6 | from .transformation import pos_quats2SE_matrices, SE2pos_quat 7 | from .evaluate_ate_scale import align, plot_traj 8 | from .evaluate_rpe import evaluate_trajectory 9 | from .evaluate_kitti import evaluate as kittievaluate 10 | 11 | np.set_printoptions(suppress=True, precision=2, threshold=100000) 12 | 13 | def transform_trajs(gt_traj, est_traj, cal_scale): 14 | gt_traj, est_traj = trajectory_transform(gt_traj, est_traj) 15 | if cal_scale : 16 | est_traj, s = rescale(gt_traj, est_traj) 17 | print(' Scale, {}'.format(s)) 18 | else: 19 | s = 1.0 20 | return gt_traj, est_traj, s 21 | 22 | def quats2SEs(gt_traj, est_traj): 23 | gt_SEs = pos_quats2SE_matrices(gt_traj) 24 | est_SEs = pos_quats2SE_matrices(est_traj) 25 | return gt_SEs, est_SEs 26 | 27 | def per_frame_scale_alignment(gt_motions, est_motions): 28 | dist_gt = np.linalg.norm(gt_motions[:,:3], axis=1) 29 | # scale the output frame by frame 30 | motions_scale = est_motions.copy() 31 | dist = np.linalg.norm(motions_scale[:,:3],axis=1) 32 | scale_gt = dist_gt/dist 33 | motions_scale[:,:3] = est_motions[:,:3] * scale_gt.reshape(-1,1) 34 | 35 | return motions_scale 36 | 37 | class ATEEvaluator(object): 38 | def __init__(self): 39 | super(ATEEvaluator, self).__init__() 40 | 41 | 42 | def evaluate(self, gt_traj, est_traj, scale): 43 | gt_xyz = np.matrix(gt_traj[:,0:3].transpose()) 44 | est_xyz = np.matrix(est_traj[:, 0:3].transpose()) 45 | 46 | rot, trans, trans_error, s = align(gt_xyz, est_xyz, scale) 47 | print(' ATE scale: {}'.format(s)) 48 | error = np.sqrt(np.dot(trans_error,trans_error) / len(trans_error)) 49 | 50 | # align two trajs 51 | est_SEs = pos_quats2SE_matrices(est_traj) 52 | T = np.eye(4) 53 | T[:3,:3] = rot 
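        # T stacks the rotation (line above) and translation (line below) from
        # the Horn alignment; once inverted it maps the scale-corrected
        # estimated poses into the ground-truth frame.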
54 | T[:3,3:] = trans 55 | T = np.linalg.inv(T) 56 | est_traj_aligned = [] 57 | for se in est_SEs: 58 | se[:3,3] = se[:3,3] * s 59 | se_new = T.dot(se) 60 | se_new = SE2pos_quat(se_new) 61 | est_traj_aligned.append(se_new) 62 | 63 | est_traj_aligned = np.array(est_traj_aligned) 64 | return error, gt_traj, est_traj_aligned 65 | 66 | # ======================= 67 | 68 | 69 | class RPEEvaluator(object): 70 | def __init__(self): 71 | super(RPEEvaluator, self).__init__() 72 | 73 | 74 | def evaluate(self, gt_SEs, est_SEs): 75 | result = evaluate_trajectory(gt_SEs, est_SEs) 76 | 77 | trans_error = np.array(result)[:,2] 78 | rot_error = np.array(result)[:,3] 79 | 80 | trans_error_mean = np.mean(trans_error) 81 | rot_error_mean = np.mean(rot_error) 82 | 83 | # import ipdb;ipdb.set_trace() 84 | 85 | return (rot_error_mean, trans_error_mean) 86 | 87 | # ======================= 88 | 89 | 90 | class KittiEvaluator(object): 91 | def __init__(self): 92 | super(KittiEvaluator, self).__init__() 93 | 94 | # return rot_error, tra_error 95 | def evaluate(self, gt_SEs, est_SEs, kittitype): 96 | # trajectory_scale(est_SEs, 0.831984631412) 97 | error = kittievaluate(gt_SEs, est_SEs, kittitype=kittitype) 98 | return error 99 | -------------------------------------------------------------------------------- /Network/rigidmask/det.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | from .networks.msra_resnet import get_pose_net 11 | from .networks.dlav0 import get_pose_net as get_dlav0 12 | from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn 13 | from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn 14 | from .networks.large_hourglass import get_large_hourglass_net 15 | 16 | _model_factory = { 17 | 'res': get_pose_net, # default Resnet with deconv 18 | 'dlav0': get_dlav0, # default DLAup 19 | 'dla': get_dla_dcn, 20 | 'resdcn': get_pose_net_dcn, 21 | 'hourglass': get_large_hourglass_net, 22 | } 23 | 24 | def create_model(arch, heads, head_conv,num_input): 25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 26 | arch = arch[:arch.find('_')] if '_' in arch else arch 27 | get_model = _model_factory[arch] 28 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv,num_input=num_input) 29 | return model 30 | 31 | def load_model(model, model_path, optimizer=None, resume=False, 32 | lr=None, lr_step=None): 33 | start_epoch = 0 34 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 35 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 36 | state_dict_ = checkpoint['state_dict'] 37 | state_dict = {} 38 | 39 | # convert data_parallal to model 40 | for k in state_dict_: 41 | if k.startswith('module') and not k.startswith('module_list'): 42 | state_dict[k[7:]] = state_dict_[k] 43 | else: 44 | state_dict[k] = state_dict_[k] 45 | model_state_dict = model.state_dict() 46 | 47 | # check loaded parameters and created model parameters 48 | msg = 'If you see this, your model does not fully load the ' + \ 49 | 'pre-trained weight. Please make sure ' + \ 50 | 'you have correctly specified --arch xxx ' + \ 51 | 'or set the correct --num_classes for your own dataset.' 
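  # The checks below make checkpoint loading tolerant: parameters whose shapes
  # do not match, or that are missing on either side, fall back to the freshly
  # initialized values of the current model (with a warning) instead of raising.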
52 | for k in state_dict: 53 | if k in model_state_dict: 54 | if state_dict[k].shape != model_state_dict[k].shape: 55 | print('Skip loading parameter {}, required shape{}, '\ 56 | 'loaded shape{}. {}'.format( 57 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 58 | state_dict[k] = model_state_dict[k] 59 | else: 60 | print('Drop parameter {}.'.format(k) + msg) 61 | for k in model_state_dict: 62 | if not (k in state_dict): 63 | print('No param {}.'.format(k) + msg) 64 | state_dict[k] = model_state_dict[k] 65 | model.load_state_dict(state_dict, strict=False) 66 | 67 | # resume optimizer parameters 68 | if optimizer is not None and resume: 69 | if 'optimizer' in checkpoint: 70 | optimizer.load_state_dict(checkpoint['optimizer']) 71 | start_epoch = checkpoint['epoch'] 72 | start_lr = lr 73 | for step in lr_step: 74 | if start_epoch >= step: 75 | start_lr *= 0.1 76 | for param_group in optimizer.param_groups: 77 | param_group['lr'] = start_lr 78 | print('Resumed optimizer with start lr', start_lr) 79 | else: 80 | print('No optimizer parameters in checkpoint.') 81 | if optimizer is not None: 82 | return model, optimizer, start_epoch 83 | else: 84 | return model 85 | 86 | def save_model(path, epoch, model, optimizer=None): 87 | if isinstance(model, torch.nn.DataParallel): 88 | state_dict = model.module.state_dict() 89 | else: 90 | state_dict = model.state_dict() 91 | data = {'epoch': epoch, 92 | 'state_dict': state_dict} 93 | if not (optimizer is None): 94 | data['optimizer'] = optimizer.state_dict() 95 | torch.save(data, path) 96 | 97 | -------------------------------------------------------------------------------- /Datasets/cowmask.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=bad-indentation 2 | # coding=utf-8 3 | # Copyright 2022 The Google Research Authors. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """ 18 | Cow mask generation. 19 | https://github.com/google-research/google-research/blob/master/milking_cowmask/ 20 | Adapted from LAX implementation to NumPy due to PyTorch dataloader 21 | being incompatible with JAX 22 | Author: Shihao Shen 23 | Date: 29th Aug 2022 24 | """ 25 | import math 26 | import numpy as np 27 | from scipy import special 28 | from scipy.signal import convolve 29 | 30 | _ROOT_2 = math.sqrt(2.0) 31 | _ROOT_2_PI = math.sqrt(2.0 * math.pi) 32 | 33 | 34 | def gaussian_kernels(sigma, max_sigma): 35 | """Make Gaussian kernels for Gaussian blur. 36 | Args: 37 | sigma: kernel sigma 38 | max_sigma: sigma upper limit as a float (this is used to determine 39 | the size of kernel required to fit all kernels) 40 | Returns: 41 | a (1, kernel_width) numpy array 42 | """ 43 | size = round(max_sigma * 3) * 2 + 1 44 | x = np.arange(-size, size + 1)[None, :].astype(np.float32) 45 | y = np.exp(-0.5 * x ** 2 / sigma ** 2) 46 | return y / (sigma * _ROOT_2_PI) 47 | 48 | 49 | def cow_masks(mask_size, log_sigma_range, max_sigma, prop_range): 50 | """Generate Cow Mask. 
51 | Args: 52 | n_masks: number of masks to generate as an int 53 | mask_size: image size as a `(height, width)` tuple 54 | log_sigma_range: the range of the sigma (smoothing kernel) 55 | parameter in log-space`(log(sigma_min), log(sigma_max))` 56 | max_sigma: smoothing sigma upper limit 57 | prop_range: range from which to draw the proportion `p` that 58 | controls the proportion of pixel in a mask that are 1 vs 0 59 | Returns: 60 | Cow Masks as a [v, height, width, 1] numpy array 61 | """ 62 | 63 | # Draw the per-mask proportion p 64 | p = np.random.uniform(prop_range[0], prop_range[1]) 65 | # Compute threshold factors 66 | threshold_factor = special.erfinv(2 * p - 1) * _ROOT_2 67 | 68 | sigma = np.exp(np.random.uniform(log_sigma_range[0], log_sigma_range[1])) 69 | 70 | noise = np.random.normal(size=mask_size) 71 | 72 | # Generate a kernel for each sigma 73 | kernel = gaussian_kernels(sigma, max_sigma) 74 | kernel = kernel.squeeze() 75 | # kernels in y and x 76 | krn_y = kernel[None, :] 77 | krn_x = kernel[:, None] 78 | 79 | # Apply kernels in y and x separately 80 | smooth_noise = convolve(noise, krn_y, mode='same') 81 | smooth_noise = convolve(smooth_noise, krn_x, mode='same') 82 | 83 | # Compute mean and std-dev 84 | noise_mu = smooth_noise.mean(axis=(0,1)) 85 | noise_sigma = smooth_noise.std(axis=(0,1)) 86 | # Compute thresholds 87 | threshold = threshold_factor * noise_sigma + noise_mu 88 | # Apply threshold 89 | mask = (smooth_noise <= threshold).astype(bool) 90 | 91 | return mask 92 | 93 | 94 | if __name__=="__main__": 95 | import time 96 | import matplotlib.pyplot as plt 97 | 98 | cow_sigma_range = (20, 60) 99 | log_sigma_range = (math.log(cow_sigma_range[0]), math.log(cow_sigma_range[1])) 100 | cow_prop_range = (0.1, 0.5) 101 | s = time.time() 102 | max_iou = 0 103 | # for _ in range(1000): 104 | # mask = cow_masks((240, 360), log_sigma_range, cow_sigma_range[1], cow_prop_range) 105 | # max_iou = max(max_iou, np.sum(mask) / (240*360)) 106 | # print(time.time() - s) 107 | # print(max_iou) 108 | 109 | mask = cow_masks((240, 360), log_sigma_range, cow_sigma_range[1], cow_prop_range) 110 | print(np.sum(mask) / (240*360)) 111 | plt.imshow(mask * 255) 112 | plt.savefig('mask.png') -------------------------------------------------------------------------------- /evaluator/evaluate_kitti.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang 2 | # For License information please see the LICENSE file in the root directory. 
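# Output convention: evaluate() at the bottom of this file returns the pair
# (mean rotational drift, mean translational drift) averaged over the segment
# lengths, with rotation in degrees per meter and translation as a unitless
# ratio (meters of drift per meter traveled).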
3 | # This is a python reinplementation of the KITTI metric: http://www.cvlibs.net/datasets/kitti/eval_odometry.php 4 | # Cridit: Xiangwei Wang https://github.com/TimingSpace 5 | 6 | import numpy as np 7 | import sys 8 | 9 | def trajectory_distances(poses): 10 | distances = [] 11 | distances.append(0) 12 | for i in range(1,len(poses)): 13 | p1 = poses[i-1] 14 | p2 = poses[i] 15 | delta = p1[0:3,3] - p2[0:3,3] 16 | distances.append(distances[i-1]+np.linalg.norm(delta)) 17 | return distances 18 | 19 | def last_frame_from_segment_length(dist,first_frame,length): 20 | for i in range(first_frame,len(dist)): 21 | if dist[i]>dist[first_frame]+length: 22 | return i 23 | return -1 24 | 25 | def rotation_error(pose_error): 26 | a = pose_error[0,0] 27 | b = pose_error[1,1] 28 | c = pose_error[2,2] 29 | d = 0.5*(a+b+c-1) 30 | rot_error = np.arccos(max(min(d,1.0),-1.0)) 31 | return rot_error 32 | 33 | def translation_error(pose_error): 34 | dx = pose_error[0,3] 35 | dy = pose_error[1,3] 36 | dz = pose_error[2,3] 37 | return np.sqrt(dx*dx+dy*dy+dz*dz) 38 | 39 | # def line2matrix(pose_line): 40 | # pose_line = np.array(pose_line) 41 | # pose_m = np.matrix(np.eye(4)) 42 | # pose_m[0:3,:] = pose_line.reshape(3,4) 43 | # return pose_m 44 | 45 | def calculate_sequence_error(poses_gt,poses_result,lengths=[10,20,30,40,50,60,70,80]): 46 | # error_vetor 47 | errors = [] 48 | 49 | # paramet 50 | step_size = 1 #10; # every second 51 | num_lengths = len(lengths) 52 | 53 | # import ipdb;ipdb.set_trace() 54 | # pre-compute distances (from ground truth as reference) 55 | dist = trajectory_distances(poses_gt) 56 | # for all start positions do 57 | for first_frame in range(0, len(poses_gt), step_size): 58 | # for all segment lengths do 59 | for i in range(0,num_lengths): 60 | # current length 61 | length = lengths[i]; 62 | 63 | # compute last frame 64 | last_frame = last_frame_from_segment_length(dist,first_frame,length); 65 | # continue, if sequence not long enough 66 | if (last_frame==-1): 67 | continue; 68 | 69 | # compute rotational and translational errors 70 | pose_delta_gt = np.linalg.inv(poses_gt[first_frame]).dot(poses_gt[last_frame]) 71 | pose_delta_result = np.linalg.inv(poses_result[first_frame]).dot(poses_result[last_frame]) 72 | pose_error = np.linalg.inv(pose_delta_result).dot(pose_delta_gt) 73 | r_err = rotation_error(pose_error); 74 | t_err = translation_error(pose_error); 75 | 76 | # compute speed 77 | num_frames = (float)(last_frame-first_frame+1); 78 | speed = length/(0.1*num_frames); 79 | 80 | # write to file 81 | error = [first_frame,r_err/length,t_err/length,length,speed] 82 | errors.append(error) 83 | # return error vector 84 | return errors 85 | 86 | def calculate_ave_errors(errors,lengths=[10,20,30,40,50,60,70,80]): 87 | rot_errors=[] 88 | tra_errors=[] 89 | for length in lengths: 90 | rot_error_each_length =[] 91 | tra_error_each_length =[] 92 | for error in errors: 93 | if abs(error[3]-length)<0.1: 94 | rot_error_each_length.append(error[1]) 95 | tra_error_each_length.append(error[2]) 96 | 97 | if len(rot_error_each_length)==0: 98 | # import ipdb;ipdb.set_trace() 99 | continue 100 | else: 101 | rot_errors.append(sum(rot_error_each_length)/len(rot_error_each_length)) 102 | tra_errors.append(sum(tra_error_each_length)/len(tra_error_each_length)) 103 | return np.array(rot_errors)*180/np.pi, tra_errors 104 | 105 | def evaluate(gt, data,kittitype=True): 106 | if kittitype: 107 | lens = [100,200,300,400,500,600,700,800] # 108 | else: 109 | lens = [5,10,15,20,25,30,35,40] #[1,2,3,4,5,6] # 110 | 
errors = calculate_sequence_error(gt, data, lengths=lens) 111 | rot,tra = calculate_ave_errors(errors, lengths=lens) 112 | return np.mean(rot), np.mean(tra) 113 | 114 | def main(): 115 | # usage: python main.py path_to_ground_truth path_to_predict_pose 116 | # load and preprocess data 117 | ground_truth_data = np.loadtxt(sys.argv[1]) 118 | predict_pose__data = np.loadtxt(sys.argv[2]) 119 | errors = calculate_sequence_error(ground_truth_data,predict_pose__data) 120 | rot,tra = calculate_ave_errors(errors) 121 | print(rot,'\n',tra) 122 | #print(error) 123 | # evaluate the vo result 124 | # save and visualization the evaluatation result 125 | 126 | if __name__ == "__main__": 127 | main() 128 | 129 | 130 | -------------------------------------------------------------------------------- /evaluator/evaluate_ate_scale.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Modified by Wenshan Wang 4 | # Modified by Raul Mur-Artal 5 | # Automatically compute the optimal scale factor for monocular VO/SLAM. 6 | 7 | # Software License Agreement (BSD License) 8 | # 9 | # Copyright (c) 2013, Juergen Sturm, TUM 10 | # All rights reserved. 11 | # 12 | # Redistribution and use in source and binary forms, with or without 13 | # modification, are permitted provided that the following conditions 14 | # are met: 15 | # 16 | # * Redistributions of source code must retain the above copyright 17 | # notice, this list of conditions and the following disclaimer. 18 | # * Redistributions in binary form must reproduce the above 19 | # copyright notice, this list of conditions and the following 20 | # disclaimer in the documentation and/or other materials provided 21 | # with the distribution. 22 | # * Neither the name of TUM nor the names of its 23 | # contributors may be used to endorse or promote products derived 24 | # from this software without specific prior written permission. 25 | # 26 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 29 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 30 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 31 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 32 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 33 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 34 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 36 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | # POSSIBILITY OF SUCH DAMAGE. 38 | # 39 | # Requirements: 40 | # sudo apt-get install python-argparse 41 | 42 | """ 43 | This script computes the absolute trajectory error from the ground truth 44 | trajectory and the estimated trajectory. 45 | """ 46 | 47 | import numpy 48 | 49 | def align(model,data,calc_scale=False): 50 | """Align two trajectories using the method of Horn (closed-form). 
51 | 52 | Input: 53 | model -- first trajectory (3xn) 54 | data -- second trajectory (3xn) 55 | 56 | Output: 57 | rot -- rotation matrix (3x3) 58 | trans -- translation vector (3x1) 59 | trans_error -- translational error per point (1xn) 60 | 61 | """ 62 | numpy.set_printoptions(precision=3,suppress=True) 63 | model_zerocentered = model - model.mean(1) 64 | data_zerocentered = data - data.mean(1) 65 | 66 | W = numpy.zeros( (3,3) ) 67 | for column in range(model.shape[1]): 68 | W += numpy.outer(model_zerocentered[:,column],data_zerocentered[:,column]) 69 | U,d,Vh = numpy.linalg.linalg.svd(W.transpose()) 70 | S = numpy.matrix(numpy.identity( 3 )) 71 | if(numpy.linalg.det(U) * numpy.linalg.det(Vh)<0): 72 | S[2,2] = -1 73 | rot = U*S*Vh 74 | 75 | if calc_scale: 76 | rotmodel = rot*model_zerocentered 77 | dots = 0.0 78 | norms = 0.0 79 | for column in range(data_zerocentered.shape[1]): 80 | dots += numpy.dot(data_zerocentered[:,column].transpose(),rotmodel[:,column]) 81 | normi = numpy.linalg.norm(model_zerocentered[:,column]) 82 | norms += normi*normi 83 | # s = float(dots/norms) 84 | s = float(norms/dots) 85 | else: 86 | s = 1.0 87 | 88 | # trans = data.mean(1) - s*rot * model.mean(1) 89 | # model_aligned = s*rot * model + trans 90 | # alignment_error = model_aligned - data 91 | 92 | # scale the est to the gt, otherwise the ATE could be very small if the est scale is small 93 | trans = s*data.mean(1) - rot * model.mean(1) 94 | model_aligned = rot * model + trans 95 | data_alingned = s * data 96 | alignment_error = model_aligned - data_alingned 97 | 98 | trans_error = numpy.sqrt(numpy.sum(numpy.multiply(alignment_error,alignment_error),0)).A[0] 99 | 100 | return rot,trans,trans_error, s 101 | 102 | def plot_traj(ax,stamps,traj,style,color,label): 103 | """ 104 | Plot a trajectory using matplotlib. 105 | 106 | Input: 107 | ax -- the plot 108 | stamps -- time stamps (1xn) 109 | traj -- trajectory (3xn) 110 | style -- line style 111 | color -- line color 112 | label -- plot legend 113 | 114 | """ 115 | stamps.sort() 116 | interval = numpy.median([s-t for s,t in zip(stamps[1:],stamps[:-1])]) 117 | x = [] 118 | y = [] 119 | last = stamps[0] 120 | for i in range(len(stamps)): 121 | if stamps[i]-last < 2*interval: 122 | x.append(traj[i][0]) 123 | y.append(traj[i][1]) 124 | elif len(x)>0: 125 | ax.plot(x,y,style,color=color,label=label) 126 | label="" 127 | x=[] 128 | y=[] 129 | last= stamps[i] 130 | if len(x)>0: 131 | ax.plot(x,y,style,color=color,label=label) 132 | 133 | 134 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_im2col_cpu.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. 
If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | // modified from the CUDA version for CPU use by Daniel K. 
Suhendro 64 | 65 | #ifndef DCN_V2_IM2COL_CPU 66 | #define DCN_V2_IM2COL_CPU 67 | 68 | #ifdef __cplusplus 69 | extern "C" 70 | { 71 | #endif 72 | 73 | void modulated_deformable_im2col_cpu(const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cpu(const float *data_col, const float *data_offset, const float *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, float *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cpu(const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 88 | const int batch_size, const int channels, const int height_im, const int width_im, 89 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 90 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 91 | const int dilation_h, const int dilation_w, 92 | const int deformable_group, 93 | float *grad_offset, float *grad_mask); 94 | 95 | #ifdef __cplusplus 96 | } 97 | #endif 98 | 99 | #endif -------------------------------------------------------------------------------- /evaluator/evaluate_rpe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Software License Agreement (BSD License) 3 | # 4 | # Modified by Wenshan Wang 5 | # Copyright (c) 2013, Juergen Sturm, TUM 6 | # All rights reserved. 7 | # 8 | # Redistribution and use in source and binary forms, with or without 9 | # modification, are permitted provided that the following conditions 10 | # are met: 11 | # 12 | # * Redistributions of source code must retain the above copyright 13 | # notice, this list of conditions and the following disclaimer. 14 | # * Redistributions in binary form must reproduce the above 15 | # copyright notice, this list of conditions and the following 16 | # disclaimer in the documentation and/or other materials provided 17 | # with the distribution. 18 | # * Neither the name of TUM nor the names of its 19 | # contributors may be used to endorse or promote products derived 20 | # from this software without specific prior written permission. 21 | # 22 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 26 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 | # POSSIBILITY OF SUCH DAMAGE. 34 | 35 | """ 36 | This script computes the relative pose error from the ground truth trajectory 37 | and the estimated trajectory. 38 | """ 39 | 40 | import random 41 | import numpy as np 42 | import sys 43 | 44 | def ominus(a,b): 45 | """ 46 | Compute the relative 3D transformation between a and b. 47 | 48 | Input: 49 | a -- first pose (homogeneous 4x4 matrix) 50 | b -- second pose (homogeneous 4x4 matrix) 51 | 52 | Output: 53 | Relative 3D transformation from a to b. 54 | """ 55 | return np.dot(np.linalg.inv(a),b) 56 | 57 | def compute_distance(transform): 58 | """ 59 | Compute the distance of the translational component of a 4x4 homogeneous matrix. 60 | """ 61 | return np.linalg.norm(transform[0:3,3]) 62 | 63 | def compute_angle(transform): 64 | """ 65 | Compute the rotation angle from a 4x4 homogeneous matrix. 66 | """ 67 | # an invitation to 3-d vision, p 27 68 | return np.arccos( min(1,max(-1, (np.trace(transform[0:3,0:3]) - 1)/2) )) 69 | 70 | def distances_along_trajectory(traj): 71 | """ 72 | Compute the translational distances along a trajectory. 73 | """ 74 | motion = [ominus(traj[i+1],traj[i]) for i in range(len(traj)-1)] 75 | distances = [0] 76 | sum = 0 77 | for t in motion: 78 | sum += compute_distance(t) 79 | distances.append(sum) 80 | return distances 81 | 82 | 83 | def evaluate_trajectory(traj_gt, traj_est, param_max_pairs=10000, param_fixed_delta=False, 84 | param_delta=1.00): 85 | """ 86 | Compute the relative pose error between two trajectories. 
87 | 88 | Input: 89 | traj_gt -- the first trajectory (ground truth) 90 | traj_est -- the second trajectory (estimated trajectory) 91 | param_max_pairs -- number of relative poses to be evaluated 92 | param_fixed_delta -- false: evaluate over all possible pairs 93 | true: only evaluate over pairs with a given distance (delta) 94 | param_delta -- distance between the evaluated pairs 95 | param_delta_unit -- unit for comparison: 96 | "s": seconds 97 | "m": meters 98 | "rad": radians 99 | "deg": degrees 100 | "f": frames 101 | param_offset -- time offset between two trajectories (to model the delay) 102 | param_scale -- scale to be applied to the second trajectory 103 | 104 | Output: 105 | list of compared poses and the resulting translation and rotation error 106 | """ 107 | 108 | if not param_fixed_delta: 109 | if(param_max_pairs==0 or len(traj_est)param_max_pairs): 120 | pairs = random.sample(pairs,param_max_pairs) 121 | 122 | result = [] 123 | for i,j in pairs: 124 | 125 | error44 = ominus( ominus( traj_est[j], traj_est[i] ), 126 | ominus( traj_gt[j], traj_gt[i] ) ) 127 | 128 | trans = compute_distance(error44) 129 | rot = compute_angle(error44) 130 | 131 | result.append([i,j,trans,rot]) 132 | 133 | if len(result)<2: 134 | raise Exception("Couldn't find pairs between groundtruth and estimated trajectory!") 135 | 136 | return result 137 | 138 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | 64 | #ifndef DCN_V2_IM2COL_CUDA 65 | #define DCN_V2_IM2COL_CUDA 66 | 67 | #ifdef __cplusplus 68 | extern "C" 69 | { 70 | #endif 71 | 72 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 73 | const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 81 | const float *data_col, const float *data_offset, const float *data_mask, 82 | const int batch_size, const int channels, const int height_im, const int width_im, 83 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 85 | const int dilation_h, const int dilation_w, 86 | const int deformable_group, float *grad_im); 87 | 88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 90 | const int batch_size, const int channels, const int height_im, const int width_im, 91 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 93 | const int dilation_h, const int dilation_w, 94 | const int deformable_group, 95 | float *grad_offset, float *grad_mask); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | 101 | #endif -------------------------------------------------------------------------------- /evaluator/trajectory_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang 2 | # For License information please see the LICENSE file in the root directory. 
3 | 4 | import numpy as np 5 | from .transformation import pos_quats2SE_matrices, SE2pos_quat, pose2motion, motion2pose 6 | 7 | def shift0(traj): 8 | ''' 9 | Traj: a list of [t + quat] 10 | Return: translate and rotate the traj 11 | ''' 12 | traj_ses = pos_quats2SE_matrices(np.array(traj)) 13 | traj_init = traj_ses[0] 14 | traj_init_inv = np.linalg.inv(traj_init) 15 | new_traj = [] 16 | for tt in traj_ses: 17 | ttt=traj_init_inv.dot(tt) 18 | new_traj.append(SE2pos_quat(ttt)) 19 | return np.array(new_traj) 20 | 21 | def ned2cam(traj): 22 | ''' 23 | transfer a ned traj to camera frame traj 24 | ''' 25 | T = np.array([[0,1,0,0], 26 | [0,0,1,0], 27 | [1,0,0,0], 28 | [0,0,0,1]], dtype=np.float32) 29 | T_inv = np.linalg.inv(T) 30 | new_traj = [] 31 | traj_ses = pos_quats2SE_matrices(np.array(traj)) 32 | 33 | for tt in traj_ses: 34 | ttt=T.dot(tt).dot(T_inv) 35 | new_traj.append(SE2pos_quat(ttt)) 36 | 37 | return np.array(new_traj) 38 | 39 | def cam2ned(traj): 40 | ''' 41 | transfer a camera traj to ned frame traj 42 | ''' 43 | T = np.array([[0,0,1,0], 44 | [1,0,0,0], 45 | [0,1,0,0], 46 | [0,0,0,1]], dtype=np.float32) 47 | T_inv = np.linalg.inv(T) 48 | new_traj = [] 49 | traj_ses = pos_quats2SE_matrices(np.array(traj)) 50 | 51 | for tt in traj_ses: 52 | ttt=T.dot(tt).dot(T_inv) 53 | new_traj.append(SE2pos_quat(ttt)) 54 | 55 | return np.array(new_traj) 56 | 57 | 58 | def trajectory_transform(gt_traj, est_traj): 59 | ''' 60 | 1. center the start frame to the axis origin 61 | 2. align the GT frame (NED) with estimation frame (camera) 62 | ''' 63 | gt_traj_trans = shift0(gt_traj) 64 | est_traj_trans = shift0(est_traj) 65 | 66 | # gt_traj_trans = ned2cam(gt_traj_trans) 67 | # est_traj_trans = cam2ned(est_traj_trans) 68 | 69 | return gt_traj_trans, est_traj_trans 70 | 71 | def rescale_bk(poses_gt, poses): 72 | motion_gt = pose2motion(poses_gt) 73 | motion = pose2motion(poses) 74 | 75 | speed_square_gt = np.sum(motion_gt[:,0:3,3]*motion_gt[:,0:3,3],1) 76 | speed_gt = np.sqrt(speed_square_gt) 77 | speed_square = np.sum(motion[:,0:3,3]*motion[:,0:3,3],1) 78 | speed = np.sqrt(speed_square) 79 | # when the speed is small, the scale could become very large 80 | # import ipdb;ipdb.set_trace() 81 | mask = (speed_gt>0.0001) # * (speed>0.00001) 82 | scale = np.mean((speed[mask])/speed_gt[mask]) 83 | scale = 1.0/scale 84 | motion[:,0:3,3] = motion[:,0:3,3]*scale 85 | pose_update = motion2pose(motion) 86 | return pose_update, scale 87 | 88 | def pose2trans(pose_data): 89 | data_size = len(pose_data) 90 | trans = [] 91 | for i in range(0,data_size-1): 92 | tran = np.array(pose_data[i+1][:3]) - np.array(pose_data[i][:3]) # np.linalg.inv(data[i]).dot(data[i+1]) 93 | trans.append(tran) 94 | 95 | return np.array(trans) # N x 3 96 | 97 | 98 | def rescale(poses_gt, poses): 99 | ''' 100 | similar to rescale 101 | poses_gt/poses: N x 7 poselist in quaternion format 102 | ''' 103 | trans_gt = pose2trans(poses_gt) 104 | trans = pose2trans(poses) 105 | 106 | speed_square_gt = np.sum(trans_gt*trans_gt,1) 107 | speed_gt = np.sqrt(speed_square_gt) 108 | speed_square = np.sum(trans*trans,1) 109 | speed = np.sqrt(speed_square) 110 | # when the speed is small, the scale could become very large 111 | # import ipdb;ipdb.set_trace() 112 | mask = (speed_gt>0.0001) # * (speed>0.00001) 113 | scale = np.mean((speed[mask])/speed_gt[mask]) 114 | scale = 1.0/scale 115 | poses[:,0:3] = poses[:,0:3]*scale 116 | return poses, scale 117 | 118 | def trajectory_scale(traj, scale): 119 | for ttt in traj: 120 | ttt[0:3,3] = ttt[0:3,3]*scale 121 | 
return traj 122 | 123 | def timestamp_associate(first_list, second_list, max_difference): 124 | """ 125 | Associate two trajectory of [stamp,data]. As the time stamps never match exactly, we aim 126 | to find the closest match for every input tuple. 127 | 128 | Input: 129 | first_list -- first list of (stamp,data) 130 | second_list -- second list of (stamp,data) 131 | max_difference -- search radius for candidate generation 132 | 133 | Output: 134 | first_res: matched data from the first list 135 | second_res: matched data from the second list 136 | 137 | """ 138 | first_dict = dict([(l[0],l[1:]) for l in first_list if len(l)>1]) 139 | second_dict = dict([(l[0],l[1:]) for l in second_list if len(l)>1]) 140 | 141 | first_keys = first_dict.keys() 142 | second_keys = second_dict.keys() 143 | potential_matches = [(abs(a - b ), a, b) 144 | for a in first_keys 145 | for b in second_keys 146 | if abs(a - b) < max_difference] 147 | potential_matches.sort() 148 | matches = [] 149 | for diff, a, b in potential_matches: 150 | if a in first_keys and b in second_keys: 151 | first_keys.remove(a) 152 | second_keys.remove(b) 153 | matches.append((a, b)) 154 | 155 | matches.sort() 156 | 157 | first_res = [] 158 | second_res = [] 159 | for t1, t2 in matches: 160 | first_res.append(first_dict[t1]) 161 | second_res.append(second_dict[t2]) 162 | return np.array(first_res), np.array(second_res) 163 | -------------------------------------------------------------------------------- /Network/VOFlowNet.py: -------------------------------------------------------------------------------- 1 | # Software License Agreement (BSD License) 2 | # 3 | # Copyright (c) 2020, Wenshan Wang, CMU 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions 8 | # are met: 9 | # 10 | # * Redistributions of source code must retain the above copyright 11 | # notice, this list of conditions and the following disclaimer. 12 | # * Redistributions in binary form must reproduce the above 13 | # copyright notice, this list of conditions and the following 14 | # disclaimer in the documentation and/or other materials provided 15 | # with the distribution. 16 | # * Neither the name of CMU nor the names of its 17 | # contributors may be used to endorse or promote products derived 18 | # from this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 | # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 | # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 30 | # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | # POSSIBILITY OF SUCH DAMAGE. 
32 | 33 | import torch 34 | import torch.nn as nn 35 | import torch.nn.functional as F 36 | import math 37 | 38 | def conv(in_planes, out_planes, kernel_size=3, stride=2, padding=1, dilation=1, bn_layer=False, bias=True): 39 | if bn_layer: 40 | return nn.Sequential( 41 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation, bias=bias), 42 | nn.BatchNorm2d(out_planes), 43 | nn.ReLU(inplace=True) 44 | ) 45 | else: 46 | return nn.Sequential( 47 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, padding=padding, stride=stride, dilation=dilation), 48 | nn.ReLU(inplace=True) 49 | ) 50 | 51 | def linear(in_planes, out_planes): 52 | return nn.Sequential( 53 | nn.Linear(in_planes, out_planes), 54 | nn.ReLU(inplace=True) 55 | ) 56 | 57 | class BasicBlock(nn.Module): 58 | expansion = 1 59 | def __init__(self, inplanes, planes, stride, downsample, pad, dilation): 60 | super(BasicBlock, self).__init__() 61 | 62 | self.conv1 = conv(inplanes, planes, 3, stride, pad, dilation) 63 | self.conv2 = nn.Conv2d(planes, planes, 3, 1, pad, dilation) 64 | 65 | self.downsample = downsample 66 | self.stride = stride 67 | 68 | def forward(self, x): 69 | out = self.conv1(x) 70 | out = self.conv2(out) 71 | 72 | if self.downsample is not None: 73 | x = self.downsample(x) 74 | out += x 75 | 76 | return F.relu(out, inplace=True) 77 | 78 | class VOFlowRes(nn.Module): 79 | def __init__(self): 80 | super(VOFlowRes, self).__init__() 81 | inputnum = 5 82 | blocknums = [2,2,3,4,6,7,3] 83 | outputnums = [32,64,64,128,128,256,256] 84 | 85 | self.firstconv = nn.Sequential(conv(inputnum, 32, 3, 2, 1, 1, False), 86 | conv(32, 32, 3, 1, 1, 1), 87 | conv(32, 32, 3, 1, 1, 1)) 88 | 89 | self.inplanes = 32 90 | 91 | self.layer1 = self._make_layer(BasicBlock, outputnums[2], blocknums[2], 2, 1, 1) # 40 x 28 92 | self.layer2 = self._make_layer(BasicBlock, outputnums[3], blocknums[3], 2, 1, 1) # 20 x 14 93 | self.layer3 = self._make_layer(BasicBlock, outputnums[4], blocknums[4], 2, 1, 1) # 10 x 7 94 | self.layer4 = self._make_layer(BasicBlock, outputnums[5], blocknums[5], 2, 1, 1) # 5 x 4 95 | self.layer5 = self._make_layer(BasicBlock, outputnums[6], blocknums[6], 2, 1, 1) # 3 x 2 96 | fcnum = outputnums[6] * 6 97 | 98 | fc1_trans = linear(fcnum, 128) 99 | fc2_trans = linear(128,32) 100 | fc3_trans = nn.Linear(32,3) 101 | 102 | fc1_rot = linear(fcnum, 128) 103 | fc2_rot = linear(128,32) 104 | fc3_rot = nn.Linear(32,3) 105 | 106 | 107 | self.voflow_trans = nn.Sequential(fc1_trans, fc2_trans, fc3_trans) 108 | self.voflow_rot = nn.Sequential(fc1_rot, fc2_rot, fc3_rot) 109 | 110 | 111 | def _make_layer(self, block, planes, blocks, stride, pad, dilation): 112 | downsample = None 113 | if stride != 1 or self.inplanes != planes * block.expansion: 114 | downsample = nn.Conv2d(self.inplanes, planes * block.expansion, 115 | kernel_size=1, stride=stride) 116 | 117 | layers = [] 118 | layers.append(block(self.inplanes, planes, stride, downsample, pad, dilation)) 119 | self.inplanes = planes * block.expansion 120 | for i in range(1, blocks): 121 | layers.append(block(self.inplanes, planes,1,None,pad,dilation)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.firstconv(x) 127 | x = self.layer1(x) 128 | x = self.layer2(x) 129 | x = self.layer3(x) 130 | x = self.layer4(x) 131 | x = self.layer5(x) 132 | 133 | x = x.view(x.shape[0], -1) 134 | x_trans = self.voflow_trans(x) 135 | x_rot = self.voflow_rot(x) 136 | return torch.cat((x_trans, x_rot), dim=1) 
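# ----------------------------------------------------------------------------
# Example usage (illustrative sketch, not part of the original file). It
# assumes the default 448x640 input images, which the dataloader downscales
# by 4 before the flow enters this network, and it assumes the 5 input
# channels are optical flow (2), an intrinsics layer (2) and a
# motion-segmentation mask (1); the exact channel layout is an assumption.
if __name__ == '__main__':
    model = VOFlowRes()
    dummy_input = torch.randn(1, 5, 112, 160)   # B x C x H x W = 1 x 5 x (448/4) x (640/4)
    motion = model(dummy_input)                  # 3 translation + 3 rotation components
    print(motion.shape)                          # torch.Size([1, 6])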
-------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: dytanvo 2 | channels: 3 | - pytorch3d 4 | - pytorch 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=main 9 | - blas=1.0=mkl 10 | - bzip2=1.0.8=h516909a_3 11 | - ca-certificates=2022.5.18.1=ha878542_0 12 | - cairo=1.14.12=h8948797_3 13 | - certifi=2022.5.18.1=py38h578d9bd_0 14 | - colorama=0.4.4=pyh9f0ad1d_0 15 | - cudatoolkit=10.2.89=hfd86e86_1 16 | - cudatoolkit-dev=11.4.0=py38h497a2fe_1 17 | - cycler=0.10.0=py38_0 18 | - dbus=1.13.18=hb2f20db_0 19 | - expat=2.2.10=he6710b0_2 20 | - ffmpeg=4.0.2=ha0c5888_2 21 | - fontconfig=2.13.0=h9420a91_0 22 | - freeglut=3.2.1=h58526e2_0 23 | - freetype=2.10.4=h5ab3b9f_0 24 | - glib=2.63.1=h5a9c865_0 25 | - gmp=6.1.2=hf484d3e_1000 26 | - gnutls=3.5.19=h2a4e5f8_1 27 | - graphite2=1.3.13=h58526e2_1001 28 | - gst-plugins-base=1.14.0=hbbd80ab_1 29 | - gstreamer=1.14.0=hb453b48_1 30 | - harfbuzz=1.8.8=hffaf4a1_0 31 | - hdf5=1.10.2=hc401514_3 32 | - icu=58.2=he6710b0_3 33 | - intel-openmp=2020.2=254 34 | - jasper=2.0.14=h07fcdf6_1 35 | - jpeg=9b=h024ee3a_2 36 | - kiwisolver=1.3.0=py38h2531618_0 37 | - kornia=0.5.3=pyhd8ed1ab_0 38 | - lcms2=2.11=h396b838_0 39 | - ld_impl_linux-64=2.33.1=h53a641e_7 40 | - libedit=3.1.20191231=h14c3975_1 41 | - libffi=3.2.1=hf484d3e_1007 42 | - libgcc-ng=9.1.0=hdf63c60_0 43 | - libgfortran=3.0.0=1 44 | - libgfortran-ng=7.3.0=hdf63c60_0 45 | - libglu=9.0.0=he1b5a44_1001 46 | - libiconv=1.16=h516909a_0 47 | - libopencv=3.4.2=hb342d67_1 48 | - libpng=1.6.37=hbc83047_0 49 | - libstdcxx-ng=9.1.0=hdf63c60_0 50 | - libtiff=4.1.0=h2733197_1 51 | - libuuid=1.0.3=h1bed415_2 52 | - libuv=1.40.0=h7b6447c_0 53 | - libxcb=1.14=h7b6447c_0 54 | - libxml2=2.9.10=hb55368b_3 55 | - lz4-c=1.9.2=heb0550a_3 56 | - matplotlib=3.3.2=0 57 | - matplotlib-base=3.3.2=py38h817c723_0 58 | - mkl=2020.2=256 59 | - mkl-service=2.3.0=py38he904b0f_0 60 | - mkl_fft=1.2.0=py38h23d657b_0 61 | - mkl_random=1.1.1=py38h0573a6f_0 62 | - ncurses=6.2=he6710b0_1 63 | - nettle=3.3=0 64 | - ninja=1.10.2=py38hff7bd54_0 65 | - olefile=0.46=py_0 66 | - openh264=1.8.0=hdbcaa40_1000 67 | - openssl=1.1.1h=h516909a_0 68 | - pcre=8.44=he6710b0_0 69 | - pillow=8.0.1=py38he98fc37_0 70 | - pip=20.3=py38h06a4308_0 71 | - pixman=0.40.0=h36c2ea0_0 72 | - pyparsing=2.4.7=py_0 73 | - pyqt=5.9.2=py38h05f1152_4 74 | - python=3.8.3=cpython_he5300dc_0 75 | - python-dateutil=2.8.2=pyhd8ed1ab_0 76 | - python_abi=3.8=1_cp38 77 | - pytorch=1.7.0=py3.8_cuda10.2.89_cudnn7.6.5_0 78 | - pytorch3d=0.3.0=py38_cu102_pyt170 79 | - pyyaml=5.3.1=py38h8df0ef7_1 80 | - qt=5.9.7=h5867ecd_1 81 | - readline=8.0=h7b6447c_0 82 | - scipy=1.5.2=py38h0b6359f_0 83 | - setuptools=50.3.2=py38h06a4308_2 84 | - sip=4.19.13=py38he6710b0_0 85 | - six=1.15.0=py38h06a4308_0 86 | - sqlite=3.33.0=h62c20be_0 87 | - termcolor=1.1.0=py_2 88 | - tk=8.6.10=hbc83047_0 89 | - torchaudio=0.7.0=py38 90 | - torchvision=0.8.1=py38_cu102 91 | - tornado=6.1=py38h27cfd23_0 92 | - typing_extensions=4.3.0=pyha770c72_0 93 | - wheel=0.36.0=pyhd3eb1b0_0 94 | - x264=1!152.20180806=h14c3975_0 95 | - xorg-fixesproto=5.0=h14c3975_1002 96 | - xorg-inputproto=2.3.2=h14c3975_1002 97 | - xorg-kbproto=1.0.7=h14c3975_1002 98 | - xorg-libx11=1.6.12=h516909a_0 99 | - xorg-libxau=1.0.9=h14c3975_0 100 | - xorg-libxext=1.3.4=h516909a_0 101 | - xorg-libxfixes=5.0.3=h516909a_1004 102 | - xorg-libxi=1.7.10=h516909a_0 103 | - 
xorg-xextproto=7.3.0=h14c3975_1002 104 | - xorg-xproto=7.0.31=h14c3975_1007 105 | - xz=5.2.5=h7b6447c_0 106 | - yacs=0.1.8=pyhd8ed1ab_0 107 | - yaml=0.2.5=h516909a_0 108 | - zlib=1.2.11=h7b6447c_3 109 | - zstd=1.4.5=h9ceee32_0 110 | - pip: 111 | - absl-py==0.11.0 112 | - antlr4-python3-runtime==4.9.3 113 | - appdirs==1.4.4 114 | - beautifulsoup4==4.11.1 115 | - black==21.4b2 116 | - cachetools==4.1.1 117 | - chardet==3.0.4 118 | - charset-normalizer==2.1.1 119 | - cloudpickle==1.6.0 120 | - cupy-cuda102==11.1.0 121 | - cython==0.29.21 122 | - data==0.4 123 | - dataclasses==0.6 124 | - dcnv2==0.1 125 | - decorator==5.1.1 126 | - detectron2==0.5+cu102 127 | - fastrlock==0.8 128 | - filelock==3.8.0 129 | - funcsigs==1.0.2 130 | - future==0.18.2 131 | - fvcore==0.1.2.post20201122 132 | - gdown==4.5.1 133 | - google-auth==1.23.0 134 | - google-auth-oauthlib==0.4.2 135 | - grpcio==1.34.0 136 | - hydra-core==1.2.0 137 | - idna==2.10 138 | - imageio==2.9.0 139 | - importlib-resources==5.9.0 140 | - iopath==0.1.8 141 | - joblib==0.17.0 142 | - jsonpatch==1.32 143 | - jsonpointer==2.3 144 | - latex==0.7.0 145 | - lxml==4.9.1 146 | - markdown==3.3.3 147 | - mypy-extensions==0.4.3 148 | - ngransac==0.0.0 149 | - numpy==1.23.2 150 | - oauthlib==3.1.0 151 | - omegaconf==2.2.3 152 | - opencv-python==4.4.0.46 153 | - packaging==21.3 154 | - pathspec==0.10.1 155 | - portalocker==2.0.0 156 | - protobuf==3.14.0 157 | - pyasn1==0.4.8 158 | - pyasn1-modules==0.2.8 159 | - pycocotools==2.0.4 160 | - pydot==1.4.1 161 | - pypng==0.0.20 162 | - pysocks==1.7.1 163 | - pytransform3d==1.14.0 164 | - pyzmq==23.2.1 165 | - regex==2022.8.17 166 | - requests==2.25.0 167 | - requests-oauthlib==1.3.0 168 | - rsa==4.6 169 | - shutilwhich==1.1.0 170 | - soupsieve==2.3.2.post1 171 | - splines==0.2.0 172 | - tabulate==0.8.7 173 | - tempdir==0.7.1 174 | - tensorboard==2.4.0 175 | - tensorboard-data-server==0.6.1 176 | - tensorboard-plugin-wit==1.7.0 177 | - timm==0.6.7 178 | - toml==0.10.2 179 | - torchfile==0.1.0 180 | - tqdm==4.54.0 181 | - trimesh==3.9.3 182 | - urllib3==1.26.2 183 | - visdom==0.1.8.9 184 | - websocket-client==1.4.0 185 | - werkzeug==1.0.1 186 | - workflow==1.0 187 | - zipp==3.8.1 188 | prefix: /home/shihao/miniconda3/envs/rigidmask_v0 189 | -------------------------------------------------------------------------------- /vo_trajectory_from_folder.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | from Datasets.utils import ToTensor, Compose, CropCenter, ResizeData, dataset_intrinsics, DownscaleFlow 3 | from Datasets.utils import plot_traj, visflow, load_kiiti_intrinsics, load_sceneflow_extrinsics 4 | from Datasets.tartanTrajFlowDataset import TrajFolderDataset 5 | from evaluator.transformation import pose_quats2motion_ses, motion_ses2pose_quats 6 | from evaluator.tartanair_evaluator import TartanAirEvaluator 7 | from evaluator.evaluator_base import per_frame_scale_alignment 8 | from DytanVO import DytanVO 9 | 10 | import argparse 11 | import numpy as np 12 | import cv2 13 | from os import mkdir 14 | from os.path import isdir 15 | 16 | def get_args(): 17 | parser = argparse.ArgumentParser(description='Inference code of DytanVO') 18 | 19 | parser.add_argument('--batch-size', type=int, default=1, 20 | help='batch size (default: 1)') 21 | parser.add_argument('--worker-num', type=int, default=1, 22 | help='data loader worker number (default: 1)') 23 | parser.add_argument('--image-width', type=int, default=640, 24 | help='image 
width (default: 640)') 25 | parser.add_argument('--image-height', type=int, default=448, 26 | help='image height (default: 448)') 27 | parser.add_argument('--vo-model-name', default='', 28 | help='name of pretrained VO model (default: "")') 29 | parser.add_argument('--flow-model-name', default='', 30 | help='name of pretrained flow model (default: "")') 31 | parser.add_argument('--pose-model-name', default='', 32 | help='name of pretrained pose model (default: "")') 33 | parser.add_argument('--seg-model-name', default='', 34 | help='name of pretrained segmentation model (default: "")') 35 | parser.add_argument('--airdos', action='store_true', default=False, 36 | help='airdos test (default: False)') 37 | parser.add_argument('--rs_d435', action='store_true', default=False, 38 | help='realsense d435i test (default: False)') 39 | parser.add_argument('--sceneflow', action='store_true', default=False, 40 | help='sceneflow test (default: False)') 41 | parser.add_argument('--kitti', action='store_true', default=False, 42 | help='kitti test (default: False)') 43 | parser.add_argument('--commaai', action='store_true', default=False, 44 | help='commaai test (default: False)') 45 | parser.add_argument('--kitti-intrinsics-file', default='', 46 | help='kitti intrinsics file calib.txt (default: )') 47 | parser.add_argument('--test-dir', default='', 48 | help='test trajectory folder where the RGB images are (default: "")') 49 | parser.add_argument('--pose-file', default='', 50 | help='test trajectory gt pose file, used for scale calculation, and visualization (default: "")') 51 | parser.add_argument('--save-flow', action='store_true', default=False, 52 | help='save optical flow (default: False)') 53 | parser.add_argument('--seg-thresh', type=float, default=0.7, 54 | help='threshold for motion segmentation') 55 | parser.add_argument('--iter-num', type=int, default=2, 56 | help='number of iterations') 57 | 58 | args = parser.parse_args() 59 | 60 | return args 61 | 62 | 63 | if __name__ == '__main__': 64 | args = get_args() 65 | 66 | testvo = DytanVO(args.vo_model_name, args.seg_model_name, args.image_height, args.image_width, 67 | args.kitti, args.flow_model_name, args.pose_model_name) 68 | 69 | # load trajectory data from a folder 70 | if args.kitti: 71 | datastr = 'kitti' 72 | elif args.airdos: 73 | datastr = 'airdos' 74 | elif args.rs_d435: 75 | datastr = 'rs_d435' 76 | elif args.sceneflow: 77 | datastr = 'sceneflow' 78 | elif args.commaai: 79 | datastr = 'commaai' 80 | else: 81 | datastr = 'tartanair' 82 | focalx, focaly, centerx, centery, baseline = dataset_intrinsics(datastr, '15mm' in args.test_dir) 83 | if args.kitti_intrinsics_file.endswith('.txt') and datastr == 'kitti': 84 | focalx, focaly, centerx, centery, baseline = load_kiiti_intrinsics(args.kitti_intrinsics_file) 85 | 86 | if datastr == 'kitti': 87 | transform = Compose([ResizeData((args.image_height, 1226)), CropCenter((args.image_height, args.image_width)), DownscaleFlow(), ToTensor()]) 88 | else: 89 | transform = Compose([CropCenter((args.image_height, args.image_width)), DownscaleFlow(), ToTensor()]) 90 | 91 | testDataset = TrajFolderDataset(args.test_dir, transform=transform, 92 | focalx=focalx, focaly=focaly, centerx=centerx, centery=centery) 93 | testDataloader = DataLoader(testDataset, batch_size=args.batch_size, 94 | shuffle=False, num_workers=args.worker_num) 95 | testDataiter = iter(testDataloader) 96 | 97 | motionlist = [] 98 | testname = datastr + '_' + args.vo_model_name.split('.')[0] + '_' + args.test_dir.split('/')[-1] 99 | 
if args.save_flow: 100 | flowdir = 'results/'+testname+'_flow' 101 | if not isdir(flowdir): 102 | mkdir(flowdir) 103 | flowcount = 0 104 | while True: 105 | try: 106 | sample = testDataiter.next() 107 | except StopIteration: 108 | break 109 | 110 | motion, flow = testvo.test_batch(sample, [focalx, centerx, centery, baseline], args.seg_thresh, args.iter_num) 111 | motionlist.append(motion) 112 | 113 | if args.save_flow: 114 | for k in range(flow.shape[0]): 115 | flowk = flow[k].transpose(1,2,0) 116 | np.save(flowdir+'/'+str(flowcount).zfill(6)+'.npy',flowk) 117 | flow_vis = visflow(flowk) 118 | cv2.imwrite(flowdir+'/'+str(flowcount).zfill(6)+'.png',flow_vis) 119 | flowcount += 1 120 | 121 | motions = np.array(motionlist) 122 | 123 | # calculate ATE, RPE, KITTI-RPE 124 | if args.pose_file.endswith('.txt'): 125 | if datastr == 'sceneflow': 126 | gtposes = load_sceneflow_extrinsics(args.pose_file) 127 | else: 128 | gtposes = np.loadtxt(args.pose_file) 129 | if datastr == 'airdos': 130 | gtposes = gtposes[:,1:] # remove the first column of timestamps 131 | 132 | gtmotions = pose_quats2motion_ses(gtposes) 133 | estmotion_scale = per_frame_scale_alignment(gtmotions, motions) 134 | estposes = motion_ses2pose_quats(estmotion_scale) 135 | 136 | evaluator = TartanAirEvaluator() 137 | results = evaluator.evaluate_one_trajectory(gtposes, estposes, scale=True, kittitype=(datastr=='kitti')) 138 | 139 | print("==> ATE: %.4f,\t KITTI-R/t: %.4f, %.4f" %(results['ate_score'], results['kitti_score'][0], results['kitti_score'][1])) 140 | 141 | # save results and visualization 142 | plot_traj(results['gt_aligned'], results['est_aligned'], vis=False, savefigname='results/'+testname+'.png', title='ATE %.4f' %(results['ate_score'])) 143 | np.savetxt('results/'+testname+'.txt',results['est_aligned']) 144 | else: 145 | np.savetxt('results/'+testname+'.txt', motion_ses2pose_quats(motions)) -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | at::Tensor 10 | dcn_v2_forward(const at::Tensor &input, 11 | const at::Tensor &weight, 12 | const at::Tensor &bias, 13 | const at::Tensor &offset, 14 | const at::Tensor &mask, 15 | const int kernel_h, 16 | const int kernel_w, 17 | const int stride_h, 18 | const int stride_w, 19 | const int pad_h, 20 | const int pad_w, 21 | const int dilation_h, 22 | const int dilation_w, 23 | const int deformable_group) 24 | { 25 | if (input.type().is_cuda()) 26 | { 27 | #ifdef WITH_CUDA 28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask, 29 | kernel_h, kernel_w, 30 | stride_h, stride_w, 31 | pad_h, pad_w, 32 | dilation_h, dilation_w, 33 | deformable_group); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | else{ 39 | return dcn_v2_cpu_forward(input, weight, bias, offset, mask, 40 | kernel_h, kernel_w, 41 | stride_h, stride_w, 42 | pad_h, pad_w, 43 | dilation_h, dilation_w, 44 | deformable_group); 45 | } 46 | } 47 | 48 | std::vector 49 | dcn_v2_backward(const at::Tensor &input, 50 | const at::Tensor &weight, 51 | const at::Tensor &bias, 52 | const at::Tensor &offset, 53 | const at::Tensor &mask, 54 | const at::Tensor &grad_output, 55 | int kernel_h, int kernel_w, 56 | int stride_h, int stride_w, 57 | int pad_h, int pad_w, 58 | int dilation_h, int dilation_w, 59 | int 
deformable_group) 60 | { 61 | if (input.type().is_cuda()) 62 | { 63 | #ifdef WITH_CUDA 64 | return dcn_v2_cuda_backward(input, 65 | weight, 66 | bias, 67 | offset, 68 | mask, 69 | grad_output, 70 | kernel_h, kernel_w, 71 | stride_h, stride_w, 72 | pad_h, pad_w, 73 | dilation_h, dilation_w, 74 | deformable_group); 75 | #else 76 | AT_ERROR("Not compiled with GPU support"); 77 | #endif 78 | } 79 | else{ 80 | return dcn_v2_cpu_backward(input, 81 | weight, 82 | bias, 83 | offset, 84 | mask, 85 | grad_output, 86 | kernel_h, kernel_w, 87 | stride_h, stride_w, 88 | pad_h, pad_w, 89 | dilation_h, dilation_w, 90 | deformable_group); 91 | } 92 | } 93 | 94 | std::tuple 95 | dcn_v2_psroi_pooling_forward(const at::Tensor &input, 96 | const at::Tensor &bbox, 97 | const at::Tensor &trans, 98 | const int no_trans, 99 | const float spatial_scale, 100 | const int output_dim, 101 | const int group_size, 102 | const int pooled_size, 103 | const int part_size, 104 | const int sample_per_part, 105 | const float trans_std) 106 | { 107 | if (input.type().is_cuda()) 108 | { 109 | #ifdef WITH_CUDA 110 | return dcn_v2_psroi_pooling_cuda_forward(input, 111 | bbox, 112 | trans, 113 | no_trans, 114 | spatial_scale, 115 | output_dim, 116 | group_size, 117 | pooled_size, 118 | part_size, 119 | sample_per_part, 120 | trans_std); 121 | #else 122 | AT_ERROR("Not compiled with GPU support"); 123 | #endif 124 | } 125 | else{ 126 | return dcn_v2_psroi_pooling_cpu_forward(input, 127 | bbox, 128 | trans, 129 | no_trans, 130 | spatial_scale, 131 | output_dim, 132 | group_size, 133 | pooled_size, 134 | part_size, 135 | sample_per_part, 136 | trans_std); 137 | } 138 | } 139 | 140 | std::tuple 141 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad, 142 | const at::Tensor &input, 143 | const at::Tensor &bbox, 144 | const at::Tensor &trans, 145 | const at::Tensor &top_count, 146 | const int no_trans, 147 | const float spatial_scale, 148 | const int output_dim, 149 | const int group_size, 150 | const int pooled_size, 151 | const int part_size, 152 | const int sample_per_part, 153 | const float trans_std) 154 | { 155 | if (input.type().is_cuda()) 156 | { 157 | #ifdef WITH_CUDA 158 | return dcn_v2_psroi_pooling_cuda_backward(out_grad, 159 | input, 160 | bbox, 161 | trans, 162 | top_count, 163 | no_trans, 164 | spatial_scale, 165 | output_dim, 166 | group_size, 167 | pooled_size, 168 | part_size, 169 | sample_per_part, 170 | trans_std); 171 | #else 172 | AT_ERROR("Not compiled with GPU support"); 173 | #endif 174 | } 175 | else{ 176 | return dcn_v2_psroi_pooling_cpu_backward(out_grad, 177 | input, 178 | bbox, 179 | trans, 180 | top_count, 181 | no_trans, 182 | spatial_scale, 183 | output_dim, 184 | group_size, 185 | pooled_size, 186 | part_size, 187 | sample_per_part, 188 | trans_std); 189 | } 190 | } -------------------------------------------------------------------------------- /Network/rigidmask/det_losses.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Portions of this code are from 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import pdb 12 | import torch 13 | import torch.nn as nn 
14 | from .det_utils import _transpose_and_gather_feat 15 | import torch.nn.functional as F 16 | 17 | 18 | def _slow_neg_loss(pred, gt): 19 | '''focal loss from CornerNet''' 20 | pos_inds = gt.eq(1) 21 | neg_inds = gt.lt(1) 22 | 23 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 24 | 25 | loss = 0 26 | pos_pred = pred[pos_inds] 27 | neg_pred = pred[neg_inds] 28 | 29 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 30 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 31 | 32 | num_pos = pos_inds.float().sum() 33 | pos_loss = pos_loss.sum() 34 | neg_loss = neg_loss.sum() 35 | 36 | if pos_pred.nelement() == 0: 37 | loss = loss - neg_loss 38 | else: 39 | loss = loss - (pos_loss + neg_loss) / num_pos 40 | return loss 41 | 42 | 43 | def _neg_loss(pred, gt, heat_logits): 44 | ''' Modified focal loss. Exactly the same as CornerNet. 45 | Runs faster and costs a little bit more memory 46 | Arguments: 47 | pred (batch x c x h x w) 48 | gt_regr (batch x c x h x w) 49 | ''' 50 | pos_inds = gt.eq(1).float() 51 | neg_inds = gt.lt(1).float() 52 | 53 | neg_weights = torch.pow(1 - gt, 4) 54 | 55 | loss = 0 56 | 57 | logpred = torch.nn.functional.log_softmax(heat_logits,1) 58 | pos_loss = logpred[:,0:1] * torch.pow(1 - pred, 2) * pos_inds 59 | neg_loss = logpred[:,1:2] * torch.pow(pred, 2) * neg_weights * neg_inds 60 | 61 | num_pos = pos_inds.float().sum() 62 | pos_loss = pos_loss.sum() 63 | neg_loss = neg_loss.sum() 64 | 65 | if num_pos == 0: 66 | loss = loss - neg_loss 67 | else: 68 | loss = loss - (pos_loss + neg_loss) / num_pos 69 | return loss 70 | 71 | def _not_faster_neg_loss(pred, gt): 72 | pos_inds = gt.eq(1).float() 73 | neg_inds = gt.lt(1).float() 74 | num_pos = pos_inds.float().sum() 75 | neg_weights = torch.pow(1 - gt, 4) 76 | 77 | loss = 0 78 | trans_pred = pred * neg_inds + (1 - pred) * pos_inds 79 | weight = neg_weights * neg_inds + pos_inds 80 | all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight 81 | all_loss = all_loss.sum() 82 | 83 | if num_pos > 0: 84 | all_loss /= num_pos 85 | loss -= all_loss 86 | return loss 87 | 88 | def _slow_reg_loss(regr, gt_regr, mask): 89 | num = mask.float().sum() 90 | mask = mask.unsqueeze(2).expand_as(gt_regr) 91 | 92 | regr = regr[mask] 93 | gt_regr = gt_regr[mask] 94 | 95 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False) 96 | regr_loss = regr_loss / (num + 1e-4) 97 | return regr_loss 98 | 99 | def _reg_loss(regr, gt_regr, mask): 100 | ''' L1 regression loss 101 | Arguments: 102 | regr (batch x max_objects x dim) 103 | gt_regr (batch x max_objects x dim) 104 | mask (batch x max_objects) 105 | ''' 106 | num = mask.float().sum() 107 | mask = mask.unsqueeze(2).expand_as(gt_regr).float() 108 | 109 | regr = regr * mask 110 | gt_regr = gt_regr * mask 111 | 112 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False) 113 | regr_loss = regr_loss / (num + 1e-4) 114 | return regr_loss 115 | 116 | class FocalLoss(nn.Module): 117 | '''nn.Module warpper for focal loss''' 118 | def __init__(self): 119 | super(FocalLoss, self).__init__() 120 | self.neg_loss = _neg_loss 121 | 122 | def forward(self, out, target, logits): 123 | return self.neg_loss(out, target, logits) 124 | 125 | class RegLoss(nn.Module): 126 | '''Regression loss for an output tensor 127 | Arguments: 128 | output (batch x dim x h x w) 129 | mask (batch x max_objects) 130 | ind (batch x max_objects) 131 | target (batch x max_objects x dim) 132 | ''' 133 | def __init__(self): 134 | super(RegLoss, 
self).__init__() 135 | 136 | def forward(self, output, mask, ind, target): 137 | pred = _transpose_and_gather_feat(output, ind) 138 | loss = _reg_loss(pred, target, mask) 139 | return loss 140 | 141 | class RegL1Loss(nn.Module): 142 | def __init__(self): 143 | super(RegL1Loss, self).__init__() 144 | 145 | def forward(self, output, mask, ind, target): 146 | pred = _transpose_and_gather_feat(output, ind) 147 | mask = mask.unsqueeze(2).expand_as(pred).float() 148 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 149 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 150 | loss = loss / (mask.sum() + 1e-4) 151 | return loss 152 | 153 | class NormRegL1Loss(nn.Module): 154 | def __init__(self): 155 | super(NormRegL1Loss, self).__init__() 156 | 157 | def forward(self, output, mask, ind, target): 158 | pred = _transpose_and_gather_feat(output, ind) 159 | mask = mask.unsqueeze(2).expand_as(pred).float() 160 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 161 | pred = pred / (target + 1e-4) 162 | target = target * 0 + 1 163 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 164 | loss = loss / (mask.sum() + 1e-4) 165 | return loss 166 | 167 | class RegWeightedL1Loss(nn.Module): 168 | def __init__(self): 169 | super(RegWeightedL1Loss, self).__init__() 170 | 171 | def forward(self, output, mask, ind, target): 172 | pred = _transpose_and_gather_feat(output, ind) 173 | mask = mask.float() 174 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 175 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 176 | loss = loss / (mask.sum() + 1e-4) 177 | return loss 178 | 179 | class L1Loss(nn.Module): 180 | def __init__(self): 181 | super(L1Loss, self).__init__() 182 | 183 | def forward(self, output, mask, ind, target): 184 | pred = _transpose_and_gather_feat(output, ind) 185 | mask = mask.unsqueeze(2).expand_as(pred).float() 186 | loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 187 | return loss 188 | 189 | class BinRotLoss(nn.Module): 190 | def __init__(self): 191 | super(BinRotLoss, self).__init__() 192 | 193 | def forward(self, output, mask, ind, rotbin, rotres): 194 | pred = _transpose_and_gather_feat(output, ind) 195 | loss = compute_rot_loss(pred, rotbin, rotres, mask) 196 | return loss 197 | 198 | def compute_res_loss(output, target): 199 | return F.smooth_l1_loss(output, target, reduction='elementwise_mean') 200 | 201 | # TODO: weight 202 | def compute_bin_loss(output, target, mask): 203 | mask = mask.expand_as(output) 204 | output = output * mask.float() 205 | return F.cross_entropy(output, target, reduction='elementwise_mean') 206 | 207 | def compute_rot_loss(output, target_bin, target_res, mask): 208 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 209 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 210 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls] 211 | # target_res: (B, 128, 2) [bin1_res, bin2_res] 212 | # mask: (B, 128, 1) 213 | # import pdb; pdb.set_trace() 214 | output = output.view(-1, 8) 215 | target_bin = target_bin.view(-1, 2) 216 | target_res = target_res.view(-1, 2) 217 | mask = mask.view(-1, 1) 218 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask) 219 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask) 220 | loss_res = torch.zeros_like(loss_bin1) 221 | if target_bin[:, 0].nonzero().shape[0] > 0: 222 | idx1 = target_bin[:, 0].nonzero()[:, 0] 223 | valid_output1 = 
torch.index_select(output, 0, idx1.long()) 224 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long()) 225 | loss_sin1 = compute_res_loss( 226 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0])) 227 | loss_cos1 = compute_res_loss( 228 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0])) 229 | loss_res += loss_sin1 + loss_cos1 230 | if target_bin[:, 1].nonzero().shape[0] > 0: 231 | idx2 = target_bin[:, 1].nonzero()[:, 0] 232 | valid_output2 = torch.index_select(output, 0, idx2.long()) 233 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long()) 234 | loss_sin2 = compute_res_loss( 235 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1])) 236 | loss_cos2 = compute_res_loss( 237 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1])) 238 | loss_res += loss_sin2 + loss_cos2 239 | return loss_bin1 + loss_bin2 + loss_res 240 | -------------------------------------------------------------------------------- /Datasets/util_flow.py: -------------------------------------------------------------------------------- 1 | """ 2 | # ============================== 3 | # util_flow.py 4 | # library for optical flow processing 5 | # Author: Gengshan Yang 6 | # Date: 10th Feb 2021 7 | # ============================== 8 | """ 9 | import math 10 | import png 11 | import struct 12 | import array 13 | import numpy as np 14 | import cv2 15 | import pdb 16 | 17 | from io import * 18 | 19 | UNKNOWN_FLOW_THRESH = 1e9; 20 | UNKNOWN_FLOW = 1e10; 21 | 22 | # Middlebury checks 23 | TAG_STRING = 'PIEH' # use this when WRITING the file 24 | TAG_FLOAT = 202021.25 # check for this when READING the file 25 | 26 | def readPFM(file): 27 | import re 28 | file = open(file, 'rb') 29 | 30 | color = None 31 | width = None 32 | height = None 33 | scale = None 34 | endian = None 35 | 36 | header = file.readline().rstrip() 37 | if header == b'PF': 38 | color = True 39 | elif header == b'Pf': 40 | color = False 41 | else: 42 | raise Exception('Not a PFM file.') 43 | 44 | dim_match = re.match(b'^(\d+)\s(\d+)\s$', file.readline()) 45 | if dim_match: 46 | width, height = map(int, dim_match.groups()) 47 | else: 48 | raise Exception('Malformed PFM header.') 49 | 50 | scale = float(file.readline().rstrip()) 51 | if scale < 0: # little-endian 52 | endian = '<' 53 | scale = -scale 54 | else: 55 | endian = '>' # big-endian 56 | 57 | data = np.fromfile(file, endian + 'f') 58 | shape = (height, width, 3) if color else (height, width) 59 | 60 | data = np.reshape(data, shape) 61 | data = np.flipud(data) 62 | return data, scale 63 | 64 | 65 | def save_pfm(file, image, scale = 1): 66 | import sys 67 | color = None 68 | 69 | if image.dtype.name != 'float32': 70 | raise Exception('Image dtype must be float32.') 71 | 72 | if len(image.shape) == 3 and image.shape[2] == 3: # color image 73 | color = True 74 | elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1: # greyscale 75 | color = False 76 | else: 77 | raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.') 78 | 79 | file.write('PF\n' if color else 'Pf\n') 80 | file.write('%d %d\n' % (image.shape[1], image.shape[0])) 81 | 82 | endian = image.dtype.byteorder 83 | 84 | if endian == '<' or endian == '=' and sys.byteorder == 'little': 85 | scale = -scale 86 | 87 | file.write('%f\n' % scale) 88 | 89 | image.tofile(file) 90 | 91 | 92 | def ReadMiddleburyFloFile(path): 93 | """ Read .FLO file as specified by Middlebury. 94 | 95 | Returns tuple (width, height, u, v, mask), where u, v, mask are flat 96 | arrays of values. 
97 | """ 98 | 99 | with open(path, 'rb') as fil: 100 | tag = struct.unpack('f', fil.read(4))[0] 101 | width = struct.unpack('i', fil.read(4))[0] 102 | height = struct.unpack('i', fil.read(4))[0] 103 | 104 | assert tag == TAG_FLOAT 105 | 106 | #data = np.fromfile(path, dtype=np.float, count=-1) 107 | #data = data[3:] 108 | 109 | fmt = 'f' * width*height*2 110 | data = struct.unpack(fmt, fil.read(4*width*height*2)) 111 | 112 | u = data[::2] 113 | v = data[1::2] 114 | 115 | mask = map(lambda x,y: abs(x) 0: 152 | # print(u[ind], v[ind], mask[ind], row[3*x], row[3*x+1], row[3*x+2]) 153 | 154 | #png_reader.close() 155 | 156 | return (width, height, u, v, mask) 157 | 158 | 159 | def WriteMiddleburyFloFile(path, width, height, u, v, mask=None): 160 | """ Write .FLO file as specified by Middlebury. 161 | """ 162 | 163 | if mask is not None: 164 | u_masked = map(lambda x,y: x if y else UNKNOWN_FLOW, u, mask) 165 | v_masked = map(lambda x,y: x if y else UNKNOWN_FLOW, v, mask) 166 | else: 167 | u_masked = u 168 | v_masked = v 169 | 170 | fmt = 'f' * width*height*2 171 | # Interleave lists 172 | data = [x for t in zip(u_masked,v_masked) for x in t] 173 | 174 | with open(path, 'wb') as fil: 175 | fil.write(str.encode(TAG_STRING)) 176 | fil.write(struct.pack('i', width)) 177 | fil.write(struct.pack('i', height)) 178 | fil.write(struct.pack(fmt, *data)) 179 | 180 | 181 | def write_flow(path,flow): 182 | 183 | invalid_idx = (flow[:, :, 2] == 0) 184 | flow[:, :, 0:2] = flow[:, :, 0:2]*64.+ 2 ** 15 185 | flow[invalid_idx, 0] = 0 186 | flow[invalid_idx, 1] = 0 187 | 188 | flow = flow.astype(np.uint16) 189 | flow = cv2.imwrite(path, flow[:,:,::-1]) 190 | 191 | #WriteKittiPngFile(path, 192 | # flow.shape[1], flow.shape[0], flow[:,:,0].flatten(), 193 | # flow[:,:,1].flatten(), flow[:,:,2].flatten()) 194 | 195 | 196 | 197 | def WriteKittiPngFile(path, width, height, u, v, mask=None): 198 | """ Write 16-bit .PNG file as specified by KITTI-2015 (flow). 199 | 200 | u, v are lists of float values 201 | mask is a list of floats, denoting the *valid* pixels. 202 | """ 203 | 204 | data = array.array('H',[0])*width*height*3 205 | 206 | for i,(u_,v_,mask_) in enumerate(zip(u,v,mask)): 207 | data[3*i] = int(u_*64.0+2**15) 208 | data[3*i+1] = int(v_*64.0+2**15) 209 | data[3*i+2] = int(mask_) 210 | 211 | # if mask_ > 0: 212 | # print(data[3*i], data[3*i+1],data[3*i+2]) 213 | 214 | with open(path, 'wb') as png_file: 215 | png_writer = png.Writer(width=width, height=height, bitdepth=16, compression=3, greyscale=False) 216 | png_writer.write_array(png_file, data) 217 | 218 | 219 | def ConvertMiddleburyFloToKittiPng(src_path, dest_path): 220 | width, height, u, v, mask = ReadMiddleburyFloFile(src_path) 221 | WriteKittiPngFile(dest_path, width, height, u, v, mask=mask) 222 | 223 | def ConvertKittiPngToMiddleburyFlo(src_path, dest_path): 224 | width, height, u, v, mask = ReadKittiPngFile(src_path) 225 | WriteMiddleburyFloFile(dest_path, width, height, u, v, mask=mask) 226 | 227 | 228 | def ParseFilenameKitti(filename): 229 | # Parse kitti filename (seq_frameno.xx), 230 | # return seq, frameno, ext. 
231 | # Be aware that seq might contain the dataset name (if contained as prefix) 232 | ext = filename[filename.rfind('.'):] 233 | frameno = filename[filename.rfind('_')+1:filename.rfind('.')] 234 | frameno = int(frameno) 235 | seq = filename[:filename.rfind('_')] 236 | return seq, frameno, ext 237 | 238 | 239 | def read_calib_file(filepath): 240 | """Read in a calibration file and parse into a dictionary.""" 241 | data = {} 242 | 243 | with open(filepath, 'r') as f: 244 | for line in f.readlines(): 245 | key, value = line.split(':', 1) 246 | # The only non-float values in these files are dates, which 247 | # we don't care about anyway 248 | try: 249 | data[key] = np.array([float(x) for x in value.split()]) 250 | except ValueError: 251 | pass 252 | 253 | return data 254 | 255 | def load_calib_cam_to_cam(cam_to_cam_file): 256 | # We'll return the camera calibration as a dictionary 257 | data = {} 258 | 259 | # Load and parse the cam-to-cam calibration data 260 | filedata = read_calib_file(cam_to_cam_file) 261 | 262 | # Create 3x4 projection matrices 263 | P_rect_00 = np.reshape(filedata['P_rect_00'], (3, 4)) 264 | P_rect_10 = np.reshape(filedata['P_rect_01'], (3, 4)) 265 | P_rect_20 = np.reshape(filedata['P_rect_02'], (3, 4)) 266 | P_rect_30 = np.reshape(filedata['P_rect_03'], (3, 4)) 267 | 268 | # Compute the camera intrinsics 269 | data['K_cam0'] = P_rect_00[0:3, 0:3] 270 | data['K_cam1'] = P_rect_10[0:3, 0:3] 271 | data['K_cam2'] = P_rect_20[0:3, 0:3] 272 | data['K_cam3'] = P_rect_30[0:3, 0:3] 273 | 274 | data['b00'] = P_rect_00[0, 3] / P_rect_00[0, 0] 275 | data['b10'] = P_rect_10[0, 3] / P_rect_10[0, 0] 276 | data['b20'] = P_rect_20[0, 3] / P_rect_20[0, 0] 277 | data['b30'] = P_rect_30[0, 3] / P_rect_30[0, 0] 278 | 279 | return data -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DytanVO: Joint Refinement of Visual Odometry and Motion Segmentation in Dynamic Environments 2 | 3 |

4 |
10 | DytanVO: Joint Refinement of Visual Odometry and Motion Segmentation in Dynamic Environments (ICRA 2023)
11 | By Shihao Shen, Yilin Cai, Wenshan Wang, and Sebastian Scherer.
16 |

17 | 18 | ### What's new. 19 | 20 | - 01-17-2023: Our paper has been accepted to ICRA 2023! 21 | 22 | - 01-05-2023: Clean up and upload the codebase for _DytanVO_. Pretrained weights and datasets are also ready. 23 | 24 | - 09-20-2022: Remove _Dynamic Dense RGB-D SLAM with Learning-Based Visual Odometry_. The repo will be used to release codebase for the most recent ICRA 2023 submission. 25 | 26 | 27 | ## Introduction 28 | DytanVO is a learning-based visual odometry (VO) based on its precursor, [TartanVO](https://github.com/castacks/tartanvo). It is the first supervised learning-based VO method that deals with dynamic environments. It takes two consecutive monocular frames in real-time and predicts camera ego-motion in an iterative fashion. It achieves an average improvement of 27.7% over state-of-the-art VO solutions in real-world dynamic environments, and even performs competitively among dynamic visual SLAM systems which optimize the trajectory on the backend. Experiments on plentiful unseen environments also demonstrate its generalizability. 29 | 30 | 31 | ## Installation 32 | We provide an environment file using [anaconda](https://www.anaconda.com/). The code has been tested on an RTX 2080Ti with CUDA 11.4. 33 | ```bash 34 | conda env create -f environment.yml 35 | conda activate dytanvo 36 | ``` 37 | 38 | Compile [DCNv2](https://github.com/MatthewHowe/DCNv2). 39 | ``` 40 | cd Network/rigidmask/networks/DCNv2/; python setup.py install; cd - 41 | ``` 42 | 43 | 44 | ## Models and Data 45 | 46 | ### Pretrained weights 47 | Download [here](https://drive.google.com/file/d/1ujYmKv5FHXYe1KETabTnSs-R2OE0KJV3/view?usp=share_link) and unzip it to the `models` folder. 48 | 49 | ### KITTI dynamic sequences 50 | Original sequences in [KITTI Odometry](https://www.cvlibs.net/datasets/kitti/eval_odometry.php) are trimmed into sub-sequences which contain moving pedestrians, vehicles and cyclists so that VO's robustness to dynamic objects can be explicitly evaluated. Download [DynaKITTI](https://drive.google.com/file/d/1BDnraRWzNf938UsfprWIkcqCSfOUyGt9/view?usp=share_link) and unzip it to the `data` folder. Please cite this paper if you find it useful in your work. 51 | 52 | ### AirDOS-Shibuya 53 | Follow [tartanair-shibuya](https://github.com/haleqiu/tartanair-shibuya) and download it to the `data` folder. 54 | 55 | ### (Optional) Scene Flow 56 | One can also test the model on [Scene Flow datasets](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html), which was used to train both the VO and the segmentation networks. Scene Flow datasets have very challenging sequences with large areas of dynamic objects in image frames. 57 | 58 | You can create symbolic links to wherever the datasets were downloaded in the `data` folder. 59 | 60 | ```Shell 61 | ├── data 62 | ├── AirDOS_shibuya 63 | ├── RoadCrossing03 64 | ├── image_0 65 | ├── ... 66 | ├── gt_pose.txt 67 | ├── RoadCrossing04 68 | ├── ... 69 | ├── DynaKITTI 70 | ├── 00_1 71 | ├── image_2 72 | ├── ... 73 | ├── pose_left.txt 74 | ├── calib.txt 75 | ├── 01_0 76 | ├── ... 77 | ├── SceneFlow 78 | ├── FlyThings3D 79 | ├── frames_cleanpass 80 | ├── frames_finalpass 81 | ├── optical_flow 82 | ├── camera_data 83 | ├── Driving 84 | ├── Monkaa 85 | ├── ... 86 | ``` 87 | 88 | 89 | ## Evaluation 90 | Create a folder to save output flow, segmentation, or poses. 
91 | ```bash 92 | mkdir results 93 | ``` 94 | 95 | ### Dynamic sequences in KITTI (loading the finetuned VO model at once) 96 | ```bash 97 | traj=00_1 98 | python -W ignore::UserWarning vo_trajectory_from_folder.py --vo-model-name vonet_ft.pkl \ 99 | --seg-model-name segnet-kitti.pth \ 100 | --kitti --kitti-intrinsics-file data/DynaKITTI/$traj/calib.txt \ 101 | --test-dir data/DynaKITTI/$traj/image_2 \ 102 | --pose-file data/DynaKITTI/$traj/pose_left.txt 103 | ``` 104 | 105 | ### AirDOS-Shibuya (loading FlowNet and PoseNet separately) 106 | ```bash 107 | traj=RoadCrossing03 108 | python -W ignore::UserWarning vo_trajectory_from_folder.py --flow-model-name flownet.pkl \ 109 | --pose-model-name posenet.pkl \ 110 | --seg-model segnet-sf.pth \ 111 | --airdos \ 112 | --test-dir data/AirDOS_shibuya/$traj/image_0 \ 113 | --pose-file data/AirDOS_shibuya/$traj/gt_pose.txt 114 | ``` 115 | 116 | ### Scene Flow 117 | ```bash 118 | img=Driving/frames_finalpass/15mm_focallength/scene_forwards/fast/left 119 | pose=Driving/camera_data/15mm_focallength/scene_forwards/fast/camera_data.txt 120 | python -W ignore::UserWarning vo_trajectory_from_folder.py --flow-model-name flownet.pkl \ 121 | --pose-model-name posenet.pkl \ 122 | --seg-model segnet-sf.pth \ 123 | --sceneflow \ 124 | --test-dir data/SceneFlow/$img \ 125 | --pose-file data/SceneFlow/$pose 126 | ``` 127 | 128 | Add `--save-flow` tag to save intermediate optical flow outputs into the `results` folder. 129 | 130 | Adjust the batch size and the worker number by `--batch-size 10`, `--worker-num 5`. 131 | 132 | 133 | ## (Optional) Segmentation Mask Ground Truth 134 | If your dataset has ground truth for camera motion, optical flow and disparity change across consecutive frames, we provide an example script to automatically generate ground truth of segmentation mask given these two modalities based on the pure geometry for the [Scene Flow datasets](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html). 135 | 136 | ```bash 137 | python Datasets/segmask_gt.py --database data/SceneFlow --frames_pass clean --dataset FlyingThings3D 138 | ``` 139 | 140 | Add `--debug` flag to save visualizations of the generated masks. 141 | 142 | ## Citation 143 | If you find our code, paper or dataset useful, please cite 144 | ```bibtex 145 | @inproceedings{shen2023dytanvo, 146 | title={Dytanvo: Joint refinement of visual odometry and motion segmentation in dynamic environments}, 147 | author={Shen, Shihao and Cai, Yilin and Wang, Wenshan and Scherer, Sebastian}, 148 | booktitle={2023 IEEE International Conference on Robotics and Automation (ICRA)}, 149 | pages={4048--4055}, 150 | year={2023}, 151 | organization={IEEE} 152 | } 153 | ``` 154 | 155 | ## Acknowledgement 156 | We built DytanVO on top of [TartanVO](https://github.com/castacks/tartanvo). We implemented the segmentation network by adapting [rigidmask](https://github.com/gengshan-y/rigidmask). We thank [Gengshan Yang](https://gengshan-y.github.io/) for his code and suggestions. 157 | 158 | ## License 159 | This software is BSD licensed. 160 | 161 | Copyright (c) 2020, Carnegie Mellon University All rights reserved. 162 | 163 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 164 | 165 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
166 | 167 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 168 | 169 | Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 170 | 171 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 172 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/testcpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from torch.autograd import gradcheck 10 | 11 | from dcn_v2 import dcn_v2_conv, DCNv2, DCN 12 | from dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling 13 | 14 | deformable_groups = 1 15 | N, inC, inH, inW = 2, 2, 4, 4 16 | outC = 2 17 | kH, kW = 3, 3 18 | 19 | 20 | def conv_identify(weight, bias): 21 | weight.data.zero_() 22 | bias.data.zero_() 23 | o, i, h, w = weight.shape 24 | y = h//2 25 | x = w//2 26 | for p in range(i): 27 | for q in range(o): 28 | if p == q: 29 | weight.data[q, p, y, x] = 1.0 30 | 31 | 32 | def check_zero_offset(): 33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW, 34 | kernel_size=(kH, kW), 35 | stride=(1, 1), 36 | padding=(1, 1), 37 | bias=True) 38 | 39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW, 40 | kernel_size=(kH, kW), 41 | stride=(1, 1), 42 | padding=(1, 1), 43 | bias=True) 44 | 45 | dcn_v2 = DCNv2(inC, outC, (kH, kW), 46 | stride=1, padding=1, dilation=1, 47 | deformable_groups=deformable_groups) 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW) 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print('Zero offset passed') 64 | else: 65 | print('Zero offset failed') 66 | print(input) 67 | print(output) 68 | 69 | def check_gradient_dconv(): 70 | 71 | input = torch.rand(N, inC, inH, inW) * 0.01 72 | input.requires_grad = True 73 | 74 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW) * 2 75 | # offset.data.zero_() 76 | # offset.data -= 0.5 77 | offset.requires_grad = True 78 | 79 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW) 80 | # mask.data.zero_() 81 | 
mask.requires_grad = True 82 | mask = torch.sigmoid(mask) 83 | 84 | weight = torch.randn(outC, inC, kH, kW) 85 | weight.requires_grad = True 86 | 87 | bias = torch.rand(outC) 88 | bias.requires_grad = True 89 | 90 | stride = 1 91 | padding = 1 92 | dilation = 1 93 | 94 | print('check_gradient_dconv: ', 95 | gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias, 96 | stride, padding, dilation, deformable_groups), 97 | eps=1e-3, atol=1e-4, rtol=1e-2)) 98 | 99 | 100 | def check_pooling_zero_offset(): 101 | 102 | input = torch.randn(2, 16, 64, 64).zero_() 103 | input[0, :, 16:26, 16:26] = 1. 104 | input[1, :, 10:20, 20:30] = 2. 105 | rois = torch.tensor([ 106 | [0, 65, 65, 103, 103], 107 | [1, 81, 41, 119, 79], 108 | ]).float() 109 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 110 | pooled_size=7, 111 | output_dim=16, 112 | no_trans=True, 113 | group_size=1, 114 | trans_std=0.0) 115 | 116 | out = pooling(input, rois, input.new()) 117 | s = ', '.join(['%f' % out[i, :, :, :].mean().item() 118 | for i in range(rois.shape[0])]) 119 | print(s) 120 | 121 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 122 | pooled_size=7, 123 | output_dim=16, 124 | no_trans=False, 125 | group_size=1, 126 | trans_std=0.0) 127 | offset = torch.randn(20, 2, 7, 7).zero_() 128 | dout = dpooling(input, rois, offset) 129 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item() 130 | for i in range(rois.shape[0])]) 131 | print(s) 132 | 133 | 134 | def check_gradient_dpooling(): 135 | input = torch.randn(2, 3, 5, 5) * 0.01 136 | N = 4 137 | batch_inds = torch.randint(2, (N, 1)).float() 138 | x = torch.rand((N, 1)).float() * 15 139 | y = torch.rand((N, 1)).float() * 15 140 | w = torch.rand((N, 1)).float() * 10 141 | h = torch.rand((N, 1)).float() * 10 142 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 143 | offset = torch.randn(N, 2, 3, 3) 144 | input.requires_grad = True 145 | offset.requires_grad = True 146 | 147 | spatial_scale = 1.0 / 4 148 | pooled_size = 3 149 | output_dim = 3 150 | no_trans = 0 151 | group_size = 1 152 | trans_std = 0.0 153 | sample_per_part = 4 154 | part_size = pooled_size 155 | 156 | print('check_gradient_dpooling:', 157 | gradcheck(dcn_v2_pooling, (input, rois, offset, 158 | spatial_scale, 159 | pooled_size, 160 | output_dim, 161 | no_trans, 162 | group_size, 163 | part_size, 164 | sample_per_part, 165 | trans_std), 166 | eps=1e-4)) 167 | 168 | 169 | def example_dconv(): 170 | input = torch.randn(2, 64, 128, 128) 171 | # wrap all things (offset and mask) in DCN 172 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, 173 | padding=1, deformable_groups=2) 174 | # print(dcn.weight.shape, input.shape) 175 | output = dcn(input) 176 | targert = output.new(*output.size()) 177 | targert.data.uniform_(-0.01, 0.01) 178 | error = (targert - output).mean() 179 | error.backward() 180 | print(output.shape) 181 | 182 | 183 | def example_dpooling(): 184 | input = torch.randn(2, 32, 64, 64) 185 | batch_inds = torch.randint(2, (20, 1)).float() 186 | x = torch.randint(256, (20, 1)).float() 187 | y = torch.randint(256, (20, 1)).float() 188 | w = torch.randint(64, (20, 1)).float() 189 | h = torch.randint(64, (20, 1)).float() 190 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 191 | offset = torch.randn(20, 2, 7, 7) 192 | input.requires_grad = True 193 | offset.requires_grad = True 194 | 195 | # normal roi_align 196 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 197 | pooled_size=7, 198 | output_dim=32, 199 | no_trans=True, 200 | group_size=1, 201 | trans_std=0.1) 202 | 203 | # deformable 
pooling 204 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 205 | pooled_size=7, 206 | output_dim=32, 207 | no_trans=False, 208 | group_size=1, 209 | trans_std=0.1) 210 | 211 | out = pooling(input, rois, offset) 212 | dout = dpooling(input, rois, offset) 213 | print(out.shape) 214 | print(dout.shape) 215 | 216 | target_out = out.new(*out.size()) 217 | target_out.data.uniform_(-0.01, 0.01) 218 | target_dout = dout.new(*dout.size()) 219 | target_dout.data.uniform_(-0.01, 0.01) 220 | e = (target_out - out).mean() 221 | e.backward() 222 | e = (target_dout - dout).mean() 223 | e.backward() 224 | 225 | 226 | def example_mdpooling(): 227 | input = torch.randn(2, 32, 64, 64) 228 | input.requires_grad = True 229 | batch_inds = torch.randint(2, (20, 1)).float() 230 | x = torch.randint(256, (20, 1)).float() 231 | y = torch.randint(256, (20, 1)).float() 232 | w = torch.randint(64, (20, 1)).float() 233 | h = torch.randint(64, (20, 1)).float() 234 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 235 | 236 | # mdformable pooling (V2) 237 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 238 | pooled_size=7, 239 | output_dim=32, 240 | no_trans=False, 241 | group_size=1, 242 | trans_std=0.1, 243 | deform_fc_dim=1024) 244 | 245 | dout = dpooling(input, rois) 246 | target = dout.new(*dout.size()) 247 | target.data.uniform_(-0.1, 0.1) 248 | error = (target - dout).mean() 249 | error.backward() 250 | print(dout.shape) 251 | 252 | 253 | if __name__ == '__main__': 254 | 255 | example_dconv() 256 | example_dpooling() 257 | example_mdpooling() 258 | 259 | check_pooling_zero_offset() 260 | # zero offset check 261 | if inC == outC: 262 | check_zero_offset() 263 | 264 | check_gradient_dpooling() 265 | check_gradient_dconv() 266 | # """ 267 | # ****** Note: backward is not reentrant error may not be a serious problem, 268 | # ****** since the max error is less than 1e-7, 269 | # ****** Still looking for what trigger this problem 270 | # """ 271 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/testcuda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from torch.autograd import gradcheck 10 | 11 | from dcn_v2 import dcn_v2_conv, DCNv2, DCN 12 | from dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling 13 | 14 | deformable_groups = 1 15 | N, inC, inH, inW = 2, 2, 4, 4 16 | outC = 2 17 | kH, kW = 3, 3 18 | 19 | 20 | def conv_identify(weight, bias): 21 | weight.data.zero_() 22 | bias.data.zero_() 23 | o, i, h, w = weight.shape 24 | y = h//2 25 | x = w//2 26 | for p in range(i): 27 | for q in range(o): 28 | if p == q: 29 | weight.data[q, p, y, x] = 1.0 30 | 31 | 32 | def check_zero_offset(): 33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW, 34 | kernel_size=(kH, kW), 35 | stride=(1, 1), 36 | padding=(1, 1), 37 | bias=True).cuda() 38 | 39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW, 40 | kernel_size=(kH, kW), 41 | stride=(1, 1), 42 | padding=(1, 1), 43 | bias=True).cuda() 44 | 45 | dcn_v2 = DCNv2(inC, outC, (kH, kW), 46 | stride=1, padding=1, dilation=1, 47 | deformable_groups=deformable_groups).cuda() 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | 
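# With the offset and mask convolutions zero-initialised, the sampling offsets
# are zero and the mask is sigmoid(0) = 0.5 everywhere; conv_identify then
# turns dcn_v2 into an identity convolution, so its output equals 0.5 * input
# and is doubled further down before being compared against the input
# (this check only runs when inC == outC).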
conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW).cuda() 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print('Zero offset passed') 64 | else: 65 | print('Zero offset failed') 66 | print(input) 67 | print(output) 68 | 69 | def check_gradient_dconv(): 70 | 71 | input = torch.rand(N, inC, inH, inW).cuda() * 0.01 72 | input.requires_grad = True 73 | 74 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2 75 | # offset.data.zero_() 76 | # offset.data -= 0.5 77 | offset.requires_grad = True 78 | 79 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda() 80 | # mask.data.zero_() 81 | mask.requires_grad = True 82 | mask = torch.sigmoid(mask) 83 | 84 | weight = torch.randn(outC, inC, kH, kW).cuda() 85 | weight.requires_grad = True 86 | 87 | bias = torch.rand(outC).cuda() 88 | bias.requires_grad = True 89 | 90 | stride = 1 91 | padding = 1 92 | dilation = 1 93 | 94 | print('check_gradient_dconv: ', 95 | gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias, 96 | stride, padding, dilation, deformable_groups), 97 | eps=1e-3, atol=1e-4, rtol=1e-2)) 98 | 99 | 100 | def check_pooling_zero_offset(): 101 | 102 | input = torch.randn(2, 16, 64, 64).cuda().zero_() 103 | input[0, :, 16:26, 16:26] = 1. 104 | input[1, :, 10:20, 20:30] = 2. 105 | rois = torch.tensor([ 106 | [0, 65, 65, 103, 103], 107 | [1, 81, 41, 119, 79], 108 | ]).cuda().float() 109 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 110 | pooled_size=7, 111 | output_dim=16, 112 | no_trans=True, 113 | group_size=1, 114 | trans_std=0.0).cuda() 115 | 116 | out = pooling(input, rois, input.new()) 117 | s = ', '.join(['%f' % out[i, :, :, :].mean().item() 118 | for i in range(rois.shape[0])]) 119 | print(s) 120 | 121 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 122 | pooled_size=7, 123 | output_dim=16, 124 | no_trans=False, 125 | group_size=1, 126 | trans_std=0.0).cuda() 127 | offset = torch.randn(20, 2, 7, 7).cuda().zero_() 128 | dout = dpooling(input, rois, offset) 129 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item() 130 | for i in range(rois.shape[0])]) 131 | print(s) 132 | 133 | 134 | def check_gradient_dpooling(): 135 | input = torch.randn(2, 3, 5, 5).cuda().float() * 0.01 136 | N = 4 137 | batch_inds = torch.randint(2, (N, 1)).cuda().float() 138 | x = torch.rand((N, 1)).cuda().float() * 15 139 | y = torch.rand((N, 1)).cuda().float() * 15 140 | w = torch.rand((N, 1)).cuda().float() * 10 141 | h = torch.rand((N, 1)).cuda().float() * 10 142 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 143 | offset = torch.randn(N, 2, 3, 3).cuda() 144 | input.requires_grad = True 145 | offset.requires_grad = True 146 | 147 | spatial_scale = 1.0 / 4 148 | pooled_size = 3 149 | output_dim = 3 150 | no_trans = 0 151 | group_size = 1 152 | trans_std = 0.0 153 | sample_per_part = 4 154 | part_size = pooled_size 155 | 156 | print('check_gradient_dpooling:', 157 | gradcheck(dcn_v2_pooling, (input, rois, offset, 158 | spatial_scale, 159 | pooled_size, 160 | output_dim, 161 | no_trans, 162 | group_size, 163 | part_size, 164 | sample_per_part, 165 | trans_std), 166 | eps=1e-4)) 167 | 168 | 169 | def example_dconv(): 170 | input = torch.randn(2, 64, 128, 128).cuda() 171 | # wrap all things (offset and mask) in DCN 172 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, 173 | padding=1, 
deformable_groups=2).cuda() 174 | # print(dcn.weight.shape, input.shape) 175 | output = dcn(input) 176 | targert = output.new(*output.size()) 177 | targert.data.uniform_(-0.01, 0.01) 178 | error = (targert - output).mean() 179 | error.backward() 180 | print(output.shape) 181 | 182 | 183 | def example_dpooling(): 184 | input = torch.randn(2, 32, 64, 64).cuda() 185 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 186 | x = torch.randint(256, (20, 1)).cuda().float() 187 | y = torch.randint(256, (20, 1)).cuda().float() 188 | w = torch.randint(64, (20, 1)).cuda().float() 189 | h = torch.randint(64, (20, 1)).cuda().float() 190 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 191 | offset = torch.randn(20, 2, 7, 7).cuda() 192 | input.requires_grad = True 193 | offset.requires_grad = True 194 | 195 | # normal roi_align 196 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 197 | pooled_size=7, 198 | output_dim=32, 199 | no_trans=True, 200 | group_size=1, 201 | trans_std=0.1).cuda() 202 | 203 | # deformable pooling 204 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 205 | pooled_size=7, 206 | output_dim=32, 207 | no_trans=False, 208 | group_size=1, 209 | trans_std=0.1).cuda() 210 | 211 | out = pooling(input, rois, offset) 212 | dout = dpooling(input, rois, offset) 213 | print(out.shape) 214 | print(dout.shape) 215 | 216 | target_out = out.new(*out.size()) 217 | target_out.data.uniform_(-0.01, 0.01) 218 | target_dout = dout.new(*dout.size()) 219 | target_dout.data.uniform_(-0.01, 0.01) 220 | e = (target_out - out).mean() 221 | e.backward() 222 | e = (target_dout - dout).mean() 223 | e.backward() 224 | 225 | 226 | def example_mdpooling(): 227 | input = torch.randn(2, 32, 64, 64).cuda() 228 | input.requires_grad = True 229 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 230 | x = torch.randint(256, (20, 1)).cuda().float() 231 | y = torch.randint(256, (20, 1)).cuda().float() 232 | w = torch.randint(64, (20, 1)).cuda().float() 233 | h = torch.randint(64, (20, 1)).cuda().float() 234 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 235 | 236 | # mdformable pooling (V2) 237 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 238 | pooled_size=7, 239 | output_dim=32, 240 | no_trans=False, 241 | group_size=1, 242 | trans_std=0.1, 243 | deform_fc_dim=1024).cuda() 244 | 245 | dout = dpooling(input, rois) 246 | target = dout.new(*dout.size()) 247 | target.data.uniform_(-0.1, 0.1) 248 | error = (target - dout).mean() 249 | error.backward() 250 | print(dout.shape) 251 | 252 | 253 | if __name__ == '__main__': 254 | 255 | example_dconv() 256 | example_dpooling() 257 | example_mdpooling() 258 | 259 | check_pooling_zero_offset() 260 | # zero offset check 261 | if inC == outC: 262 | check_zero_offset() 263 | 264 | check_gradient_dpooling() 265 | check_gradient_dconv() 266 | # """ 267 | # ****** Note: backward is not reentrant error may not be a serious problem, 268 | # ****** since the max error is less than 1e-7, 269 | # ****** Still looking for what trigger this problem 270 | # """ 271 | -------------------------------------------------------------------------------- /evaluator/transformation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Carnegie Mellon University, Wenshan Wang 2 | # For License information please see the LICENSE file in the root directory. 
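# The helpers in this file convert between three pose parameterisations used by
# the evaluator: a flattened 3x4 transform (12 values), an se(3) vector
# (3 translation + 3 rotation-vector components, 6 values), and a position plus
# quaternion (7 values, quaternion in scipy's x, y, z, w order).
# A minimal round-trip sketch (illustrative only; assumes this module is
# importable under the path shown):
#
#   import numpy as np
#   from evaluator import transformation as tf
#
#   pose_line = np.hstack([np.eye(3), np.zeros((3, 1))]).reshape(12)  # identity pose, 12 values
#   se = tf.SE2se(tf.line2mat(pose_line))   # 12 -> 6
#   back = tf.mat2line(tf.se2SE(se))        # 6 -> 12
#   assert np.allclose(pose_line, back)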
3 | # Credit: Xiangwei Wang https://github.com/TimingSpace 4 | 5 | import numpy as np 6 | from scipy.spatial.transform import Rotation as R 7 | 8 | def line2mat(line_data): 9 | ''' 10 | 12 -> 4 x 4 11 | ''' 12 | mat = np.eye(4) 13 | mat[0:3,:] = line_data.reshape(3,4) 14 | return np.matrix(mat) 15 | 16 | def mat2line(mat_data): 17 | ''' 18 | 4 x 4 -> 12 19 | ''' 20 | line_data = np.zeros(12) 21 | line_data[:]=mat_data[:3,:].reshape((12)) 22 | return line_data 23 | 24 | def motion2pose(data): 25 | ''' 26 | data: N x 12 27 | all_pose: (N+1) x 12 28 | ''' 29 | data_size = data.shape[0] 30 | all_pose = np.zeros((data_size+1,12)) 31 | temp = np.eye(4,4).reshape(1,16) 32 | all_pose[0,:] = temp[0,0:12] 33 | pose = np.matrix(np.eye(4,4)) 34 | for i in range(0,data_size): 35 | data_mat = line2mat(data[i,:]) 36 | pose = pose*data_mat 37 | pose_line = np.array(pose[0:3,:]).reshape(1,12) 38 | all_pose[i+1,:] = pose_line 39 | return all_pose 40 | 41 | def pose2motion(data, skip=0): 42 | ''' 43 | data: N x 12 44 | all_motion (N-1-skip) x 12 45 | ''' 46 | data_size = data.shape[0] 47 | all_motion = np.zeros((data_size-1,12)) 48 | for i in range(0,data_size-1-skip): 49 | pose_curr = line2mat(data[i,:]) 50 | pose_next = line2mat(data[i+1+skip,:]) 51 | motion = pose_curr.I*pose_next 52 | motion_line = np.array(motion[0:3,:]).reshape(1,12) 53 | all_motion[i,:] = motion_line 54 | return all_motion 55 | 56 | def SE2se(SE_data): 57 | result = np.zeros((6)) 58 | result[0:3] = np.array(SE_data[0:3,3].T) 59 | result[3:6] = SO2so(SE_data[0:3,0:3]).T 60 | return result 61 | 62 | def SO2so(SO_data): 63 | return R.from_matrix(SO_data).as_rotvec() 64 | 65 | def so2SO(so_data): 66 | return R.from_rotvec(so_data).as_matrix() 67 | 68 | def se2SE(se_data): 69 | result_mat = np.matrix(np.eye(4)) 70 | result_mat[0:3,0:3] = so2SO(se_data[3:6]) 71 | result_mat[0:3,3] = np.matrix(se_data[0:3]).T 72 | return result_mat 73 | ### can get wrong result 74 | def se_mean(se_datas): 75 | all_SE = np.matrix(np.eye(4)) 76 | for i in range(se_datas.shape[0]): 77 | se = se_datas[i,:] 78 | SE = se2SE(se) 79 | all_SE = all_SE*SE 80 | all_se = SE2se(all_SE) 81 | mean_se = all_se/se_datas.shape[0] 82 | return mean_se 83 | 84 | def ses_mean(se_datas): 85 | se_datas = np.array(se_datas) 86 | se_datas = np.transpose(se_datas.reshape(se_datas.shape[0],se_datas.shape[1],se_datas.shape[2]*se_datas.shape[3]),(0,2,1)) 87 | se_result = np.zeros((se_datas.shape[0],se_datas.shape[2])) 88 | for i in range(0,se_datas.shape[0]): 89 | mean_se = se_mean(se_datas[i,:,:]) 90 | se_result[i,:] = mean_se 91 | return se_result 92 | 93 | def ses2poses(data): 94 | data_size = data.shape[0] 95 | all_pose = np.zeros((data_size+1,12)) 96 | temp = np.eye(4,4).reshape(1,16) 97 | all_pose[0,:] = temp[0,0:12] 98 | pose = np.matrix(np.eye(4,4)) 99 | for i in range(0,data_size): 100 | data_mat = se2SE(data[i,:]) 101 | pose = pose*data_mat 102 | pose_line = np.array(pose[0:3,:]).reshape(1,12) 103 | all_pose[i+1,:] = pose_line 104 | return all_pose 105 | 106 | def ses2poses_quat(data): 107 | ''' 108 | ses: N x 6 109 | ''' 110 | data_size = data.shape[0] 111 | all_pose_quat = np.zeros((data_size+1,7)) 112 | all_pose_quat[0,:] = np.array([0., 0., 0., 0., 0., 0., 1.]) 113 | pose = np.matrix(np.eye(4,4)) 114 | for i in range(0,data_size): 115 | data_mat = se2SE(data[i,:]) 116 | pose = pose*data_mat 117 | quat = SO2quat(pose[0:3,0:3]) 118 | all_pose_quat[i+1,:3] = np.array([pose[0,3], pose[1,3], pose[2,3]]) 119 | all_pose_quat[i+1,3:] = quat 120 | return all_pose_quat 121 | 122 | 
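# motion2pose and pose2motion above are inverses of one another when skip=0:
# integrating N relative motions yields N+1 absolute poses, and differencing
# those poses recovers the original motions. Illustrative check (small random
# rotations, built with the helpers defined above in this file):
#
#   import numpy as np
#   motions = np.array([mat2line(se2SE(0.01 * np.random.randn(6))) for _ in range(5)])
#   poses = motion2pose(motions)      # 5 x 12 -> 6 x 12
#   recovered = pose2motion(poses)    # 6 x 12 -> 5 x 12
#   assert np.allclose(motions, recovered)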
def SEs2ses(motion_data): 123 | data_size = motion_data.shape[0] 124 | ses = np.zeros((data_size,6)) 125 | for i in range(0,data_size): 126 | SE = np.matrix(np.eye(4)) 127 | SE[0:3,:] = motion_data[i,:].reshape(3,4) 128 | ses[i,:] = SE2se(SE) 129 | return ses 130 | 131 | def so2quat(so_data): 132 | so_data = np.array(so_data) 133 | theta = np.sqrt(np.sum(so_data*so_data)) 134 | axis = so_data/theta 135 | quat=np.zeros(4) 136 | quat[0:3] = np.sin(theta/2)*axis 137 | quat[3] = np.cos(theta/2) 138 | return quat 139 | 140 | def quat2so(quat_data): 141 | quat_data = np.array(quat_data) 142 | sin_half_theta = np.sqrt(np.sum(quat_data[0:3]*quat_data[0:3])) 143 | axis = quat_data[0:3]/sin_half_theta 144 | cos_half_theta = quat_data[3] 145 | theta = 2*np.arctan2(sin_half_theta,cos_half_theta) 146 | so = theta*axis 147 | return so 148 | 149 | # input so_datas batch*channel*height*width 150 | # return quat_datas batch*numner*channel 151 | def sos2quats(so_datas,mean_std=[[1],[1]]): 152 | so_datas = np.array(so_datas) 153 | so_datas = so_datas.reshape(so_datas.shape[0],so_datas.shape[1],so_datas.shape[2]*so_datas.shape[3]) 154 | so_datas = np.transpose(so_datas,(0,2,1)) 155 | quat_datas = np.zeros((so_datas.shape[0],so_datas.shape[1],4)) 156 | for i_b in range(0,so_datas.shape[0]): 157 | for i_p in range(0,so_datas.shape[1]): 158 | so_data = so_datas[i_b,i_p,:] 159 | quat_data = so2quat(so_data) 160 | quat_datas[i_b,i_p,:] = quat_data 161 | return quat_datas 162 | 163 | def SO2quat(SO_data): 164 | rr = R.from_matrix(SO_data) 165 | return rr.as_quat() 166 | 167 | def quat2SO(quat_data): 168 | return R.from_quat(quat_data).as_matrix() 169 | 170 | 171 | def pos_quat2SE(quat_data): 172 | SO = R.from_quat(quat_data[3:7]).as_matrix() 173 | SE = np.matrix(np.eye(4)) 174 | SE[0:3,0:3] = np.matrix(SO) 175 | SE[0:3,3] = np.matrix(quat_data[0:3]).T 176 | SE = np.array(SE[0:3,:]).reshape(1,12) 177 | return SE 178 | 179 | 180 | def pos_quats2SEs(quat_datas): 181 | data_len = quat_datas.shape[0] 182 | SEs = np.zeros((data_len,12)) 183 | for i_data in range(0,data_len): 184 | SE = pos_quat2SE(quat_datas[i_data,:]) 185 | SEs[i_data,:] = SE 186 | return SEs 187 | 188 | 189 | def pos_quats2SE_matrices(quat_datas): 190 | data_len = quat_datas.shape[0] 191 | SEs = [] 192 | for quat in quat_datas: 193 | SO = R.from_quat(quat[3:7]).as_matrix() 194 | SE = np.eye(4) 195 | SE[0:3,0:3] = SO 196 | SE[0:3,3] = quat[0:3] 197 | SEs.append(SE) 198 | return SEs 199 | 200 | def SE2pos_quat(SE_data): 201 | pos_quat = np.zeros(7) 202 | pos_quat[3:] = SO2quat(SE_data[0:3,0:3]) 203 | pos_quat[:3] = SE_data[0:3,3].T 204 | return pos_quat 205 | 206 | def SEs2ses(data): 207 | ''' 208 | data: N x 12 209 | ses: N x 6 210 | ''' 211 | data_size = data.shape[0] 212 | ses = np.zeros((data_size,6)) 213 | for i in range(0,data_size): 214 | ses[i,:] = SE2se(line2mat(data[i])) 215 | return ses 216 | 217 | def ses2SEs(data): 218 | ''' 219 | data: N x 6 220 | SEs: N x 12 221 | ''' 222 | data_size = data.shape[0] 223 | SEs = np.zeros((data_size,12)) 224 | for i in range(0,data_size): 225 | SEs[i,:] = mat2line(se2SE(data[i])) 226 | return SEs 227 | 228 | def SE2quat(SE_data): 229 | ''' 230 | SE_data: 4 x 4 231 | quat: 7 232 | ''' 233 | pos_quat = np.zeros(7) 234 | pos_quat[3:] = SO2quat(SE_data[0:3,0:3]) 235 | pos_quat[:3] = SE_data[0:3,3].T 236 | return pos_quat 237 | 238 | def quat2SE(quat_data): 239 | ''' 240 | quat_data: 7 241 | SE: 4 x 4 242 | ''' 243 | SO = R.from_quat(quat_data[3:7]).as_matrix() 244 | SE = np.matrix(np.eye(4)) 245 | SE[0:3,0:3] = 
np.matrix(SO) 246 | SE[0:3,3] = np.matrix(quat_data[0:3]).T 247 | return SE 248 | 249 | def SEs2quats(SEs_data): 250 | ''' 251 | SE_data: N x 12 252 | quat: N x 7 253 | ''' 254 | data_len = SEs_data.shape[0] 255 | all_quats = np.zeros((data_len,7)) 256 | for i in range(0,data_len): 257 | SE = line2mat(SEs_data[i]) 258 | all_quats[i] = SE2quat(SE) 259 | return all_quats 260 | 261 | def quats2SEs(quat_datas): 262 | ''' 263 | pos_quats: N x 7 264 | SEs: N x 12 265 | ''' 266 | data_len = quat_datas.shape[0] 267 | SEs = np.zeros((data_len,12)) 268 | for i_data in range(0,data_len): 269 | SE = quat2SE(quat_datas[i_data,:]) 270 | SEs[i_data,:] = mat2line(SE) 271 | return SEs 272 | 273 | def motion_ses2pose_quats(data): 274 | ''' 275 | data: N x 6 motion data 276 | poses_quat: (N+1) x 7 pose data 277 | ''' 278 | motions_SEs = ses2SEs(data) # N x 6 -> N x 12 279 | poses_SEs = motion2pose(motions_SEs) # N x 12 -> (N + 1) x 12 280 | poses_quat = SEs2quats(poses_SEs) # (N + 1) x 12 -> (N+1) x 7 281 | return poses_quat 282 | 283 | def pose_quats2motion_ses(data): 284 | ''' 285 | data: N x 7 pose list 286 | motions: (N-1-skip) x 6 se3 list 287 | ''' 288 | poses_SEs = quats2SEs(data) # N x 7 -> N x 12 289 | matrix = pose2motion(poses_SEs) # N x 12 -> (N-1-skip) x 12 290 | motions = SEs2ses(matrix).astype(np.float32) # (N-1-skip) x 12 -> (N-1-skip) x 6 291 | return motions 292 | 293 | def kitti2tartan(traj): 294 | ''' 295 | traj: in kitti style, N x 12 numpy array, in camera frame 296 | output: in TartanAir style, N x 7 numpy array, in NED frame 297 | ''' 298 | T = np.array([[0,0,1,0], 299 | [1,0,0,0], 300 | [0,1,0,0], 301 | [0,0,0,1]], dtype=np.float32) 302 | T_inv = np.linalg.inv(T) 303 | new_traj = [] 304 | 305 | for pose in traj: 306 | tt = np.eye(4) 307 | tt[:3,:] = pose.reshape(3,4) 308 | ttt=T.dot(tt).dot(T_inv) 309 | new_traj.append(SE2pos_quat(ttt)) 310 | 311 | return np.array(new_traj) 312 | 313 | def tartan2kitti(traj): 314 | T = np.array([[0,1,0,0], 315 | [0,0,1,0], 316 | [1,0,0,0], 317 | [0,0,0,1]], dtype=np.float32) 318 | T_inv = np.linalg.inv(T) 319 | new_traj = [] 320 | 321 | for pose in traj: 322 | tt = np.eye(4) 323 | tt[:3,:] = pos_quat2SE(pose).reshape(3,4) 324 | ttt=T.dot(tt).dot(T_inv) 325 | new_traj.append(ttt[:3,:].reshape(12)) 326 | 327 | return np.array(new_traj) -------------------------------------------------------------------------------- /Network/rigidmask/networks/large_hourglass.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # This code is base on 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import torch 15 | import torch.nn as nn 16 | 17 | class convolution(nn.Module): 18 | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True): 19 | super(convolution, self).__init__() 20 | 21 | pad = (k - 1) // 2 22 | self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn) 23 | self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential() 24 | self.relu = nn.ReLU(inplace=True) 25 | 26 | def forward(self, x): 27 | conv = self.conv(x) 28 | bn = 
self.bn(conv) 29 | relu = self.relu(bn) 30 | return relu 31 | 32 | class fully_connected(nn.Module): 33 | def __init__(self, inp_dim, out_dim, with_bn=True): 34 | super(fully_connected, self).__init__() 35 | self.with_bn = with_bn 36 | 37 | self.linear = nn.Linear(inp_dim, out_dim) 38 | if self.with_bn: 39 | self.bn = nn.BatchNorm1d(out_dim) 40 | self.relu = nn.ReLU(inplace=True) 41 | 42 | def forward(self, x): 43 | linear = self.linear(x) 44 | bn = self.bn(linear) if self.with_bn else linear 45 | relu = self.relu(bn) 46 | return relu 47 | 48 | class residual(nn.Module): 49 | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True): 50 | super(residual, self).__init__() 51 | 52 | self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False) 53 | self.bn1 = nn.BatchNorm2d(out_dim) 54 | self.relu1 = nn.ReLU(inplace=True) 55 | 56 | self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False) 57 | self.bn2 = nn.BatchNorm2d(out_dim) 58 | 59 | self.skip = nn.Sequential( 60 | nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False), 61 | nn.BatchNorm2d(out_dim) 62 | ) if stride != 1 or inp_dim != out_dim else nn.Sequential() 63 | self.relu = nn.ReLU(inplace=True) 64 | 65 | def forward(self, x): 66 | conv1 = self.conv1(x) 67 | bn1 = self.bn1(conv1) 68 | relu1 = self.relu1(bn1) 69 | 70 | conv2 = self.conv2(relu1) 71 | bn2 = self.bn2(conv2) 72 | 73 | skip = self.skip(x) 74 | return self.relu(bn2 + skip) 75 | 76 | def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs): 77 | layers = [layer(k, inp_dim, out_dim, **kwargs)] 78 | for _ in range(1, modules): 79 | layers.append(layer(k, out_dim, out_dim, **kwargs)) 80 | return nn.Sequential(*layers) 81 | 82 | def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs): 83 | layers = [] 84 | for _ in range(modules - 1): 85 | layers.append(layer(k, inp_dim, inp_dim, **kwargs)) 86 | layers.append(layer(k, inp_dim, out_dim, **kwargs)) 87 | return nn.Sequential(*layers) 88 | 89 | class MergeUp(nn.Module): 90 | def forward(self, up1, up2): 91 | return up1 + up2 92 | 93 | def make_merge_layer(dim): 94 | return MergeUp() 95 | 96 | # def make_pool_layer(dim): 97 | # return nn.MaxPool2d(kernel_size=2, stride=2) 98 | 99 | def make_pool_layer(dim): 100 | return nn.Sequential() 101 | 102 | def make_unpool_layer(dim): 103 | return nn.Upsample(scale_factor=2) 104 | 105 | def make_kp_layer(cnv_dim, curr_dim, out_dim): 106 | return nn.Sequential( 107 | convolution(3, cnv_dim, curr_dim, with_bn=False), 108 | nn.Conv2d(curr_dim, out_dim, (1, 1)) 109 | ) 110 | 111 | def make_inter_layer(dim): 112 | return residual(3, dim, dim) 113 | 114 | def make_cnv_layer(inp_dim, out_dim): 115 | return convolution(3, inp_dim, out_dim) 116 | 117 | class kp_module(nn.Module): 118 | def __init__( 119 | self, n, dims, modules, layer=residual, 120 | make_up_layer=make_layer, make_low_layer=make_layer, 121 | make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr, 122 | make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer, 123 | make_merge_layer=make_merge_layer, **kwargs 124 | ): 125 | super(kp_module, self).__init__() 126 | 127 | self.n = n 128 | 129 | curr_mod = modules[0] 130 | next_mod = modules[1] 131 | 132 | curr_dim = dims[0] 133 | next_dim = dims[1] 134 | 135 | self.up1 = make_up_layer( 136 | 3, curr_dim, curr_dim, curr_mod, 137 | layer=layer, **kwargs 138 | ) 139 | self.max1 = make_pool_layer(curr_dim) 140 | self.low1 = make_hg_layer( 141 | 3, 
curr_dim, next_dim, curr_mod, 142 | layer=layer, **kwargs 143 | ) 144 | self.low2 = kp_module( 145 | n - 1, dims[1:], modules[1:], layer=layer, 146 | make_up_layer=make_up_layer, 147 | make_low_layer=make_low_layer, 148 | make_hg_layer=make_hg_layer, 149 | make_hg_layer_revr=make_hg_layer_revr, 150 | make_pool_layer=make_pool_layer, 151 | make_unpool_layer=make_unpool_layer, 152 | make_merge_layer=make_merge_layer, 153 | **kwargs 154 | ) if self.n > 1 else \ 155 | make_low_layer( 156 | 3, next_dim, next_dim, next_mod, 157 | layer=layer, **kwargs 158 | ) 159 | self.low3 = make_hg_layer_revr( 160 | 3, next_dim, curr_dim, curr_mod, 161 | layer=layer, **kwargs 162 | ) 163 | self.up2 = make_unpool_layer(curr_dim) 164 | 165 | self.merge = make_merge_layer(curr_dim) 166 | 167 | def forward(self, x): 168 | up1 = self.up1(x) 169 | max1 = self.max1(x) 170 | low1 = self.low1(max1) 171 | low2 = self.low2(low1) 172 | low3 = self.low3(low2) 173 | up2 = self.up2(low3) 174 | return self.merge(up1, up2) 175 | 176 | class exkp(nn.Module): 177 | def __init__( 178 | self, n, nstack, dims, modules, heads, pre=None, cnv_dim=256, 179 | make_tl_layer=None, make_br_layer=None, 180 | make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer, 181 | make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer, 182 | make_up_layer=make_layer, make_low_layer=make_layer, 183 | make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr, 184 | make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer, 185 | make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer, 186 | kp_layer=residual 187 | ): 188 | super(exkp, self).__init__() 189 | 190 | self.nstack = nstack 191 | self.heads = heads 192 | 193 | curr_dim = dims[0] 194 | 195 | self.pre = nn.Sequential( 196 | convolution(7, 3, 128, stride=2), 197 | residual(3, 128, 256, stride=2) 198 | ) if pre is None else pre 199 | 200 | self.kps = nn.ModuleList([ 201 | kp_module( 202 | n, dims, modules, layer=kp_layer, 203 | make_up_layer=make_up_layer, 204 | make_low_layer=make_low_layer, 205 | make_hg_layer=make_hg_layer, 206 | make_hg_layer_revr=make_hg_layer_revr, 207 | make_pool_layer=make_pool_layer, 208 | make_unpool_layer=make_unpool_layer, 209 | make_merge_layer=make_merge_layer 210 | ) for _ in range(nstack) 211 | ]) 212 | self.cnvs = nn.ModuleList([ 213 | make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack) 214 | ]) 215 | 216 | self.inters = nn.ModuleList([ 217 | make_inter_layer(curr_dim) for _ in range(nstack - 1) 218 | ]) 219 | 220 | self.inters_ = nn.ModuleList([ 221 | nn.Sequential( 222 | nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False), 223 | nn.BatchNorm2d(curr_dim) 224 | ) for _ in range(nstack - 1) 225 | ]) 226 | self.cnvs_ = nn.ModuleList([ 227 | nn.Sequential( 228 | nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False), 229 | nn.BatchNorm2d(curr_dim) 230 | ) for _ in range(nstack - 1) 231 | ]) 232 | 233 | ## keypoint heatmaps 234 | for head in heads.keys(): 235 | if 'hm' in head: 236 | module = nn.ModuleList([ 237 | make_heat_layer( 238 | cnv_dim, curr_dim, heads[head]) for _ in range(nstack) 239 | ]) 240 | self.__setattr__(head, module) 241 | for heat in self.__getattr__(head): 242 | heat[-1].bias.data.fill_(-2.19) 243 | else: 244 | module = nn.ModuleList([ 245 | make_regr_layer( 246 | cnv_dim, curr_dim, heads[head]) for _ in range(nstack) 247 | ]) 248 | self.__setattr__(head, module) 249 | 250 | 251 | self.relu = nn.ReLU(inplace=True) 252 | 253 | def forward(self, image): 254 | # print('image shape', image.shape) 255 | inter = 
self.pre(image) 256 | outs = [] 257 | 258 | for ind in range(self.nstack): 259 | kp_, cnv_ = self.kps[ind], self.cnvs[ind] 260 | kp = kp_(inter) 261 | cnv = cnv_(kp) 262 | 263 | out = {} 264 | for head in self.heads: 265 | layer = self.__getattr__(head)[ind] 266 | y = layer(cnv) 267 | out[head] = y 268 | 269 | outs.append(out) 270 | if ind < self.nstack - 1: 271 | inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv) 272 | inter = self.relu(inter) 273 | inter = self.inters[ind](inter) 274 | return outs 275 | 276 | 277 | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs): 278 | layers = [layer(kernel, dim0, dim1, stride=2)] 279 | layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)] 280 | return nn.Sequential(*layers) 281 | 282 | 283 | class HourglassNet(exkp): 284 | def __init__(self, heads, num_stacks=2): 285 | n = 5 286 | dims = [256, 256, 384, 384, 384, 512] 287 | modules = [2, 2, 2, 2, 2, 4] 288 | 289 | super(HourglassNet, self).__init__( 290 | n, num_stacks, dims, modules, heads, 291 | make_tl_layer=None, 292 | make_br_layer=None, 293 | make_pool_layer=make_pool_layer, 294 | make_hg_layer=make_hg_layer, 295 | kp_layer=residual, cnv_dim=256 296 | ) 297 | 298 | def get_large_hourglass_net(num_layers, heads, head_conv): 299 | model = HourglassNet(heads, 2) 300 | return model 301 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/resnet_dcn.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Dequan Wang and Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import math 14 | import logging 15 | 16 | import torch 17 | import torch.nn as nn 18 | from .DCNv2.DCN.dcn_v2 import DCN 19 | import torch.utils.model_zoo as model_zoo 20 | 21 | BN_MOMENTUM = 0.1 22 | logger = logging.getLogger(__name__) 23 | 24 | model_urls = { 25 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 26 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 27 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 28 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 29 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 30 | } 31 | 32 | def conv3x3(in_planes, out_planes, stride=1): 33 | """3x3 convolution with padding""" 34 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 35 | padding=1, bias=False) 36 | 37 | 38 | class BasicBlock(nn.Module): 39 | expansion = 1 40 | 41 | def __init__(self, inplanes, planes, stride=1, downsample=None): 42 | super(BasicBlock, self).__init__() 43 | self.conv1 = conv3x3(inplanes, planes, stride) 44 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 45 | self.relu = nn.ReLU(inplace=True) 46 | self.conv2 = conv3x3(planes, planes) 47 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 48 | self.downsample = downsample 49 | self.stride = stride 50 | 51 | def forward(self, x): 52 | residual = x 53 | 54 | out = self.conv1(x) 55 | out = self.bn1(out) 56 | out = self.relu(out) 57 | 58 | out = self.conv2(out) 59 | out 
= self.bn2(out) 60 | 61 | if self.downsample is not None: 62 | residual = self.downsample(x) 63 | 64 | out += residual 65 | out = self.relu(out) 66 | 67 | return out 68 | 69 | 70 | class Bottleneck(nn.Module): 71 | expansion = 4 72 | 73 | def __init__(self, inplanes, planes, stride=1, downsample=None): 74 | super(Bottleneck, self).__init__() 75 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 76 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 77 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 78 | padding=1, bias=False) 79 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 80 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 81 | bias=False) 82 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 83 | momentum=BN_MOMENTUM) 84 | self.relu = nn.ReLU(inplace=True) 85 | self.downsample = downsample 86 | self.stride = stride 87 | 88 | def forward(self, x): 89 | residual = x 90 | 91 | out = self.conv1(x) 92 | out = self.bn1(out) 93 | out = self.relu(out) 94 | 95 | out = self.conv2(out) 96 | out = self.bn2(out) 97 | out = self.relu(out) 98 | 99 | out = self.conv3(out) 100 | out = self.bn3(out) 101 | 102 | if self.downsample is not None: 103 | residual = self.downsample(x) 104 | 105 | out += residual 106 | out = self.relu(out) 107 | 108 | return out 109 | 110 | def fill_up_weights(up): 111 | w = up.weight.data 112 | f = math.ceil(w.size(2) / 2) 113 | c = (2 * f - 1 - f % 2) / (2. * f) 114 | for i in range(w.size(2)): 115 | for j in range(w.size(3)): 116 | w[0, 0, i, j] = \ 117 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 118 | for c in range(1, w.size(0)): 119 | w[c, 0, :, :] = w[0, 0, :, :] 120 | 121 | def fill_fc_weights(layers): 122 | for m in layers.modules(): 123 | if isinstance(m, nn.Conv2d): 124 | nn.init.normal_(m.weight, std=0.001) 125 | # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu') 126 | # torch.nn.init.xavier_normal_(m.weight.data) 127 | if m.bias is not None: 128 | nn.init.constant_(m.bias, 0) 129 | 130 | class PoseResNet(nn.Module): 131 | 132 | def __init__(self, block, layers, heads, head_conv): 133 | self.inplanes = 64 134 | self.heads = heads 135 | self.deconv_with_bias = False 136 | 137 | super(PoseResNet, self).__init__() 138 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 139 | bias=False) 140 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 141 | self.relu = nn.ReLU(inplace=True) 142 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 143 | self.layer1 = self._make_layer(block, 64, layers[0]) 144 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 145 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 146 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 147 | 148 | # used for deconv layers 149 | self.deconv_layers = self._make_deconv_layer( 150 | 3, 151 | [256, 128, 64], 152 | [4, 4, 4], 153 | ) 154 | 155 | for head in self.heads: 156 | classes = self.heads[head] 157 | if head_conv > 0: 158 | fc = nn.Sequential( 159 | nn.Conv2d(64, head_conv, 160 | kernel_size=3, padding=1, bias=True), 161 | nn.ReLU(inplace=True), 162 | nn.Conv2d(head_conv, classes, 163 | kernel_size=1, stride=1, 164 | padding=0, bias=True)) 165 | if 'hm' in head: 166 | fc[-1].bias.data.fill_(-2.19) 167 | else: 168 | fill_fc_weights(fc) 169 | else: 170 | fc = nn.Conv2d(64, classes, 171 | kernel_size=1, stride=1, 172 | padding=0, bias=True) 173 | if 'hm' in head: 174 | fc.bias.data.fill_(-2.19) 175 | else: 176 | 
fill_fc_weights(fc) 177 | self.__setattr__(head, fc) 178 | 179 | def _make_layer(self, block, planes, blocks, stride=1): 180 | downsample = None 181 | if stride != 1 or self.inplanes != planes * block.expansion: 182 | downsample = nn.Sequential( 183 | nn.Conv2d(self.inplanes, planes * block.expansion, 184 | kernel_size=1, stride=stride, bias=False), 185 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 186 | ) 187 | 188 | layers = [] 189 | layers.append(block(self.inplanes, planes, stride, downsample)) 190 | self.inplanes = planes * block.expansion 191 | for i in range(1, blocks): 192 | layers.append(block(self.inplanes, planes)) 193 | 194 | return nn.Sequential(*layers) 195 | 196 | def _get_deconv_cfg(self, deconv_kernel, index): 197 | if deconv_kernel == 4: 198 | padding = 1 199 | output_padding = 0 200 | elif deconv_kernel == 3: 201 | padding = 1 202 | output_padding = 1 203 | elif deconv_kernel == 2: 204 | padding = 0 205 | output_padding = 0 206 | 207 | return deconv_kernel, padding, output_padding 208 | 209 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 210 | assert num_layers == len(num_filters), \ 211 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 212 | assert num_layers == len(num_kernels), \ 213 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 214 | 215 | layers = [] 216 | for i in range(num_layers): 217 | kernel, padding, output_padding = \ 218 | self._get_deconv_cfg(num_kernels[i], i) 219 | 220 | planes = num_filters[i] 221 | fc = DCN(self.inplanes, planes, 222 | kernel_size=(3,3), stride=1, 223 | padding=1, dilation=1, deformable_groups=1) 224 | # fc = nn.Conv2d(self.inplanes, planes, 225 | # kernel_size=3, stride=1, 226 | # padding=1, dilation=1, bias=False) 227 | # fill_fc_weights(fc) 228 | up = nn.ConvTranspose2d( 229 | in_channels=planes, 230 | out_channels=planes, 231 | kernel_size=kernel, 232 | stride=2, 233 | padding=padding, 234 | output_padding=output_padding, 235 | bias=self.deconv_with_bias) 236 | fill_up_weights(up) 237 | 238 | layers.append(fc) 239 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 240 | layers.append(nn.ReLU(inplace=True)) 241 | layers.append(up) 242 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 243 | layers.append(nn.ReLU(inplace=True)) 244 | self.inplanes = planes 245 | 246 | return nn.Sequential(*layers) 247 | 248 | def forward(self, x): 249 | x = self.conv1(x) 250 | x = self.bn1(x) 251 | x = self.relu(x) 252 | x = self.maxpool(x) 253 | 254 | x = self.layer1(x) 255 | x = self.layer2(x) 256 | x = self.layer3(x) 257 | x = self.layer4(x) 258 | 259 | x = self.deconv_layers(x) 260 | ret = {} 261 | for head in self.heads: 262 | ret[head] = self.__getattr__(head)(x) 263 | return [ret] 264 | 265 | def init_weights(self, num_layers): 266 | if 1: 267 | url = model_urls['resnet{}'.format(num_layers)] 268 | pretrained_state_dict = model_zoo.load_url(url) 269 | print('=> loading pretrained model {}'.format(url)) 270 | self.load_state_dict(pretrained_state_dict, strict=False) 271 | print('=> init deconv weights from normal distribution') 272 | for name, m in self.deconv_layers.named_modules(): 273 | if isinstance(m, nn.BatchNorm2d): 274 | nn.init.constant_(m.weight, 1) 275 | nn.init.constant_(m.bias, 0) 276 | 277 | 278 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 279 | 34: (BasicBlock, [3, 4, 6, 3]), 280 | 50: (Bottleneck, [3, 4, 6, 3]), 281 | 101: (Bottleneck, [3, 4, 23, 3]), 282 | 152: (Bottleneck, [3, 8, 36, 3])} 283 | 284 | 285 | def 
get_pose_net(num_layers, heads, head_conv=256): 286 | block_class, layers = resnet_spec[num_layers] 287 | 288 | model = PoseResNet(block_class, layers, heads, head_conv=head_conv) 289 | model.init_weights(num_layers) 290 | return model 291 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/msra_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.utils.model_zoo as model_zoo 17 | 18 | BN_MOMENTUM = 0.1 19 | 20 | model_urls = { 21 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 22 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 23 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 24 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 25 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 26 | } 27 | 28 | def conv3x3(in_planes, out_planes, stride=1): 29 | """3x3 convolution with padding""" 30 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 31 | padding=1, bias=False) 32 | 33 | 34 | class BasicBlock(nn.Module): 35 | expansion = 1 36 | 37 | def __init__(self, inplanes, planes, stride=1, downsample=None): 38 | super(BasicBlock, self).__init__() 39 | self.conv1 = conv3x3(inplanes, planes, stride) 40 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 41 | self.relu = nn.ReLU(inplace=True) 42 | self.conv2 = conv3x3(planes, planes) 43 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 44 | self.downsample = downsample 45 | self.stride = stride 46 | 47 | def forward(self, x): 48 | residual = x 49 | 50 | out = self.conv1(x) 51 | out = self.bn1(out) 52 | out = self.relu(out) 53 | 54 | out = self.conv2(out) 55 | out = self.bn2(out) 56 | 57 | if self.downsample is not None: 58 | residual = self.downsample(x) 59 | 60 | out += residual 61 | out = self.relu(out) 62 | 63 | return out 64 | 65 | 66 | class Bottleneck(nn.Module): 67 | expansion = 4 68 | 69 | def __init__(self, inplanes, planes, stride=1, downsample=None): 70 | super(Bottleneck, self).__init__() 71 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 72 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 73 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 74 | padding=1, bias=False) 75 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 76 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 77 | bias=False) 78 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 79 | momentum=BN_MOMENTUM) 80 | self.relu = nn.ReLU(inplace=True) 81 | self.downsample = downsample 82 | self.stride = stride 83 | 84 | def forward(self, x): 85 | residual = x 86 | 87 | out = self.conv1(x) 88 | out = self.bn1(out) 89 | out = self.relu(out) 90 | 91 | out = self.conv2(out) 92 | out = self.bn2(out) 93 | out = self.relu(out) 94 | 95 | out = self.conv3(out) 96 | out = self.bn3(out) 97 | 98 | if self.downsample is not 
None: 99 | residual = self.downsample(x) 100 | 101 | out += residual 102 | out = self.relu(out) 103 | 104 | return out 105 | 106 | 107 | class PoseResNet(nn.Module): 108 | 109 | def __init__(self, block, layers, heads, head_conv, **kwargs): 110 | self.inplanes = 64 111 | self.deconv_with_bias = False 112 | self.heads = heads 113 | 114 | super(PoseResNet, self).__init__() 115 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 116 | bias=False) 117 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 118 | self.relu = nn.ReLU(inplace=True) 119 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 120 | self.layer1 = self._make_layer(block, 64, layers[0]) 121 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 122 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 123 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 124 | 125 | # used for deconv layers 126 | self.deconv_layers = self._make_deconv_layer( 127 | 3, 128 | [256, 256, 256], 129 | [4, 4, 4], 130 | ) 131 | # self.final_layer = [] 132 | 133 | for head in sorted(self.heads): 134 | num_output = self.heads[head] 135 | if head_conv > 0: 136 | fc = nn.Sequential( 137 | nn.Conv2d(256, head_conv, 138 | kernel_size=3, padding=1, bias=True), 139 | nn.ReLU(inplace=True), 140 | nn.Conv2d(head_conv, num_output, 141 | kernel_size=1, stride=1, padding=0)) 142 | else: 143 | fc = nn.Conv2d( 144 | in_channels=256, 145 | out_channels=num_output, 146 | kernel_size=1, 147 | stride=1, 148 | padding=0 149 | ) 150 | self.__setattr__(head, fc) 151 | 152 | # self.final_layer = nn.ModuleList(self.final_layer) 153 | 154 | def _make_layer(self, block, planes, blocks, stride=1): 155 | downsample = None 156 | if stride != 1 or self.inplanes != planes * block.expansion: 157 | downsample = nn.Sequential( 158 | nn.Conv2d(self.inplanes, planes * block.expansion, 159 | kernel_size=1, stride=stride, bias=False), 160 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 161 | ) 162 | 163 | layers = [] 164 | layers.append(block(self.inplanes, planes, stride, downsample)) 165 | self.inplanes = planes * block.expansion 166 | for i in range(1, blocks): 167 | layers.append(block(self.inplanes, planes)) 168 | 169 | return nn.Sequential(*layers) 170 | 171 | def _get_deconv_cfg(self, deconv_kernel, index): 172 | if deconv_kernel == 4: 173 | padding = 1 174 | output_padding = 0 175 | elif deconv_kernel == 3: 176 | padding = 1 177 | output_padding = 1 178 | elif deconv_kernel == 2: 179 | padding = 0 180 | output_padding = 0 181 | 182 | return deconv_kernel, padding, output_padding 183 | 184 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 185 | assert num_layers == len(num_filters), \ 186 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 187 | assert num_layers == len(num_kernels), \ 188 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 189 | 190 | layers = [] 191 | for i in range(num_layers): 192 | kernel, padding, output_padding = \ 193 | self._get_deconv_cfg(num_kernels[i], i) 194 | 195 | planes = num_filters[i] 196 | layers.append( 197 | nn.ConvTranspose2d( 198 | in_channels=self.inplanes, 199 | out_channels=planes, 200 | kernel_size=kernel, 201 | stride=2, 202 | padding=padding, 203 | output_padding=output_padding, 204 | bias=self.deconv_with_bias)) 205 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 206 | layers.append(nn.ReLU(inplace=True)) 207 | self.inplanes = planes 208 | 209 | return nn.Sequential(*layers) 210 | 211 | 
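# A minimal construction sketch (illustrative only; the head names and channel
# counts below are placeholders rather than the ones used elsewhere in this
# repo, and init_weights downloads ImageNet-pretrained ResNet weights):
#
#   heads = {'hm': 80, 'wh': 2, 'reg': 2}        # head name -> output channels
#   net = get_pose_net(num_layers=18, heads=heads, head_conv=64)
#   out = net(torch.randn(1, 3, 512, 512))[0]    # forward returns a one-element list of dicts
#   # each out[h] has shape (1, heads[h], 128, 128): the output maps are at stride 4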
def forward(self, x): 212 | x = self.conv1(x) 213 | x = self.bn1(x) 214 | x = self.relu(x) 215 | x = self.maxpool(x) 216 | 217 | x = self.layer1(x) 218 | x = self.layer2(x) 219 | x = self.layer3(x) 220 | x = self.layer4(x) 221 | 222 | x = self.deconv_layers(x) 223 | ret = {} 224 | for head in self.heads: 225 | ret[head] = self.__getattr__(head)(x) 226 | return [ret] 227 | 228 | def init_weights(self, num_layers, pretrained=True): 229 | if pretrained: 230 | # print('=> init resnet deconv weights from normal distribution') 231 | for _, m in self.deconv_layers.named_modules(): 232 | if isinstance(m, nn.ConvTranspose2d): 233 | # print('=> init {}.weight as normal(0, 0.001)'.format(name)) 234 | # print('=> init {}.bias as 0'.format(name)) 235 | nn.init.normal_(m.weight, std=0.001) 236 | if self.deconv_with_bias: 237 | nn.init.constant_(m.bias, 0) 238 | elif isinstance(m, nn.BatchNorm2d): 239 | # print('=> init {}.weight as 1'.format(name)) 240 | # print('=> init {}.bias as 0'.format(name)) 241 | nn.init.constant_(m.weight, 1) 242 | nn.init.constant_(m.bias, 0) 243 | # print('=> init final conv weights from normal distribution') 244 | for head in self.heads: 245 | final_layer = self.__getattr__(head) 246 | for i, m in enumerate(final_layer.modules()): 247 | if isinstance(m, nn.Conv2d): 248 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 249 | # print('=> init {}.weight as normal(0, 0.001)'.format(name)) 250 | # print('=> init {}.bias as 0'.format(name)) 251 | if m.weight.shape[0] == self.heads[head]: 252 | if 'hm' in head: 253 | nn.init.constant_(m.bias, -2.19) 254 | else: 255 | nn.init.normal_(m.weight, std=0.001) 256 | nn.init.constant_(m.bias, 0) 257 | #pretrained_state_dict = torch.load(pretrained) 258 | url = model_urls['resnet{}'.format(num_layers)] 259 | pretrained_state_dict = model_zoo.load_url(url) 260 | print('=> loading pretrained model {}'.format(url)) 261 | self.load_state_dict(pretrained_state_dict, strict=False) 262 | else: 263 | print('=> imagenet pretrained model dose not exist') 264 | print('=> please download it first') 265 | raise ValueError('imagenet pretrained model does not exist') 266 | 267 | 268 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 269 | 34: (BasicBlock, [3, 4, 6, 3]), 270 | 50: (Bottleneck, [3, 4, 6, 3]), 271 | 101: (Bottleneck, [3, 4, 23, 3]), 272 | 152: (Bottleneck, [3, 8, 36, 3])} 273 | 274 | 275 | def get_pose_net(num_layers, heads, head_conv): 276 | block_class, layers = resnet_spec[num_layers] 277 | 278 | model = PoseResNet(block_class, layers, heads, head_conv=head_conv) 279 | model.init_weights(num_layers, pretrained=True) 280 | return model 281 | -------------------------------------------------------------------------------- /Network/PWC/PWCNet.py: -------------------------------------------------------------------------------- 1 | """ 2 | implementation of the PWC-DC network for optical flow estimation by Sun et al., 2018 3 | 4 | Jinwei Gu and Zhile Ren 5 | 6 | """ 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import os 12 | import numpy as np 13 | from .correlation import FunctionCorrelation 14 | import cv2 # debug 15 | 16 | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1): 17 | return nn.Sequential( 18 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 19 | padding=padding, dilation=dilation, bias=True), 20 | nn.LeakyReLU(0.1)) 21 | 22 | def predict_flow(in_planes): 23 | return 
nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=True) 24 | 25 | def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1): 26 | return nn.ConvTranspose2d(in_planes, out_planes, kernel_size, stride, padding, bias=True) 27 | 28 | 29 | 30 | class PWCDCNet(nn.Module): 31 | """ 32 | PWC-DC net. add dilation convolution and densenet connections 33 | 34 | """ 35 | def __init__(self, md=4, flow_norm=20.0): 36 | """ 37 | input: md --- maximum displacement (for correlation. default: 4), after warpping 38 | 39 | """ 40 | super(PWCDCNet,self).__init__() 41 | 42 | self.flow_norm = flow_norm 43 | 44 | self.conv1a = conv(3, 16, kernel_size=3, stride=2) 45 | self.conv1aa = conv(16, 16, kernel_size=3, stride=1) 46 | self.conv1b = conv(16, 16, kernel_size=3, stride=1) 47 | self.conv2a = conv(16, 32, kernel_size=3, stride=2) 48 | self.conv2aa = conv(32, 32, kernel_size=3, stride=1) 49 | self.conv2b = conv(32, 32, kernel_size=3, stride=1) 50 | self.conv3a = conv(32, 64, kernel_size=3, stride=2) 51 | self.conv3aa = conv(64, 64, kernel_size=3, stride=1) 52 | self.conv3b = conv(64, 64, kernel_size=3, stride=1) 53 | self.conv4a = conv(64, 96, kernel_size=3, stride=2) 54 | self.conv4aa = conv(96, 96, kernel_size=3, stride=1) 55 | self.conv4b = conv(96, 96, kernel_size=3, stride=1) 56 | self.conv5a = conv(96, 128, kernel_size=3, stride=2) 57 | self.conv5aa = conv(128,128, kernel_size=3, stride=1) 58 | self.conv5b = conv(128,128, kernel_size=3, stride=1) 59 | self.conv6aa = conv(128,196, kernel_size=3, stride=2) 60 | self.conv6a = conv(196,196, kernel_size=3, stride=1) 61 | self.conv6b = conv(196,196, kernel_size=3, stride=1) 62 | 63 | # self.corr = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1) 64 | self.leakyRELU = nn.LeakyReLU(0.1) 65 | 66 | nd = (2*md+1)**2 67 | dd = np.cumsum([128,128,96,64,32]) 68 | 69 | od = nd 70 | self.conv6_0 = conv(od, 128, kernel_size=3, stride=1) 71 | self.conv6_1 = conv(od+dd[0],128, kernel_size=3, stride=1) 72 | self.conv6_2 = conv(od+dd[1],96, kernel_size=3, stride=1) 73 | self.conv6_3 = conv(od+dd[2],64, kernel_size=3, stride=1) 74 | self.conv6_4 = conv(od+dd[3],32, kernel_size=3, stride=1) 75 | self.predict_flow6 = predict_flow(od+dd[4]) 76 | self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 77 | self.upfeat6 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 78 | 79 | od = nd+128+4 80 | self.conv5_0 = conv(od, 128, kernel_size=3, stride=1) 81 | self.conv5_1 = conv(od+dd[0],128, kernel_size=3, stride=1) 82 | self.conv5_2 = conv(od+dd[1],96, kernel_size=3, stride=1) 83 | self.conv5_3 = conv(od+dd[2],64, kernel_size=3, stride=1) 84 | self.conv5_4 = conv(od+dd[3],32, kernel_size=3, stride=1) 85 | self.predict_flow5 = predict_flow(od+dd[4]) 86 | self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 87 | self.upfeat5 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 88 | 89 | od = nd+96+4 90 | self.conv4_0 = conv(od, 128, kernel_size=3, stride=1) 91 | self.conv4_1 = conv(od+dd[0],128, kernel_size=3, stride=1) 92 | self.conv4_2 = conv(od+dd[1],96, kernel_size=3, stride=1) 93 | self.conv4_3 = conv(od+dd[2],64, kernel_size=3, stride=1) 94 | self.conv4_4 = conv(od+dd[3],32, kernel_size=3, stride=1) 95 | self.predict_flow4 = predict_flow(od+dd[4]) 96 | self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 97 | self.upfeat4 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 98 | 99 | od = nd+64+4 100 | self.conv3_0 = conv(od, 128, kernel_size=3, stride=1) 101 | 
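# Decoder channel bookkeeping (the same pattern repeats at every pyramid level
# in this constructor): with md=4 the correlation volume has
# nd = (2*md+1)**2 = 81 channels; each conv{l}_{i} concatenates its output with
# its input (DenseNet-style), so the running width grows by
# dd = cumsum([128,128,96,64,32]); and od adds the encoder feature channels of
# that level plus 4 (2 upsampled-flow + 2 upsampled-feature channels),
# e.g. od = nd + 64 + 4 at this level.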
self.conv3_1 = conv(od+dd[0],128, kernel_size=3, stride=1) 102 | self.conv3_2 = conv(od+dd[1],96, kernel_size=3, stride=1) 103 | self.conv3_3 = conv(od+dd[2],64, kernel_size=3, stride=1) 104 | self.conv3_4 = conv(od+dd[3],32, kernel_size=3, stride=1) 105 | self.predict_flow3 = predict_flow(od+dd[4]) 106 | self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 107 | self.upfeat3 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1) 108 | 109 | od = nd+32+4 110 | self.conv2_0 = conv(od, 128, kernel_size=3, stride=1) 111 | self.conv2_1 = conv(od+dd[0],128, kernel_size=3, stride=1) 112 | self.conv2_2 = conv(od+dd[1],96, kernel_size=3, stride=1) 113 | self.conv2_3 = conv(od+dd[2],64, kernel_size=3, stride=1) 114 | self.conv2_4 = conv(od+dd[3],32, kernel_size=3, stride=1) 115 | self.predict_flow2 = predict_flow(od+dd[4]) 116 | self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1) 117 | 118 | self.dc_conv1 = conv(od+dd[4], 128, kernel_size=3, stride=1, padding=1, dilation=1) 119 | self.dc_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2) 120 | self.dc_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4) 121 | self.dc_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8) 122 | self.dc_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16) 123 | self.dc_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1) 124 | self.dc_conv7 = predict_flow(32) 125 | 126 | for m in self.modules(): 127 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 128 | nn.init.kaiming_normal(m.weight.data, mode='fan_in') 129 | if m.bias is not None: 130 | m.bias.data.zero_() 131 | 132 | 133 | def warp(self, x, flo): 134 | """ 135 | warp an image/tensor (im2) back to im1, according to the optical flow 136 | 137 | x: [B, C, H, W] (im2) 138 | flo: [B, 2, H, W] flow 139 | 140 | """ 141 | B, C, H, W = x.size() 142 | # mesh grid 143 | xx = torch.arange(0, W).view(1,-1).repeat(H,1) 144 | yy = torch.arange(0, H).view(-1,1).repeat(1,W) 145 | xx = xx.view(1,1,H,W).repeat(B,1,1,1) 146 | yy = yy.view(1,1,H,W).repeat(B,1,1,1) 147 | grid = torch.cat((xx,yy),1).float() 148 | 149 | if x.is_cuda: 150 | grid = grid.cuda() 151 | vgrid = grid + flo 152 | 153 | # scale grid to [-1,1] 154 | vgrid[:,0,:,:] = 2.0*vgrid[:,0,:,:].clone() / max(W-1,1)-1.0 155 | vgrid[:,1,:,:] = 2.0*vgrid[:,1,:,:].clone() / max(H-1,1)-1.0 156 | 157 | vgrid = vgrid.permute(0,2,3,1) 158 | output = nn.functional.grid_sample(x, vgrid, align_corners=True) 159 | mask = torch.ones(x.size()).cuda() 160 | mask = nn.functional.grid_sample(mask, vgrid, align_corners=True) 161 | 162 | # if W==128: 163 | # np.save('mask.npy', mask.cpu().data.numpy()) 164 | # np.save('warp.npy', output.cpu().data.numpy()) 165 | 166 | mask[mask<0.9999] = 0 167 | mask[mask>0] = 1 168 | 169 | return output*mask 170 | 171 | def multi_scale_conv(self, conv0_func, conv1_func, conv2_func, conv3_func, conv4_func, input_feat): 172 | x = torch.cat((conv0_func(input_feat), input_feat),1) 173 | x = torch.cat((conv1_func(x), x),1) 174 | x = torch.cat((conv2_func(x), x),1) 175 | x = torch.cat((conv3_func(x), x),1) 176 | x = torch.cat((conv4_func(x), x),1) 177 | return x 178 | 179 | def concate_two_layers(self, pred_func, decon_func, upfeat_func, feat_high, feat_low1, feat_low2, scale): 180 | flow_high = pred_func(feat_high) 181 | up_flow_high = decon_func(flow_high) 182 | up_feat_high = upfeat_func(feat_high) 183 | 184 | warp_feat = self.warp(feat_low2, up_flow_high*scale) 185 | corr_low = 
FunctionCorrelation(tenFirst=feat_low1, tenSecond=warp_feat) 186 | corr_low = self.leakyRELU(corr_low) 187 | x = torch.cat((corr_low, feat_low1, up_flow_high, up_feat_high), 1) 188 | 189 | return x, flow_high 190 | 191 | def forward(self,x): 192 | im1 = x[:,0:3,...] 193 | im2 = x[:,3:6,...] 194 | 195 | c11 = self.conv1b(self.conv1aa(self.conv1a(im1))) 196 | c21 = self.conv1b(self.conv1aa(self.conv1a(im2))) 197 | c12 = self.conv2b(self.conv2aa(self.conv2a(c11))) 198 | c22 = self.conv2b(self.conv2aa(self.conv2a(c21))) 199 | c13 = self.conv3b(self.conv3aa(self.conv3a(c12))) 200 | c23 = self.conv3b(self.conv3aa(self.conv3a(c22))) 201 | c14 = self.conv4b(self.conv4aa(self.conv4a(c13))) 202 | c24 = self.conv4b(self.conv4aa(self.conv4a(c23))) 203 | c15 = self.conv5b(self.conv5aa(self.conv5a(c14))) 204 | c25 = self.conv5b(self.conv5aa(self.conv5a(c24))) 205 | c16 = self.conv6b(self.conv6a(self.conv6aa(c15))) 206 | c26 = self.conv6b(self.conv6a(self.conv6aa(c25))) 207 | 208 | 209 | # corr6 = self.corr(c16, c26) 210 | corr6 = FunctionCorrelation(tenFirst=c16, tenSecond=c26) 211 | corr6 = self.leakyRELU(corr6) 212 | 213 | x = self.multi_scale_conv(self.conv6_0, self.conv6_1, self.conv6_2, self.conv6_3, self.conv6_4, corr6) 214 | x, flow6 = self.concate_two_layers(self.predict_flow6, self.deconv6, self.upfeat6, x, c15, c25, 0.625) 215 | 216 | x = self.multi_scale_conv(self.conv5_0, self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4, x) 217 | x, flow5 = self.concate_two_layers(self.predict_flow5, self.deconv5, self.upfeat5, x, c14, c24, 1.25) 218 | 219 | x = self.multi_scale_conv(self.conv4_0, self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4, x) 220 | x, flow4 = self.concate_two_layers(self.predict_flow4, self.deconv4, self.upfeat4, x, c13, c23, 2.5) 221 | 222 | x = self.multi_scale_conv(self.conv3_0, self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4, x) 223 | x, flow3 = self.concate_two_layers(self.predict_flow3, self.deconv3, self.upfeat3, x, c12, c22, 5.0) 224 | 225 | x = self.multi_scale_conv(self.conv2_0, self.conv2_1, self.conv2_2, self.conv2_3, self.conv2_4, x) 226 | 227 | flow2 = self.predict_flow2(x) 228 | 229 | x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x)))) 230 | refine = self.dc_conv7(self.dc_conv6(self.dc_conv5(x))) 231 | flow2 = flow2 + refine 232 | 233 | return flow2 234 | 235 | 236 | def pwc_dc_net(path=None): 237 | 238 | model = PWCDCNet() 239 | if path is not None: 240 | data = torch.load(path) 241 | if 'state_dict' in data.keys(): 242 | model.load_state_dict(data['state_dict']) 243 | else: 244 | model.load_state_dict(data) 245 | return model 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /Network/rigidmask/networks/DCNv2/DCN/src/cpu/dcn_v2_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include <vector> 2 | #include "cpu/dcn_v2_im2col_cpu.h" 3 | #include <iostream> 4 | 5 | #include <ATen/ATen.h> 6 | //#include <ATen/cuda/CUDAContext.h> 7 | 8 | #include <TH/TH.h> 9 | //#include <THC/THCAtomics.cuh> 10 | //#include <THC/THCDeviceUtils.cuh> 11 | 12 | //extern THCState *state; 13 | 14 | // author: Charles Shang 15 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 16 | 17 | // modified from the CUDA version for CPU use by Daniel K. 
Suhendro 18 | 19 | // edit by: James Bockman and Matthew Howe 20 | // modified for torch implementation to remove use of deprecated torch access to Blas 21 | 22 | at::Tensor 23 | dcn_v2_cpu_forward(const at::Tensor &input, 24 | const at::Tensor &weight, 25 | const at::Tensor &bias, 26 | const at::Tensor &offset, 27 | const at::Tensor &mask, 28 | const int kernel_h, 29 | const int kernel_w, 30 | const int stride_h, 31 | const int stride_w, 32 | const int pad_h, 33 | const int pad_w, 34 | const int dilation_h, 35 | const int dilation_w, 36 | const int deformable_group) 37 | { 38 | // THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, weight, bias, offset, mask)); 39 | /*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); 40 | AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor"); 41 | AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor"); 42 | AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor"); 43 | AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");*/ 44 | 45 | const int batch = input.size(0); 46 | const int channels = input.size(1); 47 | const int height = input.size(2); 48 | const int width = input.size(3); 49 | 50 | const int channels_out = weight.size(0); 51 | const int channels_kernel = weight.size(1); 52 | const int kernel_h_ = weight.size(2); 53 | const int kernel_w_ = weight.size(3); 54 | 55 | // printf("Kernels: %d %d %d %d\n", kernel_h_, kernel_w_, kernel_w, kernel_h); 56 | // printf("Channels: %d %d\n", channels, channels_kernel); 57 | // printf("Channels: %d %d\n", channels_out, channels_kernel); 58 | 59 | AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w, 60 | "Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h, kernel_w, kernel_h_, kernel_w_); 61 | 62 | AT_ASSERTM(channels == channels_kernel, 63 | "Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel); 64 | 65 | const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; 66 | const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; 67 | 68 | // auto ones = at::ones({height_out, width_out}, input.options()); 69 | auto ones = at::ones({bias.sizes()[0], height_out, width_out}, input.options()); 70 | auto columns = at::empty({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options()); 71 | auto output = at::zeros({batch, channels_out, height_out, width_out}, input.options()); 72 | 73 | using scalar_t = float; 74 | for (int b = 0; b < batch; b++) 75 | { 76 | auto input_n = input.select(0, b); 77 | auto offset_n = offset.select(0, b); 78 | auto mask_n = mask.select(0, b); 79 | auto output_n = output.select(0, b); 80 | // std::cout << "output_n: " << output_n << "output.select(0,b): " << output.select(0,b) << "\n"; 81 | 82 | // Do Bias first: 83 | // M,N,K are dims of matrix A and B 84 | // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) 85 | // (N x 1) (1 x M) 86 | 87 | // torch implementation 88 | auto ones_T = at::transpose(ones.contiguous(), 2, 0); 89 | ones_T = at::mul(ones_T, bias.contiguous()); 90 | ones_T = at::transpose(ones_T, 2, 0); 91 | output_n = at::add(output_n, ones_T); 92 | 93 | modulated_deformable_im2col_cpu(input_n.data_ptr<scalar_t>(), 94 | offset_n.data_ptr<scalar_t>(), 95 | mask_n.data_ptr<scalar_t>(), 96 | 1, channels, height, width, 97 | height_out, width_out, kernel_h, kernel_w, 98 | pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, 99 | deformable_group, 100 | columns.data_ptr<scalar_t>()); 101 | 102 
| //(k * m) x (m * n) 103 | // Y = WC 104 | 105 | // torch implementation 106 | auto weight_flat = weight.view({channels_out, channels * kernel_h * kernel_w}); 107 | auto product = at::matmul(weight_flat, columns); 108 | output.select(0, b) = at::add(output_n, product.view({channels_out, height_out, width_out})); 109 | } 110 | return output; 111 | } 112 | 113 | std::vector<at::Tensor> dcn_v2_cpu_backward(const at::Tensor &input, 114 | const at::Tensor &weight, 115 | const at::Tensor &bias, 116 | const at::Tensor &offset, 117 | const at::Tensor &mask, 118 | const at::Tensor &grad_output, 119 | int kernel_h, int kernel_w, 120 | int stride_h, int stride_w, 121 | int pad_h, int pad_w, 122 | int dilation_h, int dilation_w, 123 | int deformable_group) 124 | { 125 | 126 | THArgCheck(input.is_contiguous(), 1, "input tensor has to be contiguous"); 127 | THArgCheck(weight.is_contiguous(), 2, "weight tensor has to be contiguous"); 128 | 129 | /*AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); 130 | AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor"); 131 | AT_ASSERTM(bias.type().is_cuda(), "bias must be a CUDA tensor"); 132 | AT_ASSERTM(offset.type().is_cuda(), "offset must be a CUDA tensor"); 133 | AT_ASSERTM(mask.type().is_cuda(), "mask must be a CUDA tensor");*/ 134 | 135 | const int batch = input.size(0); 136 | const int channels = input.size(1); 137 | const int height = input.size(2); 138 | const int width = input.size(3); 139 | 140 | const int channels_out = weight.size(0); 141 | const int channels_kernel = weight.size(1); 142 | const int kernel_h_ = weight.size(2); 143 | const int kernel_w_ = weight.size(3); 144 | 145 | AT_ASSERTM(kernel_h_ == kernel_h && kernel_w_ == kernel_w, 146 | "Input shape and kernel shape won't match: (%d x %d vs %d x %d).", kernel_h, kernel_w, kernel_h_, kernel_w_); 147 | 148 | AT_ASSERTM(channels == channels_kernel, 149 | "Input shape and kernel channels won't match: (%d vs %d).", channels, channels_kernel); 150 | 151 | const int height_out = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; 152 | const int width_out = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; 153 | 154 | auto ones = at::ones({height_out, width_out}, input.options()); 155 | auto columns = at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, input.options()); 156 | auto output = at::empty({batch, channels_out, height_out, width_out}, input.options()); 157 | 158 | auto grad_input = at::zeros_like(input); 159 | auto grad_weight = at::zeros_like(weight); 160 | auto grad_bias = at::zeros_like(bias); 161 | auto grad_offset = at::zeros_like(offset); 162 | auto grad_mask = at::zeros_like(mask); 163 | 164 | using scalar_t = float; 165 | 166 | for (int b = 0; b < batch; b++) 167 | { 168 | auto input_n = input.select(0, b); 169 | auto offset_n = offset.select(0, b); 170 | auto mask_n = mask.select(0, b); 171 | auto grad_output_n = grad_output.select(0, b); 172 | auto grad_input_n = grad_input.select(0, b); 173 | auto grad_offset_n = grad_offset.select(0, b); 174 | auto grad_mask_n = grad_mask.select(0, b); 175 | 176 | 177 | 178 | // Torch implementation 179 | auto weight_flat = weight.view({channels_out, channels*kernel_h*kernel_w}); 180 | weight_flat = at::transpose(weight_flat, 1, 0); 181 | auto grad_output_n_flat = grad_output_n.view({channels_out, height_out*width_out}); 182 | columns = at::matmul(weight_flat, grad_output_n_flat); 183 | 184 | // gradient w.r.t. 
input coordinate data 185 | modulated_deformable_col2im_coord_cpu(columns.data_ptr<scalar_t>(), 186 | input_n.data_ptr<scalar_t>(), 187 | offset_n.data_ptr<scalar_t>(), 188 | mask_n.data_ptr<scalar_t>(), 189 | 1, channels, height, width, 190 | height_out, width_out, kernel_h, kernel_w, 191 | pad_h, pad_w, stride_h, stride_w, 192 | dilation_h, dilation_w, deformable_group, 193 | grad_offset_n.data_ptr<scalar_t>(), 194 | grad_mask_n.data_ptr<scalar_t>()); 195 | // gradient w.r.t. input data 196 | modulated_deformable_col2im_cpu(columns.data_ptr<scalar_t>(), 197 | offset_n.data_ptr<scalar_t>(), 198 | mask_n.data_ptr<scalar_t>(), 199 | 1, channels, height, width, 200 | height_out, width_out, kernel_h, kernel_w, 201 | pad_h, pad_w, stride_h, stride_w, 202 | dilation_h, dilation_w, deformable_group, 203 | grad_input_n.data_ptr<scalar_t>()); 204 | 205 | // gradient w.r.t. weight, dWeight should accumulate across the batch and group 206 | modulated_deformable_im2col_cpu(input_n.data_ptr<scalar_t>(), 207 | offset_n.data_ptr<scalar_t>(), 208 | mask_n.data_ptr<scalar_t>(), 209 | 1, channels, height, width, 210 | height_out, width_out, kernel_h, kernel_w, 211 | pad_h, pad_w, stride_h, stride_w, 212 | dilation_h, dilation_w, deformable_group, 213 | columns.data_ptr<scalar_t>()); 214 | 215 | // Torch implementation 216 | auto product = at::matmul(grad_output_n_flat, at::transpose(columns, 1, 0)); 217 | grad_weight = at::add(grad_weight, product.view({channels_out, channels, kernel_h, kernel_w})); 218 | 219 | 220 | // Torch implementation 221 | auto ones_flat = ones.view({height_out*width_out}); 222 | product = at::matmul(grad_output_n_flat, ones_flat); 223 | grad_bias = at::add(grad_bias, product); 224 | } 225 | 226 | return { 227 | grad_input, grad_offset, grad_mask, grad_weight, grad_bias 228 | }; 229 | } 230 | -------------------------------------------------------------------------------- /Datasets/segmask_gt.py: -------------------------------------------------------------------------------- 1 | """ 2 | # ============================== 3 | # segmask_gt.py 4 | # library to generate groundtruth 5 | # segmentation mask given flow and 6 | # disparity change 7 | # (Adapted from code for rigidmask) 8 | # Author: Shihao Shen 9 | # Date: 14th Sep 2022 10 | # ============================== 11 | """ 12 | 13 | import argparse 14 | import os 15 | import os.path 16 | import glob 17 | import numpy as np 18 | import cv2 19 | from PIL import Image 20 | from flowlib import read_flow, readPFM, flow_to_image 21 | 22 | def dataloader(filepath, fpass='frames_cleanpass', level=6): 23 | iml0 = [] 24 | iml1 = [] 25 | flowl0 = [] 26 | disp0 = [] 27 | dispc = [] 28 | calib = [] 29 | level_stars = '/*'*level 30 | candidate_pool = glob.glob('%s/optical_flow%s'%(filepath,level_stars)) 31 | for flow_path in sorted(candidate_pool): 32 | # if 'TEST' in flow_path: continue 33 | if 'flower_storm_x2/into_future/right/OpticalFlowIntoFuture_0023_R.pfm' in flow_path: 34 | print('Skipping %s' % flow_path) 35 | continue 36 | if 'flower_storm_x2/into_future/left/OpticalFlowIntoFuture_0023_L.pfm' in flow_path: 37 | print('Skipping %s' % flow_path) 38 | continue 39 | if 'flower_storm_augmented0_x2/into_future/right/OpticalFlowIntoFuture_0023_R.pfm' in flow_path: 40 | print('Skipping %s' % flow_path) 41 | continue 42 | if 'flower_storm_augmented0_x2/into_future/left/OpticalFlowIntoFuture_0023_L.pfm' in flow_path: 43 | print('Skipping %s' % flow_path) 44 | continue 45 | # if 'FlyingThings' in flow_path and '_0014_' in flow_path: 46 | # print('Skipping %s' % flow_path) 47 | # continue 48 | # if 'FlyingThings' in flow_path and '_0015_' in flow_path: 49 | # print('Skipping %s' % 
flow_path) 50 | # continue 51 | idd = flow_path.split('/')[-1].split('_')[-2] 52 | if 'into_future' in flow_path: 53 | idd_p1 = '%04d'%(int(idd)+1) 54 | else: 55 | idd_p1 = '%04d'%(int(idd)-1) 56 | if os.path.exists(flow_path.replace(idd,idd_p1)): 57 | d0_path = flow_path.replace('/into_future/','/').replace('/into_past/','/').replace('optical_flow','disparity') 58 | d0_path = '%s/%s.pfm'%(d0_path.rsplit('/',1)[0],idd) 59 | dc_path = flow_path.replace('optical_flow','disparity_change') 60 | dc_path = '%s/%s.pfm'%(dc_path.rsplit('/',1)[0],idd) 61 | im_path = flow_path.replace('/into_future/','/').replace('/into_past/','/').replace('optical_flow',fpass) 62 | im0_path = '%s/%s.png'%(im_path.rsplit('/',1)[0],idd) 63 | im1_path = '%s/%s.png'%(im_path.rsplit('/',1)[0],idd_p1) 64 | 65 | # Skip any FlyingThings3D sequence whose camera_data.txt does not contain exactly 10 poses (40 lines) 66 | with open('%s/camera_data.txt'%(im0_path.replace(fpass,'camera_data').rsplit('/',2)[0]),'r') as f: 67 | if 'FlyingThings' in flow_path and len(f.readlines())!=40: 68 | print('Skipping %s' % flow_path) 69 | continue 70 | 71 | iml0.append(im0_path) 72 | iml1.append(im1_path) 73 | flowl0.append(flow_path) 74 | disp0.append(d0_path) 75 | dispc.append(dc_path) 76 | calib.append('%s/camera_data.txt'%(im0_path.replace(fpass,'camera_data').rsplit('/',2)[0])) 77 | return iml0, iml1, flowl0, disp0, dispc, calib 78 | 79 | def default_loader(path): 80 | return Image.open(path).convert('RGB') 81 | 82 | def flow_loader(path): 83 | if '.pfm' in path: 84 | data = readPFM(path)[0] 85 | data[:,:,2] = 1 86 | return data 87 | else: 88 | return read_flow(path) 89 | 90 | def load_exts(cam_file): 91 | with open(cam_file, 'r') as f: 92 | lines = f.readlines() 93 | 94 | l_exts = [] 95 | r_exts = [] 96 | for l in lines: 97 | if 'L ' in l: 98 | l_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4)) 99 | if 'R ' in l: 100 | r_exts.append(np.asarray([float(i) for i in l[2:].strip().split(' ')]).reshape(4,4)) 101 | return l_exts,r_exts 102 | 103 | def disparity_loader(path): 104 | if '.png' in path: 105 | data = Image.open(path) 106 | data = np.ascontiguousarray(data,dtype=np.float32)/256 107 | return data 108 | else: 109 | return readPFM(path)[0] 110 | 111 | # triangulation 112 | def triangulation(disp, xcoord, ycoord, bl=1, fl = 450, cx = 479.5, cy = 269.5): 113 | depth = bl*fl / disp # 450px->15mm focal length 114 | X = (xcoord - cx) * depth / fl 115 | Y = (ycoord - cy) * depth / fl 116 | Z = depth 117 | P = np.concatenate((X[np.newaxis],Y[np.newaxis],Z[np.newaxis]),0).reshape(3,-1) 118 | P = np.concatenate((P,np.ones((1,P.shape[-1]))),0) 119 | return P 120 | 121 | def exp_loader(index, iml0s, iml1s, flowl0s, disp0s=None, dispcs=None, calibs=None): 122 | ''' 123 | index: index of the frame in the file lists below 124 | iml0s: a file list of the first frames 125 | iml1s: a file list of the second frames 126 | flowl0s: a file list of the optical flow w.r.t. iml0s 127 | disp0s: a file list of the disparity w.r.t. iml0s 128 | dispcs: a file list of the disparity change w.r.t. 
disp0s 129 | calibs: a file list of the camera extrinsics 130 | ''' 131 | iml0 = iml0s[index] 132 | iml1 = iml1s[index] 133 | flowl0 = flowl0s[index] 134 | 135 | iml0 = default_loader(iml0) 136 | iml1 = default_loader(iml1) 137 | 138 | flowl0 = flow_loader(flowl0) 139 | flowl0[:,:,-1][flowl0[:,:,0] == np.inf] = 0 140 | flowl0[:,:,0][~flowl0[:,:,2].astype(bool)] = 0 141 | flowl0[:,:,1][~flowl0[:,:,2].astype(bool)] = 0 142 | flowl0 = np.ascontiguousarray(flowl0, dtype=np.float32) 143 | flowl0[np.isnan(flowl0)] = 1e6 144 | 145 | bl = 1 146 | if '15mm_' in calibs[index]: 147 | fl = 450 148 | else: 149 | fl = 1050 150 | cx = 479.5 151 | cy = 269.5 152 | intr = [[fl],[cx],[cy],[bl],[1],[0],[0],[1],[0],[0]] 153 | 154 | d1 = np.abs(disparity_loader(disp0s[index])) 155 | d2 = np.abs(disparity_loader(dispcs[index]) + d1) 156 | 157 | flowl0[:,:,2] = np.logical_and(np.logical_and(flowl0[:,:,2] == 1, d1 != 0), d2 != 0).astype(float) 158 | 159 | shape = d1.shape 160 | mesh = np.meshgrid(range(shape[1]), range(shape[0])) 161 | xcoord = mesh[0].astype(float) 162 | ycoord = mesh[1].astype(float) 163 | 164 | # triangulation in two frames 165 | P0 = triangulation(d1, xcoord, ycoord, bl=bl, fl=fl, cx=cx, cy=cy) 166 | P1 = triangulation(d2, xcoord + flowl0[:,:,0], ycoord + flowl0[:,:,1], bl=bl, fl=fl, cx=cx, cy=cy) 167 | depth0 = P0[2] 168 | depth1 = P1[2] 169 | 170 | depth0 = depth0.reshape(shape).astype(np.float32) 171 | flow3d = (P1-P0)[:3].reshape((3,)+shape).transpose((1,2,0)) 172 | 173 | fid = int(flowl0s[index].split('/')[-1].split('_')[1]) 174 | with open(calibs[index], 'r') as f: 175 | fid = fid - int(f.readline().split(' ')[-1]) 176 | l_exts, r_exts= load_exts(calibs[index]) 177 | if '/right/' in iml0s[index]: 178 | exts = r_exts 179 | else: 180 | exts = l_exts 181 | 182 | if '/into_future/' in flowl0s[index]: 183 | if (fid + 1) > len(exts) - 1: print(flowl0s[index]) 184 | if (fid) > len(exts) - 1: print(flowl0s[index]) 185 | ext1 = exts[fid+1] 186 | ext0 = exts[fid] 187 | else: 188 | if (fid - 1) > len(exts) - 1: print(flowl0s[index]) 189 | if (fid) > len(exts) - 1: print(flowl0s[index]) 190 | ext1 = exts[fid-1] 191 | ext0 = exts[fid] 192 | camT = np.eye(4); camT[1,1] = -1; camT[2,2] = -1 # Sceneflow uses Blender's coordinate system 193 | RT01 = camT.dot(np.linalg.inv(ext0)).dot(ext1).dot(camT) # ext is from camera space to world space 194 | 195 | rect_flow3d = (RT01[:3,:3].dot(P1[:3])-P0[:3]).reshape((3,)+shape).transpose((1,2,0)) # rectified scene flow 196 | 197 | depthflow = np.concatenate((depth0[:,:,np.newaxis], rect_flow3d, flow3d), 2) 198 | RT01 = np.concatenate((cv2.Rodrigues(RT01[:3,:3])[0][:,0], RT01[:3,-1])).astype(np.float32) 199 | 200 | # object mask 201 | fnum = int(iml0s[index].split('/')[-1].split('.png')[0]) 202 | obj_fname = '%s/%04d.pfm'%(flowl0s[index].replace('/optical_flow','object_index').replace('into_past/','/').replace('into_future/','/').rsplit('/',1)[0],fnum) 203 | obj_idx = disparity_loader(obj_fname) 204 | 205 | depthflow = np.concatenate((depthflow, obj_idx[:,:,np.newaxis]), 2) 206 | # depthflow dimension: H x W x 8 (depth=1 + rectified_flow3d=3 + flow3d=3 + object_segmentation=1) 207 | 208 | iml1 = np.asarray(iml1) 209 | iml0 = np.asarray(iml0) 210 | 211 | return iml0, iml1, flowl0, depthflow, intr, RT01 212 | 213 | 214 | def motionmask(flowl0, depthflow, RT01): 215 | ''' 216 | flowl0: optical flow. [H, W, 3] 217 | depthflow: a concatenation of depth, rectified scene flow, scene flow, and object segmentation. 
[H, W, 8] 218 | RT01: camera motion from the future frame to the current frame. [6, ] 219 | ''' 220 | valid_mask = (flowl0[:,:,2] == 1) & (depthflow[:,:,0] < 100) & (depthflow[:,:,0] > 0.01) # valid flow & valid depth 221 | Tglobal_gt = -RT01[3:, np.newaxis, np.newaxis] # background translation 222 | Tlocal_gt = depthflow[:,:,1:4].transpose(2, 0, 1) # point translation (after removing rotation) 223 | m3d_gt = np.linalg.norm(Tlocal_gt - Tglobal_gt, 2, 0) # abs. motion 224 | fgmask_gt = m3d_gt * 100 > 1 225 | fgmask_gt[~valid_mask] = False 226 | 227 | return fgmask_gt 228 | 229 | 230 | if __name__ == '__main__': 231 | parser = argparse.ArgumentParser(description='segmask_gt_generation') 232 | parser.add_argument('--database', 233 | help='path to the database (required)') 234 | parser.add_argument('--debug', action='store_true', default=False, 235 | help='generate visualization') 236 | parser.add_argument('--frames_pass', default='frames_cleanpass', 237 | help='which pass to use, either clean or final') 238 | parser.add_argument('--dataset', 239 | help='choose from FlyingThings3D, Driving, Monkaa') 240 | args = parser.parse_args() 241 | 242 | if args.debug: 243 | os.makedirs('%s/%s/results_viz' % (args.database, args.dataset), exist_ok=True) 244 | 245 | if args.dataset == 'Monkaa': 246 | level = 4 247 | else: 248 | level = 6 249 | iml0s, iml1s, flowl0s, disp0s, dispcs, calibs = dataloader('%s/%s/' % (args.database, args.dataset), 250 | level=level, fpass=args.frames_pass) 251 | 252 | print("Generating %s masks..." % len(flowl0s)) 253 | for i in range(len(iml0s)): 254 | idd = flowl0s[i].split('/')[-1].split('_')[-2] 255 | mask_fn = '%s/%s.npy' % (os.path.dirname(flowl0s[i]).replace('optical_flow', 'rigidmask'), idd) 256 | if os.path.exists(mask_fn): 257 | print(i) 258 | continue 259 | os.makedirs(os.path.dirname(mask_fn), exist_ok=True) 260 | 261 | iml0, iml1, flowl0, depthflow, intr, RT01 = exp_loader(i, iml0s, iml1s, flowl0s, disp0s, dispcs, calibs) 262 | fgmask = motionmask(flowl0, depthflow, RT01) 263 | np.save(mask_fn, fgmask) 264 | 265 | if args.debug: 266 | if args.dataset == 'Driving' and 'rigidmask/15mm_focallength/scene_forwards/fast/left' not in mask_fn: 267 | continue 268 | elif args.dataset == 'Monkaa' and 'rigidmask/eating_camera2_x2/left' not in mask_fn: 269 | continue 270 | elif args.dataset == 'FlyingThings3D' and not ('rigidmask/TEST/A' in mask_fn and 'into_future/left' in mask_fn): 271 | continue 272 | print("Visualizing %s" % mask_fn) 273 | flowl0viz = flow_to_image(flowl0) 274 | maskviz = np.stack((fgmask * 255.0, )*3, axis=-1).astype(np.uint8) 275 | inputs = np.concatenate([iml0, flowl0viz, maskviz], axis=1) 276 | cv2.imwrite('%s/%s/results_viz/%s.png' % (args.database, args.dataset, str(i).zfill(5)), cv2.cvtColor(inputs, cv2.COLOR_RGB2BGR)) --------------------------------------------------------------------------------
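A minimal usage sketch, not part of the repository: it shows how the `.npy` masks written by segmask_gt.py could be read back and overlaid on the first frame for a quick sanity check. It relies only on conventions visible above (motionmask flags pixels whose residual 3D translation exceeds 0.01 scene units, i.e. m3d_gt * 100 > 1, and each boolean H x W mask is saved under a `rigidmask` directory that mirrors `optical_flow`, one file per flow frame). The helper names load_mask_for_flow and overlay_mask are invented for this illustration.

import os
import numpy as np
import cv2

def load_mask_for_flow(flow_path):
    # segmask_gt.py saves one boolean H x W array per optical-flow file:
    #   <.../rigidmask/...>/<frame id>.npy
    idd = flow_path.split('/')[-1].split('_')[-2]
    mask_fn = '%s/%s.npy' % (os.path.dirname(flow_path).replace('optical_flow', 'rigidmask'), idd)
    return np.load(mask_fn)

def overlay_mask(image_path, flow_path, out_path='overlay.png', alpha=0.5):
    img = cv2.imread(image_path)           # BGR, H x W x 3, same resolution as the mask
    mask = load_mask_for_flow(flow_path)    # bool, H x W
    red = np.zeros_like(img)
    red[..., 2] = 255                       # paint moving (non-camera-motion) pixels red
    blend = np.where(mask[..., None], (alpha * red + (1 - alpha) * img).astype(np.uint8), img)
    cv2.imwrite(out_path, blend)

Because the mask directory layout mirrors the optical_flow tree, the same flow path used during generation can be reused to locate the corresponding mask when building a training or debugging pipeline.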