├── .autoenv.zsh ├── .autoenv_leave.zsh ├── .gitignore ├── .make ├── install.sh └── install_anaconda3.sh ├── LICENSE ├── Makefile ├── README.md ├── checkpoint ├── checkpoint_epoch29.pth.tar └── combine_icp_checkpoint_epoch40.pth.tar ├── code ├── .gitignore ├── Logger.py ├── __init__.py ├── config.py ├── convergence_basin.py ├── data │ ├── MovingObj3D.py │ ├── ScanNet.py │ ├── SimpleLoader.py │ ├── TUM_RGBD.py │ ├── VaryLighting.py │ ├── data_examples │ │ └── TUM │ │ │ ├── color │ │ │ ├── 1305031790.645155.png │ │ │ ├── 1305031790.713097.png │ │ │ ├── 1305031790.781258.png │ │ │ ├── 1305031790.845151.png │ │ │ └── 1305031790.913129.png │ │ │ └── depth │ │ │ ├── 1305031790.640468.png │ │ │ ├── 1305031790.709421.png │ │ │ ├── 1305031790.773548.png │ │ │ ├── 1305031790.839363.png │ │ │ └── 1305031790.909436.png │ └── dataloader.py ├── evaluate.py ├── experiments │ ├── __init__.py │ ├── kf_vo.py │ ├── select_method.py │ └── warping_objects.py ├── models │ ├── LeastSquareTracking.py │ ├── __init__.py │ ├── algorithms.py │ ├── criterions.py │ ├── geometry.py │ └── submodules.py ├── run_example.py ├── test.py ├── timers.py ├── tools │ ├── ICP.py │ ├── __init__.py │ ├── display.py │ └── rgbd_odometry.py ├── train.py └── train_utils.py ├── environment.yml ├── environment30X.yml ├── scripts ├── eval_tum_feature_icp.sh ├── eval_tum_rgbd.sh ├── run_kf_vo.sh ├── run_kf_vo_cb.sh ├── train_moving_objs3d.sh ├── train_tum_feature_icp.sh └── train_tum_rgbd.sh └── setup ├── datasets.yaml └── environment.yml /.autoenv.zsh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | 3 | setup(){ 4 | source $HERE/.anaconda3/bin/activate 5 | path_prepend $HERE/.anaconda3/bin 6 | } 7 | HERE=${0:a:h} 8 | setup -------------------------------------------------------------------------------- /.autoenv_leave.zsh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | conda deactivate 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | logs/ 3 | test_results/ 4 | -------------------------------------------------------------------------------- /.make/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo_bold () { 6 | echo -e "\033[1m$*\033[0m" 7 | } 8 | 9 | echo_warning () { 10 | echo -e "\033[33m$*\033[0m" 11 | } 12 | 13 | conda_check_installed () { 14 | if [ ! $# -eq 1 ]; then 15 | echo "usage: $0 PACKAGE_NAME" 16 | return 1 17 | fi 18 | conda list | awk '{print $1}' | egrep "^$1$" &>/dev/null 19 | } 20 | 21 | HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 22 | ROOT=$HERE/.. 
23 | 24 | cd $ROOT 25 | 26 | source .anaconda3/bin/activate 27 | 28 | # --------------------------------------------------------------------------------------- 29 | 30 | echo_bold "==> Installing the right pip and dependencies for the fresh python" 31 | pip install --upgrade pip 32 | conda install python=3.6 # meet tensorflow requirements 33 | conda install ipython 34 | 35 | #echo_bold "==> Installing computer vision-related packages" 36 | #pip install \ 37 | # jupyter \ 38 | # cython\ 39 | # numpy\ 40 | # matplotlib\ 41 | # opencv-python \ 42 | # opencv-contrib-python \ 43 | # plyfile \ 44 | # pandas \ 45 | # requests \ 46 | # scipy \ 47 | # imageio \ 48 | # scikit-image \ 49 | # sklearn \ 50 | # pyyaml \ 51 | # tqdm \ 52 | # transforms3d \ 53 | # 54 | #echo_bold "==> Installing deep learning-related packages" 55 | #pip install future 56 | #conda install pytorch torchvision cudatoolkit=9.2 -c pytorch 57 | #pip install tensorboard 58 | 59 | echo_bold "==> Installing requirements" 60 | # pip install -r setup/requirements.txt 61 | conda env update --file environment30X.yml 62 | # pip install -e . 63 | 64 | # --------------------------------------------------------------------------------------- 65 | 66 | echo_bold "\nAll is well! You can start using this! 67 | $ source .anaconda3/bin/activate 68 | " 69 | -------------------------------------------------------------------------------- /.make/install_anaconda3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 4 | ROOT=$HERE/.. 5 | 6 | if [ ! -d $ROOT/.anaconda3 ]; then 7 | echo "==>Installing anaconda 3" 8 | echo $ROOT 9 | echo $HERE 10 | cd $ROOT 11 | curl -L https://binbin-xu.github.io//tools/install_anaconda3.sh | bash -s . 12 | fi 13 | 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright © 2020-2021 Smart Robotics Lab, Imperial College London 4 | Copyright © 2020-2021 Binbin Xu 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | @echo '## Make commands ##' 3 | @echo 4 | @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs 5 | 6 | install_first: 7 | @.make/install_first.sh 8 | 9 | install_anaconda3: 10 | @.make/install_anaconda3.sh 11 | 12 | install: install_anaconda3 13 | @.make/install.sh 14 | 15 | clean: 16 | # @rm -rf dense_feature_tracking.egg-info 17 | @rm -rf .anaconda3 18 | 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# Deep Probabilistic Feature-metric Tracking

Binbin Xu · Andrew J. Davison · Stefan Leutenegger

[Paper](https://arxiv.org/pdf/2008.13504.pdf) | [Video](https://youtu.be/6pMosl6ZAPE)

## Summary

This is the official repository of our RA-L 2021 paper:

**Deep Probabilistic Feature-metric Tracking**, \
*Binbin Xu, Andrew J. Davison, Stefan Leutenegger*, \
IEEE Robotics and Automation Letters (RA-L), Vol. 6, No. 1, pp. 223-230, 2021 (ICRA 2021 presentation) \
Best Paper Honorable Mention Award \
[[Paper]](https://arxiv.org/pdf/2008.13504.pdf) [[Video]](https://youtu.be/6pMosl6ZAPE)


## Setup
You can reproduce the setup using our anaconda environment configurations. We provide a Makefile to install the environment:
```bash
make install
```

Every time before you run, activate the environment inside the repo folder:

```bash
source .anaconda3/bin/activate
```
The pre-trained network weights can be downloaded [here](https://imperialcollegelondon.box.com/s/xryhbshxtktizjw5fpmxaic1kncxr4cw).

## Prepare the datasets

**TUM RGBD Dataset**: Download the dataset from [TUM RGBD](https://vision.in.tum.de/data/datasets/rgbd-dataset/download) to '$YOUR_TUM_RGBD_DIR'. Create a symbolic link to the data directory:

```
ln -s $YOUR_TUM_RGBD_DIR code/data/data_tum
```

**MovingObjects3D Dataset**: Download the dataset from [MovingObjs3D](https://drive.google.com/open?id=1EIlS4J2J0sdsq8Mw_03DXHlRQmfL8XQx) to '$YOUR_MOV_OBJS_3D_DIR'. Create a symbolic link to the data directory:

```
ln -s $YOUR_MOV_OBJS_3D_DIR code/data/data_objs3D
```

**Custom Dataset**: You can also use your own dataset.
Our work uses the above two datasets for training and deploys the trained weights on ScanNet and our self-collected dataset. Please refer to [ScanNet.py](code/data/ScanNet.py) and [VaryLighting.py](code/data/VaryLighting.py) for examples of custom data loading.


## Training and Evaluation
To run the full training and evaluation, please follow the steps below.

### Run training

**Train example with the TUM RGBD dataset:**

```bash
./scripts/train_tum_rgbd.sh
```

To check the full training settings, run the help config:
```bash
python train.py --help
```


**Train example with MovingObjects3D:** Camera egocentric motion estimation is different from object-centric motion estimation, so we provide a separate training script for the MovingObjects3D dataset.
The settings are the same as above except for the dataset name. You can use our provided script to train the model:

```bash
./scripts/train_moving_objs3d.sh
```

### Run evaluation
**Run the pretrained model:** If you have set up the datasets properly, you can run the learned model with the checkpoint we provide in the trained model directory:

```bash
./scripts/eval_tum_rgbd.sh
```


You can substitute the trajectory, the keyframes and the checkpoint file. Training and evaluation share the same config settings. To check the full settings, run the help config:

```bash
python evaluate.py --help
```

**Results:** The evaluation results are generated automatically as both '*.pkl' and '*.csv' files in the folder 'test_results/'.


**Run comparisons:** We also provide scripts to run comparisons with the classic RGBD and ICP methods from Open3D. Please refer to [rgbd_odometry.py](code/tools/rgbd_odometry.py) and [ICP.py](code/tools/ICP.py) for the details; a minimal Open3D sketch is shown below.
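
The snippet below is a minimal sketch (not the repo's own `code/tools/rgbd_odometry.py` wrapper) of how such an Open3D RGBD odometry baseline can be called on a single TUM-style frame pair. The file paths and intrinsics are placeholder assumptions, and depending on your Open3D version the odometry module may live under `o3d.odometry` instead of `o3d.pipelines.odometry`.

```python
# Hedged Open3D RGBD-odometry example with placeholder paths and TUM-like intrinsics.
import numpy as np
import open3d as o3d

# Assumed 640x480 pinhole intrinsics (fx, fy, cx, cy)
intrinsic = o3d.camera.PinholeCameraIntrinsic(640, 480, 525.0, 525.0, 319.5, 239.5)

def load_rgbd(color_path, depth_path):
    # TUM depth PNGs are stored in units of 1/5000 m (see the dataloaders in code/data)
    color = o3d.io.read_image(color_path)
    depth = o3d.io.read_image(depth_path)
    return o3d.geometry.RGBDImage.create_from_color_and_depth(
        color, depth, depth_scale=5000.0, depth_trunc=5.0,
        convert_rgb_to_intensity=True)

source = load_rgbd("rgb/frame0.png", "depth/frame0.png")  # placeholder file names
target = load_rgbd("rgb/frame1.png", "depth/frame1.png")

# Hybrid photometric + geometric term, starting from the identity pose
success, trans, info = o3d.pipelines.odometry.compute_rgbd_odometry(
    source, target, intrinsic, np.identity(4),
    o3d.pipelines.odometry.RGBDOdometryJacobianFromHybridTerm(),
    o3d.pipelines.odometry.OdometryOption())
print(success)
print(trans)
```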

### Joint feature-metric and geometric tracking
We can combine our proposed feature-metric tracking with geometric tracking methods to achieve better performance. We provide a script to run the joint tracking with ICP:
```bash
./scripts/train_tum_feature_icp.sh
```
This uses the trained feature-metric network weights as the initialization and combines them with ICP as the refinement.

The evaluation script is also provided:
```
./scripts/eval_tum_feature_icp.sh
```


### Run visual odometry
Please note this is a prototype version of our **visual odometry frontend**. It mainly serves as a demo to show the performance of our method.

```bash
./scripts/run_kf_vo.sh
```

To visualise the keyframe tracking in the paper, add the arguments `--vo_type keyframe --two_view` to the above script.
To check the full settings, run the help config:
```bash
python code/experiments/kf_vo.py --help
```


**Convergence basin analysis** for the keyframe visual odometry is also provided. Check the script `scripts/run_kf_vo_cb.sh` for more details.


## Citation
```bibtex
@article{Xu:etal:RAL2021,
  author  = {Binbin Xu and Andrew Davison and Stefan Leutenegger},
  journal = {{IEEE} Robotics and Automation Letters ({RAL})},
  title   = {Deep Probabilistic Feature-metric Tracking},
  year    = {2021},
  volume  = {6},
  number  = {1},
  pages   = {223 - 230},
}
```

Please cite the paper if you find our code useful.


## License
This repo is BSD 3-Clause licensed. Part of its code is from [Taking a Deeper Look at the Inverse Compositional Algorithm](https://github.com/lvzhaoyang/DeeperInverseCompositionalAlgorithm), which is MIT licensed. We thank the authors for their great work.
164 | 165 | Copyright © 2020-2021 Smart Robotics Lab, Imperial College London \ 166 | Copyright © 2020-2021 Binbin Xu 167 | 168 | 169 | ## Contact 170 | Binbin Xu (b.xu17@imperial.ac.uk) 171 | -------------------------------------------------------------------------------- /checkpoint/checkpoint_epoch29.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/checkpoint/checkpoint_epoch29.pth.tar -------------------------------------------------------------------------------- /checkpoint/combine_icp_checkpoint_epoch40.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/checkpoint/combine_icp_checkpoint_epoch40.pth.tar -------------------------------------------------------------------------------- /code/.gitignore: -------------------------------------------------------------------------------- 1 | trained_models/* 2 | test/* 3 | test_results/* 4 | */**/*.pyc 5 | logs/* 6 | -------------------------------------------------------------------------------- /code/Logger.py: -------------------------------------------------------------------------------- 1 | """ Tensorflow Logger for training and testing including images 2 | # SPDX-FileCopyrightText: 2021 Binbin Xu 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | @author: Zhaoyang Lv 6 | @date: March 2019 7 | """ 8 | 9 | import sys, os, shutil 10 | import os.path as osp 11 | import subprocess 12 | from torch.utils.tensorboard import SummaryWriter 13 | import torch 14 | import numpy as np 15 | import cv2 16 | from collections import OrderedDict 17 | 18 | class Logger(object): 19 | """ 20 | example usage: 21 | 22 | stdout = Logger('log.txt') 23 | sys.stdout = stdout 24 | 25 | ... your code here ... 
26 | 27 | stdout.delink() 28 | """ 29 | def __init__(self, filename="Default.log"): 30 | self.terminal = sys.stdout 31 | self.log = open(filename, "w") 32 | 33 | def delink(self): 34 | self.log.close() 35 | #self.log = open('foo', "w") 36 | # self.write = self.writeTerminalOnly 37 | 38 | def writeTerminalOnly(self, message): 39 | self.terminal.write(message) 40 | 41 | def write(self, message): 42 | self.terminal.write(message) 43 | self.log.write(message) 44 | 45 | def flush(self): 46 | pass 47 | 48 | class TensorBoardLogger(object): 49 | def __init__(self, logging_dir, logfile_name, print_freq = 10): 50 | 51 | self.log_dir = logging_dir 52 | self.print_freq = print_freq 53 | 54 | if not os.path.isdir(logging_dir): 55 | os.makedirs(logging_dir) 56 | 57 | self.summary_writer = SummaryWriter(log_dir=logging_dir) 58 | 59 | # standard logger to print to terminal 60 | logfile = osp.join(logging_dir,'log.txt') 61 | stdout = Logger(logfile) 62 | sys.stdout = stdout 63 | 64 | def write_to_tensorboard(self, display_dict, iteration): 65 | """ Write the saved states (display_dict) to tensorboard 66 | """ 67 | for k, v in display_dict.items(): 68 | self.summary_writer.add_scalar(k, v, iteration) 69 | 70 | def add_images_to_tensorboard(self, image_list: list, name, iteration): 71 | 72 | if iteration % self.print_freq != 0: 73 | return 74 | 75 | concated_images = np.concatenate(image_list, axis=1) 76 | ratio = 960 / concated_images.shape[1] 77 | if ratio < 1: 78 | concated_images = cv2.resize(concated_images, None, fx=ratio, fy=ratio) 79 | concated_images = cv2.cvtColor(concated_images, cv2.COLOR_BGR2RGB) 80 | self.summary_writer.add_image(name, concated_images, 81 | global_step=iteration, 82 | dataformats='HWC', 83 | ) 84 | 85 | def write_to_terminal(self, display_dict, epoch, batch_iter, epoch_len, batch_time, is_train = True): 86 | """ Write the save states (display_dict) and training information to terminal for display 87 | """ 88 | 89 | if batch_iter % self.print_freq != 0: 90 | return 91 | 92 | if is_train: 93 | prefix = 'Train' 94 | else: 95 | prefix = 'Test' 96 | 97 | state = prefix + ':\tEpoch %d, Batch %d/%d, BatchTime %.4f'%(epoch+1, batch_iter, epoch_len, batch_time) 98 | 99 | loss = '' 100 | for k, v in display_dict.items(): 101 | loss += k + ' ' + '%.8f ' % v 102 | 103 | print(state + loss) 104 | 105 | def save_checkpoint(self, network, state_info = None, 106 | filename='checkpoint.pth.tar'): 107 | """save checkpoint to disk""" 108 | state_dict = network.state_dict().copy() 109 | 110 | if torch.cuda.device_count() > 1: 111 | state_dict_rename = OrderedDict() 112 | for k, v in state_dict.items(): 113 | name = k[7:] # remove `module.` 114 | state_dict_rename[name] = v 115 | state_dict = state_dict_rename 116 | 117 | if state_info is None: 118 | state = {'state_dict': state_dict} 119 | else: 120 | state = state_info 121 | state['state_dict'] = state_dict 122 | 123 | checkpoint_path = osp.join(self.log_dir,filename) 124 | torch.save(state, checkpoint_path) 125 | return checkpoint_path 126 | 127 | 128 | def log_git_revisions_hash(dir): 129 | hashes = [] 130 | # the latest git information 131 | latest_commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD']) 132 | # hashes.append(subprocess.check_output(['git', 'rev-parse', 'HEAD'])) 133 | # hashes.append(subprocess.check_output(['git', 'rev-parse', 'HEAD^'])) 134 | # return hashes 135 | f = open(osp.join(dir, 'git_status.txt'), "w") 136 | f.write(str(latest_commit_id)) 137 | f.close() 138 | 139 | 140 | def check_directory(filename): 141 
| target_dir = os.path.dirname(filename) 142 | if not os.path.isdir(target_dir): 143 | os.makedirs(target_dir) -------------------------------------------------------------------------------- /code/__init__.py: -------------------------------------------------------------------------------- 1 | import pkgutil 2 | __path__ = pkgutil.extend_path(__path__, __name__) 3 | -------------------------------------------------------------------------------- /code/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Argparse configuration for training and testing in the paper 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | def add_network_config(parser): 8 | parser.add_argument('--feature_channel', default=1, type=int, 9 | help='Specify the feature channel used for tracking. The default is 1.\n') 10 | parser.add_argument('--uncertainty_channel', default=1, type=int, 11 | help='Specify the uncertainty channel used for tracking when using uncerti. The default is 1.\n') 12 | parser.add_argument('--feature_extract', default='average', type=str, 13 | choices=('1by1', 'conv', 'skip', 'average', 'prob_fuse'), 14 | help='Specify the method to extract feature from the pyramid. The default is using 1by1.\n') 15 | parser.add_argument('--uncertainty', 16 | default='None', type=str, 17 | choices=('None', 'identity', 'sigmoid', 'feature', 'gaussian', 'laplacian', 'old_gaussian', 'old_laplacian'), 18 | help='Choose a uncertainty function for feature-metric tracking. DIC is the original CVPR work. ' 19 | 'None using identity matrix. and track is used in GN tracking\n') 20 | parser.add_argument('--combine_ICP',action='store_true', 21 | help='Combine the training with ICP.\n') 22 | parser.add_argument('--add_init_pose_noise', action='store_true', 23 | help='Add noise in the init pose (translation) only in training.\n') 24 | parser.add_argument('--init_pose', default='identity', type=str, 25 | choices=('identity', 'sfm_net', 'dense_net'), 26 | help='Use predicted pose as initial pose.\n') 27 | # @TODO: try different number 0.01 in sfm-learner, 0.1 in deeptam 28 | parser.add_argument('--scale_init_pose', default='0.01', type=float, 29 | help='Scaling the initial predicted pose.\n') 30 | parser.add_argument('--train_init_pose', action='store_true', 31 | help='Jointly train pose predictor by regressing predicted pose to the ground truth.\n') 32 | parser.add_argument('--multi_hypo', default='None', type=str, 33 | choices=('None', 'average', 'prob_fuse', 'res_prob_fuse'), 34 | help='Use multi hypothesis for init pose guess.\n') 35 | parser.add_argument('--res_input', action='store_true', 36 | help='Also input residual for posenet.\n') 37 | # virtual does not work well 38 | # parser.add_argument('--virtual_camera', action='store_true', 39 | # help='Use rendered virtual frame and virtual camera instead of img1 \n') 40 | parser.add_argument('--vis_feat', default=False, action='store_true', 41 | help='visualize the feature maps in the training') 42 | parser.add_argument('--scannet_subset_train', default='0.25', type=float, 43 | help='Subset ratio in scannet for training.\n') 44 | parser.add_argument('--scannet_subset_val', default='0.005', type=float, 45 | help='Subset ratio in scannet for validation.\n') 46 | parser.add_argument('--train_uncer_prop', action='store_true', 47 | help='Use uncertainty propagation in the training loss\n') 48 | parser.add_argument('--obj_only', action='store_true', 49 | help='Use uncertainty 
propagation in the training loss\n') 50 | parser.add_argument('--loss', default='EPE3D', type=str, 51 | choices=('EPE3D', 'RPE', 'UEPE3D', 'URPE'), 52 | help='Training loss.\n') 53 | parser.add_argument('--remove_tru_sigma', action='store_true', 54 | help='Remove truncated uncertainty areas in the tracking for training/testing\n') 55 | parser.add_argument('--scaler', default='None', type=str, 56 | choices=('None', 'oneResidual', 'twoResidual', 'MultiScale2w', 'expMultiScale'), 57 | help='Choose a scale function for combing ICP and feature methods. \n') 58 | parser.add_argument('--scale_icp', default='0.01', type=float, 59 | help='Scaling the ICP w.r.t feature/RGB.\n') 60 | parser.add_argument('--add_vl_dataset', action='store_true', 61 | help='Add varying lighting dataset to the TUM dataset for training/validation \n') 62 | 63 | def add_tracking_config(parser): 64 | add_network_config(parser) 65 | parser.add_argument('--network', 66 | default='DeepIC', type=str, 67 | choices=('DeepIC', 'GaussNewton'), 68 | help='Choose a network to run. \n \ 69 | The DeepIC is the proposed Deeper Inverse Compositional method. \n\ 70 | The GuassNewton is the baseline for Inverse Compositional method which does not include \ 71 | any learnable parameters\n') 72 | parser.add_argument('--mestimator', 73 | default='MultiScale2w', type=str, 74 | choices=('None', 'MultiScale2w'), 75 | help='Choose a weighting function for the Trust Region method.\n\ 76 | The MultiScale2w is the proposed (B) convolutional M-estimator. \n') 77 | parser.add_argument('--solver', 78 | default='Direct-ResVol', type=str, 79 | choices=('Direct-Nodamping', 'Direct-ResVol'), 80 | help='Choose the solver function for the Trust Region method. \n\ 81 | Direct-Nodamping is the Gauss-Newton algorithm, which does not use damping. \n\ 82 | Direct-ResVol is the proposed (C) Trust-Region Network. \n\ 83 | (default: Direct-ResVol) ') 84 | parser.add_argument('--direction', 85 | default='inverse', type=str, 86 | choices=('inverse', 'forward'), 87 | help='Choose the direction to update pose: inverse, or forward \n') 88 | parser.add_argument('--encoder_name', 89 | default='ConvRGBD2', 90 | choices=('ConvRGBD2', 'RGB', 'ConvRGBD'), 91 | help='The encoder architectures. \ 92 | ConvRGBD2 takes the two-view features as input. \n\ 93 | RGB is using the raw RGB images as input (converted to intensity afterwards).\n\ 94 | (default: ConvRGBD2)') 95 | parser.add_argument('--max_iter_per_pyr', 96 | default=3, type=int, 97 | help='The maximum number of iterations at each pyramids.\n') 98 | parser.add_argument('--no_weight_sharing', 99 | action='store_true', 100 | help='If this flag is on, we disable sharing the weights across different backbone network when extracing \ 101 | features. In default, we share the weights for all network in each pyramid level.\n') 102 | parser.add_argument('--tr_samples', default=10, type=int, 103 | help='Set the number of trust-region samples. 
(default: 10)\n') 104 | 105 | def add_basics_config(parser): 106 | """ the basic setting 107 | (supposed to be shared through train and inference) 108 | """ 109 | parser.add_argument('--cpu_workers', type=int, default=12, 110 | help="Number of cpu threads for data loader.\n") 111 | parser.add_argument('--dataset', type=str, 112 | choices=('TUM_RGBD', 'ScanNet', 'MovingObjects3D', 'VaryLighting'), 113 | help='Choose a dataset to train/val/evaluate.\n') 114 | parser.add_argument('--image_resize', type=float, default=None, 115 | help='downsize ratio for input images') 116 | parser.add_argument('--time', dest='time', action='store_true', 117 | help='Count the execution time of each step.\n' ) 118 | 119 | def add_test_basics_config(parser): 120 | parser.add_argument('--tracker', default='learning_based', type=str, 121 | choices=('learning_based', 'ICP', 'ColorICP', 'RGBD')) 122 | parser.add_argument('--batch_per_gpu', default=8, type=int, 123 | help='Specify the batch size during test. The default is 8.\n') 124 | parser.add_argument('--checkpoint', default='', type=str, 125 | help='Choose a checkpoint model to test.\n') 126 | parser.add_argument('--keyframes', 127 | default='1,2,4,8', type=str, 128 | help='Choose the number of keyframes to train the algorithm.\n') 129 | parser.add_argument('--verbose', action='store_true', 130 | help='Print/save all the intermediate representations') 131 | parser.add_argument('--eval_set', default='test', 132 | choices=('test', 'validation')) 133 | parser.add_argument('--trajectory', type=str, 134 | default = '', 135 | help = 'Specify a trajectory to run.\n') 136 | 137 | def add_train_basics_config(parser): 138 | """ add the basics about the training """ 139 | parser.add_argument('--checkpoint', default='', type=str, 140 | help='Choose a pretrained checkpoint model to start with. \n') 141 | parser.add_argument('--batch_per_gpu', default=64, type=int, 142 | help='Specify the batch size during training.\n') 143 | parser.add_argument('--epochs', 144 | default=30, type=int, 145 | help='The total number of total epochs to run. Default is 30.\n' ) 146 | parser.add_argument('--resume_training', 147 | dest='resume_training', action='store_true', 148 | help='Resume the training using the loaded checkpoint. If not, restart the training. 
\n\ 149 | You will need to use the --checkpoint config to load the pretrained checkpoint' ) 150 | parser.add_argument('--pretrained_model', default='', type=str, 151 | help='Initialize the model weights with pretrained model.\n') 152 | parser.add_argument('--no_val', 153 | default=False, 154 | action='store_true', 155 | help='Use no validatation set for training.\n') 156 | parser.add_argument('--keyframes', 157 | default='1,2,4,8', type=str, 158 | help='Choose the number of keyframes to train the algorithm') 159 | parser.add_argument('--verbose', action='store_true', 160 | help='Print/save all the intermediate representations.\n') 161 | 162 | def add_train_log_config(parser): 163 | """ checkpoint and log options """ 164 | parser.add_argument('--checkpoint_folder', default='', type=str, 165 | help='The folder name (postfix) to save the checkpoint.') 166 | parser.add_argument('--snapshot', default=1, type=int, 167 | help='Number of interations to save a snapshot') 168 | parser.add_argument('--save_checkpoint_freq', 169 | default=1, type=int, 170 | help='save the checkpoint for every N epochs') 171 | parser.add_argument('--prefix', default='', type=str, 172 | help='the prefix string added to the log files') 173 | parser.add_argument('-p', '--print_freq', 174 | default=10, type=int, 175 | help='print frequency (default: 10)') 176 | 177 | 178 | def add_train_optim_config(parser): 179 | """ add training optimization options """ 180 | parser.add_argument('--opt', 181 | type=str, default='adam', choices=('sgd','adam'), 182 | help='choice of optimizer (default: adam) \n') 183 | parser.add_argument('--lr', 184 | default=0.0005, type=float, 185 | help='initial learning rate. \n') 186 | parser.add_argument('--lr_decay_ratio', 187 | default=0.5, type=float, 188 | help='lr decay ratio (default:0.5)') 189 | parser.add_argument('--lr_decay_epochs', 190 | default=[5, 10, 20], type=int, nargs='+', 191 | help='lr decay epochs') 192 | parser.add_argument('--lr_min', default=1e-6, type=float, 193 | help='minimum learning rate') 194 | parser.add_argument('--lr_restart', default=10, type=int, 195 | help='restart learning after N epochs') 196 | 197 | def add_train_loss_config(parser): 198 | """ add training configuration for the loss function """ 199 | parser.add_argument('--regression_loss_type', 200 | default='SmoothL1', type=str, choices=('L1', 'SmoothL1'), 201 | help='Loss function for flow regression (default: SmoothL1 loss)') 202 | 203 | def add_vo_config(parser): 204 | """ add testing configuration for kf-vo demo """ 205 | parser.add_argument('--vo', default='feature_icp', type=str, 206 | choices=('DeepIC', 'RGB', 'ICP', 'RGB+ICP', 'feature', 'feature_icp'), 207 | help='Select which tracking method to use for visual odometry.\n') 208 | parser.add_argument('--vo_type', default='incremental', type=str, 209 | choices=('incremental', 'keyframe'), 210 | help='Select which reference frame to use for tracking.\n') 211 | parser.add_argument('--two_view', action='store_true', 212 | help='Only visualization two views.\n' ) 213 | parser.add_argument('--gt_tracker', action='store_true', 214 | help='Use ground truth pose for point cloud visualization') 215 | parser.add_argument('--save_img', action='store_true', 216 | help='Save visualizations.\n' ) 217 | 218 | def add_cb_config(parser): 219 | """ add visualization configurations for convergence basin """ 220 | parser.add_argument('--cb_dimension', default='2D', type=str, 221 | choices=('1D', '2D', '6D'), 222 | help='Select which dimension to visualize for convergence 
basin.\n') 223 | parser.add_argument('--save_img', action='store_true', 224 | help='Save visualizations.\n' ) 225 | parser.add_argument('--reset_cb', action='store_true', 226 | help='Save visualizations.\n' ) 227 | parser.add_argument('--pert_samples', default=31, type=int, 228 | help='perturbation samples in each pose dimension') 229 | 230 | def add_object_config(parser): 231 | parser.add_argument('--method', default='feature_icp', type=str, 232 | choices=('DeepIC', 'RGB', 'ICP', 'RGB+ICP', 'feature', 'feature_icp'), 233 | help='Select which tracking method to use for visual odometry.\n') 234 | parser.add_argument('--batch_per_gpu', default=64, type=int, 235 | help='Specify the batch size during test. The default is 8.\n') 236 | parser.add_argument('--checkpoint', default='', type=str, 237 | help='Choose a checkpoint model to test.\n') 238 | parser.add_argument('--keyframes', 239 | default='1,2,4', type=str, 240 | help='Choose the number of keyframes to train the algorithm.\n') 241 | parser.add_argument('--eval_set', default='test', 242 | choices=('test', 'validation')) 243 | parser.add_argument('--object', type=str, 244 | default = '', 245 | help = 'Specify a trajectory to run.\n') 246 | parser.add_argument('--save_img', action='store_true', 247 | help='Save visualizations.\n' ) 248 | parser.add_argument('--gt_pose', action='store_true', 249 | help='Save visualizations.\n' ) 250 | parser.add_argument('--recompute', action='store_true', 251 | help='Save visualizations.\n' ) -------------------------------------------------------------------------------- /code/data/MovingObj3D.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data loader for MovingObjs 3D dataset 3 | 4 | @author: Zhaoyang Lv 5 | @date: May 2019 6 | """ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | from __future__ import unicode_literals 12 | 13 | import sys, os, random 14 | import pickle 15 | import functools 16 | 17 | import numpy as np 18 | import torch.utils.data as data 19 | import os.path as osp 20 | 21 | from imageio import imread 22 | from tqdm import tqdm 23 | 24 | from cv2 import resize, INTER_NEAREST 25 | 26 | class MovingObjects3D(data.Dataset): 27 | 28 | # every sequence has 200 frames. 
29 | categories = { 30 | 'train': {'aeroplane': [0,190], 31 | 'bicycle': [0,190], 32 | 'bus': [0,190], 33 | 'car': [0,190]}, 34 | 35 | 'validation': {'aeroplane': [190,200], 36 | 'bicycle': [190,200], 37 | 'bus': [190,200], 38 | 'car': [190,200]}, 39 | 40 | 'test': {'boat': [0,200], 41 | 'motorbike': [0,200]} 42 | } 43 | 44 | def __init__(self, root, load_type='train', keyframes = [1], data_transform=None, 45 | category=None, image_resize=0.5): 46 | super(MovingObjects3D, self).__init__() 47 | 48 | self.base_folder = osp.join(root) 49 | 50 | data_all = self.categories[load_type] 51 | 52 | # split it into train and test set (the first 20 are for test) 53 | self.image_seq = [] 54 | self.depth_seq = [] 55 | self.invalid_seq = [] 56 | self.object_mask_seq = [] 57 | self.cam_pose_seq = [] 58 | self.obj_pose_seq = [] 59 | self.obj_vis_idx = [] 60 | self.calib = [] 61 | self.obj_names = [] 62 | 63 | self.transforms = data_transform 64 | 65 | if load_type in ['validation', 'test']: 66 | # should not mix different keyframes in test 67 | assert(len(keyframes) == 1) 68 | self.keyframes = [1] 69 | self.sample_freq = keyframes[0] 70 | else: 71 | self.keyframes = keyframes 72 | self.sample_freq = 1 73 | 74 | self.ids = 0 75 | self.images_size = [240, 320] 76 | # get the accumulated image sequences on the fly 77 | self.seq_acc_ids = [0] 78 | for data_obj, frame_interval in data_all.items(): 79 | if category is not None and data_obj != category: 80 | continue 81 | 82 | start_idx, end_idx = frame_interval 83 | print('Load {:} data from frame {:d} to {:d}'.format(data_obj, start_idx, end_idx)) 84 | for seq_idx in range(start_idx, end_idx, 1): 85 | seq_str = "{:06d}".format(seq_idx) 86 | 87 | info_pkl= osp.join(self.base_folder, 88 | data_obj, seq_str, 'info.pkl') 89 | 90 | color_seq, depth_seq, invalid_seq, mask_seq, camera_poses_seq, object_poses_seq,\ 91 | obj_visible_frames, calib_seq = extract_info_pickle(info_pkl) 92 | 93 | obj_visible_frames = obj_visible_frames[::self.sample_freq] 94 | 95 | self.image_seq.append([osp.join(self.base_folder, x) for x in color_seq]) 96 | self.depth_seq.append([osp.join(self.base_folder, x) for x in depth_seq]) 97 | # self.invalid_seq.append(invalid_seq) 98 | self.object_mask_seq.append([osp.join(self.base_folder, x) for x in mask_seq]) 99 | self.cam_pose_seq.append(camera_poses_seq) 100 | self.obj_pose_seq.append(object_poses_seq) 101 | self.calib.append(calib_seq) 102 | self.obj_vis_idx.append(obj_visible_frames) 103 | 104 | self.obj_names.append('{:}_{:03d}'.format(data_obj, seq_idx)) 105 | 106 | total_valid_frames = max(0, len(obj_visible_frames) - max(self.keyframes)) 107 | 108 | self.ids += total_valid_frames 109 | self.seq_acc_ids.append(self.ids) 110 | 111 | # downscale the input image to half 112 | self.fx_s = image_resize 113 | self.fy_s = image_resize 114 | 115 | print('There are a total of {:} valid frames'.format(self.ids)) 116 | 117 | def __len__(self): 118 | return self.ids 119 | 120 | def __getitem__(self, index): 121 | # the index we want from search sorted is shifted for one 122 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index+1) - 1, 0) 123 | frame_idx = index - self.seq_acc_ids[seq_idx] 124 | 125 | this_idx= self.obj_vis_idx[seq_idx][frame_idx] 126 | next_idx= self.obj_vis_idx[seq_idx][frame_idx + random.choice(self.keyframes)] 127 | 128 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx]) 129 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx]) 130 | 131 | if self.transforms: 132 | color0, color1 = 
self.transforms([color0, color1]) 133 | 134 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx]) 135 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx]) 136 | 137 | cam_pose0 = self.cam_pose_seq[seq_idx][this_idx] 138 | cam_pose1 = self.cam_pose_seq[seq_idx][next_idx] 139 | obj_pose0 = self.obj_pose_seq[seq_idx][this_idx] 140 | obj_pose1 = self.obj_pose_seq[seq_idx][next_idx] 141 | 142 | # the relative allocentric transform of objects 143 | transform = functools.reduce(np.dot, 144 | [np.linalg.inv(cam_pose1), obj_pose1, np.linalg.inv(obj_pose0), cam_pose0]).astype(np.float32) 145 | 146 | # the validity of the object is up the object mask 147 | obj_index = 1 # object index is in default to be 1 148 | obj_mask0 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][this_idx], obj_index) 149 | obj_mask1 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][next_idx], obj_index) 150 | 151 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32) 152 | calib[0] *= self.fx_s 153 | calib[1] *= self.fy_s 154 | calib[2] *= self.fx_s 155 | calib[3] *= self.fy_s 156 | 157 | obj_name = self.obj_names[seq_idx] 158 | # pair_name = '{:}/{:06d}to{:06d}'.format(obj_name, this_idx, next_idx) 159 | pair_name = {'seq': obj_name, 160 | 'seq_idx': seq_idx, 161 | 'frame0': this_idx, 162 | 'frame1': next_idx} 163 | 164 | return color0, color1, depth0, depth1, transform, calib, obj_mask0, obj_mask1, pair_name 165 | 166 | def get_original_size_batch(self, index): 167 | # the index we want from search sorted is shifted for one 168 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index+1) - 1, 0) 169 | frame_idx = index - self.seq_acc_ids[seq_idx] 170 | 171 | this_idx= self.obj_vis_idx[seq_idx][frame_idx] 172 | next_idx= self.obj_vis_idx[seq_idx][frame_idx + random.choice(self.keyframes)] 173 | 174 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx], do_resize=False) 175 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx], do_resize=False) 176 | 177 | if self.transforms: 178 | color0, color1 = self.transforms([color0, color1]) 179 | 180 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx], do_resize=False) 181 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx], do_resize=False) 182 | 183 | cam_pose0 = self.cam_pose_seq[seq_idx][this_idx] 184 | cam_pose1 = self.cam_pose_seq[seq_idx][next_idx] 185 | obj_pose0 = self.obj_pose_seq[seq_idx][this_idx] 186 | obj_pose1 = self.obj_pose_seq[seq_idx][next_idx] 187 | 188 | # the relative allocentric transform of objects 189 | transform = functools.reduce(np.dot, 190 | [np.linalg.inv(cam_pose1), obj_pose1, np.linalg.inv(obj_pose0), cam_pose0]).astype(np.float32) 191 | 192 | # the validity of the object is up the object mask 193 | obj_index = 1 # object index is in default to be 1 194 | obj_mask0 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][this_idx], obj_index, do_resize=False) 195 | obj_mask1 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][next_idx], obj_index, do_resize=False) 196 | 197 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32) 198 | 199 | obj_name = self.obj_names[seq_idx] 200 | # pair_name = '{:}/{:06d}to{:06d}'.format(obj_name, this_idx, next_idx) 201 | pair_name = {'seq': obj_name, 202 | 'seq_idx': seq_idx, 203 | 'frame0': this_idx, 204 | 'frame1': next_idx} 205 | 206 | return color0, color1, depth0, depth1, transform, calib, obj_mask0, obj_mask1, pair_name 207 | 208 | def __load_rgb_tensor(self, 
path, do_resize=True): 209 | """ Load the rgb image 210 | """ 211 | image = imread(path)[:, :, :3] 212 | image = image.astype(np.float32) / 255.0 213 | if do_resize: 214 | image = resize(image, None, fx=self.fx_s, fy=self.fy_s) 215 | return image 216 | 217 | def __load_depth_tensor(self, path, do_resize=True): 218 | """ Load the depth 219 | """ 220 | depth = imread(path).astype(np.float32) / 1e3 221 | if do_resize: 222 | depth = resize(depth, None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST) 223 | depth = np.clip(depth, 1e-1, 1e2) # the valid region of the depth 224 | return depth[np.newaxis, :] 225 | 226 | def __load_binary_mask_tensor(self, path, seg_index, do_resize=True): 227 | """ Load a binary segmentation mask (numbers) 228 | If the object matches the specified index, return true; 229 | Otherwise, return false 230 | """ 231 | obj_mask = imread(path) 232 | mask = (obj_mask == seg_index) 233 | if do_resize: 234 | mask = resize(mask.astype(np.float), None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST) 235 | return mask.astype(np.bool)[np.newaxis, :] 236 | 237 | def extract_info_pickle(info_pkl): 238 | 239 | with open(info_pkl, 'rb') as p: 240 | info = pickle.load(p) 241 | 242 | color_seq = [x.split('final/')[1] for x in info['color']] 243 | depth_seq = [x.split('final/')[1] for x in info['depth']] 244 | invalid_seq = [x.split('final/')[1] for x in info['invalid'] ] 245 | mask_seq = [x.split('final/')[1] for x in info['object_mask']] 246 | 247 | # in this rendering setting, there is only one object 248 | camera_poses_seq = info['pose'] 249 | object_poses_seq = info['object_poses']['Model_1'] 250 | object_visible_frames = info['object_visible_frames']['Model_1'] 251 | 252 | calib_seq = info['calib'] 253 | 254 | return color_seq, depth_seq, invalid_seq, mask_seq, \ 255 | camera_poses_seq, object_poses_seq, object_visible_frames, calib_seq 256 | 257 | 258 | if __name__ == '__main__': 259 | from data.dataloader import load_data 260 | import torchvision.utils as torch_utils 261 | 262 | # loader = MovingObjects3D('', load_type='train', keyframes=[1]) 263 | loader = load_data('MovingObjects3D', keyframes=[1], load_type='train') 264 | torch_loader = data.DataLoader(loader, batch_size=16, shuffle=False, num_workers=4) 265 | 266 | for batch in torch_loader: 267 | color0, color1, depth0, depth1, transform, K, mask0, mask1, names = batch 268 | B,C,H,W=color0.shape 269 | 270 | bcolor0_img = torch_utils.make_grid(color0, nrow=4) 271 | bcolor1_img = torch_utils.make_grid(color1, nrow=4) 272 | # bdepth0_img = torch_utils.make_grid(depth0, nrow=4) 273 | # bdepth1_img = torch_utils.make_grid(depth1, nrow=4) 274 | bmask0_img = torch_utils.make_grid(mask0.view(B,1,H,W)*255, nrow=4) 275 | bmask1_img = torch_utils.make_grid(mask1.view(B,1,H,W)*255, nrow=4) 276 | 277 | import matplotlib.pyplot as plt 278 | plt.figure() 279 | plt.imshow(bcolor0_img.numpy().transpose((1,2,0))) 280 | plt.figure() 281 | plt.imshow(bcolor1_img.numpy().transpose((1,2,0))) 282 | # plt.figure() 283 | # plt.imshow(bdepth0_img.numpy().transpose((1,2,0))) 284 | # plt.figure() 285 | # plt.imshow(bdepth1_img.numpy().transpose((1,2,0))) 286 | plt.figure() 287 | plt.imshow(bmask0_img.numpy().transpose((1,2,0))) 288 | plt.figure() 289 | plt.imshow(bmask1_img.numpy().transpose((1,2,0))) 290 | plt.show() 291 | 292 | -------------------------------------------------------------------------------- /code/data/ScanNet.py: -------------------------------------------------------------------------------- 1 | """ The dataloader 
for ScanNet dataset 2 | # SPDX-FileCopyrightText: 2021 Binbin Xu 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import os, random 7 | import os.path as osp 8 | 9 | import numpy as np 10 | from torch.utils.data import Dataset 11 | from imageio import imread 12 | from tqdm import tqdm 13 | import pickle 14 | from cv2 import resize, INTER_NEAREST 15 | 16 | 17 | class ScanNet(Dataset): 18 | 19 | def __init__(self, root=None, category='train', 20 | keyframes=[1], data_transform=None, select_traj=None, 21 | image_resize=0.25, truncate_depth=True, 22 | subset_train=0.95, subset_val=0.05): 23 | assert root is not None 24 | super(ScanNet, self).__init__() 25 | 26 | self.image_seq = [] # list (seq) of list (frame) of string (rgb image path) 27 | self.timestamp = [] # empty 28 | self.depth_seq = [] # list (seq) of list (frame) of string (depth image path) 29 | # self.invalid_seq = [] # empty 30 | self.cam_pose_seq = [] # list (seq) of list (frame) of 4 X 4 ndarray 31 | self.calib = [] # list (seq) of list (intrinsics: fx, fy, cx, cy) 32 | self.seq_names = [] # list (seq) of string (seq name) 33 | 34 | self.subset_train = subset_train # only use subset for training 35 | self.subset_val = subset_val # only use subset for validation 36 | assert self.subset_train + self.subset_val <= 1 37 | self.ids = 0 38 | self.seq_acc_ids = [0] 39 | self.keyframes = keyframes 40 | self.cam = { 41 | 'distCoeffs': None, 42 | 'fx': 577.871, 43 | 'fy': 577.871, 44 | 'ux': 319.5, 45 | 'uy': 239.5, 46 | 'size': (640, 480), 47 | } 48 | self.depth_conversion = 1.0/5e3 49 | 50 | self.transforms = data_transform 51 | 52 | if category == 'test': 53 | self.__load_test(osp.join(root, 'val'), select_traj) 54 | else: # train and validation 55 | self.__load_train_val(osp.join(root, 'train'), category) 56 | 57 | # downscale the input image to a quarter 58 | self.fx_s = image_resize 59 | self.fy_s = image_resize 60 | self.truncate_depth = truncate_depth 61 | 62 | print('ScanNet dataloader for {:} using keyframe {:}: \ 63 | {:} valid frames'.format(category, keyframes, self.ids)) 64 | 65 | def __read_scans(self, data_dir): 66 | # glob for sequences 67 | sequences = [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))] 68 | print('Found {} sequences in directory: {}'.format(len(sequences), data_dir)) 69 | 70 | scans = [] 71 | with tqdm(total=len(sequences)) as t: 72 | for seq in sequences: 73 | seq_dir = os.path.join(data_dir, seq) 74 | # synchronized trajectory file 75 | sync_traj_file = osp.join(seq_dir, 'sync_trajectory.pkl') 76 | 77 | if not osp.isfile(sync_traj_file): 78 | print("The synchronized trajectory file {:} has not been generated.".format(seq)) 79 | print("Generate it now...") 80 | 81 | # get sequence length from the _info.txt file 82 | nframes = int(open(os.path.join(seq_dir, '_info.txt')).readlines()[-1].split()[-1]) 83 | 84 | views = list() 85 | for i in range(nframes): 86 | frame = os.path.join(seq_dir, 'frame-{:06d}'.format(i)) 87 | pose_file = os.path.join(seq_dir, frame + '.pose.txt') 88 | pose = np.loadtxt(open(pose_file, 'r')) 89 | 90 | # do not use any frame with inf pose 91 | if np.isinf(np.sum(pose)): 92 | print(frame) 93 | continue 94 | views.append({'img': frame + '.color.jpg', 95 | 'dpt': frame + '.merged_depth.png', 96 | 'frame_id': i, 97 | 'pose': pose}) 98 | 99 | # export trajectory file 100 | with open(sync_traj_file, 'wb') as output: 101 | pickle.dump(views, output) 102 | 103 | else: 104 | with open(sync_traj_file, 'rb') as p: 105 | views = pickle.load(p) 106 | 107 | 
scans.append(views) 108 | t.set_postfix({'seq': seq}) 109 | t.update() 110 | return scans 111 | 112 | def __load_train_val(self, root, category): 113 | scans = self.__read_scans(root) 114 | 115 | for scene in scans: 116 | total_num = len(scene) 117 | # the ratio to split the train & validation set 118 | if category == 'train': 119 | start_idx, end_idx = 0, int(self.subset_train * total_num) 120 | else: 121 | start_idx, end_idx = int((1-self.subset_val) * total_num), total_num 122 | 123 | images = [scene[idx]['img'] for idx in range(start_idx, end_idx)] 124 | depths = [scene[idx]['dpt'] for idx in range(start_idx, end_idx)] 125 | extrin = [scene[idx]['pose'] for idx in range(start_idx, end_idx)] 126 | # fake timestamp with frame id 127 | frame_id = [scene[idx]['frame_id'] for idx in range(start_idx, end_idx)] 128 | seq_name = osp.basename(osp.dirname(images[0])) 129 | calib = [self.cam['fx'], self.cam['fy'], self.cam['ux'], self.cam['uy']] 130 | 131 | self.calib.append(calib) 132 | self.image_seq.append(images) 133 | self.depth_seq.append(depths) 134 | self.timestamp.append(frame_id) 135 | self.cam_pose_seq.append(extrin) 136 | self.seq_names.append(seq_name) 137 | self.ids += max(0, len(images) - max(self.keyframes)) 138 | self.seq_acc_ids.append(self.ids) 139 | 140 | def __load_test(self, root, select_traj=None): 141 | """ Note: 142 | The test trajectory is loaded slightly different from the train/validation trajectory. 143 | We only select keyframes from the entire trajectory, rather than use every individual frame. 144 | For a given trajectory of length N, using key-frame 2, the train/validation set will use 145 | [[1, 3], [2, 4], [3, 5],...[N-1, N]], 146 | while test set will use pair 147 | [[1, 3], [3, 5], [5, 7],...[N-1, N]] 148 | This difference result in a change in the trajectory length when using different keyframes. 149 | 150 | The benefit of sampling keyframes of the test set is that the output is a more reasonable trajectory; 151 | And in training/validation, we fully leverage every pair of image. 
152 | """ 153 | 154 | assert (len(self.keyframes) == 1) 155 | scans = self.__read_scans(root) 156 | kf = self.keyframes[0] 157 | self.keyframes = [1] 158 | 159 | for scene in scans: 160 | seq_name = osp.basename(osp.dirname(scene[0]['img'])) 161 | if select_traj is not None: 162 | if seq_name != select_traj: continue 163 | 164 | calib = [self.cam['fx'], self.cam['fy'], self.cam['ux'], self.cam['uy']] 165 | self.calib.append(calib) 166 | 167 | total_num = len(scene) 168 | images = [scene[idx]['img'] for idx in range(0, total_num, kf)] 169 | depths = [scene[idx]['dpt'] for idx in range(0, total_num, kf)] 170 | extrin = [scene[idx]['pose'] for idx in range(0, total_num, kf)] 171 | 172 | # fake timestamp with frame id 173 | timestamp = [scene[idx]['frame_id'] for idx in range(0, total_num, kf)] 174 | self.image_seq.append(images) 175 | self.timestamp.append(timestamp) 176 | self.depth_seq.append(depths) 177 | self.cam_pose_seq.append(extrin) 178 | self.seq_names.append(seq_name) 179 | self.ids += max(0, len(images) - 1) 180 | self.seq_acc_ids.append(self.ids) 181 | 182 | if len(self.image_seq) == 0: 183 | raise Exception("The specified trajectory is not in the test set nor supported.") 184 | 185 | def __getitem__(self, index): 186 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0) 187 | frame_idx = index - self.seq_acc_ids[seq_idx] 188 | 189 | this_idx = frame_idx 190 | next_idx = frame_idx + random.choice(self.keyframes) 191 | 192 | # if the next random keyframe is too far 193 | if self.timestamp[seq_idx][next_idx] - self.timestamp[seq_idx][this_idx] > max(self.keyframes): 194 | search_keyframes = self.keyframes[::-1] + [-kf for kf in self.keyframes] 195 | inf_pose_issue = True 196 | print("search:", self.timestamp[seq_idx][this_idx]) 197 | for keyframe in search_keyframes: 198 | next_idx = frame_idx + keyframe 199 | if abs(self.timestamp[seq_idx][next_idx] - self.timestamp[seq_idx][this_idx]) <= max(self.keyframes): 200 | inf_pose_issue = False 201 | break 202 | if inf_pose_issue: 203 | next_idx = frame_idx + 1 204 | print("#invalid frame:", self.image_seq[seq_idx][this_idx]) 205 | # raise ValueError 206 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx]) 207 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx]) 208 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx]) 209 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx]) 210 | 211 | if self.transforms: 212 | color0, color1 = self.transforms([color0, color1]) 213 | 214 | # normalize the coordinate 215 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32) 216 | calib[0] *= self.fx_s 217 | calib[1] *= self.fy_s 218 | calib[2] *= self.fx_s 219 | calib[3] *= self.fy_s 220 | 221 | cam_pose0 = self.cam_pose_seq[seq_idx][this_idx] 222 | cam_pose1 = self.cam_pose_seq[seq_idx][next_idx] 223 | transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32) 224 | 225 | name = {'seq': self.seq_names[seq_idx], 226 | 'frame0': this_idx, 227 | 'frame1': next_idx} 228 | 229 | # camera_info = dict() 230 | camera_info = {"height": color0.shape[0], 231 | "width": color0.shape[1], 232 | "fx": calib[0], 233 | "fy": calib[1], 234 | "ux": calib[2], 235 | "uy": calib[3]} 236 | 237 | return color0, color1, depth0, depth1, transform, calib, name, camera_info 238 | 239 | def __len__(self): 240 | return self.ids 241 | 242 | def __load_rgb_tensor(self, path): 243 | image = imread(path)[:, :, :3] 244 | image = image.astype(np.float32) / 255.0 245 | image = 
resize(image, None, fx=self.fx_s, fy=self.fy_s) 246 | return image 247 | 248 | def __load_depth_tensor(self, path): 249 | """ Load the depth: 250 | The depth images are scaled by a factor of 5000, i.e., a pixel 251 | value of 5000 in the depth image corresponds to a distance of 252 | 1 meter from the camera, 10000 to 2 meter distance, etc. 253 | A pixel value of 0 means missing value/no data. 254 | """ 255 | depth = imread(path).astype(np.float32) * self.depth_conversion 256 | depth = resize(depth, None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST) 257 | if self.truncate_depth: 258 | depth = np.clip(depth, a_min=0.5, a_max=5.0) # the accurate range of kinect depth 259 | return depth[np.newaxis, :] -------------------------------------------------------------------------------- /code/data/SimpleLoader.py: -------------------------------------------------------------------------------- 1 | """ 2 | This Simple loader partially refers to 3 | https://github.com/NVlabs/learningrigidity/blob/master/SimpleLoader.py 4 | # SPDX-FileCopyrightText: 2021 Binbin Xu 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | @author: Zhaoyang Lv 8 | @date: May, 2019 9 | """ 10 | 11 | import sys, os, random 12 | import torch.utils.data as data 13 | import os.path as osp 14 | 15 | import numpy as np 16 | 17 | from imageio import imread 18 | 19 | class SimpleLoader(data.Dataset): 20 | 21 | def __init__(self, color_dir, depth_dir, K): 22 | """ 23 | :param the directory of color images 24 | :param the directory of depth images 25 | :param the intrinsic parameter [fx, fy, cx, cy] 26 | """ 27 | 28 | print('This simple loader is designed for TUM. \n\ 29 | The depth scale may be different in your depth format. ') 30 | 31 | color_files = sorted(os.listdir(color_dir)) 32 | depth_files = sorted(os.listdir(depth_dir)) 33 | 34 | # please ensure the two folders use the same number of synchronized files 35 | assert(len(color_files) == len(depth_files)) 36 | 37 | self.color_pairs = [] 38 | self.depth_pairs = [] 39 | self.ids = len(color_files) - 1 40 | for idx in range(self.ids): 41 | self.color_pairs.append([ 42 | osp.join(color_dir, color_files[idx]), 43 | osp.join(color_dir, color_files[idx+1]) 44 | ] ) 45 | self.depth_pairs.append([ 46 | osp.join(depth_dir, depth_files[idx]), 47 | osp.join(depth_dir, depth_files[idx+1]) 48 | ] ) 49 | 50 | self.K = K 51 | 52 | def __getitem__(self, index): 53 | 54 | image0_path, image1_path = self.color_pairs[index] 55 | depth0_path, depth1_path = self.depth_pairs[index] 56 | 57 | image0 = self.__load_rgb_tensor(image0_path) 58 | image1 = self.__load_rgb_tensor(image1_path) 59 | 60 | depth0 = self.__load_depth_tensor(depth0_path) 61 | depth1 = self.__load_depth_tensor(depth1_path) 62 | 63 | calib = np.asarray(self.K, dtype=np.float32) 64 | 65 | return image0, image1, depth0, depth1, calib 66 | 67 | def __len__(self): 68 | return self.ids 69 | 70 | def __load_rgb_tensor(self, path): 71 | image = imread(path) 72 | image = image.astype(np.float32) / 255.0 73 | image = np.transpose(image, (2,0,1)) 74 | return np.asarray(image.astype(np.float32)) 75 | 76 | def __load_depth_tensor(self, path): 77 | assert(path.endswith('.png')) 78 | depth = imread(path).astype(np.float32) / 5e3 79 | depth = np.clip(depth, a_min=0.5, a_max=5.0) 80 | 81 | return np.asarray(depth[np.newaxis, :]) -------------------------------------------------------------------------------- /code/data/VaryLighting.py: -------------------------------------------------------------------------------- 1 | """ The dataloader 
for custom dataset 2 | # SPDX-FileCopyrightText: 2021 Binbin Xu 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | from __future__ import unicode_literals 10 | 11 | import sys, os, random 12 | 13 | import os.path as osp 14 | import torch.utils.data as data 15 | 16 | from imageio import imread 17 | from cv2 import resize, INTER_NEAREST 18 | 19 | import os.path 20 | import glob 21 | 22 | # import third party 23 | import numpy as np 24 | import random 25 | 26 | 27 | def get_depth_from_corresponding_rgb(rgb_file_abs): 28 | rgb_dir, rgb_file_rel = os.path.split(rgb_file_abs) 29 | depth_dir = rgb_dir.replace("rgb", "depth") 30 | depth_file = os.path.join(depth_dir, rgb_file_rel) 31 | return depth_file 32 | 33 | 34 | class VaryLighting(data.Dataset): 35 | 36 | """ 37 | Dataset class for our varying lighting dataset 38 | """ 39 | base = 'data' 40 | IMAGE_HEIGHT = 480 41 | IMAGE_WIDTH = 640 42 | DEPTH_SCALE = 1.0/1000.0 43 | K = [525.0, 525.0, 319.5, 239.5] 44 | 45 | def __init__(self, root='', category='keyframe', 46 | keyframes=[1,], data_transform=None, select_traj=None, 47 | image_resize=0.25, truncate_depth=True, pair='incremental', 48 | ): 49 | 50 | super(VaryLighting, self).__init__() 51 | assert pair in ['incremental', 'keyframe'] 52 | self.pair = pair 53 | 54 | self.image_seq = [] # list (seq) of list (frame) of string (rgb image path) 55 | self.timestamp = [] # empty 56 | self.depth_seq = [] # list (seq) of list (frame) of string (depth image path) 57 | self.invalid_seq = [] # empty 58 | self.cam_pose_seq = [] # list (seq) of list (frame) of 4 X 4 ndarray 59 | self.calib = [] # list (seq) of list (intrinsics: fx, fy, cx, cy) 60 | self.seq_names = [] # list (seq) of string (seq name) 61 | 62 | self.ids = 0 63 | self.seq_acc_ids = [0] 64 | self.keyframes = keyframes 65 | 66 | self.transforms = data_transform 67 | 68 | if category == 'test': 69 | # self.set_test_mode() 70 | self.__load_test(root, select_traj) 71 | elif category in ['train', 'validation']: # train and validation 72 | raise NotImplementedError() 73 | elif category == 'kf': 74 | self.__load_kf(root, select_traj) 75 | else: 76 | raise NotImplementedError 77 | 78 | # downscale the input image to a quarter 79 | self.fx_s = image_resize 80 | self.fy_s = image_resize 81 | self.truncate_depth = truncate_depth 82 | 83 | print('Vary Lighting dataloader for {:} using keyframe {:}: \ 84 | {:} valid frames'.format(category, keyframes, self.ids)) 85 | 86 | 87 | def __load_test(self, root, select_traj=None): 88 | """ Note: 89 | The test trajectory is loaded slightly different from the train/validation trajectory. 90 | We only select keyframes from the entire trajectory, rather than use every individual frame. 91 | For a given trajectory of length N, using key-frame 2, the train/validation set will use 92 | [[1, 3], [2, 4], [3, 5],...[N-1, N]], 93 | while test set will use pair 94 | [[1, 3], [3, 5], [5, 7],...[N-1, N]] 95 | This difference result in a change in the trajectory length when using different keyframes. 96 | 97 | The benefit of sampling keyframes of the test set is that the output is a more reasonable trajectory; 98 | And in training/validation, we fully leverage every pair of image. 
99 | """ 100 | 101 | assert(len(self.keyframes) == 1) 102 | kf = self.keyframes[0] 103 | self.keyframes = [1] 104 | track_scene = osp.join(root, "*/") 105 | scene_lists = glob.glob(track_scene, recursive=True) 106 | 107 | self._num_scenes = len(scene_lists) 108 | if self._num_scenes == 0: 109 | raise ValueError("No sub-folder data found under the dataset root") 110 | for scene in scene_lists: 111 | scene_name = osp.basename(osp.dirname(scene)) 112 | if select_traj is not None: 113 | if scene_name != select_traj: continue 114 | 115 | rgb_images_regex = os.path.join(scene, "rgb/*.png") 116 | all_rgb_images_in_scene = sorted(glob.glob(rgb_images_regex)) 117 | total_num = len(all_rgb_images_in_scene) 118 | 119 | self.calib.append(self.K) 120 | 121 | 122 | images = [all_rgb_images_in_scene[idx] for idx in range(0, total_num, kf)] 123 | # fake timestamps 124 | timestamp = [os.path.splitext(os.path.basename(image))[0] for image in images] 125 | depths = [get_depth_from_corresponding_rgb(rgb_file) for rgb_file in images] 126 | extrin = [None] * len(images) # [tq2mat(frames[idx][0]) for idx in range(0, total_num, kf)] 127 | self.image_seq.append(images) 128 | self.timestamp.append(timestamp) 129 | self.depth_seq.append(depths) 130 | self.cam_pose_seq.append(extrin) 131 | self.seq_names.append(scene) 132 | self.ids += max(0, len(images)-1) 133 | self.seq_acc_ids.append(self.ids) 134 | 135 | def __load_rgb_tensor(self, path): 136 | """ Load the rgb image 137 | """ 138 | image = imread(path)[:, :, :3] 139 | image = image.astype(np.float32) / 255.0 140 | image = resize(image, None, fx=self.fx_s, fy=self.fy_s) 141 | return image 142 | 143 | def __load_depth_tensor(self, path): 144 | """ Load the depth: 145 | The depth images are scaled by a factor of 5000, i.e., a pixel 146 | value of 5000 in the depth image corresponds to a distance of 147 | 1 meter from the camera, 10000 to 2 meter distance, etc. 148 | A pixel value of 0 means missing value/no data.
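For example, a raw pixel value of 2500 corresponds to 2500 / 5000 = 0.5 m, which is also the lower clipping bound applied below when truncate_depth is set.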
149 | """ 150 | depth = imread(path).astype(np.float32) / 5e3 151 | depth = resize(depth, None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST) 152 | if self.truncate_depth: 153 | depth = np.clip(depth, a_min=0.5, a_max=5.0) # the accurate range of kinect depth 154 | return depth[np.newaxis, :] 155 | 156 | def __getitem__(self, index): 157 | # pair in the way like [[1, 3], [3, 5], [5, 7],...[N-1, N]] 158 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0) 159 | frame_idx = index - self.seq_acc_ids[seq_idx] 160 | 161 | this_idx = frame_idx 162 | next_idx = frame_idx + random.choice(self.keyframes) 163 | 164 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx]) 165 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx]) 166 | 167 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx]) 168 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx]) 169 | 170 | if self.transforms: 171 | color0, color1 = self.transforms([color0, color1]) 172 | 173 | # normalize the coordinate 174 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32) 175 | calib[0] *= self.fx_s 176 | calib[1] *= self.fy_s 177 | calib[2] *= self.fx_s 178 | calib[3] *= self.fy_s 179 | 180 | # cam_pose0 = self.cam_pose_seq[seq_idx][this_idx] 181 | # cam_pose1 = self.cam_pose_seq[seq_idx][next_idx] 182 | # transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32) 183 | transform = None 184 | 185 | name = {'seq': self.seq_names[seq_idx], 186 | 'frame0': this_idx, 187 | 'frame1': next_idx} 188 | 189 | # camera_info = dict() 190 | camera_info = {"height": color0.shape[0], 191 | "width": color0.shape[1], 192 | "fx": calib[0], 193 | "fy": calib[1], 194 | "ux": calib[2], 195 | "uy": calib[3]} 196 | return color0, color1, depth0, depth1, transform, calib, name, camera_info 197 | 198 | 199 | def get_keypair(self, index, kf_idx=0): 200 | # pair in the way like [[1, 3], [1, 5], [1, 7],...[1, N]] 201 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0) 202 | frame_idx = index - self.seq_acc_ids[seq_idx] 203 | 204 | this_idx = kf_idx 205 | next_idx = frame_idx 206 | 207 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx]) 208 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx]) 209 | 210 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx]) 211 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx]) 212 | 213 | if self.transforms: 214 | color0, color1 = self.transforms([color0, color1]) 215 | 216 | # normalize the coordinate 217 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32) 218 | calib[0] *= self.fx_s 219 | calib[1] *= self.fy_s 220 | calib[2] *= self.fx_s 221 | calib[3] *= self.fy_s 222 | 223 | # cam_pose0 = self.cam_pose_seq[seq_idx][this_idx] 224 | # cam_pose1 = self.cam_pose_seq[seq_idx][next_idx] 225 | # transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32) 226 | transform = None 227 | 228 | name = {'seq': self.seq_names[seq_idx], 229 | 'frame0': this_idx, 230 | 'frame1': next_idx} 231 | 232 | # camera_info = dict() 233 | camera_info = {"height": color0.shape[0], 234 | "width": color0.shape[1], 235 | "fx": calib[0], 236 | "fy": calib[1], 237 | "ux": calib[2], 238 | "uy": calib[3]} 239 | return color0, color1, depth0, depth1, transform, calib, name, camera_info 240 | 241 | def __len__(self): 242 | return self.ids -------------------------------------------------------------------------------- 
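A minimal usage sketch for the loader above (not part of the repository code), assuming a dataset root laid out as <root>/<scene>/rgb/*.png with matching <scene>/depth/*.png; the root path below is a placeholder:

from data.VaryLighting import VaryLighting

vl = VaryLighting(root='/path/to/vary_lighting', category='test',
                  keyframes=[1], select_traj=None,
                  image_resize=0.25, truncate_depth=True)
color0, color1, depth0, depth1, _, calib, name, cam_info = vl[0]
# color0 / color1: H x W x 3 float32 in [0, 1] (no torch transform supplied here)
# depth0 / depth1: 1 x H x W depth in meters, clipped to [0.5, 5.0]
# calib: [fx, fy, cx, cy] already multiplied by image_resize
# cam_info: dict with the resized height/width and the scaled intrinsics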
/code/data/data_examples/TUM/color/1305031790.645155.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.645155.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/color/1305031790.713097.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.713097.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/color/1305031790.781258.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.781258.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/color/1305031790.845151.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.845151.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/color/1305031790.913129.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.913129.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/depth/1305031790.640468.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.640468.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/depth/1305031790.709421.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.709421.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/depth/1305031790.773548.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.773548.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/depth/1305031790.839363.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.839363.png -------------------------------------------------------------------------------- 
/code/data/data_examples/TUM/depth/1305031790.909436.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.909436.png -------------------------------------------------------------------------------- /code/data/dataloader.py: -------------------------------------------------------------------------------- 1 | """ The dataloaders for training and evaluation 2 | # SPDX-FileCopyrightText: 2021 Binbin Xu 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | @author: Zhaoyang Lv 6 | @date: March 2019 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | from __future__ import unicode_literals 13 | 14 | import torchvision.transforms as transforms 15 | import numpy as np 16 | import os 17 | import socket 18 | import yaml 19 | try: 20 | # use faster C loader if available 21 | from yaml import CLoader 22 | except ImportError: 23 | from yaml import Loader as CLoader 24 | 25 | 26 | def get_datasets_path(which_dataset): 27 | # utils_path = os.getcwd().split('/') 28 | # print(utils_path) 29 | # source_folder = '/'.join(utils_path[:-2]) 30 | # print(source_folder) 31 | # return source_folder 32 | curr_path = os.path.realpath(__file__) 33 | env_file_path = os.path.realpath(os.path.join(curr_path, '../../../setup/datasets.yaml')) 34 | hostname = str(socket.gethostname()) 35 | env_config = yaml.load(open(env_file_path), Loader=CLoader) 36 | return env_config[which_dataset][hostname]['dataset_root'] 37 | 38 | TUM_DATASET_DIR = get_datasets_path('TUM_RGBD') 39 | MOVING_OBJECTS_3D = get_datasets_path('MOVING_OBJECTS_3D') 40 | ScanNet_DATASET_DIR = get_datasets_path('SCANNET') 41 | VL_DATASET_DIR = get_datasets_path('VaryLighting') 42 | 43 | def load_data(dataset_name, keyframes = None, load_type = 'train', 44 | select_trajectory = '', load_numpy = False, image_resize=0.25, truncate_depth=True, 45 | options=None, pair='incremental'): 46 | """ Use two frame camera pose data loader 47 | """ 48 | if select_trajectory == '': 49 | select_trajectory = None 50 | 51 | if not load_numpy: 52 | if load_type == 'train': 53 | data_transform = image_transforms(['color_augment', 'numpy2torch']) 54 | else: 55 | data_transform = image_transforms(['numpy2torch']) 56 | else: 57 | data_transform = image_transforms([]) 58 | 59 | if dataset_name == 'TUM_RGBD': 60 | from data.TUM_RGBD import TUM 61 | np_loader = TUM(TUM_DATASET_DIR, load_type, keyframes, 62 | data_transform, select_trajectory, 63 | image_resize=image_resize, 64 | truncate_depth=truncate_depth, 65 | add_vl_dataset=options.add_vl_dataset, 66 | ) 67 | elif dataset_name == 'ScanNet': 68 | from data.ScanNet import ScanNet 69 | np_loader = ScanNet(ScanNet_DATASET_DIR, load_type, keyframes, 70 | data_transform, select_trajectory, 71 | image_resize=image_resize, 72 | truncate_depth=truncate_depth, 73 | subset_train=options.scannet_subset_train, 74 | subset_val=options.scannet_subset_val, 75 | ) 76 | elif dataset_name == 'MovingObjects3D': 77 | from data.MovingObj3D import MovingObjects3D 78 | np_loader = MovingObjects3D(MOVING_OBJECTS_3D, load_type, 79 | keyframes, data_transform, 80 | category=select_trajectory, 81 | image_resize=image_resize, 82 | ) 83 | # elif dataset_name == 'BundleFusion': 84 | # from data.BundleFusion import BundleFusion 85 | # np_loader = BundleFusion(load_type, keyframes, 
data_transform) 86 | # elif dataset_name == 'Refresh': 87 | # from data.REFRESH import REFRESH 88 | # np_loader = REFRESH(load_type, keyframes) 89 | elif dataset_name == 'VaryLighting': 90 | from data.VaryLighting import VaryLighting 91 | np_loader = VaryLighting(VL_DATASET_DIR, load_type, keyframes, 92 | data_transform, select_trajectory, 93 | pair=pair, 94 | image_resize=image_resize, 95 | truncate_depth=truncate_depth, 96 | ) 97 | else: 98 | raise NotImplementedError() 99 | 100 | return np_loader 101 | 102 | def image_transforms(options): 103 | 104 | transform_list = [] 105 | 106 | if 'color_augment' in options: 107 | augment_parameters = [0.9, 1.1, 0.9, 1.1, 0.9, 1.1] 108 | transform_list.append(AugmentImages(augment_parameters)) 109 | 110 | if 'numpy2torch' in options: 111 | transform_list.append(ToTensor()) 112 | 113 | # if 'color_normalize' in options: # we do it on the fly 114 | # transform_list.append(ColorNormalize()) 115 | 116 | return transforms.Compose(transform_list) 117 | 118 | class ColorNormalize(object): 119 | 120 | def __init__(self): 121 | rgb_mean = (0.4914, 0.4822, 0.4465) 122 | rgb_std = (0.2023, 0.1994, 0.2010) 123 | self.transform = transforms.Normalize(mean=rgb_mean, std=rgb_std) 124 | 125 | def __call__(self, sample): 126 | return [self.transform(x) for x in sample] 127 | 128 | class ToTensor(object): 129 | def __init__(self): 130 | self.transform = transforms.ToTensor() 131 | 132 | def __call__(self, sample): 133 | return [self.transform(x) for x in sample] 134 | 135 | class AugmentImages(object): 136 | def __init__(self, augment_parameters): 137 | self.gamma_low = augment_parameters[0] # 0.9 138 | self.gamma_high = augment_parameters[1] # 1.1 139 | self.brightness_low = augment_parameters[2] # 0.9 140 | self.brightness_high = augment_parameters[3] # 1,1 141 | self.color_low = augment_parameters[4] # 0.9 142 | self.color_high = augment_parameters[5] # 1.1 143 | 144 | self.thresh = 0.5 145 | 146 | def __call__(self, sample): 147 | p = np.random.uniform(0, 1, 1) 148 | if p > self.thresh: 149 | random_gamma = np.random.uniform(self.gamma_low, self.gamma_high) 150 | random_brightness = np.random.uniform(self.brightness_low, self.brightness_high) 151 | random_colors = np.random.uniform(self.color_low, self.color_high, 3) 152 | for x in sample: 153 | x = x ** random_gamma # randomly shift gamma 154 | x = x * random_brightness # randomly shift brightness 155 | for i in range(3): # randomly shift color 156 | x[:, :, i] *= random_colors[i] 157 | x[:, :, i] *= random_colors[i] 158 | x = np.clip(x, a_min=0, a_max=1) # saturate 159 | return sample 160 | else: 161 | return sample 162 | -------------------------------------------------------------------------------- /code/evaluate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation scripts to evaluate the tracking accuracy of the proposed method 3 | 4 | # SPDX-FileCopyrightText: 2021 Binbin Xu 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | @author: Zhaoyang Lv 8 | @date: March 2019 9 | """ 10 | 11 | import os, sys, argparse, pickle 12 | import os.path as osp 13 | import numpy as np 14 | import pandas as pd 15 | from tools.rgbd_odometry import RGBDOdometry 16 | from tools.ICP import ICP_Odometry 17 | 18 | import torch 19 | import torch.utils.data as data 20 | import torchvision.utils as torch_utils 21 | import torch.nn as nn 22 | 23 | import models.LeastSquareTracking as ICtracking 24 | import models.criterions as criterions 25 | import train_utils 26 | import config 
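# ICP_Odometry and RGBDOdometry (imported above) provide the classical, non-learned
# baselines that are selected further below when --tracker is ICP, ColorICP or RGBD.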
27 | from Logger import check_directory 28 | 29 | from data.dataloader import load_data 30 | from timers import Timers 31 | from tqdm import tqdm 32 | 33 | 34 | def eval_trajectories(dataset): 35 | if dataset == 'TUM_RGBD': 36 | return { 37 | 'TUM_RGBD': ['rgbd_dataset_freiburg1_360', 38 | 'rgbd_dataset_freiburg1_desk', 39 | 'rgbd_dataset_freiburg2_desk', 40 | 'rgbd_dataset_freiburg2_pioneer_360'] 41 | }[dataset] 42 | elif dataset == 'MovingObjects3D': 43 | return { 44 | 'MovingObjects3D': ['boat', 45 | 'motorbike', 46 | ] 47 | }[dataset] 48 | elif dataset == 'ScanNet': 49 | return { 50 | 'ScanNet': ['scene0565_00', 51 | 'scene0011_00', 52 | ] 53 | }[dataset] 54 | else: 55 | raise NotImplementedError() 56 | 57 | 58 | def nostructure_trajectory(dataset): 59 | if dataset == 'TUM_RGBD': 60 | return { 61 | 'TUM_RGBD': ['rgbd_dataset_freiburg3_nostructure_notexture_far', 62 | 'rgbd_dataset_freiburg3_nostructure_notexture_near_withloop', 63 | 'rgbd_dataset_freiburg3_nostructure_texture_far', 64 | 'rgbd_dataset_freiburg3_nostructure_texture_near_withloop'] 65 | }[dataset] 66 | else: 67 | raise NotImplementedError() 68 | 69 | 70 | def notexture_trajectory(dataset): 71 | if dataset == 'TUM_RGBD': 72 | return { 73 | 'TUM_RGBD': ['rgbd_dataset_freiburg3_nostructure_notexture_far', 74 | 'rgbd_dataset_freiburg3_nostructure_notexture_near_withloop', 75 | 'rgbd_dataset_freiburg3_structure_notexture_near', 76 | ] 77 | }[dataset] 78 | else: 79 | raise NotImplementedError() 80 | 81 | 82 | def structure_texture_trajectory(dataset): 83 | if dataset == 'TUM_RGBD': 84 | return { 85 | 'TUM_RGBD': ['rgbd_dataset_freiburg3_structure_texture_far', 86 | 'rgbd_dataset_freiburg3_structure_texture_near',] 87 | }[dataset] 88 | else: 89 | raise NotImplementedError() 90 | 91 | 92 | def create_eval_loaders(options, eval_type, keyframes, 93 | total_batch_size = 8, 94 | trajectory = ''): 95 | """ create the evaluation loader at different keyframes set-up 96 | """ 97 | eval_loaders = {} 98 | 99 | if trajectory == '': 100 | trajectories = eval_trajectories(options.dataset) 101 | elif trajectory == 'nostructure': 102 | trajectories = nostructure_trajectory(options.dataset) 103 | elif trajectory == 'notexture': 104 | trajectories = notexture_trajectory(options.dataset) 105 | elif trajectory == 'structure_texture': 106 | trajectories = structure_texture_trajectory(options.dataset) 107 | else: 108 | trajectories = [trajectory] 109 | 110 | for trajectory in trajectories: 111 | for kf in keyframes: 112 | if options.image_resize is not None: 113 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory, 114 | image_resize=options.image_resize, options=options) 115 | else: 116 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory, options=options) 117 | eval_loaders['{:}_keyframe_{:}'.format(trajectory, kf)] = data.DataLoader(np_loader, 118 | batch_size = int(total_batch_size), 119 | shuffle = False, num_workers = options.cpu_workers) 120 | 121 | return eval_loaders 122 | 123 | def evaluate_trust_region(dataloader, net, objectives, eval_name='', 124 | known_mask = False, timers = None, logger=None, epoch=0, obj_only=False, tracker='learning_based'): 125 | """ evaluate the trust-region method given the two-frame pose estimation 126 | :param the pytorch dataloader 127 | :param the network 128 | :param the evaluation objective names, e.g. 
RPE, EPE3D 129 | :param True if ground mask if known 130 | :param (optional) timing each step 131 | """ 132 | 133 | progress = tqdm(dataloader, ncols=100, 134 | desc = 'evaluate deeper inverse compositional algorithm {:}'.format(eval_name), 135 | total= len(dataloader)) 136 | 137 | if tracker == 'learning_based': 138 | net.eval() 139 | 140 | total_frames = len(dataloader.dataset) 141 | 142 | outputs = { 143 | 'R_est': np.zeros((total_frames, 3, 3)), 144 | 't_est': np.zeros((total_frames, 3)), 145 | 'names': [] 146 | } 147 | flow_loss, rpe_loss = None, None 148 | if 'EPE3D' in objectives: 149 | flow_loss = criterions.compute_RT_EPE_loss 150 | outputs['epes'] = np.zeros(total_frames) 151 | if 'RPE' in objectives: 152 | rpe_loss = criterions.compute_RPE_loss 153 | outputs['angular_error'] = np.zeros(total_frames) 154 | outputs['translation_error'] = np.zeros(total_frames) 155 | 156 | count_base = 0 157 | 158 | if timers: timers.tic('one iteration') 159 | 160 | count = 1 161 | for idx, batch in enumerate(progress): 162 | 163 | if timers: timers.tic('forward step') 164 | 165 | names = batch[-1] 166 | 167 | if known_mask: # for dataset that with mask or need mask 168 | color0, color1, depth0, depth1, Rt, K, obj_mask0, obj_mask1 = \ 169 | train_utils.check_cuda(batch[:8]) 170 | else: 171 | color0, color1, depth0, depth1, Rt, K = \ 172 | train_utils.check_cuda(batch[:6]) 173 | obj_mask0, obj_mask1 = None, None 174 | 175 | B, _, H, W = depth0.shape 176 | iter = epoch * total_frames + count_base 177 | with torch.no_grad(): 178 | if tracker == 'learning_based': 179 | if obj_only: 180 | output = net.forward(color0, color1, depth0, depth1, K, 181 | obj_mask0=obj_mask0, obj_mask1=obj_mask1, 182 | logger=logger, iteration=iter) 183 | else: 184 | output = net.forward(color0, color1, depth0, depth1, K, 185 | logger=logger, iteration=iter) 186 | elif options.tracker in ['ColorICP', 'ICP', 'RGBD']: 187 | if obj_only: 188 | output = net.batch_track(color0, depth0, color1, depth1, K, 189 | batch_objmask0=obj_mask0, batch_objmask1=obj_mask1) 190 | else: 191 | output = net.batch_track(color0, depth0, color1, depth1, K) 192 | else: 193 | raise NotImplementedError("unsupported test tracker: check argument of --tracker again") 194 | R, t = output 195 | 196 | if timers: timers.toc('forward step') 197 | 198 | outputs['R_est'][count_base:count_base+B] = R.cpu().numpy() 199 | outputs['t_est'][count_base:count_base+B] = t.cpu().numpy() 200 | 201 | if timers: timers.tic('evaluate') 202 | R_gt, t_gt = Rt[:,:3,:3], Rt[:,:3,3] 203 | if rpe_loss: # evaluate the relative pose error 204 | angle_error, trans_error = rpe_loss(R, t, R_gt, t_gt) 205 | outputs['angular_error'][count_base:count_base+B] = angle_error.cpu().numpy() 206 | outputs['translation_error'][count_base:count_base+B] = trans_error.cpu().numpy() 207 | 208 | if flow_loss:# evaluate the end-point-error loss 3D 209 | invalid_mask = (depth0 == depth0.min()) | (depth0 == depth0.max()) 210 | if obj_mask0 is not None: 211 | invalid_mask = ~obj_mask0 | invalid_mask 212 | 213 | epes3d = flow_loss(R, t, R_gt, t_gt, depth0, K, invalid=invalid_mask) 214 | outputs['epes'][count_base:count_base+B] = epes3d.cpu().numpy() 215 | 216 | outputs['names'] += names 217 | 218 | count_base += B 219 | 220 | if timers: timers.toc('evaluate') 221 | if timers: timers.toc('one iteration') 222 | if timers: timers.tic('one iteration') 223 | 224 | if timers: timers.print() 225 | 226 | return outputs 227 | 228 | def test_TrustRegion(options): 229 | 230 | if options.time: 231 | timers = Timers() 
232 | else: 233 | timers = None 234 | 235 | print('Evaluate test performance with the (deep) direct method.') 236 | 237 | total_batch_size = options.batch_per_gpu * torch.cuda.device_count() 238 | 239 | keyframes = [int(x) for x in options.keyframes.split(',')] 240 | if options.dataset in ['BundleFusion', 'TUM_RGBD']: 241 | obj_has_mask = False 242 | else: 243 | obj_has_mask = True 244 | 245 | eval_loaders = create_eval_loaders(options, options.eval_set, 246 | keyframes, total_batch_size, options.trajectory) 247 | 248 | if options.tracker == 'learning_based': 249 | if options.checkpoint == '': 250 | print('No checkpoint loaded. Use the non-learning method') 251 | net = ICtracking.LeastSquareTracking( 252 | encoder_name = 'RGB', 253 | uncertainty_type=options.uncertainty, 254 | direction=options.direction, 255 | max_iter_per_pyr= options.max_iter_per_pyr, 256 | options=options, 257 | mEst_type = 'None', 258 | solver_type = 'Direct-Nodamping') 259 | if torch.cuda.is_available(): net.cuda() 260 | net.eval() 261 | else: 262 | train_utils.load_checkpoint_test(options) 263 | 264 | net = ICtracking.LeastSquareTracking( 265 | encoder_name = options.encoder_name, 266 | uncertainty_type=options.uncertainty, 267 | direction=options.direction, 268 | max_iter_per_pyr= options.max_iter_per_pyr, 269 | mEst_type = options.mestimator, 270 | options=options, 271 | solver_type = options.solver, 272 | no_weight_sharing = options.no_weight_sharing) 273 | 274 | if torch.cuda.is_available(): net.cuda() 275 | net.eval() 276 | 277 | # check whether it is a single checkpoint or a directory 278 | net.load_state_dict(torch.load(options.checkpoint)['state_dict']) 279 | elif options.tracker == 'ICP': 280 | icp_tracker = ICP_Odometry('Point2Plane') 281 | net = icp_tracker 282 | elif options.tracker == 'ColorICP': 283 | color_icp_tracker = ICP_Odometry('ColorICP') 284 | net = color_icp_tracker 285 | elif options.tracker == 'RGBD': 286 | rgbd_tracker = RGBDOdometry("RGBD") 287 | net = rgbd_tracker 288 | else: 289 | raise NotImplementedError("unsupported test tracker: check argument of --tracker again") 290 | 291 | eval_objectives = ['EPE3D', 'RPE'] 292 | 293 | output_prefix = '_'.join([ 294 | options.network, 295 | options.encoder_name, 296 | options.mestimator, 297 | options.solver, 298 | 'iter', str(options.max_iter_per_pyr) 299 | ]) 300 | 301 | # evaluate results per trajectory per key-frame 302 | outputs = {} 303 | for k, loader in eval_loaders.items(): 304 | 305 | traj_name, kf = k.split('_keyframe_') 306 | 307 | output_name = '{:}_{:}'.format(output_prefix, k) 308 | info = evaluate_trust_region(loader, net, 309 | eval_objectives, 310 | eval_name = 'tmp/'+output_name, 311 | known_mask=obj_has_mask, 312 | obj_only=options.obj_only, 313 | tracker=options.tracker, 314 | timers=timers, 315 | ) 316 | 317 | # collect results 318 | outputs[k] = pd.Series([info['epes'].mean(), 319 | info['angular_error'].mean(), 320 | info['translation_error'].mean(), 321 | info['epes'].shape[0], int(kf), traj_name], 322 | index=['3D EPE', 'axis error', 'trans error', 'total frames', 'keyframe', 'trajectory']) 323 | 324 | print(outputs[k]) 325 | 326 | checkpoint_name = options.checkpoint.replace('.pth.tar', '') 327 | if checkpoint_name == '': 328 | checkpoint_name = 'nolearning' 329 | if options.tracker in ['ColorICP', 'ICP', 'RGBD']: 330 | checkpoint_name += ('_'+ options.tracker) 331 | output_dir = osp.join(options.eval_set+'_results', checkpoint_name, k) 332 | output_pkl = output_dir + '.pkl' 333 | 334 | check_directory(output_pkl) 335 | 
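# 'info' is the per-frame dict returned by evaluate_trust_region: 'R_est' (N, 3, 3),
# 't_est' (N, 3) and 'names', plus 'epes', 'angular_error' and 'translation_error'
# when the corresponding objectives are evaluated; it can be read back later with,
# e.g., pickle.load(open(output_pkl, 'rb')).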
336 | with open(output_pkl, 'wb') as output: # dump per-frame results info 337 | info = info 338 | pickle.dump(info, output) 339 | 340 | """ =============================================================== """ 341 | """ Generate the final evaluation results """ 342 | """ =============================================================== """ 343 | 344 | outputs_pd = pd.DataFrame(outputs).T 345 | outputs_pd['3D EPE'] *= 100 # convert to cm 346 | outputs_pd['axis error'] *= (180/np.pi) # convert to degree 347 | outputs_pd['trans error'] *= 100 # convert to cm 348 | 349 | print(outputs_pd) 350 | 351 | stats_dict = {} 352 | for kf in keyframes: 353 | kf_outputs = outputs_pd[outputs_pd['keyframe']==kf] 354 | 355 | stats_dict['mean values of trajectories keyframe {:}'.format(kf)] = pd.Series( 356 | [kf_outputs['3D EPE'].mean(), 357 | kf_outputs['axis error'].mean(), 358 | kf_outputs['trans error'].mean(), kf], 359 | index=['3D EPE', 'axis error', 'trans error', 'keyframe']) 360 | 361 | total_frames = kf_outputs['total frames'].sum() 362 | stats_dict['mean values of frames keyframe {:}'.format(kf)] = pd.Series( 363 | [(kf_outputs['3D EPE'] * kf_outputs['total frames']).sum() / total_frames, 364 | (kf_outputs['axis error'] * kf_outputs['total frames']).sum() / total_frames, 365 | (kf_outputs['trans error']* kf_outputs['total frames']).sum() / total_frames, kf], 366 | index=['3D EPE', 'axis error', 'trans error', 'keyframe']) 367 | 368 | stats_pd = pd.DataFrame(stats_dict).T 369 | print(stats_pd) 370 | 371 | final_pd = outputs_pd.append(stats_pd, sort=False) 372 | final_pd.to_csv('{:}.csv'.format(output_dir)) 373 | 374 | return outputs_pd 375 | 376 | if __name__ == '__main__': 377 | 378 | parser = argparse.ArgumentParser(description="Evaluate the network") 379 | config.add_basics_config(parser) 380 | config.add_test_basics_config(parser) 381 | config.add_tracking_config(parser) 382 | 383 | options = parser.parse_args() 384 | 385 | print('---------------------------------------') 386 | 387 | outputs = test_TrustRegion(options) 388 | 389 | -------------------------------------------------------------------------------- /code/experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/experiments/__init__.py -------------------------------------------------------------------------------- /code/experiments/kf_vo.py: -------------------------------------------------------------------------------- 1 | """ Script to run keyframe visual odometry on a sequence of images 2 | using the proposed probabilistic feature-metric tracking method.
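The estimated point cloud and camera trajectory are rendered with trimesh/pyglet; press P in the viewer window to pause or resume playback.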
3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | #!/usr/bin/env python 8 | 9 | # import standard library 10 | import os 11 | import sys 12 | import argparse 13 | import os.path as osp 14 | # import third party 15 | import cv2 16 | from evaluate import create_eval_loaders 17 | 18 | import numpy as np 19 | # opengl/trimesh visualization 20 | import pyglet 21 | import trimesh 22 | import trimesh.viewer as tv 23 | import trimesh.transformations as tf 24 | import torch 25 | from imageio import imread 26 | 27 | import config 28 | from models.geometry import batch_create_transform 29 | from experiments.select_method import select_method 30 | from train_utils import check_cuda 31 | from data.dataloader import load_data 32 | from Logger import check_directory 33 | 34 | 35 | def init_scene(scene): 36 | scene.geometry = {} 37 | scene.graph.clear() 38 | scene.init = True 39 | 40 | # clear poses 41 | scene.gt_poses = [] 42 | scene.est_poses = [] 43 | scene.timestamps = [] 44 | 45 | return scene 46 | 47 | 48 | def camera_transform(transform=None): 49 | if transform is None: 50 | transform = np.eye(4) 51 | return transform @ trimesh.transformations.rotation_matrix( 52 | np.deg2rad(-180), [1, 0, 0] 53 | ) 54 | 55 | 56 | def pointcloud_from_depth( 57 | depth: np.ndarray, 58 | fx: float, 59 | fy: float, 60 | cx: float, 61 | cy: float, 62 | depth_type: str = 'z', 63 | ) -> np.ndarray: 64 | assert depth_type in ['z', 'euclidean'], 'Unexpected depth_type' 65 | assert depth.dtype.kind == 'f', 'depth must be float and have meter values' 66 | 67 | rows, cols = depth.shape 68 | c, r = np.meshgrid(np.arange(cols), np.arange(rows), sparse=True) 69 | valid = ~np.isnan(depth) 70 | z = np.where(valid, depth, np.nan) 71 | x = np.where(valid, z * (c - cx) / fx, np.nan) 72 | y = np.where(valid, z * (r - cy) / fy, np.nan) 73 | pc = np.dstack((x, y, z)) 74 | 75 | if depth_type == 'euclidean': 76 | norm = np.linalg.norm(pc, axis=2) 77 | pc = pc * (z / norm)[:, :, None] 78 | return pc 79 | 80 | 81 | def callback(scene): 82 | if not scene.is_play: 83 | return 84 | 85 | dataset = scene.dataloader 86 | options = scene.options 87 | if scene.index >= len(dataset): 88 | return 89 | 90 | if scene.vo_type == 'incremental': 91 | batch = dataset[scene.index - 1] 92 | else: 93 | batch = dataset.get_keypair(scene.index) 94 | color0, color1, depth0, depth1, GT_Rt, intrins, name = check_cuda( 95 | batch[:7]) 96 | 97 | scene_id = name['seq'] 98 | 99 | # Reset scene for new scene. 
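# (init_scene() above clears the stored geometry, the scene graph and the
#  accumulated ground-truth/estimated pose lists, so drawing restarts here.)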
100 | if scene_id != scene.video_id: 101 | scene = init_scene(scene) 102 | scene.init_idx = scene.index 103 | scene.video_id = scene_id 104 | else: 105 | scene.init = False 106 | 107 | GT_WC = dataset.cam_pose_seq[0][scene.index] 108 | depth_file = dataset.depth_seq[0][scene.index] 109 | if not options.save_img: 110 | # half resolution 111 | rgb = color1.permute((1, 2, 0)).cpu().numpy() 112 | depth = imread(depth_file).astype(np.float32) / 5e3 113 | depth = cv2.resize(depth, None, fx=dataset.fx_s, 114 | fy=dataset.fy_s, interpolation=cv2.INTER_NEAREST) 115 | K = {"fx": intrins[0].item(), "fy": intrins[1].item(), 116 | "ux": intrins[2].item(), "uy": intrins[3].item()} 117 | else: 118 | # original resolution for demo 119 | rgb = imread(dataset.image_seq[0][scene.index]) 120 | depth = imread(depth_file).astype(np.float32) / 5e3 121 | calib = np.asarray(dataset.calib[0], dtype=np.float32) 122 | K = {"fx": calib[0], "fy": calib[1], 123 | "ux": calib[2], "uy": calib[3]} 124 | 125 | # save input rgb and depth images 126 | img_index_png = str(scene.index).zfill(5)+'.png' 127 | if options.dataset == 'VaryLighting': 128 | output_folder = osp.join(dataset.seq_names[0], 'kf_vo', options.vo) 129 | else: 130 | output_folder = os.path.join( 131 | '/home/binbin/Pictures', 'kf_vo', options.vo) 132 | 133 | rgb_img = osp.join(output_folder, 'rgb', img_index_png) 134 | depth_img = osp.join(output_folder, 'depth', img_index_png) 135 | check_directory(rgb_img) 136 | check_directory(depth_img) 137 | cv2.imwrite(rgb_img, cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)) 138 | cv2.imwrite(depth_img, depth) 139 | 140 | if scene.init: 141 | if GT_WC is not None: 142 | T_WC = GT_WC 143 | else: 144 | T_WC = np.array([ 145 | [0.0, -1.0, 0.0, -0.0], 146 | [-1.0, 0.0, 0.0, 0.0], 147 | [0.0, 0.0, -1.0, 0.0], 148 | [0.0, 0.0, 0.0, 1.0], 149 | ]) 150 | # T_WC = np.eye(4) 151 | scene.T_WC = T_WC 152 | if scene.vo_type == 'keyframe': 153 | scene.T_WK = T_WC 154 | else: 155 | with torch.no_grad(): 156 | color0 = color0.unsqueeze(dim=0) 157 | color1 = color1.unsqueeze(dim=0) 158 | depth0 = depth0.unsqueeze(dim=0) 159 | depth1 = depth1.unsqueeze(dim=0) 160 | intrins = intrins.unsqueeze(dim=0) 161 | if options.save_img: 162 | output = scene.network.forward( 163 | color0, color1, depth0, depth1, intrins, index=scene.index) 164 | else: 165 | output = scene.network.forward( 166 | color0, color1, depth0, depth1, intrins) 167 | R, t = output 168 | if scene.is_gt_tracking: 169 | T_WC = GT_WC 170 | scene.T_WC = T_WC 171 | else: 172 | if scene.vo_type == 'incremental': 173 | T_CR = batch_create_transform(t, R) 174 | # T_CR = GT_Rt 175 | T_CR = T_CR.squeeze(dim=0).cpu().numpy() 176 | T_WC = np.dot(scene.T_WC, np.linalg.inv( 177 | T_CR)).astype(np.float32) 178 | elif scene.vo_type == 'keyframe': 179 | T_CK = batch_create_transform(t, R) 180 | # T_CK = GT_Rt 181 | T_CK = T_CK.squeeze(dim=0).cpu().numpy() 182 | T_WC = np.dot(scene.T_WK, np.linalg.inv( 183 | T_CK)).astype(np.float32) 184 | 185 | # print large drift in keyframe tracking, 186 | # just for noticing a possible tracking failure, not usd later 187 | T_CC = np.dot(np.linalg.inv(T_WC), 188 | scene.T_WC).astype(np.float32) 189 | trs_drift = np.copy(T_CC[0:3, 3:4]).transpose() 190 | if np.linalg.norm(trs_drift) > 0.02: 191 | print(depth_file) 192 | else: 193 | raise NotImplementedError() 194 | scene.T_WC = T_WC 195 | 196 | pcd = pointcloud_from_depth( 197 | depth, fx=K['fx'], fy=K['fy'], cx=K['ux'], cy=K['uy'] 198 | ) 199 | nonnan = ~np.isnan(depth) 200 | geom = 
trimesh.PointCloud(vertices=pcd[nonnan], colors=rgb[nonnan]) 201 | # XYZ->RGB, Z is blue 202 | if options.dataset == 'VaryLighting': 203 | axis = trimesh.creation.axis(0.005, origin_color=(0, 0, 0)) 204 | elif options.dataset in ['TUM_RGBD', 'ScanNet']: 205 | axis = trimesh.creation.axis(0.01, origin_color=(0, 0, 0)) 206 | else: 207 | raise NotImplementedError() 208 | 209 | # two view: keyframe - live frames visualization 210 | if scene.vo_type == 'keyframe' and scene.two_view: 211 | if scene.init: 212 | scene.add_geometry(geom, transform=scene.T_WK, geom_name='key') 213 | scene.add_geometry(geom, transform=T_WC, geom_name='live') 214 | scene.add_geometry(axis, transform=T_WC, geom_name='camera_view') 215 | else: 216 | # after the first view, delete the old live view and add new live view 217 | scene.delete_geometry('live') 218 | scene.delete_geometry('camera_view') 219 | scene.add_geometry(geom, transform=T_WC, geom_name='live') 220 | scene.add_geometry(axis, transform=T_WC, geom_name='camera_view') 221 | 222 | else: 223 | scene.add_geometry(geom, transform=T_WC) 224 | 225 | # draw camera trajectory 226 | trs = np.copy(T_WC[0:3, 3:4]).transpose() 227 | cam = trimesh.PointCloud(vertices=trs, colors=[255, 0, 0]) 228 | scene.add_geometry(cam) 229 | 230 | scene.add_geometry(axis, transform=T_WC) 231 | 232 | if scene.last_pose is not None: 233 | poses_seg = np.stack((scene.last_pose, trs), axis=1) 234 | cam_seg = trimesh.load_path(poses_seg) 235 | scene.add_geometry(cam_seg) 236 | scene.last_pose = trs 237 | 238 | # A kind of current camera view, but a bit far away to see whole scene. 239 | scene.camera.resolution = (rgb.shape[1], rgb.shape[0]) 240 | scene.camera.focal = (K['fx'], K['fy']) 241 | if dataset.realscene: 242 | if options.save_img: 243 | if scene.vo_type == 'keyframe': 244 | # T_see = np.array([ 245 | # [1.000, 0.000, 0.000, 0.2], 246 | # [0.000, 0.866, 0.500, -0.7], 247 | # [0.000, -0.500, 0.866, -0.7], 248 | # [0.000, 0.000, 0.000, 1.0], 249 | # ]) 250 | T_see = np.array([ 251 | [1.000, 0.000, 0.000, 0.2], 252 | [0.000, 0.866, 0.500, -0.7], 253 | [0.000, -0.500, 0.866, -0.8], 254 | [0.000, 0.000, 0.000, 1.0], 255 | ]) 256 | 257 | # T_see = np.array([ 258 | # [1.000, 0.000, 0.000, 0.2], 259 | # [0.000, 0.985, 0.174, -0.3], 260 | # [0.000, -0.174, 0.985, -0.6], 261 | # [0.000, 0.000, 0.000, 1.0], 262 | # ]) 263 | scene.camera_transform = camera_transform( 264 | np.matmul(scene.T_WK, T_see) 265 | ) 266 | else: 267 | # if scene.index < 140: 268 | # T_see = np.array([ 269 | # [1.000, 0.000, 0.000, 0.2], 270 | # [0.000, 0.866, 0.500, -2.0], 271 | # [0.000, -0.500, 0.866, -2.0], 272 | # [0.000, 0.000, 0.000, 1.0], 273 | # ]) 274 | # scene.camera_transform = camera_transform( 275 | # np.matmul(scene.T_WC, T_see) 276 | # ) 277 | pass 278 | else: 279 | # adjust which transformation use to set the see pose 280 | if scene.vo_type == 'keyframe': 281 | T_see = np.array([ 282 | [1.000, 0.000, 0.000, 0.2], 283 | [0.000, 0.866, 0.500, -0.7], 284 | [0.000, -0.500, 0.866, -0.8], 285 | [0.000, 0.000, 0.000, 1.0], 286 | ]) 287 | # T_see = np.array([ 288 | # [1.000, 0.000, 0.000, 0.2], 289 | # [0.000, 0.985, 0.174, -0.3], 290 | # [0.000, -0.174, 0.985, -0.6], 291 | # [0.000, 0.000, 0.000, 1.0], 292 | # ]) 293 | # T_see = np.array([ 294 | # [1.000, 0.000, 0.000, 0.2], 295 | # [0.000, 0.985, 0.174, -0.3], 296 | # [0.000, -0.174, 0.985, -0.6], 297 | # [0.000, 0.000, 0.000, 1.0], 298 | # ]) 299 | # T_see = np.array([ 300 | # [1.000, 0.000, 0.000, 0.2], 301 | # [0.000, 0.866, 0.500, -0.8], 302 | # [0.000, 
-0.500, 0.866, -0.8], 303 | # [0.000, 0.000, 0.000, 1.0], 304 | # ]) 305 | 306 | scene.camera_transform = camera_transform( 307 | np.matmul(scene.T_WK, T_see) 308 | ) 309 | else: 310 | scene.camera.transform = T_WC @ tf.translation_matrix([0, 0, 2.5]) 311 | 312 | # if scene.index == scene.init_idx + 1: 313 | # input() 314 | print(scene.index) 315 | scene.index += 1 # scene.track_config['frame_step'] 316 | # print("<=================================") 317 | if options.save_img: 318 | return 319 | 320 | 321 | def main(options): 322 | 323 | if options.dataset == 'TUM_RGBD': 324 | sequence_dir = 'rgbd_dataset_freiburg1_desk' 325 | np_loader = load_data('TUM_RGBD', keyframes=[1, ], load_type='test', 326 | select_trajectory=sequence_dir, 327 | truncate_depth=True, 328 | options=options, 329 | load_numpy=False) 330 | elif options.dataset == 'VaryLighting': 331 | np_loader = load_data('VaryLighting', keyframes=[1, ], load_type='test', 332 | select_trajectory='scene17_demo', # 'l_scene3', 333 | truncate_depth=True, 334 | load_numpy=False, 335 | pair=options.vo_type, 336 | ) 337 | elif options.dataset == 'ScanNet': 338 | np_loader = load_data('ScanNet', keyframes=[1, ], load_type='test', 339 | select_trajectory='scene0593_00', 340 | truncate_depth=True, 341 | load_numpy=False, 342 | options=options, 343 | ) 344 | 345 | scene = trimesh.Scene() 346 | scene.dataloader = np_loader 347 | scene.dataloader.realscene = True 348 | # total_batch_size = options.batch_per_gpu * torch.cuda.device_count() 349 | 350 | # keyframes = [int(x) for x in options.keyframes.split(',')] 351 | # if options.dataset in ['BundleFusion', 'TUM_RGBD']: 352 | # obj_has_mask = False 353 | # else: 354 | # obj_has_mask = True 355 | 356 | # eval_loaders = create_eval_loaders(options, options.eval_set, 357 | # [1,], total_batch_size, options.trajectory) 358 | 359 | tracker = select_method(options.vo, options) 360 | scene.network = tracker 361 | 362 | scene.index = 0 # config['start_frame'] # starting frame e.g. 
60 363 | scene.video_id = None 364 | scene.last_pose = None 365 | scene.is_gt_tracking = options.gt_tracker 366 | scene.init = False # True only for the first frame 367 | scene.is_play = True # immediately start playing when called 368 | scene.vo_type = options.vo_type 369 | scene.two_view = options.two_view 370 | scene.options = options 371 | 372 | callback(scene) 373 | window = trimesh.viewer.SceneViewer( 374 | scene, callback=callback, start_loop=False, resolution=(1080, 720) 375 | ) 376 | 377 | @window.event 378 | def on_key_press(symbol, modifiers): 379 | if modifiers == 0: 380 | if symbol == pyglet.window.key.P: 381 | scene.is_play = not scene.is_play 382 | 383 | print('Press P key to pause/resume.') 384 | 385 | if not options.save_img: 386 | # scene.show() 387 | pyglet.app.run() 388 | else: 389 | # import pyrender 390 | # scene_pyrender = pyrender.Scene.from_trimesh(scene) 391 | # renderer = pyrender.OffscreenRenderer(viewport_height=480, viewport_width=640, point_size=1) 392 | # rgb, depth = renderer.render(scene_pyrender) 393 | 394 | if options.dataset == 'VaryLighting': 395 | output_dir = osp.join(np_loader.seq_names[0], 'kf_vo', options.vo) 396 | else: 397 | output_dir = os.path.join( 398 | '/home/binbin/Pictures', 'kf_vo', options.vo) 399 | check_directory(output_dir + '/*.png') 400 | for frame_id in range(len(scene.dataloader)): 401 | # scene.save_image() 402 | callback(scene) 403 | file_name = os.path.join(output_dir, 'render', str( 404 | scene.index-1).zfill(5) + '.png') 405 | check_directory(file_name) 406 | with open(file_name, "wb") as f: 407 | f.write(scene.save_image()) 408 | f.close() 409 | 410 | 411 | if __name__ == "__main__": 412 | parser = argparse.ArgumentParser(description="Evaluate the network") 413 | config.add_basics_config(parser) 414 | config.add_test_basics_config(parser) 415 | config.add_tracking_config(parser) 416 | config.add_vo_config(parser) 417 | 418 | options = parser.parse_args() 419 | # to save visualization: --save_img and --vis_feat 420 | print('---------------------------------------') 421 | main(options) 422 | -------------------------------------------------------------------------------- /code/experiments/select_method.py: -------------------------------------------------------------------------------- 1 | """ 2 | A wrapper to select different methods for comparison 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | import torch 8 | from models.LeastSquareTracking import LeastSquareTracking 9 | 10 | # load comparison methods 11 | def select_method(method_name, options): 12 | assert method_name in ['DeepIC', 'RGB', 'ICP', 'RGB+ICP', 'feature', 'feature_icp'] 13 | if method_name == 'DeepIC': 14 | print('==>Load DeepIC method') 15 | if options.dataset == 'MovingObjects3D': 16 | deepIC_checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/logs/MovingObjects3D/124/cvpr124_ConvRGBD2_MultiScale2w_Direct-ResVol_MovingObjects3D_obj_False_uCh_1_None_rmT_False_fCh_1_average_iP_identity_mH_None_wICP_False_s_None_lr_0.0005_batch_64/checkpoint_epoch29.pth.tar' 17 | else: 18 | deepIC_checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/code/trained_models/TUM_RGBD_ABC_final.pth.tar' 19 | deepIC = LeastSquareTracking( 20 | encoder_name='ConvRGBD2', 21 | direction='inverse', 22 | max_iter_per_pyr=3, 23 | mEst_type='MultiScale2w', 24 | # options=options, 25 | solver_type='Direct-ResVol', 26 | feature_channel=1, 27 | feature_extract='average', 28 | 
uncertainty_type='None', 29 | combine_ICP=False, 30 | scaler='None', 31 | init_pose_type='identity', 32 | options=options, 33 | ) 34 | 35 | if torch.cuda.is_available(): deepIC.cuda() 36 | 37 | # check whether it is a single checkpoint or a directory 38 | deepIC.load_state_dict(torch.load(deepIC_checkpoint)['state_dict']) 39 | deepIC.eval() 40 | return deepIC 41 | 42 | if method_name == 'RGB': 43 | print('==>Load RGB method') 44 | rgb_tracker = LeastSquareTracking( 45 | encoder_name='RGB', 46 | combine_ICP=False, 47 | feature_channel=1, 48 | uncertainty_channel=1, 49 | # feature_extract='conv', 50 | uncertainty_type='None', 51 | scaler='None', 52 | direction='inverse', 53 | max_iter_per_pyr=options.max_iter_per_pyr, 54 | mEst_type='None', 55 | solver_type='Direct-Nodamping', 56 | init_pose_type='identity', 57 | options=options, 58 | ) 59 | if torch.cuda.is_available(): rgb_tracker.cuda() 60 | rgb_tracker.eval() 61 | return rgb_tracker 62 | 63 | if method_name == 'ICP': 64 | print('==>Load ICP method') 65 | icp_tracker = LeastSquareTracking( 66 | encoder_name='ICP', 67 | combine_ICP=False, 68 | # feature_channel=1, 69 | # uncertainty_channel=1, 70 | # feature_extract='conv', 71 | uncertainty_type='ICP', 72 | scaler='None', 73 | direction='inverse', 74 | max_iter_per_pyr=options.max_iter_per_pyr, 75 | mEst_type='None', 76 | solver_type='Direct-Nodamping', 77 | init_pose_type='identity', 78 | options=options, 79 | ) 80 | if torch.cuda.is_available(): icp_tracker.cuda() 81 | icp_tracker.eval() 82 | return icp_tracker 83 | 84 | if method_name == 'RGB+ICP': 85 | print('==>Load RGB+ICP method') 86 | rgbd_tracker = LeastSquareTracking( 87 | encoder_name='RGB', 88 | combine_ICP=True, 89 | # feature_channel=1, 90 | uncertainty_channel=1, 91 | # feature_extract='conv', 92 | uncertainty_type='identity', 93 | scaler='None', 94 | direction='inverse', 95 | max_iter_per_pyr=options.max_iter_per_pyr, 96 | mEst_type='None', 97 | solver_type='Direct-Nodamping', 98 | init_pose_type='identity', 99 | remove_tru_sigma=False, 100 | scale_scaler=0.2, 101 | options=options, 102 | ) 103 | if torch.cuda.is_available(): rgbd_tracker.cuda() 104 | rgbd_tracker.eval() 105 | return rgbd_tracker 106 | 107 | if method_name == 'feature': 108 | # train_utils.load_checkpoint_test(options) 109 | # 110 | # net = ICtracking.LeastSquareTracking( 111 | # encoder_name=options.encoder_name, 112 | # uncertainty_type=options.uncertainty, 113 | # direction=options.direction, 114 | # max_iter_per_pyr=options.max_iter_per_pyr, 115 | # mEst_type=options.mestimator, 116 | # options=options, 117 | # solver_type=options.solver, 118 | # no_weight_sharing=options.no_weight_sharing) 119 | # 120 | # if torch.cuda.is_available(): net.cuda() 121 | # net.eval() 122 | # 123 | # # check whether it is a single checkpoint or a directory 124 | # net.load_state_dict(torch.load(options.checkpoint)['state_dict']) 125 | # method_list['trained_method'] = net 126 | # train_utils.load_checkpoint_test(options) 127 | print('==>Load our feature-metric method') 128 | net = LeastSquareTracking( 129 | encoder_name=options.encoder_name, 130 | uncertainty_type=options.uncertainty, 131 | direction=options.direction, 132 | max_iter_per_pyr=options.max_iter_per_pyr, 133 | mEst_type=options.mestimator, 134 | options=options, 135 | solver_type=options.solver, 136 | combine_ICP=False, 137 | no_weight_sharing=options.no_weight_sharing) 138 | 139 | if torch.cuda.is_available(): net.cuda() 140 | 141 | # check whether it is a single checkpoint or a directory 142 | if 
options.checkpoint == '': 143 | if options.dataset in ['TUM_RGBD', 'VaryLighting']: 144 | checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/logs/TUM_RGBD/check_change/check4_ConvRGBD2_None_Direct-Nodamping_dataset_TUM_RGBD_obj_False_laplacian_uncerCh_1_featCh_8_conv_initPose_sfm_net_multiHypo_prob_fuse_uncer_prop_False_lr_0.0005_batch_64/checkpoint_epoch29.pth.tar' 145 | else: 146 | raise NotImplementedError() 147 | net.load_state_dict(torch.load(checkpoint)['state_dict']) 148 | else: 149 | net.load_state_dict(torch.load(options.checkpoint)['state_dict']) 150 | net.eval() 151 | return net 152 | 153 | if method_name == 'feature_icp': 154 | print('==>Load our feature-metric+ICP method') 155 | feature_icp = LeastSquareTracking( 156 | encoder_name=options.encoder_name, 157 | uncertainty_type=options.uncertainty, 158 | direction=options.direction, 159 | max_iter_per_pyr=options.max_iter_per_pyr, 160 | mEst_type=options.mestimator, 161 | options=options, 162 | solver_type=options.solver, 163 | combine_ICP=True, 164 | scale_scaler=options.scale_icp, 165 | no_weight_sharing=options.no_weight_sharing) 166 | 167 | if torch.cuda.is_available(): feature_icp.cuda() 168 | 169 | # check whether it is a single checkpoint or a directory 170 | if options.checkpoint == '': 171 | if options.dataset in ['TUM_RGBD', 'VaryLighting']: 172 | checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/logs/TUM_RGBD/finetune/finetune_vl_icp_ConvRGBD2_None_Direct-Nodamping_TUM_RGBD_obj_False_uCh_1_laplacian_rmT_True_fCh_8_conv_iP_sfm_net_mH_prob_fuse_wICP_True_s_None_lr_0.0005_batch_64/checkpoint_epoch40.pth.tar' 173 | else: 174 | raise NotImplementedError() 175 | feature_icp.load_state_dict(torch.load(checkpoint)['state_dict']) 176 | else: 177 | feature_icp.load_state_dict(torch.load(options.checkpoint)['state_dict']) 178 | feature_icp.eval() 179 | return feature_icp 180 | else: 181 | raise NotImplementedError("unsupported test tracker: check argument of --tracker again") -------------------------------------------------------------------------------- /code/experiments/warping_objects.py: -------------------------------------------------------------------------------- 1 | """ 2 | Experiments to warp objects for visualization 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | import argparse, pickle 8 | import os.path as osp 9 | import cv2 10 | import numpy as np 11 | from tqdm import tqdm 12 | import torch 13 | import torch.utils.data as data 14 | 15 | import config 16 | import models.geometry as geometry 17 | from Logger import check_directory 18 | from train_utils import check_cuda 19 | from data.dataloader import load_data, MOVING_OBJECTS_3D 20 | from experiments.select_method import select_method 21 | from tools import display 22 | 23 | def compute_pose(dataloader, tracker, k, tracker_name, use_gt_pose): 24 | count_base = 0 25 | total_frames = len(dataloader.dataset) 26 | progress = tqdm(dataloader, ncols=100, 27 | # desc = 'evaluate deeper inverse compositional algorithm {:}'.format(eval_name), 28 | total= len(dataloader)) 29 | outputs = { 30 | 'R_est': np.zeros((total_frames, 3, 3)), 31 | 't_est': np.zeros((total_frames, 3)), 32 | 'seq_idx': np.zeros((total_frames)), 33 | 'frame0': np.zeros((total_frames)), 34 | 'frame1': np.zeros((total_frames)), 35 | } 36 | # computing pose 37 | for idx, batch in enumerate(progress): 38 | color0, color1, depth0, depth1, transform, K, mask0, mask1, names = check_cuda(batch) 39 | B, C, 
H, W = color0.shape 40 | if use_gt_pose: 41 | R_gt, t_gt = transform[:, :3, :3], transform[:, :3, 3] 42 | Rt = [R_gt, t_gt] 43 | else: 44 | with torch.no_grad(): 45 | if options.obj_only: 46 | output = tracker.forward(color0, color1, depth0, depth1, K, 47 | obj_mask0=mask0, obj_mask1=mask1, 48 | ) 49 | else: 50 | output = tracker.forward(color0, color1, depth0, depth1, K, 51 | index=idx, 52 | ) 53 | Rt = output 54 | R, t = Rt 55 | outputs['R_est'][count_base:count_base + B] = R.cpu().numpy() 56 | outputs['t_est'][count_base:count_base + B] = t.cpu().numpy() 57 | outputs['seq_idx'][count_base:count_base + B] = names['seq_idx'].cpu().numpy() 58 | outputs['frame0'][count_base:count_base + B] = names['frame0'].cpu().numpy() 59 | outputs['frame1'][count_base:count_base + B] = names['frame1'].cpu().numpy() 60 | count_base += B 61 | return outputs 62 | 63 | def test_object_warping(options): 64 | # loader = MovingObjects3D('', load_type='train', keyframes=[1]) 65 | assert options.dataset == 'MovingObjects3D' 66 | keyframes = [int(x) for x in options.keyframes.split(',')] 67 | objects = ['boat', 'motorbike'] if options.object == '' else [options.object] 68 | eval_loaders = {} 69 | for test_object in objects: 70 | for kf in keyframes: 71 | np_loader = load_data(options.dataset, keyframes=[kf], 72 | load_type=options.eval_set, 73 | select_trajectory=test_object, options=options) 74 | eval_loaders['{:}_keyframe_{:}'. 75 | format(test_object, kf)] = data.DataLoader(np_loader, 76 | batch_size=int(options.batch_per_gpu), 77 | shuffle=False, num_workers = options.cpu_workers) 78 | use_gt_pose = options.gt_pose 79 | # method_list = {} 80 | if not use_gt_pose: 81 | tracker = select_method(options.method, options) 82 | else: 83 | tracker = None 84 | options.method = 'gt' 85 | method_list = {options.method: tracker} 86 | 87 | for k, loader in eval_loaders.items(): 88 | for method_name in method_list: 89 | # method = method_list 90 | tracker = method_list[method_name] 91 | output_dir_method = osp.join(MOVING_OBJECTS_3D, 'visualization', method_name) 92 | output_dir = osp.join(output_dir_method, k) 93 | output_pkl = output_dir + '/pose.pkl' 94 | output_compose_dir = osp.join(output_dir, 'compose') 95 | output_input_dir = osp.join(output_dir, 'input') 96 | output_residual_dir = osp.join(output_dir, 'res') 97 | check_directory(output_pkl) 98 | check_directory(output_compose_dir + '/.png') 99 | check_directory(output_input_dir + '/.png') 100 | check_directory(output_residual_dir + '/.png') 101 | 102 | if options.recompute or not osp.isfile(output_pkl): 103 | info = compute_pose(loader, tracker, k, method_name, use_gt_pose) 104 | with open(output_pkl, 'wb') as output: 105 | pickle.dump(info, output) 106 | else: 107 | with open(output_pkl, 'rb') as pkl_file: 108 | info = pickle.load(pkl_file) 109 | # info = compute_pose(loader, tracker, k, method_name, use_gt_pose) 110 | 111 | # visualize residuals 112 | loader.dataset.fx_s = 1.0 113 | loader.dataset.fy_s = 1.0 114 | progress = tqdm(loader, ncols=100, 115 | desc='compute residual for object {:} using {}'.format(k, method_name), 116 | total=len(loader)) 117 | count_base = 0 118 | for idx, batch in enumerate(progress): 119 | color0, color1, depth0, depth1, transform, K, mask0, mask1, names = check_cuda(batch) 120 | # color0, color1, depth0, depth1, transform, K, mask0, mask1, names = check_cuda(loader.dataset.get_original_size_batch(idx)) 121 | B, C, H, W = color0.shape 122 | invD0 = 1.0 / depth0 123 | invD1 = 1.0 / depth1 124 | 125 | R = 
torch.stack(check_cuda(info['R_est'][count_base:count_base + B]), dim=0).float() 126 | t = torch.stack(check_cuda(info['t_est'][count_base:count_base + B]), dim=0).float() 127 | Rt = [R, t] 128 | # R_gt, t_gt = transform[:, :3, :3], transform[:, :3, 3] 129 | # Rt = [R_gt, t_gt] 130 | px, py = geometry.generate_xy_grid(B, H, W, K) 131 | u_warped, v_warped, inv_z_warped = geometry.batch_warp_inverse_depth( 132 | px, py, invD0, Rt, K) 133 | x1_1to0 = geometry.warp_features(color1, u_warped, v_warped) 134 | crd = torch.cat((u_warped, v_warped), dim=1) 135 | occ = geometry.check_occ(inv_z_warped, invD1, crd, DIC_version=True) 136 | 137 | residuals = x1_1to0 - color0 # equation (12) 138 | # remove occlusion 139 | x1_1to0[occ.expand(B, C, H, W)] = 0 140 | 141 | if mask0 is not None: 142 | bg_mask0 = ~mask0 143 | res_occ = occ | (bg_mask0.view(B, 1, H, W)) 144 | else: 145 | res_occ = occ 146 | residuals[res_occ.expand(B, C, H, W)] = 0 147 | residuals = residuals.mean(dim=1, keepdim=True) 148 | 149 | # # for each image 150 | for idx in range(B): 151 | feat_residual = display.create_mosaic([color0[idx:idx+1], color1[idx:idx+1], x1_1to0[idx:idx+1], residuals[idx:idx+1]], 152 | cmap=['NORMAL', 'NORMAL', 'NORMAL', cv2.COLORMAP_JET], 153 | order='CHW') 154 | input0 = feat_residual[0:H, 0:W, :].copy() 155 | input1 = feat_residual[0:H, W:, :].copy() 156 | warped = feat_residual[H:, 0:W, :].copy() 157 | res = feat_residual[H:, W:, :].copy() 158 | obj_mask0 = mask0[idx:idx+1].squeeze().cpu().numpy().astype(np.uint8)*255 159 | obj_mask1 = mask1[idx:idx+1].squeeze().cpu().numpy().astype(np.uint8)*255 160 | contours0, hierarchy0 = cv2.findContours(obj_mask0, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) 161 | contours1, hierarchy1 = cv2.findContours(obj_mask1, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) 162 | 163 | input0 = cv2.drawContours(input0, contours0, -1, (0, 0, 255), 1) 164 | input1 = cv2.drawContours(input1, contours0, -1, (0, 0, 255), 1) 165 | input1 = cv2.drawContours(input1, contours1, -1, (0, 255, 0), 1) 166 | overlay = cv2.drawContours(warped, contours0, -1, (0, 0, 255), 1) 167 | overlay = cv2.drawContours(overlay, contours1, -1, (0, 255, 0), 1) 168 | 169 | # visualization for debugging 170 | if options.save_img: 171 | idx_in_batch=count_base+idx 172 | seq_idx = int(info['seq_idx'][idx_in_batch]) 173 | idx0 = int(info['frame0'][idx_in_batch]) 174 | idx1 = int(info['frame1'][idx_in_batch]) 175 | index = "_" + str.zfill(str(idx_in_batch), 5) + '.png' 176 | image_name = osp.join(output_compose_dir, 'compose'+index) 177 | cv2.imwrite(image_name, feat_residual) 178 | overlay_name = osp.join(output_residual_dir, 'overlay'+index) 179 | input_name = osp.join(output_input_dir, 'input0'+index) 180 | cv2.imwrite(overlay_name, overlay) 181 | cv2.imwrite(input_name, input0) 182 | cv2.imwrite(input_name.replace('input0', "input1"), input1) 183 | 184 | pair_dir = osp.join(output_dir, 'sequence', 185 | 'seq' + str(seq_idx) + "_" + str(idx0) + "_" + str(idx1), 186 | ) 187 | check_directory(pair_dir + '/.png') 188 | cv2.imwrite(overlay_name.replace('overlay', "residual"), res) 189 | cv2.imwrite(pair_dir+"/overlay.png", overlay) 190 | cv2.imwrite(pair_dir+"/input0.png", input0) 191 | cv2.imwrite(pair_dir+"/input1.png", input1) 192 | cv2.imwrite(pair_dir+"/residual.png", res) 193 | else: 194 | cv2.imshow("overlay", overlay) 195 | cv2.imshow("input0", input0) 196 | cv2.imshow("input1", input1) 197 | 198 | window_name = "feature-metric residuals" 199 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 200 | cv2.imshow(window_name, 
feat_residual) 201 | # image_name = osp.join(output_dir, 'residual'+str(idx)+'.png') 202 | # cv2.imwrite(image_name, feat_residual) 203 | cv2.waitKey(0) 204 | count_base += B 205 | 206 | 207 | if __name__ == '__main__': 208 | 209 | parser = argparse.ArgumentParser(description="Evaluate the network") 210 | config.add_basics_config(parser) 211 | config.add_tracking_config(parser) 212 | config.add_object_config(parser) 213 | options = parser.parse_args() 214 | 215 | print('---------------------------------------') 216 | test_object_warping(options) 217 | 218 | -------------------------------------------------------------------------------- /code/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/models/__init__.py -------------------------------------------------------------------------------- /code/models/criterions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some training criterions tested in the experiments 3 | 4 | # SPDX-FileCopyrightText: 2021 Binbin Xu 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | @author: Zhaoyang Lv 8 | @date: March, 2019 9 | """ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | from __future__ import unicode_literals 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as func 19 | import models.geometry as geo 20 | from models.algorithms import invH, lev_mar_H 21 | 22 | 23 | def EPE3D_loss(input_flow, target_flow, invalid=None): 24 | """ 25 | :param the estimated optical / scene flow 26 | :param the ground truth / target optical / scene flow 27 | :param the invalid mask, the mask has value 1 for all areas that are invalid 28 | """ 29 | epe_map = torch.norm(target_flow-input_flow,p=2,dim=1) 30 | B = epe_map.shape[0] 31 | 32 | invalid_flow = (target_flow != target_flow) # check Nan same as torch.isnan 33 | 34 | mask = (invalid_flow[:,0,:,:] | invalid_flow[:,1,:,:] | invalid_flow[:,2,:,:]) 35 | if invalid is not None: 36 | mask = mask | (invalid.view(mask.shape) > 0) 37 | 38 | epes = [] 39 | for idx in range(B): 40 | epe_sample = epe_map[idx][~mask[idx].data] 41 | if len(epe_sample) == 0: 42 | epes.append(torch.zeros(()).type_as(input_flow)) 43 | else: 44 | epes.append(epe_sample.mean()) 45 | 46 | return torch.stack(epes) 47 | 48 | 49 | def RPE(R, t): 50 | """ Calcualte the relative pose error 51 | (a batch version of the RPE error defined in TUM RGBD SLAM TUM dataset) 52 | :param relative rotation 53 | :param relative translation 54 | """ 55 | angle_error = geo.batch_mat2angle(R) 56 | trans_error = torch.norm(t, p=2, dim=1) 57 | return angle_error, trans_error 58 | 59 | 60 | def compute_RPE_uncertainty(R_est, t_est, R_gt, t_gt, inv_var): 61 | loss = 0 62 | if R_est.dim() > 3: # training time [batch, num_poses, rot_row, rot_col] 63 | 64 | for idx in range(R_est.shape[1]): 65 | dR = geo.batch_mat2twist(R_gt.detach().contiguous()) - geo.batch_mat2twist(R_est[:,idx]) 66 | dt = t_gt.detach() - t_est[:,idx] 67 | dKsi = torch.cat([dR, dt], dim=-1).unsqueeze(dim=-1) 68 | Hessian = lev_mar_H(inv_var[:,idx]) 69 | sigma_ksi = invH(Hessian) 70 | det_var = torch.det(sigma_ksi) 71 | # clamp 72 | det_var = det_var.clamp(min=1e-9) 73 | weighted_error = torch.bmm(dKsi.transpose(1, 2), torch.bmm(inv_var[:,idx], dKsi)).squeeze() 74 | regularization = 
torch.log(1e-6 + det_var) 75 | loss += (weighted_error + regularization).sum() 76 | return loss 77 | 78 | 79 | def compute_RPE_loss(R_est, t_est, R_gt, t_gt): 80 | """ 81 | :param estimated rotation matrix Bx3x3 82 | :param estimated translation vector Bx3 83 | :param ground truth rotation matrix Bx3x3 84 | :param ground truth translation vector Bx3 85 | """ 86 | B=R_est.shape[0] 87 | if R_est.dim() > 3: # training time [batch, num_poses, rot_row, rot_col] 88 | angle_error = 0 89 | trans_error = 0 90 | for idx in range(R_est.shape[1]): 91 | dR, dt = geo.batch_Rt_between(R_est[:, idx], t_est[:, idx], R_gt, t_gt) 92 | angle_error_i, trans_error_i = RPE(dR, dt) 93 | angle_error += angle_error_i.norm(p=2).sum() 94 | trans_error += trans_error_i.norm(p=2).sum() 95 | else: 96 | dR, dt = geo.batch_Rt_between(R_est, t_est, R_gt, t_gt) 97 | angle_error, trans_error = RPE(dR, dt) 98 | return angle_error, trans_error 99 | 100 | 101 | def compute_RT_EPE_loss(R_est, t_est, R_gt, t_gt, depth0, K, invalid=None): 102 | """ Compute the epe point error of rotation & translation 103 | :param estimated rotation matrix Bx3x3 104 | :param estimated translation vector Bx3 105 | :param ground truth rotation matrix Bx3x3 106 | :param ground truth translation vector Bx3 107 | :param reference depth image, 108 | :param camera intrinsic 109 | """ 110 | 111 | loss = 0 112 | if R_est.dim() > 3: # training time [batch, num_poses, rot_row, rot_col] 113 | rH, rW = 60, 80 # we train the algorithm using a downsized input, (since the size of the input is not super important at training time) 114 | 115 | B, C, H, W = depth0.shape 116 | rdepth = func.interpolate(depth0, size=(rH, rW), mode='bilinear') 117 | rinvalid = func.interpolate(invalid.float(), size=(rH, rW), mode='bilinear') 118 | rK = K.clone() 119 | rK[:, 0] *= float(rW) / W 120 | rK[:, 1] *= float(rH) / H 121 | rK[:, 2] *= float(rW) / W 122 | rK[:, 3] *= float(rH) / H 123 | xyz = geo.batch_inverse_project(rdepth, rK) 124 | flow_gt = geo.batch_transform_xyz(xyz, R_gt, t_gt, get_Jacobian=False) 125 | 126 | for idx in range(R_est.shape[1]): 127 | flow_est = geo.batch_transform_xyz(xyz, R_est[:, idx], t_est[:, idx], get_Jacobian=False) 128 | loss += EPE3D_loss(flow_est, flow_gt.detach(), invalid=rinvalid) # * (1< (B*H*w,3,1) 153 | deltaP = deltaP.view(B,3,-1).permute(0,2,1).contiguous().view(-1,3).unsqueeze(dim=-1) 154 | dim_ind = True 155 | if uncer_loss_type == 'gaussian': 156 | # if assume each dimension independent 157 | if dim_ind: 158 | diag_mask = torch.eye(3).view(1, 3, 3).type_as(variance) 159 | variance = torch.clamp(variance, min=1e-3) 160 | variance = diag_mask * variance 161 | 162 | # inv_var = invH(variance) 163 | inv_var = torch.inverse(variance) 164 | weighted_error = torch.bmm(deltaP.transpose(1, 2), torch.bmm(inv_var, deltaP)).squeeze() 165 | if dim_ind: 166 | regularization = variance.diagonal(dim1=1, dim2=2).log().sum(dim=1) 167 | else: 168 | det_var = torch.det(variance) 169 | # make it numerical stable 170 | det_var = torch.clamp(det_var, min=1e-3) 171 | regularization = torch.log(det_var) 172 | epe_map = weighted_error + regularization 173 | elif uncer_loss_type == 'laplacian': 174 | raise NotImplementedError("the bessel function needs to be implemented") 175 | sigma = torch.sqrt(variance + 1e-7) 176 | weighted_error = torch.abs(deltaP) / (sigma + 1e-7) 177 | regularization = torch.log(torch.det(sigma)) 178 | epe_map = weighted_error + regularization 179 | else: 180 | raise NotImplementedError() 181 | epe_map = epe_map.view(B,H,W) 182 | B = 
epe_map.shape[0] 183 | 184 | invalid_flow = (target_flow != target_flow) # check Nan same as torch.isnan 185 | 186 | mask = (invalid_flow[:,0,:,:] | invalid_flow[:,1,:,:] | invalid_flow[:,2,:,:]) 187 | if invalid is not None: 188 | mask = mask | (invalid.view(mask.shape) > 0) 189 | 190 | epes = [] 191 | o_epes = [] 192 | for idx in range(B): 193 | epe_sample = epe_map[idx][~mask[idx].data] 194 | if len(epe_sample) == 0: 195 | epes.append(torch.zeros(()).type_as(input_flow)) 196 | else: 197 | epes.append(epe_sample.mean()) 198 | 199 | if uncer_loss_type is not None: 200 | o_epe_sample = o_epe_map[idx][~mask[idx].data] 201 | if len(o_epe_sample) == 0: 202 | o_epes.append(torch.zeros(()).type_as(input_flow)) 203 | else: 204 | o_epes.append(o_epe_sample.mean()) 205 | if uncer_loss_type is not None: 206 | return torch.stack(epes), torch.stack(o_epes) 207 | else: 208 | return torch.stack(epes) 209 | 210 | 211 | def compute_RT_EPE_uncertainty_loss(R_est, t_est, R_gt, t_gt, depth0, K, sigma_ksi, uncertainty_type, invalid=None): 212 | """ Compute the epe point error of rotation & translation 213 | :param estimated rotation matrix BxNx3x3 214 | :param estimated translation vector BxNx3 215 | :param ground truth rotation matrix Bx3x3 216 | :param ground truth translation vector Bx3 217 | :param reference depth image, 218 | :param camera intrinsic 219 | """ 220 | 221 | loss = 0 222 | epe = 0 223 | assert R_est.dim() > 3 # training time [batch, num_poses, rot_row, rot_col] 224 | rH, rW = 60, 80 # we train the algorithm using a downsized input, (since the size of the input is not super important at training time) 225 | 226 | B,C,H,W = depth0.shape 227 | rdepth = func.interpolate(depth0, size=(rH, rW), mode='bilinear') 228 | rinvalid = func.interpolate(invalid.float(), size=(rH,rW), mode='bilinear') 229 | rK = K.clone() 230 | rK[:,0] *= float(rW) / W 231 | rK[:,1] *= float(rH) / H 232 | rK[:,2] *= float(rW) / W 233 | rK[:,3] *= float(rH) / H 234 | xyz = geo.batch_inverse_project(rdepth, rK) 235 | flow_gt = geo.batch_transform_xyz(xyz, R_gt, t_gt, get_Jacobian=False) 236 | 237 | for idx in range(R_est.shape[1]): 238 | flow_est, J_flow = geo.batch_transform_xyz(xyz, R_est[:,idx], t_est[:,idx], get_Jacobian=True) 239 | # uncertainty propagation: J*sigma*J^T 240 | sigma_ksai_level = sigma_ksi[:,idx:idx+1,:,:].repeat(1,rH*rW,1,1).view(-1,6,6) 241 | J_sigmaKsi = torch.bmm(J_flow, sigma_ksai_level) 242 | sigma_deltaP = torch.bmm(J_sigmaKsi, torch.transpose(J_flow, 1, 2)) 243 | loss_l, epe_l = UEPE3D_loss(flow_est, flow_gt.detach(), variance=sigma_deltaP, uncer_loss_type=uncertainty_type, invalid=rinvalid,) 244 | loss += loss_l 245 | epe += epe_l 246 | 247 | return loss, epe 248 | 249 | -------------------------------------------------------------------------------- /code/models/submodules.py: -------------------------------------------------------------------------------- 1 | """ 2 | Submodules to build up CNN 3 | 4 | # SPDX-FileCopyrightText: 2021 Binbin Xu 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | @author: Zhaoyang Lv 8 | @date: March, 2019 9 | """ 10 | 11 | from __future__ import print_function 12 | 13 | import torch.nn as nn 14 | import torch 15 | import numpy as np 16 | 17 | from torch.nn import init 18 | from torchvision import transforms 19 | 20 | def color_normalize(color): 21 | rgb_mean = torch.Tensor([0.4914, 0.4822, 0.4465]).type_as(color) 22 | rgb_std = torch.Tensor([0.2023, 0.1994, 0.2010]).type_as(color) 23 | return (color - rgb_mean.view(1,3,1,1)) / rgb_std.view(1,3,1,1) 24 | 25 | def 
convLayer(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, dilation=1, bias=False): 26 | """ A wrapper of convolution-batchnorm-ReLU module 27 | """ 28 | if batchNorm: 29 | return nn.Sequential( 30 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=bias), 31 | nn.BatchNorm2d(out_planes), 32 | #nn.LeakyReLU(0.1,inplace=True) # deprecated 33 | nn.ELU(inplace=True) 34 | ) 35 | else: 36 | return nn.Sequential( 37 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=True), 38 | #nn.LeakyReLU(0.1,inplace=True) # deprecated 39 | nn.ELU(inplace=True) 40 | ) 41 | 42 | def convLayer1d(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, dilation=1, bias=False): 43 | """ A wrapper of convolution-batchnorm-ReLU module 44 | """ 45 | if batchNorm: 46 | return nn.Sequential( 47 | nn.Conv1d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=bias), 48 | nn.BatchNorm1d(out_planes), 49 | nn.ELU(inplace=True) 50 | # nn.ReLU(inplace=True) 51 | ) 52 | else: 53 | return nn.Sequential( 54 | nn.Conv1d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=True), 55 | nn.ELU(inplace=True) 56 | # nn.ReLU(inplace=True) 57 | ) 58 | 59 | def fcLayer(in_planes, out_planes, bias=True): 60 | return nn.Sequential( 61 | nn.Linear(in_planes, out_planes, bias), 62 | nn.ReLU(inplace=True) 63 | ) 64 | 65 | def initialize_weights(modules, method='xavier'): 66 | for m in modules: 67 | if isinstance(m, nn.Conv2d): 68 | if m.bias is not None: 69 | m.bias.data.zero_() 70 | if method == 'xavier': 71 | init.xavier_uniform_(m.weight) 72 | elif method == 'kaiming': 73 | init.kaiming_uniform_(m.weight) 74 | 75 | if isinstance(m, nn.ConvTranspose2d): 76 | if m.bias is not None: 77 | m.bias.data.zero_() 78 | if method == 'xavier': 79 | init.xavier_uniform_(m.weight) 80 | elif method == 'kaiming': 81 | init.kaiming_uniform_(m.weight) 82 | 83 | class ListModule(nn.Module): 84 | """ The implementation of a list of modules from 85 | https://discuss.pytorch.org/t/list-of-nn-module-in-a-nn-module/219/2 86 | """ 87 | def __init__(self, *args): 88 | super(ListModule, self).__init__() 89 | idx = 0 90 | for module in args: 91 | self.add_module(str(idx), module) 92 | idx += 1 93 | 94 | def __getitem__(self, idx): 95 | if idx < 0 or idx >= len(self._modules): 96 | raise IndexError('index {} is out of range'.format(idx)) 97 | it = iter(self._modules.values()) 98 | for i in range(idx): 99 | next(it) 100 | return next(it) 101 | 102 | def __iter__(self): 103 | return iter(self._modules.values()) 104 | 105 | def __len__(self): 106 | return len(self._modules) 107 | -------------------------------------------------------------------------------- /code/run_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | An extremely simple example to show how to run the algorithm 3 | 4 | @author: Zhaoyang Lv 5 | @date: May 2019 6 | """ 7 | 8 | import argparse 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as func 13 | 14 | import models.LeastSquareTracking as ICtracking 15 | 16 | from tqdm import tqdm 17 | from torch.utils.data import DataLoader 18 | from train_utils import check_cuda 19 | from data.SimpleLoader import SimpleLoader 20 | 21 | def 
resize(img0, img1, depth0, depth1, K_in, resizeH, resizeW): 22 | H, W = img0.shape[-2:] 23 | 24 | I0 = func.interpolate(img0, (resizeH,resizeW), mode='bilinear', align_corners=True) 25 | I1 = func.interpolate(img1, (resizeH,resizeW), mode='bilinear', align_corners=True) 26 | D0 = func.interpolate(depth0, (resizeH,resizeW), mode='nearest') 27 | D1 = func.interpolate(depth1, (resizeH,resizeW), mode='nearest') 28 | 29 | sx = resizeH / H 30 | sy = resizeW / W 31 | 32 | K = K_in.clone() 33 | K[:,0] *= sx 34 | K[:,1] *= sy 35 | K[:,2] *= sx 36 | K[:,3] *= sy 37 | 38 | return I0, I1, D0, D1, K 39 | 40 | def run_inference(dataloader, net): 41 | 42 | progress = tqdm(dataloader, ncols=100, 43 | desc = 'Run the deeper inverse compositional algorithm', 44 | total= len(dataloader)) 45 | 46 | net.eval() 47 | 48 | for idx, batch, in enumerate(progress): 49 | 50 | color0, color1, depth0, depth1, K = check_cuda(batch) 51 | 52 | # downsize the input to 120*160, it is the size of data when the algorthm is trained 53 | C0, C1, D0, D1, K = resize(color0, color1, depth0, depth1, K, resizeH = 120, resizeW=160) 54 | 55 | with torch.no_grad(): 56 | R, t = net.forward(C0, C1, D0, D1, K) 57 | 58 | print('Rotation: ') 59 | print(R) 60 | print('translation: ') 61 | print(t) 62 | 63 | if __name__ == '__main__': 64 | 65 | parser = argparse.ArgumentParser(description='Run the network inference example.') 66 | 67 | parser.add_argument('--checkpoint', default='trained_models/TUM_RGBD_ABC_final.pth.tar', 68 | type=str, help='the path to the pre-trained checkpoint.') 69 | 70 | parser.add_argument('--color_dir', default='data/data_examples/TUM/color', 71 | help='the directory of color images') 72 | parser.add_argument('--depth_dir', default='data/data_examples/TUM/depth', 73 | help='the directory of depth images') 74 | parser.add_argument('--intrinsic', default='525.0,525.0,319.5,239.5', 75 | help='Simple pin-hole camera intrinsics, input in the format (fx, fy, cx, cy)') 76 | 77 | config = parser.parse_args() 78 | 79 | K = [float(x) for x in config.intrinsic.split(',')] 80 | 81 | simple_loader = SimpleLoader(config.color_dir, config.depth_dir, K) 82 | simple_loader = DataLoader(simple_loader, batch_size=1, shuffle=False) 83 | 84 | net = ICtracking.LeastSquareTracking( 85 | encoder_name = 'ConvRGBD2', 86 | max_iter_per_pyr= 3, 87 | mEst_type = 'MultiScale2w', 88 | solver_type = 'Direct-ResVol', 89 | uncertainty_type='None', 90 | direction='inverse', 91 | # options=options, 92 | ) 93 | 94 | # net = ICtracking.LeastSquareTracking( 95 | # encoder_name = 'RGB', 96 | # max_iter_per_pyr= 50, 97 | # mEst_type = 'None', 98 | # solver_type = 'Direct-Nodamping', 99 | # uncertainty_type='None', 100 | # direction='forward') 101 | 102 | if torch.cuda.is_available(): 103 | net.cuda() 104 | 105 | net.load_state_dict(torch.load(config.checkpoint)['state_dict']) 106 | 107 | run_inference(simple_loader, net) -------------------------------------------------------------------------------- /code/timers.py: -------------------------------------------------------------------------------- 1 | """ 2 | A timing utility 3 | 4 | @author: Zhaoyang Lv 5 | @date: March 2019 6 | """ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | from __future__ import unicode_literals 12 | 13 | import time 14 | import numpy as np 15 | from collections import defaultdict 16 | 17 | class Timer(object): 18 | 19 | def __init__(self): 20 | self.reset() 21 | 22 | def tic(self): 23 | 
self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | 30 | def total(self): 31 | """ return the total amount of time """ 32 | return self.total_time 33 | 34 | def avg(self): 35 | """ return the average amount of time """ 36 | return self.total_time / float(self.calls) 37 | 38 | def reset(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | 44 | class Timers(object): 45 | 46 | def __init__(self): 47 | self.timers = defaultdict(Timer) 48 | 49 | def tic(self, key): 50 | self.timers[key].tic() 51 | 52 | def toc(self, key): 53 | self.timers[key].toc() 54 | 55 | def print(self, key=None): 56 | if key is None: 57 | # print all time 58 | for k, v in self.timers.items(): 59 | print("Average time for {:}: {:}".format(k, v.avg())) 60 | else: 61 | print("Average time for {:}: {:}".format(key, self.timers[key].avg())) 62 | 63 | def get_avg(self, key): 64 | return self.timers[key].avg() 65 | -------------------------------------------------------------------------------- /code/tools/ICP.py: -------------------------------------------------------------------------------- 1 | """ 2 | An implementation of ICP odometry using Open3D library for comparison in the paper 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | import open3d as o3d 8 | import numpy as np 9 | import torch 10 | import copy 11 | import cv2 12 | 13 | class ICP_Odometry: 14 | 15 | def __init__(self, mode='Point2Plane'): 16 | self.mode = mode 17 | if mode == 'Point2Plane': 18 | print("Using Point-to-plane ICP") 19 | elif mode == 'Point2Point': 20 | print("Using Point-to-point ICP") 21 | elif mode == "ColorICP": 22 | print("using ColorICP") 23 | elif mode == 'Iter_Point2Plane': 24 | print("Using iterative Point-to-plane ICP") 25 | elif mode == "Iter_ColorICP": 26 | print("using iterative ColorICP") 27 | else: 28 | raise NotImplementedError() 29 | 30 | def set_K(self, K, width, height): 31 | fx, fy, cx, cy = K 32 | K = o3d.camera.PinholeCameraIntrinsic(width, height, fx, fy, cx, cy) 33 | return K 34 | 35 | def batch_track(self, batch_rgb0, batch_dpt0, batch_rgb1, batch_dpt1, batch_K, 36 | batch_objmask0=None, batch_objmask1=None, vis_pcd=False): 37 | assert batch_dpt0.ndim == 4 38 | B = batch_dpt0.shape[0] 39 | batch_R = [] 40 | batch_t = [] 41 | if batch_objmask0 is not None: 42 | batch_dpt0 = batch_dpt0 * batch_objmask0 43 | if batch_objmask1 is not None: 44 | batch_dpt1 = batch_dpt1 * batch_objmask1 45 | for i in range(B): 46 | rgb0 = batch_rgb0[i].permute(1, 2, 0).cpu().numpy() 47 | dpt0 = batch_dpt0[i].permute(1,2,0).cpu().numpy() 48 | rgb1 = batch_rgb1[i].permute(1, 2, 0).cpu().numpy() 49 | dpt1 = batch_dpt1[i].permute(1,2,0).cpu().numpy() 50 | K = batch_K[i].cpu().numpy().tolist() 51 | pose10 = self.track(rgb0, dpt0, rgb1, dpt1, K) 52 | batch_R.append(pose10[0]) 53 | batch_t.append(pose10[1]) 54 | 55 | batch_R = torch.tensor(batch_R).type_as(batch_K) 56 | batch_t = torch.tensor(batch_t).type_as(batch_K) 57 | return batch_R, batch_t 58 | 59 | def draw_registration_result(self, source, target, transformation, name='Open3D'): 60 | source_temp = copy.deepcopy(source) 61 | target_temp = copy.deepcopy(target) 62 | source_temp.paint_uniform_color([1, 0.706, 0]) 63 | target_temp.paint_uniform_color([0, 0.651, 0.929]) 64 | source_temp.transform(transformation) 65 | o3d.visualization.draw_geometries([source_temp, 
target_temp], window_name=name) 66 | 67 | def track(self, rgb0, dpt0, rgb1, dpt1, K, vis_pcd=True, odo_init=None): 68 | H, W, _ = dpt0.shape 69 | intrinsic = self.set_K(K, H, W) 70 | # pcd_0 = o3d.geometry.PointCloud.create_from_depth_image(o3d.geometry.Image(dpt0), 71 | # intrinsic=intrinsic, 72 | # depth_scale=1.0, 73 | # depth_trunc=5.0) 74 | # pcd_1 = o3d.geometry.PointCloud.create_from_depth_image(o3d.geometry.Image(dpt1), 75 | # intrinsic=intrinsic, 76 | # depth_scale=1.0, 77 | # depth_trunc=5.0) 78 | rgbd_0 = o3d.geometry.RGBDImage.create_from_color_and_depth( 79 | o3d.geometry.Image(rgb0), o3d.geometry.Image(dpt0), depth_scale=1, depth_trunc=4.0) 80 | rgbd_1 = o3d.geometry.RGBDImage.create_from_color_and_depth( 81 | o3d.geometry.Image(rgb1), o3d.geometry.Image(dpt1), depth_scale=1, depth_trunc=4.0) 82 | pcd_0 = o3d.geometry.PointCloud.create_from_rgbd_image( 83 | rgbd_0, intrinsic) 84 | pcd_1 = o3d.geometry.PointCloud.create_from_rgbd_image( 85 | rgbd_1, intrinsic) 86 | 87 | if odo_init is None: 88 | odo_init = np.identity(4) 89 | 90 | # point-to-point ICP 91 | if self.mode == 'Point2Point': 92 | reg_p2p = o3d.registration.registration_icp( 93 | pcd_0, pcd_1, 0.02, odo_init, 94 | o3d.registration.TransformationEstimationPointToPoint()) 95 | T_10 = reg_p2p.transformation 96 | 97 | # point-to-plane ICP 98 | elif self.mode == 'Point2Plane': 99 | # radius = 0.01 100 | # source_down = pcd_0.voxel_down_sample(radius) 101 | # target_down = pcd_1.voxel_down_sample(radius) 102 | # 103 | # # print("3-2. Estimate normal.") 104 | # source_down.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30)) 105 | # target_down.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30)) 106 | # reg_p2l = o3d.registration.registration_icp(source_down, target_down, 0.2, odo_init, 107 | # o3d.registration.TransformationEstimationPointToPlane(), 108 | # o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6, 109 | # relative_rmse=1e-6, 110 | # max_iteration=50) 111 | # 112 | # ) 113 | iter = 10 114 | pcd_0.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid( 115 | radius=0.1, max_nn=30)) 116 | pcd_1.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid( 117 | radius=0.1, max_nn=30)) 118 | reg_p2l = o3d.registration.registration_icp( 119 | pcd_0, pcd_1, 0.4, odo_init, 120 | o3d.registration.TransformationEstimationPointToPlane(), 121 | o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6, 122 | relative_rmse=1e-6, 123 | max_iteration=iter) 124 | ) 125 | 126 | T_10 = reg_p2l.transformation 127 | 128 | elif self.mode == 'ColorICP': 129 | pcd_0.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid( 130 | radius=0.1, max_nn=30)) 131 | pcd_1.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid( 132 | radius=0.1, max_nn=30)) 133 | reg_p2l = o3d.registration.registration_colored_icp( 134 | pcd_0, pcd_1, 0.02, odo_init,) 135 | T_10 = reg_p2l.transformation 136 | 137 | elif self.mode in ['Iter_Point2Plane', 'Iter_ColorICP']: 138 | voxel_radius = [0.04, 0.02, 0.01] 139 | max_iter = [50, 30, 14] 140 | T_10 = odo_init 141 | for scale in range(3): 142 | iter = max_iter[scale] 143 | radius = voxel_radius[scale] 144 | 145 | pcd0_down = pcd_0.voxel_down_sample(radius) 146 | pcd1_down = pcd_1.voxel_down_sample(radius) 147 | 148 | pcd0_down.estimate_normals( 149 | o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)) 150 | pcd1_down.estimate_normals( 151 | 
o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)) 152 | 153 | # point-to-plane ICP 154 | if self.mode == 'Iter_Point2Plane': 155 | result_icp = o3d.registration.registration_icp( 156 | pcd0_down, pcd1_down, radius, T_10, 157 | o3d.registration.TransformationEstimationPointToPlane(), 158 | o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6, 159 | relative_rmse=1e-6, 160 | max_iteration=iter) 161 | ) 162 | elif self.mode == 'Iter_ColorICP': 163 | # colored ICP 164 | result_icp = o3d.registration.registration_colored_icp( 165 | pcd0_down, pcd1_down, radius*2, T_10, 166 | o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6, 167 | relative_rmse=1e-6, 168 | max_iteration=iter)) 169 | else: 170 | raise NotImplementedError 171 | T_10 = result_icp.transformation 172 | else: 173 | raise NotImplementedError() 174 | 175 | # T_10 = result_icp.transformation 176 | trs = T_10[0:3, 3] 177 | rot = T_10[0:3, 0:3] 178 | pose10 = [rot, trs] 179 | 180 | if (trs > 1).sum(): 181 | print('pose', T_10) 182 | 183 | # cv2.imshow('rgb0', rgb0) 184 | # cv2.imshow('rgb1', rgb1) 185 | # cv2.waitKey(0) 186 | # self.draw_registration_result(pcd_0, pcd_1, odo_init, name='init') 187 | # self.draw_registration_result(pcd_0, pcd_1, T_10, name='aligned') 188 | 189 | T_10 = odo_init 190 | trs = T_10[0:3, 3] 191 | rot = T_10[0:3, 0:3] 192 | pose10 = [rot, trs] 193 | 194 | 195 | return pose10 -------------------------------------------------------------------------------- /code/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/tools/__init__.py -------------------------------------------------------------------------------- /code/tools/display.py: -------------------------------------------------------------------------------- 1 | """ 2 | visualisation tool for debugging and demo 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | import cv2 8 | # cv2.setNumThreads(0) 9 | 10 | import math 11 | import sys 12 | import numpy as np 13 | import torch 14 | 15 | def convert_flow_for_display(flow): 16 | """ 17 | Converts a 2D image (e.g. flow) to bgr 18 | 19 | :param flow: 20 | :type flow: optical flow of size [2, H, W] 21 | :return: 22 | :rtype: 23 | """ 24 | 25 | ang = np.arctan2(flow[1, :, :], flow[0, :, :]) 26 | ang[ang < 0] += 2 * np.pi 27 | ang /= 2 * np.pi 28 | mag = np.sqrt(flow[0, :, :] ** 2. + flow[1, :, :] ** 2.) 29 | mag = np.clip(mag / (np.percentile(mag, 99) + 1e-6), 0., 1.) 
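    # [editor's note] Hedged explanation added here, kept entirely in comments so
    # convert_flow_for_display() behaves exactly as before: `ang` above is the flow
    # direction remapped from [0, 2*pi) to [0, 1), and `mag` is the flow magnitude
    # normalised by its 99th percentile and clipped to [0, 1]. The next line packs
    # these into an HSV image in which hue encodes direction (ang * 180, OpenCV's
    # 0-180 hue range), saturation encodes magnitude (mag * 255) and value is a
    # constant 255, before converting to an RGB image in [0, 1] of shape [3, H, W].
    # Illustrative values (hypothetical, not taken from the source):
    #   flow = (+1, 0) -> ang = 0.00 -> hue  0 (red), saturation ~ magnitude
    #   flow = (0, +1) -> ang = 0.25 -> hue 45 (yellow-green)
    #   flow = (0, 0)  -> mag = 0    -> unsaturated (near-white) pixel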
30 | hfill_hsv = np.stack([ang * 180, mag * 255, np.ones_like(ang) * 255], 2).astype(np.uint8) 31 | flow_rgb = cv2.cvtColor(hfill_hsv, cv2.COLOR_HSV2RGB) / 255 32 | return np.transpose(flow_rgb, [2, 0, 1]) 33 | 34 | 35 | def single_image_tensor_mat(T): # [1, C, H, W] 36 | img_mat = T.squeeze(0).permute(1, 2, 0).numpy() 37 | show = cv2.cvtColor(img_mat, cv2.COLOR_BGR2RGB) 38 | return show 39 | 40 | 41 | def image_to_display(image, cmap=cv2.COLORMAP_JET, order='CHW', normalize=False): 42 | """ 43 | accepts a [1xHxW] or [DxHxW] float image with values in range [0,1] 44 | => change it range of (0, 255) for visualization 45 | :param image: 46 | :type image: 47 | :param cmap: cv2.COLORMAP_BONE or cv2.COLORMAP_JET, or NORMAL(no-processing) 48 | :type cmap: 49 | :param order: 50 | :type order: 51 | :param normalize: if true, noramalize to 0~1, otherwise clip to 0-1 52 | :type normalize: 53 | :return: a visiable BGR image in range(0,255), in fault a colored heat map (in JET color map) 54 | :rtype: opencv mat [H, W, C] 55 | """ 56 | if order is 'HWC' and len(image.shape) > 2: 57 | image = np.rollaxis(image, axis=2) 58 | # image = np.moveaxis(image, 2, 0) 59 | image = np.squeeze(image) # 2d or 3d 60 | 61 | if len(image.shape) == 3 and image.shape[0] == 2: 62 | image = convert_flow_for_display(image) 63 | 64 | if normalize: 65 | # handle nan pixels 66 | min_intensity = np.nanmin(image) 67 | max_intensity = np.nanmax(image) 68 | image = (image - min_intensity) / (max_intensity - min_intensity) 69 | image = np.uint8(image * 255) 70 | else: 71 | if image.dtype != np.uint8: 72 | image = np.clip(image, 0, 1) 73 | image = (image * 255).astype(np.uint8) 74 | # image = (image * 1).astype(np.uint8) 75 | 76 | if image.ndim == 3: 77 | if image.shape[0] == 3: 78 | image = np.transpose(image, [1, 2, 0]) 79 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 80 | elif image.ndim == 2: 81 | if cmap == "NORMAL": 82 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) 83 | else: 84 | image = cv2.applyColorMap(image, cmap) 85 | return image 86 | 87 | 88 | # image_array needs to be 2d 89 | def create_mosaic(image_array, cmap=None, points=None, order='CHW', normalize=False): 90 | """ 91 | Stitch array of images into a big concancated images 92 | 93 | :param image_array: subimages to be displayed 94 | :type image_array: Two-dimensional lists (arrays) , if in a stretch 1D lisr, will be stretched back 95 | each element is an image of [DxHxW] 96 | :param cmap: list of color map => common: cv2.COLORMAP_BONE or cv2.COLORMAP_JET or 'NORMAL' 97 | :type cmap: 98 | :param order: 99 | :type order: 100 | :param normalize: normalize to 0-1 range 101 | :type normalize: bool 102 | :return: image to be showed 103 | :rtype: numpy array 104 | """ 105 | batch_version = (len(image_array[0].shape) == 4) 106 | 107 | if not isinstance(image_array[0], list): # if image_array is a stretch 1D list 108 | image_size = math.ceil(math.sqrt(len(image_array))) # stretch back to 2D list [N by N] 109 | image_array = [image_array[i:min(i + image_size, len(image_array))] 110 | for i in range(0, len(image_array), image_size)] 111 | 112 | max_cols = max([len(row) for row in image_array]) # because not every row (1st array) has the same size 113 | rows = [] 114 | 115 | if cmap is None: 116 | cmap = [cv2.COLORMAP_JET] 117 | elif not isinstance(cmap, list): 118 | cmap = [cmap] 119 | 120 | if not isinstance(normalize, list): 121 | normalize = [normalize] 122 | 123 | if points is not None: 124 | if not isinstance(points, list): 125 | points = [points] 126 | 127 | i = 0 
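    # [editor's note] Hedged usage sketch, kept in comments so create_mosaic() is
    # unchanged. The loop below walks image_array in reading order; the counter `i`
    # advances once per tile, and cmap / normalize (and points, if given) are cycled
    # with `i % len(...)`, so a single-element list applies one setting to every tile
    # while a longer list assigns per-tile settings. Ragged rows are padded with
    # black (zero) tiles, each row is concatenated horizontally and the rows are then
    # stacked vertically. A call mirroring the one in experiments/warping_objects.py,
    # with four hypothetical [1, C, H, W] tensors stretched into a 2x2 grid:
    #   mosaic = create_mosaic([color0, color1, warped, residual],
    #                          cmap=['NORMAL', 'NORMAL', 'NORMAL', cv2.COLORMAP_JET],
    #                          order='CHW')
    #   cv2.imshow('mosaic', mosaic); cv2.waitKey(0)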
128 | for image_row in image_array: 129 | if len(image_row) == 0: 130 | continue 131 | image_row_processed = [] 132 | for image in image_row: 133 | if torch.is_tensor(image): 134 | if batch_version: 135 | image = image[0:1, :, :, :] 136 | if len(image.shape) == 4: #[B. C, H, W] 137 | image = image.squeeze(0) 138 | if order == 'CHW': 139 | image = image.permute(1, 2, 0) # [H, W, C] 140 | if image.shape[2] not in(0, 3): # sum all channel features 141 | image = image.sum(dim=2) 142 | image = image.cpu().numpy() 143 | image_colorized = image_to_display(image, cmap[i % len(cmap)], 144 | order, 145 | normalize[i % len(normalize)]) 146 | if points is not None: 147 | image_colorized = visualize_matches_on_image(image_colorized, points[i % len(points)]) 148 | image_row_processed.append(image_colorized) 149 | i += 1 150 | nimages = len(image_row_processed) 151 | if nimages < max_cols: # padding zero(black) images in the empty areas 152 | image_row_processed += [np.zeros_like(image_row_processed[-1])] * (max_cols - nimages) 153 | rows.append(np.concatenate(image_row_processed, axis=1)) #horizontally 154 | return np.concatenate(rows, axis=0) # vertically 155 | 156 | 157 | def visualize_image_features(img_list: list, feat_list: list, feat_cmap=cv2.COLORMAP_WINTER): 158 | feat_sum_list = [feat[0, :, :, :].sum(dim=0) for feat in feat_list] 159 | if feat_cmap is not None and feat_cmap is not list: 160 | feat_cmap = [feat_cmap] * len(feat_sum_list) 161 | img_cmap = ['NORMAL'] * len(img_list) 162 | cmap = img_cmap + feat_cmap 163 | img_show = create_mosaic(img_list + feat_sum_list, cmap=cmap) 164 | 165 | return img_show 166 | 167 | 168 | def visualize_matches_on_image(image, matches): 169 | """ 170 | :param image: [] 171 | :type image: Union[torch.Tensor, numpy.ndarray] 172 | :param matches: 173 | :type matches: torch.Tensor 174 | :return: 175 | :rtype: None 176 | """ 177 | num_matches = matches.shape[1] 178 | if torch.is_tensor(image): 179 | image = image.detach().cpu().numpy() 180 | if torch.is_tensor(matches): 181 | matches = matches.detach().cpu().numpy() 182 | # just for visualization, round it: 183 | matches = matches.astype(int) 184 | output = image.copy() 185 | red = (0, 0, 255) 186 | alpha = 0.6 187 | radius = int(image.shape[1] / 64) # should be 10 when the image width is 640 188 | for i in range(num_matches): 189 | image = cv2.circle(image, (matches[0, i], matches[1, i]), radius, red, -1) 190 | #blend 191 | output = cv2.addWeighted(image, alpha, output, 1 - alpha, 0, ) 192 | return output 193 | 194 | 195 | def visualize_feature_channels(feat_map, rgb=None, points=None, order='CHW', add_ftr_avg=True): 196 | """ 197 | :param points: points to draw on images 198 | :type points: torch.Tensor [2, #samples] 199 | :param feat_map: 200 | :type feat_map: torch.Tensor [B, H, W, C] 201 | :param rgb: 202 | :type rgb: numpy.ndarray [H, W, C] or [B, C, H, W] 203 | :param order: 'HWC' or 'CHW' 204 | :type order: str 205 | :return: 206 | :rtype: numpy.ndarray 207 | """ 208 | assert len(feat_map.shape) == 4, "feature-map should be a 4-dim tensor" 209 | assert order in ['HWC', 'CHW'] 210 | 211 | batch_version = (feat_map.shape[0] != 1) 212 | feat_map = feat_map.detach() 213 | if points is not None: points = points.detach() 214 | if not batch_version: 215 | feat_map = feat_map.squeeze(dim=0) 216 | if points is not None: points = points.squeeze() 217 | else: 218 | # if in batch, only visualize the 1st feature map 219 | feat_map = feat_map[0, :, :, :] 220 | if points is not None: points = points[0, :, :] 221 | 222 | 
if order == 'CHW': 223 | feat_map = feat_map.permute(1, 2, 0) # convert to [H, W, C] 224 | D = feat_map.shape[2] 225 | feat_map_sum = feat_map.sum(dim=2) 226 | 227 | if rgb is not None: 228 | if torch.is_tensor(rgb) and len(rgb.shape) == 4: 229 | rgb = rgb.detach() 230 | if not batch_version: 231 | rgb = rgb.squeeze() 232 | else: 233 | # if in batch, only visualize the 1st feature map 234 | rgb = rgb[0, :, :, :] 235 | rgb = rgb.permute(1, 2, 0) # convert to [H, W, C] 236 | if add_ftr_avg: 237 | feat_map_channel_list = [rgb, feat_map_sum] 238 | else: 239 | feat_map_channel_list = [rgb] 240 | else: 241 | if add_ftr_avg: 242 | feat_map_channel_list = [feat_map_sum] 243 | else: 244 | feat_map_channel_list = [] 245 | 246 | for d in range(D): 247 | feat_map_channel = feat_map[:, :, d] 248 | feat_map_channel_list.append(feat_map_channel) 249 | 250 | cmap = [cv2.COLORMAP_JET] * (D + 1) 251 | if rgb is not None: 252 | cmap = ['NORMAL'] + cmap 253 | feature_channels = create_mosaic(feat_map_channel_list, cmap=cmap, points=points, order='HWC', normalize=True) 254 | return feature_channels 255 | 256 | 257 | def normalize_descriptor_channel_wise(res): 258 | """ 259 | Normalizes the descriptor into RGB color space for each channel 260 | :param res: numpy.array [H,W,D] 261 | Output of the network, per-pixel dense descriptor 262 | :param stats: dict, with fields ['min', 'max', 'mean'], which are used to normalize descriptor 263 | :return: numpy.array 264 | normalized descriptor [H,W,D] 265 | """ 266 | 267 | # get #channel 268 | D = np.shape(res)[-1] 269 | normed_res = np.zeros_like(res) 270 | eps = 1e-10 271 | 272 | for d in range(D): 273 | res_min = np.min(res[:, :, d]) 274 | res_max = np.max(res[:, :, d]) 275 | scale_factor = res_max - res_min + eps 276 | normed_res[:, :, d] = (res[:, :, d] - res_min) / scale_factor 277 | 278 | return normed_res 279 | 280 | 281 | def colorize(hue, lightness, normalize_hue=True, lightness_range=1.0): 282 | """ 283 | Project images onto input images 284 | hue is normalized channel&image-wise 285 | 286 | :param hue: Features to be visualized 287 | :type hue: size of [#batch, #channel, H, W], ith its range is supposed to be [-1.0, 1.0] 288 | :param lightness: input image (grey) 289 | :type lightness: size [#batch, 1, H, W], its value range is supposed to be [0, 1.0] 290 | :param normalize_hue: normalize hue to [0, 1] 291 | :type normalize_hue: 292 | :param lightness_range: 293 | :type lightness_range: 294 | :return: hue overlapped on the lightness 295 | :rtype: size of [#batch, #channel, 3, H, W] 296 | """ 297 | # process the input value to be visualisation range 298 | lightness /= lightness_range 299 | 300 | out = np.zeros(list(hue.shape) + [3]) # now size become [#batch, #channel, H, W, 3] 301 | if normalize_hue: 302 | image_num = np.shape(hue)[0] 303 | normed_hue = np.zeros_like(hue) 304 | # for i in xrange(image_num): 305 | # hue_per_image = hue[i, :, :, :] 306 | # hue_per_image = np.transpose(hue_per_image, [1, 2, 0]) 307 | # normalized_hue_image = normalize_descriptor_channel_wise(hue_per_image) 308 | # normalized_hue_image = np.transpose(normalized_hue_image, [2, 0, 1]) 309 | # normalize_hue[i, :, :, :] = normalized_hue_image 310 | channel_num = np.shape(hue)[1] 311 | eps = 1e-10 312 | 313 | for i in range(image_num): 314 | for c in range(channel_num): 315 | hue_min = np.min(hue[i, c, :, :]) 316 | hue_max = np.max(hue[i, c, :, :]) 317 | scale_factor = hue_max - hue_min + eps 318 | normed_hue[i, c, :, :] = (hue[i, c, :, :] - hue_min) / scale_factor 319 | else: 320 | 
normed_hue = np.clip(hue, 0, 1.0) * 0.5 + 0.5 321 | out[:, :, :, :, 0] = normed_hue * 120.0 / 255.0 322 | out[:, :, :, :, 1] = (lightness - 0.5) * 0.5 + 0.5 323 | # out[:, :, :, :, 2] = np.ones(hue.shape) * (np.abs(np.clip(hue, -1.0, 1.0) * 1.0) + 0.0) 324 | out[:, :, :, :, 2] = np.ones(hue.shape) # * (normed_hue) 325 | out = np.reshape(out, [-1, hue.shape[3], 3]) # [#batch * #channel * H, W, 3] 326 | out = cv2.cvtColor((out * 255).astype(np.uint8), cv2.COLOR_HLS2RGB).astype(np.float32) / 255 327 | out = np.reshape(out, list(hue.shape) + [3]) # [#batch, #channel, H, W, 3] 328 | out = np.transpose(out, [0, 1, 4, 2, 3]) # [#batch, #channel, 3, H, W]. this is to meet the create_mosaic function 329 | return out 330 | 331 | 332 | def visualise_frames(mat, name, max_img_visual=None, max_channel_visual=None, step_image=1, step_channel=1, 333 | mosaic_save=None): 334 | """ 335 | visualize batches of images in n-dimsional 336 | :param mat: images to be showed 337 | :type mat: numpy array of [#batch, #channel, 3, H, W] 338 | :param name: opencv window name 339 | :type name: string 340 | :param max_img_visual: image number to be showed 341 | :type max_img_visual: int 342 | :param max_channel_visual:channel number to be shoed 343 | :type max_channel_visual: int 344 | :param step_image: the step to skip image number 345 | :type step_image: int 346 | :param step_channel: the step to skip channel number 347 | :type step_channel: int 348 | :param mosaic_save: if not none, the directory to save the mosaic image 349 | :type mosaic_save: string 350 | :return: mosaic image -> deprecated currently 351 | :rtype: 352 | """ 353 | array = list() 354 | max_img = mat.shape[0] if max_img_visual is None else min(max_img_visual, mat.shape[0]) 355 | max_channel = mat.shape[1] if max_channel_visual is None else min(max_channel_visual, mat.shape[1]) 356 | for i in range(0, max_img, step_image): 357 | sub = [] 358 | for j in range(0, max_channel, step_channel): 359 | sub.append(mat[i, j]) 360 | array.append(sub) 361 | mosaic = create_mosaic(array) 362 | cv2.namedWindow(name, cv2.WINDOW_NORMAL) 363 | cv2.imshow(name, mosaic) 364 | if mosaic is not None: 365 | cv2.imwrite(mosaic_save, mosaic) 366 | return mosaic 367 | 368 | 369 | 370 | class Toolbar: 371 | def reset(self, width, tot, title=None): 372 | self.width = max(int(min(width, tot)), 1) 373 | self.tot = int(max(tot, 1)) 374 | self.current = 0 375 | if title is not None: 376 | print(title) 377 | sys.stdout.write("[%s]" % (" " * self.width)) 378 | sys.stdout.flush() 379 | sys.stdout.write("\b" * (self.width + 1)) 380 | 381 | def incr(self): 382 | n = self.current + 1 383 | if math.floor(n * self.width / self.tot) > math.floor(self.current * self.width / self.tot): 384 | sys.stdout.write("-") 385 | sys.stdout.flush() 386 | self.current = n 387 | if self.current == self.tot: 388 | sys.stdout.write("\n") 389 | -------------------------------------------------------------------------------- /code/tools/rgbd_odometry.py: -------------------------------------------------------------------------------- 1 | """ 2 | An implementation of RGBD odometry using Open3D library for comparison in the paper 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | import open3d as o3d 8 | import numpy as np 9 | import torch 10 | import copy 11 | 12 | class RGBDOdometry(): 13 | 14 | def __init__(self, mode='RGBD'): 15 | 16 | self.odo_opt = None 17 | if mode == "RGBD": 18 | print("Using RGB-D Odometry") 19 | self.odo_opt = 
o3d.odometry.RGBDOdometryJacobianFromColorTerm() 20 | elif mode == "COLOR_ICP": 21 | print("Using Hybrid RGB-D Odometry") 22 | self.odo_opt = o3d.odometry.RGBDOdometryJacobianFromHybridTerm() 23 | else: 24 | raise NotImplementedError() 25 | 26 | def set_K(self, K, width, height): 27 | fx, fy, cx, cy = K 28 | K = o3d.camera.PinholeCameraIntrinsic(width, height, fx, fy, cx, cy) 29 | return K 30 | 31 | def batch_track(self, batch_rgb0, batch_dpt0, batch_rgb1, batch_dpt1, batch_K, 32 | batch_objmask0=None, batch_objmask1=None, vis_pcd=True): 33 | assert batch_rgb0.ndim == 4 34 | B = batch_rgb0.shape[0] 35 | batch_R = [] 36 | batch_t = [] 37 | if batch_objmask0 is not None: 38 | batch_dpt0 = batch_dpt0 * batch_objmask0 39 | if batch_objmask1 is not None: 40 | batch_dpt1 = batch_dpt1 * batch_objmask1 41 | for i in range(B): 42 | rgb0 = batch_rgb0[i].permute(1,2,0).cpu().numpy() 43 | dpt0 = batch_dpt0[i].permute(1,2,0).cpu().numpy() 44 | rgb1 = batch_rgb1[i].permute(1,2,0).cpu().numpy() 45 | dpt1 = batch_dpt1[i].permute(1,2,0).cpu().numpy() 46 | K = batch_K[i].cpu().numpy().tolist() 47 | pose10, _ = self.track(rgb0, dpt0, rgb1, dpt1, K) 48 | batch_R.append(pose10[0]) 49 | batch_t.append(pose10[1]) 50 | 51 | batch_R = torch.tensor(batch_R).type_as(batch_K) 52 | batch_t = torch.tensor(batch_t).type_as(batch_K) 53 | return batch_R, batch_t 54 | 55 | def draw_registration_result(self, source, target, transformation, name='Open3D'): 56 | source_temp = copy.deepcopy(source) 57 | target_temp = copy.deepcopy(target) 58 | source_temp.paint_uniform_color([1, 0.706, 0]) 59 | target_temp.paint_uniform_color([0, 0.651, 0.929]) 60 | source_temp.transform(transformation) 61 | o3d.visualization.draw_geometries([source_temp, target_temp], window_name=name) 62 | 63 | def track(self, rgb0, dpt0, rgb1, dpt1, K, vis_pcd=True, odo_init=None): 64 | H, W, _ = rgb0.shape 65 | intrinsic = self.set_K(K, H, W) 66 | rgbd_0 = o3d.geometry.RGBDImage.create_from_color_and_depth( 67 | o3d.geometry.Image(rgb0), o3d.geometry.Image(dpt0), depth_scale=1, depth_trunc=3.0) 68 | rgbd_1 = o3d.geometry.RGBDImage.create_from_color_and_depth( 69 | o3d.geometry.Image(rgb1), o3d.geometry.Image(dpt1), depth_scale=1, depth_trunc=3.0) 70 | if odo_init is None: 71 | odo_init = np.identity(4) 72 | if vis_pcd: 73 | pcd_0 = o3d.geometry.PointCloud.create_from_rgbd_image( 74 | rgbd_0, intrinsic) 75 | pcd_1 = o3d.geometry.PointCloud.create_from_rgbd_image( 76 | rgbd_1, intrinsic) 77 | 78 | # option = o3d.odometry.OdometryOption() 79 | option = o3d.odometry.OdometryOption(min_depth=0.01, max_depth_diff=1.0) 80 | # print(option) 81 | 82 | [is_success, T_10, info] = o3d.odometry.compute_rgbd_odometry( 83 | rgbd_0, rgbd_1, intrinsic, 84 | odo_init, self.odo_opt, option) 85 | 86 | trs = T_10[0:3, 3] 87 | if (trs>1).sum(): #is_success and vis_pcd: 88 | print(T_10) 89 | print(is_success) 90 | # pcd_0 = o3d.geometry.PointCloud.create_from_rgbd_image( 91 | # rgbd_0, intrinsic) 92 | # pcd_0.transform(T_10) 93 | # o3d.visualization.draw_geometries([pcd_1, pcd_0]) 94 | self.draw_registration_result(pcd_0, pcd_1, odo_init, 'init') 95 | self.draw_registration_result(pcd_0, pcd_1, T_10, 'aligned') 96 | 97 | trs = T_10[0:3, 3] 98 | rot = T_10[0:3, 0:3] 99 | pose10 = [rot, trs] 100 | 101 | return pose10, is_success -------------------------------------------------------------------------------- /code/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | The training script for Deep Probabilistic Feature-metric 
Tracking, 3 | 4 | # SPDX-FileCopyrightText: 2021 Binbin Xu 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | @author: Zhaoyang Lv 8 | @date: March 2019 9 | """ 10 | 11 | import os, sys, argparse, time 12 | import evaluate as eval_utils 13 | import models.LeastSquareTracking as ICtracking 14 | import models.criterions as criterions 15 | import models.geometry as geometry 16 | import train_utils 17 | import config 18 | from data.dataloader import load_data 19 | from Logger import log_git_revisions_hash 20 | 21 | import torch 22 | import torch.nn as nn 23 | import torch.utils.data as data 24 | 25 | from timers import Timers 26 | from tqdm import tqdm 27 | 28 | 29 | class data_prefetcher(): 30 | def __init__(self, loader): 31 | self.loader = iter(loader) 32 | self.stream = torch.cuda.Stream() 33 | # self.mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).cuda().view(1,3,1,1) 34 | # self.std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).cuda().view(1,3,1,1) 35 | # With Amp, it isn't necessary to manually convert data to half. 36 | # if args.fp16: 37 | # self.mean = self.mean.half() 38 | # self.std = self.std.half() 39 | self.preload() 40 | #self.next_batch = None 41 | 42 | def preload(self): 43 | try: 44 | self.next_batch = next(self.loader) 45 | except StopIteration: 46 | self.next_batch = None 47 | return 48 | with torch.cuda.stream(self.stream): 49 | for k, val in enumerate(self.next_batch): 50 | if torch.is_tensor(val): 51 | self.next_batch[k] = self.next_batch[k].cuda(non_blocking=True) 52 | 53 | # With Amp, it isn't necessary to manually convert data to half. 54 | # if args.fp16: 55 | # self.next_input = self.next_input.half() 56 | # else: 57 | # self.next_input = self.next_input.float() 58 | 59 | def next(self): 60 | torch.cuda.current_stream().wait_stream(self.stream) 61 | next_batch = self.next_batch 62 | self.preload() 63 | return next_batch 64 | 65 | def create_train_eval_loaders(options, eval_type, keyframes, 66 | total_batch_size = 8, 67 | trajectory = ''): 68 | """ create the evaluation loader at different keyframes set-up 69 | """ 70 | eval_loaders = {} 71 | 72 | for kf in keyframes: 73 | if options.image_resize is not None: 74 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory, 75 | image_resize=options.image_resize, 76 | options=options) 77 | else: 78 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory, options=options) 79 | eval_loaders['{:}_keyframe_{:}'.format(trajectory, kf)] = data.DataLoader(np_loader, 80 | pin_memory=True, 81 | batch_size = int(total_batch_size), 82 | shuffle = False, num_workers = options.cpu_workers) 83 | 84 | return eval_loaders 85 | 86 | def train_one_epoch(options, dataloader, net, optim, epoch, logger, objectives, 87 | known_mask=False, timers=None): 88 | 89 | net.train() 90 | 91 | # prefetcher = data_prefetcher(dataloader) 92 | 93 | progress = tqdm(dataloader, ncols=100, 94 | desc = 'train deeper inverse compositional algorithm #epoch{:}'.format(epoch), 95 | total= len(dataloader)) 96 | 97 | epoch_len = len(dataloader) 98 | 99 | if timers is None: timers_iter = Timers() 100 | 101 | if timers: timers.tic('one iteration') 102 | else: timers_iter.tic('one iteration') 103 | 104 | for batch_idx, batch in enumerate(progress): 105 | 106 | # batch = prefetcher.next() 107 | # batch_idx = 0 108 | # with tqdm(total=epoch_len) as pbar: 109 | # while batch is not None: 110 | # batch_idx += 1 111 | # if batch_idx >= epoch_len: 112 | # break 113 | 114 | iteration = epoch*epoch_len + batch_idx 115 | 
display_dict = {} 116 | 117 | optim.zero_grad() 118 | 119 | if timers: timers.tic('forward step') 120 | 121 | if known_mask: # for dataset that with mask or need mask 122 | color0, color1, depth0, depth1, Rt, K, obj_mask0, obj_mask1 = \ 123 | train_utils.check_cuda(batch[:8]) 124 | else: 125 | color0, color1, depth0, depth1, Rt, K = \ 126 | train_utils.check_cuda(batch[:6]) 127 | obj_mask0, obj_mask1 = None, None 128 | 129 | # Bypass lazy way to bypass invalid pixels. 130 | invalid_mask = (depth0 == depth0.min()) | (depth0 == depth0.max()) 131 | if obj_mask0 is not None: 132 | invalid_mask = ~obj_mask0 | invalid_mask 133 | 134 | if options.train_uncer_prop: 135 | if options.obj_only: 136 | Rs, ts, sigma_ksi = net.forward(color0, color1, depth0, depth1, K, 137 | obj_mask0=obj_mask0, obj_mask1=obj_mask1, 138 | logger=logger, 139 | vis=options.vis_feat, 140 | iteration=iteration)[:3] 141 | else: 142 | Rs, ts, sigma_ksi = net.forward(color0, color1, depth0, depth1, K, 143 | logger=logger, 144 | vis=options.vis_feat, 145 | iteration=iteration)[:3] 146 | else: 147 | if options.obj_only: 148 | Rs, ts = net.forward(color0, color1, depth0, depth1, K, 149 | obj_mask0=obj_mask0, obj_mask1=obj_mask1, 150 | logger=logger, 151 | vis=options.vis_feat, 152 | iteration=iteration)[:2] 153 | else: 154 | Rs, ts = net.forward(color0, color1, depth0, depth1, K, 155 | logger=logger, 156 | vis=options.vis_feat, 157 | iteration=iteration)[:2] 158 | 159 | if timers: timers.toc('forward step') 160 | if timers: timers.tic('calculate loss') 161 | 162 | R_gt, t_gt = Rt[:,:3,:3], Rt[:,:3,3] 163 | 164 | # assert(flow_loss) # the only loss used for training 165 | # we want to compute epe anyway 166 | flow_loss = criterions.compute_RT_EPE_loss 167 | 168 | epes3d = flow_loss(Rs, ts, R_gt, t_gt, depth0, K, invalid=invalid_mask).mean() * 1e2 169 | if 'EPE3D' in objectives: 170 | loss = epes3d 171 | elif 'RPE' in objectives: 172 | angle_error, trans_error = criterions.compute_RPE_loss(Rs, ts, R_gt, t_gt) 173 | loss = angle_error + trans_error 174 | elif 'URPE' in objectives: 175 | assert options.train_uncer_prop 176 | loss = criterions.compute_RPE_uncertainty(Rs, ts, R_gt, t_gt, sigma_ksi) 177 | elif 'UEPE' in objectives: 178 | loss = criterions.compute_RT_EPE_uncertainty_loss(Rs, ts, R_gt, t_gt, depth0, K, sigma_ksi=sigma_ksi, uncertainty_type=options.uncertainty, invalid=invalid_mask) 179 | 180 | display_dict['train_epes3d'] = epes3d.item() 181 | display_dict['train_loss'] = loss.item() 182 | 183 | if timers: timers.toc('calculate loss') 184 | if timers: timers.tic('backward') 185 | 186 | loss.backward() 187 | 188 | if timers: timers.toc('backward') 189 | torch.nn.utils.clip_grad_norm_(net.parameters(), 5.0) 190 | # if options.uncertainty == 'gaussian': 191 | # torch.nn.utils.clip_grad_norm_(net.parameters(), 5.0) 192 | optim.step() 193 | 194 | lr = train_utils.get_learning_rate(optim) 195 | display_dict['lr'] = lr 196 | 197 | if timers: 198 | timers.toc('one iteration') 199 | batch_time = timers.get_avg('one iteration') 200 | timers.tic('one iteration') 201 | else: 202 | timers_iter.toc('one iteration') 203 | batch_time = timers_iter.get_avg('one iteration') 204 | timers_iter.tic('one iteration') 205 | 206 | logger.write_to_tensorboard(display_dict, iteration) 207 | logger.write_to_terminal(display_dict, epoch, batch_idx, epoch_len, batch_time, is_train=True) 208 | 209 | # batch = prefetcher.next() 210 | # pbar.update(1) 211 | 212 | def train(options): 213 | 214 | if options.time: 215 | timers = Timers() 216 | else: 217 | 
timers = None 218 | 219 | total_batch_size = options.batch_per_gpu * torch.cuda.device_count() 220 | 221 | checkpoint = train_utils.load_checkpoint_train(options) 222 | 223 | keyframes = [int(x) for x in options.keyframes.split(',')] 224 | if options.image_resize is not None: 225 | train_loader = load_data(options.dataset, keyframes, load_type='train', 226 | image_resize=options.image_resize, options=options) 227 | else: 228 | train_loader = load_data(options.dataset, keyframes, load_type = 'train', options=options) 229 | train_loader = data.DataLoader(train_loader, 230 | batch_size = total_batch_size, 231 | pin_memory=True, 232 | shuffle = True, num_workers = options.cpu_workers) 233 | if options.dataset in ['BundleFusion', 'TUM_RGBD', 'ScanNet']: 234 | obj_has_mask = False 235 | else: 236 | obj_has_mask = True 237 | 238 | eval_loaders = create_train_eval_loaders(options, 'validation', keyframes, total_batch_size) 239 | 240 | logfile_name = '_'.join([ 241 | options.prefix, # the current test version 242 | # options.network, 243 | options.encoder_name, 244 | options.mestimator, 245 | options.solver, 246 | options.dataset, 247 | 'obj', str(options.obj_only), 248 | 'uCh', str(options.uncertainty_channel), 249 | options.uncertainty, 250 | 'rmT', str(options.remove_tru_sigma), 251 | # options.direction, 252 | 'fCh', str(options.feature_channel), 253 | options.feature_extract, 254 | 'iP', options.init_pose, 255 | 'mH', options.multi_hypo, 256 | # 'resInput', str(options.res_input), 257 | # 'initScale', str(options.scale_init_pose), 258 | # 'uncer_prop', str(options.train_uncer_prop), 259 | 'wICP', str(options.combine_ICP), 260 | 's', options.scaler, 261 | 'lr', str(options.lr), 262 | 'batch', str(total_batch_size), 263 | # 'kf', options.keyframes 264 | ]) 265 | 266 | print("Initialize and train the Deep Trust Region Network") 267 | net = ICtracking.LeastSquareTracking( 268 | encoder_name = options.encoder_name, 269 | uncertainty_type= options.uncertainty, 270 | direction = options.direction, 271 | max_iter_per_pyr= options.max_iter_per_pyr, 272 | mEst_type = options.mestimator, 273 | solver_type = options.solver, 274 | options = options, 275 | tr_samples = options.tr_samples, 276 | add_init_noise = options.add_init_pose_noise, 277 | no_weight_sharing = options.no_weight_sharing, 278 | timers = timers) 279 | 280 | if options.no_weight_sharing: 281 | logfile_name += '_no_weight_sharing' 282 | logger = train_utils.initialize_logger(options, logfile_name) 283 | log_git_revisions_hash(logger.log_dir) 284 | with open(os.path.join(logger.log_dir,'commandline_args.txt'), 'w') as f: 285 | f.write('\n'.join(sys.argv[1:])) 286 | 287 | if options.checkpoint: 288 | net.load_state_dict(checkpoint['state_dict']) 289 | 290 | if torch.cuda.is_available(): 291 | net.cuda() 292 | 293 | net.train() 294 | 295 | if torch.cuda.device_count() > 1: 296 | print("Use", torch.cuda.device_count(), "GPUs for training!") 297 | # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs 298 | net = nn.DataParallel(net) 299 | 300 | train_objective = [options.loss] # ['EPE3D'] # Note: we don't use RPE for training 301 | eval_objectives = ['EPE3D', 'RPE'] 302 | 303 | num_params = train_utils.count_parameters(net) 304 | 305 | if num_params < 1: 306 | print('There is no learnable parameters in this baseline.') 307 | print('No training. 
Only one iteration of evaluation') 308 | no_training = True 309 | else: 310 | print('There is a total of {:} learnabled parameters'.format(num_params)) 311 | no_training = False 312 | optim = train_utils.create_optim(options, net) 313 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optim, 314 | milestones=options.lr_decay_epochs, 315 | gamma=options.lr_decay_ratio) 316 | 317 | freq = options.save_checkpoint_freq 318 | for epoch in range(options.start_epoch, options.epochs): 319 | 320 | if epoch % freq == 0: 321 | checkpoint_name = 'checkpoint_epoch{:d}.pth.tar'.format(epoch) 322 | print('save {:}'.format(checkpoint_name)) 323 | state_info = {'epoch': epoch, 'num_param': num_params} 324 | logger.save_checkpoint(net, state_info, filename=checkpoint_name) 325 | 326 | if options.no_val is False: 327 | for k, loader in eval_loaders.items(): 328 | 329 | eval_name = '{:}_{:}'.format(options.dataset, k) 330 | 331 | eval_info = eval_utils.evaluate_trust_region( 332 | loader, net, eval_objectives, 333 | known_mask = obj_has_mask, 334 | eval_name = eval_name, 335 | timers = timers, 336 | logger=logger, 337 | obj_only=options.obj_only, 338 | epoch=epoch, 339 | tracker='learning_based', 340 | ) 341 | 342 | display_dict = {"{:}_epe3d".format(eval_name): eval_info['epes'].mean(), 343 | "{:}_rpe_angular".format(eval_name): eval_info['angular_error'].mean(), 344 | "{:}_rpe_translation".format(eval_name): eval_info['translation_error'].mean()} 345 | 346 | logger.write_to_tensorboard(display_dict, epoch) 347 | 348 | if no_training: break 349 | 350 | train_one_epoch(options, train_loader, net, optim, epoch, logger, 351 | train_objective, known_mask=obj_has_mask, timers=timers) 352 | 353 | scheduler.step() 354 | 355 | if __name__ == '__main__': 356 | 357 | parser = argparse.ArgumentParser(description='Training the network') 358 | 359 | config.add_basics_config(parser) 360 | config.add_train_basics_config(parser) 361 | config.add_train_optim_config(parser) 362 | config.add_train_log_config(parser) 363 | config.add_train_loss_config(parser) 364 | config.add_tracking_config(parser) 365 | 366 | options = parser.parse_args() 367 | 368 | options.start_epoch = 0 369 | 370 | print('---------------------------------------') 371 | print_options = vars(options) 372 | for key in print_options.keys(): 373 | print(key+': '+str(print_options[key])) 374 | print('---------------------------------------') 375 | 376 | # torch.backends.cudnn.benchmark = True 377 | torch.manual_seed(1) 378 | torch.cuda.manual_seed(1) 379 | 380 | print('Start training...') 381 | train(options) 382 | -------------------------------------------------------------------------------- /code/train_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | The training utility functions 3 | 4 | @author: Zhaoyang Lv 5 | @date: March 2019 6 | """ 7 | 8 | import os, sys 9 | from os.path import join 10 | import torch 11 | import torch.nn as nn 12 | 13 | def check_cuda(items): 14 | if torch.cuda.is_available(): 15 | to_cuda=[] 16 | for x in items: 17 | if torch.is_tensor(x): 18 | to_cuda.append(x.cuda()) 19 | elif type(x).__module__ == 'numpy': 20 | to_cuda.append(torch.from_numpy(x).cuda()) 21 | else: 22 | to_cuda.append(x) 23 | return to_cuda 24 | # return [x.cuda() if torch.is_tensor(x) else torch.from_numpy(x).cuda() for x in items] 25 | else: 26 | return items 27 | 28 | def initialize_logger(opt, logfile_name): 29 | """ Initialize the logger for the network 30 | """ 31 | from Logger import TensorBoardLogger 32 
| log_dir = opt.dataset 33 | # if opt.resume_training: 34 | # logfile_name = '_'.join([ 35 | # logfile_name, 36 | # 'resume']) 37 | 38 | log_dir = join('logs', log_dir, opt.checkpoint_folder, logfile_name) 39 | logger = TensorBoardLogger(log_dir, logfile_name) 40 | return logger 41 | 42 | def create_optim(config, network): 43 | """ Create the optimizer 44 | """ 45 | if config.opt=='sgd': 46 | optim = torch.optim.SGD(network.parameters(), 47 | lr = config.lr, 48 | momentum = 0.9, 49 | weight_decay = 4e-4, 50 | nesterov=False) 51 | elif config.opt=='adam' or config.opt=='sgdr': 52 | optim = torch.optim.Adam(network.parameters(), 53 | lr = config.lr, 54 | weight_decay = 4e-4 55 | ) 56 | elif config.opt=='rmsprop': 57 | optim = torch.optim.RMSprop(network.parameters(), 58 | lr = config.lr, 59 | weight_decay = 1e-4) 60 | else: 61 | raise NotImplementedError 62 | 63 | return optim 64 | 65 | def count_parameters(model): 66 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 67 | 68 | def load_checkpoint_test(opt): 69 | if os.path.isfile(opt.checkpoint): 70 | print('=> loading checkpoint '+ opt.checkpoint) 71 | checkpoint = torch.load(opt.checkpoint) 72 | else: 73 | raise Exception('=> no checkpoint found at '+opt.checkpoint) 74 | return checkpoint 75 | 76 | def load_checkpoint_train(opt): 77 | """ Loading the checking-point file if specified 78 | """ 79 | checkpoint = None 80 | if opt.checkpoint: 81 | if os.path.isfile(opt.checkpoint): 82 | print('=> loading checkpoint '+ opt.checkpoint) 83 | 84 | checkpoint = torch.load(opt.checkpoint) 85 | print('=> loaded checkpoint '+ opt.checkpoint+' epoch %d'%checkpoint['epoch']) 86 | if opt.resume_training: 87 | opt.start_epoch = checkpoint['epoch'] 88 | print('resume training on the checkpoint') 89 | else: 90 | print('start new training...') 91 | 92 | # This is to configure the module loaded from multi-gpu 93 | #if opt.checkpoint_multigpu: 94 | # from collections import OrderedDict 95 | # state_dict_rename = OrderedDict() 96 | # for k, v in checkpoint['state_dict'].items(): 97 | # name = k[7:] # remove `module.` 98 | # state_dict_rename[name] = v 99 | # checkpoint['state_dict'] = state_dict_rename 100 | else: 101 | print('=> no checkpoint found at '+opt.checkpoint) 102 | return checkpoint 103 | 104 | def set_learning_rate(optim, lr): 105 | """ manual set the learning rate for all specified parameters 106 | """ 107 | for param_group in optim.param_groups: 108 | param_group['lr']=lr 109 | 110 | def get_learning_rate(optim, name=None): 111 | """ retrieve the current learning rate 112 | """ 113 | if name is None: 114 | # assume all the learning rate remains the same 115 | return optim.param_groups[0]['lr'] 116 | 117 | def adjust_learning_rate_manual(optim, epoch, lr, lr_decay_epochs, lr_decay_ratio): 118 | """ DIY the learning rate 119 | """ 120 | for e in lr_decay_epochs: 121 | if epoch