├── .autoenv.zsh ├── .autoenv_leave.zsh ├── .gitignore ├── .make ├── install.sh └── install_anaconda3.sh ├── LICENSE ├── Makefile ├── README.md ├── checkpoint ├── checkpoint_epoch29.pth.tar └── combine_icp_checkpoint_epoch40.pth.tar ├── code ├── .gitignore ├── Logger.py ├── __init__.py ├── config.py ├── convergence_basin.py ├── data │ ├── MovingObj3D.py │ ├── ScanNet.py │ ├── SimpleLoader.py │ ├── TUM_RGBD.py │ ├── VaryLighting.py │ ├── data_examples │ │ └── TUM │ │ │ ├── color │ │ │ ├── 1305031790.645155.png │ │ │ ├── 1305031790.713097.png │ │ │ ├── 1305031790.781258.png │ │ │ ├── 1305031790.845151.png │ │ │ └── 1305031790.913129.png │ │ │ └── depth │ │ │ ├── 1305031790.640468.png │ │ │ ├── 1305031790.709421.png │ │ │ ├── 1305031790.773548.png │ │ │ ├── 1305031790.839363.png │ │ │ └── 1305031790.909436.png │ └── dataloader.py ├── evaluate.py ├── experiments │ ├── __init__.py │ ├── kf_vo.py │ ├── select_method.py │ └── warping_objects.py ├── models │ ├── LeastSquareTracking.py │ ├── __init__.py │ ├── algorithms.py │ ├── criterions.py │ ├── geometry.py │ └── submodules.py ├── run_example.py ├── test.py ├── timers.py ├── tools │ ├── ICP.py │ ├── __init__.py │ ├── display.py │ └── rgbd_odometry.py ├── train.py └── train_utils.py ├── environment.yml ├── environment30X.yml ├── scripts ├── eval_tum_feature_icp.sh ├── eval_tum_rgbd.sh ├── run_kf_vo.sh ├── run_kf_vo_cb.sh ├── train_moving_objs3d.sh ├── train_tum_feature_icp.sh └── train_tum_rgbd.sh └── setup ├── datasets.yaml └── environment.yml /.autoenv.zsh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | 3 | setup(){ 4 | source $HERE/.anaconda3/bin/activate 5 | path_prepend $HERE/.anaconda3/bin 6 | } 7 | HERE=${0:a:h} 8 | setup -------------------------------------------------------------------------------- /.autoenv_leave.zsh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | conda deactivate 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | logs/ 3 | test_results/ 4 | -------------------------------------------------------------------------------- /.make/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo_bold () { 6 | echo -e "\033[1m$*\033[0m" 7 | } 8 | 9 | echo_warning () { 10 | echo -e "\033[33m$*\033[0m" 11 | } 12 | 13 | conda_check_installed () { 14 | if [ ! $# -eq 1 ]; then 15 | echo "usage: $0 PACKAGE_NAME" 16 | return 1 17 | fi 18 | conda list | awk '{print $1}' | egrep "^$1$" &>/dev/null 19 | } 20 | 21 | HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 22 | ROOT=$HERE/.. 
23 | 24 | cd $ROOT 25 | 26 | source .anaconda3/bin/activate 27 | 28 | # --------------------------------------------------------------------------------------- 29 | 30 | echo_bold "==> Installing the right pip and dependencies for the fresh python" 31 | pip install --upgrade pip 32 | conda install python=3.6 # meet tensorflow requirements 33 | conda install ipython 34 | 35 | #echo_bold "==> Installing computer vision-related packages" 36 | #pip install \ 37 | # jupyter \ 38 | # cython\ 39 | # numpy\ 40 | # matplotlib\ 41 | # opencv-python \ 42 | # opencv-contrib-python \ 43 | # plyfile \ 44 | # pandas \ 45 | # requests \ 46 | # scipy \ 47 | # imageio \ 48 | # scikit-image \ 49 | # sklearn \ 50 | # pyyaml \ 51 | # tqdm \ 52 | # transforms3d \ 53 | # 54 | #echo_bold "==> Installing deep learning-related packages" 55 | #pip install future 56 | #conda install pytorch torchvision cudatoolkit=9.2 -c pytorch 57 | #pip install tensorboard 58 | 59 | echo_bold "==> Installing requirements" 60 | # pip install -r setup/requirements.txt 61 | conda env update --file environment30X.yml 62 | # pip install -e . 63 | 64 | # --------------------------------------------------------------------------------------- 65 | 66 | echo_bold "\nAll is well! You can start using this! 67 | $ source .anaconda3/bin/activate 68 | " 69 | -------------------------------------------------------------------------------- /.make/install_anaconda3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 4 | ROOT=$HERE/.. 5 | 6 | if [ ! -d $ROOT/.anaconda3 ]; then 7 | echo "==>Installing anaconda 3" 8 | echo $ROOT 9 | echo $HERE 10 | cd $ROOT 11 | curl -L https://binbin-xu.github.io//tools/install_anaconda3.sh | bash -s . 12 | fi 13 | 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright © 2020-2021 Smart Robotics Lab, Imperial College London 4 | Copyright © 2020-2021 Binbin Xu 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | @echo '## Make commands ##' 3 | @echo 4 | @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs 5 | 6 | install_first: 7 | @.make/install_first.sh 8 | 9 | install_anaconda3: 10 | @.make/install_anaconda3.sh 11 | 12 | install: install_anaconda3 13 | @.make/install.sh 14 | 15 | clean: 16 | # @rm -rf dense_feature_tracking.egg-info 17 | @rm -rf .anaconda3 18 | 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# Deep Probabilistic Feature-metric Tracking

Binbin Xu · Andrew J. Davison · Stefan Leutenegger

[Paper](https://arxiv.org/pdf/2008.13504.pdf) | [Video](https://youtu.be/6pMosl6ZAPE)

## Summary

This is the official repository of our RA-L 2021 paper:

**Deep Probabilistic Feature-metric Tracking**, \
*Binbin Xu, Andrew J. Davison, Stefan Leutenegger*, \
IEEE Robotics and Automation Letters (RA-L), Vol. 6, No. 1, pp. 223-230, 2021 (ICRA 2021 presentation) \
Best Paper Honorable Mention Award \
[[Paper]](https://arxiv.org/pdf/2008.13504.pdf) [[Video]](https://youtu.be/6pMosl6ZAPE)


## Setup
You can reproduce the setup using our anaconda environment configurations. We provide a Makefile to install the environment:
```bash
make install
```

Every time before you run, activate the environment inside the repo folder:

```bash
source .anaconda3/bin/activate
```
The pre-trained network weights can be downloaded [here](https://imperialcollegelondon.box.com/s/xryhbshxtktizjw5fpmxaic1kncxr4cw).

## Prepare the datasets

**TUM RGBD Dataset**: Download the dataset from [TUM RGBD](https://vision.in.tum.de/data/datasets/rgbd-dataset/download) to '$YOUR_TUM_RGBD_DIR'. Create a symbolic link to the data directory:

```
ln -s $YOUR_TUM_RGBD_DIR code/data/data_tum
```

**MovingObjects3D Dataset**: Download the dataset from [MovingObjs3D](https://drive.google.com/open?id=1EIlS4J2J0sdsq8Mw_03DXHlRQmfL8XQx) to '$YOUR_MOV_OBJS_3D_DIR'. Create a symbolic link to the data directory:

```
ln -s $YOUR_MOV_OBJS_3D_DIR code/data/data_objs3D
```

**Custom Dataset**: You can also use your own dataset.
Our work uses the above two datasets for training and deploys the trained weights on ScanNet and our self-collected dataset. Please refer to [ScanNet.py](code/data/ScanNet.py) and [VaryLighting.py](code/data/VaryLighting.py) for examples of custom data loading.


## Training and Evaluation
To run the full training and evaluation, please follow the steps below.

### Run training

**Train example with the TUM RGBD dataset:**

```bash
./scripts/train_tum_rgbd.sh
```

To check the full training settings, run the help config:
```bash
python train.py --help
```


**Train example with MovingObjects3D:** Camera egocentric motion estimation is different from object-centric motion estimation, so we provide a separate training script for the MovingObjects3D dataset.
The settings are the same as above except for the dataset name. You can use our provided script to train the model:

```bash
./scripts/train_moving_objs3d.sh
```

### Run evaluation
**Run the pretrained model:** If you have set up the datasets properly, you can run the learned model with the checkpoint we provide in the trained model directory:

```bash
./scripts/eval_tum_rgbd.sh
```


You can substitute the trajectory, the keyframes and the checkpoint file. Training and evaluation share the same config settings. To check the full settings, run the help config:

```bash
python evaluate.py --help
```

**Results:** The evaluation results are generated automatically as both '*.pkl' and '*.csv' files in the folder 'test_results/'.


**Run comparisons:** We also provide scripts to run comparisons with the classic RGBD and ICP methods from Open3D. Please refer to [rgbd_odometry.py](code/tools/rgbd_odometry.py) and [ICP.py](code/tools/ICP.py) for the details; a minimal Open3D sketch is shown below.
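
The snippet below is a minimal sketch (not the repo's own `code/tools/rgbd_odometry.py` wrapper) of how such an Open3D RGBD odometry baseline can be called on a single TUM-style frame pair. The file paths and intrinsics are placeholder assumptions, and depending on your Open3D version the odometry module may live under `o3d.odometry` instead of `o3d.pipelines.odometry`.

```python
# Hedged Open3D RGBD-odometry example with placeholder paths and TUM-like intrinsics.
import numpy as np
import open3d as o3d

# Assumed 640x480 pinhole intrinsics (fx, fy, cx, cy)
intrinsic = o3d.camera.PinholeCameraIntrinsic(640, 480, 525.0, 525.0, 319.5, 239.5)

def load_rgbd(color_path, depth_path):
    # TUM depth PNGs are stored in units of 1/5000 m (see the dataloaders in code/data)
    color = o3d.io.read_image(color_path)
    depth = o3d.io.read_image(depth_path)
    return o3d.geometry.RGBDImage.create_from_color_and_depth(
        color, depth, depth_scale=5000.0, depth_trunc=5.0,
        convert_rgb_to_intensity=True)

source = load_rgbd("rgb/frame0.png", "depth/frame0.png")  # placeholder file names
target = load_rgbd("rgb/frame1.png", "depth/frame1.png")

# Hybrid photometric + geometric term, starting from the identity pose
success, trans, info = o3d.pipelines.odometry.compute_rgbd_odometry(
    source, target, intrinsic, np.identity(4),
    o3d.pipelines.odometry.RGBDOdometryJacobianFromHybridTerm(),
    o3d.pipelines.odometry.OdometryOption())
print(success)
print(trans)
```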

### Joint feature-metric and geometric tracking
We can combine our proposed feature-metric tracking with geometric tracking methods to achieve better performance. We provide a script to run the joint tracking with ICP:
```bash
./scripts/train_tum_feature_icp.sh
```
This uses the trained feature-metric network weights as the initialization and combines them with ICP as the refinement.

The evaluation script is also provided:
```
./scripts/eval_tum_feature_icp.sh
```


### Run visual odometry
Please note this is a prototype version of our **visual odometry frontend**. It mainly serves as a demo to show the performance of our method.

```bash
./scripts/run_kf_vo.sh
```

To visualise the keyframe tracking in the paper, add the arguments `--vo_type keyframe --two_view` to the above script.
To check the full settings, run the help config:
```bash
python code/experiments/kf_vo.py --help
```


**Convergence basin analysis** for the keyframe visual odometry is also provided. Check the script `scripts/run_kf_vo_cb.sh` for more details.


## Citation
```bibtex
@article{Xu:etal:RAL2021,
  author  = {Binbin Xu and Andrew Davison and Stefan Leutenegger},
  journal = {{IEEE} Robotics and Automation Letters ({RAL})},
  title   = {Deep Probabilistic Feature-metric Tracking},
  year    = {2021},
  volume  = {6},
  number  = {1},
  pages   = {223 - 230},
}
```

Please cite the paper if you find our code useful.


## License
This repo is BSD 3-Clause licensed. Part of its code is from [Taking a Deeper Look at the Inverse Compositional Algorithm](https://github.com/lvzhaoyang/DeeperInverseCompositionalAlgorithm), which is MIT licensed. We thank the authors for their great work.
164 | 165 | Copyright © 2020-2021 Smart Robotics Lab, Imperial College London \ 166 | Copyright © 2020-2021 Binbin Xu 167 | 168 | 169 | ## Contact 170 | Binbin Xu (b.xu17@imperial.ac.uk) 171 | -------------------------------------------------------------------------------- /checkpoint/checkpoint_epoch29.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/checkpoint/checkpoint_epoch29.pth.tar -------------------------------------------------------------------------------- /checkpoint/combine_icp_checkpoint_epoch40.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/checkpoint/combine_icp_checkpoint_epoch40.pth.tar -------------------------------------------------------------------------------- /code/.gitignore: -------------------------------------------------------------------------------- 1 | trained_models/* 2 | test/* 3 | test_results/* 4 | */**/*.pyc 5 | logs/* 6 | -------------------------------------------------------------------------------- /code/Logger.py: -------------------------------------------------------------------------------- 1 | """ Tensorflow Logger for training and testing including images 2 | # SPDX-FileCopyrightText: 2021 Binbin Xu 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | @author: Zhaoyang Lv 6 | @date: March 2019 7 | """ 8 | 9 | import sys, os, shutil 10 | import os.path as osp 11 | import subprocess 12 | from torch.utils.tensorboard import SummaryWriter 13 | import torch 14 | import numpy as np 15 | import cv2 16 | from collections import OrderedDict 17 | 18 | class Logger(object): 19 | """ 20 | example usage: 21 | 22 | stdout = Logger('log.txt') 23 | sys.stdout = stdout 24 | 25 | ... your code here ... 
26 | 27 | stdout.delink() 28 | """ 29 | def __init__(self, filename="Default.log"): 30 | self.terminal = sys.stdout 31 | self.log = open(filename, "w") 32 | 33 | def delink(self): 34 | self.log.close() 35 | #self.log = open('foo', "w") 36 | # self.write = self.writeTerminalOnly 37 | 38 | def writeTerminalOnly(self, message): 39 | self.terminal.write(message) 40 | 41 | def write(self, message): 42 | self.terminal.write(message) 43 | self.log.write(message) 44 | 45 | def flush(self): 46 | pass 47 | 48 | class TensorBoardLogger(object): 49 | def __init__(self, logging_dir, logfile_name, print_freq = 10): 50 | 51 | self.log_dir = logging_dir 52 | self.print_freq = print_freq 53 | 54 | if not os.path.isdir(logging_dir): 55 | os.makedirs(logging_dir) 56 | 57 | self.summary_writer = SummaryWriter(log_dir=logging_dir) 58 | 59 | # standard logger to print to terminal 60 | logfile = osp.join(logging_dir,'log.txt') 61 | stdout = Logger(logfile) 62 | sys.stdout = stdout 63 | 64 | def write_to_tensorboard(self, display_dict, iteration): 65 | """ Write the saved states (display_dict) to tensorboard 66 | """ 67 | for k, v in display_dict.items(): 68 | self.summary_writer.add_scalar(k, v, iteration) 69 | 70 | def add_images_to_tensorboard(self, image_list: list, name, iteration): 71 | 72 | if iteration % self.print_freq != 0: 73 | return 74 | 75 | concated_images = np.concatenate(image_list, axis=1) 76 | ratio = 960 / concated_images.shape[1] 77 | if ratio < 1: 78 | concated_images = cv2.resize(concated_images, None, fx=ratio, fy=ratio) 79 | concated_images = cv2.cvtColor(concated_images, cv2.COLOR_BGR2RGB) 80 | self.summary_writer.add_image(name, concated_images, 81 | global_step=iteration, 82 | dataformats='HWC', 83 | ) 84 | 85 | def write_to_terminal(self, display_dict, epoch, batch_iter, epoch_len, batch_time, is_train = True): 86 | """ Write the save states (display_dict) and training information to terminal for display 87 | """ 88 | 89 | if batch_iter % self.print_freq != 0: 90 | return 91 | 92 | if is_train: 93 | prefix = 'Train' 94 | else: 95 | prefix = 'Test' 96 | 97 | state = prefix + ':\tEpoch %d, Batch %d/%d, BatchTime %.4f'%(epoch+1, batch_iter, epoch_len, batch_time) 98 | 99 | loss = '' 100 | for k, v in display_dict.items(): 101 | loss += k + ' ' + '%.8f ' % v 102 | 103 | print(state + loss) 104 | 105 | def save_checkpoint(self, network, state_info = None, 106 | filename='checkpoint.pth.tar'): 107 | """save checkpoint to disk""" 108 | state_dict = network.state_dict().copy() 109 | 110 | if torch.cuda.device_count() > 1: 111 | state_dict_rename = OrderedDict() 112 | for k, v in state_dict.items(): 113 | name = k[7:] # remove `module.` 114 | state_dict_rename[name] = v 115 | state_dict = state_dict_rename 116 | 117 | if state_info is None: 118 | state = {'state_dict': state_dict} 119 | else: 120 | state = state_info 121 | state['state_dict'] = state_dict 122 | 123 | checkpoint_path = osp.join(self.log_dir,filename) 124 | torch.save(state, checkpoint_path) 125 | return checkpoint_path 126 | 127 | 128 | def log_git_revisions_hash(dir): 129 | hashes = [] 130 | # the latest git information 131 | latest_commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD']) 132 | # hashes.append(subprocess.check_output(['git', 'rev-parse', 'HEAD'])) 133 | # hashes.append(subprocess.check_output(['git', 'rev-parse', 'HEAD^'])) 134 | # return hashes 135 | f = open(osp.join(dir, 'git_status.txt'), "w") 136 | f.write(str(latest_commit_id)) 137 | f.close() 138 | 139 | 140 | def check_directory(filename): 141 
| target_dir = os.path.dirname(filename) 142 | if not os.path.isdir(target_dir): 143 | os.makedirs(target_dir) -------------------------------------------------------------------------------- /code/__init__.py: -------------------------------------------------------------------------------- 1 | import pkgutil 2 | __path__ = pkgutil.extend_path(__path__, __name__) 3 | -------------------------------------------------------------------------------- /code/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Argparse configuration for training and testing in the paper 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | def add_network_config(parser): 8 | parser.add_argument('--feature_channel', default=1, type=int, 9 | help='Specify the feature channel used for tracking. The default is 1.\n') 10 | parser.add_argument('--uncertainty_channel', default=1, type=int, 11 | help='Specify the uncertainty channel used for tracking when using uncerti. The default is 1.\n') 12 | parser.add_argument('--feature_extract', default='average', type=str, 13 | choices=('1by1', 'conv', 'skip', 'average', 'prob_fuse'), 14 | help='Specify the method to extract feature from the pyramid. The default is using 1by1.\n') 15 | parser.add_argument('--uncertainty', 16 | default='None', type=str, 17 | choices=('None', 'identity', 'sigmoid', 'feature', 'gaussian', 'laplacian', 'old_gaussian', 'old_laplacian'), 18 | help='Choose a uncertainty function for feature-metric tracking. DIC is the original CVPR work. ' 19 | 'None using identity matrix. and track is used in GN tracking\n') 20 | parser.add_argument('--combine_ICP',action='store_true', 21 | help='Combine the training with ICP.\n') 22 | parser.add_argument('--add_init_pose_noise', action='store_true', 23 | help='Add noise in the init pose (translation) only in training.\n') 24 | parser.add_argument('--init_pose', default='identity', type=str, 25 | choices=('identity', 'sfm_net', 'dense_net'), 26 | help='Use predicted pose as initial pose.\n') 27 | # @TODO: try different number 0.01 in sfm-learner, 0.1 in deeptam 28 | parser.add_argument('--scale_init_pose', default='0.01', type=float, 29 | help='Scaling the initial predicted pose.\n') 30 | parser.add_argument('--train_init_pose', action='store_true', 31 | help='Jointly train pose predictor by regressing predicted pose to the ground truth.\n') 32 | parser.add_argument('--multi_hypo', default='None', type=str, 33 | choices=('None', 'average', 'prob_fuse', 'res_prob_fuse'), 34 | help='Use multi hypothesis for init pose guess.\n') 35 | parser.add_argument('--res_input', action='store_true', 36 | help='Also input residual for posenet.\n') 37 | # virtual does not work well 38 | # parser.add_argument('--virtual_camera', action='store_true', 39 | # help='Use rendered virtual frame and virtual camera instead of img1 \n') 40 | parser.add_argument('--vis_feat', default=False, action='store_true', 41 | help='visualize the feature maps in the training') 42 | parser.add_argument('--scannet_subset_train', default='0.25', type=float, 43 | help='Subset ratio in scannet for training.\n') 44 | parser.add_argument('--scannet_subset_val', default='0.005', type=float, 45 | help='Subset ratio in scannet for validation.\n') 46 | parser.add_argument('--train_uncer_prop', action='store_true', 47 | help='Use uncertainty propagation in the training loss\n') 48 | parser.add_argument('--obj_only', action='store_true', 49 | help='Use uncertainty 
propagation in the training loss\n') 50 | parser.add_argument('--loss', default='EPE3D', type=str, 51 | choices=('EPE3D', 'RPE', 'UEPE3D', 'URPE'), 52 | help='Training loss.\n') 53 | parser.add_argument('--remove_tru_sigma', action='store_true', 54 | help='Remove truncated uncertainty areas in the tracking for training/testing\n') 55 | parser.add_argument('--scaler', default='None', type=str, 56 | choices=('None', 'oneResidual', 'twoResidual', 'MultiScale2w', 'expMultiScale'), 57 | help='Choose a scale function for combing ICP and feature methods. \n') 58 | parser.add_argument('--scale_icp', default='0.01', type=float, 59 | help='Scaling the ICP w.r.t feature/RGB.\n') 60 | parser.add_argument('--add_vl_dataset', action='store_true', 61 | help='Add varying lighting dataset to the TUM dataset for training/validation \n') 62 | 63 | def add_tracking_config(parser): 64 | add_network_config(parser) 65 | parser.add_argument('--network', 66 | default='DeepIC', type=str, 67 | choices=('DeepIC', 'GaussNewton'), 68 | help='Choose a network to run. \n \ 69 | The DeepIC is the proposed Deeper Inverse Compositional method. \n\ 70 | The GuassNewton is the baseline for Inverse Compositional method which does not include \ 71 | any learnable parameters\n') 72 | parser.add_argument('--mestimator', 73 | default='MultiScale2w', type=str, 74 | choices=('None', 'MultiScale2w'), 75 | help='Choose a weighting function for the Trust Region method.\n\ 76 | The MultiScale2w is the proposed (B) convolutional M-estimator. \n') 77 | parser.add_argument('--solver', 78 | default='Direct-ResVol', type=str, 79 | choices=('Direct-Nodamping', 'Direct-ResVol'), 80 | help='Choose the solver function for the Trust Region method. \n\ 81 | Direct-Nodamping is the Gauss-Newton algorithm, which does not use damping. \n\ 82 | Direct-ResVol is the proposed (C) Trust-Region Network. \n\ 83 | (default: Direct-ResVol) ') 84 | parser.add_argument('--direction', 85 | default='inverse', type=str, 86 | choices=('inverse', 'forward'), 87 | help='Choose the direction to update pose: inverse, or forward \n') 88 | parser.add_argument('--encoder_name', 89 | default='ConvRGBD2', 90 | choices=('ConvRGBD2', 'RGB', 'ConvRGBD'), 91 | help='The encoder architectures. \ 92 | ConvRGBD2 takes the two-view features as input. \n\ 93 | RGB is using the raw RGB images as input (converted to intensity afterwards).\n\ 94 | (default: ConvRGBD2)') 95 | parser.add_argument('--max_iter_per_pyr', 96 | default=3, type=int, 97 | help='The maximum number of iterations at each pyramids.\n') 98 | parser.add_argument('--no_weight_sharing', 99 | action='store_true', 100 | help='If this flag is on, we disable sharing the weights across different backbone network when extracing \ 101 | features. In default, we share the weights for all network in each pyramid level.\n') 102 | parser.add_argument('--tr_samples', default=10, type=int, 103 | help='Set the number of trust-region samples. 
(default: 10)\n') 104 | 105 | def add_basics_config(parser): 106 | """ the basic setting 107 | (supposed to be shared through train and inference) 108 | """ 109 | parser.add_argument('--cpu_workers', type=int, default=12, 110 | help="Number of cpu threads for data loader.\n") 111 | parser.add_argument('--dataset', type=str, 112 | choices=('TUM_RGBD', 'ScanNet', 'MovingObjects3D', 'VaryLighting'), 113 | help='Choose a dataset to train/val/evaluate.\n') 114 | parser.add_argument('--image_resize', type=float, default=None, 115 | help='downsize ratio for input images') 116 | parser.add_argument('--time', dest='time', action='store_true', 117 | help='Count the execution time of each step.\n' ) 118 | 119 | def add_test_basics_config(parser): 120 | parser.add_argument('--tracker', default='learning_based', type=str, 121 | choices=('learning_based', 'ICP', 'ColorICP', 'RGBD')) 122 | parser.add_argument('--batch_per_gpu', default=8, type=int, 123 | help='Specify the batch size during test. The default is 8.\n') 124 | parser.add_argument('--checkpoint', default='', type=str, 125 | help='Choose a checkpoint model to test.\n') 126 | parser.add_argument('--keyframes', 127 | default='1,2,4,8', type=str, 128 | help='Choose the number of keyframes to train the algorithm.\n') 129 | parser.add_argument('--verbose', action='store_true', 130 | help='Print/save all the intermediate representations') 131 | parser.add_argument('--eval_set', default='test', 132 | choices=('test', 'validation')) 133 | parser.add_argument('--trajectory', type=str, 134 | default = '', 135 | help = 'Specify a trajectory to run.\n') 136 | 137 | def add_train_basics_config(parser): 138 | """ add the basics about the training """ 139 | parser.add_argument('--checkpoint', default='', type=str, 140 | help='Choose a pretrained checkpoint model to start with. \n') 141 | parser.add_argument('--batch_per_gpu', default=64, type=int, 142 | help='Specify the batch size during training.\n') 143 | parser.add_argument('--epochs', 144 | default=30, type=int, 145 | help='The total number of total epochs to run. Default is 30.\n' ) 146 | parser.add_argument('--resume_training', 147 | dest='resume_training', action='store_true', 148 | help='Resume the training using the loaded checkpoint. If not, restart the training. 
\n\ 149 | You will need to use the --checkpoint config to load the pretrained checkpoint' ) 150 | parser.add_argument('--pretrained_model', default='', type=str, 151 | help='Initialize the model weights with pretrained model.\n') 152 | parser.add_argument('--no_val', 153 | default=False, 154 | action='store_true', 155 | help='Use no validatation set for training.\n') 156 | parser.add_argument('--keyframes', 157 | default='1,2,4,8', type=str, 158 | help='Choose the number of keyframes to train the algorithm') 159 | parser.add_argument('--verbose', action='store_true', 160 | help='Print/save all the intermediate representations.\n') 161 | 162 | def add_train_log_config(parser): 163 | """ checkpoint and log options """ 164 | parser.add_argument('--checkpoint_folder', default='', type=str, 165 | help='The folder name (postfix) to save the checkpoint.') 166 | parser.add_argument('--snapshot', default=1, type=int, 167 | help='Number of interations to save a snapshot') 168 | parser.add_argument('--save_checkpoint_freq', 169 | default=1, type=int, 170 | help='save the checkpoint for every N epochs') 171 | parser.add_argument('--prefix', default='', type=str, 172 | help='the prefix string added to the log files') 173 | parser.add_argument('-p', '--print_freq', 174 | default=10, type=int, 175 | help='print frequency (default: 10)') 176 | 177 | 178 | def add_train_optim_config(parser): 179 | """ add training optimization options """ 180 | parser.add_argument('--opt', 181 | type=str, default='adam', choices=('sgd','adam'), 182 | help='choice of optimizer (default: adam) \n') 183 | parser.add_argument('--lr', 184 | default=0.0005, type=float, 185 | help='initial learning rate. \n') 186 | parser.add_argument('--lr_decay_ratio', 187 | default=0.5, type=float, 188 | help='lr decay ratio (default:0.5)') 189 | parser.add_argument('--lr_decay_epochs', 190 | default=[5, 10, 20], type=int, nargs='+', 191 | help='lr decay epochs') 192 | parser.add_argument('--lr_min', default=1e-6, type=float, 193 | help='minimum learning rate') 194 | parser.add_argument('--lr_restart', default=10, type=int, 195 | help='restart learning after N epochs') 196 | 197 | def add_train_loss_config(parser): 198 | """ add training configuration for the loss function """ 199 | parser.add_argument('--regression_loss_type', 200 | default='SmoothL1', type=str, choices=('L1', 'SmoothL1'), 201 | help='Loss function for flow regression (default: SmoothL1 loss)') 202 | 203 | def add_vo_config(parser): 204 | """ add testing configuration for kf-vo demo """ 205 | parser.add_argument('--vo', default='feature_icp', type=str, 206 | choices=('DeepIC', 'RGB', 'ICP', 'RGB+ICP', 'feature', 'feature_icp'), 207 | help='Select which tracking method to use for visual odometry.\n') 208 | parser.add_argument('--vo_type', default='incremental', type=str, 209 | choices=('incremental', 'keyframe'), 210 | help='Select which reference frame to use for tracking.\n') 211 | parser.add_argument('--two_view', action='store_true', 212 | help='Only visualization two views.\n' ) 213 | parser.add_argument('--gt_tracker', action='store_true', 214 | help='Use ground truth pose for point cloud visualization') 215 | parser.add_argument('--save_img', action='store_true', 216 | help='Save visualizations.\n' ) 217 | 218 | def add_cb_config(parser): 219 | """ add visualization configurations for convergence basin """ 220 | parser.add_argument('--cb_dimension', default='2D', type=str, 221 | choices=('1D', '2D', '6D'), 222 | help='Select which dimension to visualize for convergence 
basin.\n') 223 | parser.add_argument('--save_img', action='store_true', 224 | help='Save visualizations.\n' ) 225 | parser.add_argument('--reset_cb', action='store_true', 226 | help='Save visualizations.\n' ) 227 | parser.add_argument('--pert_samples', default=31, type=int, 228 | help='perturbation samples in each pose dimension') 229 | 230 | def add_object_config(parser): 231 | parser.add_argument('--method', default='feature_icp', type=str, 232 | choices=('DeepIC', 'RGB', 'ICP', 'RGB+ICP', 'feature', 'feature_icp'), 233 | help='Select which tracking method to use for visual odometry.\n') 234 | parser.add_argument('--batch_per_gpu', default=64, type=int, 235 | help='Specify the batch size during test. The default is 8.\n') 236 | parser.add_argument('--checkpoint', default='', type=str, 237 | help='Choose a checkpoint model to test.\n') 238 | parser.add_argument('--keyframes', 239 | default='1,2,4', type=str, 240 | help='Choose the number of keyframes to train the algorithm.\n') 241 | parser.add_argument('--eval_set', default='test', 242 | choices=('test', 'validation')) 243 | parser.add_argument('--object', type=str, 244 | default = '', 245 | help = 'Specify a trajectory to run.\n') 246 | parser.add_argument('--save_img', action='store_true', 247 | help='Save visualizations.\n' ) 248 | parser.add_argument('--gt_pose', action='store_true', 249 | help='Save visualizations.\n' ) 250 | parser.add_argument('--recompute', action='store_true', 251 | help='Save visualizations.\n' ) -------------------------------------------------------------------------------- /code/data/MovingObj3D.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data loader for MovingObjs 3D dataset 3 | 4 | @author: Zhaoyang Lv 5 | @date: May 2019 6 | """ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | from __future__ import unicode_literals 12 | 13 | import sys, os, random 14 | import pickle 15 | import functools 16 | 17 | import numpy as np 18 | import torch.utils.data as data 19 | import os.path as osp 20 | 21 | from imageio import imread 22 | from tqdm import tqdm 23 | 24 | from cv2 import resize, INTER_NEAREST 25 | 26 | class MovingObjects3D(data.Dataset): 27 | 28 | # every sequence has 200 frames. 
29 | categories = { 30 | 'train': {'aeroplane': [0,190], 31 | 'bicycle': [0,190], 32 | 'bus': [0,190], 33 | 'car': [0,190]}, 34 | 35 | 'validation': {'aeroplane': [190,200], 36 | 'bicycle': [190,200], 37 | 'bus': [190,200], 38 | 'car': [190,200]}, 39 | 40 | 'test': {'boat': [0,200], 41 | 'motorbike': [0,200]} 42 | } 43 | 44 | def __init__(self, root, load_type='train', keyframes = [1], data_transform=None, 45 | category=None, image_resize=0.5): 46 | super(MovingObjects3D, self).__init__() 47 | 48 | self.base_folder = osp.join(root) 49 | 50 | data_all = self.categories[load_type] 51 | 52 | # split it into train and test set (the first 20 are for test) 53 | self.image_seq = [] 54 | self.depth_seq = [] 55 | self.invalid_seq = [] 56 | self.object_mask_seq = [] 57 | self.cam_pose_seq = [] 58 | self.obj_pose_seq = [] 59 | self.obj_vis_idx = [] 60 | self.calib = [] 61 | self.obj_names = [] 62 | 63 | self.transforms = data_transform 64 | 65 | if load_type in ['validation', 'test']: 66 | # should not mix different keyframes in test 67 | assert(len(keyframes) == 1) 68 | self.keyframes = [1] 69 | self.sample_freq = keyframes[0] 70 | else: 71 | self.keyframes = keyframes 72 | self.sample_freq = 1 73 | 74 | self.ids = 0 75 | self.images_size = [240, 320] 76 | # get the accumulated image sequences on the fly 77 | self.seq_acc_ids = [0] 78 | for data_obj, frame_interval in data_all.items(): 79 | if category is not None and data_obj != category: 80 | continue 81 | 82 | start_idx, end_idx = frame_interval 83 | print('Load {:} data from frame {:d} to {:d}'.format(data_obj, start_idx, end_idx)) 84 | for seq_idx in range(start_idx, end_idx, 1): 85 | seq_str = "{:06d}".format(seq_idx) 86 | 87 | info_pkl= osp.join(self.base_folder, 88 | data_obj, seq_str, 'info.pkl') 89 | 90 | color_seq, depth_seq, invalid_seq, mask_seq, camera_poses_seq, object_poses_seq,\ 91 | obj_visible_frames, calib_seq = extract_info_pickle(info_pkl) 92 | 93 | obj_visible_frames = obj_visible_frames[::self.sample_freq] 94 | 95 | self.image_seq.append([osp.join(self.base_folder, x) for x in color_seq]) 96 | self.depth_seq.append([osp.join(self.base_folder, x) for x in depth_seq]) 97 | # self.invalid_seq.append(invalid_seq) 98 | self.object_mask_seq.append([osp.join(self.base_folder, x) for x in mask_seq]) 99 | self.cam_pose_seq.append(camera_poses_seq) 100 | self.obj_pose_seq.append(object_poses_seq) 101 | self.calib.append(calib_seq) 102 | self.obj_vis_idx.append(obj_visible_frames) 103 | 104 | self.obj_names.append('{:}_{:03d}'.format(data_obj, seq_idx)) 105 | 106 | total_valid_frames = max(0, len(obj_visible_frames) - max(self.keyframes)) 107 | 108 | self.ids += total_valid_frames 109 | self.seq_acc_ids.append(self.ids) 110 | 111 | # downscale the input image to half 112 | self.fx_s = image_resize 113 | self.fy_s = image_resize 114 | 115 | print('There are a total of {:} valid frames'.format(self.ids)) 116 | 117 | def __len__(self): 118 | return self.ids 119 | 120 | def __getitem__(self, index): 121 | # the index we want from search sorted is shifted for one 122 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index+1) - 1, 0) 123 | frame_idx = index - self.seq_acc_ids[seq_idx] 124 | 125 | this_idx= self.obj_vis_idx[seq_idx][frame_idx] 126 | next_idx= self.obj_vis_idx[seq_idx][frame_idx + random.choice(self.keyframes)] 127 | 128 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx]) 129 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx]) 130 | 131 | if self.transforms: 132 | color0, color1 = 
self.transforms([color0, color1]) 133 | 134 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx]) 135 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx]) 136 | 137 | cam_pose0 = self.cam_pose_seq[seq_idx][this_idx] 138 | cam_pose1 = self.cam_pose_seq[seq_idx][next_idx] 139 | obj_pose0 = self.obj_pose_seq[seq_idx][this_idx] 140 | obj_pose1 = self.obj_pose_seq[seq_idx][next_idx] 141 | 142 | # the relative allocentric transform of objects 143 | transform = functools.reduce(np.dot, 144 | [np.linalg.inv(cam_pose1), obj_pose1, np.linalg.inv(obj_pose0), cam_pose0]).astype(np.float32) 145 | 146 | # the validity of the object is up the object mask 147 | obj_index = 1 # object index is in default to be 1 148 | obj_mask0 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][this_idx], obj_index) 149 | obj_mask1 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][next_idx], obj_index) 150 | 151 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32) 152 | calib[0] *= self.fx_s 153 | calib[1] *= self.fy_s 154 | calib[2] *= self.fx_s 155 | calib[3] *= self.fy_s 156 | 157 | obj_name = self.obj_names[seq_idx] 158 | # pair_name = '{:}/{:06d}to{:06d}'.format(obj_name, this_idx, next_idx) 159 | pair_name = {'seq': obj_name, 160 | 'seq_idx': seq_idx, 161 | 'frame0': this_idx, 162 | 'frame1': next_idx} 163 | 164 | return color0, color1, depth0, depth1, transform, calib, obj_mask0, obj_mask1, pair_name 165 | 166 | def get_original_size_batch(self, index): 167 | # the index we want from search sorted is shifted for one 168 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index+1) - 1, 0) 169 | frame_idx = index - self.seq_acc_ids[seq_idx] 170 | 171 | this_idx= self.obj_vis_idx[seq_idx][frame_idx] 172 | next_idx= self.obj_vis_idx[seq_idx][frame_idx + random.choice(self.keyframes)] 173 | 174 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx], do_resize=False) 175 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx], do_resize=False) 176 | 177 | if self.transforms: 178 | color0, color1 = self.transforms([color0, color1]) 179 | 180 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx], do_resize=False) 181 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx], do_resize=False) 182 | 183 | cam_pose0 = self.cam_pose_seq[seq_idx][this_idx] 184 | cam_pose1 = self.cam_pose_seq[seq_idx][next_idx] 185 | obj_pose0 = self.obj_pose_seq[seq_idx][this_idx] 186 | obj_pose1 = self.obj_pose_seq[seq_idx][next_idx] 187 | 188 | # the relative allocentric transform of objects 189 | transform = functools.reduce(np.dot, 190 | [np.linalg.inv(cam_pose1), obj_pose1, np.linalg.inv(obj_pose0), cam_pose0]).astype(np.float32) 191 | 192 | # the validity of the object is up the object mask 193 | obj_index = 1 # object index is in default to be 1 194 | obj_mask0 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][this_idx], obj_index, do_resize=False) 195 | obj_mask1 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][next_idx], obj_index, do_resize=False) 196 | 197 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32) 198 | 199 | obj_name = self.obj_names[seq_idx] 200 | # pair_name = '{:}/{:06d}to{:06d}'.format(obj_name, this_idx, next_idx) 201 | pair_name = {'seq': obj_name, 202 | 'seq_idx': seq_idx, 203 | 'frame0': this_idx, 204 | 'frame1': next_idx} 205 | 206 | return color0, color1, depth0, depth1, transform, calib, obj_mask0, obj_mask1, pair_name 207 | 208 | def __load_rgb_tensor(self, 
path, do_resize=True): 209 | """ Load the rgb image 210 | """ 211 | image = imread(path)[:, :, :3] 212 | image = image.astype(np.float32) / 255.0 213 | if do_resize: 214 | image = resize(image, None, fx=self.fx_s, fy=self.fy_s) 215 | return image 216 | 217 | def __load_depth_tensor(self, path, do_resize=True): 218 | """ Load the depth 219 | """ 220 | depth = imread(path).astype(np.float32) / 1e3 221 | if do_resize: 222 | depth = resize(depth, None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST) 223 | depth = np.clip(depth, 1e-1, 1e2) # the valid region of the depth 224 | return depth[np.newaxis, :] 225 | 226 | def __load_binary_mask_tensor(self, path, seg_index, do_resize=True): 227 | """ Load a binary segmentation mask (numbers) 228 | If the object matches the specified index, return true; 229 | Otherwise, return false 230 | """ 231 | obj_mask = imread(path) 232 | mask = (obj_mask == seg_index) 233 | if do_resize: 234 | mask = resize(mask.astype(np.float), None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST) 235 | return mask.astype(np.bool)[np.newaxis, :] 236 | 237 | def extract_info_pickle(info_pkl): 238 | 239 | with open(info_pkl, 'rb') as p: 240 | info = pickle.load(p) 241 | 242 | color_seq = [x.split('final/')[1] for x in info['color']] 243 | depth_seq = [x.split('final/')[1] for x in info['depth']] 244 | invalid_seq = [x.split('final/')[1] for x in info['invalid'] ] 245 | mask_seq = [x.split('final/')[1] for x in info['object_mask']] 246 | 247 | # in this rendering setting, there is only one object 248 | camera_poses_seq = info['pose'] 249 | object_poses_seq = info['object_poses']['Model_1'] 250 | object_visible_frames = info['object_visible_frames']['Model_1'] 251 | 252 | calib_seq = info['calib'] 253 | 254 | return color_seq, depth_seq, invalid_seq, mask_seq, \ 255 | camera_poses_seq, object_poses_seq, object_visible_frames, calib_seq 256 | 257 | 258 | if __name__ == '__main__': 259 | from data.dataloader import load_data 260 | import torchvision.utils as torch_utils 261 | 262 | # loader = MovingObjects3D('', load_type='train', keyframes=[1]) 263 | loader = load_data('MovingObjects3D', keyframes=[1], load_type='train') 264 | torch_loader = data.DataLoader(loader, batch_size=16, shuffle=False, num_workers=4) 265 | 266 | for batch in torch_loader: 267 | color0, color1, depth0, depth1, transform, K, mask0, mask1, names = batch 268 | B,C,H,W=color0.shape 269 | 270 | bcolor0_img = torch_utils.make_grid(color0, nrow=4) 271 | bcolor1_img = torch_utils.make_grid(color1, nrow=4) 272 | # bdepth0_img = torch_utils.make_grid(depth0, nrow=4) 273 | # bdepth1_img = torch_utils.make_grid(depth1, nrow=4) 274 | bmask0_img = torch_utils.make_grid(mask0.view(B,1,H,W)*255, nrow=4) 275 | bmask1_img = torch_utils.make_grid(mask1.view(B,1,H,W)*255, nrow=4) 276 | 277 | import matplotlib.pyplot as plt 278 | plt.figure() 279 | plt.imshow(bcolor0_img.numpy().transpose((1,2,0))) 280 | plt.figure() 281 | plt.imshow(bcolor1_img.numpy().transpose((1,2,0))) 282 | # plt.figure() 283 | # plt.imshow(bdepth0_img.numpy().transpose((1,2,0))) 284 | # plt.figure() 285 | # plt.imshow(bdepth1_img.numpy().transpose((1,2,0))) 286 | plt.figure() 287 | plt.imshow(bmask0_img.numpy().transpose((1,2,0))) 288 | plt.figure() 289 | plt.imshow(bmask1_img.numpy().transpose((1,2,0))) 290 | plt.show() 291 | 292 | -------------------------------------------------------------------------------- /code/data/ScanNet.py: -------------------------------------------------------------------------------- 1 | """ The dataloader 
for ScanNet dataset 2 | # SPDX-FileCopyrightText: 2021 Binbin Xu 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | import os, random 7 | import os.path as osp 8 | 9 | import numpy as np 10 | from torch.utils.data import Dataset 11 | from imageio import imread 12 | from tqdm import tqdm 13 | import pickle 14 | from cv2 import resize, INTER_NEAREST 15 | 16 | 17 | class ScanNet(Dataset): 18 | 19 | def __init__(self, root=None, category='train', 20 | keyframes=[1], data_transform=None, select_traj=None, 21 | image_resize=0.25, truncate_depth=True, 22 | subset_train=0.95, subset_val=0.05): 23 | assert root is not None 24 | super(ScanNet, self).__init__() 25 | 26 | self.image_seq = [] # list (seq) of list (frame) of string (rgb image path) 27 | self.timestamp = [] # empty 28 | self.depth_seq = [] # list (seq) of list (frame) of string (depth image path) 29 | # self.invalid_seq = [] # empty 30 | self.cam_pose_seq = [] # list (seq) of list (frame) of 4 X 4 ndarray 31 | self.calib = [] # list (seq) of list (intrinsics: fx, fy, cx, cy) 32 | self.seq_names = [] # list (seq) of string (seq name) 33 | 34 | self.subset_train = subset_train # only use subset for training 35 | self.subset_val = subset_val # only use subset for validation 36 | assert self.subset_train + self.subset_val <= 1 37 | self.ids = 0 38 | self.seq_acc_ids = [0] 39 | self.keyframes = keyframes 40 | self.cam = { 41 | 'distCoeffs': None, 42 | 'fx': 577.871, 43 | 'fy': 577.871, 44 | 'ux': 319.5, 45 | 'uy': 239.5, 46 | 'size': (640, 480), 47 | } 48 | self.depth_conversion = 1.0/5e3 49 | 50 | self.transforms = data_transform 51 | 52 | if category == 'test': 53 | self.__load_test(osp.join(root, 'val'), select_traj) 54 | else: # train and validation 55 | self.__load_train_val(osp.join(root, 'train'), category) 56 | 57 | # downscale the input image to a quarter 58 | self.fx_s = image_resize 59 | self.fy_s = image_resize 60 | self.truncate_depth = truncate_depth 61 | 62 | print('ScanNet dataloader for {:} using keyframe {:}: \ 63 | {:} valid frames'.format(category, keyframes, self.ids)) 64 | 65 | def __read_scans(self, data_dir): 66 | # glob for sequences 67 | sequences = [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))] 68 | print('Found {} sequences in directory: {}'.format(len(sequences), data_dir)) 69 | 70 | scans = [] 71 | with tqdm(total=len(sequences)) as t: 72 | for seq in sequences: 73 | seq_dir = os.path.join(data_dir, seq) 74 | # synchronized trajectory file 75 | sync_traj_file = osp.join(seq_dir, 'sync_trajectory.pkl') 76 | 77 | if not osp.isfile(sync_traj_file): 78 | print("The synchronized trajectory file {:} has not been generated.".format(seq)) 79 | print("Generate it now...") 80 | 81 | # get sequence length from the _info.txt file 82 | nframes = int(open(os.path.join(seq_dir, '_info.txt')).readlines()[-1].split()[-1]) 83 | 84 | views = list() 85 | for i in range(nframes): 86 | frame = os.path.join(seq_dir, 'frame-{:06d}'.format(i)) 87 | pose_file = os.path.join(seq_dir, frame + '.pose.txt') 88 | pose = np.loadtxt(open(pose_file, 'r')) 89 | 90 | # do not use any frame with inf pose 91 | if np.isinf(np.sum(pose)): 92 | print(frame) 93 | continue 94 | views.append({'img': frame + '.color.jpg', 95 | 'dpt': frame + '.merged_depth.png', 96 | 'frame_id': i, 97 | 'pose': pose}) 98 | 99 | # export trajectory file 100 | with open(sync_traj_file, 'wb') as output: 101 | pickle.dump(views, output) 102 | 103 | else: 104 | with open(sync_traj_file, 'rb') as p: 105 | views = pickle.load(p) 106 | 107 | 
scans.append(views) 108 | t.set_postfix({'seq': seq}) 109 | t.update() 110 | return scans 111 | 112 | def __load_train_val(self, root, category): 113 | scans = self.__read_scans(root) 114 | 115 | for scene in scans: 116 | total_num = len(scene) 117 | # the ratio to split the train & validation set 118 | if category == 'train': 119 | start_idx, end_idx = 0, int(self.subset_train * total_num) 120 | else: 121 | start_idx, end_idx = int((1-self.subset_val) * total_num), total_num 122 | 123 | images = [scene[idx]['img'] for idx in range(start_idx, end_idx)] 124 | depths = [scene[idx]['dpt'] for idx in range(start_idx, end_idx)] 125 | extrin = [scene[idx]['pose'] for idx in range(start_idx, end_idx)] 126 | # fake timestamp with frame id 127 | frame_id = [scene[idx]['frame_id'] for idx in range(start_idx, end_idx)] 128 | seq_name = osp.basename(osp.dirname(images[0])) 129 | calib = [self.cam['fx'], self.cam['fy'], self.cam['ux'], self.cam['uy']] 130 | 131 | self.calib.append(calib) 132 | self.image_seq.append(images) 133 | self.depth_seq.append(depths) 134 | self.timestamp.append(frame_id) 135 | self.cam_pose_seq.append(extrin) 136 | self.seq_names.append(seq_name) 137 | self.ids += max(0, len(images) - max(self.keyframes)) 138 | self.seq_acc_ids.append(self.ids) 139 | 140 | def __load_test(self, root, select_traj=None): 141 | """ Note: 142 | The test trajectory is loaded slightly different from the train/validation trajectory. 143 | We only select keyframes from the entire trajectory, rather than use every individual frame. 144 | For a given trajectory of length N, using key-frame 2, the train/validation set will use 145 | [[1, 3], [2, 4], [3, 5],...[N-1, N]], 146 | while test set will use pair 147 | [[1, 3], [3, 5], [5, 7],...[N-1, N]] 148 | This difference result in a change in the trajectory length when using different keyframes. 149 | 150 | The benefit of sampling keyframes of the test set is that the output is a more reasonable trajectory; 151 | And in training/validation, we fully leverage every pair of image. 
152 | """ 153 | 154 | assert (len(self.keyframes) == 1) 155 | scans = self.__read_scans(root) 156 | kf = self.keyframes[0] 157 | self.keyframes = [1] 158 | 159 | for scene in scans: 160 | seq_name = osp.basename(osp.dirname(scene[0]['img'])) 161 | if select_traj is not None: 162 | if seq_name != select_traj: continue 163 | 164 | calib = [self.cam['fx'], self.cam['fy'], self.cam['ux'], self.cam['uy']] 165 | self.calib.append(calib) 166 | 167 | total_num = len(scene) 168 | images = [scene[idx]['img'] for idx in range(0, total_num, kf)] 169 | depths = [scene[idx]['dpt'] for idx in range(0, total_num, kf)] 170 | extrin = [scene[idx]['pose'] for idx in range(0, total_num, kf)] 171 | 172 | # fake timestamp with frame id 173 | timestamp = [scene[idx]['frame_id'] for idx in range(0, total_num, kf)] 174 | self.image_seq.append(images) 175 | self.timestamp.append(timestamp) 176 | self.depth_seq.append(depths) 177 | self.cam_pose_seq.append(extrin) 178 | self.seq_names.append(seq_name) 179 | self.ids += max(0, len(images) - 1) 180 | self.seq_acc_ids.append(self.ids) 181 | 182 | if len(self.image_seq) == 0: 183 | raise Exception("The specified trajectory is not in the test set nor supported.") 184 | 185 | def __getitem__(self, index): 186 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0) 187 | frame_idx = index - self.seq_acc_ids[seq_idx] 188 | 189 | this_idx = frame_idx 190 | next_idx = frame_idx + random.choice(self.keyframes) 191 | 192 | # if the next random keyframe is too far 193 | if self.timestamp[seq_idx][next_idx] - self.timestamp[seq_idx][this_idx] > max(self.keyframes): 194 | search_keyframes = self.keyframes[::-1] + [-kf for kf in self.keyframes] 195 | inf_pose_issue = True 196 | print("search:", self.timestamp[seq_idx][this_idx]) 197 | for keyframe in search_keyframes: 198 | next_idx = frame_idx + keyframe 199 | if abs(self.timestamp[seq_idx][next_idx] - self.timestamp[seq_idx][this_idx]) <= max(self.keyframes): 200 | inf_pose_issue = False 201 | break 202 | if inf_pose_issue: 203 | next_idx = frame_idx + 1 204 | print("#invalid frame:", self.image_seq[seq_idx][this_idx]) 205 | # raise ValueError 206 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx]) 207 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx]) 208 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx]) 209 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx]) 210 | 211 | if self.transforms: 212 | color0, color1 = self.transforms([color0, color1]) 213 | 214 | # normalize the coordinate 215 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32) 216 | calib[0] *= self.fx_s 217 | calib[1] *= self.fy_s 218 | calib[2] *= self.fx_s 219 | calib[3] *= self.fy_s 220 | 221 | cam_pose0 = self.cam_pose_seq[seq_idx][this_idx] 222 | cam_pose1 = self.cam_pose_seq[seq_idx][next_idx] 223 | transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32) 224 | 225 | name = {'seq': self.seq_names[seq_idx], 226 | 'frame0': this_idx, 227 | 'frame1': next_idx} 228 | 229 | # camera_info = dict() 230 | camera_info = {"height": color0.shape[0], 231 | "width": color0.shape[1], 232 | "fx": calib[0], 233 | "fy": calib[1], 234 | "ux": calib[2], 235 | "uy": calib[3]} 236 | 237 | return color0, color1, depth0, depth1, transform, calib, name, camera_info 238 | 239 | def __len__(self): 240 | return self.ids 241 | 242 | def __load_rgb_tensor(self, path): 243 | image = imread(path)[:, :, :3] 244 | image = image.astype(np.float32) / 255.0 245 | image = 
resize(image, None, fx=self.fx_s, fy=self.fy_s) 246 | return image 247 | 248 | def __load_depth_tensor(self, path): 249 | """ Load the depth: 250 | The depth images are scaled by a factor of 5000, i.e., a pixel 251 | value of 5000 in the depth image corresponds to a distance of 252 | 1 meter from the camera, 10000 to 2 meter distance, etc. 253 | A pixel value of 0 means missing value/no data. 254 | """ 255 | depth = imread(path).astype(np.float32) * self.depth_conversion 256 | depth = resize(depth, None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST) 257 | if self.truncate_depth: 258 | depth = np.clip(depth, a_min=0.5, a_max=5.0) # the accurate range of kinect depth 259 | return depth[np.newaxis, :] -------------------------------------------------------------------------------- /code/data/SimpleLoader.py: -------------------------------------------------------------------------------- 1 | """ 2 | This Simple loader partially refers to 3 | https://github.com/NVlabs/learningrigidity/blob/master/SimpleLoader.py 4 | # SPDX-FileCopyrightText: 2021 Binbin Xu 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | @author: Zhaoyang Lv 8 | @date: May, 2019 9 | """ 10 | 11 | import sys, os, random 12 | import torch.utils.data as data 13 | import os.path as osp 14 | 15 | import numpy as np 16 | 17 | from imageio import imread 18 | 19 | class SimpleLoader(data.Dataset): 20 | 21 | def __init__(self, color_dir, depth_dir, K): 22 | """ 23 | :param the directory of color images 24 | :param the directory of depth images 25 | :param the intrinsic parameter [fx, fy, cx, cy] 26 | """ 27 | 28 | print('This simple loader is designed for TUM. \n\ 29 | The depth scale may be different in your depth format. ') 30 | 31 | color_files = sorted(os.listdir(color_dir)) 32 | depth_files = sorted(os.listdir(depth_dir)) 33 | 34 | # please ensure the two folders use the same number of synchronized files 35 | assert(len(color_files) == len(depth_files)) 36 | 37 | self.color_pairs = [] 38 | self.depth_pairs = [] 39 | self.ids = len(color_files) - 1 40 | for idx in range(self.ids): 41 | self.color_pairs.append([ 42 | osp.join(color_dir, color_files[idx]), 43 | osp.join(color_dir, color_files[idx+1]) 44 | ] ) 45 | self.depth_pairs.append([ 46 | osp.join(depth_dir, depth_files[idx]), 47 | osp.join(depth_dir, depth_files[idx+1]) 48 | ] ) 49 | 50 | self.K = K 51 | 52 | def __getitem__(self, index): 53 | 54 | image0_path, image1_path = self.color_pairs[index] 55 | depth0_path, depth1_path = self.depth_pairs[index] 56 | 57 | image0 = self.__load_rgb_tensor(image0_path) 58 | image1 = self.__load_rgb_tensor(image1_path) 59 | 60 | depth0 = self.__load_depth_tensor(depth0_path) 61 | depth1 = self.__load_depth_tensor(depth1_path) 62 | 63 | calib = np.asarray(self.K, dtype=np.float32) 64 | 65 | return image0, image1, depth0, depth1, calib 66 | 67 | def __len__(self): 68 | return self.ids 69 | 70 | def __load_rgb_tensor(self, path): 71 | image = imread(path) 72 | image = image.astype(np.float32) / 255.0 73 | image = np.transpose(image, (2,0,1)) 74 | return np.asarray(image.astype(np.float32)) 75 | 76 | def __load_depth_tensor(self, path): 77 | assert(path.endswith('.png')) 78 | depth = imread(path).astype(np.float32) / 5e3 79 | depth = np.clip(depth, a_min=0.5, a_max=5.0) 80 | 81 | return np.asarray(depth[np.newaxis, :]) -------------------------------------------------------------------------------- /code/data/VaryLighting.py: -------------------------------------------------------------------------------- 1 | """ The dataloader 
for custom dataset 2 | # SPDX-FileCopyrightText: 2021 Binbin Xu 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | """ 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | from __future__ import unicode_literals 10 | 11 | import sys, os, random 12 | 13 | import os.path as osp 14 | import torch.utils.data as data 15 | 16 | from imageio import imread 17 | from cv2 import resize, INTER_NEAREST 18 | 19 | import os.path 20 | import glob 21 | 22 | # import third party 23 | import numpy as np 24 | import random 25 | 26 | 27 | def get_depth_from_corresponding_rgb(rgb_file_abs): 28 | rgb_dir, rgb_file_rel = os.path.split(rgb_file_abs) 29 | depth_dir = rgb_dir.replace("rgb", "depth") 30 | depth_file = os.path.join(depth_dir, rgb_file_rel) 31 | return depth_file 32 | 33 | 34 | class VaryLighting(data.Dataset): 35 | 36 | """ 37 | Dataset class for our varying lighting dataset 38 | """ 39 | base = 'data' 40 | IMAGE_HEIGHT = 480 41 | IMAGE_WIDTH = 640 42 | DEPTH_SCALE = 1.0/1000.0 43 | K = [525.0, 525.0, 319.5, 239.5] 44 | 45 | def __init__(self, root='', category='keyframe', 46 | keyframes=[1,], data_transform=None, select_traj=None, 47 | image_resize=0.25, truncate_depth=True, pair='incremental', 48 | ): 49 | 50 | super(VaryLighting, self).__init__() 51 | assert pair in ['incremental', 'keyframe'] 52 | self.pair = pair 53 | 54 | self.image_seq = [] # list (seq) of list (frame) of string (rgb image path) 55 | self.timestamp = [] # empty 56 | self.depth_seq = [] # list (seq) of list (frame) of string (depth image path) 57 | self.invalid_seq = [] # empty 58 | self.cam_pose_seq = [] # list (seq) of list (frame) of 4 X 4 ndarray 59 | self.calib = [] # list (seq) of list (intrinsics: fx, fy, cx, cy) 60 | self.seq_names = [] # list (seq) of string (seq name) 61 | 62 | self.ids = 0 63 | self.seq_acc_ids = [0] 64 | self.keyframes = keyframes 65 | 66 | self.transforms = data_transform 67 | 68 | if category == 'test': 69 | # self.set_test_mode() 70 | self.__load_test(root, select_traj) 71 | elif category in ['train', 'validation']: # train and validation 72 | raise NotImplementedError() 73 | elif category == 'kf': 74 | self.__load_kf(root, select_traj) 75 | else: 76 | raise NotImplementedError 77 | 78 | # downscale the input image to a quarter 79 | self.fx_s = image_resize 80 | self.fy_s = image_resize 81 | self.truncate_depth = truncate_depth 82 | 83 | print('Vary Lighting dataloader for {:} using keyframe {:}: \ 84 | {:} valid frames'.format(category, keyframes, self.ids)) 85 | 86 | 87 | def __load_test(self, root, select_traj=None): 88 | """ Note: 89 | The test trajectory is loaded slightly different from the train/validation trajectory. 90 | We only select keyframes from the entire trajectory, rather than use every individual frame. 91 | For a given trajectory of length N, using key-frame 2, the train/validation set will use 92 | [[1, 3], [2, 4], [3, 5],...[N-1, N]], 93 | while test set will use pair 94 | [[1, 3], [3, 5], [5, 7],...[N-1, N]] 95 | This difference result in a change in the trajectory length when using different keyframes. 96 | 97 | The benefit of sampling keyframes of the test set is that the output is a more reasonable trajectory; 98 | And in training/validation, we fully leverage every pair of image. 
99 | """ 100 | 101 | assert(len(self.keyframes) == 1) 102 | kf = self.keyframes[0] 103 | self.keyframes = [1] 104 | track_scene = osp.join(root, "*/") 105 | scene_lists = glob.glob(track_scene, recursive=True) 106 | 107 | self._num_scenes = len(scene_lists) 108 | if self._num_scenes == 0: 109 | raise ValueError("No sub-folder data found under the dataset root") 110 | for scene in scene_lists: 111 | scene_name = osp.basename(osp.dirname(scene)) 112 | if select_traj is not None: 113 | if scene_name != select_traj: continue 114 | 115 | rgb_images_regex = os.path.join(scene, "rgb/*.png") 116 | all_rgb_images_in_scene = sorted(glob.glob(rgb_images_regex)) 117 | total_num = len(all_rgb_images_in_scene) 118 | 119 | self.calib.append(self.K) 120 | 121 | 122 | images = [all_rgb_images_in_scene[idx] for idx in range(0, total_num, kf)] 123 | # fake timestamps 124 | timestamp = [os.path.splitext(os.path.basename(image))[0] for image in images] 125 | depths = [get_depth_from_corresponding_rgb(rgb_file) for rgb_file in images] 126 | extrin = [None] * len(images) # [tq2mat(frames[idx][0]) for idx in range(0, total_num, kf)] 127 | self.image_seq.append(images) 128 | self.timestamp.append(timestamp) 129 | self.depth_seq.append(depths) 130 | self.cam_pose_seq.append(extrin) 131 | self.seq_names.append(scene) 132 | self.ids += max(0, len(images)-1) 133 | self.seq_acc_ids.append(self.ids) 134 | 135 | def __load_rgb_tensor(self, path): 136 | """ Load the rgb image 137 | """ 138 | image = imread(path)[:, :, :3] 139 | image = image.astype(np.float32) / 255.0 140 | image = resize(image, None, fx=self.fx_s, fy=self.fy_s) 141 | return image 142 | 143 | def __load_depth_tensor(self, path): 144 | """ Load the depth: 145 | The depth images are scaled by a factor of 5000, i.e., a pixel 146 | value of 5000 in the depth image corresponds to a distance of 147 | 1 meter from the camera, 10000 to 2 meter distance, etc. 148 | A pixel value of 0 means missing value/no data.
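For example, a raw pixel value of 2500 corresponds to 2500 / 5000 = 0.5 m, which is also the lower clipping bound applied below when truncate_depth is set.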
149 | """ 150 | depth = imread(path).astype(np.float32) / 5e3 151 | depth = resize(depth, None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST) 152 | if self.truncate_depth: 153 | depth = np.clip(depth, a_min=0.5, a_max=5.0) # the accurate range of kinect depth 154 | return depth[np.newaxis, :] 155 | 156 | def __getitem__(self, index): 157 | # pair in the way like [[1, 3], [3, 5], [5, 7],...[N-1, N]] 158 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0) 159 | frame_idx = index - self.seq_acc_ids[seq_idx] 160 | 161 | this_idx = frame_idx 162 | next_idx = frame_idx + random.choice(self.keyframes) 163 | 164 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx]) 165 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx]) 166 | 167 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx]) 168 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx]) 169 | 170 | if self.transforms: 171 | color0, color1 = self.transforms([color0, color1]) 172 | 173 | # normalize the coordinate 174 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32) 175 | calib[0] *= self.fx_s 176 | calib[1] *= self.fy_s 177 | calib[2] *= self.fx_s 178 | calib[3] *= self.fy_s 179 | 180 | # cam_pose0 = self.cam_pose_seq[seq_idx][this_idx] 181 | # cam_pose1 = self.cam_pose_seq[seq_idx][next_idx] 182 | # transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32) 183 | transform = None 184 | 185 | name = {'seq': self.seq_names[seq_idx], 186 | 'frame0': this_idx, 187 | 'frame1': next_idx} 188 | 189 | # camera_info = dict() 190 | camera_info = {"height": color0.shape[0], 191 | "width": color0.shape[1], 192 | "fx": calib[0], 193 | "fy": calib[1], 194 | "ux": calib[2], 195 | "uy": calib[3]} 196 | return color0, color1, depth0, depth1, transform, calib, name, camera_info 197 | 198 | 199 | def get_keypair(self, index, kf_idx=0): 200 | # pair in the way like [[1, 3], [1, 5], [1, 7],...[1, N]] 201 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0) 202 | frame_idx = index - self.seq_acc_ids[seq_idx] 203 | 204 | this_idx = kf_idx 205 | next_idx = frame_idx 206 | 207 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx]) 208 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx]) 209 | 210 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx]) 211 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx]) 212 | 213 | if self.transforms: 214 | color0, color1 = self.transforms([color0, color1]) 215 | 216 | # normalize the coordinate 217 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32) 218 | calib[0] *= self.fx_s 219 | calib[1] *= self.fy_s 220 | calib[2] *= self.fx_s 221 | calib[3] *= self.fy_s 222 | 223 | # cam_pose0 = self.cam_pose_seq[seq_idx][this_idx] 224 | # cam_pose1 = self.cam_pose_seq[seq_idx][next_idx] 225 | # transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32) 226 | transform = None 227 | 228 | name = {'seq': self.seq_names[seq_idx], 229 | 'frame0': this_idx, 230 | 'frame1': next_idx} 231 | 232 | # camera_info = dict() 233 | camera_info = {"height": color0.shape[0], 234 | "width": color0.shape[1], 235 | "fx": calib[0], 236 | "fy": calib[1], 237 | "ux": calib[2], 238 | "uy": calib[3]} 239 | return color0, color1, depth0, depth1, transform, calib, name, camera_info 240 | 241 | def __len__(self): 242 | return self.ids -------------------------------------------------------------------------------- 
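A minimal usage sketch for the loader above (not part of the repository code), assuming a dataset root laid out as <root>/<scene>/rgb/*.png with matching <scene>/depth/*.png; the root path below is a placeholder:

from data.VaryLighting import VaryLighting

vl = VaryLighting(root='/path/to/vary_lighting', category='test',
                  keyframes=[1], select_traj=None,
                  image_resize=0.25, truncate_depth=True)
color0, color1, depth0, depth1, _, calib, name, cam_info = vl[0]
# color0 / color1: H x W x 3 float32 in [0, 1] (no torch transform supplied here)
# depth0 / depth1: 1 x H x W depth in meters, clipped to [0.5, 5.0]
# calib: [fx, fy, cx, cy] already multiplied by image_resize
# cam_info: dict with the resized height/width and the scaled intrinsics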
/code/data/data_examples/TUM/color/1305031790.645155.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.645155.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/color/1305031790.713097.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.713097.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/color/1305031790.781258.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.781258.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/color/1305031790.845151.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.845151.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/color/1305031790.913129.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.913129.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/depth/1305031790.640468.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.640468.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/depth/1305031790.709421.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.709421.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/depth/1305031790.773548.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.773548.png -------------------------------------------------------------------------------- /code/data/data_examples/TUM/depth/1305031790.839363.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.839363.png -------------------------------------------------------------------------------- 
/code/data/data_examples/TUM/depth/1305031790.909436.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.909436.png -------------------------------------------------------------------------------- /code/data/dataloader.py: -------------------------------------------------------------------------------- 1 | """ The dataloaders for training and evaluation 2 | # SPDX-FileCopyrightText: 2021 Binbin Xu 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | 5 | @author: Zhaoyang Lv 6 | @date: March 2019 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | from __future__ import unicode_literals 13 | 14 | import torchvision.transforms as transforms 15 | import numpy as np 16 | import os 17 | import socket 18 | import yaml 19 | try: 20 | # use faster C loader if available 21 | from yaml import CLoader 22 | except ImportError: 23 | from yaml import Loader as CLoader 24 | 25 | 26 | def get_datasets_path(which_dataset): 27 | # utils_path = os.getcwd().split('/') 28 | # print(utils_path) 29 | # source_folder = '/'.join(utils_path[:-2]) 30 | # print(source_folder) 31 | # return source_folder 32 | curr_path = os.path.realpath(__file__) 33 | env_file_path = os.path.realpath(os.path.join(curr_path, '../../../setup/datasets.yaml')) 34 | hostname = str(socket.gethostname()) 35 | env_config = yaml.load(open(env_file_path), Loader=CLoader) 36 | return env_config[which_dataset][hostname]['dataset_root'] 37 | 38 | TUM_DATASET_DIR = get_datasets_path('TUM_RGBD') 39 | MOVING_OBJECTS_3D = get_datasets_path('MOVING_OBJECTS_3D') 40 | ScanNet_DATASET_DIR = get_datasets_path('SCANNET') 41 | VL_DATASET_DIR = get_datasets_path('VaryLighting') 42 | 43 | def load_data(dataset_name, keyframes = None, load_type = 'train', 44 | select_trajectory = '', load_numpy = False, image_resize=0.25, truncate_depth=True, 45 | options=None, pair='incremental'): 46 | """ Use two frame camera pose data loader 47 | """ 48 | if select_trajectory == '': 49 | select_trajectory = None 50 | 51 | if not load_numpy: 52 | if load_type == 'train': 53 | data_transform = image_transforms(['color_augment', 'numpy2torch']) 54 | else: 55 | data_transform = image_transforms(['numpy2torch']) 56 | else: 57 | data_transform = image_transforms([]) 58 | 59 | if dataset_name == 'TUM_RGBD': 60 | from data.TUM_RGBD import TUM 61 | np_loader = TUM(TUM_DATASET_DIR, load_type, keyframes, 62 | data_transform, select_trajectory, 63 | image_resize=image_resize, 64 | truncate_depth=truncate_depth, 65 | add_vl_dataset=options.add_vl_dataset, 66 | ) 67 | elif dataset_name == 'ScanNet': 68 | from data.ScanNet import ScanNet 69 | np_loader = ScanNet(ScanNet_DATASET_DIR, load_type, keyframes, 70 | data_transform, select_trajectory, 71 | image_resize=image_resize, 72 | truncate_depth=truncate_depth, 73 | subset_train=options.scannet_subset_train, 74 | subset_val=options.scannet_subset_val, 75 | ) 76 | elif dataset_name == 'MovingObjects3D': 77 | from data.MovingObj3D import MovingObjects3D 78 | np_loader = MovingObjects3D(MOVING_OBJECTS_3D, load_type, 79 | keyframes, data_transform, 80 | category=select_trajectory, 81 | image_resize=image_resize, 82 | ) 83 | # elif dataset_name == 'BundleFusion': 84 | # from data.BundleFusion import BundleFusion 85 | # np_loader = BundleFusion(load_type, keyframes, 
data_transform) 86 | # elif dataset_name == 'Refresh': 87 | # from data.REFRESH import REFRESH 88 | # np_loader = REFRESH(load_type, keyframes) 89 | elif dataset_name == 'VaryLighting': 90 | from data.VaryLighting import VaryLighting 91 | np_loader = VaryLighting(VL_DATASET_DIR, load_type, keyframes, 92 | data_transform, select_trajectory, 93 | pair=pair, 94 | image_resize=image_resize, 95 | truncate_depth=truncate_depth, 96 | ) 97 | else: 98 | raise NotImplementedError() 99 | 100 | return np_loader 101 | 102 | def image_transforms(options): 103 | 104 | transform_list = [] 105 | 106 | if 'color_augment' in options: 107 | augment_parameters = [0.9, 1.1, 0.9, 1.1, 0.9, 1.1] 108 | transform_list.append(AugmentImages(augment_parameters)) 109 | 110 | if 'numpy2torch' in options: 111 | transform_list.append(ToTensor()) 112 | 113 | # if 'color_normalize' in options: # we do it on the fly 114 | # transform_list.append(ColorNormalize()) 115 | 116 | return transforms.Compose(transform_list) 117 | 118 | class ColorNormalize(object): 119 | 120 | def __init__(self): 121 | rgb_mean = (0.4914, 0.4822, 0.4465) 122 | rgb_std = (0.2023, 0.1994, 0.2010) 123 | self.transform = transforms.Normalize(mean=rgb_mean, std=rgb_std) 124 | 125 | def __call__(self, sample): 126 | return [self.transform(x) for x in sample] 127 | 128 | class ToTensor(object): 129 | def __init__(self): 130 | self.transform = transforms.ToTensor() 131 | 132 | def __call__(self, sample): 133 | return [self.transform(x) for x in sample] 134 | 135 | class AugmentImages(object): 136 | def __init__(self, augment_parameters): 137 | self.gamma_low = augment_parameters[0] # 0.9 138 | self.gamma_high = augment_parameters[1] # 1.1 139 | self.brightness_low = augment_parameters[2] # 0.9 140 | self.brightness_high = augment_parameters[3] # 1,1 141 | self.color_low = augment_parameters[4] # 0.9 142 | self.color_high = augment_parameters[5] # 1.1 143 | 144 | self.thresh = 0.5 145 | 146 | def __call__(self, sample): 147 | p = np.random.uniform(0, 1, 1) 148 | if p > self.thresh: 149 | random_gamma = np.random.uniform(self.gamma_low, self.gamma_high) 150 | random_brightness = np.random.uniform(self.brightness_low, self.brightness_high) 151 | random_colors = np.random.uniform(self.color_low, self.color_high, 3) 152 | for x in sample: 153 | x = x ** random_gamma # randomly shift gamma 154 | x = x * random_brightness # randomly shift brightness 155 | for i in range(3): # randomly shift color 156 | x[:, :, i] *= random_colors[i] 157 | x[:, :, i] *= random_colors[i] 158 | x = np.clip(x, a_min=0, a_max=1) # saturate 159 | return sample 160 | else: 161 | return sample 162 | -------------------------------------------------------------------------------- /code/evaluate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation scripts to evaluate the tracking accuracy of the proposed method 3 | 4 | # SPDX-FileCopyrightText: 2021 Binbin Xu 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | @author: Zhaoyang Lv 8 | @date: March 2019 9 | """ 10 | 11 | import os, sys, argparse, pickle 12 | import os.path as osp 13 | import numpy as np 14 | import pandas as pd 15 | from tools.rgbd_odometry import RGBDOdometry 16 | from tools.ICP import ICP_Odometry 17 | 18 | import torch 19 | import torch.utils.data as data 20 | import torchvision.utils as torch_utils 21 | import torch.nn as nn 22 | 23 | import models.LeastSquareTracking as ICtracking 24 | import models.criterions as criterions 25 | import train_utils 26 | import config 
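# ICP_Odometry and RGBDOdometry (imported above) provide the classical, non-learned
# baselines that are selected further below when --tracker is ICP, ColorICP or RGBD.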
27 | from Logger import check_directory 28 | 29 | from data.dataloader import load_data 30 | from timers import Timers 31 | from tqdm import tqdm 32 | 33 | 34 | def eval_trajectories(dataset): 35 | if dataset == 'TUM_RGBD': 36 | return { 37 | 'TUM_RGBD': ['rgbd_dataset_freiburg1_360', 38 | 'rgbd_dataset_freiburg1_desk', 39 | 'rgbd_dataset_freiburg2_desk', 40 | 'rgbd_dataset_freiburg2_pioneer_360'] 41 | }[dataset] 42 | elif dataset == 'MovingObjects3D': 43 | return { 44 | 'MovingObjects3D': ['boat', 45 | 'motorbike', 46 | ] 47 | }[dataset] 48 | elif dataset == 'ScanNet': 49 | return { 50 | 'ScanNet': ['scene0565_00', 51 | 'scene0011_00', 52 | ] 53 | }[dataset] 54 | else: 55 | raise NotImplementedError() 56 | 57 | 58 | def nostructure_trajectory(dataset): 59 | if dataset == 'TUM_RGBD': 60 | return { 61 | 'TUM_RGBD': ['rgbd_dataset_freiburg3_nostructure_notexture_far', 62 | 'rgbd_dataset_freiburg3_nostructure_notexture_near_withloop', 63 | 'rgbd_dataset_freiburg3_nostructure_texture_far', 64 | 'rgbd_dataset_freiburg3_nostructure_texture_near_withloop'] 65 | }[dataset] 66 | else: 67 | raise NotImplementedError() 68 | 69 | 70 | def notexture_trajectory(dataset): 71 | if dataset == 'TUM_RGBD': 72 | return { 73 | 'TUM_RGBD': ['rgbd_dataset_freiburg3_nostructure_notexture_far', 74 | 'rgbd_dataset_freiburg3_nostructure_notexture_near_withloop', 75 | 'rgbd_dataset_freiburg3_structure_notexture_near', 76 | ] 77 | }[dataset] 78 | else: 79 | raise NotImplementedError() 80 | 81 | 82 | def structure_texture_trajectory(dataset): 83 | if dataset == 'TUM_RGBD': 84 | return { 85 | 'TUM_RGBD': ['rgbd_dataset_freiburg3_structure_texture_far', 86 | 'rgbd_dataset_freiburg3_structure_texture_near',] 87 | }[dataset] 88 | else: 89 | raise NotImplementedError() 90 | 91 | 92 | def create_eval_loaders(options, eval_type, keyframes, 93 | total_batch_size = 8, 94 | trajectory = ''): 95 | """ create the evaluation loader at different keyframes set-up 96 | """ 97 | eval_loaders = {} 98 | 99 | if trajectory == '': 100 | trajectories = eval_trajectories(options.dataset) 101 | elif trajectory == 'nostructure': 102 | trajectories = nostructure_trajectory(options.dataset) 103 | elif trajectory == 'notexture': 104 | trajectories = notexture_trajectory(options.dataset) 105 | elif trajectory == 'structure_texture': 106 | trajectories = structure_texture_trajectory(options.dataset) 107 | else: 108 | trajectories = [trajectory] 109 | 110 | for trajectory in trajectories: 111 | for kf in keyframes: 112 | if options.image_resize is not None: 113 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory, 114 | image_resize=options.image_resize, options=options) 115 | else: 116 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory, options=options) 117 | eval_loaders['{:}_keyframe_{:}'.format(trajectory, kf)] = data.DataLoader(np_loader, 118 | batch_size = int(total_batch_size), 119 | shuffle = False, num_workers = options.cpu_workers) 120 | 121 | return eval_loaders 122 | 123 | def evaluate_trust_region(dataloader, net, objectives, eval_name='', 124 | known_mask = False, timers = None, logger=None, epoch=0, obj_only=False, tracker='learning_based'): 125 | """ evaluate the trust-region method given the two-frame pose estimation 126 | :param the pytorch dataloader 127 | :param the network 128 | :param the evaluation objective names, e.g. 
RPE, EPE3D 129 | :param True if ground mask if known 130 | :param (optional) timing each step 131 | """ 132 | 133 | progress = tqdm(dataloader, ncols=100, 134 | desc = 'evaluate deeper inverse compositional algorithm {:}'.format(eval_name), 135 | total= len(dataloader)) 136 | 137 | if tracker == 'learning_based': 138 | net.eval() 139 | 140 | total_frames = len(dataloader.dataset) 141 | 142 | outputs = { 143 | 'R_est': np.zeros((total_frames, 3, 3)), 144 | 't_est': np.zeros((total_frames, 3)), 145 | 'names': [] 146 | } 147 | flow_loss, rpe_loss = None, None 148 | if 'EPE3D' in objectives: 149 | flow_loss = criterions.compute_RT_EPE_loss 150 | outputs['epes'] = np.zeros(total_frames) 151 | if 'RPE' in objectives: 152 | rpe_loss = criterions.compute_RPE_loss 153 | outputs['angular_error'] = np.zeros(total_frames) 154 | outputs['translation_error'] = np.zeros(total_frames) 155 | 156 | count_base = 0 157 | 158 | if timers: timers.tic('one iteration') 159 | 160 | count = 1 161 | for idx, batch in enumerate(progress): 162 | 163 | if timers: timers.tic('forward step') 164 | 165 | names = batch[-1] 166 | 167 | if known_mask: # for dataset that with mask or need mask 168 | color0, color1, depth0, depth1, Rt, K, obj_mask0, obj_mask1 = \ 169 | train_utils.check_cuda(batch[:8]) 170 | else: 171 | color0, color1, depth0, depth1, Rt, K = \ 172 | train_utils.check_cuda(batch[:6]) 173 | obj_mask0, obj_mask1 = None, None 174 | 175 | B, _, H, W = depth0.shape 176 | iter = epoch * total_frames + count_base 177 | with torch.no_grad(): 178 | if tracker == 'learning_based': 179 | if obj_only: 180 | output = net.forward(color0, color1, depth0, depth1, K, 181 | obj_mask0=obj_mask0, obj_mask1=obj_mask1, 182 | logger=logger, iteration=iter) 183 | else: 184 | output = net.forward(color0, color1, depth0, depth1, K, 185 | logger=logger, iteration=iter) 186 | elif options.tracker in ['ColorICP', 'ICP', 'RGBD']: 187 | if obj_only: 188 | output = net.batch_track(color0, depth0, color1, depth1, K, 189 | batch_objmask0=obj_mask0, batch_objmask1=obj_mask1) 190 | else: 191 | output = net.batch_track(color0, depth0, color1, depth1, K) 192 | else: 193 | raise NotImplementedError("unsupported test tracker: check argument of --tracker again") 194 | R, t = output 195 | 196 | if timers: timers.toc('forward step') 197 | 198 | outputs['R_est'][count_base:count_base+B] = R.cpu().numpy() 199 | outputs['t_est'][count_base:count_base+B] = t.cpu().numpy() 200 | 201 | if timers: timers.tic('evaluate') 202 | R_gt, t_gt = Rt[:,:3,:3], Rt[:,:3,3] 203 | if rpe_loss: # evaluate the relative pose error 204 | angle_error, trans_error = rpe_loss(R, t, R_gt, t_gt) 205 | outputs['angular_error'][count_base:count_base+B] = angle_error.cpu().numpy() 206 | outputs['translation_error'][count_base:count_base+B] = trans_error.cpu().numpy() 207 | 208 | if flow_loss:# evaluate the end-point-error loss 3D 209 | invalid_mask = (depth0 == depth0.min()) | (depth0 == depth0.max()) 210 | if obj_mask0 is not None: 211 | invalid_mask = ~obj_mask0 | invalid_mask 212 | 213 | epes3d = flow_loss(R, t, R_gt, t_gt, depth0, K, invalid=invalid_mask) 214 | outputs['epes'][count_base:count_base+B] = epes3d.cpu().numpy() 215 | 216 | outputs['names'] += names 217 | 218 | count_base += B 219 | 220 | if timers: timers.toc('evaluate') 221 | if timers: timers.toc('one iteration') 222 | if timers: timers.tic('one iteration') 223 | 224 | if timers: timers.print() 225 | 226 | return outputs 227 | 228 | def test_TrustRegion(options): 229 | 230 | if options.time: 231 | timers = Timers() 
232 | else: 233 | timers = None 234 | 235 | print('Evaluate test performance with the (deep) direct method.') 236 | 237 | total_batch_size = options.batch_per_gpu * torch.cuda.device_count() 238 | 239 | keyframes = [int(x) for x in options.keyframes.split(',')] 240 | if options.dataset in ['BundleFusion', 'TUM_RGBD']: 241 | obj_has_mask = False 242 | else: 243 | obj_has_mask = True 244 | 245 | eval_loaders = create_eval_loaders(options, options.eval_set, 246 | keyframes, total_batch_size, options.trajectory) 247 | 248 | if options.tracker == 'learning_based': 249 | if options.checkpoint == '': 250 | print('No checkpoint loaded. Use the non-learning method') 251 | net = ICtracking.LeastSquareTracking( 252 | encoder_name = 'RGB', 253 | uncertainty_type=options.uncertainty, 254 | direction=options.direction, 255 | max_iter_per_pyr= options.max_iter_per_pyr, 256 | options=options, 257 | mEst_type = 'None', 258 | solver_type = 'Direct-Nodamping') 259 | if torch.cuda.is_available(): net.cuda() 260 | net.eval() 261 | else: 262 | train_utils.load_checkpoint_test(options) 263 | 264 | net = ICtracking.LeastSquareTracking( 265 | encoder_name = options.encoder_name, 266 | uncertainty_type=options.uncertainty, 267 | direction=options.direction, 268 | max_iter_per_pyr= options.max_iter_per_pyr, 269 | mEst_type = options.mestimator, 270 | options=options, 271 | solver_type = options.solver, 272 | no_weight_sharing = options.no_weight_sharing) 273 | 274 | if torch.cuda.is_available(): net.cuda() 275 | net.eval() 276 | 277 | # check whether it is a single checkpoint or a directory 278 | net.load_state_dict(torch.load(options.checkpoint)['state_dict']) 279 | elif options.tracker == 'ICP': 280 | icp_tracker = ICP_Odometry('Point2Plane') 281 | net = icp_tracker 282 | elif options.tracker == 'ColorICP': 283 | color_icp_tracker = ICP_Odometry('ColorICP') 284 | net = color_icp_tracker 285 | elif options.tracker == 'RGBD': 286 | rgbd_tracker = RGBDOdometry("RGBD") 287 | net = rgbd_tracker 288 | else: 289 | raise NotImplementedError("unsupported test tracker: check argument of --tracker again") 290 | 291 | eval_objectives = ['EPE3D', 'RPE'] 292 | 293 | output_prefix = '_'.join([ 294 | options.network, 295 | options.encoder_name, 296 | options.mestimator, 297 | options.solver, 298 | 'iter', str(options.max_iter_per_pyr) 299 | ]) 300 | 301 | # evaluate results per trajectory per key-frame 302 | outputs = {} 303 | for k, loader in eval_loaders.items(): 304 | 305 | traj_name, kf = k.split('_keyframe_') 306 | 307 | output_name = '{:}_{:}'.format(output_prefix, k) 308 | info = evaluate_trust_region(loader, net, 309 | eval_objectives, 310 | eval_name = 'tmp/'+output_name, 311 | known_mask=obj_has_mask, 312 | obj_only=options.obj_only, 313 | tracker=options.tracker, 314 | timers=timers, 315 | ) 316 | 317 | # collect results 318 | outputs[k] = pd.Series([info['epes'].mean(), 319 | info['angular_error'].mean(), 320 | info['translation_error'].mean(), 321 | info['epes'].shape[0], int(kf), traj_name], 322 | index=['3D EPE', 'axis error', 'trans error', 'total frames', 'keyframe', 'trajectory']) 323 | 324 | print(outputs[k]) 325 | 326 | checkpoint_name = options.checkpoint.replace('.pth.tar', '') 327 | if checkpoint_name == '': 328 | checkpoint_name = 'nolearning' 329 | if options.tracker in ['ColorICP', 'ICP', 'RGBD']: 330 | checkpoint_name += ('_'+ options.tracker) 331 | output_dir = osp.join(options.eval_set+'_results', checkpoint_name, k) 332 | output_pkl = output_dir + '.pkl' 333 | 334 | check_directory(output_pkl) 335 | 
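# 'info' is the per-frame dict returned by evaluate_trust_region: 'R_est' (N, 3, 3),
# 't_est' (N, 3) and 'names', plus 'epes', 'angular_error' and 'translation_error'
# when the corresponding objectives are evaluated; it can be read back later with,
# e.g., pickle.load(open(output_pkl, 'rb')).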
336 | with open(output_pkl, 'wb') as output: # dump per-frame results info 337 | info = info 338 | pickle.dump(info, output) 339 | 340 | """ =============================================================== """ 341 | """ Generate the final evaluation results """ 342 | """ =============================================================== """ 343 | 344 | outputs_pd = pd.DataFrame(outputs).T 345 | outputs_pd['3D EPE'] *= 100 # convert to cm 346 | outputs_pd['axis error'] *= (180/np.pi) # convert to degree 347 | outputs_pd['trans error'] *= 100 # convert to cm 348 | 349 | print(outputs_pd) 350 | 351 | stats_dict = {} 352 | for kf in keyframes: 353 | kf_outputs = outputs_pd[outputs_pd['keyframe']==kf] 354 | 355 | stats_dict['mean values of trajectories keyframe {:}'.format(kf)] = pd.Series( 356 | [kf_outputs['3D EPE'].mean(), 357 | kf_outputs['axis error'].mean(), 358 | kf_outputs['trans error'].mean(), kf], 359 | index=['3D EPE', 'axis error', 'trans error', 'keyframe']) 360 | 361 | total_frames = kf_outputs['total frames'].sum() 362 | stats_dict['mean values of frames keyframe {:}'.format(kf)] = pd.Series( 363 | [(kf_outputs['3D EPE'] * kf_outputs['total frames']).sum() / total_frames, 364 | (kf_outputs['axis error'] * kf_outputs['total frames']).sum() / total_frames, 365 | (kf_outputs['trans error']* kf_outputs['total frames']).sum() / total_frames, kf], 366 | index=['3D EPE', 'axis error', 'trans error', 'keyframe']) 367 | 368 | stats_pd = pd.DataFrame(stats_dict).T 369 | print(stats_pd) 370 | 371 | final_pd = outputs_pd.append(stats_pd, sort=False) 372 | final_pd.to_csv('{:}.csv'.format(output_dir)) 373 | 374 | return outputs_pd 375 | 376 | if __name__ == '__main__': 377 | 378 | parser = argparse.ArgumentParser(description="Evaluate the network") 379 | config.add_basics_config(parser) 380 | config.add_test_basics_config(parser) 381 | config.add_tracking_config(parser) 382 | 383 | options = parser.parse_args() 384 | 385 | print('---------------------------------------') 386 | 387 | outputs = test_TrustRegion(options) 388 | 389 | -------------------------------------------------------------------------------- /code/experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/experiments/__init__.py -------------------------------------------------------------------------------- /code/experiments/kf_vo.py: -------------------------------------------------------------------------------- 1 | """ Script to run keyframe visual odometry on a sequence of images 2 | using the proposed probabilistic feature-metric tracking method.
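The estimated point cloud and camera trajectory are rendered with trimesh/pyglet; press P in the viewer window to pause or resume playback.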
3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | #!/usr/bin/env python 8 | 9 | # import standard library 10 | import os 11 | import sys 12 | import argparse 13 | import os.path as osp 14 | # import third party 15 | import cv2 16 | from evaluate import create_eval_loaders 17 | 18 | import numpy as np 19 | # opengl/trimesh visualization 20 | import pyglet 21 | import trimesh 22 | import trimesh.viewer as tv 23 | import trimesh.transformations as tf 24 | import torch 25 | from imageio import imread 26 | 27 | import config 28 | from models.geometry import batch_create_transform 29 | from experiments.select_method import select_method 30 | from train_utils import check_cuda 31 | from data.dataloader import load_data 32 | from Logger import check_directory 33 | 34 | 35 | def init_scene(scene): 36 | scene.geometry = {} 37 | scene.graph.clear() 38 | scene.init = True 39 | 40 | # clear poses 41 | scene.gt_poses = [] 42 | scene.est_poses = [] 43 | scene.timestamps = [] 44 | 45 | return scene 46 | 47 | 48 | def camera_transform(transform=None): 49 | if transform is None: 50 | transform = np.eye(4) 51 | return transform @ trimesh.transformations.rotation_matrix( 52 | np.deg2rad(-180), [1, 0, 0] 53 | ) 54 | 55 | 56 | def pointcloud_from_depth( 57 | depth: np.ndarray, 58 | fx: float, 59 | fy: float, 60 | cx: float, 61 | cy: float, 62 | depth_type: str = 'z', 63 | ) -> np.ndarray: 64 | assert depth_type in ['z', 'euclidean'], 'Unexpected depth_type' 65 | assert depth.dtype.kind == 'f', 'depth must be float and have meter values' 66 | 67 | rows, cols = depth.shape 68 | c, r = np.meshgrid(np.arange(cols), np.arange(rows), sparse=True) 69 | valid = ~np.isnan(depth) 70 | z = np.where(valid, depth, np.nan) 71 | x = np.where(valid, z * (c - cx) / fx, np.nan) 72 | y = np.where(valid, z * (r - cy) / fy, np.nan) 73 | pc = np.dstack((x, y, z)) 74 | 75 | if depth_type == 'euclidean': 76 | norm = np.linalg.norm(pc, axis=2) 77 | pc = pc * (z / norm)[:, :, None] 78 | return pc 79 | 80 | 81 | def callback(scene): 82 | if not scene.is_play: 83 | return 84 | 85 | dataset = scene.dataloader 86 | options = scene.options 87 | if scene.index >= len(dataset): 88 | return 89 | 90 | if scene.vo_type == 'incremental': 91 | batch = dataset[scene.index - 1] 92 | else: 93 | batch = dataset.get_keypair(scene.index) 94 | color0, color1, depth0, depth1, GT_Rt, intrins, name = check_cuda( 95 | batch[:7]) 96 | 97 | scene_id = name['seq'] 98 | 99 | # Reset scene for new scene. 
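# (init_scene() above clears the stored geometry, the scene graph and the
#  accumulated ground-truth/estimated pose lists, so drawing restarts here.)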
100 | if scene_id != scene.video_id: 101 | scene = init_scene(scene) 102 | scene.init_idx = scene.index 103 | scene.video_id = scene_id 104 | else: 105 | scene.init = False 106 | 107 | GT_WC = dataset.cam_pose_seq[0][scene.index] 108 | depth_file = dataset.depth_seq[0][scene.index] 109 | if not options.save_img: 110 | # half resolution 111 | rgb = color1.permute((1, 2, 0)).cpu().numpy() 112 | depth = imread(depth_file).astype(np.float32) / 5e3 113 | depth = cv2.resize(depth, None, fx=dataset.fx_s, 114 | fy=dataset.fy_s, interpolation=cv2.INTER_NEAREST) 115 | K = {"fx": intrins[0].item(), "fy": intrins[1].item(), 116 | "ux": intrins[2].item(), "uy": intrins[3].item()} 117 | else: 118 | # original resolution for demo 119 | rgb = imread(dataset.image_seq[0][scene.index]) 120 | depth = imread(depth_file).astype(np.float32) / 5e3 121 | calib = np.asarray(dataset.calib[0], dtype=np.float32) 122 | K = {"fx": calib[0], "fy": calib[1], 123 | "ux": calib[2], "uy": calib[3]} 124 | 125 | # save input rgb and depth images 126 | img_index_png = str(scene.index).zfill(5)+'.png' 127 | if options.dataset == 'VaryLighting': 128 | output_folder = osp.join(dataset.seq_names[0], 'kf_vo', options.vo) 129 | else: 130 | output_folder = os.path.join( 131 | '/home/binbin/Pictures', 'kf_vo', options.vo) 132 | 133 | rgb_img = osp.join(output_folder, 'rgb', img_index_png) 134 | depth_img = osp.join(output_folder, 'depth', img_index_png) 135 | check_directory(rgb_img) 136 | check_directory(depth_img) 137 | cv2.imwrite(rgb_img, cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)) 138 | cv2.imwrite(depth_img, depth) 139 | 140 | if scene.init: 141 | if GT_WC is not None: 142 | T_WC = GT_WC 143 | else: 144 | T_WC = np.array([ 145 | [0.0, -1.0, 0.0, -0.0], 146 | [-1.0, 0.0, 0.0, 0.0], 147 | [0.0, 0.0, -1.0, 0.0], 148 | [0.0, 0.0, 0.0, 1.0], 149 | ]) 150 | # T_WC = np.eye(4) 151 | scene.T_WC = T_WC 152 | if scene.vo_type == 'keyframe': 153 | scene.T_WK = T_WC 154 | else: 155 | with torch.no_grad(): 156 | color0 = color0.unsqueeze(dim=0) 157 | color1 = color1.unsqueeze(dim=0) 158 | depth0 = depth0.unsqueeze(dim=0) 159 | depth1 = depth1.unsqueeze(dim=0) 160 | intrins = intrins.unsqueeze(dim=0) 161 | if options.save_img: 162 | output = scene.network.forward( 163 | color0, color1, depth0, depth1, intrins, index=scene.index) 164 | else: 165 | output = scene.network.forward( 166 | color0, color1, depth0, depth1, intrins) 167 | R, t = output 168 | if scene.is_gt_tracking: 169 | T_WC = GT_WC 170 | scene.T_WC = T_WC 171 | else: 172 | if scene.vo_type == 'incremental': 173 | T_CR = batch_create_transform(t, R) 174 | # T_CR = GT_Rt 175 | T_CR = T_CR.squeeze(dim=0).cpu().numpy() 176 | T_WC = np.dot(scene.T_WC, np.linalg.inv( 177 | T_CR)).astype(np.float32) 178 | elif scene.vo_type == 'keyframe': 179 | T_CK = batch_create_transform(t, R) 180 | # T_CK = GT_Rt 181 | T_CK = T_CK.squeeze(dim=0).cpu().numpy() 182 | T_WC = np.dot(scene.T_WK, np.linalg.inv( 183 | T_CK)).astype(np.float32) 184 | 185 | # print large drift in keyframe tracking, 186 | # just for noticing a possible tracking failure, not usd later 187 | T_CC = np.dot(np.linalg.inv(T_WC), 188 | scene.T_WC).astype(np.float32) 189 | trs_drift = np.copy(T_CC[0:3, 3:4]).transpose() 190 | if np.linalg.norm(trs_drift) > 0.02: 191 | print(depth_file) 192 | else: 193 | raise NotImplementedError() 194 | scene.T_WC = T_WC 195 | 196 | pcd = pointcloud_from_depth( 197 | depth, fx=K['fx'], fy=K['fy'], cx=K['ux'], cy=K['uy'] 198 | ) 199 | nonnan = ~np.isnan(depth) 200 | geom = 
trimesh.PointCloud(vertices=pcd[nonnan], colors=rgb[nonnan]) 201 | # XYZ->RGB, Z is blue 202 | if options.dataset == 'VaryLighting': 203 | axis = trimesh.creation.axis(0.005, origin_color=(0, 0, 0)) 204 | elif options.dataset in ['TUM_RGBD', 'ScanNet']: 205 | axis = trimesh.creation.axis(0.01, origin_color=(0, 0, 0)) 206 | else: 207 | raise NotImplementedError() 208 | 209 | # two view: keyframe - live frames visualization 210 | if scene.vo_type == 'keyframe' and scene.two_view: 211 | if scene.init: 212 | scene.add_geometry(geom, transform=scene.T_WK, geom_name='key') 213 | scene.add_geometry(geom, transform=T_WC, geom_name='live') 214 | scene.add_geometry(axis, transform=T_WC, geom_name='camera_view') 215 | else: 216 | # after the first view, delete the old live view and add new live view 217 | scene.delete_geometry('live') 218 | scene.delete_geometry('camera_view') 219 | scene.add_geometry(geom, transform=T_WC, geom_name='live') 220 | scene.add_geometry(axis, transform=T_WC, geom_name='camera_view') 221 | 222 | else: 223 | scene.add_geometry(geom, transform=T_WC) 224 | 225 | # draw camera trajectory 226 | trs = np.copy(T_WC[0:3, 3:4]).transpose() 227 | cam = trimesh.PointCloud(vertices=trs, colors=[255, 0, 0]) 228 | scene.add_geometry(cam) 229 | 230 | scene.add_geometry(axis, transform=T_WC) 231 | 232 | if scene.last_pose is not None: 233 | poses_seg = np.stack((scene.last_pose, trs), axis=1) 234 | cam_seg = trimesh.load_path(poses_seg) 235 | scene.add_geometry(cam_seg) 236 | scene.last_pose = trs 237 | 238 | # A kind of current camera view, but a bit far away to see whole scene. 239 | scene.camera.resolution = (rgb.shape[1], rgb.shape[0]) 240 | scene.camera.focal = (K['fx'], K['fy']) 241 | if dataset.realscene: 242 | if options.save_img: 243 | if scene.vo_type == 'keyframe': 244 | # T_see = np.array([ 245 | # [1.000, 0.000, 0.000, 0.2], 246 | # [0.000, 0.866, 0.500, -0.7], 247 | # [0.000, -0.500, 0.866, -0.7], 248 | # [0.000, 0.000, 0.000, 1.0], 249 | # ]) 250 | T_see = np.array([ 251 | [1.000, 0.000, 0.000, 0.2], 252 | [0.000, 0.866, 0.500, -0.7], 253 | [0.000, -0.500, 0.866, -0.8], 254 | [0.000, 0.000, 0.000, 1.0], 255 | ]) 256 | 257 | # T_see = np.array([ 258 | # [1.000, 0.000, 0.000, 0.2], 259 | # [0.000, 0.985, 0.174, -0.3], 260 | # [0.000, -0.174, 0.985, -0.6], 261 | # [0.000, 0.000, 0.000, 1.0], 262 | # ]) 263 | scene.camera_transform = camera_transform( 264 | np.matmul(scene.T_WK, T_see) 265 | ) 266 | else: 267 | # if scene.index < 140: 268 | # T_see = np.array([ 269 | # [1.000, 0.000, 0.000, 0.2], 270 | # [0.000, 0.866, 0.500, -2.0], 271 | # [0.000, -0.500, 0.866, -2.0], 272 | # [0.000, 0.000, 0.000, 1.0], 273 | # ]) 274 | # scene.camera_transform = camera_transform( 275 | # np.matmul(scene.T_WC, T_see) 276 | # ) 277 | pass 278 | else: 279 | # adjust which transformation use to set the see pose 280 | if scene.vo_type == 'keyframe': 281 | T_see = np.array([ 282 | [1.000, 0.000, 0.000, 0.2], 283 | [0.000, 0.866, 0.500, -0.7], 284 | [0.000, -0.500, 0.866, -0.8], 285 | [0.000, 0.000, 0.000, 1.0], 286 | ]) 287 | # T_see = np.array([ 288 | # [1.000, 0.000, 0.000, 0.2], 289 | # [0.000, 0.985, 0.174, -0.3], 290 | # [0.000, -0.174, 0.985, -0.6], 291 | # [0.000, 0.000, 0.000, 1.0], 292 | # ]) 293 | # T_see = np.array([ 294 | # [1.000, 0.000, 0.000, 0.2], 295 | # [0.000, 0.985, 0.174, -0.3], 296 | # [0.000, -0.174, 0.985, -0.6], 297 | # [0.000, 0.000, 0.000, 1.0], 298 | # ]) 299 | # T_see = np.array([ 300 | # [1.000, 0.000, 0.000, 0.2], 301 | # [0.000, 0.866, 0.500, -0.8], 302 | # [0.000, 
-0.500, 0.866, -0.8], 303 | # [0.000, 0.000, 0.000, 1.0], 304 | # ]) 305 | 306 | scene.camera_transform = camera_transform( 307 | np.matmul(scene.T_WK, T_see) 308 | ) 309 | else: 310 | scene.camera.transform = T_WC @ tf.translation_matrix([0, 0, 2.5]) 311 | 312 | # if scene.index == scene.init_idx + 1: 313 | # input() 314 | print(scene.index) 315 | scene.index += 1 # scene.track_config['frame_step'] 316 | # print("<=================================") 317 | if options.save_img: 318 | return 319 | 320 | 321 | def main(options): 322 | 323 | if options.dataset == 'TUM_RGBD': 324 | sequence_dir = 'rgbd_dataset_freiburg1_desk' 325 | np_loader = load_data('TUM_RGBD', keyframes=[1, ], load_type='test', 326 | select_trajectory=sequence_dir, 327 | truncate_depth=True, 328 | options=options, 329 | load_numpy=False) 330 | elif options.dataset == 'VaryLighting': 331 | np_loader = load_data('VaryLighting', keyframes=[1, ], load_type='test', 332 | select_trajectory='scene17_demo', # 'l_scene3', 333 | truncate_depth=True, 334 | load_numpy=False, 335 | pair=options.vo_type, 336 | ) 337 | elif options.dataset == 'ScanNet': 338 | np_loader = load_data('ScanNet', keyframes=[1, ], load_type='test', 339 | select_trajectory='scene0593_00', 340 | truncate_depth=True, 341 | load_numpy=False, 342 | options=options, 343 | ) 344 | 345 | scene = trimesh.Scene() 346 | scene.dataloader = np_loader 347 | scene.dataloader.realscene = True 348 | # total_batch_size = options.batch_per_gpu * torch.cuda.device_count() 349 | 350 | # keyframes = [int(x) for x in options.keyframes.split(',')] 351 | # if options.dataset in ['BundleFusion', 'TUM_RGBD']: 352 | # obj_has_mask = False 353 | # else: 354 | # obj_has_mask = True 355 | 356 | # eval_loaders = create_eval_loaders(options, options.eval_set, 357 | # [1,], total_batch_size, options.trajectory) 358 | 359 | tracker = select_method(options.vo, options) 360 | scene.network = tracker 361 | 362 | scene.index = 0 # config['start_frame'] # starting frame e.g. 
60 363 | scene.video_id = None 364 | scene.last_pose = None 365 | scene.is_gt_tracking = options.gt_tracker 366 | scene.init = False # True only for the first frame 367 | scene.is_play = True # immediately start playing when called 368 | scene.vo_type = options.vo_type 369 | scene.two_view = options.two_view 370 | scene.options = options 371 | 372 | callback(scene) 373 | window = trimesh.viewer.SceneViewer( 374 | scene, callback=callback, start_loop=False, resolution=(1080, 720) 375 | ) 376 | 377 | @window.event 378 | def on_key_press(symbol, modifiers): 379 | if modifiers == 0: 380 | if symbol == pyglet.window.key.P: 381 | scene.is_play = not scene.is_play 382 | 383 | print('Press P key to pause/resume.') 384 | 385 | if not options.save_img: 386 | # scene.show() 387 | pyglet.app.run() 388 | else: 389 | # import pyrender 390 | # scene_pyrender = pyrender.Scene.from_trimesh(scene) 391 | # renderer = pyrender.OffscreenRenderer(viewport_height=480, viewport_width=640, point_size=1) 392 | # rgb, depth = renderer.render(scene_pyrender) 393 | 394 | if options.dataset == 'VaryLighting': 395 | output_dir = osp.join(np_loader.seq_names[0], 'kf_vo', options.vo) 396 | else: 397 | output_dir = os.path.join( 398 | '/home/binbin/Pictures', 'kf_vo', options.vo) 399 | check_directory(output_dir + '/*.png') 400 | for frame_id in range(len(scene.dataloader)): 401 | # scene.save_image() 402 | callback(scene) 403 | file_name = os.path.join(output_dir, 'render', str( 404 | scene.index-1).zfill(5) + '.png') 405 | check_directory(file_name) 406 | with open(file_name, "wb") as f: 407 | f.write(scene.save_image()) 408 | f.close() 409 | 410 | 411 | if __name__ == "__main__": 412 | parser = argparse.ArgumentParser(description="Evaluate the network") 413 | config.add_basics_config(parser) 414 | config.add_test_basics_config(parser) 415 | config.add_tracking_config(parser) 416 | config.add_vo_config(parser) 417 | 418 | options = parser.parse_args() 419 | # to save visualization: --save_img and --vis_feat 420 | print('---------------------------------------') 421 | main(options) 422 | -------------------------------------------------------------------------------- /code/experiments/select_method.py: -------------------------------------------------------------------------------- 1 | """ 2 | A wrapper to select different methods for comparison 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | import torch 8 | from models.LeastSquareTracking import LeastSquareTracking 9 | 10 | # load comparison methods 11 | def select_method(method_name, options): 12 | assert method_name in ['DeepIC', 'RGB', 'ICP', 'RGB+ICP', 'feature', 'feature_icp'] 13 | if method_name == 'DeepIC': 14 | print('==>Load DeepIC method') 15 | if options.dataset == 'MovingObjects3D': 16 | deepIC_checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/logs/MovingObjects3D/124/cvpr124_ConvRGBD2_MultiScale2w_Direct-ResVol_MovingObjects3D_obj_False_uCh_1_None_rmT_False_fCh_1_average_iP_identity_mH_None_wICP_False_s_None_lr_0.0005_batch_64/checkpoint_epoch29.pth.tar' 17 | else: 18 | deepIC_checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/code/trained_models/TUM_RGBD_ABC_final.pth.tar' 19 | deepIC = LeastSquareTracking( 20 | encoder_name='ConvRGBD2', 21 | direction='inverse', 22 | max_iter_per_pyr=3, 23 | mEst_type='MultiScale2w', 24 | # options=options, 25 | solver_type='Direct-ResVol', 26 | feature_channel=1, 27 | feature_extract='average', 28 | 
uncertainty_type='None', 29 | combine_ICP=False, 30 | scaler='None', 31 | init_pose_type='identity', 32 | options=options, 33 | ) 34 | 35 | if torch.cuda.is_available(): deepIC.cuda() 36 | 37 | # check whether it is a single checkpoint or a directory 38 | deepIC.load_state_dict(torch.load(deepIC_checkpoint)['state_dict']) 39 | deepIC.eval() 40 | return deepIC 41 | 42 | if method_name == 'RGB': 43 | print('==>Load RGB method') 44 | rgb_tracker = LeastSquareTracking( 45 | encoder_name='RGB', 46 | combine_ICP=False, 47 | feature_channel=1, 48 | uncertainty_channel=1, 49 | # feature_extract='conv', 50 | uncertainty_type='None', 51 | scaler='None', 52 | direction='inverse', 53 | max_iter_per_pyr=options.max_iter_per_pyr, 54 | mEst_type='None', 55 | solver_type='Direct-Nodamping', 56 | init_pose_type='identity', 57 | options=options, 58 | ) 59 | if torch.cuda.is_available(): rgb_tracker.cuda() 60 | rgb_tracker.eval() 61 | return rgb_tracker 62 | 63 | if method_name == 'ICP': 64 | print('==>Load ICP method') 65 | icp_tracker = LeastSquareTracking( 66 | encoder_name='ICP', 67 | combine_ICP=False, 68 | # feature_channel=1, 69 | # uncertainty_channel=1, 70 | # feature_extract='conv', 71 | uncertainty_type='ICP', 72 | scaler='None', 73 | direction='inverse', 74 | max_iter_per_pyr=options.max_iter_per_pyr, 75 | mEst_type='None', 76 | solver_type='Direct-Nodamping', 77 | init_pose_type='identity', 78 | options=options, 79 | ) 80 | if torch.cuda.is_available(): icp_tracker.cuda() 81 | icp_tracker.eval() 82 | return icp_tracker 83 | 84 | if method_name == 'RGB+ICP': 85 | print('==>Load RGB+ICP method') 86 | rgbd_tracker = LeastSquareTracking( 87 | encoder_name='RGB', 88 | combine_ICP=True, 89 | # feature_channel=1, 90 | uncertainty_channel=1, 91 | # feature_extract='conv', 92 | uncertainty_type='identity', 93 | scaler='None', 94 | direction='inverse', 95 | max_iter_per_pyr=options.max_iter_per_pyr, 96 | mEst_type='None', 97 | solver_type='Direct-Nodamping', 98 | init_pose_type='identity', 99 | remove_tru_sigma=False, 100 | scale_scaler=0.2, 101 | options=options, 102 | ) 103 | if torch.cuda.is_available(): rgbd_tracker.cuda() 104 | rgbd_tracker.eval() 105 | return rgbd_tracker 106 | 107 | if method_name == 'feature': 108 | # train_utils.load_checkpoint_test(options) 109 | # 110 | # net = ICtracking.LeastSquareTracking( 111 | # encoder_name=options.encoder_name, 112 | # uncertainty_type=options.uncertainty, 113 | # direction=options.direction, 114 | # max_iter_per_pyr=options.max_iter_per_pyr, 115 | # mEst_type=options.mestimator, 116 | # options=options, 117 | # solver_type=options.solver, 118 | # no_weight_sharing=options.no_weight_sharing) 119 | # 120 | # if torch.cuda.is_available(): net.cuda() 121 | # net.eval() 122 | # 123 | # # check whether it is a single checkpoint or a directory 124 | # net.load_state_dict(torch.load(options.checkpoint)['state_dict']) 125 | # method_list['trained_method'] = net 126 | # train_utils.load_checkpoint_test(options) 127 | print('==>Load our feature-metric method') 128 | net = LeastSquareTracking( 129 | encoder_name=options.encoder_name, 130 | uncertainty_type=options.uncertainty, 131 | direction=options.direction, 132 | max_iter_per_pyr=options.max_iter_per_pyr, 133 | mEst_type=options.mestimator, 134 | options=options, 135 | solver_type=options.solver, 136 | combine_ICP=False, 137 | no_weight_sharing=options.no_weight_sharing) 138 | 139 | if torch.cuda.is_available(): net.cuda() 140 | 141 | # check whether it is a single checkpoint or a directory 142 | if 
options.checkpoint == '': 143 | if options.dataset in ['TUM_RGBD', 'VaryLighting']: 144 | checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/logs/TUM_RGBD/check_change/check4_ConvRGBD2_None_Direct-Nodamping_dataset_TUM_RGBD_obj_False_laplacian_uncerCh_1_featCh_8_conv_initPose_sfm_net_multiHypo_prob_fuse_uncer_prop_False_lr_0.0005_batch_64/checkpoint_epoch29.pth.tar' 145 | else: 146 | raise NotImplementedError() 147 | net.load_state_dict(torch.load(checkpoint)['state_dict']) 148 | else: 149 | net.load_state_dict(torch.load(options.checkpoint)['state_dict']) 150 | net.eval() 151 | return net 152 | 153 | if method_name == 'feature_icp': 154 | print('==>Load our feature-metric+ICP method') 155 | feature_icp = LeastSquareTracking( 156 | encoder_name=options.encoder_name, 157 | uncertainty_type=options.uncertainty, 158 | direction=options.direction, 159 | max_iter_per_pyr=options.max_iter_per_pyr, 160 | mEst_type=options.mestimator, 161 | options=options, 162 | solver_type=options.solver, 163 | combine_ICP=True, 164 | scale_scaler=options.scale_icp, 165 | no_weight_sharing=options.no_weight_sharing) 166 | 167 | if torch.cuda.is_available(): feature_icp.cuda() 168 | 169 | # check whether it is a single checkpoint or a directory 170 | if options.checkpoint == '': 171 | if options.dataset in ['TUM_RGBD', 'VaryLighting']: 172 | checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/logs/TUM_RGBD/finetune/finetune_vl_icp_ConvRGBD2_None_Direct-Nodamping_TUM_RGBD_obj_False_uCh_1_laplacian_rmT_True_fCh_8_conv_iP_sfm_net_mH_prob_fuse_wICP_True_s_None_lr_0.0005_batch_64/checkpoint_epoch40.pth.tar' 173 | else: 174 | raise NotImplementedError() 175 | feature_icp.load_state_dict(torch.load(checkpoint)['state_dict']) 176 | else: 177 | feature_icp.load_state_dict(torch.load(options.checkpoint)['state_dict']) 178 | feature_icp.eval() 179 | return feature_icp 180 | else: 181 | raise NotImplementedError("unsupported test tracker: check argument of --tracker again") -------------------------------------------------------------------------------- /code/experiments/warping_objects.py: -------------------------------------------------------------------------------- 1 | """ 2 | Experiments to warp objects for visualization 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | import argparse, pickle 8 | import os.path as osp 9 | import cv2 10 | import numpy as np 11 | from tqdm import tqdm 12 | import torch 13 | import torch.utils.data as data 14 | 15 | import config 16 | import models.geometry as geometry 17 | from Logger import check_directory 18 | from train_utils import check_cuda 19 | from data.dataloader import load_data, MOVING_OBJECTS_3D 20 | from experiments.select_method import select_method 21 | from tools import display 22 | 23 | def compute_pose(dataloader, tracker, k, tracker_name, use_gt_pose): 24 | count_base = 0 25 | total_frames = len(dataloader.dataset) 26 | progress = tqdm(dataloader, ncols=100, 27 | # desc = 'evaluate deeper inverse compositional algorithm {:}'.format(eval_name), 28 | total= len(dataloader)) 29 | outputs = { 30 | 'R_est': np.zeros((total_frames, 3, 3)), 31 | 't_est': np.zeros((total_frames, 3)), 32 | 'seq_idx': np.zeros((total_frames)), 33 | 'frame0': np.zeros((total_frames)), 34 | 'frame1': np.zeros((total_frames)), 35 | } 36 | # computing pose 37 | for idx, batch in enumerate(progress): 38 | color0, color1, depth0, depth1, transform, K, mask0, mask1, names = check_cuda(batch) 39 | B, C, 
H, W = color0.shape 40 | if use_gt_pose: 41 | R_gt, t_gt = transform[:, :3, :3], transform[:, :3, 3] 42 | Rt = [R_gt, t_gt] 43 | else: 44 | with torch.no_grad(): 45 | if options.obj_only: 46 | output = tracker.forward(color0, color1, depth0, depth1, K, 47 | obj_mask0=mask0, obj_mask1=mask1, 48 | ) 49 | else: 50 | output = tracker.forward(color0, color1, depth0, depth1, K, 51 | index=idx, 52 | ) 53 | Rt = output 54 | R, t = Rt 55 | outputs['R_est'][count_base:count_base + B] = R.cpu().numpy() 56 | outputs['t_est'][count_base:count_base + B] = t.cpu().numpy() 57 | outputs['seq_idx'][count_base:count_base + B] = names['seq_idx'].cpu().numpy() 58 | outputs['frame0'][count_base:count_base + B] = names['frame0'].cpu().numpy() 59 | outputs['frame1'][count_base:count_base + B] = names['frame1'].cpu().numpy() 60 | count_base += B 61 | return outputs 62 | 63 | def test_object_warping(options): 64 | # loader = MovingObjects3D('', load_type='train', keyframes=[1]) 65 | assert options.dataset == 'MovingObjects3D' 66 | keyframes = [int(x) for x in options.keyframes.split(',')] 67 | objects = ['boat', 'motorbike'] if options.object == '' else [options.object] 68 | eval_loaders = {} 69 | for test_object in objects: 70 | for kf in keyframes: 71 | np_loader = load_data(options.dataset, keyframes=[kf], 72 | load_type=options.eval_set, 73 | select_trajectory=test_object, options=options) 74 | eval_loaders['{:}_keyframe_{:}'. 75 | format(test_object, kf)] = data.DataLoader(np_loader, 76 | batch_size=int(options.batch_per_gpu), 77 | shuffle=False, num_workers = options.cpu_workers) 78 | use_gt_pose = options.gt_pose 79 | # method_list = {} 80 | if not use_gt_pose: 81 | tracker = select_method(options.method, options) 82 | else: 83 | tracker = None 84 | options.method = 'gt' 85 | method_list = {options.method: tracker} 86 | 87 | for k, loader in eval_loaders.items(): 88 | for method_name in method_list: 89 | # method = method_list 90 | tracker = method_list[method_name] 91 | output_dir_method = osp.join(MOVING_OBJECTS_3D, 'visualization', method_name) 92 | output_dir = osp.join(output_dir_method, k) 93 | output_pkl = output_dir + '/pose.pkl' 94 | output_compose_dir = osp.join(output_dir, 'compose') 95 | output_input_dir = osp.join(output_dir, 'input') 96 | output_residual_dir = osp.join(output_dir, 'res') 97 | check_directory(output_pkl) 98 | check_directory(output_compose_dir + '/.png') 99 | check_directory(output_input_dir + '/.png') 100 | check_directory(output_residual_dir + '/.png') 101 | 102 | if options.recompute or not osp.isfile(output_pkl): 103 | info = compute_pose(loader, tracker, k, method_name, use_gt_pose) 104 | with open(output_pkl, 'wb') as output: 105 | pickle.dump(info, output) 106 | else: 107 | with open(output_pkl, 'rb') as pkl_file: 108 | info = pickle.load(pkl_file) 109 | # info = compute_pose(loader, tracker, k, method_name, use_gt_pose) 110 | 111 | # visualize residuals 112 | loader.dataset.fx_s = 1.0 113 | loader.dataset.fy_s = 1.0 114 | progress = tqdm(loader, ncols=100, 115 | desc='compute residual for object {:} using {}'.format(k, method_name), 116 | total=len(loader)) 117 | count_base = 0 118 | for idx, batch in enumerate(progress): 119 | color0, color1, depth0, depth1, transform, K, mask0, mask1, names = check_cuda(batch) 120 | # color0, color1, depth0, depth1, transform, K, mask0, mask1, names = check_cuda(loader.dataset.get_original_size_batch(idx)) 121 | B, C, H, W = color0.shape 122 | invD0 = 1.0 / depth0 123 | invD1 = 1.0 / depth1 124 | 125 | R = 
torch.stack(check_cuda(info['R_est'][count_base:count_base + B]), dim=0).float() 126 | t = torch.stack(check_cuda(info['t_est'][count_base:count_base + B]), dim=0).float() 127 | Rt = [R, t] 128 | # R_gt, t_gt = transform[:, :3, :3], transform[:, :3, 3] 129 | # Rt = [R_gt, t_gt] 130 | px, py = geometry.generate_xy_grid(B, H, W, K) 131 | u_warped, v_warped, inv_z_warped = geometry.batch_warp_inverse_depth( 132 | px, py, invD0, Rt, K) 133 | x1_1to0 = geometry.warp_features(color1, u_warped, v_warped) 134 | crd = torch.cat((u_warped, v_warped), dim=1) 135 | occ = geometry.check_occ(inv_z_warped, invD1, crd, DIC_version=True) 136 | 137 | residuals = x1_1to0 - color0 # equation (12) 138 | # remove occlusion 139 | x1_1to0[occ.expand(B, C, H, W)] = 0 140 | 141 | if mask0 is not None: 142 | bg_mask0 = ~mask0 143 | res_occ = occ | (bg_mask0.view(B, 1, H, W)) 144 | else: 145 | res_occ = occ 146 | residuals[res_occ.expand(B, C, H, W)] = 0 147 | residuals = residuals.mean(dim=1, keepdim=True) 148 | 149 | # # for each image 150 | for idx in range(B): 151 | feat_residual = display.create_mosaic([color0[idx:idx+1], color1[idx:idx+1], x1_1to0[idx:idx+1], residuals[idx:idx+1]], 152 | cmap=['NORMAL', 'NORMAL', 'NORMAL', cv2.COLORMAP_JET], 153 | order='CHW') 154 | input0 = feat_residual[0:H, 0:W, :].copy() 155 | input1 = feat_residual[0:H, W:, :].copy() 156 | warped = feat_residual[H:, 0:W, :].copy() 157 | res = feat_residual[H:, W:, :].copy() 158 | obj_mask0 = mask0[idx:idx+1].squeeze().cpu().numpy().astype(np.uint8)*255 159 | obj_mask1 = mask1[idx:idx+1].squeeze().cpu().numpy().astype(np.uint8)*255 160 | contours0, hierarchy0 = cv2.findContours(obj_mask0, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) 161 | contours1, hierarchy1 = cv2.findContours(obj_mask1, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) 162 | 163 | input0 = cv2.drawContours(input0, contours0, -1, (0, 0, 255), 1) 164 | input1 = cv2.drawContours(input1, contours0, -1, (0, 0, 255), 1) 165 | input1 = cv2.drawContours(input1, contours1, -1, (0, 255, 0), 1) 166 | overlay = cv2.drawContours(warped, contours0, -1, (0, 0, 255), 1) 167 | overlay = cv2.drawContours(overlay, contours1, -1, (0, 255, 0), 1) 168 | 169 | # visualization for debugging 170 | if options.save_img: 171 | idx_in_batch=count_base+idx 172 | seq_idx = int(info['seq_idx'][idx_in_batch]) 173 | idx0 = int(info['frame0'][idx_in_batch]) 174 | idx1 = int(info['frame1'][idx_in_batch]) 175 | index = "_" + str.zfill(str(idx_in_batch), 5) + '.png' 176 | image_name = osp.join(output_compose_dir, 'compose'+index) 177 | cv2.imwrite(image_name, feat_residual) 178 | overlay_name = osp.join(output_residual_dir, 'overlay'+index) 179 | input_name = osp.join(output_input_dir, 'input0'+index) 180 | cv2.imwrite(overlay_name, overlay) 181 | cv2.imwrite(input_name, input0) 182 | cv2.imwrite(input_name.replace('input0', "input1"), input1) 183 | 184 | pair_dir = osp.join(output_dir, 'sequence', 185 | 'seq' + str(seq_idx) + "_" + str(idx0) + "_" + str(idx1), 186 | ) 187 | check_directory(pair_dir + '/.png') 188 | cv2.imwrite(overlay_name.replace('overlay', "residual"), res) 189 | cv2.imwrite(pair_dir+"/overlay.png", overlay) 190 | cv2.imwrite(pair_dir+"/input0.png", input0) 191 | cv2.imwrite(pair_dir+"/input1.png", input1) 192 | cv2.imwrite(pair_dir+"/residual.png", res) 193 | else: 194 | cv2.imshow("overlay", overlay) 195 | cv2.imshow("input0", input0) 196 | cv2.imshow("input1", input1) 197 | 198 | window_name = "feature-metric residuals" 199 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL) 200 | cv2.imshow(window_name, 
feat_residual) 201 | # image_name = osp.join(output_dir, 'residual'+str(idx)+'.png') 202 | # cv2.imwrite(image_name, feat_residual) 203 | cv2.waitKey(0) 204 | count_base += B 205 | 206 | 207 | if __name__ == '__main__': 208 | 209 | parser = argparse.ArgumentParser(description="Evaluate the network") 210 | config.add_basics_config(parser) 211 | config.add_tracking_config(parser) 212 | config.add_object_config(parser) 213 | options = parser.parse_args() 214 | 215 | print('---------------------------------------') 216 | test_object_warping(options) 217 | 218 | -------------------------------------------------------------------------------- /code/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/models/__init__.py -------------------------------------------------------------------------------- /code/models/criterions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some training criterions tested in the experiments 3 | 4 | # SPDX-FileCopyrightText: 2021 Binbin Xu 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | @author: Zhaoyang Lv 8 | @date: March, 2019 9 | """ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | from __future__ import unicode_literals 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as func 19 | import models.geometry as geo 20 | from models.algorithms import invH, lev_mar_H 21 | 22 | 23 | def EPE3D_loss(input_flow, target_flow, invalid=None): 24 | """ 25 | :param the estimated optical / scene flow 26 | :param the ground truth / target optical / scene flow 27 | :param the invalid mask, the mask has value 1 for all areas that are invalid 28 | """ 29 | epe_map = torch.norm(target_flow-input_flow,p=2,dim=1) 30 | B = epe_map.shape[0] 31 | 32 | invalid_flow = (target_flow != target_flow) # check Nan same as torch.isnan 33 | 34 | mask = (invalid_flow[:,0,:,:] | invalid_flow[:,1,:,:] | invalid_flow[:,2,:,:]) 35 | if invalid is not None: 36 | mask = mask | (invalid.view(mask.shape) > 0) 37 | 38 | epes = [] 39 | for idx in range(B): 40 | epe_sample = epe_map[idx][~mask[idx].data] 41 | if len(epe_sample) == 0: 42 | epes.append(torch.zeros(()).type_as(input_flow)) 43 | else: 44 | epes.append(epe_sample.mean()) 45 | 46 | return torch.stack(epes) 47 | 48 | 49 | def RPE(R, t): 50 | """ Calcualte the relative pose error 51 | (a batch version of the RPE error defined in TUM RGBD SLAM TUM dataset) 52 | :param relative rotation 53 | :param relative translation 54 | """ 55 | angle_error = geo.batch_mat2angle(R) 56 | trans_error = torch.norm(t, p=2, dim=1) 57 | return angle_error, trans_error 58 | 59 | 60 | def compute_RPE_uncertainty(R_est, t_est, R_gt, t_gt, inv_var): 61 | loss = 0 62 | if R_est.dim() > 3: # training time [batch, num_poses, rot_row, rot_col] 63 | 64 | for idx in range(R_est.shape[1]): 65 | dR = geo.batch_mat2twist(R_gt.detach().contiguous()) - geo.batch_mat2twist(R_est[:,idx]) 66 | dt = t_gt.detach() - t_est[:,idx] 67 | dKsi = torch.cat([dR, dt], dim=-1).unsqueeze(dim=-1) 68 | Hessian = lev_mar_H(inv_var[:,idx]) 69 | sigma_ksi = invH(Hessian) 70 | det_var = torch.det(sigma_ksi) 71 | # clamp 72 | det_var = det_var.clamp(min=1e-9) 73 | weighted_error = torch.bmm(dKsi.transpose(1, 2), torch.bmm(inv_var[:,idx], dKsi)).squeeze() 74 | regularization = 
torch.log(1e-6 + det_var) 75 | loss += (weighted_error + regularization).sum() 76 | return loss 77 | 78 | 79 | def compute_RPE_loss(R_est, t_est, R_gt, t_gt): 80 | """ 81 | :param estimated rotation matrix Bx3x3 82 | :param estimated translation vector Bx3 83 | :param ground truth rotation matrix Bx3x3 84 | :param ground truth translation vector Bx3 85 | """ 86 | B=R_est.shape[0] 87 | if R_est.dim() > 3: # training time [batch, num_poses, rot_row, rot_col] 88 | angle_error = 0 89 | trans_error = 0 90 | for idx in range(R_est.shape[1]): 91 | dR, dt = geo.batch_Rt_between(R_est[:, idx], t_est[:, idx], R_gt, t_gt) 92 | angle_error_i, trans_error_i = RPE(dR, dt) 93 | angle_error += angle_error_i.norm(p=2).sum() 94 | trans_error += trans_error_i.norm(p=2).sum() 95 | else: 96 | dR, dt = geo.batch_Rt_between(R_est, t_est, R_gt, t_gt) 97 | angle_error, trans_error = RPE(dR, dt) 98 | return angle_error, trans_error 99 | 100 | 101 | def compute_RT_EPE_loss(R_est, t_est, R_gt, t_gt, depth0, K, invalid=None): 102 | """ Compute the epe point error of rotation & translation 103 | :param estimated rotation matrix Bx3x3 104 | :param estimated translation vector Bx3 105 | :param ground truth rotation matrix Bx3x3 106 | :param ground truth translation vector Bx3 107 | :param reference depth image, 108 | :param camera intrinsic 109 | """ 110 | 111 | loss = 0 112 | if R_est.dim() > 3: # training time [batch, num_poses, rot_row, rot_col] 113 | rH, rW = 60, 80 # we train the algorithm using a downsized input, (since the size of the input is not super important at training time) 114 | 115 | B, C, H, W = depth0.shape 116 | rdepth = func.interpolate(depth0, size=(rH, rW), mode='bilinear') 117 | rinvalid = func.interpolate(invalid.float(), size=(rH, rW), mode='bilinear') 118 | rK = K.clone() 119 | rK[:, 0] *= float(rW) / W 120 | rK[:, 1] *= float(rH) / H 121 | rK[:, 2] *= float(rW) / W 122 | rK[:, 3] *= float(rH) / H 123 | xyz = geo.batch_inverse_project(rdepth, rK) 124 | flow_gt = geo.batch_transform_xyz(xyz, R_gt, t_gt, get_Jacobian=False) 125 | 126 | for idx in range(R_est.shape[1]): 127 | flow_est = geo.batch_transform_xyz(xyz, R_est[:, idx], t_est[:, idx], get_Jacobian=False) 128 | loss += EPE3D_loss(flow_est, flow_gt.detach(), invalid=rinvalid) # * (1< (B*H*w,3,1) 153 | deltaP = deltaP.view(B,3,-1).permute(0,2,1).contiguous().view(-1,3).unsqueeze(dim=-1) 154 | dim_ind = True 155 | if uncer_loss_type == 'gaussian': 156 | # if assume each dimension independent 157 | if dim_ind: 158 | diag_mask = torch.eye(3).view(1, 3, 3).type_as(variance) 159 | variance = torch.clamp(variance, min=1e-3) 160 | variance = diag_mask * variance 161 | 162 | # inv_var = invH(variance) 163 | inv_var = torch.inverse(variance) 164 | weighted_error = torch.bmm(deltaP.transpose(1, 2), torch.bmm(inv_var, deltaP)).squeeze() 165 | if dim_ind: 166 | regularization = variance.diagonal(dim1=1, dim2=2).log().sum(dim=1) 167 | else: 168 | det_var = torch.det(variance) 169 | # make it numerical stable 170 | det_var = torch.clamp(det_var, min=1e-3) 171 | regularization = torch.log(det_var) 172 | epe_map = weighted_error + regularization 173 | elif uncer_loss_type == 'laplacian': 174 | raise NotImplementedError("the bessel function needs to be implemented") 175 | sigma = torch.sqrt(variance + 1e-7) 176 | weighted_error = torch.abs(deltaP) / (sigma + 1e-7) 177 | regularization = torch.log(torch.det(sigma)) 178 | epe_map = weighted_error + regularization 179 | else: 180 | raise NotImplementedError() 181 | epe_map = epe_map.view(B,H,W) 182 | B = 
epe_map.shape[0] 183 | 184 | invalid_flow = (target_flow != target_flow) # check Nan same as torch.isnan 185 | 186 | mask = (invalid_flow[:,0,:,:] | invalid_flow[:,1,:,:] | invalid_flow[:,2,:,:]) 187 | if invalid is not None: 188 | mask = mask | (invalid.view(mask.shape) > 0) 189 | 190 | epes = [] 191 | o_epes = [] 192 | for idx in range(B): 193 | epe_sample = epe_map[idx][~mask[idx].data] 194 | if len(epe_sample) == 0: 195 | epes.append(torch.zeros(()).type_as(input_flow)) 196 | else: 197 | epes.append(epe_sample.mean()) 198 | 199 | if uncer_loss_type is not None: 200 | o_epe_sample = o_epe_map[idx][~mask[idx].data] 201 | if len(o_epe_sample) == 0: 202 | o_epes.append(torch.zeros(()).type_as(input_flow)) 203 | else: 204 | o_epes.append(o_epe_sample.mean()) 205 | if uncer_loss_type is not None: 206 | return torch.stack(epes), torch.stack(o_epes) 207 | else: 208 | return torch.stack(epes) 209 | 210 | 211 | def compute_RT_EPE_uncertainty_loss(R_est, t_est, R_gt, t_gt, depth0, K, sigma_ksi, uncertainty_type, invalid=None): 212 | """ Compute the epe point error of rotation & translation 213 | :param estimated rotation matrix BxNx3x3 214 | :param estimated translation vector BxNx3 215 | :param ground truth rotation matrix Bx3x3 216 | :param ground truth translation vector Bx3 217 | :param reference depth image, 218 | :param camera intrinsic 219 | """ 220 | 221 | loss = 0 222 | epe = 0 223 | assert R_est.dim() > 3 # training time [batch, num_poses, rot_row, rot_col] 224 | rH, rW = 60, 80 # we train the algorithm using a downsized input, (since the size of the input is not super important at training time) 225 | 226 | B,C,H,W = depth0.shape 227 | rdepth = func.interpolate(depth0, size=(rH, rW), mode='bilinear') 228 | rinvalid = func.interpolate(invalid.float(), size=(rH,rW), mode='bilinear') 229 | rK = K.clone() 230 | rK[:,0] *= float(rW) / W 231 | rK[:,1] *= float(rH) / H 232 | rK[:,2] *= float(rW) / W 233 | rK[:,3] *= float(rH) / H 234 | xyz = geo.batch_inverse_project(rdepth, rK) 235 | flow_gt = geo.batch_transform_xyz(xyz, R_gt, t_gt, get_Jacobian=False) 236 | 237 | for idx in range(R_est.shape[1]): 238 | flow_est, J_flow = geo.batch_transform_xyz(xyz, R_est[:,idx], t_est[:,idx], get_Jacobian=True) 239 | # uncertainty propagation: J*sigma*J^T 240 | sigma_ksai_level = sigma_ksi[:,idx:idx+1,:,:].repeat(1,rH*rW,1,1).view(-1,6,6) 241 | J_sigmaKsi = torch.bmm(J_flow, sigma_ksai_level) 242 | sigma_deltaP = torch.bmm(J_sigmaKsi, torch.transpose(J_flow, 1, 2)) 243 | loss_l, epe_l = UEPE3D_loss(flow_est, flow_gt.detach(), variance=sigma_deltaP, uncer_loss_type=uncertainty_type, invalid=rinvalid,) 244 | loss += loss_l 245 | epe += epe_l 246 | 247 | return loss, epe 248 | 249 | -------------------------------------------------------------------------------- /code/models/submodules.py: -------------------------------------------------------------------------------- 1 | """ 2 | Submodules to build up CNN 3 | 4 | # SPDX-FileCopyrightText: 2021 Binbin Xu 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | @author: Zhaoyang Lv 8 | @date: March, 2019 9 | """ 10 | 11 | from __future__ import print_function 12 | 13 | import torch.nn as nn 14 | import torch 15 | import numpy as np 16 | 17 | from torch.nn import init 18 | from torchvision import transforms 19 | 20 | def color_normalize(color): 21 | rgb_mean = torch.Tensor([0.4914, 0.4822, 0.4465]).type_as(color) 22 | rgb_std = torch.Tensor([0.2023, 0.1994, 0.2010]).type_as(color) 23 | return (color - rgb_mean.view(1,3,1,1)) / rgb_std.view(1,3,1,1) 24 | 25 | def 
convLayer(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, dilation=1, bias=False): 26 | """ A wrapper of convolution-batchnorm-ReLU module 27 | """ 28 | if batchNorm: 29 | return nn.Sequential( 30 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=bias), 31 | nn.BatchNorm2d(out_planes), 32 | #nn.LeakyReLU(0.1,inplace=True) # deprecated 33 | nn.ELU(inplace=True) 34 | ) 35 | else: 36 | return nn.Sequential( 37 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=True), 38 | #nn.LeakyReLU(0.1,inplace=True) # deprecated 39 | nn.ELU(inplace=True) 40 | ) 41 | 42 | def convLayer1d(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, dilation=1, bias=False): 43 | """ A wrapper of convolution-batchnorm-ReLU module 44 | """ 45 | if batchNorm: 46 | return nn.Sequential( 47 | nn.Conv1d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=bias), 48 | nn.BatchNorm1d(out_planes), 49 | nn.ELU(inplace=True) 50 | # nn.ReLU(inplace=True) 51 | ) 52 | else: 53 | return nn.Sequential( 54 | nn.Conv1d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=True), 55 | nn.ELU(inplace=True) 56 | # nn.ReLU(inplace=True) 57 | ) 58 | 59 | def fcLayer(in_planes, out_planes, bias=True): 60 | return nn.Sequential( 61 | nn.Linear(in_planes, out_planes, bias), 62 | nn.ReLU(inplace=True) 63 | ) 64 | 65 | def initialize_weights(modules, method='xavier'): 66 | for m in modules: 67 | if isinstance(m, nn.Conv2d): 68 | if m.bias is not None: 69 | m.bias.data.zero_() 70 | if method == 'xavier': 71 | init.xavier_uniform_(m.weight) 72 | elif method == 'kaiming': 73 | init.kaiming_uniform_(m.weight) 74 | 75 | if isinstance(m, nn.ConvTranspose2d): 76 | if m.bias is not None: 77 | m.bias.data.zero_() 78 | if method == 'xavier': 79 | init.xavier_uniform_(m.weight) 80 | elif method == 'kaiming': 81 | init.kaiming_uniform_(m.weight) 82 | 83 | class ListModule(nn.Module): 84 | """ The implementation of a list of modules from 85 | https://discuss.pytorch.org/t/list-of-nn-module-in-a-nn-module/219/2 86 | """ 87 | def __init__(self, *args): 88 | super(ListModule, self).__init__() 89 | idx = 0 90 | for module in args: 91 | self.add_module(str(idx), module) 92 | idx += 1 93 | 94 | def __getitem__(self, idx): 95 | if idx < 0 or idx >= len(self._modules): 96 | raise IndexError('index {} is out of range'.format(idx)) 97 | it = iter(self._modules.values()) 98 | for i in range(idx): 99 | next(it) 100 | return next(it) 101 | 102 | def __iter__(self): 103 | return iter(self._modules.values()) 104 | 105 | def __len__(self): 106 | return len(self._modules) 107 | -------------------------------------------------------------------------------- /code/run_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | An extremely simple example to show how to run the algorithm 3 | 4 | @author: Zhaoyang Lv 5 | @date: May 2019 6 | """ 7 | 8 | import argparse 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as func 13 | 14 | import models.LeastSquareTracking as ICtracking 15 | 16 | from tqdm import tqdm 17 | from torch.utils.data import DataLoader 18 | from train_utils import check_cuda 19 | from data.SimpleLoader import SimpleLoader 20 | 21 | def 
resize(img0, img1, depth0, depth1, K_in, resizeH, resizeW): 22 | H, W = img0.shape[-2:] 23 | 24 | I0 = func.interpolate(img0, (resizeH,resizeW), mode='bilinear', align_corners=True) 25 | I1 = func.interpolate(img1, (resizeH,resizeW), mode='bilinear', align_corners=True) 26 | D0 = func.interpolate(depth0, (resizeH,resizeW), mode='nearest') 27 | D1 = func.interpolate(depth1, (resizeH,resizeW), mode='nearest') 28 | 29 | sx = resizeH / H 30 | sy = resizeW / W 31 | 32 | K = K_in.clone() 33 | K[:,0] *= sx 34 | K[:,1] *= sy 35 | K[:,2] *= sx 36 | K[:,3] *= sy 37 | 38 | return I0, I1, D0, D1, K 39 | 40 | def run_inference(dataloader, net): 41 | 42 | progress = tqdm(dataloader, ncols=100, 43 | desc = 'Run the deeper inverse compositional algorithm', 44 | total= len(dataloader)) 45 | 46 | net.eval() 47 | 48 | for idx, batch, in enumerate(progress): 49 | 50 | color0, color1, depth0, depth1, K = check_cuda(batch) 51 | 52 | # downsize the input to 120*160, it is the size of data when the algorthm is trained 53 | C0, C1, D0, D1, K = resize(color0, color1, depth0, depth1, K, resizeH = 120, resizeW=160) 54 | 55 | with torch.no_grad(): 56 | R, t = net.forward(C0, C1, D0, D1, K) 57 | 58 | print('Rotation: ') 59 | print(R) 60 | print('translation: ') 61 | print(t) 62 | 63 | if __name__ == '__main__': 64 | 65 | parser = argparse.ArgumentParser(description='Run the network inference example.') 66 | 67 | parser.add_argument('--checkpoint', default='trained_models/TUM_RGBD_ABC_final.pth.tar', 68 | type=str, help='the path to the pre-trained checkpoint.') 69 | 70 | parser.add_argument('--color_dir', default='data/data_examples/TUM/color', 71 | help='the directory of color images') 72 | parser.add_argument('--depth_dir', default='data/data_examples/TUM/depth', 73 | help='the directory of depth images') 74 | parser.add_argument('--intrinsic', default='525.0,525.0,319.5,239.5', 75 | help='Simple pin-hole camera intrinsics, input in the format (fx, fy, cx, cy)') 76 | 77 | config = parser.parse_args() 78 | 79 | K = [float(x) for x in config.intrinsic.split(',')] 80 | 81 | simple_loader = SimpleLoader(config.color_dir, config.depth_dir, K) 82 | simple_loader = DataLoader(simple_loader, batch_size=1, shuffle=False) 83 | 84 | net = ICtracking.LeastSquareTracking( 85 | encoder_name = 'ConvRGBD2', 86 | max_iter_per_pyr= 3, 87 | mEst_type = 'MultiScale2w', 88 | solver_type = 'Direct-ResVol', 89 | uncertainty_type='None', 90 | direction='inverse', 91 | # options=options, 92 | ) 93 | 94 | # net = ICtracking.LeastSquareTracking( 95 | # encoder_name = 'RGB', 96 | # max_iter_per_pyr= 50, 97 | # mEst_type = 'None', 98 | # solver_type = 'Direct-Nodamping', 99 | # uncertainty_type='None', 100 | # direction='forward') 101 | 102 | if torch.cuda.is_available(): 103 | net.cuda() 104 | 105 | net.load_state_dict(torch.load(config.checkpoint)['state_dict']) 106 | 107 | run_inference(simple_loader, net) -------------------------------------------------------------------------------- /code/timers.py: -------------------------------------------------------------------------------- 1 | """ 2 | A timing utility 3 | 4 | @author: Zhaoyang Lv 5 | @date: March 2019 6 | """ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | from __future__ import unicode_literals 12 | 13 | import time 14 | import numpy as np 15 | from collections import defaultdict 16 | 17 | class Timer(object): 18 | 19 | def __init__(self): 20 | self.reset() 21 | 22 | def tic(self): 23 | 
self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | 30 | def total(self): 31 | """ return the total amount of time """ 32 | return self.total_time 33 | 34 | def avg(self): 35 | """ return the average amount of time """ 36 | return self.total_time / float(self.calls) 37 | 38 | def reset(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | 44 | class Timers(object): 45 | 46 | def __init__(self): 47 | self.timers = defaultdict(Timer) 48 | 49 | def tic(self, key): 50 | self.timers[key].tic() 51 | 52 | def toc(self, key): 53 | self.timers[key].toc() 54 | 55 | def print(self, key=None): 56 | if key is None: 57 | # print all time 58 | for k, v in self.timers.items(): 59 | print("Average time for {:}: {:}".format(k, v.avg())) 60 | else: 61 | print("Average time for {:}: {:}".format(key, self.timers[key].avg())) 62 | 63 | def get_avg(self, key): 64 | return self.timers[key].avg() 65 | -------------------------------------------------------------------------------- /code/tools/ICP.py: -------------------------------------------------------------------------------- 1 | """ 2 | An implementation of ICP odometry using Open3D library for comparison in the paper 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | import open3d as o3d 8 | import numpy as np 9 | import torch 10 | import copy 11 | import cv2 12 | 13 | class ICP_Odometry: 14 | 15 | def __init__(self, mode='Point2Plane'): 16 | self.mode = mode 17 | if mode == 'Point2Plane': 18 | print("Using Point-to-plane ICP") 19 | elif mode == 'Point2Point': 20 | print("Using Point-to-point ICP") 21 | elif mode == "ColorICP": 22 | print("using ColorICP") 23 | elif mode == 'Iter_Point2Plane': 24 | print("Using iterative Point-to-plane ICP") 25 | elif mode == "Iter_ColorICP": 26 | print("using iterative ColorICP") 27 | else: 28 | raise NotImplementedError() 29 | 30 | def set_K(self, K, width, height): 31 | fx, fy, cx, cy = K 32 | K = o3d.camera.PinholeCameraIntrinsic(width, height, fx, fy, cx, cy) 33 | return K 34 | 35 | def batch_track(self, batch_rgb0, batch_dpt0, batch_rgb1, batch_dpt1, batch_K, 36 | batch_objmask0=None, batch_objmask1=None, vis_pcd=False): 37 | assert batch_dpt0.ndim == 4 38 | B = batch_dpt0.shape[0] 39 | batch_R = [] 40 | batch_t = [] 41 | if batch_objmask0 is not None: 42 | batch_dpt0 = batch_dpt0 * batch_objmask0 43 | if batch_objmask1 is not None: 44 | batch_dpt1 = batch_dpt1 * batch_objmask1 45 | for i in range(B): 46 | rgb0 = batch_rgb0[i].permute(1, 2, 0).cpu().numpy() 47 | dpt0 = batch_dpt0[i].permute(1,2,0).cpu().numpy() 48 | rgb1 = batch_rgb1[i].permute(1, 2, 0).cpu().numpy() 49 | dpt1 = batch_dpt1[i].permute(1,2,0).cpu().numpy() 50 | K = batch_K[i].cpu().numpy().tolist() 51 | pose10 = self.track(rgb0, dpt0, rgb1, dpt1, K) 52 | batch_R.append(pose10[0]) 53 | batch_t.append(pose10[1]) 54 | 55 | batch_R = torch.tensor(batch_R).type_as(batch_K) 56 | batch_t = torch.tensor(batch_t).type_as(batch_K) 57 | return batch_R, batch_t 58 | 59 | def draw_registration_result(self, source, target, transformation, name='Open3D'): 60 | source_temp = copy.deepcopy(source) 61 | target_temp = copy.deepcopy(target) 62 | source_temp.paint_uniform_color([1, 0.706, 0]) 63 | target_temp.paint_uniform_color([0, 0.651, 0.929]) 64 | source_temp.transform(transformation) 65 | o3d.visualization.draw_geometries([source_temp, 
target_temp], window_name=name) 66 | 67 | def track(self, rgb0, dpt0, rgb1, dpt1, K, vis_pcd=True, odo_init=None): 68 | H, W, _ = dpt0.shape 69 | intrinsic = self.set_K(K, H, W) 70 | # pcd_0 = o3d.geometry.PointCloud.create_from_depth_image(o3d.geometry.Image(dpt0), 71 | # intrinsic=intrinsic, 72 | # depth_scale=1.0, 73 | # depth_trunc=5.0) 74 | # pcd_1 = o3d.geometry.PointCloud.create_from_depth_image(o3d.geometry.Image(dpt1), 75 | # intrinsic=intrinsic, 76 | # depth_scale=1.0, 77 | # depth_trunc=5.0) 78 | rgbd_0 = o3d.geometry.RGBDImage.create_from_color_and_depth( 79 | o3d.geometry.Image(rgb0), o3d.geometry.Image(dpt0), depth_scale=1, depth_trunc=4.0) 80 | rgbd_1 = o3d.geometry.RGBDImage.create_from_color_and_depth( 81 | o3d.geometry.Image(rgb1), o3d.geometry.Image(dpt1), depth_scale=1, depth_trunc=4.0) 82 | pcd_0 = o3d.geometry.PointCloud.create_from_rgbd_image( 83 | rgbd_0, intrinsic) 84 | pcd_1 = o3d.geometry.PointCloud.create_from_rgbd_image( 85 | rgbd_1, intrinsic) 86 | 87 | if odo_init is None: 88 | odo_init = np.identity(4) 89 | 90 | # point-to-point ICP 91 | if self.mode == 'Point2Point': 92 | reg_p2p = o3d.registration.registration_icp( 93 | pcd_0, pcd_1, 0.02, odo_init, 94 | o3d.registration.TransformationEstimationPointToPoint()) 95 | T_10 = reg_p2p.transformation 96 | 97 | # point-to-plane ICP 98 | elif self.mode == 'Point2Plane': 99 | # radius = 0.01 100 | # source_down = pcd_0.voxel_down_sample(radius) 101 | # target_down = pcd_1.voxel_down_sample(radius) 102 | # 103 | # # print("3-2. Estimate normal.") 104 | # source_down.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30)) 105 | # target_down.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30)) 106 | # reg_p2l = o3d.registration.registration_icp(source_down, target_down, 0.2, odo_init, 107 | # o3d.registration.TransformationEstimationPointToPlane(), 108 | # o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6, 109 | # relative_rmse=1e-6, 110 | # max_iteration=50) 111 | # 112 | # ) 113 | iter = 10 114 | pcd_0.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid( 115 | radius=0.1, max_nn=30)) 116 | pcd_1.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid( 117 | radius=0.1, max_nn=30)) 118 | reg_p2l = o3d.registration.registration_icp( 119 | pcd_0, pcd_1, 0.4, odo_init, 120 | o3d.registration.TransformationEstimationPointToPlane(), 121 | o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6, 122 | relative_rmse=1e-6, 123 | max_iteration=iter) 124 | ) 125 | 126 | T_10 = reg_p2l.transformation 127 | 128 | elif self.mode == 'ColorICP': 129 | pcd_0.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid( 130 | radius=0.1, max_nn=30)) 131 | pcd_1.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid( 132 | radius=0.1, max_nn=30)) 133 | reg_p2l = o3d.registration.registration_colored_icp( 134 | pcd_0, pcd_1, 0.02, odo_init,) 135 | T_10 = reg_p2l.transformation 136 | 137 | elif self.mode in ['Iter_Point2Plane', 'Iter_ColorICP']: 138 | voxel_radius = [0.04, 0.02, 0.01] 139 | max_iter = [50, 30, 14] 140 | T_10 = odo_init 141 | for scale in range(3): 142 | iter = max_iter[scale] 143 | radius = voxel_radius[scale] 144 | 145 | pcd0_down = pcd_0.voxel_down_sample(radius) 146 | pcd1_down = pcd_1.voxel_down_sample(radius) 147 | 148 | pcd0_down.estimate_normals( 149 | o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)) 150 | pcd1_down.estimate_normals( 151 | 
o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)) 152 | 153 | # point-to-plane ICP 154 | if self.mode == 'Iter_Point2Plane': 155 | result_icp = o3d.registration.registration_icp( 156 | pcd0_down, pcd1_down, radius, T_10, 157 | o3d.registration.TransformationEstimationPointToPlane(), 158 | o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6, 159 | relative_rmse=1e-6, 160 | max_iteration=iter) 161 | ) 162 | elif self.mode == 'Iter_ColorICP': 163 | # colored ICP 164 | result_icp = o3d.registration.registration_colored_icp( 165 | pcd0_down, pcd1_down, radius*2, T_10, 166 | o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6, 167 | relative_rmse=1e-6, 168 | max_iteration=iter)) 169 | else: 170 | raise NotImplementedError 171 | T_10 = result_icp.transformation 172 | else: 173 | raise NotImplementedError() 174 | 175 | # T_10 = result_icp.transformation 176 | trs = T_10[0:3, 3] 177 | rot = T_10[0:3, 0:3] 178 | pose10 = [rot, trs] 179 | 180 | if (trs > 1).sum(): 181 | print('pose', T_10) 182 | 183 | # cv2.imshow('rgb0', rgb0) 184 | # cv2.imshow('rgb1', rgb1) 185 | # cv2.waitKey(0) 186 | # self.draw_registration_result(pcd_0, pcd_1, odo_init, name='init') 187 | # self.draw_registration_result(pcd_0, pcd_1, T_10, name='aligned') 188 | 189 | T_10 = odo_init 190 | trs = T_10[0:3, 3] 191 | rot = T_10[0:3, 0:3] 192 | pose10 = [rot, trs] 193 | 194 | 195 | return pose10 -------------------------------------------------------------------------------- /code/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/tools/__init__.py -------------------------------------------------------------------------------- /code/tools/display.py: -------------------------------------------------------------------------------- 1 | """ 2 | visualisation tool for debugging and demo 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | import cv2 8 | # cv2.setNumThreads(0) 9 | 10 | import math 11 | import sys 12 | import numpy as np 13 | import torch 14 | 15 | def convert_flow_for_display(flow): 16 | """ 17 | Converts a 2D image (e.g. flow) to bgr 18 | 19 | :param flow: 20 | :type flow: optical flow of size [2, H, W] 21 | :return: 22 | :rtype: 23 | """ 24 | 25 | ang = np.arctan2(flow[1, :, :], flow[0, :, :]) 26 | ang[ang < 0] += 2 * np.pi 27 | ang /= 2 * np.pi 28 | mag = np.sqrt(flow[0, :, :] ** 2. + flow[1, :, :] ** 2.) 29 | mag = np.clip(mag / (np.percentile(mag, 99) + 1e-6), 0., 1.) 
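    # [editor's note] Hedged explanation added here, kept entirely in comments so
    # convert_flow_for_display() behaves exactly as before: `ang` above is the flow
    # direction remapped from [0, 2*pi) to [0, 1), and `mag` is the flow magnitude
    # normalised by its 99th percentile and clipped to [0, 1]. The next line packs
    # these into an HSV image in which hue encodes direction (ang * 180, OpenCV's
    # 0-180 hue range), saturation encodes magnitude (mag * 255) and value is a
    # constant 255, before converting to an RGB image in [0, 1] of shape [3, H, W].
    # Illustrative values (hypothetical, not taken from the source):
    #   flow = (+1, 0) -> ang = 0.00 -> hue  0 (red), saturation ~ magnitude
    #   flow = (0, +1) -> ang = 0.25 -> hue 45 (yellow-green)
    #   flow = (0, 0)  -> mag = 0    -> unsaturated (near-white) pixel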
30 | hfill_hsv = np.stack([ang * 180, mag * 255, np.ones_like(ang) * 255], 2).astype(np.uint8) 31 | flow_rgb = cv2.cvtColor(hfill_hsv, cv2.COLOR_HSV2RGB) / 255 32 | return np.transpose(flow_rgb, [2, 0, 1]) 33 | 34 | 35 | def single_image_tensor_mat(T): # [1, C, H, W] 36 | img_mat = T.squeeze(0).permute(1, 2, 0).numpy() 37 | show = cv2.cvtColor(img_mat, cv2.COLOR_BGR2RGB) 38 | return show 39 | 40 | 41 | def image_to_display(image, cmap=cv2.COLORMAP_JET, order='CHW', normalize=False): 42 | """ 43 | accepts a [1xHxW] or [DxHxW] float image with values in range [0,1] 44 | => change it range of (0, 255) for visualization 45 | :param image: 46 | :type image: 47 | :param cmap: cv2.COLORMAP_BONE or cv2.COLORMAP_JET, or NORMAL(no-processing) 48 | :type cmap: 49 | :param order: 50 | :type order: 51 | :param normalize: if true, noramalize to 0~1, otherwise clip to 0-1 52 | :type normalize: 53 | :return: a visiable BGR image in range(0,255), in fault a colored heat map (in JET color map) 54 | :rtype: opencv mat [H, W, C] 55 | """ 56 | if order is 'HWC' and len(image.shape) > 2: 57 | image = np.rollaxis(image, axis=2) 58 | # image = np.moveaxis(image, 2, 0) 59 | image = np.squeeze(image) # 2d or 3d 60 | 61 | if len(image.shape) == 3 and image.shape[0] == 2: 62 | image = convert_flow_for_display(image) 63 | 64 | if normalize: 65 | # handle nan pixels 66 | min_intensity = np.nanmin(image) 67 | max_intensity = np.nanmax(image) 68 | image = (image - min_intensity) / (max_intensity - min_intensity) 69 | image = np.uint8(image * 255) 70 | else: 71 | if image.dtype != np.uint8: 72 | image = np.clip(image, 0, 1) 73 | image = (image * 255).astype(np.uint8) 74 | # image = (image * 1).astype(np.uint8) 75 | 76 | if image.ndim == 3: 77 | if image.shape[0] == 3: 78 | image = np.transpose(image, [1, 2, 0]) 79 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 80 | elif image.ndim == 2: 81 | if cmap == "NORMAL": 82 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) 83 | else: 84 | image = cv2.applyColorMap(image, cmap) 85 | return image 86 | 87 | 88 | # image_array needs to be 2d 89 | def create_mosaic(image_array, cmap=None, points=None, order='CHW', normalize=False): 90 | """ 91 | Stitch array of images into a big concancated images 92 | 93 | :param image_array: subimages to be displayed 94 | :type image_array: Two-dimensional lists (arrays) , if in a stretch 1D lisr, will be stretched back 95 | each element is an image of [DxHxW] 96 | :param cmap: list of color map => common: cv2.COLORMAP_BONE or cv2.COLORMAP_JET or 'NORMAL' 97 | :type cmap: 98 | :param order: 99 | :type order: 100 | :param normalize: normalize to 0-1 range 101 | :type normalize: bool 102 | :return: image to be showed 103 | :rtype: numpy array 104 | """ 105 | batch_version = (len(image_array[0].shape) == 4) 106 | 107 | if not isinstance(image_array[0], list): # if image_array is a stretch 1D list 108 | image_size = math.ceil(math.sqrt(len(image_array))) # stretch back to 2D list [N by N] 109 | image_array = [image_array[i:min(i + image_size, len(image_array))] 110 | for i in range(0, len(image_array), image_size)] 111 | 112 | max_cols = max([len(row) for row in image_array]) # because not every row (1st array) has the same size 113 | rows = [] 114 | 115 | if cmap is None: 116 | cmap = [cv2.COLORMAP_JET] 117 | elif not isinstance(cmap, list): 118 | cmap = [cmap] 119 | 120 | if not isinstance(normalize, list): 121 | normalize = [normalize] 122 | 123 | if points is not None: 124 | if not isinstance(points, list): 125 | points = [points] 126 | 127 | i = 0 
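    # [editor's note] Hedged usage sketch, kept in comments so create_mosaic() is
    # unchanged. The loop below walks image_array in reading order; the counter `i`
    # advances once per tile, and cmap / normalize (and points, if given) are cycled
    # with `i % len(...)`, so a single-element list applies one setting to every tile
    # while a longer list assigns per-tile settings. Ragged rows are padded with
    # black (zero) tiles, each row is concatenated horizontally and the rows are then
    # stacked vertically. A call mirroring the one in experiments/warping_objects.py,
    # with four hypothetical [1, C, H, W] tensors stretched into a 2x2 grid:
    #   mosaic = create_mosaic([color0, color1, warped, residual],
    #                          cmap=['NORMAL', 'NORMAL', 'NORMAL', cv2.COLORMAP_JET],
    #                          order='CHW')
    #   cv2.imshow('mosaic', mosaic); cv2.waitKey(0)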
128 | for image_row in image_array: 129 | if len(image_row) == 0: 130 | continue 131 | image_row_processed = [] 132 | for image in image_row: 133 | if torch.is_tensor(image): 134 | if batch_version: 135 | image = image[0:1, :, :, :] 136 | if len(image.shape) == 4: #[B. C, H, W] 137 | image = image.squeeze(0) 138 | if order == 'CHW': 139 | image = image.permute(1, 2, 0) # [H, W, C] 140 | if image.shape[2] not in(0, 3): # sum all channel features 141 | image = image.sum(dim=2) 142 | image = image.cpu().numpy() 143 | image_colorized = image_to_display(image, cmap[i % len(cmap)], 144 | order, 145 | normalize[i % len(normalize)]) 146 | if points is not None: 147 | image_colorized = visualize_matches_on_image(image_colorized, points[i % len(points)]) 148 | image_row_processed.append(image_colorized) 149 | i += 1 150 | nimages = len(image_row_processed) 151 | if nimages < max_cols: # padding zero(black) images in the empty areas 152 | image_row_processed += [np.zeros_like(image_row_processed[-1])] * (max_cols - nimages) 153 | rows.append(np.concatenate(image_row_processed, axis=1)) #horizontally 154 | return np.concatenate(rows, axis=0) # vertically 155 | 156 | 157 | def visualize_image_features(img_list: list, feat_list: list, feat_cmap=cv2.COLORMAP_WINTER): 158 | feat_sum_list = [feat[0, :, :, :].sum(dim=0) for feat in feat_list] 159 | if feat_cmap is not None and feat_cmap is not list: 160 | feat_cmap = [feat_cmap] * len(feat_sum_list) 161 | img_cmap = ['NORMAL'] * len(img_list) 162 | cmap = img_cmap + feat_cmap 163 | img_show = create_mosaic(img_list + feat_sum_list, cmap=cmap) 164 | 165 | return img_show 166 | 167 | 168 | def visualize_matches_on_image(image, matches): 169 | """ 170 | :param image: [] 171 | :type image: Union[torch.Tensor, numpy.ndarray] 172 | :param matches: 173 | :type matches: torch.Tensor 174 | :return: 175 | :rtype: None 176 | """ 177 | num_matches = matches.shape[1] 178 | if torch.is_tensor(image): 179 | image = image.detach().cpu().numpy() 180 | if torch.is_tensor(matches): 181 | matches = matches.detach().cpu().numpy() 182 | # just for visualization, round it: 183 | matches = matches.astype(int) 184 | output = image.copy() 185 | red = (0, 0, 255) 186 | alpha = 0.6 187 | radius = int(image.shape[1] / 64) # should be 10 when the image width is 640 188 | for i in range(num_matches): 189 | image = cv2.circle(image, (matches[0, i], matches[1, i]), radius, red, -1) 190 | #blend 191 | output = cv2.addWeighted(image, alpha, output, 1 - alpha, 0, ) 192 | return output 193 | 194 | 195 | def visualize_feature_channels(feat_map, rgb=None, points=None, order='CHW', add_ftr_avg=True): 196 | """ 197 | :param points: points to draw on images 198 | :type points: torch.Tensor [2, #samples] 199 | :param feat_map: 200 | :type feat_map: torch.Tensor [B, H, W, C] 201 | :param rgb: 202 | :type rgb: numpy.ndarray [H, W, C] or [B, C, H, W] 203 | :param order: 'HWC' or 'CHW' 204 | :type order: str 205 | :return: 206 | :rtype: numpy.ndarray 207 | """ 208 | assert len(feat_map.shape) == 4, "feature-map should be a 4-dim tensor" 209 | assert order in ['HWC', 'CHW'] 210 | 211 | batch_version = (feat_map.shape[0] != 1) 212 | feat_map = feat_map.detach() 213 | if points is not None: points = points.detach() 214 | if not batch_version: 215 | feat_map = feat_map.squeeze(dim=0) 216 | if points is not None: points = points.squeeze() 217 | else: 218 | # if in batch, only visualize the 1st feature map 219 | feat_map = feat_map[0, :, :, :] 220 | if points is not None: points = points[0, :, :] 221 | 222 | 
if order == 'CHW': 223 | feat_map = feat_map.permute(1, 2, 0) # convert to [H, W, C] 224 | D = feat_map.shape[2] 225 | feat_map_sum = feat_map.sum(dim=2) 226 | 227 | if rgb is not None: 228 | if torch.is_tensor(rgb) and len(rgb.shape) == 4: 229 | rgb = rgb.detach() 230 | if not batch_version: 231 | rgb = rgb.squeeze() 232 | else: 233 | # if in batch, only visualize the 1st feature map 234 | rgb = rgb[0, :, :, :] 235 | rgb = rgb.permute(1, 2, 0) # convert to [H, W, C] 236 | if add_ftr_avg: 237 | feat_map_channel_list = [rgb, feat_map_sum] 238 | else: 239 | feat_map_channel_list = [rgb] 240 | else: 241 | if add_ftr_avg: 242 | feat_map_channel_list = [feat_map_sum] 243 | else: 244 | feat_map_channel_list = [] 245 | 246 | for d in range(D): 247 | feat_map_channel = feat_map[:, :, d] 248 | feat_map_channel_list.append(feat_map_channel) 249 | 250 | cmap = [cv2.COLORMAP_JET] * (D + 1) 251 | if rgb is not None: 252 | cmap = ['NORMAL'] + cmap 253 | feature_channels = create_mosaic(feat_map_channel_list, cmap=cmap, points=points, order='HWC', normalize=True) 254 | return feature_channels 255 | 256 | 257 | def normalize_descriptor_channel_wise(res): 258 | """ 259 | Normalizes the descriptor into RGB color space for each channel 260 | :param res: numpy.array [H,W,D] 261 | Output of the network, per-pixel dense descriptor 262 | :param stats: dict, with fields ['min', 'max', 'mean'], which are used to normalize descriptor 263 | :return: numpy.array 264 | normalized descriptor [H,W,D] 265 | """ 266 | 267 | # get #channel 268 | D = np.shape(res)[-1] 269 | normed_res = np.zeros_like(res) 270 | eps = 1e-10 271 | 272 | for d in range(D): 273 | res_min = np.min(res[:, :, d]) 274 | res_max = np.max(res[:, :, d]) 275 | scale_factor = res_max - res_min + eps 276 | normed_res[:, :, d] = (res[:, :, d] - res_min) / scale_factor 277 | 278 | return normed_res 279 | 280 | 281 | def colorize(hue, lightness, normalize_hue=True, lightness_range=1.0): 282 | """ 283 | Project images onto input images 284 | hue is normalized channel&image-wise 285 | 286 | :param hue: Features to be visualized 287 | :type hue: size of [#batch, #channel, H, W], ith its range is supposed to be [-1.0, 1.0] 288 | :param lightness: input image (grey) 289 | :type lightness: size [#batch, 1, H, W], its value range is supposed to be [0, 1.0] 290 | :param normalize_hue: normalize hue to [0, 1] 291 | :type normalize_hue: 292 | :param lightness_range: 293 | :type lightness_range: 294 | :return: hue overlapped on the lightness 295 | :rtype: size of [#batch, #channel, 3, H, W] 296 | """ 297 | # process the input value to be visualisation range 298 | lightness /= lightness_range 299 | 300 | out = np.zeros(list(hue.shape) + [3]) # now size become [#batch, #channel, H, W, 3] 301 | if normalize_hue: 302 | image_num = np.shape(hue)[0] 303 | normed_hue = np.zeros_like(hue) 304 | # for i in xrange(image_num): 305 | # hue_per_image = hue[i, :, :, :] 306 | # hue_per_image = np.transpose(hue_per_image, [1, 2, 0]) 307 | # normalized_hue_image = normalize_descriptor_channel_wise(hue_per_image) 308 | # normalized_hue_image = np.transpose(normalized_hue_image, [2, 0, 1]) 309 | # normalize_hue[i, :, :, :] = normalized_hue_image 310 | channel_num = np.shape(hue)[1] 311 | eps = 1e-10 312 | 313 | for i in range(image_num): 314 | for c in range(channel_num): 315 | hue_min = np.min(hue[i, c, :, :]) 316 | hue_max = np.max(hue[i, c, :, :]) 317 | scale_factor = hue_max - hue_min + eps 318 | normed_hue[i, c, :, :] = (hue[i, c, :, :] - hue_min) / scale_factor 319 | else: 320 | 
normed_hue = np.clip(hue, 0, 1.0) * 0.5 + 0.5 321 | out[:, :, :, :, 0] = normed_hue * 120.0 / 255.0 322 | out[:, :, :, :, 1] = (lightness - 0.5) * 0.5 + 0.5 323 | # out[:, :, :, :, 2] = np.ones(hue.shape) * (np.abs(np.clip(hue, -1.0, 1.0) * 1.0) + 0.0) 324 | out[:, :, :, :, 2] = np.ones(hue.shape) # * (normed_hue) 325 | out = np.reshape(out, [-1, hue.shape[3], 3]) # [#batch * #channel * H, W, 3] 326 | out = cv2.cvtColor((out * 255).astype(np.uint8), cv2.COLOR_HLS2RGB).astype(np.float32) / 255 327 | out = np.reshape(out, list(hue.shape) + [3]) # [#batch, #channel, H, W, 3] 328 | out = np.transpose(out, [0, 1, 4, 2, 3]) # [#batch, #channel, 3, H, W]. this is to meet the create_mosaic function 329 | return out 330 | 331 | 332 | def visualise_frames(mat, name, max_img_visual=None, max_channel_visual=None, step_image=1, step_channel=1, 333 | mosaic_save=None): 334 | """ 335 | visualize batches of images in n-dimsional 336 | :param mat: images to be showed 337 | :type mat: numpy array of [#batch, #channel, 3, H, W] 338 | :param name: opencv window name 339 | :type name: string 340 | :param max_img_visual: image number to be showed 341 | :type max_img_visual: int 342 | :param max_channel_visual:channel number to be shoed 343 | :type max_channel_visual: int 344 | :param step_image: the step to skip image number 345 | :type step_image: int 346 | :param step_channel: the step to skip channel number 347 | :type step_channel: int 348 | :param mosaic_save: if not none, the directory to save the mosaic image 349 | :type mosaic_save: string 350 | :return: mosaic image -> deprecated currently 351 | :rtype: 352 | """ 353 | array = list() 354 | max_img = mat.shape[0] if max_img_visual is None else min(max_img_visual, mat.shape[0]) 355 | max_channel = mat.shape[1] if max_channel_visual is None else min(max_channel_visual, mat.shape[1]) 356 | for i in range(0, max_img, step_image): 357 | sub = [] 358 | for j in range(0, max_channel, step_channel): 359 | sub.append(mat[i, j]) 360 | array.append(sub) 361 | mosaic = create_mosaic(array) 362 | cv2.namedWindow(name, cv2.WINDOW_NORMAL) 363 | cv2.imshow(name, mosaic) 364 | if mosaic is not None: 365 | cv2.imwrite(mosaic_save, mosaic) 366 | return mosaic 367 | 368 | 369 | 370 | class Toolbar: 371 | def reset(self, width, tot, title=None): 372 | self.width = max(int(min(width, tot)), 1) 373 | self.tot = int(max(tot, 1)) 374 | self.current = 0 375 | if title is not None: 376 | print(title) 377 | sys.stdout.write("[%s]" % (" " * self.width)) 378 | sys.stdout.flush() 379 | sys.stdout.write("\b" * (self.width + 1)) 380 | 381 | def incr(self): 382 | n = self.current + 1 383 | if math.floor(n * self.width / self.tot) > math.floor(self.current * self.width / self.tot): 384 | sys.stdout.write("-") 385 | sys.stdout.flush() 386 | self.current = n 387 | if self.current == self.tot: 388 | sys.stdout.write("\n") 389 | -------------------------------------------------------------------------------- /code/tools/rgbd_odometry.py: -------------------------------------------------------------------------------- 1 | """ 2 | An implementation of RGBD odometry using Open3D library for comparison in the paper 3 | # SPDX-FileCopyrightText: 2021 Binbin Xu 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | """ 6 | 7 | import open3d as o3d 8 | import numpy as np 9 | import torch 10 | import copy 11 | 12 | class RGBDOdometry(): 13 | 14 | def __init__(self, mode='RGBD'): 15 | 16 | self.odo_opt = None 17 | if mode == "RGBD": 18 | print("Using RGB-D Odometry") 19 | self.odo_opt = 
o3d.odometry.RGBDOdometryJacobianFromColorTerm() 20 | elif mode == "COLOR_ICP": 21 | print("Using Hybrid RGB-D Odometry") 22 | self.odo_opt = o3d.odometry.RGBDOdometryJacobianFromHybridTerm() 23 | else: 24 | raise NotImplementedError() 25 | 26 | def set_K(self, K, width, height): 27 | fx, fy, cx, cy = K 28 | K = o3d.camera.PinholeCameraIntrinsic(width, height, fx, fy, cx, cy) 29 | return K 30 | 31 | def batch_track(self, batch_rgb0, batch_dpt0, batch_rgb1, batch_dpt1, batch_K, 32 | batch_objmask0=None, batch_objmask1=None, vis_pcd=True): 33 | assert batch_rgb0.ndim == 4 34 | B = batch_rgb0.shape[0] 35 | batch_R = [] 36 | batch_t = [] 37 | if batch_objmask0 is not None: 38 | batch_dpt0 = batch_dpt0 * batch_objmask0 39 | if batch_objmask1 is not None: 40 | batch_dpt1 = batch_dpt1 * batch_objmask1 41 | for i in range(B): 42 | rgb0 = batch_rgb0[i].permute(1,2,0).cpu().numpy() 43 | dpt0 = batch_dpt0[i].permute(1,2,0).cpu().numpy() 44 | rgb1 = batch_rgb1[i].permute(1,2,0).cpu().numpy() 45 | dpt1 = batch_dpt1[i].permute(1,2,0).cpu().numpy() 46 | K = batch_K[i].cpu().numpy().tolist() 47 | pose10, _ = self.track(rgb0, dpt0, rgb1, dpt1, K) 48 | batch_R.append(pose10[0]) 49 | batch_t.append(pose10[1]) 50 | 51 | batch_R = torch.tensor(batch_R).type_as(batch_K) 52 | batch_t = torch.tensor(batch_t).type_as(batch_K) 53 | return batch_R, batch_t 54 | 55 | def draw_registration_result(self, source, target, transformation, name='Open3D'): 56 | source_temp = copy.deepcopy(source) 57 | target_temp = copy.deepcopy(target) 58 | source_temp.paint_uniform_color([1, 0.706, 0]) 59 | target_temp.paint_uniform_color([0, 0.651, 0.929]) 60 | source_temp.transform(transformation) 61 | o3d.visualization.draw_geometries([source_temp, target_temp], window_name=name) 62 | 63 | def track(self, rgb0, dpt0, rgb1, dpt1, K, vis_pcd=True, odo_init=None): 64 | H, W, _ = rgb0.shape 65 | intrinsic = self.set_K(K, H, W) 66 | rgbd_0 = o3d.geometry.RGBDImage.create_from_color_and_depth( 67 | o3d.geometry.Image(rgb0), o3d.geometry.Image(dpt0), depth_scale=1, depth_trunc=3.0) 68 | rgbd_1 = o3d.geometry.RGBDImage.create_from_color_and_depth( 69 | o3d.geometry.Image(rgb1), o3d.geometry.Image(dpt1), depth_scale=1, depth_trunc=3.0) 70 | if odo_init is None: 71 | odo_init = np.identity(4) 72 | if vis_pcd: 73 | pcd_0 = o3d.geometry.PointCloud.create_from_rgbd_image( 74 | rgbd_0, intrinsic) 75 | pcd_1 = o3d.geometry.PointCloud.create_from_rgbd_image( 76 | rgbd_1, intrinsic) 77 | 78 | # option = o3d.odometry.OdometryOption() 79 | option = o3d.odometry.OdometryOption(min_depth=0.01, max_depth_diff=1.0) 80 | # print(option) 81 | 82 | [is_success, T_10, info] = o3d.odometry.compute_rgbd_odometry( 83 | rgbd_0, rgbd_1, intrinsic, 84 | odo_init, self.odo_opt, option) 85 | 86 | trs = T_10[0:3, 3] 87 | if (trs>1).sum(): #is_success and vis_pcd: 88 | print(T_10) 89 | print(is_success) 90 | # pcd_0 = o3d.geometry.PointCloud.create_from_rgbd_image( 91 | # rgbd_0, intrinsic) 92 | # pcd_0.transform(T_10) 93 | # o3d.visualization.draw_geometries([pcd_1, pcd_0]) 94 | self.draw_registration_result(pcd_0, pcd_1, odo_init, 'init') 95 | self.draw_registration_result(pcd_0, pcd_1, T_10, 'aligned') 96 | 97 | trs = T_10[0:3, 3] 98 | rot = T_10[0:3, 0:3] 99 | pose10 = [rot, trs] 100 | 101 | return pose10, is_success -------------------------------------------------------------------------------- /code/train.py: -------------------------------------------------------------------------------- 1 | """ 2 | The training script for Deep Probabilistic Feature-metric 
Tracking, 3 | 4 | # SPDX-FileCopyrightText: 2021 Binbin Xu 5 | # SPDX-License-Identifier: BSD-3-Clause 6 | 7 | @author: Zhaoyang Lv 8 | @date: March 2019 9 | """ 10 | 11 | import os, sys, argparse, time 12 | import evaluate as eval_utils 13 | import models.LeastSquareTracking as ICtracking 14 | import models.criterions as criterions 15 | import models.geometry as geometry 16 | import train_utils 17 | import config 18 | from data.dataloader import load_data 19 | from Logger import log_git_revisions_hash 20 | 21 | import torch 22 | import torch.nn as nn 23 | import torch.utils.data as data 24 | 25 | from timers import Timers 26 | from tqdm import tqdm 27 | 28 | 29 | class data_prefetcher(): 30 | def __init__(self, loader): 31 | self.loader = iter(loader) 32 | self.stream = torch.cuda.Stream() 33 | # self.mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).cuda().view(1,3,1,1) 34 | # self.std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).cuda().view(1,3,1,1) 35 | # With Amp, it isn't necessary to manually convert data to half. 36 | # if args.fp16: 37 | # self.mean = self.mean.half() 38 | # self.std = self.std.half() 39 | self.preload() 40 | #self.next_batch = None 41 | 42 | def preload(self): 43 | try: 44 | self.next_batch = next(self.loader) 45 | except StopIteration: 46 | self.next_batch = None 47 | return 48 | with torch.cuda.stream(self.stream): 49 | for k, val in enumerate(self.next_batch): 50 | if torch.is_tensor(val): 51 | self.next_batch[k] = self.next_batch[k].cuda(non_blocking=True) 52 | 53 | # With Amp, it isn't necessary to manually convert data to half. 54 | # if args.fp16: 55 | # self.next_input = self.next_input.half() 56 | # else: 57 | # self.next_input = self.next_input.float() 58 | 59 | def next(self): 60 | torch.cuda.current_stream().wait_stream(self.stream) 61 | next_batch = self.next_batch 62 | self.preload() 63 | return next_batch 64 | 65 | def create_train_eval_loaders(options, eval_type, keyframes, 66 | total_batch_size = 8, 67 | trajectory = ''): 68 | """ create the evaluation loader at different keyframes set-up 69 | """ 70 | eval_loaders = {} 71 | 72 | for kf in keyframes: 73 | if options.image_resize is not None: 74 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory, 75 | image_resize=options.image_resize, 76 | options=options) 77 | else: 78 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory, options=options) 79 | eval_loaders['{:}_keyframe_{:}'.format(trajectory, kf)] = data.DataLoader(np_loader, 80 | pin_memory=True, 81 | batch_size = int(total_batch_size), 82 | shuffle = False, num_workers = options.cpu_workers) 83 | 84 | return eval_loaders 85 | 86 | def train_one_epoch(options, dataloader, net, optim, epoch, logger, objectives, 87 | known_mask=False, timers=None): 88 | 89 | net.train() 90 | 91 | # prefetcher = data_prefetcher(dataloader) 92 | 93 | progress = tqdm(dataloader, ncols=100, 94 | desc = 'train deeper inverse compositional algorithm #epoch{:}'.format(epoch), 95 | total= len(dataloader)) 96 | 97 | epoch_len = len(dataloader) 98 | 99 | if timers is None: timers_iter = Timers() 100 | 101 | if timers: timers.tic('one iteration') 102 | else: timers_iter.tic('one iteration') 103 | 104 | for batch_idx, batch in enumerate(progress): 105 | 106 | # batch = prefetcher.next() 107 | # batch_idx = 0 108 | # with tqdm(total=epoch_len) as pbar: 109 | # while batch is not None: 110 | # batch_idx += 1 111 | # if batch_idx >= epoch_len: 112 | # break 113 | 114 | iteration = epoch*epoch_len + batch_idx 115 | 
display_dict = {} 116 | 117 | optim.zero_grad() 118 | 119 | if timers: timers.tic('forward step') 120 | 121 | if known_mask: # for dataset that with mask or need mask 122 | color0, color1, depth0, depth1, Rt, K, obj_mask0, obj_mask1 = \ 123 | train_utils.check_cuda(batch[:8]) 124 | else: 125 | color0, color1, depth0, depth1, Rt, K = \ 126 | train_utils.check_cuda(batch[:6]) 127 | obj_mask0, obj_mask1 = None, None 128 | 129 | # Bypass lazy way to bypass invalid pixels. 130 | invalid_mask = (depth0 == depth0.min()) | (depth0 == depth0.max()) 131 | if obj_mask0 is not None: 132 | invalid_mask = ~obj_mask0 | invalid_mask 133 | 134 | if options.train_uncer_prop: 135 | if options.obj_only: 136 | Rs, ts, sigma_ksi = net.forward(color0, color1, depth0, depth1, K, 137 | obj_mask0=obj_mask0, obj_mask1=obj_mask1, 138 | logger=logger, 139 | vis=options.vis_feat, 140 | iteration=iteration)[:3] 141 | else: 142 | Rs, ts, sigma_ksi = net.forward(color0, color1, depth0, depth1, K, 143 | logger=logger, 144 | vis=options.vis_feat, 145 | iteration=iteration)[:3] 146 | else: 147 | if options.obj_only: 148 | Rs, ts = net.forward(color0, color1, depth0, depth1, K, 149 | obj_mask0=obj_mask0, obj_mask1=obj_mask1, 150 | logger=logger, 151 | vis=options.vis_feat, 152 | iteration=iteration)[:2] 153 | else: 154 | Rs, ts = net.forward(color0, color1, depth0, depth1, K, 155 | logger=logger, 156 | vis=options.vis_feat, 157 | iteration=iteration)[:2] 158 | 159 | if timers: timers.toc('forward step') 160 | if timers: timers.tic('calculate loss') 161 | 162 | R_gt, t_gt = Rt[:,:3,:3], Rt[:,:3,3] 163 | 164 | # assert(flow_loss) # the only loss used for training 165 | # we want to compute epe anyway 166 | flow_loss = criterions.compute_RT_EPE_loss 167 | 168 | epes3d = flow_loss(Rs, ts, R_gt, t_gt, depth0, K, invalid=invalid_mask).mean() * 1e2 169 | if 'EPE3D' in objectives: 170 | loss = epes3d 171 | elif 'RPE' in objectives: 172 | angle_error, trans_error = criterions.compute_RPE_loss(Rs, ts, R_gt, t_gt) 173 | loss = angle_error + trans_error 174 | elif 'URPE' in objectives: 175 | assert options.train_uncer_prop 176 | loss = criterions.compute_RPE_uncertainty(Rs, ts, R_gt, t_gt, sigma_ksi) 177 | elif 'UEPE' in objectives: 178 | loss = criterions.compute_RT_EPE_uncertainty_loss(Rs, ts, R_gt, t_gt, depth0, K, sigma_ksi=sigma_ksi, uncertainty_type=options.uncertainty, invalid=invalid_mask) 179 | 180 | display_dict['train_epes3d'] = epes3d.item() 181 | display_dict['train_loss'] = loss.item() 182 | 183 | if timers: timers.toc('calculate loss') 184 | if timers: timers.tic('backward') 185 | 186 | loss.backward() 187 | 188 | if timers: timers.toc('backward') 189 | torch.nn.utils.clip_grad_norm_(net.parameters(), 5.0) 190 | # if options.uncertainty == 'gaussian': 191 | # torch.nn.utils.clip_grad_norm_(net.parameters(), 5.0) 192 | optim.step() 193 | 194 | lr = train_utils.get_learning_rate(optim) 195 | display_dict['lr'] = lr 196 | 197 | if timers: 198 | timers.toc('one iteration') 199 | batch_time = timers.get_avg('one iteration') 200 | timers.tic('one iteration') 201 | else: 202 | timers_iter.toc('one iteration') 203 | batch_time = timers_iter.get_avg('one iteration') 204 | timers_iter.tic('one iteration') 205 | 206 | logger.write_to_tensorboard(display_dict, iteration) 207 | logger.write_to_terminal(display_dict, epoch, batch_idx, epoch_len, batch_time, is_train=True) 208 | 209 | # batch = prefetcher.next() 210 | # pbar.update(1) 211 | 212 | def train(options): 213 | 214 | if options.time: 215 | timers = Timers() 216 | else: 217 | 
timers = None 218 | 219 | total_batch_size = options.batch_per_gpu * torch.cuda.device_count() 220 | 221 | checkpoint = train_utils.load_checkpoint_train(options) 222 | 223 | keyframes = [int(x) for x in options.keyframes.split(',')] 224 | if options.image_resize is not None: 225 | train_loader = load_data(options.dataset, keyframes, load_type='train', 226 | image_resize=options.image_resize, options=options) 227 | else: 228 | train_loader = load_data(options.dataset, keyframes, load_type = 'train', options=options) 229 | train_loader = data.DataLoader(train_loader, 230 | batch_size = total_batch_size, 231 | pin_memory=True, 232 | shuffle = True, num_workers = options.cpu_workers) 233 | if options.dataset in ['BundleFusion', 'TUM_RGBD', 'ScanNet']: 234 | obj_has_mask = False 235 | else: 236 | obj_has_mask = True 237 | 238 | eval_loaders = create_train_eval_loaders(options, 'validation', keyframes, total_batch_size) 239 | 240 | logfile_name = '_'.join([ 241 | options.prefix, # the current test version 242 | # options.network, 243 | options.encoder_name, 244 | options.mestimator, 245 | options.solver, 246 | options.dataset, 247 | 'obj', str(options.obj_only), 248 | 'uCh', str(options.uncertainty_channel), 249 | options.uncertainty, 250 | 'rmT', str(options.remove_tru_sigma), 251 | # options.direction, 252 | 'fCh', str(options.feature_channel), 253 | options.feature_extract, 254 | 'iP', options.init_pose, 255 | 'mH', options.multi_hypo, 256 | # 'resInput', str(options.res_input), 257 | # 'initScale', str(options.scale_init_pose), 258 | # 'uncer_prop', str(options.train_uncer_prop), 259 | 'wICP', str(options.combine_ICP), 260 | 's', options.scaler, 261 | 'lr', str(options.lr), 262 | 'batch', str(total_batch_size), 263 | # 'kf', options.keyframes 264 | ]) 265 | 266 | print("Initialize and train the Deep Trust Region Network") 267 | net = ICtracking.LeastSquareTracking( 268 | encoder_name = options.encoder_name, 269 | uncertainty_type= options.uncertainty, 270 | direction = options.direction, 271 | max_iter_per_pyr= options.max_iter_per_pyr, 272 | mEst_type = options.mestimator, 273 | solver_type = options.solver, 274 | options = options, 275 | tr_samples = options.tr_samples, 276 | add_init_noise = options.add_init_pose_noise, 277 | no_weight_sharing = options.no_weight_sharing, 278 | timers = timers) 279 | 280 | if options.no_weight_sharing: 281 | logfile_name += '_no_weight_sharing' 282 | logger = train_utils.initialize_logger(options, logfile_name) 283 | log_git_revisions_hash(logger.log_dir) 284 | with open(os.path.join(logger.log_dir,'commandline_args.txt'), 'w') as f: 285 | f.write('\n'.join(sys.argv[1:])) 286 | 287 | if options.checkpoint: 288 | net.load_state_dict(checkpoint['state_dict']) 289 | 290 | if torch.cuda.is_available(): 291 | net.cuda() 292 | 293 | net.train() 294 | 295 | if torch.cuda.device_count() > 1: 296 | print("Use", torch.cuda.device_count(), "GPUs for training!") 297 | # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs 298 | net = nn.DataParallel(net) 299 | 300 | train_objective = [options.loss] # ['EPE3D'] # Note: we don't use RPE for training 301 | eval_objectives = ['EPE3D', 'RPE'] 302 | 303 | num_params = train_utils.count_parameters(net) 304 | 305 | if num_params < 1: 306 | print('There is no learnable parameters in this baseline.') 307 | print('No training. 
Only one iteration of evaluation') 308 | no_training = True 309 | else: 310 | print('There is a total of {:} learnabled parameters'.format(num_params)) 311 | no_training = False 312 | optim = train_utils.create_optim(options, net) 313 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optim, 314 | milestones=options.lr_decay_epochs, 315 | gamma=options.lr_decay_ratio) 316 | 317 | freq = options.save_checkpoint_freq 318 | for epoch in range(options.start_epoch, options.epochs): 319 | 320 | if epoch % freq == 0: 321 | checkpoint_name = 'checkpoint_epoch{:d}.pth.tar'.format(epoch) 322 | print('save {:}'.format(checkpoint_name)) 323 | state_info = {'epoch': epoch, 'num_param': num_params} 324 | logger.save_checkpoint(net, state_info, filename=checkpoint_name) 325 | 326 | if options.no_val is False: 327 | for k, loader in eval_loaders.items(): 328 | 329 | eval_name = '{:}_{:}'.format(options.dataset, k) 330 | 331 | eval_info = eval_utils.evaluate_trust_region( 332 | loader, net, eval_objectives, 333 | known_mask = obj_has_mask, 334 | eval_name = eval_name, 335 | timers = timers, 336 | logger=logger, 337 | obj_only=options.obj_only, 338 | epoch=epoch, 339 | tracker='learning_based', 340 | ) 341 | 342 | display_dict = {"{:}_epe3d".format(eval_name): eval_info['epes'].mean(), 343 | "{:}_rpe_angular".format(eval_name): eval_info['angular_error'].mean(), 344 | "{:}_rpe_translation".format(eval_name): eval_info['translation_error'].mean()} 345 | 346 | logger.write_to_tensorboard(display_dict, epoch) 347 | 348 | if no_training: break 349 | 350 | train_one_epoch(options, train_loader, net, optim, epoch, logger, 351 | train_objective, known_mask=obj_has_mask, timers=timers) 352 | 353 | scheduler.step() 354 | 355 | if __name__ == '__main__': 356 | 357 | parser = argparse.ArgumentParser(description='Training the network') 358 | 359 | config.add_basics_config(parser) 360 | config.add_train_basics_config(parser) 361 | config.add_train_optim_config(parser) 362 | config.add_train_log_config(parser) 363 | config.add_train_loss_config(parser) 364 | config.add_tracking_config(parser) 365 | 366 | options = parser.parse_args() 367 | 368 | options.start_epoch = 0 369 | 370 | print('---------------------------------------') 371 | print_options = vars(options) 372 | for key in print_options.keys(): 373 | print(key+': '+str(print_options[key])) 374 | print('---------------------------------------') 375 | 376 | # torch.backends.cudnn.benchmark = True 377 | torch.manual_seed(1) 378 | torch.cuda.manual_seed(1) 379 | 380 | print('Start training...') 381 | train(options) 382 | -------------------------------------------------------------------------------- /code/train_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | The training utility functions 3 | 4 | @author: Zhaoyang Lv 5 | @date: March 2019 6 | """ 7 | 8 | import os, sys 9 | from os.path import join 10 | import torch 11 | import torch.nn as nn 12 | 13 | def check_cuda(items): 14 | if torch.cuda.is_available(): 15 | to_cuda=[] 16 | for x in items: 17 | if torch.is_tensor(x): 18 | to_cuda.append(x.cuda()) 19 | elif type(x).__module__ == 'numpy': 20 | to_cuda.append(torch.from_numpy(x).cuda()) 21 | else: 22 | to_cuda.append(x) 23 | return to_cuda 24 | # return [x.cuda() if torch.is_tensor(x) else torch.from_numpy(x).cuda() for x in items] 25 | else: 26 | return items 27 | 28 | def initialize_logger(opt, logfile_name): 29 | """ Initialize the logger for the network 30 | """ 31 | from Logger import TensorBoardLogger 32 
| log_dir = opt.dataset 33 | # if opt.resume_training: 34 | # logfile_name = '_'.join([ 35 | # logfile_name, 36 | # 'resume']) 37 | 38 | log_dir = join('logs', log_dir, opt.checkpoint_folder, logfile_name) 39 | logger = TensorBoardLogger(log_dir, logfile_name) 40 | return logger 41 | 42 | def create_optim(config, network): 43 | """ Create the optimizer 44 | """ 45 | if config.opt=='sgd': 46 | optim = torch.optim.SGD(network.parameters(), 47 | lr = config.lr, 48 | momentum = 0.9, 49 | weight_decay = 4e-4, 50 | nesterov=False) 51 | elif config.opt=='adam' or config.opt=='sgdr': 52 | optim = torch.optim.Adam(network.parameters(), 53 | lr = config.lr, 54 | weight_decay = 4e-4 55 | ) 56 | elif config.opt=='rmsprop': 57 | optim = torch.optim.RMSprop(network.parameters(), 58 | lr = config.lr, 59 | weight_decay = 1e-4) 60 | else: 61 | raise NotImplementedError 62 | 63 | return optim 64 | 65 | def count_parameters(model): 66 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 67 | 68 | def load_checkpoint_test(opt): 69 | if os.path.isfile(opt.checkpoint): 70 | print('=> loading checkpoint '+ opt.checkpoint) 71 | checkpoint = torch.load(opt.checkpoint) 72 | else: 73 | raise Exception('=> no checkpoint found at '+opt.checkpoint) 74 | return checkpoint 75 | 76 | def load_checkpoint_train(opt): 77 | """ Loading the checking-point file if specified 78 | """ 79 | checkpoint = None 80 | if opt.checkpoint: 81 | if os.path.isfile(opt.checkpoint): 82 | print('=> loading checkpoint '+ opt.checkpoint) 83 | 84 | checkpoint = torch.load(opt.checkpoint) 85 | print('=> loaded checkpoint '+ opt.checkpoint+' epoch %d'%checkpoint['epoch']) 86 | if opt.resume_training: 87 | opt.start_epoch = checkpoint['epoch'] 88 | print('resume training on the checkpoint') 89 | else: 90 | print('start new training...') 91 | 92 | # This is to configure the module loaded from multi-gpu 93 | #if opt.checkpoint_multigpu: 94 | # from collections import OrderedDict 95 | # state_dict_rename = OrderedDict() 96 | # for k, v in checkpoint['state_dict'].items(): 97 | # name = k[7:] # remove `module.` 98 | # state_dict_rename[name] = v 99 | # checkpoint['state_dict'] = state_dict_rename 100 | else: 101 | print('=> no checkpoint found at '+opt.checkpoint) 102 | return checkpoint 103 | 104 | def set_learning_rate(optim, lr): 105 | """ manual set the learning rate for all specified parameters 106 | """ 107 | for param_group in optim.param_groups: 108 | param_group['lr']=lr 109 | 110 | def get_learning_rate(optim, name=None): 111 | """ retrieve the current learning rate 112 | """ 113 | if name is None: 114 | # assume all the learning rate remains the same 115 | return optim.param_groups[0]['lr'] 116 | 117 | def adjust_learning_rate_manual(optim, epoch, lr, lr_decay_epochs, lr_decay_ratio): 118 | """ DIY the learning rate 119 | """ 120 | for e in lr_decay_epochs: 121 | if epoch