├── .autoenv.zsh
├── .autoenv_leave.zsh
├── .gitignore
├── .make
│   ├── install.sh
│   └── install_anaconda3.sh
├── LICENSE
├── Makefile
├── README.md
├── checkpoint
│   ├── checkpoint_epoch29.pth.tar
│   └── combine_icp_checkpoint_epoch40.pth.tar
├── code
│   ├── .gitignore
│   ├── Logger.py
│   ├── __init__.py
│   ├── config.py
│   ├── convergence_basin.py
│   ├── data
│   │   ├── MovingObj3D.py
│   │   ├── ScanNet.py
│   │   ├── SimpleLoader.py
│   │   ├── TUM_RGBD.py
│   │   ├── VaryLighting.py
│   │   ├── data_examples
│   │   │   └── TUM
│   │   │       ├── color
│   │   │       │   ├── 1305031790.645155.png
│   │   │       │   ├── 1305031790.713097.png
│   │   │       │   ├── 1305031790.781258.png
│   │   │       │   ├── 1305031790.845151.png
│   │   │       │   └── 1305031790.913129.png
│   │   │       └── depth
│   │   │           ├── 1305031790.640468.png
│   │   │           ├── 1305031790.709421.png
│   │   │           ├── 1305031790.773548.png
│   │   │           ├── 1305031790.839363.png
│   │   │           └── 1305031790.909436.png
│   │   └── dataloader.py
│   ├── evaluate.py
│   ├── experiments
│   │   ├── __init__.py
│   │   ├── kf_vo.py
│   │   ├── select_method.py
│   │   └── warping_objects.py
│   ├── models
│   │   ├── LeastSquareTracking.py
│   │   ├── __init__.py
│   │   ├── algorithms.py
│   │   ├── criterions.py
│   │   ├── geometry.py
│   │   └── submodules.py
│   ├── run_example.py
│   ├── test.py
│   ├── timers.py
│   ├── tools
│   │   ├── ICP.py
│   │   ├── __init__.py
│   │   ├── display.py
│   │   └── rgbd_odometry.py
│   ├── train.py
│   └── train_utils.py
├── environment.yml
├── environment30X.yml
├── scripts
│   ├── eval_tum_feature_icp.sh
│   ├── eval_tum_rgbd.sh
│   ├── run_kf_vo.sh
│   ├── run_kf_vo_cb.sh
│   ├── train_moving_objs3d.sh
│   ├── train_tum_feature_icp.sh
│   └── train_tum_rgbd.sh
└── setup
    ├── datasets.yaml
    └── environment.yml
/.autoenv.zsh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env zsh
2 |
3 | setup(){
4 | source $HERE/.anaconda3/bin/activate
5 | path_prepend $HERE/.anaconda3/bin
6 | }
7 | HERE=${0:a:h}
8 | setup
--------------------------------------------------------------------------------
/.autoenv_leave.zsh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env zsh
2 | conda deactivate
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.DS_Store
2 | logs/
3 | test_results/
4 |
--------------------------------------------------------------------------------
/.make/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | echo_bold () {
6 | echo -e "\033[1m$*\033[0m"
7 | }
8 |
9 | echo_warning () {
10 | echo -e "\033[33m$*\033[0m"
11 | }
12 |
13 | conda_check_installed () {
14 | if [ ! $# -eq 1 ]; then
15 | echo "usage: $0 PACKAGE_NAME"
16 | return 1
17 | fi
18 | conda list | awk '{print $1}' | egrep "^$1$" &>/dev/null
19 | }
20 |
21 | HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
22 | ROOT=$HERE/..
23 |
24 | cd $ROOT
25 |
26 | source .anaconda3/bin/activate
27 |
28 | # ---------------------------------------------------------------------------------------
29 |
30 | echo_bold "==> Installing the right pip and dependencies for the fresh python"
31 | pip install --upgrade pip
32 | conda install python=3.6 # meet tensorflow requirements
33 | conda install ipython
34 |
35 | #echo_bold "==> Installing computer vision-related packages"
36 | #pip install \
37 | # jupyter \
38 | # cython\
39 | # numpy\
40 | # matplotlib\
41 | # opencv-python \
42 | # opencv-contrib-python \
43 | # plyfile \
44 | # pandas \
45 | # requests \
46 | # scipy \
47 | # imageio \
48 | # scikit-image \
49 | # sklearn \
50 | # pyyaml \
51 | # tqdm \
52 | # transforms3d \
53 | #
54 | #echo_bold "==> Installing deep learning-related packages"
55 | #pip install future
56 | #conda install pytorch torchvision cudatoolkit=9.2 -c pytorch
57 | #pip install tensorboard
58 |
59 | echo_bold "==> Installing requirements"
60 | # pip install -r setup/requirements.txt
61 | conda env update --file environment30X.yml
62 | # pip install -e .
63 |
64 | # ---------------------------------------------------------------------------------------
65 |
66 | echo_bold "\nAll is well! You can start using this!
67 | $ source .anaconda3/bin/activate
68 | "
69 |
--------------------------------------------------------------------------------
/.make/install_anaconda3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
4 | ROOT=$HERE/..
5 |
6 | if [ ! -d $ROOT/.anaconda3 ]; then
7 | echo "==>Installing anaconda 3"
8 | echo $ROOT
9 | echo $HERE
10 | cd $ROOT
11 | curl -L https://binbin-xu.github.io//tools/install_anaconda3.sh | bash -s .
12 | fi
13 |
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright © 2020-2021 Smart Robotics Lab, Imperial College London
4 | Copyright © 2020-2021 Binbin Xu
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | @echo '## Make commands ##'
3 | @echo
4 | @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs
5 |
6 | install_first:
7 | @.make/install_first.sh
8 |
9 | install_anaconda3:
10 | @.make/install_anaconda3.sh
11 |
12 | install: install_anaconda3
13 | @.make/install.sh
14 |
15 | clean:
16 | # @rm -rf dense_feature_tracking.egg-info
17 | @rm -rf .anaconda3
18 |
19 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Probabilistic Feature-metric Tracking
2 |
3 | Binbin Xu · Andrew J. Davison · Stefan Leutenegger
4 |
25 | ## Summary
26 |
27 | This is the official repository of our RA-L 2021 paper:
28 |
29 | **Deep Probabilistic Feature-metric Tracking**, \
30 | *Binbin Xu, Andrew J. Davison, Stefan Leutenegger*, \
31 | IEEE Robotics and Automation Letters (RA-L), Vol. 6, No. 1, pp. 223-230, 2021 (ICRA 2021 presentation) \
32 | Best Paper Honorable Mention Award \
33 | [[Paper]](https://arxiv.org/pdf/2008.13504.pdf) [[Video]](https://youtu.be/6pMosl6ZAPE)
34 |
35 |
36 | ## Setup
37 | You can reproduce the setup by using our Anaconda environment configuration. We provide a Makefile to help you install the environment.
38 | ``` bash!
39 | make install
40 | ```
41 |
42 | Every time before you run, activate the environment inside the repo folder:
43 |
44 | ``` bash!
45 | source .anaconda3/bin/activate
46 | ```
47 | The pre-trained network weights can be downloaded [here](https://imperialcollegelondon.box.com/s/xryhbshxtktizjw5fpmxaic1kncxr4cw).
48 |
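For reference, the checkpoints are saved by `code/Logger.py` as a Python dict with a `'state_dict'` entry (plus optional training state). A minimal sketch for inspecting a downloaded checkpoint, assuming it is placed under `checkpoint/`:

```python
import torch

# Load on CPU so no GPU is needed just for inspection.
state = torch.load('checkpoint/checkpoint_epoch29.pth.tar', map_location='cpu')
print(state.keys())                 # expect at least 'state_dict'
weights = state['state_dict']
print(list(weights.keys())[:5])     # first few parameter names
# A network instance would then be restored with net.load_state_dict(weights);
# the evaluation scripts do this for you via the --checkpoint option.
```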
49 | ## Prepare the datasets
50 |
51 | **TUM RGBD Dataset**: Download the dataset from [TUM RGBD](https://vision.in.tum.de/data/datasets/rgbd-dataset/download) to '$YOUR_TUM_RGBD_DIR'. Create a symbolic link to the data directory as
52 |
53 | ```
54 | ln -s $YOUR_TUM_RGBD_DIR code/data/data_tum
55 | ```
56 |
57 | **MovingObjects3D Dataset** Download the dataset from [MovingObjs3D](https://drive.google.com/open?id=1EIlS4J2J0sdsq8Mw_03DXHlRQmfL8XQx) to '$YOUR_MOV_OBJS_3D_DIR'. Create a symbolic link to the data directory as
58 |
59 | ```
60 | ln -s $YOUR_MOV_OBJS_3D_DIR code/data/data_objs3D
61 | ```
62 |
63 | **Custom Dataset** You can also use your own dataset.
64 | Our work uses the above two datasets for training and deploys the trained weights on ScanNet and our self-collected dataset. Please refer to [ScanNet.py](code/data/ScanNet.py) and [VaryLighting.py](code/data/VaryLighting.py) for custom data loading; a minimal loader sketch is given below.
65 |
66 |
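For a quick start with your own recordings, here is a minimal pair-wise RGB-D loader sketch modelled on [SimpleLoader.py](code/data/SimpleLoader.py). The class name, depth scale and intrinsics are placeholders, and the full training loaders additionally return ground-truth relative transforms:

```python
import os
import os.path as osp
import numpy as np
import torch.utils.data as data
from imageio import imread

class MyRGBDPairs(data.Dataset):
    """Yield consecutive RGB-D pairs, mirroring code/data/SimpleLoader.py."""

    def __init__(self, color_dir, depth_dir, K, depth_scale=1e-3):
        colors = sorted(os.listdir(color_dir))
        depths = sorted(os.listdir(depth_dir))
        assert len(colors) == len(depths)  # assumes synchronized color/depth files
        self.colors = [osp.join(color_dir, f) for f in colors]
        self.depths = [osp.join(depth_dir, f) for f in depths]
        self.K = np.asarray(K, dtype=np.float32)   # [fx, fy, cx, cy]
        self.depth_scale = depth_scale             # depends on your sensor

    def __len__(self):
        return len(self.colors) - 1

    def __getitem__(self, idx):
        rgb0 = imread(self.colors[idx]).astype(np.float32) / 255.0
        rgb1 = imread(self.colors[idx + 1]).astype(np.float32) / 255.0
        d0 = imread(self.depths[idx]).astype(np.float32) * self.depth_scale
        d1 = imread(self.depths[idx + 1]).astype(np.float32) * self.depth_scale
        return (rgb0.transpose(2, 0, 1), rgb1.transpose(2, 0, 1),
                d0[np.newaxis], d1[np.newaxis], self.K)
```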
67 | ## Training and Evaluation
68 | To run the full training and evaluation, please follow the steps below.
69 |
70 | ### Run training
71 |
72 | **Train example with TUM RGBD dataset:**
73 |
74 | ``` bash!
75 | ./scripts/train_tum_rgbd.sh
76 | ```
77 |
78 | To check the full training settings, run
79 | ``` bash!
80 | python train.py --help
81 | ```
82 |
83 |
84 | **Train example with MovingObjects3D:** Camera egocentric motion is different from object-centric motion estimation, so we provide a separate training script for the MovingObjects3D dataset.
85 | The settings are the same as above except for the dataset name. You can also use our provided script to train the model.
86 |
87 | ``` bash!
88 | ./scripts/train_moving_objs3d.sh
89 | ```
90 |
91 | ### Run evaluation
92 | **Run the pretrained model:** If you have set up the datasets properly, you can run the learned model with the provided checkpoint in the 'checkpoint' folder.
93 |
94 | ``` bash!
95 | ./scripts/eval_tum_rgbd.sh
96 | ```
97 |
98 |
99 | You can substitute the trajectory, the keyframes and the checkpoint file. Training and evaluation share the same config settings. To check the full settings, run
100 |
101 | ``` bash!
102 | python evaluate.py --help
103 | ```
104 |
105 | **Results:** The evaluation results will be generated automatically in both '.pkl' and '.csv' format in the 'test_results/' folder.
106 |
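One way to inspect these outputs (the file names below are placeholders; check `test_results/` for the actual names):

```python
import pickle
import pandas as pd

# Raw per-frame results (pickled Python object).
with open('test_results/your_run.pkl', 'rb') as f:
    results = pickle.load(f)
print(type(results))

# Summary table.
df = pd.read_csv('test_results/your_run.csv')
print(df.head())
```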
107 |
108 | **Run comparisons:** We also provide scripts to run comparisons with the classic RGBD odometry and ICP methods from Open3D. Please refer to [rgbd_odometry.py](code/tools/rgbd_odometry.py) and [ICP.py](code/tools/ICP.py) for details; a minimal Open3D sketch is shown below.
109 |
110 |
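For orientation, here is a minimal, self-contained point-to-plane ICP example with Open3D. This is not the exact configuration used in [ICP.py](code/tools/ICP.py); the file names and threshold are placeholders, and older Open3D versions expose the same functions under `o3d.registration` instead of `o3d.pipelines.registration`:

```python
import numpy as np
import open3d as o3d

# Two point clouds from consecutive frames (placeholder file names).
source = o3d.io.read_point_cloud("frame0.ply")
target = o3d.io.read_point_cloud("frame1.ply")
source.estimate_normals()
target.estimate_normals()

# Point-to-plane ICP starting from an identity initialization.
result = o3d.pipelines.registration.registration_icp(
    source, target,
    max_correspondence_distance=0.05,   # metres; dataset dependent
    init=np.eye(4),
    estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPlane())
print(result.transformation)            # estimated 4x4 relative pose
```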
111 | ### Joint feature-metric and geometric tracking
112 | We can combine our proposed feature-metric tracking with geometric tracking to achieve better performance. We provide scripts to run the joint tracking with ICP.
113 | ``` bash!
114 | ./scripts/train_tum_feature_icp.sh
115 | ```
116 | This is achieved by using the trained feature-metric network weights as the initialization and combining them with ICP as the refinement.
117 |
118 | The evaluation script is also provided as
119 | ```
120 | ./scripts/eval_tum_feature_icp.sh
121 | ```
122 |
123 |
124 | ### Run visual odometry
125 | Please note this is a prototype version of our **visual odometry frontend**. It mainly serves as a demo to show the performance of our method.
126 |
127 | ``` bash!
128 | ./scripts/run_kf_vo.sh
129 | ```
130 |
131 | To visualise the keyframe tracking shown in the paper, add the arguments `--vo_type keyframe --two_view` to the above script.
132 | To check the full settings, run
133 | ``` bash!
134 | python code/experiments/kf_vo.py --help
135 | ```
136 |
137 |
138 | **Convergence basin analysis** for the keyframe visual odometry is also provided. Check the script `scripts/run_kf_vo_cb.sh` for more details.
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 | ## Citation
147 | ```bibtex
148 | @article{Xu:etal:RAL2021,
149 | author = {Binbin Xu and Andrew Davison and Stefan Leutenegger},
150 | journal = {{IEEE} Robotics and Automation Letters ({RAL})},
151 | title = {Deep Probabilistic Feature-metric Tracking},
152 | year={2021},
153 | volume = {6},
154 | number = {1},
155 | pages = {223 - 230},
156 | }
157 | ```
158 |
159 | Please cite the paper if you find our code useful.
160 |
161 |
162 | ## License
163 | This repo is BSD 3-Clause Licensed. Part of its code is from [Taking a Deeper Look at the Inverse Compositional Algorithm](https://github.com/lvzhaoyang/DeeperInverseCompositionalAlgorithm), which is MIT licensed. We thank the authors for their great work.
164 |
165 | Copyright © 2020-2021 Smart Robotics Lab, Imperial College London \
166 | Copyright © 2020-2021 Binbin Xu
167 |
168 |
169 | ## Contact
170 | Binbin Xu (b.xu17@imperial.ac.uk)
171 |
--------------------------------------------------------------------------------
/checkpoint/checkpoint_epoch29.pth.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/checkpoint/checkpoint_epoch29.pth.tar
--------------------------------------------------------------------------------
/checkpoint/combine_icp_checkpoint_epoch40.pth.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/checkpoint/combine_icp_checkpoint_epoch40.pth.tar
--------------------------------------------------------------------------------
/code/.gitignore:
--------------------------------------------------------------------------------
1 | trained_models/*
2 | test/*
3 | test_results/*
4 | */**/*.pyc
5 | logs/*
6 |
--------------------------------------------------------------------------------
/code/Logger.py:
--------------------------------------------------------------------------------
1 | """ Tensorflow Logger for training and testing including images
2 | # SPDX-FileCopyrightText: 2021 Binbin Xu
3 | # SPDX-License-Identifier: BSD-3-Clause
4 |
5 | @author: Zhaoyang Lv
6 | @date: March 2019
7 | """
8 |
9 | import sys, os, shutil
10 | import os.path as osp
11 | import subprocess
12 | from torch.utils.tensorboard import SummaryWriter
13 | import torch
14 | import numpy as np
15 | import cv2
16 | from collections import OrderedDict
17 |
18 | class Logger(object):
19 | """
20 | example usage:
21 |
22 | stdout = Logger('log.txt')
23 | sys.stdout = stdout
24 |
25 | ... your code here ...
26 |
27 | stdout.delink()
28 | """
29 | def __init__(self, filename="Default.log"):
30 | self.terminal = sys.stdout
31 | self.log = open(filename, "w")
32 |
33 | def delink(self):
34 | self.log.close()
35 | #self.log = open('foo', "w")
36 | # self.write = self.writeTerminalOnly
37 |
38 | def writeTerminalOnly(self, message):
39 | self.terminal.write(message)
40 |
41 | def write(self, message):
42 | self.terminal.write(message)
43 | self.log.write(message)
44 |
45 | def flush(self):
46 | pass
47 |
48 | class TensorBoardLogger(object):
49 | def __init__(self, logging_dir, logfile_name, print_freq = 10):
50 |
51 | self.log_dir = logging_dir
52 | self.print_freq = print_freq
53 |
54 | if not os.path.isdir(logging_dir):
55 | os.makedirs(logging_dir)
56 |
57 | self.summary_writer = SummaryWriter(log_dir=logging_dir)
58 |
59 | # standard logger to print to terminal
60 | logfile = osp.join(logging_dir,'log.txt')
61 | stdout = Logger(logfile)
62 | sys.stdout = stdout
63 |
64 | def write_to_tensorboard(self, display_dict, iteration):
65 | """ Write the saved states (display_dict) to tensorboard
66 | """
67 | for k, v in display_dict.items():
68 | self.summary_writer.add_scalar(k, v, iteration)
69 |
70 | def add_images_to_tensorboard(self, image_list: list, name, iteration):
71 |
72 | if iteration % self.print_freq != 0:
73 | return
74 |
75 | concated_images = np.concatenate(image_list, axis=1)
76 | ratio = 960 / concated_images.shape[1]
77 | if ratio < 1:
78 | concated_images = cv2.resize(concated_images, None, fx=ratio, fy=ratio)
79 | concated_images = cv2.cvtColor(concated_images, cv2.COLOR_BGR2RGB)
80 | self.summary_writer.add_image(name, concated_images,
81 | global_step=iteration,
82 | dataformats='HWC',
83 | )
84 |
85 | def write_to_terminal(self, display_dict, epoch, batch_iter, epoch_len, batch_time, is_train = True):
86 | """ Write the save states (display_dict) and training information to terminal for display
87 | """
88 |
89 | if batch_iter % self.print_freq != 0:
90 | return
91 |
92 | if is_train:
93 | prefix = 'Train'
94 | else:
95 | prefix = 'Test'
96 |
97 | state = prefix + ':\tEpoch %d, Batch %d/%d, BatchTime %.4f'%(epoch+1, batch_iter, epoch_len, batch_time)
98 |
99 | loss = ''
100 | for k, v in display_dict.items():
101 | loss += k + ' ' + '%.8f ' % v
102 |
103 | print(state + loss)
104 |
105 | def save_checkpoint(self, network, state_info = None,
106 | filename='checkpoint.pth.tar'):
107 | """save checkpoint to disk"""
108 | state_dict = network.state_dict().copy()
109 |
110 | if torch.cuda.device_count() > 1:
111 | state_dict_rename = OrderedDict()
112 | for k, v in state_dict.items():
113 | name = k[7:] # remove `module.`
114 | state_dict_rename[name] = v
115 | state_dict = state_dict_rename
116 |
117 | if state_info is None:
118 | state = {'state_dict': state_dict}
119 | else:
120 | state = state_info
121 | state['state_dict'] = state_dict
122 |
123 | checkpoint_path = osp.join(self.log_dir,filename)
124 | torch.save(state, checkpoint_path)
125 | return checkpoint_path
126 |
127 |
128 | def log_git_revisions_hash(dir):
129 | hashes = []
130 | # the latest git information
131 | latest_commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
132 | # hashes.append(subprocess.check_output(['git', 'rev-parse', 'HEAD']))
133 | # hashes.append(subprocess.check_output(['git', 'rev-parse', 'HEAD^']))
134 | # return hashes
135 | f = open(osp.join(dir, 'git_status.txt'), "w")
136 | f.write(str(latest_commit_id))
137 | f.close()
138 |
139 |
140 | def check_directory(filename):
141 | target_dir = os.path.dirname(filename)
142 | if not os.path.isdir(target_dir):
143 | os.makedirs(target_dir)
--------------------------------------------------------------------------------
/code/__init__.py:
--------------------------------------------------------------------------------
1 | import pkgutil
2 | __path__ = pkgutil.extend_path(__path__, __name__)
3 |
--------------------------------------------------------------------------------
/code/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Argparse configuration for training and testing in the paper
3 | # SPDX-FileCopyrightText: 2021 Binbin Xu
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | """
6 |
7 | def add_network_config(parser):
8 | parser.add_argument('--feature_channel', default=1, type=int,
9 | help='Specify the feature channel used for tracking. The default is 1.\n')
10 | parser.add_argument('--uncertainty_channel', default=1, type=int,
11 | help='Specify the uncertainty channel used for tracking when using uncertainty. The default is 1.\n')
12 | parser.add_argument('--feature_extract', default='average', type=str,
13 | choices=('1by1', 'conv', 'skip', 'average', 'prob_fuse'),
14 | help='Specify the method to extract features from the pyramid. The default is average.\n')
15 | parser.add_argument('--uncertainty',
16 | default='None', type=str,
17 | choices=('None', 'identity', 'sigmoid', 'feature', 'gaussian', 'laplacian', 'old_gaussian', 'old_laplacian'),
18 | help='Choose an uncertainty function for feature-metric tracking. DIC is the original CVPR work; '
19 | 'None uses the identity matrix; the selected function is used in the GN tracking.\n')
20 | parser.add_argument('--combine_ICP',action='store_true',
21 | help='Combine the training with ICP.\n')
22 | parser.add_argument('--add_init_pose_noise', action='store_true',
23 | help='Add noise in the init pose (translation) only in training.\n')
24 | parser.add_argument('--init_pose', default='identity', type=str,
25 | choices=('identity', 'sfm_net', 'dense_net'),
26 | help='Use predicted pose as initial pose.\n')
27 | # @TODO: try different number 0.01 in sfm-learner, 0.1 in deeptam
28 | parser.add_argument('--scale_init_pose', default='0.01', type=float,
29 | help='Scaling the initial predicted pose.\n')
30 | parser.add_argument('--train_init_pose', action='store_true',
31 | help='Jointly train pose predictor by regressing predicted pose to the ground truth.\n')
32 | parser.add_argument('--multi_hypo', default='None', type=str,
33 | choices=('None', 'average', 'prob_fuse', 'res_prob_fuse'),
34 | help='Use multi hypothesis for init pose guess.\n')
35 | parser.add_argument('--res_input', action='store_true',
36 | help='Also input residual for posenet.\n')
37 | # virtual does not work well
38 | # parser.add_argument('--virtual_camera', action='store_true',
39 | # help='Use rendered virtual frame and virtual camera instead of img1 \n')
40 | parser.add_argument('--vis_feat', default=False, action='store_true',
41 | help='visualize the feature maps in the training')
42 | parser.add_argument('--scannet_subset_train', default='0.25', type=float,
43 | help='Subset ratio in scannet for training.\n')
44 | parser.add_argument('--scannet_subset_val', default='0.005', type=float,
45 | help='Subset ratio in scannet for validation.\n')
46 | parser.add_argument('--train_uncer_prop', action='store_true',
47 | help='Use uncertainty propagation in the training loss\n')
48 | parser.add_argument('--obj_only', action='store_true',
49 | help='Only use the object region (mask) in the training loss.\n')
50 | parser.add_argument('--loss', default='EPE3D', type=str,
51 | choices=('EPE3D', 'RPE', 'UEPE3D', 'URPE'),
52 | help='Training loss.\n')
53 | parser.add_argument('--remove_tru_sigma', action='store_true',
54 | help='Remove truncated uncertainty areas in the tracking for training/testing\n')
55 | parser.add_argument('--scaler', default='None', type=str,
56 | choices=('None', 'oneResidual', 'twoResidual', 'MultiScale2w', 'expMultiScale'),
57 | help='Choose a scale function for combining ICP and feature methods. \n')
58 | parser.add_argument('--scale_icp', default='0.01', type=float,
59 | help='Scaling the ICP w.r.t feature/RGB.\n')
60 | parser.add_argument('--add_vl_dataset', action='store_true',
61 | help='Add varying lighting dataset to the TUM dataset for training/validation \n')
62 |
63 | def add_tracking_config(parser):
64 | add_network_config(parser)
65 | parser.add_argument('--network',
66 | default='DeepIC', type=str,
67 | choices=('DeepIC', 'GaussNewton'),
68 | help='Choose a network to run. \n \
69 | The DeepIC is the proposed Deeper Inverse Compositional method. \n\
70 | The GaussNewton is the baseline Inverse Compositional method which does not include \
71 | any learnable parameters\n')
72 | parser.add_argument('--mestimator',
73 | default='MultiScale2w', type=str,
74 | choices=('None', 'MultiScale2w'),
75 | help='Choose a weighting function for the Trust Region method.\n\
76 | The MultiScale2w is the proposed (B) convolutional M-estimator. \n')
77 | parser.add_argument('--solver',
78 | default='Direct-ResVol', type=str,
79 | choices=('Direct-Nodamping', 'Direct-ResVol'),
80 | help='Choose the solver function for the Trust Region method. \n\
81 | Direct-Nodamping is the Gauss-Newton algorithm, which does not use damping. \n\
82 | Direct-ResVol is the proposed (C) Trust-Region Network. \n\
83 | (default: Direct-ResVol) ')
84 | parser.add_argument('--direction',
85 | default='inverse', type=str,
86 | choices=('inverse', 'forward'),
87 | help='Choose the direction to update pose: inverse, or forward \n')
88 | parser.add_argument('--encoder_name',
89 | default='ConvRGBD2',
90 | choices=('ConvRGBD2', 'RGB', 'ConvRGBD'),
91 | help='The encoder architectures. \
92 | ConvRGBD2 takes the two-view features as input. \n\
93 | RGB is using the raw RGB images as input (converted to intensity afterwards).\n\
94 | (default: ConvRGBD2)')
95 | parser.add_argument('--max_iter_per_pyr',
96 | default=3, type=int,
97 | help='The maximum number of iterations at each pyramid level.\n')
98 | parser.add_argument('--no_weight_sharing',
99 | action='store_true',
100 | help='If this flag is on, we disable sharing the weights across the different backbone networks when extracting \
101 | features. By default, we share the weights for all networks in each pyramid level.\n')
102 | parser.add_argument('--tr_samples', default=10, type=int,
103 | help='Set the number of trust-region samples. (default: 10)\n')
104 |
105 | def add_basics_config(parser):
106 | """ the basic setting
107 | (supposed to be shared through train and inference)
108 | """
109 | parser.add_argument('--cpu_workers', type=int, default=12,
110 | help="Number of cpu threads for data loader.\n")
111 | parser.add_argument('--dataset', type=str,
112 | choices=('TUM_RGBD', 'ScanNet', 'MovingObjects3D', 'VaryLighting'),
113 | help='Choose a dataset to train/val/evaluate.\n')
114 | parser.add_argument('--image_resize', type=float, default=None,
115 | help='downsize ratio for input images')
116 | parser.add_argument('--time', dest='time', action='store_true',
117 | help='Count the execution time of each step.\n' )
118 |
119 | def add_test_basics_config(parser):
120 | parser.add_argument('--tracker', default='learning_based', type=str,
121 | choices=('learning_based', 'ICP', 'ColorICP', 'RGBD'))
122 | parser.add_argument('--batch_per_gpu', default=8, type=int,
123 | help='Specify the batch size during test. The default is 8.\n')
124 | parser.add_argument('--checkpoint', default='', type=str,
125 | help='Choose a checkpoint model to test.\n')
126 | parser.add_argument('--keyframes',
127 | default='1,2,4,8', type=str,
128 | help='Choose the number of keyframes to train the algorithm.\n')
129 | parser.add_argument('--verbose', action='store_true',
130 | help='Print/save all the intermediate representations')
131 | parser.add_argument('--eval_set', default='test',
132 | choices=('test', 'validation'))
133 | parser.add_argument('--trajectory', type=str,
134 | default = '',
135 | help = 'Specify a trajectory to run.\n')
136 |
137 | def add_train_basics_config(parser):
138 | """ add the basics about the training """
139 | parser.add_argument('--checkpoint', default='', type=str,
140 | help='Choose a pretrained checkpoint model to start with. \n')
141 | parser.add_argument('--batch_per_gpu', default=64, type=int,
142 | help='Specify the batch size during training.\n')
143 | parser.add_argument('--epochs',
144 | default=30, type=int,
145 | help='The total number of total epochs to run. Default is 30.\n' )
146 | parser.add_argument('--resume_training',
147 | dest='resume_training', action='store_true',
148 | help='Resume the training using the loaded checkpoint. If not, restart the training. \n\
149 | You will need to use the --checkpoint config to load the pretrained checkpoint' )
150 | parser.add_argument('--pretrained_model', default='', type=str,
151 | help='Initialize the model weights with pretrained model.\n')
152 | parser.add_argument('--no_val',
153 | default=False,
154 | action='store_true',
155 | help='Do not use a validation set during training.\n')
156 | parser.add_argument('--keyframes',
157 | default='1,2,4,8', type=str,
158 | help='Choose the number of keyframes to train the algorithm')
159 | parser.add_argument('--verbose', action='store_true',
160 | help='Print/save all the intermediate representations.\n')
161 |
162 | def add_train_log_config(parser):
163 | """ checkpoint and log options """
164 | parser.add_argument('--checkpoint_folder', default='', type=str,
165 | help='The folder name (postfix) to save the checkpoint.')
166 | parser.add_argument('--snapshot', default=1, type=int,
167 | help='Number of iterations to save a snapshot')
168 | parser.add_argument('--save_checkpoint_freq',
169 | default=1, type=int,
170 | help='save the checkpoint for every N epochs')
171 | parser.add_argument('--prefix', default='', type=str,
172 | help='the prefix string added to the log files')
173 | parser.add_argument('-p', '--print_freq',
174 | default=10, type=int,
175 | help='print frequency (default: 10)')
176 |
177 |
178 | def add_train_optim_config(parser):
179 | """ add training optimization options """
180 | parser.add_argument('--opt',
181 | type=str, default='adam', choices=('sgd','adam'),
182 | help='choice of optimizer (default: adam) \n')
183 | parser.add_argument('--lr',
184 | default=0.0005, type=float,
185 | help='initial learning rate. \n')
186 | parser.add_argument('--lr_decay_ratio',
187 | default=0.5, type=float,
188 | help='lr decay ratio (default:0.5)')
189 | parser.add_argument('--lr_decay_epochs',
190 | default=[5, 10, 20], type=int, nargs='+',
191 | help='lr decay epochs')
192 | parser.add_argument('--lr_min', default=1e-6, type=float,
193 | help='minimum learning rate')
194 | parser.add_argument('--lr_restart', default=10, type=int,
195 | help='restart learning after N epochs')
196 |
197 | def add_train_loss_config(parser):
198 | """ add training configuration for the loss function """
199 | parser.add_argument('--regression_loss_type',
200 | default='SmoothL1', type=str, choices=('L1', 'SmoothL1'),
201 | help='Loss function for flow regression (default: SmoothL1 loss)')
202 |
203 | def add_vo_config(parser):
204 | """ add testing configuration for kf-vo demo """
205 | parser.add_argument('--vo', default='feature_icp', type=str,
206 | choices=('DeepIC', 'RGB', 'ICP', 'RGB+ICP', 'feature', 'feature_icp'),
207 | help='Select which tracking method to use for visual odometry.\n')
208 | parser.add_argument('--vo_type', default='incremental', type=str,
209 | choices=('incremental', 'keyframe'),
210 | help='Select which reference frame to use for tracking.\n')
211 | parser.add_argument('--two_view', action='store_true',
212 | help='Only visualize two views.\n' )
213 | parser.add_argument('--gt_tracker', action='store_true',
214 | help='Use ground truth pose for point cloud visualization')
215 | parser.add_argument('--save_img', action='store_true',
216 | help='Save visualizations.\n' )
217 |
218 | def add_cb_config(parser):
219 | """ add visualization configurations for convergence basin """
220 | parser.add_argument('--cb_dimension', default='2D', type=str,
221 | choices=('1D', '2D', '6D'),
222 | help='Select which dimension to visualize for convergence basin.\n')
223 | parser.add_argument('--save_img', action='store_true',
224 | help='Save visualizations.\n' )
225 | parser.add_argument('--reset_cb', action='store_true',
226 | help='Reset and recompute the convergence basin.\n' )
227 | parser.add_argument('--pert_samples', default=31, type=int,
228 | help='perturbation samples in each pose dimension')
229 |
230 | def add_object_config(parser):
231 | parser.add_argument('--method', default='feature_icp', type=str,
232 | choices=('DeepIC', 'RGB', 'ICP', 'RGB+ICP', 'feature', 'feature_icp'),
233 | help='Select which tracking method to use for visual odometry.\n')
234 | parser.add_argument('--batch_per_gpu', default=64, type=int,
235 | help='Specify the batch size during test. The default is 64.\n')
236 | parser.add_argument('--checkpoint', default='', type=str,
237 | help='Choose a checkpoint model to test.\n')
238 | parser.add_argument('--keyframes',
239 | default='1,2,4', type=str,
240 | help='Choose the number of keyframes to train the algorithm.\n')
241 | parser.add_argument('--eval_set', default='test',
242 | choices=('test', 'validation'))
243 | parser.add_argument('--object', type=str,
244 | default = '',
245 | help = 'Specify an object category to run.\n')
246 | parser.add_argument('--save_img', action='store_true',
247 | help='Save visualizations.\n' )
248 | parser.add_argument('--gt_pose', action='store_true',
249 | help='Use the ground-truth pose.\n' )
250 | parser.add_argument('--recompute', action='store_true',
251 | help='Recompute the results instead of loading cached ones.\n' )
--------------------------------------------------------------------------------
/code/data/MovingObj3D.py:
--------------------------------------------------------------------------------
1 | """
2 | Data loader for MovingObjs 3D dataset
3 |
4 | @author: Zhaoyang Lv
5 | @date: May 2019
6 | """
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 | from __future__ import unicode_literals
12 |
13 | import sys, os, random
14 | import pickle
15 | import functools
16 |
17 | import numpy as np
18 | import torch.utils.data as data
19 | import os.path as osp
20 |
21 | from imageio import imread
22 | from tqdm import tqdm
23 |
24 | from cv2 import resize, INTER_NEAREST
25 |
26 | class MovingObjects3D(data.Dataset):
27 |
28 | # every sequence has 200 frames.
29 | categories = {
30 | 'train': {'aeroplane': [0,190],
31 | 'bicycle': [0,190],
32 | 'bus': [0,190],
33 | 'car': [0,190]},
34 |
35 | 'validation': {'aeroplane': [190,200],
36 | 'bicycle': [190,200],
37 | 'bus': [190,200],
38 | 'car': [190,200]},
39 |
40 | 'test': {'boat': [0,200],
41 | 'motorbike': [0,200]}
42 | }
43 |
44 | def __init__(self, root, load_type='train', keyframes = [1], data_transform=None,
45 | category=None, image_resize=0.5):
46 | super(MovingObjects3D, self).__init__()
47 |
48 | self.base_folder = osp.join(root)
49 |
50 | data_all = self.categories[load_type]
51 |
52 | # split it into train and test set (the first 20 are for test)
53 | self.image_seq = []
54 | self.depth_seq = []
55 | self.invalid_seq = []
56 | self.object_mask_seq = []
57 | self.cam_pose_seq = []
58 | self.obj_pose_seq = []
59 | self.obj_vis_idx = []
60 | self.calib = []
61 | self.obj_names = []
62 |
63 | self.transforms = data_transform
64 |
65 | if load_type in ['validation', 'test']:
66 | # should not mix different keyframes in test
67 | assert(len(keyframes) == 1)
68 | self.keyframes = [1]
69 | self.sample_freq = keyframes[0]
70 | else:
71 | self.keyframes = keyframes
72 | self.sample_freq = 1
73 |
74 | self.ids = 0
75 | self.images_size = [240, 320]
76 | # get the accumulated image sequences on the fly
77 | self.seq_acc_ids = [0]
78 | for data_obj, frame_interval in data_all.items():
79 | if category is not None and data_obj != category:
80 | continue
81 |
82 | start_idx, end_idx = frame_interval
83 | print('Load {:} data from frame {:d} to {:d}'.format(data_obj, start_idx, end_idx))
84 | for seq_idx in range(start_idx, end_idx, 1):
85 | seq_str = "{:06d}".format(seq_idx)
86 |
87 | info_pkl= osp.join(self.base_folder,
88 | data_obj, seq_str, 'info.pkl')
89 |
90 | color_seq, depth_seq, invalid_seq, mask_seq, camera_poses_seq, object_poses_seq,\
91 | obj_visible_frames, calib_seq = extract_info_pickle(info_pkl)
92 |
93 | obj_visible_frames = obj_visible_frames[::self.sample_freq]
94 |
95 | self.image_seq.append([osp.join(self.base_folder, x) for x in color_seq])
96 | self.depth_seq.append([osp.join(self.base_folder, x) for x in depth_seq])
97 | # self.invalid_seq.append(invalid_seq)
98 | self.object_mask_seq.append([osp.join(self.base_folder, x) for x in mask_seq])
99 | self.cam_pose_seq.append(camera_poses_seq)
100 | self.obj_pose_seq.append(object_poses_seq)
101 | self.calib.append(calib_seq)
102 | self.obj_vis_idx.append(obj_visible_frames)
103 |
104 | self.obj_names.append('{:}_{:03d}'.format(data_obj, seq_idx))
105 |
106 | total_valid_frames = max(0, len(obj_visible_frames) - max(self.keyframes))
107 |
108 | self.ids += total_valid_frames
109 | self.seq_acc_ids.append(self.ids)
110 |
111 | # downscale the input image to half
112 | self.fx_s = image_resize
113 | self.fy_s = image_resize
114 |
115 | print('There are a total of {:} valid frames'.format(self.ids))
116 |
117 | def __len__(self):
118 | return self.ids
119 |
120 | def __getitem__(self, index):
121 | # the index we want from search sorted is shifted for one
122 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index+1) - 1, 0)
123 | frame_idx = index - self.seq_acc_ids[seq_idx]
124 |
125 | this_idx= self.obj_vis_idx[seq_idx][frame_idx]
126 | next_idx= self.obj_vis_idx[seq_idx][frame_idx + random.choice(self.keyframes)]
127 |
128 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx])
129 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx])
130 |
131 | if self.transforms:
132 | color0, color1 = self.transforms([color0, color1])
133 |
134 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx])
135 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx])
136 |
137 | cam_pose0 = self.cam_pose_seq[seq_idx][this_idx]
138 | cam_pose1 = self.cam_pose_seq[seq_idx][next_idx]
139 | obj_pose0 = self.obj_pose_seq[seq_idx][this_idx]
140 | obj_pose1 = self.obj_pose_seq[seq_idx][next_idx]
141 |
142 | # the relative allocentric transform of objects
143 | transform = functools.reduce(np.dot,
144 | [np.linalg.inv(cam_pose1), obj_pose1, np.linalg.inv(obj_pose0), cam_pose0]).astype(np.float32)
145 |
146 | # the validity of the object is determined by the object mask
147 | obj_index = 1 # the object index defaults to 1
148 | obj_mask0 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][this_idx], obj_index)
149 | obj_mask1 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][next_idx], obj_index)
150 |
151 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32)
152 | calib[0] *= self.fx_s
153 | calib[1] *= self.fy_s
154 | calib[2] *= self.fx_s
155 | calib[3] *= self.fy_s
156 |
157 | obj_name = self.obj_names[seq_idx]
158 | # pair_name = '{:}/{:06d}to{:06d}'.format(obj_name, this_idx, next_idx)
159 | pair_name = {'seq': obj_name,
160 | 'seq_idx': seq_idx,
161 | 'frame0': this_idx,
162 | 'frame1': next_idx}
163 |
164 | return color0, color1, depth0, depth1, transform, calib, obj_mask0, obj_mask1, pair_name
165 |
166 | def get_original_size_batch(self, index):
167 | # the index we want from search sorted is shifted for one
168 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index+1) - 1, 0)
169 | frame_idx = index - self.seq_acc_ids[seq_idx]
170 |
171 | this_idx= self.obj_vis_idx[seq_idx][frame_idx]
172 | next_idx= self.obj_vis_idx[seq_idx][frame_idx + random.choice(self.keyframes)]
173 |
174 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx], do_resize=False)
175 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx], do_resize=False)
176 |
177 | if self.transforms:
178 | color0, color1 = self.transforms([color0, color1])
179 |
180 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx], do_resize=False)
181 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx], do_resize=False)
182 |
183 | cam_pose0 = self.cam_pose_seq[seq_idx][this_idx]
184 | cam_pose1 = self.cam_pose_seq[seq_idx][next_idx]
185 | obj_pose0 = self.obj_pose_seq[seq_idx][this_idx]
186 | obj_pose1 = self.obj_pose_seq[seq_idx][next_idx]
187 |
188 | # the relative allocentric transform of objects
189 | transform = functools.reduce(np.dot,
190 | [np.linalg.inv(cam_pose1), obj_pose1, np.linalg.inv(obj_pose0), cam_pose0]).astype(np.float32)
191 |
192 | # the validity of the object is determined by the object mask
193 | obj_index = 1 # the object index defaults to 1
194 | obj_mask0 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][this_idx], obj_index, do_resize=False)
195 | obj_mask1 = self.__load_binary_mask_tensor(self.object_mask_seq[seq_idx][next_idx], obj_index, do_resize=False)
196 |
197 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32)
198 |
199 | obj_name = self.obj_names[seq_idx]
200 | # pair_name = '{:}/{:06d}to{:06d}'.format(obj_name, this_idx, next_idx)
201 | pair_name = {'seq': obj_name,
202 | 'seq_idx': seq_idx,
203 | 'frame0': this_idx,
204 | 'frame1': next_idx}
205 |
206 | return color0, color1, depth0, depth1, transform, calib, obj_mask0, obj_mask1, pair_name
207 |
208 | def __load_rgb_tensor(self, path, do_resize=True):
209 | """ Load the rgb image
210 | """
211 | image = imread(path)[:, :, :3]
212 | image = image.astype(np.float32) / 255.0
213 | if do_resize:
214 | image = resize(image, None, fx=self.fx_s, fy=self.fy_s)
215 | return image
216 |
217 | def __load_depth_tensor(self, path, do_resize=True):
218 | """ Load the depth
219 | """
220 | depth = imread(path).astype(np.float32) / 1e3
221 | if do_resize:
222 | depth = resize(depth, None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST)
223 | depth = np.clip(depth, 1e-1, 1e2) # the valid region of the depth
224 | return depth[np.newaxis, :]
225 |
226 | def __load_binary_mask_tensor(self, path, seg_index, do_resize=True):
227 | """ Load a binary segmentation mask (numbers)
228 | If the object matches the specified index, return true;
229 | Otherwise, return false
230 | """
231 | obj_mask = imread(path)
232 | mask = (obj_mask == seg_index)
233 | if do_resize:
234 | mask = resize(mask.astype(float), None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST)
235 | return mask.astype(bool)[np.newaxis, :]
236 |
237 | def extract_info_pickle(info_pkl):
238 |
239 | with open(info_pkl, 'rb') as p:
240 | info = pickle.load(p)
241 |
242 | color_seq = [x.split('final/')[1] for x in info['color']]
243 | depth_seq = [x.split('final/')[1] for x in info['depth']]
244 | invalid_seq = [x.split('final/')[1] for x in info['invalid'] ]
245 | mask_seq = [x.split('final/')[1] for x in info['object_mask']]
246 |
247 | # in this rendering setting, there is only one object
248 | camera_poses_seq = info['pose']
249 | object_poses_seq = info['object_poses']['Model_1']
250 | object_visible_frames = info['object_visible_frames']['Model_1']
251 |
252 | calib_seq = info['calib']
253 |
254 | return color_seq, depth_seq, invalid_seq, mask_seq, \
255 | camera_poses_seq, object_poses_seq, object_visible_frames, calib_seq
256 |
257 |
258 | if __name__ == '__main__':
259 | from data.dataloader import load_data
260 | import torchvision.utils as torch_utils
261 |
262 | # loader = MovingObjects3D('', load_type='train', keyframes=[1])
263 | loader = load_data('MovingObjects3D', keyframes=[1], load_type='train')
264 | torch_loader = data.DataLoader(loader, batch_size=16, shuffle=False, num_workers=4)
265 |
266 | for batch in torch_loader:
267 | color0, color1, depth0, depth1, transform, K, mask0, mask1, names = batch
268 | B,C,H,W=color0.shape
269 |
270 | bcolor0_img = torch_utils.make_grid(color0, nrow=4)
271 | bcolor1_img = torch_utils.make_grid(color1, nrow=4)
272 | # bdepth0_img = torch_utils.make_grid(depth0, nrow=4)
273 | # bdepth1_img = torch_utils.make_grid(depth1, nrow=4)
274 | bmask0_img = torch_utils.make_grid(mask0.view(B,1,H,W)*255, nrow=4)
275 | bmask1_img = torch_utils.make_grid(mask1.view(B,1,H,W)*255, nrow=4)
276 |
277 | import matplotlib.pyplot as plt
278 | plt.figure()
279 | plt.imshow(bcolor0_img.numpy().transpose((1,2,0)))
280 | plt.figure()
281 | plt.imshow(bcolor1_img.numpy().transpose((1,2,0)))
282 | # plt.figure()
283 | # plt.imshow(bdepth0_img.numpy().transpose((1,2,0)))
284 | # plt.figure()
285 | # plt.imshow(bdepth1_img.numpy().transpose((1,2,0)))
286 | plt.figure()
287 | plt.imshow(bmask0_img.numpy().transpose((1,2,0)))
288 | plt.figure()
289 | plt.imshow(bmask1_img.numpy().transpose((1,2,0)))
290 | plt.show()
291 |
292 |
--------------------------------------------------------------------------------
/code/data/ScanNet.py:
--------------------------------------------------------------------------------
1 | """ The dataloader for ScanNet dataset
2 | # SPDX-FileCopyrightText: 2021 Binbin Xu
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | import os, random
7 | import os.path as osp
8 |
9 | import numpy as np
10 | from torch.utils.data import Dataset
11 | from imageio import imread
12 | from tqdm import tqdm
13 | import pickle
14 | from cv2 import resize, INTER_NEAREST
15 |
16 |
17 | class ScanNet(Dataset):
18 |
19 | def __init__(self, root=None, category='train',
20 | keyframes=[1], data_transform=None, select_traj=None,
21 | image_resize=0.25, truncate_depth=True,
22 | subset_train=0.95, subset_val=0.05):
23 | assert root is not None
24 | super(ScanNet, self).__init__()
25 |
26 | self.image_seq = [] # list (seq) of list (frame) of string (rgb image path)
27 | self.timestamp = [] # empty
28 | self.depth_seq = [] # list (seq) of list (frame) of string (depth image path)
29 | # self.invalid_seq = [] # empty
30 | self.cam_pose_seq = [] # list (seq) of list (frame) of 4 X 4 ndarray
31 | self.calib = [] # list (seq) of list (intrinsics: fx, fy, cx, cy)
32 | self.seq_names = [] # list (seq) of string (seq name)
33 |
34 | self.subset_train = subset_train # only use subset for training
35 | self.subset_val = subset_val # only use subset for validation
36 | assert self.subset_train + self.subset_val <= 1
37 | self.ids = 0
38 | self.seq_acc_ids = [0]
39 | self.keyframes = keyframes
40 | self.cam = {
41 | 'distCoeffs': None,
42 | 'fx': 577.871,
43 | 'fy': 577.871,
44 | 'ux': 319.5,
45 | 'uy': 239.5,
46 | 'size': (640, 480),
47 | }
48 | self.depth_conversion = 1.0/5e3
49 |
50 | self.transforms = data_transform
51 |
52 | if category == 'test':
53 | self.__load_test(osp.join(root, 'val'), select_traj)
54 | else: # train and validation
55 | self.__load_train_val(osp.join(root, 'train'), category)
56 |
57 | # downscale the input image to a quarter
58 | self.fx_s = image_resize
59 | self.fy_s = image_resize
60 | self.truncate_depth = truncate_depth
61 |
62 | print('ScanNet dataloader for {:} using keyframe {:}: \
63 | {:} valid frames'.format(category, keyframes, self.ids))
64 |
65 | def __read_scans(self, data_dir):
66 | # glob for sequences
67 | sequences = [d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))]
68 | print('Found {} sequences in directory: {}'.format(len(sequences), data_dir))
69 |
70 | scans = []
71 | with tqdm(total=len(sequences)) as t:
72 | for seq in sequences:
73 | seq_dir = os.path.join(data_dir, seq)
74 | # synchronized trajectory file
75 | sync_traj_file = osp.join(seq_dir, 'sync_trajectory.pkl')
76 |
77 | if not osp.isfile(sync_traj_file):
78 | print("The synchronized trajectory file {:} has not been generated.".format(seq))
79 | print("Generate it now...")
80 |
81 | # get sequence length from the _info.txt file
82 | nframes = int(open(os.path.join(seq_dir, '_info.txt')).readlines()[-1].split()[-1])
83 |
84 | views = list()
85 | for i in range(nframes):
86 | frame = os.path.join(seq_dir, 'frame-{:06d}'.format(i))
87 | pose_file = frame + '.pose.txt'  # frame already includes seq_dir
88 | pose = np.loadtxt(open(pose_file, 'r'))
89 |
90 | # do not use any frame with inf pose
91 | if np.isinf(np.sum(pose)):
92 | print(frame)
93 | continue
94 | views.append({'img': frame + '.color.jpg',
95 | 'dpt': frame + '.merged_depth.png',
96 | 'frame_id': i,
97 | 'pose': pose})
98 |
99 | # export trajectory file
100 | with open(sync_traj_file, 'wb') as output:
101 | pickle.dump(views, output)
102 |
103 | else:
104 | with open(sync_traj_file, 'rb') as p:
105 | views = pickle.load(p)
106 |
107 | scans.append(views)
108 | t.set_postfix({'seq': seq})
109 | t.update()
110 | return scans
111 |
112 | def __load_train_val(self, root, category):
113 | scans = self.__read_scans(root)
114 |
115 | for scene in scans:
116 | total_num = len(scene)
117 | # the ratio to split the train & validation set
118 | if category == 'train':
119 | start_idx, end_idx = 0, int(self.subset_train * total_num)
120 | else:
121 | start_idx, end_idx = int((1-self.subset_val) * total_num), total_num
122 |
123 | images = [scene[idx]['img'] for idx in range(start_idx, end_idx)]
124 | depths = [scene[idx]['dpt'] for idx in range(start_idx, end_idx)]
125 | extrin = [scene[idx]['pose'] for idx in range(start_idx, end_idx)]
126 | # fake timestamp with frame id
127 | frame_id = [scene[idx]['frame_id'] for idx in range(start_idx, end_idx)]
128 | seq_name = osp.basename(osp.dirname(images[0]))
129 | calib = [self.cam['fx'], self.cam['fy'], self.cam['ux'], self.cam['uy']]
130 |
131 | self.calib.append(calib)
132 | self.image_seq.append(images)
133 | self.depth_seq.append(depths)
134 | self.timestamp.append(frame_id)
135 | self.cam_pose_seq.append(extrin)
136 | self.seq_names.append(seq_name)
137 | self.ids += max(0, len(images) - max(self.keyframes))
138 | self.seq_acc_ids.append(self.ids)
139 |
140 | def __load_test(self, root, select_traj=None):
141 | """ Note:
142 | The test trajectory is loaded slightly different from the train/validation trajectory.
143 | We only select keyframes from the entire trajectory, rather than use every individual frame.
144 | For a given trajectory of length N, using key-frame 2, the train/validation set will use
145 | [[1, 3], [2, 4], [3, 5],...[N-1, N]],
146 | while test set will use pair
147 | [[1, 3], [3, 5], [5, 7],...[N-1, N]]
148 | This difference results in a change in the trajectory length when using different keyframes.
149 |
150 | The benefit of sampling keyframes in the test set is that the output is a more reasonable trajectory;
151 | in training/validation, we fully leverage every pair of images.
152 | """
153 |
154 | assert (len(self.keyframes) == 1)
155 | scans = self.__read_scans(root)
156 | kf = self.keyframes[0]
157 | self.keyframes = [1]
158 |
159 | for scene in scans:
160 | seq_name = osp.basename(osp.dirname(scene[0]['img']))
161 | if select_traj is not None:
162 | if seq_name != select_traj: continue
163 |
164 | calib = [self.cam['fx'], self.cam['fy'], self.cam['ux'], self.cam['uy']]
165 | self.calib.append(calib)
166 |
167 | total_num = len(scene)
168 | images = [scene[idx]['img'] for idx in range(0, total_num, kf)]
169 | depths = [scene[idx]['dpt'] for idx in range(0, total_num, kf)]
170 | extrin = [scene[idx]['pose'] for idx in range(0, total_num, kf)]
171 |
172 | # fake timestamp with frame id
173 | timestamp = [scene[idx]['frame_id'] for idx in range(0, total_num, kf)]
174 | self.image_seq.append(images)
175 | self.timestamp.append(timestamp)
176 | self.depth_seq.append(depths)
177 | self.cam_pose_seq.append(extrin)
178 | self.seq_names.append(seq_name)
179 | self.ids += max(0, len(images) - 1)
180 | self.seq_acc_ids.append(self.ids)
181 |
182 | if len(self.image_seq) == 0:
183 | raise Exception("The specified trajectory is not in the test set nor supported.")
184 |
185 | def __getitem__(self, index):
186 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0)
187 | frame_idx = index - self.seq_acc_ids[seq_idx]
188 |
189 | this_idx = frame_idx
190 | next_idx = frame_idx + random.choice(self.keyframes)
191 |
192 | # if the next random keyframe is too far
193 | if self.timestamp[seq_idx][next_idx] - self.timestamp[seq_idx][this_idx] > max(self.keyframes):
194 | search_keyframes = self.keyframes[::-1] + [-kf for kf in self.keyframes]
195 | inf_pose_issue = True
196 | print("search:", self.timestamp[seq_idx][this_idx])
197 | for keyframe in search_keyframes:
198 | next_idx = frame_idx + keyframe
199 | if abs(self.timestamp[seq_idx][next_idx] - self.timestamp[seq_idx][this_idx]) <= max(self.keyframes):
200 | inf_pose_issue = False
201 | break
202 | if inf_pose_issue:
203 | next_idx = frame_idx + 1
204 | print("#invalid frame:", self.image_seq[seq_idx][this_idx])
205 | # raise ValueError
206 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx])
207 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx])
208 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx])
209 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx])
210 |
211 | if self.transforms:
212 | color0, color1 = self.transforms([color0, color1])
213 |
214 | # normalize the coordinate
215 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32)
216 | calib[0] *= self.fx_s
217 | calib[1] *= self.fy_s
218 | calib[2] *= self.fx_s
219 | calib[3] *= self.fy_s
220 |
221 | cam_pose0 = self.cam_pose_seq[seq_idx][this_idx]
222 | cam_pose1 = self.cam_pose_seq[seq_idx][next_idx]
223 | transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32)
224 |
225 | name = {'seq': self.seq_names[seq_idx],
226 | 'frame0': this_idx,
227 | 'frame1': next_idx}
228 |
229 | # camera_info = dict()
230 | camera_info = {"height": color0.shape[0],
231 | "width": color0.shape[1],
232 | "fx": calib[0],
233 | "fy": calib[1],
234 | "ux": calib[2],
235 | "uy": calib[3]}
236 |
237 | return color0, color1, depth0, depth1, transform, calib, name, camera_info
238 |
239 | def __len__(self):
240 | return self.ids
241 |
242 | def __load_rgb_tensor(self, path):
243 | image = imread(path)[:, :, :3]
244 | image = image.astype(np.float32) / 255.0
245 | image = resize(image, None, fx=self.fx_s, fy=self.fy_s)
246 | return image
247 |
248 | def __load_depth_tensor(self, path):
249 | """ Load the depth:
250 | The depth images are scaled by a factor of 5000, i.e., a pixel
251 | value of 5000 in the depth image corresponds to a distance of
252 | 1 meter from the camera, 10000 to 2 meter distance, etc.
253 | A pixel value of 0 means missing value/no data.
254 | """
255 | depth = imread(path).astype(np.float32) * self.depth_conversion
256 | depth = resize(depth, None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST)
257 | if self.truncate_depth:
258 | depth = np.clip(depth, a_min=0.5, a_max=5.0) # the accurate range of kinect depth
259 | return depth[np.newaxis, :]
--------------------------------------------------------------------------------
/code/data/SimpleLoader.py:
--------------------------------------------------------------------------------
1 | """
2 | This Simple loader partially refers to
3 | https://github.com/NVlabs/learningrigidity/blob/master/SimpleLoader.py
4 | # SPDX-FileCopyrightText: 2021 Binbin Xu
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | @author: Zhaoyang Lv
8 | @date: May, 2019
9 | """
10 |
11 | import sys, os, random
12 | import torch.utils.data as data
13 | import os.path as osp
14 |
15 | import numpy as np
16 |
17 | from imageio import imread
18 |
19 | class SimpleLoader(data.Dataset):
20 |
21 | def __init__(self, color_dir, depth_dir, K):
22 | """
23 | :param the directory of color images
24 | :param the directory of depth images
25 | :param the intrinsic parameter [fx, fy, cx, cy]
26 | """
27 |
28 | print('This simple loader is designed for TUM. \n\
29 | The depth scale may be different in your depth format. ')
30 |
31 | color_files = sorted(os.listdir(color_dir))
32 | depth_files = sorted(os.listdir(depth_dir))
33 |
34 | # please ensure the two folders use the same number of synchronized files
35 | assert(len(color_files) == len(depth_files))
36 |
37 | self.color_pairs = []
38 | self.depth_pairs = []
39 | self.ids = len(color_files) - 1
40 | for idx in range(self.ids):
41 | self.color_pairs.append([
42 | osp.join(color_dir, color_files[idx]),
43 | osp.join(color_dir, color_files[idx+1])
44 | ] )
45 | self.depth_pairs.append([
46 | osp.join(depth_dir, depth_files[idx]),
47 | osp.join(depth_dir, depth_files[idx+1])
48 | ] )
49 |
50 | self.K = K
51 |
52 | def __getitem__(self, index):
53 |
54 | image0_path, image1_path = self.color_pairs[index]
55 | depth0_path, depth1_path = self.depth_pairs[index]
56 |
57 | image0 = self.__load_rgb_tensor(image0_path)
58 | image1 = self.__load_rgb_tensor(image1_path)
59 |
60 | depth0 = self.__load_depth_tensor(depth0_path)
61 | depth1 = self.__load_depth_tensor(depth1_path)
62 |
63 | calib = np.asarray(self.K, dtype=np.float32)
64 |
65 | return image0, image1, depth0, depth1, calib
66 |
67 | def __len__(self):
68 | return self.ids
69 |
70 | def __load_rgb_tensor(self, path):
71 | image = imread(path)
72 | image = image.astype(np.float32) / 255.0
73 | image = np.transpose(image, (2,0,1))
74 | return np.asarray(image.astype(np.float32))
75 |
76 | def __load_depth_tensor(self, path):
77 | assert(path.endswith('.png'))
78 | depth = imread(path).astype(np.float32) / 5e3
79 | depth = np.clip(depth, a_min=0.5, a_max=5.0)
80 |
81 | return np.asarray(depth[np.newaxis, :])
--------------------------------------------------------------------------------
/code/data/VaryLighting.py:
--------------------------------------------------------------------------------
1 | """ The dataloader for custom dataset
2 | # SPDX-FileCopyrightText: 2021 Binbin Xu
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | """
5 |
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 | from __future__ import unicode_literals
10 |
11 | import sys, os, random
12 |
13 | import os.path as osp
14 | import torch.utils.data as data
15 |
16 | from imageio import imread
17 | from cv2 import resize, INTER_NEAREST
18 |
19 | import os.path
20 | import glob
21 |
22 | # import third party
23 | import numpy as np
24 | import random
25 |
26 |
27 | def get_depth_from_corresponding_rgb(rgb_file_abs):
28 | rgb_dir, rgb_file_rel = os.path.split(rgb_file_abs)
29 | depth_dir = rgb_dir.replace("rgb", "depth")
30 | depth_file = os.path.join(depth_dir, rgb_file_rel)
31 | return depth_file
32 |
33 |
34 | class VaryLighting(data.Dataset):
35 |
36 | """
37 | Dataset class for our varying lighting dataset
38 | """
39 | base = 'data'
40 | IMAGE_HEIGHT = 480
41 | IMAGE_WIDTH = 640
42 | DEPTH_SCALE = 1.0/1000.0
43 | K = [525.0, 525.0, 319.5, 239.5]
44 |
45 | def __init__(self, root='', category='keyframe',
46 | keyframes=[1,], data_transform=None, select_traj=None,
47 | image_resize=0.25, truncate_depth=True, pair='incremental',
48 | ):
49 |
50 | super(VaryLighting, self).__init__()
51 | assert pair in ['incremental', 'keyframe']
52 | self.pair = pair
53 |
54 | self.image_seq = [] # list (seq) of list (frame) of string (rgb image path)
55 | self.timestamp = [] # empty
56 | self.depth_seq = [] # list (seq) of list (frame) of string (depth image path)
57 | self.invalid_seq = [] # empty
58 | self.cam_pose_seq = [] # list (seq) of list (frame) of 4 X 4 ndarray
59 | self.calib = [] # list (seq) of list (intrinsics: fx, fy, cx, cy)
60 | self.seq_names = [] # list (seq) of string (seq name)
61 |
62 | self.ids = 0
63 | self.seq_acc_ids = [0]
64 | self.keyframes = keyframes
65 |
66 | self.transforms = data_transform
67 |
68 | if category == 'test':
69 | # self.set_test_mode()
70 | self.__load_test(root, select_traj)
71 | elif category in ['train', 'validation']: # train and validation
72 | raise NotImplementedError()
73 | elif category == 'kf':
74 | self.__load_kf(root, select_traj)
75 | else:
76 | raise NotImplementedError
77 |
78 |         # downscale the input image by image_resize (a quarter by default)
79 | self.fx_s = image_resize
80 | self.fy_s = image_resize
81 | self.truncate_depth = truncate_depth
82 |
83 | print('Vary Lighting dataloader for {:} using keyframe {:}: \
84 | {:} valid frames'.format(category, keyframes, self.ids))
85 |
86 |
87 | def __load_test(self, root, select_traj=None):
88 |         """ Note:
89 |         The test trajectory is loaded slightly differently from the train/validation trajectories.
90 |         We only select keyframes from the entire trajectory, rather than using every individual frame.
91 |         For a given trajectory of length N, using keyframe 2, the train/validation set will use the pairs
92 |         [[1, 3], [2, 4], [3, 5], ..., [N-1, N]],
93 |         while the test set will use the pairs
94 |         [[1, 3], [3, 5], [5, 7], ..., [N-1, N]].
95 |         This difference results in a change in the trajectory length when using different keyframes.
96 |
97 |         The benefit of sampling keyframes for the test set is that the output is a more reasonable trajectory,
98 |         while in training/validation we fully leverage every pair of images.
99 |         """
100 |
101 | assert(len(self.keyframes) == 1)
102 | kf = self.keyframes[0]
103 | self.keyframes = [1]
104 | track_scene = osp.join(root, "*/")
105 | scene_lists = glob.glob(track_scene, recursive=True)
106 |
107 |         self._num_scenes = len(scene_lists)
108 |         if self._num_scenes == 0:
109 |             raise ValueError("No sub-folder data found under the dataset root")
110 | for scene in scene_lists:
111 | scene_name = osp.basename(osp.dirname(scene))
112 | if select_traj is not None:
113 | if scene_name != select_traj: continue
114 |
115 | rgb_images_regex = os.path.join(scene, "rgb/*.png")
116 | all_rgb_images_in_scene = sorted(glob.glob(rgb_images_regex))
117 | total_num = len(all_rgb_images_in_scene)
118 |
119 | self.calib.append(self.K)
120 |
121 |
122 | images = [all_rgb_images_in_scene[idx] for idx in range(0, total_num, kf)]
123 | # fake timestamps
124 | timestamp = [os.path.splitext(os.path.basename(image))[0] for image in images]
125 | depths = [get_depth_from_corresponding_rgb(rgb_file) for rgb_file in images]
126 | extrin = [None] * len(images) # [tq2mat(frames[idx][0]) for idx in range(0, total_num, kf)]
127 | self.image_seq.append(images)
128 | self.timestamp.append(timestamp)
129 | self.depth_seq.append(depths)
130 | self.cam_pose_seq.append(extrin)
131 | self.seq_names.append(scene)
132 | self.ids += max(0, len(images)-1)
133 | self.seq_acc_ids.append(self.ids)
134 |
135 | def __load_rgb_tensor(self, path):
136 | """ Load the rgb image
137 | """
138 | image = imread(path)[:, :, :3]
139 | image = image.astype(np.float32) / 255.0
140 | image = resize(image, None, fx=self.fx_s, fy=self.fy_s)
141 | return image
142 |
143 | def __load_depth_tensor(self, path):
144 | """ Load the depth:
145 | The depth images are scaled by a factor of 5000, i.e., a pixel
146 | value of 5000 in the depth image corresponds to a distance of
147 | 1 meter from the camera, 10000 to 2 meter distance, etc.
148 | A pixel value of 0 means missing value/no data.
149 | """
150 | depth = imread(path).astype(np.float32) / 5e3
151 | depth = resize(depth, None, fx=self.fx_s, fy=self.fy_s, interpolation=INTER_NEAREST)
152 | if self.truncate_depth:
153 | depth = np.clip(depth, a_min=0.5, a_max=5.0) # the accurate range of kinect depth
154 | return depth[np.newaxis, :]
155 |
156 | def __getitem__(self, index):
157 | # pair in the way like [[1, 3], [3, 5], [5, 7],...[N-1, N]]
158 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0)
159 | frame_idx = index - self.seq_acc_ids[seq_idx]
160 |
161 | this_idx = frame_idx
162 | next_idx = frame_idx + random.choice(self.keyframes)
163 |
164 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx])
165 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx])
166 |
167 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx])
168 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx])
169 |
170 | if self.transforms:
171 | color0, color1 = self.transforms([color0, color1])
172 |
173 | # normalize the coordinate
174 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32)
175 | calib[0] *= self.fx_s
176 | calib[1] *= self.fy_s
177 | calib[2] *= self.fx_s
178 | calib[3] *= self.fy_s
179 |
180 | # cam_pose0 = self.cam_pose_seq[seq_idx][this_idx]
181 | # cam_pose1 = self.cam_pose_seq[seq_idx][next_idx]
182 | # transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32)
183 | transform = None
184 |
185 | name = {'seq': self.seq_names[seq_idx],
186 | 'frame0': this_idx,
187 | 'frame1': next_idx}
188 |
189 | # camera_info = dict()
190 | camera_info = {"height": color0.shape[0],
191 | "width": color0.shape[1],
192 | "fx": calib[0],
193 | "fy": calib[1],
194 | "ux": calib[2],
195 | "uy": calib[3]}
196 | return color0, color1, depth0, depth1, transform, calib, name, camera_info
197 |
198 |
199 | def get_keypair(self, index, kf_idx=0):
200 | # pair in the way like [[1, 3], [1, 5], [1, 7],...[1, N]]
201 | seq_idx = max(np.searchsorted(self.seq_acc_ids, index + 1) - 1, 0)
202 | frame_idx = index - self.seq_acc_ids[seq_idx]
203 |
204 | this_idx = kf_idx
205 | next_idx = frame_idx
206 |
207 | color0 = self.__load_rgb_tensor(self.image_seq[seq_idx][this_idx])
208 | color1 = self.__load_rgb_tensor(self.image_seq[seq_idx][next_idx])
209 |
210 | depth0 = self.__load_depth_tensor(self.depth_seq[seq_idx][this_idx])
211 | depth1 = self.__load_depth_tensor(self.depth_seq[seq_idx][next_idx])
212 |
213 | if self.transforms:
214 | color0, color1 = self.transforms([color0, color1])
215 |
216 | # normalize the coordinate
217 | calib = np.asarray(self.calib[seq_idx], dtype=np.float32)
218 | calib[0] *= self.fx_s
219 | calib[1] *= self.fy_s
220 | calib[2] *= self.fx_s
221 | calib[3] *= self.fy_s
222 |
223 | # cam_pose0 = self.cam_pose_seq[seq_idx][this_idx]
224 | # cam_pose1 = self.cam_pose_seq[seq_idx][next_idx]
225 | # transform = np.dot(np.linalg.inv(cam_pose1), cam_pose0).astype(np.float32)
226 | transform = None
227 |
228 | name = {'seq': self.seq_names[seq_idx],
229 | 'frame0': this_idx,
230 | 'frame1': next_idx}
231 |
232 | # camera_info = dict()
233 | camera_info = {"height": color0.shape[0],
234 | "width": color0.shape[1],
235 | "fx": calib[0],
236 | "fy": calib[1],
237 | "ux": calib[2],
238 | "uy": calib[3]}
239 | return color0, color1, depth0, depth1, transform, calib, name, camera_info
240 |
241 | def __len__(self):
242 | return self.ids
--------------------------------------------------------------------------------
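The keyframe-pairing convention described in the `__load_test` docstring (subsample the trajectory with stride kf, then pair consecutive kept frames) can be summarised with a small helper; `test_pairs` below is purely illustrative and is not part of the dataset class:

def test_pairs(num_frames, kf):
    """Frame-index pairs produced at test time: subsample by kf, then pair neighbours."""
    kept = list(range(0, num_frames, kf))
    return [[kept[i], kept[i + 1]] for i in range(len(kept) - 1)]

# e.g. test_pairs(9, 2) -> [[0, 2], [2, 4], [4, 6], [6, 8]]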
/code/data/data_examples/TUM/color/1305031790.645155.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.645155.png
--------------------------------------------------------------------------------
/code/data/data_examples/TUM/color/1305031790.713097.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.713097.png
--------------------------------------------------------------------------------
/code/data/data_examples/TUM/color/1305031790.781258.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.781258.png
--------------------------------------------------------------------------------
/code/data/data_examples/TUM/color/1305031790.845151.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.845151.png
--------------------------------------------------------------------------------
/code/data/data_examples/TUM/color/1305031790.913129.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/color/1305031790.913129.png
--------------------------------------------------------------------------------
/code/data/data_examples/TUM/depth/1305031790.640468.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.640468.png
--------------------------------------------------------------------------------
/code/data/data_examples/TUM/depth/1305031790.709421.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.709421.png
--------------------------------------------------------------------------------
/code/data/data_examples/TUM/depth/1305031790.773548.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.773548.png
--------------------------------------------------------------------------------
/code/data/data_examples/TUM/depth/1305031790.839363.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.839363.png
--------------------------------------------------------------------------------
/code/data/data_examples/TUM/depth/1305031790.909436.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/data/data_examples/TUM/depth/1305031790.909436.png
--------------------------------------------------------------------------------
/code/data/dataloader.py:
--------------------------------------------------------------------------------
1 | """ The dataloaders for training and evaluation
2 | # SPDX-FileCopyrightText: 2021 Binbin Xu
3 | # SPDX-License-Identifier: BSD-3-Clause
4 |
5 | @author: Zhaoyang Lv
6 | @date: March 2019
7 | """
8 |
9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 | from __future__ import unicode_literals
13 |
14 | import torchvision.transforms as transforms
15 | import numpy as np
16 | import os
17 | import socket
18 | import yaml
19 | try:
20 | # use faster C loader if available
21 | from yaml import CLoader
22 | except ImportError:
23 | from yaml import Loader as CLoader
24 |
25 |
26 | def get_datasets_path(which_dataset):
27 | # utils_path = os.getcwd().split('/')
28 | # print(utils_path)
29 | # source_folder = '/'.join(utils_path[:-2])
30 | # print(source_folder)
31 | # return source_folder
32 | curr_path = os.path.realpath(__file__)
33 | env_file_path = os.path.realpath(os.path.join(curr_path, '../../../setup/datasets.yaml'))
34 | hostname = str(socket.gethostname())
35 | env_config = yaml.load(open(env_file_path), Loader=CLoader)
36 | return env_config[which_dataset][hostname]['dataset_root']
37 |
38 | TUM_DATASET_DIR = get_datasets_path('TUM_RGBD')
39 | MOVING_OBJECTS_3D = get_datasets_path('MOVING_OBJECTS_3D')
40 | ScanNet_DATASET_DIR = get_datasets_path('SCANNET')
41 | VL_DATASET_DIR = get_datasets_path('VaryLighting')
42 |
43 | def load_data(dataset_name, keyframes = None, load_type = 'train',
44 | select_trajectory = '', load_numpy = False, image_resize=0.25, truncate_depth=True,
45 | options=None, pair='incremental'):
46 |     """ Create a two-frame camera-pose data loader
47 | """
48 | if select_trajectory == '':
49 | select_trajectory = None
50 |
51 | if not load_numpy:
52 | if load_type == 'train':
53 | data_transform = image_transforms(['color_augment', 'numpy2torch'])
54 | else:
55 | data_transform = image_transforms(['numpy2torch'])
56 | else:
57 | data_transform = image_transforms([])
58 |
59 | if dataset_name == 'TUM_RGBD':
60 | from data.TUM_RGBD import TUM
61 | np_loader = TUM(TUM_DATASET_DIR, load_type, keyframes,
62 | data_transform, select_trajectory,
63 | image_resize=image_resize,
64 | truncate_depth=truncate_depth,
65 | add_vl_dataset=options.add_vl_dataset,
66 | )
67 | elif dataset_name == 'ScanNet':
68 | from data.ScanNet import ScanNet
69 | np_loader = ScanNet(ScanNet_DATASET_DIR, load_type, keyframes,
70 | data_transform, select_trajectory,
71 | image_resize=image_resize,
72 | truncate_depth=truncate_depth,
73 | subset_train=options.scannet_subset_train,
74 | subset_val=options.scannet_subset_val,
75 | )
76 | elif dataset_name == 'MovingObjects3D':
77 | from data.MovingObj3D import MovingObjects3D
78 | np_loader = MovingObjects3D(MOVING_OBJECTS_3D, load_type,
79 | keyframes, data_transform,
80 | category=select_trajectory,
81 | image_resize=image_resize,
82 | )
83 | # elif dataset_name == 'BundleFusion':
84 | # from data.BundleFusion import BundleFusion
85 | # np_loader = BundleFusion(load_type, keyframes, data_transform)
86 | # elif dataset_name == 'Refresh':
87 | # from data.REFRESH import REFRESH
88 | # np_loader = REFRESH(load_type, keyframes)
89 | elif dataset_name == 'VaryLighting':
90 | from data.VaryLighting import VaryLighting
91 | np_loader = VaryLighting(VL_DATASET_DIR, load_type, keyframes,
92 | data_transform, select_trajectory,
93 | pair=pair,
94 | image_resize=image_resize,
95 | truncate_depth=truncate_depth,
96 | )
97 | else:
98 | raise NotImplementedError()
99 |
100 | return np_loader
101 |
102 | def image_transforms(options):
103 |
104 | transform_list = []
105 |
106 | if 'color_augment' in options:
107 | augment_parameters = [0.9, 1.1, 0.9, 1.1, 0.9, 1.1]
108 | transform_list.append(AugmentImages(augment_parameters))
109 |
110 | if 'numpy2torch' in options:
111 | transform_list.append(ToTensor())
112 |
113 | # if 'color_normalize' in options: # we do it on the fly
114 | # transform_list.append(ColorNormalize())
115 |
116 | return transforms.Compose(transform_list)
117 |
118 | class ColorNormalize(object):
119 |
120 | def __init__(self):
121 | rgb_mean = (0.4914, 0.4822, 0.4465)
122 | rgb_std = (0.2023, 0.1994, 0.2010)
123 | self.transform = transforms.Normalize(mean=rgb_mean, std=rgb_std)
124 |
125 | def __call__(self, sample):
126 | return [self.transform(x) for x in sample]
127 |
128 | class ToTensor(object):
129 | def __init__(self):
130 | self.transform = transforms.ToTensor()
131 |
132 | def __call__(self, sample):
133 | return [self.transform(x) for x in sample]
134 |
135 | class AugmentImages(object):
136 | def __init__(self, augment_parameters):
137 | self.gamma_low = augment_parameters[0] # 0.9
138 | self.gamma_high = augment_parameters[1] # 1.1
139 | self.brightness_low = augment_parameters[2] # 0.9
140 |         self.brightness_high = augment_parameters[3] # 1.1
141 | self.color_low = augment_parameters[4] # 0.9
142 | self.color_high = augment_parameters[5] # 1.1
143 |
144 | self.thresh = 0.5
145 |
146 | def __call__(self, sample):
147 | p = np.random.uniform(0, 1, 1)
148 | if p > self.thresh:
149 | random_gamma = np.random.uniform(self.gamma_low, self.gamma_high)
150 | random_brightness = np.random.uniform(self.brightness_low, self.brightness_high)
151 | random_colors = np.random.uniform(self.color_low, self.color_high, 3)
152 |             augmented = []
153 |             for x in sample:
154 |                 x = x ** random_gamma # randomly shift gamma
155 |                 x = x * random_brightness # randomly shift brightness
156 |                 for i in range(3): # randomly shift color
157 |                     x[:, :, i] *= random_colors[i]
158 |                 x = np.clip(x, a_min=0, a_max=1) # saturate
159 |                 augmented.append(x)
160 |             return augmented
161 |         else:
162 |             return sample
163 |
--------------------------------------------------------------------------------
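get_datasets_path() above indexes setup/datasets.yaml first by dataset name and then by hostname. The snippet below sketches the structure it expects, with a made-up hostname and placeholder paths; the real setup/datasets.yaml in this repository is the authoritative template:

import socket
import yaml

example_yaml = """
TUM_RGBD:
  my-workstation:
    dataset_root: /data/TUM_RGBD
VaryLighting:
  my-workstation:
    dataset_root: /data/VaryLighting
"""

env_config = yaml.safe_load(example_yaml)
hostname = str(socket.gethostname())        # must appear as a key under each dataset entry
# mirrors env_config[which_dataset][hostname]['dataset_root'] in get_datasets_path()
print(env_config['TUM_RGBD'].get('my-workstation', {}).get('dataset_root'))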
/code/evaluate.py:
--------------------------------------------------------------------------------
1 | """
2 | Evaluation scripts to evaluate the tracking accuracy of the proposed method
3 |
4 | # SPDX-FileCopyrightText: 2021 Binbin Xu
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | @author: Zhaoyang Lv
8 | @date: March 2019
9 | """
10 |
11 | import os, sys, argparse, pickle
12 | import os.path as osp
13 | import numpy as np
14 | import pandas as pd
15 | from tools.rgbd_odometry import RGBDOdometry
16 | from tools.ICP import ICP_Odometry
17 |
18 | import torch
19 | import torch.utils.data as data
20 | import torchvision.utils as torch_utils
21 | import torch.nn as nn
22 |
23 | import models.LeastSquareTracking as ICtracking
24 | import models.criterions as criterions
25 | import train_utils
26 | import config
27 | from Logger import check_directory
28 |
29 | from data.dataloader import load_data
30 | from timers import Timers
31 | from tqdm import tqdm
32 |
33 |
34 | def eval_trajectories(dataset):
35 | if dataset == 'TUM_RGBD':
36 | return {
37 | 'TUM_RGBD': ['rgbd_dataset_freiburg1_360',
38 | 'rgbd_dataset_freiburg1_desk',
39 | 'rgbd_dataset_freiburg2_desk',
40 | 'rgbd_dataset_freiburg2_pioneer_360']
41 | }[dataset]
42 | elif dataset == 'MovingObjects3D':
43 | return {
44 | 'MovingObjects3D': ['boat',
45 | 'motorbike',
46 | ]
47 | }[dataset]
48 | elif dataset == 'ScanNet':
49 | return {
50 | 'ScanNet': ['scene0565_00',
51 | 'scene0011_00',
52 | ]
53 | }[dataset]
54 | else:
55 | raise NotImplementedError()
56 |
57 |
58 | def nostructure_trajectory(dataset):
59 | if dataset == 'TUM_RGBD':
60 | return {
61 | 'TUM_RGBD': ['rgbd_dataset_freiburg3_nostructure_notexture_far',
62 | 'rgbd_dataset_freiburg3_nostructure_notexture_near_withloop',
63 | 'rgbd_dataset_freiburg3_nostructure_texture_far',
64 | 'rgbd_dataset_freiburg3_nostructure_texture_near_withloop']
65 | }[dataset]
66 | else:
67 | raise NotImplementedError()
68 |
69 |
70 | def notexture_trajectory(dataset):
71 | if dataset == 'TUM_RGBD':
72 | return {
73 | 'TUM_RGBD': ['rgbd_dataset_freiburg3_nostructure_notexture_far',
74 | 'rgbd_dataset_freiburg3_nostructure_notexture_near_withloop',
75 | 'rgbd_dataset_freiburg3_structure_notexture_near',
76 | ]
77 | }[dataset]
78 | else:
79 | raise NotImplementedError()
80 |
81 |
82 | def structure_texture_trajectory(dataset):
83 | if dataset == 'TUM_RGBD':
84 | return {
85 | 'TUM_RGBD': ['rgbd_dataset_freiburg3_structure_texture_far',
86 | 'rgbd_dataset_freiburg3_structure_texture_near',]
87 | }[dataset]
88 | else:
89 | raise NotImplementedError()
90 |
91 |
92 | def create_eval_loaders(options, eval_type, keyframes,
93 | total_batch_size = 8,
94 | trajectory = ''):
95 |     """ create the evaluation loaders for different keyframe set-ups
96 | """
97 | eval_loaders = {}
98 |
99 | if trajectory == '':
100 | trajectories = eval_trajectories(options.dataset)
101 | elif trajectory == 'nostructure':
102 | trajectories = nostructure_trajectory(options.dataset)
103 | elif trajectory == 'notexture':
104 | trajectories = notexture_trajectory(options.dataset)
105 | elif trajectory == 'structure_texture':
106 | trajectories = structure_texture_trajectory(options.dataset)
107 | else:
108 | trajectories = [trajectory]
109 |
110 | for trajectory in trajectories:
111 | for kf in keyframes:
112 | if options.image_resize is not None:
113 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory,
114 | image_resize=options.image_resize, options=options)
115 | else:
116 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory, options=options)
117 | eval_loaders['{:}_keyframe_{:}'.format(trajectory, kf)] = data.DataLoader(np_loader,
118 | batch_size = int(total_batch_size),
119 | shuffle = False, num_workers = options.cpu_workers)
120 |
121 | return eval_loaders
122 |
123 | def evaluate_trust_region(dataloader, net, objectives, eval_name='',
124 | known_mask = False, timers = None, logger=None, epoch=0, obj_only=False, tracker='learning_based'):
125 |     """ evaluate the trust-region method on two-frame pose estimation
126 |     :param dataloader: the pytorch dataloader
127 |     :param net: the network
128 |     :param objectives: the evaluation objective names, e.g. RPE, EPE3D
129 |     :param known_mask: True if the ground-truth object mask is known
130 |     :param timers: (optional) timers for each step
131 | """
132 |
133 | progress = tqdm(dataloader, ncols=100,
134 | desc = 'evaluate deeper inverse compositional algorithm {:}'.format(eval_name),
135 | total= len(dataloader))
136 |
137 | if tracker == 'learning_based':
138 | net.eval()
139 |
140 | total_frames = len(dataloader.dataset)
141 |
142 | outputs = {
143 | 'R_est': np.zeros((total_frames, 3, 3)),
144 | 't_est': np.zeros((total_frames, 3)),
145 | 'names': []
146 | }
147 | flow_loss, rpe_loss = None, None
148 | if 'EPE3D' in objectives:
149 | flow_loss = criterions.compute_RT_EPE_loss
150 | outputs['epes'] = np.zeros(total_frames)
151 | if 'RPE' in objectives:
152 | rpe_loss = criterions.compute_RPE_loss
153 | outputs['angular_error'] = np.zeros(total_frames)
154 | outputs['translation_error'] = np.zeros(total_frames)
155 |
156 | count_base = 0
157 |
158 | if timers: timers.tic('one iteration')
159 |
160 | count = 1
161 | for idx, batch in enumerate(progress):
162 |
163 | if timers: timers.tic('forward step')
164 |
165 | names = batch[-1]
166 |
167 | if known_mask: # for dataset that with mask or need mask
168 | color0, color1, depth0, depth1, Rt, K, obj_mask0, obj_mask1 = \
169 | train_utils.check_cuda(batch[:8])
170 | else:
171 | color0, color1, depth0, depth1, Rt, K = \
172 | train_utils.check_cuda(batch[:6])
173 | obj_mask0, obj_mask1 = None, None
174 |
175 | B, _, H, W = depth0.shape
176 | iter = epoch * total_frames + count_base
177 | with torch.no_grad():
178 | if tracker == 'learning_based':
179 | if obj_only:
180 | output = net.forward(color0, color1, depth0, depth1, K,
181 | obj_mask0=obj_mask0, obj_mask1=obj_mask1,
182 | logger=logger, iteration=iter)
183 | else:
184 | output = net.forward(color0, color1, depth0, depth1, K,
185 | logger=logger, iteration=iter)
186 |             elif tracker in ['ColorICP', 'ICP', 'RGBD']:
187 | if obj_only:
188 | output = net.batch_track(color0, depth0, color1, depth1, K,
189 | batch_objmask0=obj_mask0, batch_objmask1=obj_mask1)
190 | else:
191 | output = net.batch_track(color0, depth0, color1, depth1, K)
192 | else:
193 | raise NotImplementedError("unsupported test tracker: check argument of --tracker again")
194 | R, t = output
195 |
196 | if timers: timers.toc('forward step')
197 |
198 | outputs['R_est'][count_base:count_base+B] = R.cpu().numpy()
199 | outputs['t_est'][count_base:count_base+B] = t.cpu().numpy()
200 |
201 | if timers: timers.tic('evaluate')
202 | R_gt, t_gt = Rt[:,:3,:3], Rt[:,:3,3]
203 | if rpe_loss: # evaluate the relative pose error
204 | angle_error, trans_error = rpe_loss(R, t, R_gt, t_gt)
205 | outputs['angular_error'][count_base:count_base+B] = angle_error.cpu().numpy()
206 | outputs['translation_error'][count_base:count_base+B] = trans_error.cpu().numpy()
207 |
208 | if flow_loss:# evaluate the end-point-error loss 3D
209 | invalid_mask = (depth0 == depth0.min()) | (depth0 == depth0.max())
210 | if obj_mask0 is not None:
211 | invalid_mask = ~obj_mask0 | invalid_mask
212 |
213 | epes3d = flow_loss(R, t, R_gt, t_gt, depth0, K, invalid=invalid_mask)
214 | outputs['epes'][count_base:count_base+B] = epes3d.cpu().numpy()
215 |
216 | outputs['names'] += names
217 |
218 | count_base += B
219 |
220 | if timers: timers.toc('evaluate')
221 | if timers: timers.toc('one iteration')
222 | if timers: timers.tic('one iteration')
223 |
224 | if timers: timers.print()
225 |
226 | return outputs
227 |
228 | def test_TrustRegion(options):
229 |
230 | if options.time:
231 | timers = Timers()
232 | else:
233 | timers = None
234 |
235 | print('Evaluate test performance with the (deep) direct method.')
236 |
237 | total_batch_size = options.batch_per_gpu * torch.cuda.device_count()
238 |
239 | keyframes = [int(x) for x in options.keyframes.split(',')]
240 | if options.dataset in ['BundleFusion', 'TUM_RGBD']:
241 | obj_has_mask = False
242 | else:
243 | obj_has_mask = True
244 |
245 | eval_loaders = create_eval_loaders(options, options.eval_set,
246 | keyframes, total_batch_size, options.trajectory)
247 |
248 | if options.tracker == 'learning_based':
249 | if options.checkpoint == '':
250 |             print('No checkpoint loaded. Using the non-learning method.')
251 | net = ICtracking.LeastSquareTracking(
252 | encoder_name = 'RGB',
253 | uncertainty_type=options.uncertainty,
254 | direction=options.direction,
255 | max_iter_per_pyr= options.max_iter_per_pyr,
256 | options=options,
257 | mEst_type = 'None',
258 | solver_type = 'Direct-Nodamping')
259 | if torch.cuda.is_available(): net.cuda()
260 | net.eval()
261 | else:
262 | train_utils.load_checkpoint_test(options)
263 |
264 | net = ICtracking.LeastSquareTracking(
265 | encoder_name = options.encoder_name,
266 | uncertainty_type=options.uncertainty,
267 | direction=options.direction,
268 | max_iter_per_pyr= options.max_iter_per_pyr,
269 | mEst_type = options.mestimator,
270 | options=options,
271 | solver_type = options.solver,
272 | no_weight_sharing = options.no_weight_sharing)
273 |
274 | if torch.cuda.is_available(): net.cuda()
275 | net.eval()
276 |
277 | # check whether it is a single checkpoint or a directory
278 | net.load_state_dict(torch.load(options.checkpoint)['state_dict'])
279 | elif options.tracker == 'ICP':
280 | icp_tracker = ICP_Odometry('Point2Plane')
281 | net = icp_tracker
282 | elif options.tracker == 'ColorICP':
283 | color_icp_tracker = ICP_Odometry('ColorICP')
284 | net = color_icp_tracker
285 | elif options.tracker == 'RGBD':
286 | rgbd_tracker = RGBDOdometry("RGBD")
287 | net = rgbd_tracker
288 | else:
289 | raise NotImplementedError("unsupported test tracker: check argument of --tracker again")
290 |
291 | eval_objectives = ['EPE3D', 'RPE']
292 |
293 | output_prefix = '_'.join([
294 | options.network,
295 | options.encoder_name,
296 | options.mestimator,
297 | options.solver,
298 | 'iter', str(options.max_iter_per_pyr)
299 | ])
300 |
301 | # evaluate results per trajectory per key-frame
302 | outputs = {}
303 | for k, loader in eval_loaders.items():
304 |
305 | traj_name, kf = k.split('_keyframe_')
306 |
307 | output_name = '{:}_{:}'.format(output_prefix, k)
308 | info = evaluate_trust_region(loader, net,
309 | eval_objectives,
310 | eval_name = 'tmp/'+output_name,
311 | known_mask=obj_has_mask,
312 | obj_only=options.obj_only,
313 | tracker=options.tracker,
314 | timers=timers,
315 | )
316 |
317 | # collect results
318 | outputs[k] = pd.Series([info['epes'].mean(),
319 | info['angular_error'].mean(),
320 | info['translation_error'].mean(),
321 | info['epes'].shape[0], int(kf), traj_name],
322 | index=['3D EPE', 'axis error', 'trans error', 'total frames', 'keyframe', 'trajectory'])
323 |
324 | print(outputs[k])
325 |
326 | checkpoint_name = options.checkpoint.replace('.pth.tar', '')
327 | if checkpoint_name == '':
328 | checkpoint_name = 'nolearning'
329 | if options.tracker in ['ColorICP', 'ICP', 'RGBD']:
330 | checkpoint_name += ('_'+ options.tracker)
331 | output_dir = osp.join(options.eval_set+'_results', checkpoint_name, k)
332 | output_pkl = output_dir + '.pkl'
333 |
334 | check_directory(output_pkl)
335 |
336 | with open(output_pkl, 'wb') as output: # dump per-frame results info
337 |             pickle.dump(info, output)
338 |
339 |
340 | """ =============================================================== """
341 | """ Generate the final evaluation results """
342 | """ =============================================================== """
343 |
344 | outputs_pd = pd.DataFrame(outputs).T
345 | outputs_pd['3D EPE'] *= 100 # convert to cm
346 | outputs_pd['axis error'] *= (180/np.pi) # convert to degree
347 | outputs_pd['trans error'] *= 100 # convert to cm
348 |
349 | print(outputs_pd)
350 |
351 | stats_dict = {}
352 | for kf in keyframes:
353 | kf_outputs = outputs_pd[outputs_pd['keyframe']==kf]
354 |
355 | stats_dict['mean values of trajectories keyframe {:}'.format(kf)] = pd.Series(
356 | [kf_outputs['3D EPE'].mean(),
357 | kf_outputs['axis error'].mean(),
358 | kf_outputs['trans error'].mean(), kf],
359 | index=['3D EPE', 'axis error', 'trans error', 'keyframe'])
360 |
361 | total_frames = kf_outputs['total frames'].sum()
362 | stats_dict['mean values of frames keyframe {:}'.format(kf)] = pd.Series(
363 | [(kf_outputs['3D EPE'] * kf_outputs['total frames']).sum() / total_frames,
364 | (kf_outputs['axis error'] * kf_outputs['total frames']).sum() / total_frames,
365 | (kf_outputs['trans error']* kf_outputs['total frames']).sum() / total_frames, kf],
366 | index=['3D EPE', 'axis error', 'trans error', 'keyframe'])
367 |
368 | stats_pd = pd.DataFrame(stats_dict).T
369 | print(stats_pd)
370 |
371 | final_pd = outputs_pd.append(stats_pd, sort=False)
372 | final_pd.to_csv('{:}.csv'.format(output_dir))
373 |
374 | return outputs_pd
375 |
376 | if __name__ == '__main__':
377 |
378 | parser = argparse.ArgumentParser(description="Evaluate the network")
379 | config.add_basics_config(parser)
380 | config.add_test_basics_config(parser)
381 | config.add_tracking_config(parser)
382 |
383 | options = parser.parse_args()
384 |
385 | print('---------------------------------------')
386 |
387 | outputs = test_TrustRegion(options)
388 |
389 |
--------------------------------------------------------------------------------
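The final statistics in test_TrustRegion() are computed in two ways: an unweighted mean over trajectories and a mean over frames weighted by 'total frames'. The pandas sketch below reproduces that aggregation on made-up numbers, only to make the weighting explicit:

import pandas as pd

outputs_pd = pd.DataFrame({
    '3D EPE':       [1.2, 0.8],      # cm, per trajectory (illustrative values)
    'axis error':   [0.5, 0.3],      # degrees
    'trans error':  [1.0, 0.6],      # cm
    'total frames': [100, 300],
    'keyframe':     [1, 1],
})

kf_outputs = outputs_pd[outputs_pd['keyframe'] == 1]
mean_over_traj   = kf_outputs[['3D EPE', 'axis error', 'trans error']].mean()
weights          = kf_outputs['total frames']
mean_over_frames = (kf_outputs[['3D EPE', 'axis error', 'trans error']]
                    .mul(weights, axis=0).sum() / weights.sum())
print(mean_over_traj, mean_over_frames, sep='\n')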
/code/experiments/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/experiments/__init__.py
--------------------------------------------------------------------------------
/code/experiments/kf_vo.py:
--------------------------------------------------------------------------------
1 | """ Script to run keyframe visual odometry on a sequence of images
2 | using the proposed probablilistic feature-metric trackingmethod.
3 | # SPDX-FileCopyrightText: 2021 Binbin Xu
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | """
6 |
7 | #!/usr/bin/env python
8 |
9 | # import standard library
10 | import os
11 | import sys
12 | import argparse
13 | import os.path as osp
14 | # import third party
15 | import cv2
16 | from evaluate import create_eval_loaders
17 |
18 | import numpy as np
19 | # opengl/trimesh visualization
20 | import pyglet
21 | import trimesh
22 | import trimesh.viewer as tv
23 | import trimesh.transformations as tf
24 | import torch
25 | from imageio import imread
26 |
27 | import config
28 | from models.geometry import batch_create_transform
29 | from experiments.select_method import select_method
30 | from train_utils import check_cuda
31 | from data.dataloader import load_data
32 | from Logger import check_directory
33 |
34 |
35 | def init_scene(scene):
36 | scene.geometry = {}
37 | scene.graph.clear()
38 | scene.init = True
39 |
40 | # clear poses
41 | scene.gt_poses = []
42 | scene.est_poses = []
43 | scene.timestamps = []
44 |
45 | return scene
46 |
47 |
48 | def camera_transform(transform=None):
49 | if transform is None:
50 | transform = np.eye(4)
51 | return transform @ trimesh.transformations.rotation_matrix(
52 | np.deg2rad(-180), [1, 0, 0]
53 | )
54 |
55 |
56 | def pointcloud_from_depth(
57 | depth: np.ndarray,
58 | fx: float,
59 | fy: float,
60 | cx: float,
61 | cy: float,
62 | depth_type: str = 'z',
63 | ) -> np.ndarray:
64 | assert depth_type in ['z', 'euclidean'], 'Unexpected depth_type'
65 | assert depth.dtype.kind == 'f', 'depth must be float and have meter values'
66 |
67 | rows, cols = depth.shape
68 | c, r = np.meshgrid(np.arange(cols), np.arange(rows), sparse=True)
69 | valid = ~np.isnan(depth)
70 | z = np.where(valid, depth, np.nan)
71 | x = np.where(valid, z * (c - cx) / fx, np.nan)
72 | y = np.where(valid, z * (r - cy) / fy, np.nan)
73 | pc = np.dstack((x, y, z))
74 |
75 | if depth_type == 'euclidean':
76 | norm = np.linalg.norm(pc, axis=2)
77 | pc = pc * (z / norm)[:, :, None]
78 | return pc
79 |
80 |
81 | def callback(scene):
82 | if not scene.is_play:
83 | return
84 |
85 | dataset = scene.dataloader
86 | options = scene.options
87 | if scene.index >= len(dataset):
88 | return
89 |
90 | if scene.vo_type == 'incremental':
91 | batch = dataset[scene.index - 1]
92 | else:
93 | batch = dataset.get_keypair(scene.index)
94 | color0, color1, depth0, depth1, GT_Rt, intrins, name = check_cuda(
95 | batch[:7])
96 |
97 | scene_id = name['seq']
98 |
99 | # Reset scene for new scene.
100 | if scene_id != scene.video_id:
101 | scene = init_scene(scene)
102 | scene.init_idx = scene.index
103 | scene.video_id = scene_id
104 | else:
105 | scene.init = False
106 |
107 | GT_WC = dataset.cam_pose_seq[0][scene.index]
108 | depth_file = dataset.depth_seq[0][scene.index]
109 | if not options.save_img:
110 | # half resolution
111 | rgb = color1.permute((1, 2, 0)).cpu().numpy()
112 | depth = imread(depth_file).astype(np.float32) / 5e3
113 | depth = cv2.resize(depth, None, fx=dataset.fx_s,
114 | fy=dataset.fy_s, interpolation=cv2.INTER_NEAREST)
115 | K = {"fx": intrins[0].item(), "fy": intrins[1].item(),
116 | "ux": intrins[2].item(), "uy": intrins[3].item()}
117 | else:
118 | # original resolution for demo
119 | rgb = imread(dataset.image_seq[0][scene.index])
120 | depth = imread(depth_file).astype(np.float32) / 5e3
121 | calib = np.asarray(dataset.calib[0], dtype=np.float32)
122 | K = {"fx": calib[0], "fy": calib[1],
123 | "ux": calib[2], "uy": calib[3]}
124 |
125 | # save input rgb and depth images
126 | img_index_png = str(scene.index).zfill(5)+'.png'
127 | if options.dataset == 'VaryLighting':
128 | output_folder = osp.join(dataset.seq_names[0], 'kf_vo', options.vo)
129 | else:
130 | output_folder = os.path.join(
131 | '/home/binbin/Pictures', 'kf_vo', options.vo)
132 |
133 | rgb_img = osp.join(output_folder, 'rgb', img_index_png)
134 | depth_img = osp.join(output_folder, 'depth', img_index_png)
135 | check_directory(rgb_img)
136 | check_directory(depth_img)
137 | cv2.imwrite(rgb_img, cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))
138 | cv2.imwrite(depth_img, depth)
139 |
140 | if scene.init:
141 | if GT_WC is not None:
142 | T_WC = GT_WC
143 | else:
144 | T_WC = np.array([
145 | [0.0, -1.0, 0.0, -0.0],
146 | [-1.0, 0.0, 0.0, 0.0],
147 | [0.0, 0.0, -1.0, 0.0],
148 | [0.0, 0.0, 0.0, 1.0],
149 | ])
150 | # T_WC = np.eye(4)
151 | scene.T_WC = T_WC
152 | if scene.vo_type == 'keyframe':
153 | scene.T_WK = T_WC
154 | else:
155 | with torch.no_grad():
156 | color0 = color0.unsqueeze(dim=0)
157 | color1 = color1.unsqueeze(dim=0)
158 | depth0 = depth0.unsqueeze(dim=0)
159 | depth1 = depth1.unsqueeze(dim=0)
160 | intrins = intrins.unsqueeze(dim=0)
161 | if options.save_img:
162 | output = scene.network.forward(
163 | color0, color1, depth0, depth1, intrins, index=scene.index)
164 | else:
165 | output = scene.network.forward(
166 | color0, color1, depth0, depth1, intrins)
167 | R, t = output
168 | if scene.is_gt_tracking:
169 | T_WC = GT_WC
170 | scene.T_WC = T_WC
171 | else:
172 | if scene.vo_type == 'incremental':
173 | T_CR = batch_create_transform(t, R)
174 | # T_CR = GT_Rt
175 | T_CR = T_CR.squeeze(dim=0).cpu().numpy()
176 | T_WC = np.dot(scene.T_WC, np.linalg.inv(
177 | T_CR)).astype(np.float32)
178 | elif scene.vo_type == 'keyframe':
179 | T_CK = batch_create_transform(t, R)
180 | # T_CK = GT_Rt
181 | T_CK = T_CK.squeeze(dim=0).cpu().numpy()
182 | T_WC = np.dot(scene.T_WK, np.linalg.inv(
183 | T_CK)).astype(np.float32)
184 |
185 | # print large drift in keyframe tracking,
186 |                         # just to flag a possible tracking failure, not used later
187 | T_CC = np.dot(np.linalg.inv(T_WC),
188 | scene.T_WC).astype(np.float32)
189 | trs_drift = np.copy(T_CC[0:3, 3:4]).transpose()
190 | if np.linalg.norm(trs_drift) > 0.02:
191 | print(depth_file)
192 | else:
193 | raise NotImplementedError()
194 | scene.T_WC = T_WC
195 |
196 | pcd = pointcloud_from_depth(
197 | depth, fx=K['fx'], fy=K['fy'], cx=K['ux'], cy=K['uy']
198 | )
199 | nonnan = ~np.isnan(depth)
200 | geom = trimesh.PointCloud(vertices=pcd[nonnan], colors=rgb[nonnan])
201 | # XYZ->RGB, Z is blue
202 | if options.dataset == 'VaryLighting':
203 | axis = trimesh.creation.axis(0.005, origin_color=(0, 0, 0))
204 | elif options.dataset in ['TUM_RGBD', 'ScanNet']:
205 | axis = trimesh.creation.axis(0.01, origin_color=(0, 0, 0))
206 | else:
207 | raise NotImplementedError()
208 |
209 | # two view: keyframe - live frames visualization
210 | if scene.vo_type == 'keyframe' and scene.two_view:
211 | if scene.init:
212 | scene.add_geometry(geom, transform=scene.T_WK, geom_name='key')
213 | scene.add_geometry(geom, transform=T_WC, geom_name='live')
214 | scene.add_geometry(axis, transform=T_WC, geom_name='camera_view')
215 | else:
216 | # after the first view, delete the old live view and add new live view
217 | scene.delete_geometry('live')
218 | scene.delete_geometry('camera_view')
219 | scene.add_geometry(geom, transform=T_WC, geom_name='live')
220 | scene.add_geometry(axis, transform=T_WC, geom_name='camera_view')
221 |
222 | else:
223 | scene.add_geometry(geom, transform=T_WC)
224 |
225 | # draw camera trajectory
226 | trs = np.copy(T_WC[0:3, 3:4]).transpose()
227 | cam = trimesh.PointCloud(vertices=trs, colors=[255, 0, 0])
228 | scene.add_geometry(cam)
229 |
230 | scene.add_geometry(axis, transform=T_WC)
231 |
232 | if scene.last_pose is not None:
233 | poses_seg = np.stack((scene.last_pose, trs), axis=1)
234 | cam_seg = trimesh.load_path(poses_seg)
235 | scene.add_geometry(cam_seg)
236 | scene.last_pose = trs
237 |
238 |     # A kind of current camera view, but a bit farther away to see the whole scene.
239 | scene.camera.resolution = (rgb.shape[1], rgb.shape[0])
240 | scene.camera.focal = (K['fx'], K['fy'])
241 | if dataset.realscene:
242 | if options.save_img:
243 | if scene.vo_type == 'keyframe':
244 | # T_see = np.array([
245 | # [1.000, 0.000, 0.000, 0.2],
246 | # [0.000, 0.866, 0.500, -0.7],
247 | # [0.000, -0.500, 0.866, -0.7],
248 | # [0.000, 0.000, 0.000, 1.0],
249 | # ])
250 | T_see = np.array([
251 | [1.000, 0.000, 0.000, 0.2],
252 | [0.000, 0.866, 0.500, -0.7],
253 | [0.000, -0.500, 0.866, -0.8],
254 | [0.000, 0.000, 0.000, 1.0],
255 | ])
256 |
257 | # T_see = np.array([
258 | # [1.000, 0.000, 0.000, 0.2],
259 | # [0.000, 0.985, 0.174, -0.3],
260 | # [0.000, -0.174, 0.985, -0.6],
261 | # [0.000, 0.000, 0.000, 1.0],
262 | # ])
263 | scene.camera_transform = camera_transform(
264 | np.matmul(scene.T_WK, T_see)
265 | )
266 | else:
267 | # if scene.index < 140:
268 | # T_see = np.array([
269 | # [1.000, 0.000, 0.000, 0.2],
270 | # [0.000, 0.866, 0.500, -2.0],
271 | # [0.000, -0.500, 0.866, -2.0],
272 | # [0.000, 0.000, 0.000, 1.0],
273 | # ])
274 | # scene.camera_transform = camera_transform(
275 | # np.matmul(scene.T_WC, T_see)
276 | # )
277 | pass
278 | else:
279 |             # adjust which transformation is used to set the viewing pose
280 | if scene.vo_type == 'keyframe':
281 | T_see = np.array([
282 | [1.000, 0.000, 0.000, 0.2],
283 | [0.000, 0.866, 0.500, -0.7],
284 | [0.000, -0.500, 0.866, -0.8],
285 | [0.000, 0.000, 0.000, 1.0],
286 | ])
287 | # T_see = np.array([
288 | # [1.000, 0.000, 0.000, 0.2],
289 | # [0.000, 0.985, 0.174, -0.3],
290 | # [0.000, -0.174, 0.985, -0.6],
291 | # [0.000, 0.000, 0.000, 1.0],
292 | # ])
293 | # T_see = np.array([
294 | # [1.000, 0.000, 0.000, 0.2],
295 | # [0.000, 0.985, 0.174, -0.3],
296 | # [0.000, -0.174, 0.985, -0.6],
297 | # [0.000, 0.000, 0.000, 1.0],
298 | # ])
299 | # T_see = np.array([
300 | # [1.000, 0.000, 0.000, 0.2],
301 | # [0.000, 0.866, 0.500, -0.8],
302 | # [0.000, -0.500, 0.866, -0.8],
303 | # [0.000, 0.000, 0.000, 1.0],
304 | # ])
305 |
306 | scene.camera_transform = camera_transform(
307 | np.matmul(scene.T_WK, T_see)
308 | )
309 | else:
310 | scene.camera.transform = T_WC @ tf.translation_matrix([0, 0, 2.5])
311 |
312 | # if scene.index == scene.init_idx + 1:
313 | # input()
314 | print(scene.index)
315 | scene.index += 1 # scene.track_config['frame_step']
316 | # print("<=================================")
317 | if options.save_img:
318 | return
319 |
320 |
321 | def main(options):
322 |
323 | if options.dataset == 'TUM_RGBD':
324 | sequence_dir = 'rgbd_dataset_freiburg1_desk'
325 | np_loader = load_data('TUM_RGBD', keyframes=[1, ], load_type='test',
326 | select_trajectory=sequence_dir,
327 | truncate_depth=True,
328 | options=options,
329 | load_numpy=False)
330 | elif options.dataset == 'VaryLighting':
331 | np_loader = load_data('VaryLighting', keyframes=[1, ], load_type='test',
332 | select_trajectory='scene17_demo', # 'l_scene3',
333 | truncate_depth=True,
334 | load_numpy=False,
335 | pair=options.vo_type,
336 | )
337 | elif options.dataset == 'ScanNet':
338 | np_loader = load_data('ScanNet', keyframes=[1, ], load_type='test',
339 | select_trajectory='scene0593_00',
340 | truncate_depth=True,
341 | load_numpy=False,
342 | options=options,
343 | )
344 |
345 | scene = trimesh.Scene()
346 | scene.dataloader = np_loader
347 | scene.dataloader.realscene = True
348 | # total_batch_size = options.batch_per_gpu * torch.cuda.device_count()
349 |
350 | # keyframes = [int(x) for x in options.keyframes.split(',')]
351 | # if options.dataset in ['BundleFusion', 'TUM_RGBD']:
352 | # obj_has_mask = False
353 | # else:
354 | # obj_has_mask = True
355 |
356 | # eval_loaders = create_eval_loaders(options, options.eval_set,
357 | # [1,], total_batch_size, options.trajectory)
358 |
359 | tracker = select_method(options.vo, options)
360 | scene.network = tracker
361 |
362 | scene.index = 0 # config['start_frame'] # starting frame e.g. 60
363 | scene.video_id = None
364 | scene.last_pose = None
365 | scene.is_gt_tracking = options.gt_tracker
366 | scene.init = False # True only for the first frame
367 | scene.is_play = True # immediately start playing when called
368 | scene.vo_type = options.vo_type
369 | scene.two_view = options.two_view
370 | scene.options = options
371 |
372 | callback(scene)
373 | window = trimesh.viewer.SceneViewer(
374 | scene, callback=callback, start_loop=False, resolution=(1080, 720)
375 | )
376 |
377 | @window.event
378 | def on_key_press(symbol, modifiers):
379 | if modifiers == 0:
380 | if symbol == pyglet.window.key.P:
381 | scene.is_play = not scene.is_play
382 |
383 | print('Press P key to pause/resume.')
384 |
385 | if not options.save_img:
386 | # scene.show()
387 | pyglet.app.run()
388 | else:
389 | # import pyrender
390 | # scene_pyrender = pyrender.Scene.from_trimesh(scene)
391 | # renderer = pyrender.OffscreenRenderer(viewport_height=480, viewport_width=640, point_size=1)
392 | # rgb, depth = renderer.render(scene_pyrender)
393 |
394 | if options.dataset == 'VaryLighting':
395 | output_dir = osp.join(np_loader.seq_names[0], 'kf_vo', options.vo)
396 | else:
397 | output_dir = os.path.join(
398 | '/home/binbin/Pictures', 'kf_vo', options.vo)
399 | check_directory(output_dir + '/*.png')
400 | for frame_id in range(len(scene.dataloader)):
401 | # scene.save_image()
402 | callback(scene)
403 | file_name = os.path.join(output_dir, 'render', str(
404 | scene.index-1).zfill(5) + '.png')
405 | check_directory(file_name)
406 | with open(file_name, "wb") as f:
407 | f.write(scene.save_image())
408 | f.close()
409 |
410 |
411 | if __name__ == "__main__":
412 | parser = argparse.ArgumentParser(description="Evaluate the network")
413 | config.add_basics_config(parser)
414 | config.add_test_basics_config(parser)
415 | config.add_tracking_config(parser)
416 | config.add_vo_config(parser)
417 |
418 | options = parser.parse_args()
419 | # to save visualization: --save_img and --vis_feat
420 | print('---------------------------------------')
421 | main(options)
422 |
--------------------------------------------------------------------------------
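The two pose-update rules buried in callback() are worth isolating: incremental VO composes the estimated relative transform onto the previous camera pose, while keyframe VO composes it onto the stored keyframe pose. A minimal numpy sketch follows (update_pose is illustrative and not a function in this file):

import numpy as np

def update_pose(T_prev, T_rel, vo_type, T_WK=None):
    """Return the new world-from-camera pose T_WC given the tracker's relative transform."""
    if vo_type == 'incremental':
        return T_prev @ np.linalg.inv(T_rel)   # T_WC = T_WC_prev * inv(T_CR)
    elif vo_type == 'keyframe':
        return T_WK @ np.linalg.inv(T_rel)     # T_WC = T_WK * inv(T_CK)
    raise NotImplementedError(vo_type)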
/code/experiments/select_method.py:
--------------------------------------------------------------------------------
1 | """
2 | A wrapper to select different methods for comparison
3 | # SPDX-FileCopyrightText: 2021 Binbin Xu
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | """
6 |
7 | import torch
8 | from models.LeastSquareTracking import LeastSquareTracking
9 |
10 | # load comparison methods
11 | def select_method(method_name, options):
12 | assert method_name in ['DeepIC', 'RGB', 'ICP', 'RGB+ICP', 'feature', 'feature_icp']
13 | if method_name == 'DeepIC':
14 | print('==>Load DeepIC method')
15 | if options.dataset == 'MovingObjects3D':
16 | deepIC_checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/logs/MovingObjects3D/124/cvpr124_ConvRGBD2_MultiScale2w_Direct-ResVol_MovingObjects3D_obj_False_uCh_1_None_rmT_False_fCh_1_average_iP_identity_mH_None_wICP_False_s_None_lr_0.0005_batch_64/checkpoint_epoch29.pth.tar'
17 | else:
18 | deepIC_checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/code/trained_models/TUM_RGBD_ABC_final.pth.tar'
19 | deepIC = LeastSquareTracking(
20 | encoder_name='ConvRGBD2',
21 | direction='inverse',
22 | max_iter_per_pyr=3,
23 | mEst_type='MultiScale2w',
24 | # options=options,
25 | solver_type='Direct-ResVol',
26 | feature_channel=1,
27 | feature_extract='average',
28 | uncertainty_type='None',
29 | combine_ICP=False,
30 | scaler='None',
31 | init_pose_type='identity',
32 | options=options,
33 | )
34 |
35 | if torch.cuda.is_available(): deepIC.cuda()
36 |
37 | # check whether it is a single checkpoint or a directory
38 | deepIC.load_state_dict(torch.load(deepIC_checkpoint)['state_dict'])
39 | deepIC.eval()
40 | return deepIC
41 |
42 | if method_name == 'RGB':
43 | print('==>Load RGB method')
44 | rgb_tracker = LeastSquareTracking(
45 | encoder_name='RGB',
46 | combine_ICP=False,
47 | feature_channel=1,
48 | uncertainty_channel=1,
49 | # feature_extract='conv',
50 | uncertainty_type='None',
51 | scaler='None',
52 | direction='inverse',
53 | max_iter_per_pyr=options.max_iter_per_pyr,
54 | mEst_type='None',
55 | solver_type='Direct-Nodamping',
56 | init_pose_type='identity',
57 | options=options,
58 | )
59 | if torch.cuda.is_available(): rgb_tracker.cuda()
60 | rgb_tracker.eval()
61 | return rgb_tracker
62 |
63 | if method_name == 'ICP':
64 | print('==>Load ICP method')
65 | icp_tracker = LeastSquareTracking(
66 | encoder_name='ICP',
67 | combine_ICP=False,
68 | # feature_channel=1,
69 | # uncertainty_channel=1,
70 | # feature_extract='conv',
71 | uncertainty_type='ICP',
72 | scaler='None',
73 | direction='inverse',
74 | max_iter_per_pyr=options.max_iter_per_pyr,
75 | mEst_type='None',
76 | solver_type='Direct-Nodamping',
77 | init_pose_type='identity',
78 | options=options,
79 | )
80 | if torch.cuda.is_available(): icp_tracker.cuda()
81 | icp_tracker.eval()
82 | return icp_tracker
83 |
84 | if method_name == 'RGB+ICP':
85 | print('==>Load RGB+ICP method')
86 | rgbd_tracker = LeastSquareTracking(
87 | encoder_name='RGB',
88 | combine_ICP=True,
89 | # feature_channel=1,
90 | uncertainty_channel=1,
91 | # feature_extract='conv',
92 | uncertainty_type='identity',
93 | scaler='None',
94 | direction='inverse',
95 | max_iter_per_pyr=options.max_iter_per_pyr,
96 | mEst_type='None',
97 | solver_type='Direct-Nodamping',
98 | init_pose_type='identity',
99 | remove_tru_sigma=False,
100 | scale_scaler=0.2,
101 | options=options,
102 | )
103 | if torch.cuda.is_available(): rgbd_tracker.cuda()
104 | rgbd_tracker.eval()
105 | return rgbd_tracker
106 |
107 | if method_name == 'feature':
108 | # train_utils.load_checkpoint_test(options)
109 | #
110 | # net = ICtracking.LeastSquareTracking(
111 | # encoder_name=options.encoder_name,
112 | # uncertainty_type=options.uncertainty,
113 | # direction=options.direction,
114 | # max_iter_per_pyr=options.max_iter_per_pyr,
115 | # mEst_type=options.mestimator,
116 | # options=options,
117 | # solver_type=options.solver,
118 | # no_weight_sharing=options.no_weight_sharing)
119 | #
120 | # if torch.cuda.is_available(): net.cuda()
121 | # net.eval()
122 | #
123 | # # check whether it is a single checkpoint or a directory
124 | # net.load_state_dict(torch.load(options.checkpoint)['state_dict'])
125 | # method_list['trained_method'] = net
126 | # train_utils.load_checkpoint_test(options)
127 | print('==>Load our feature-metric method')
128 | net = LeastSquareTracking(
129 | encoder_name=options.encoder_name,
130 | uncertainty_type=options.uncertainty,
131 | direction=options.direction,
132 | max_iter_per_pyr=options.max_iter_per_pyr,
133 | mEst_type=options.mestimator,
134 | options=options,
135 | solver_type=options.solver,
136 | combine_ICP=False,
137 | no_weight_sharing=options.no_weight_sharing)
138 |
139 | if torch.cuda.is_available(): net.cuda()
140 |
141 | # check whether it is a single checkpoint or a directory
142 | if options.checkpoint == '':
143 | if options.dataset in ['TUM_RGBD', 'VaryLighting']:
144 | checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/logs/TUM_RGBD/check_change/check4_ConvRGBD2_None_Direct-Nodamping_dataset_TUM_RGBD_obj_False_laplacian_uncerCh_1_featCh_8_conv_initPose_sfm_net_multiHypo_prob_fuse_uncer_prop_False_lr_0.0005_batch_64/checkpoint_epoch29.pth.tar'
145 | else:
146 | raise NotImplementedError()
147 | net.load_state_dict(torch.load(checkpoint)['state_dict'])
148 | else:
149 | net.load_state_dict(torch.load(options.checkpoint)['state_dict'])
150 | net.eval()
151 | return net
152 |
153 | if method_name == 'feature_icp':
154 | print('==>Load our feature-metric+ICP method')
155 | feature_icp = LeastSquareTracking(
156 | encoder_name=options.encoder_name,
157 | uncertainty_type=options.uncertainty,
158 | direction=options.direction,
159 | max_iter_per_pyr=options.max_iter_per_pyr,
160 | mEst_type=options.mestimator,
161 | options=options,
162 | solver_type=options.solver,
163 | combine_ICP=True,
164 | scale_scaler=options.scale_icp,
165 | no_weight_sharing=options.no_weight_sharing)
166 |
167 | if torch.cuda.is_available(): feature_icp.cuda()
168 |
169 | # check whether it is a single checkpoint or a directory
170 | if options.checkpoint == '':
171 | if options.dataset in ['TUM_RGBD', 'VaryLighting']:
172 | checkpoint = '/media/binbin/code/SLAM/DeeperInverseCompositionalAlgorithm/logs/TUM_RGBD/finetune/finetune_vl_icp_ConvRGBD2_None_Direct-Nodamping_TUM_RGBD_obj_False_uCh_1_laplacian_rmT_True_fCh_8_conv_iP_sfm_net_mH_prob_fuse_wICP_True_s_None_lr_0.0005_batch_64/checkpoint_epoch40.pth.tar'
173 | else:
174 | raise NotImplementedError()
175 | feature_icp.load_state_dict(torch.load(checkpoint)['state_dict'])
176 | else:
177 | feature_icp.load_state_dict(torch.load(options.checkpoint)['state_dict'])
178 | feature_icp.eval()
179 | return feature_icp
180 | else:
181 | raise NotImplementedError("unsupported test tracker: check argument of --tracker again")
--------------------------------------------------------------------------------
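A hedged usage note for select_method(): it expects the same argparse namespace that evaluate.py and kf_vo.py build from the config helpers, and returns the chosen tracker already switched to eval mode. The sketch below only shows that wiring; the concrete command-line flags you pass (e.g. a checkpoint path) depend on config.py and are not spelled out here:

import argparse
import config
from experiments.select_method import select_method

parser = argparse.ArgumentParser(description="Select a tracker for comparison")
config.add_basics_config(parser)
config.add_test_basics_config(parser)
config.add_tracking_config(parser)
options = parser.parse_args()

# 'RGB' builds a plain photometric tracker and needs no checkpoint;
# 'feature' / 'feature_icp' load learned weights from the supplied checkpoint.
tracker = select_method('RGB', options)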
/code/experiments/warping_objects.py:
--------------------------------------------------------------------------------
1 | """
2 | Experiments to warp objects for visualization
3 | # SPDX-FileCopyrightText: 2021 Binbin Xu
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | """
6 |
7 | import argparse, pickle
8 | import os.path as osp
9 | import cv2
10 | import numpy as np
11 | from tqdm import tqdm
12 | import torch
13 | import torch.utils.data as data
14 |
15 | import config
16 | import models.geometry as geometry
17 | from Logger import check_directory
18 | from train_utils import check_cuda
19 | from data.dataloader import load_data, MOVING_OBJECTS_3D
20 | from experiments.select_method import select_method
21 | from tools import display
22 |
23 | def compute_pose(dataloader, tracker, k, tracker_name, use_gt_pose):
24 | count_base = 0
25 | total_frames = len(dataloader.dataset)
26 | progress = tqdm(dataloader, ncols=100,
27 | # desc = 'evaluate deeper inverse compositional algorithm {:}'.format(eval_name),
28 | total= len(dataloader))
29 | outputs = {
30 | 'R_est': np.zeros((total_frames, 3, 3)),
31 | 't_est': np.zeros((total_frames, 3)),
32 | 'seq_idx': np.zeros((total_frames)),
33 | 'frame0': np.zeros((total_frames)),
34 | 'frame1': np.zeros((total_frames)),
35 | }
36 | # computing pose
37 | for idx, batch in enumerate(progress):
38 | color0, color1, depth0, depth1, transform, K, mask0, mask1, names = check_cuda(batch)
39 | B, C, H, W = color0.shape
40 | if use_gt_pose:
41 | R_gt, t_gt = transform[:, :3, :3], transform[:, :3, 3]
42 | Rt = [R_gt, t_gt]
43 | else:
44 | with torch.no_grad():
45 | if options.obj_only:
46 | output = tracker.forward(color0, color1, depth0, depth1, K,
47 | obj_mask0=mask0, obj_mask1=mask1,
48 | )
49 | else:
50 | output = tracker.forward(color0, color1, depth0, depth1, K,
51 | index=idx,
52 | )
53 | Rt = output
54 | R, t = Rt
55 | outputs['R_est'][count_base:count_base + B] = R.cpu().numpy()
56 | outputs['t_est'][count_base:count_base + B] = t.cpu().numpy()
57 | outputs['seq_idx'][count_base:count_base + B] = names['seq_idx'].cpu().numpy()
58 | outputs['frame0'][count_base:count_base + B] = names['frame0'].cpu().numpy()
59 | outputs['frame1'][count_base:count_base + B] = names['frame1'].cpu().numpy()
60 | count_base += B
61 | return outputs
62 |
63 | def test_object_warping(options):
64 | # loader = MovingObjects3D('', load_type='train', keyframes=[1])
65 | assert options.dataset == 'MovingObjects3D'
66 | keyframes = [int(x) for x in options.keyframes.split(',')]
67 | objects = ['boat', 'motorbike'] if options.object == '' else [options.object]
68 | eval_loaders = {}
69 | for test_object in objects:
70 | for kf in keyframes:
71 | np_loader = load_data(options.dataset, keyframes=[kf],
72 | load_type=options.eval_set,
73 | select_trajectory=test_object, options=options)
74 | eval_loaders['{:}_keyframe_{:}'.
75 | format(test_object, kf)] = data.DataLoader(np_loader,
76 | batch_size=int(options.batch_per_gpu),
77 | shuffle=False, num_workers = options.cpu_workers)
78 | use_gt_pose = options.gt_pose
79 | # method_list = {}
80 | if not use_gt_pose:
81 | tracker = select_method(options.method, options)
82 | else:
83 | tracker = None
84 | options.method = 'gt'
85 | method_list = {options.method: tracker}
86 |
87 | for k, loader in eval_loaders.items():
88 | for method_name in method_list:
89 | # method = method_list
90 | tracker = method_list[method_name]
91 | output_dir_method = osp.join(MOVING_OBJECTS_3D, 'visualization', method_name)
92 | output_dir = osp.join(output_dir_method, k)
93 | output_pkl = output_dir + '/pose.pkl'
94 | output_compose_dir = osp.join(output_dir, 'compose')
95 | output_input_dir = osp.join(output_dir, 'input')
96 | output_residual_dir = osp.join(output_dir, 'res')
97 | check_directory(output_pkl)
98 | check_directory(output_compose_dir + '/.png')
99 | check_directory(output_input_dir + '/.png')
100 | check_directory(output_residual_dir + '/.png')
101 |
102 | if options.recompute or not osp.isfile(output_pkl):
103 | info = compute_pose(loader, tracker, k, method_name, use_gt_pose)
104 | with open(output_pkl, 'wb') as output:
105 | pickle.dump(info, output)
106 | else:
107 | with open(output_pkl, 'rb') as pkl_file:
108 | info = pickle.load(pkl_file)
109 | # info = compute_pose(loader, tracker, k, method_name, use_gt_pose)
110 |
111 | # visualize residuals
112 | loader.dataset.fx_s = 1.0
113 | loader.dataset.fy_s = 1.0
114 | progress = tqdm(loader, ncols=100,
115 | desc='compute residual for object {:} using {}'.format(k, method_name),
116 | total=len(loader))
117 | count_base = 0
118 | for idx, batch in enumerate(progress):
119 | color0, color1, depth0, depth1, transform, K, mask0, mask1, names = check_cuda(batch)
120 | # color0, color1, depth0, depth1, transform, K, mask0, mask1, names = check_cuda(loader.dataset.get_original_size_batch(idx))
121 | B, C, H, W = color0.shape
122 | invD0 = 1.0 / depth0
123 | invD1 = 1.0 / depth1
124 |
125 | R = torch.stack(check_cuda(info['R_est'][count_base:count_base + B]), dim=0).float()
126 | t = torch.stack(check_cuda(info['t_est'][count_base:count_base + B]), dim=0).float()
127 | Rt = [R, t]
128 | # R_gt, t_gt = transform[:, :3, :3], transform[:, :3, 3]
129 | # Rt = [R_gt, t_gt]
130 | px, py = geometry.generate_xy_grid(B, H, W, K)
131 | u_warped, v_warped, inv_z_warped = geometry.batch_warp_inverse_depth(
132 | px, py, invD0, Rt, K)
133 | x1_1to0 = geometry.warp_features(color1, u_warped, v_warped)
134 | crd = torch.cat((u_warped, v_warped), dim=1)
135 | occ = geometry.check_occ(inv_z_warped, invD1, crd, DIC_version=True)
136 |
137 | residuals = x1_1to0 - color0 # equation (12)
138 | # remove occlusion
139 | x1_1to0[occ.expand(B, C, H, W)] = 0
140 |
141 | if mask0 is not None:
142 | bg_mask0 = ~mask0
143 | res_occ = occ | (bg_mask0.view(B, 1, H, W))
144 | else:
145 | res_occ = occ
146 | residuals[res_occ.expand(B, C, H, W)] = 0
147 | residuals = residuals.mean(dim=1, keepdim=True)
148 |
149 | # # for each image
150 | for idx in range(B):
151 | feat_residual = display.create_mosaic([color0[idx:idx+1], color1[idx:idx+1], x1_1to0[idx:idx+1], residuals[idx:idx+1]],
152 | cmap=['NORMAL', 'NORMAL', 'NORMAL', cv2.COLORMAP_JET],
153 | order='CHW')
154 | input0 = feat_residual[0:H, 0:W, :].copy()
155 | input1 = feat_residual[0:H, W:, :].copy()
156 | warped = feat_residual[H:, 0:W, :].copy()
157 | res = feat_residual[H:, W:, :].copy()
158 | obj_mask0 = mask0[idx:idx+1].squeeze().cpu().numpy().astype(np.uint8)*255
159 | obj_mask1 = mask1[idx:idx+1].squeeze().cpu().numpy().astype(np.uint8)*255
160 | contours0, hierarchy0 = cv2.findContours(obj_mask0, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
161 | contours1, hierarchy1 = cv2.findContours(obj_mask1, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
162 |
163 | input0 = cv2.drawContours(input0, contours0, -1, (0, 0, 255), 1)
164 | input1 = cv2.drawContours(input1, contours0, -1, (0, 0, 255), 1)
165 | input1 = cv2.drawContours(input1, contours1, -1, (0, 255, 0), 1)
166 | overlay = cv2.drawContours(warped, contours0, -1, (0, 0, 255), 1)
167 | overlay = cv2.drawContours(overlay, contours1, -1, (0, 255, 0), 1)
168 |
169 | # visualization for debugging
170 | if options.save_img:
171 | idx_in_batch=count_base+idx
172 | seq_idx = int(info['seq_idx'][idx_in_batch])
173 | idx0 = int(info['frame0'][idx_in_batch])
174 | idx1 = int(info['frame1'][idx_in_batch])
175 | index = "_" + str.zfill(str(idx_in_batch), 5) + '.png'
176 | image_name = osp.join(output_compose_dir, 'compose'+index)
177 | cv2.imwrite(image_name, feat_residual)
178 | overlay_name = osp.join(output_residual_dir, 'overlay'+index)
179 | input_name = osp.join(output_input_dir, 'input0'+index)
180 | cv2.imwrite(overlay_name, overlay)
181 | cv2.imwrite(input_name, input0)
182 | cv2.imwrite(input_name.replace('input0', "input1"), input1)
183 |
184 | pair_dir = osp.join(output_dir, 'sequence',
185 | 'seq' + str(seq_idx) + "_" + str(idx0) + "_" + str(idx1),
186 | )
187 | check_directory(pair_dir + '/.png')
188 | cv2.imwrite(overlay_name.replace('overlay', "residual"), res)
189 | cv2.imwrite(pair_dir+"/overlay.png", overlay)
190 | cv2.imwrite(pair_dir+"/input0.png", input0)
191 | cv2.imwrite(pair_dir+"/input1.png", input1)
192 | cv2.imwrite(pair_dir+"/residual.png", res)
193 | else:
194 | cv2.imshow("overlay", overlay)
195 | cv2.imshow("input0", input0)
196 | cv2.imshow("input1", input1)
197 |
198 | window_name = "feature-metric residuals"
199 | cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
200 | cv2.imshow(window_name, feat_residual)
201 | # image_name = osp.join(output_dir, 'residual'+str(idx)+'.png')
202 | # cv2.imwrite(image_name, feat_residual)
203 | cv2.waitKey(0)
204 | count_base += B
205 |
206 |
207 | if __name__ == '__main__':
208 |
209 | parser = argparse.ArgumentParser(description="Evaluate the network")
210 | config.add_basics_config(parser)
211 | config.add_tracking_config(parser)
212 | config.add_object_config(parser)
213 | options = parser.parse_args()
214 |
215 | print('---------------------------------------')
216 | test_object_warping(options)
217 |
218 |
--------------------------------------------------------------------------------
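A note on the residual step in the loop above: the estimated pose warps color1 into frame 0 and the result is subtracted from color0 (equation (12)), with occluded pixels zeroed out. Below is a minimal, self-contained sketch of that step, reusing the geometry helpers exactly as they are called above; the function name and argument shapes are illustrative assumptions, not part of the original sources.

import torch
import models.geometry as geometry

def photometric_residual(color0, color1, depth0, depth1, R, t, K):
    # Warp color1 into frame 0 with pose (R, t) and return the occlusion-masked residual.
    B, C, H, W = color0.shape
    invD0, invD1 = 1.0 / depth0, 1.0 / depth1
    px, py = geometry.generate_xy_grid(B, H, W, K)
    u, v, inv_z = geometry.batch_warp_inverse_depth(px, py, invD0, [R, t], K)
    warped = geometry.warp_features(color1, u, v)      # I1 sampled at the warped coordinates
    occ = geometry.check_occ(inv_z, invD1, torch.cat((u, v), dim=1), DIC_version=True)
    residual = warped - color0                         # equation (12)
    residual[occ.expand(B, C, H, W)] = 0               # drop occluded / invalid pixels
    return residual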
/code/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/models/__init__.py
--------------------------------------------------------------------------------
/code/models/criterions.py:
--------------------------------------------------------------------------------
1 | """
2 | Some training criterions tested in the experiments
3 |
4 | # SPDX-FileCopyrightText: 2021 Binbin Xu
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | @author: Zhaoyang Lv
8 | @date: March, 2019
9 | """
10 |
11 | from __future__ import absolute_import
12 | from __future__ import division
13 | from __future__ import print_function
14 | from __future__ import unicode_literals
15 |
16 | import torch
17 | import torch.nn as nn
18 | import torch.nn.functional as func
19 | import models.geometry as geo
20 | from models.algorithms import invH, lev_mar_H
21 |
22 |
23 | def EPE3D_loss(input_flow, target_flow, invalid=None):
24 | """
25 | :param input_flow: the estimated optical / scene flow
26 | :param target_flow: the ground truth / target optical / scene flow
27 | :param invalid: the invalid mask, which has value 1 for all areas that are invalid
28 | """
29 | epe_map = torch.norm(target_flow-input_flow,p=2,dim=1)
30 | B = epe_map.shape[0]
31 |
32 | invalid_flow = (target_flow != target_flow) # check Nan same as torch.isnan
33 |
34 | mask = (invalid_flow[:,0,:,:] | invalid_flow[:,1,:,:] | invalid_flow[:,2,:,:])
35 | if invalid is not None:
36 | mask = mask | (invalid.view(mask.shape) > 0)
37 |
38 | epes = []
39 | for idx in range(B):
40 | epe_sample = epe_map[idx][~mask[idx].data]
41 | if len(epe_sample) == 0:
42 | epes.append(torch.zeros(()).type_as(input_flow))
43 | else:
44 | epes.append(epe_sample.mean())
45 |
46 | return torch.stack(epes)
47 |
48 |
49 | def RPE(R, t):
50 | """ Calcualte the relative pose error
51 | (a batch version of the RPE error defined in TUM RGBD SLAM TUM dataset)
52 | :param relative rotation
53 | :param relative translation
54 | """
55 | angle_error = geo.batch_mat2angle(R)
56 | trans_error = torch.norm(t, p=2, dim=1)
57 | return angle_error, trans_error
58 |
59 |
60 | def compute_RPE_uncertainty(R_est, t_est, R_gt, t_gt, inv_var):
61 | loss = 0
62 | if R_est.dim() > 3: # training time [batch, num_poses, rot_row, rot_col]
63 |
64 | for idx in range(R_est.shape[1]):
65 | dR = geo.batch_mat2twist(R_gt.detach().contiguous()) - geo.batch_mat2twist(R_est[:,idx])
66 | dt = t_gt.detach() - t_est[:,idx]
67 | dKsi = torch.cat([dR, dt], dim=-1).unsqueeze(dim=-1)
68 | Hessian = lev_mar_H(inv_var[:,idx])
69 | sigma_ksi = invH(Hessian)
70 | det_var = torch.det(sigma_ksi)
71 | # clamp
72 | det_var = det_var.clamp(min=1e-9)
73 | weighted_error = torch.bmm(dKsi.transpose(1, 2), torch.bmm(inv_var[:,idx], dKsi)).squeeze()
74 | regularization = torch.log(1e-6 + det_var)
75 | loss += (weighted_error + regularization).sum()
76 | return loss
77 |
78 |
79 | def compute_RPE_loss(R_est, t_est, R_gt, t_gt):
80 | """
81 | :param R_est: estimated rotation matrix Bx3x3
82 | :param t_est: estimated translation vector Bx3
83 | :param R_gt: ground truth rotation matrix Bx3x3
84 | :param t_gt: ground truth translation vector Bx3
85 | """
86 | B=R_est.shape[0]
87 | if R_est.dim() > 3: # training time [batch, num_poses, rot_row, rot_col]
88 | angle_error = 0
89 | trans_error = 0
90 | for idx in range(R_est.shape[1]):
91 | dR, dt = geo.batch_Rt_between(R_est[:, idx], t_est[:, idx], R_gt, t_gt)
92 | angle_error_i, trans_error_i = RPE(dR, dt)
93 | angle_error += angle_error_i.norm(p=2).sum()
94 | trans_error += trans_error_i.norm(p=2).sum()
95 | else:
96 | dR, dt = geo.batch_Rt_between(R_est, t_est, R_gt, t_gt)
97 | angle_error, trans_error = RPE(dR, dt)
98 | return angle_error, trans_error
99 |
100 |
101 | def compute_RT_EPE_loss(R_est, t_est, R_gt, t_gt, depth0, K, invalid=None):
102 | """ Compute the epe point error of rotation & translation
103 | :param R_est: estimated rotation matrix Bx3x3
104 | :param t_est: estimated translation vector Bx3
105 | :param R_gt: ground truth rotation matrix Bx3x3
106 | :param t_gt: ground truth translation vector Bx3
107 | :param depth0: reference depth image
108 | :param K: camera intrinsics
109 | """
110 |
111 | loss = 0
112 | if R_est.dim() > 3: # training time [batch, num_poses, rot_row, rot_col]
113 | rH, rW = 60, 80 # we train the algorithm on a downsized input (the exact input size is not critical at training time)
114 |
115 | B, C, H, W = depth0.shape
116 | rdepth = func.interpolate(depth0, size=(rH, rW), mode='bilinear')
117 | rinvalid = func.interpolate(invalid.float(), size=(rH, rW), mode='bilinear')
118 | rK = K.clone()
119 | rK[:, 0] *= float(rW) / W
120 | rK[:, 1] *= float(rH) / H
121 | rK[:, 2] *= float(rW) / W
122 | rK[:, 3] *= float(rH) / H
123 | xyz = geo.batch_inverse_project(rdepth, rK)
124 | flow_gt = geo.batch_transform_xyz(xyz, R_gt, t_gt, get_Jacobian=False)
125 |
126 | for idx in range(R_est.shape[1]):
127 | flow_est = geo.batch_transform_xyz(xyz, R_est[:, idx], t_est[:, idx], get_Jacobian=False)
128 | loss += EPE3D_loss(flow_est, flow_gt.detach(), invalid=rinvalid)
153 | deltaP = deltaP.view(B,3,-1).permute(0,2,1).contiguous().view(-1,3).unsqueeze(dim=-1)
154 | dim_ind = True
155 | if uncer_loss_type == 'gaussian':
156 | # if assume each dimension independent
157 | if dim_ind:
158 | diag_mask = torch.eye(3).view(1, 3, 3).type_as(variance)
159 | variance = torch.clamp(variance, min=1e-3)
160 | variance = diag_mask * variance
161 |
162 | # inv_var = invH(variance)
163 | inv_var = torch.inverse(variance)
164 | weighted_error = torch.bmm(deltaP.transpose(1, 2), torch.bmm(inv_var, deltaP)).squeeze()
165 | if dim_ind:
166 | regularization = variance.diagonal(dim1=1, dim2=2).log().sum(dim=1)
167 | else:
168 | det_var = torch.det(variance)
169 | # make it numerical stable
170 | det_var = torch.clamp(det_var, min=1e-3)
171 | regularization = torch.log(det_var)
172 | epe_map = weighted_error + regularization
173 | elif uncer_loss_type == 'laplacian':
174 | raise NotImplementedError("the bessel function needs to be implemented")
175 | sigma = torch.sqrt(variance + 1e-7)
176 | weighted_error = torch.abs(deltaP) / (sigma + 1e-7)
177 | regularization = torch.log(torch.det(sigma))
178 | epe_map = weighted_error + regularization
179 | else:
180 | raise NotImplementedError()
181 | epe_map = epe_map.view(B,H,W)
182 | B = epe_map.shape[0]
183 |
184 | invalid_flow = (target_flow != target_flow) # check Nan same as torch.isnan
185 |
186 | mask = (invalid_flow[:,0,:,:] | invalid_flow[:,1,:,:] | invalid_flow[:,2,:,:])
187 | if invalid is not None:
188 | mask = mask | (invalid.view(mask.shape) > 0)
189 |
190 | epes = []
191 | o_epes = []
192 | for idx in range(B):
193 | epe_sample = epe_map[idx][~mask[idx].data]
194 | if len(epe_sample) == 0:
195 | epes.append(torch.zeros(()).type_as(input_flow))
196 | else:
197 | epes.append(epe_sample.mean())
198 |
199 | if uncer_loss_type is not None:
200 | o_epe_sample = o_epe_map[idx][~mask[idx].data]
201 | if len(o_epe_sample) == 0:
202 | o_epes.append(torch.zeros(()).type_as(input_flow))
203 | else:
204 | o_epes.append(o_epe_sample.mean())
205 | if uncer_loss_type is not None:
206 | return torch.stack(epes), torch.stack(o_epes)
207 | else:
208 | return torch.stack(epes)
209 |
210 |
211 | def compute_RT_EPE_uncertainty_loss(R_est, t_est, R_gt, t_gt, depth0, K, sigma_ksi, uncertainty_type, invalid=None):
212 | """ Compute the epe point error of rotation & translation
213 | :param R_est: estimated rotation matrix BxNx3x3
214 | :param t_est: estimated translation vector BxNx3
215 | :param R_gt: ground truth rotation matrix Bx3x3
216 | :param t_gt: ground truth translation vector Bx3
217 | :param depth0: reference depth image
218 | :param K: camera intrinsics
219 | """
220 |
221 | loss = 0
222 | epe = 0
223 | assert R_est.dim() > 3 # training time [batch, num_poses, rot_row, rot_col]
224 | rH, rW = 60, 80 # we train the algorithm on a downsized input (the exact input size is not critical at training time)
225 |
226 | B,C,H,W = depth0.shape
227 | rdepth = func.interpolate(depth0, size=(rH, rW), mode='bilinear')
228 | rinvalid = func.interpolate(invalid.float(), size=(rH,rW), mode='bilinear')
229 | rK = K.clone()
230 | rK[:,0] *= float(rW) / W
231 | rK[:,1] *= float(rH) / H
232 | rK[:,2] *= float(rW) / W
233 | rK[:,3] *= float(rH) / H
234 | xyz = geo.batch_inverse_project(rdepth, rK)
235 | flow_gt = geo.batch_transform_xyz(xyz, R_gt, t_gt, get_Jacobian=False)
236 |
237 | for idx in range(R_est.shape[1]):
238 | flow_est, J_flow = geo.batch_transform_xyz(xyz, R_est[:,idx], t_est[:,idx], get_Jacobian=True)
239 | # uncertainty propagation: J*sigma*J^T
240 | sigma_ksai_level = sigma_ksi[:,idx:idx+1,:,:].repeat(1,rH*rW,1,1).view(-1,6,6)
241 | J_sigmaKsi = torch.bmm(J_flow, sigma_ksai_level)
242 | sigma_deltaP = torch.bmm(J_sigmaKsi, torch.transpose(J_flow, 1, 2))
243 | loss_l, epe_l = UEPE3D_loss(flow_est, flow_gt.detach(), variance=sigma_deltaP, uncer_loss_type=uncertainty_type, invalid=rinvalid,)
244 | loss += loss_l
245 | epe += epe_l
246 |
247 | return loss, epe
248 |
249 |
--------------------------------------------------------------------------------
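The Gaussian branch of UEPE3D_loss above scores each 3D flow error d against its propagated covariance Σ with the weighted term dᵀΣ⁻¹d plus a log-determinant regulariser; Σ itself comes from the first-order propagation J Σ_ξ Jᵀ in compute_RT_EPE_uncertainty_loss. A small standalone sketch of that per-point negative log-likelihood, with made-up tensor names and a toy call purely for illustration:

import torch

def gaussian_nll_3d(delta, sigma, eps=1e-3):
    # delta: (N, 3, 1) flow errors; sigma: (N, 3, 3) propagated covariances
    sigma = sigma + eps * torch.eye(3).to(sigma)       # keep the covariance well conditioned
    inv_sigma = torch.inverse(sigma)
    weighted = torch.bmm(delta.transpose(1, 2), torch.bmm(inv_sigma, delta)).squeeze()
    return weighted + torch.logdet(sigma)              # (N,) per-point losses

d = torch.randn(8, 3, 1)
S = torch.eye(3).repeat(8, 1, 1)
print(gaussian_nll_3d(d, S).shape)                     # torch.Size([8])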
/code/models/submodules.py:
--------------------------------------------------------------------------------
1 | """
2 | Submodules to build up CNN
3 |
4 | # SPDX-FileCopyrightText: 2021 Binbin Xu
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | @author: Zhaoyang Lv
8 | @date: March, 2019
9 | """
10 |
11 | from __future__ import print_function
12 |
13 | import torch.nn as nn
14 | import torch
15 | import numpy as np
16 |
17 | from torch.nn import init
18 | from torchvision import transforms
19 |
20 | def color_normalize(color):
21 | rgb_mean = torch.Tensor([0.4914, 0.4822, 0.4465]).type_as(color)
22 | rgb_std = torch.Tensor([0.2023, 0.1994, 0.2010]).type_as(color)
23 | return (color - rgb_mean.view(1,3,1,1)) / rgb_std.view(1,3,1,1)
24 |
25 | def convLayer(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, dilation=1, bias=False):
26 | """ A wrapper of convolution-batchnorm-ReLU module
27 | """
28 | if batchNorm:
29 | return nn.Sequential(
30 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=bias),
31 | nn.BatchNorm2d(out_planes),
32 | #nn.LeakyReLU(0.1,inplace=True) # deprecated
33 | nn.ELU(inplace=True)
34 | )
35 | else:
36 | return nn.Sequential(
37 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=True),
38 | #nn.LeakyReLU(0.1,inplace=True) # deprecated
39 | nn.ELU(inplace=True)
40 | )
41 |
42 | def convLayer1d(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, dilation=1, bias=False):
43 | """ A wrapper of convolution-batchnorm-ReLU module
44 | """
45 | if batchNorm:
46 | return nn.Sequential(
47 | nn.Conv1d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=bias),
48 | nn.BatchNorm1d(out_planes),
49 | nn.ELU(inplace=True)
50 | # nn.ReLU(inplace=True)
51 | )
52 | else:
53 | return nn.Sequential(
54 | nn.Conv1d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2 + dilation-1, dilation=dilation, bias=True),
55 | nn.ELU(inplace=True)
56 | # nn.ReLU(inplace=True)
57 | )
58 |
59 | def fcLayer(in_planes, out_planes, bias=True):
60 | return nn.Sequential(
61 | nn.Linear(in_planes, out_planes, bias),
62 | nn.ReLU(inplace=True)
63 | )
64 |
65 | def initialize_weights(modules, method='xavier'):
66 | for m in modules:
67 | if isinstance(m, nn.Conv2d):
68 | if m.bias is not None:
69 | m.bias.data.zero_()
70 | if method == 'xavier':
71 | init.xavier_uniform_(m.weight)
72 | elif method == 'kaiming':
73 | init.kaiming_uniform_(m.weight)
74 |
75 | if isinstance(m, nn.ConvTranspose2d):
76 | if m.bias is not None:
77 | m.bias.data.zero_()
78 | if method == 'xavier':
79 | init.xavier_uniform_(m.weight)
80 | elif method == 'kaiming':
81 | init.kaiming_uniform_(m.weight)
82 |
83 | class ListModule(nn.Module):
84 | """ The implementation of a list of modules from
85 | https://discuss.pytorch.org/t/list-of-nn-module-in-a-nn-module/219/2
86 | """
87 | def __init__(self, *args):
88 | super(ListModule, self).__init__()
89 | idx = 0
90 | for module in args:
91 | self.add_module(str(idx), module)
92 | idx += 1
93 |
94 | def __getitem__(self, idx):
95 | if idx < 0 or idx >= len(self._modules):
96 | raise IndexError('index {} is out of range'.format(idx))
97 | it = iter(self._modules.values())
98 | for i in range(idx):
99 | next(it)
100 | return next(it)
101 |
102 | def __iter__(self):
103 | return iter(self._modules.values())
104 |
105 | def __len__(self):
106 | return len(self._modules)
107 |
--------------------------------------------------------------------------------
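For orientation, convLayer and ListModule above are the building blocks the encoders are assembled from. A minimal sketch of how they compose into a small strided feature pyramid; the channel sizes and depth are arbitrary illustrations, not the network used in this repository.

import torch
import torch.nn as nn
from models.submodules import convLayer, ListModule

class TinyPyramid(nn.Module):
    def __init__(self, batch_norm=True):
        super(TinyPyramid, self).__init__()
        levels = [convLayer(batch_norm, 3, 16, stride=2),
                  convLayer(batch_norm, 16, 32, stride=2),
                  convLayer(batch_norm, 32, 64, stride=2)]
        self.levels = ListModule(*levels)

    def forward(self, x):
        feats = []
        for layer in self.levels:
            x = layer(x)
            feats.append(x)        # feature maps from 1/2 down to 1/8 resolution
        return feats

# feats = TinyPyramid()(torch.randn(1, 3, 120, 160))   # three maps: 60x80, 30x40, 15x20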
/code/run_example.py:
--------------------------------------------------------------------------------
1 | """
2 | An extremely simple example to show how to run the algorithm
3 |
4 | @author: Zhaoyang Lv
5 | @date: May 2019
6 | """
7 |
8 | import argparse
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.functional as func
13 |
14 | import models.LeastSquareTracking as ICtracking
15 |
16 | from tqdm import tqdm
17 | from torch.utils.data import DataLoader
18 | from train_utils import check_cuda
19 | from data.SimpleLoader import SimpleLoader
20 |
21 | def resize(img0, img1, depth0, depth1, K_in, resizeH, resizeW):
22 | H, W = img0.shape[-2:]
23 |
24 | I0 = func.interpolate(img0, (resizeH,resizeW), mode='bilinear', align_corners=True)
25 | I1 = func.interpolate(img1, (resizeH,resizeW), mode='bilinear', align_corners=True)
26 | D0 = func.interpolate(depth0, (resizeH,resizeW), mode='nearest')
27 | D1 = func.interpolate(depth1, (resizeH,resizeW), mode='nearest')
28 |
29 | sx = resizeW / W # horizontal scale, applied to fx and cx
30 | sy = resizeH / H # vertical scale, applied to fy and cy
31 |
32 | K = K_in.clone()
33 | K[:,0] *= sx
34 | K[:,1] *= sy
35 | K[:,2] *= sx
36 | K[:,3] *= sy
37 |
38 | return I0, I1, D0, D1, K
39 |
40 | def run_inference(dataloader, net):
41 |
42 | progress = tqdm(dataloader, ncols=100,
43 | desc = 'Run the deeper inverse compositional algorithm',
44 | total= len(dataloader))
45 |
46 | net.eval()
47 |
48 | for idx, batch in enumerate(progress):
49 |
50 | color0, color1, depth0, depth1, K = check_cuda(batch)
51 |
52 | # downsize the input to 120x160, the resolution the algorithm was trained at
53 | C0, C1, D0, D1, K = resize(color0, color1, depth0, depth1, K, resizeH = 120, resizeW=160)
54 |
55 | with torch.no_grad():
56 | R, t = net.forward(C0, C1, D0, D1, K)
57 |
58 | print('Rotation: ')
59 | print(R)
60 | print('translation: ')
61 | print(t)
62 |
63 | if __name__ == '__main__':
64 |
65 | parser = argparse.ArgumentParser(description='Run the network inference example.')
66 |
67 | parser.add_argument('--checkpoint', default='trained_models/TUM_RGBD_ABC_final.pth.tar',
68 | type=str, help='the path to the pre-trained checkpoint.')
69 |
70 | parser.add_argument('--color_dir', default='data/data_examples/TUM/color',
71 | help='the directory of color images')
72 | parser.add_argument('--depth_dir', default='data/data_examples/TUM/depth',
73 | help='the directory of depth images')
74 | parser.add_argument('--intrinsic', default='525.0,525.0,319.5,239.5',
75 | help='Simple pin-hole camera intrinsics, input in the format (fx, fy, cx, cy)')
76 |
77 | config = parser.parse_args()
78 |
79 | K = [float(x) for x in config.intrinsic.split(',')]
80 |
81 | simple_loader = SimpleLoader(config.color_dir, config.depth_dir, K)
82 | simple_loader = DataLoader(simple_loader, batch_size=1, shuffle=False)
83 |
84 | net = ICtracking.LeastSquareTracking(
85 | encoder_name = 'ConvRGBD2',
86 | max_iter_per_pyr= 3,
87 | mEst_type = 'MultiScale2w',
88 | solver_type = 'Direct-ResVol',
89 | uncertainty_type='None',
90 | direction='inverse',
91 | # options=options,
92 | )
93 |
94 | # net = ICtracking.LeastSquareTracking(
95 | # encoder_name = 'RGB',
96 | # max_iter_per_pyr= 50,
97 | # mEst_type = 'None',
98 | # solver_type = 'Direct-Nodamping',
99 | # uncertainty_type='None',
100 | # direction='forward')
101 |
102 | if torch.cuda.is_available():
103 | net.cuda()
104 |
105 | net.load_state_dict(torch.load(config.checkpoint)['state_dict'])
106 |
107 | run_inference(simple_loader, net)
--------------------------------------------------------------------------------
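When resize above shrinks the images it also has to rescale the pin-hole intrinsics: fx and cx follow the width ratio, fy and cy the height ratio. A quick standalone check of that convention using the TUM default intrinsics from this script and the 120x160 training resolution mentioned in the comment above:

# fx, cx follow the width ratio; fy, cy follow the height ratio.
fx, fy, cx, cy = 525.0, 525.0, 319.5, 239.5            # original 640x480 intrinsics
W, H, rW, rH = 640, 480, 160, 120
fx_r, cx_r = fx * rW / W, cx * rW / W                  # 131.25, 79.875
fy_r, cy_r = fy * rH / H, cy * rH / H                  # 131.25, 59.875
print(fx_r, fy_r, cx_r, cy_r)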
/code/timers.py:
--------------------------------------------------------------------------------
1 | """
2 | A timing utility
3 |
4 | @author: Zhaoyang Lv
5 | @date: March 2019
6 | """
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 | from __future__ import unicode_literals
12 |
13 | import time
14 | import numpy as np
15 | from collections import defaultdict
16 |
17 | class Timer(object):
18 |
19 | def __init__(self):
20 | self.reset()
21 |
22 | def tic(self):
23 | self.start_time = time.time()
24 |
25 | def toc(self, average=True):
26 | self.diff = time.time() - self.start_time
27 | self.total_time += self.diff
28 | self.calls += 1
29 |
30 | def total(self):
31 | """ return the total amount of time """
32 | return self.total_time
33 |
34 | def avg(self):
35 | """ return the average amount of time """
36 | return self.total_time / float(self.calls)
37 |
38 | def reset(self):
39 | self.total_time = 0.
40 | self.calls = 0
41 | self.start_time = 0.
42 | self.diff = 0.
43 |
44 | class Timers(object):
45 |
46 | def __init__(self):
47 | self.timers = defaultdict(Timer)
48 |
49 | def tic(self, key):
50 | self.timers[key].tic()
51 |
52 | def toc(self, key):
53 | self.timers[key].toc()
54 |
55 | def print(self, key=None):
56 | if key is None:
57 | # print all time
58 | for k, v in self.timers.items():
59 | print("Average time for {:}: {:}".format(k, v.avg()))
60 | else:
61 | print("Average time for {:}: {:}".format(key, self.timers[key].avg()))
62 |
63 | def get_avg(self, key):
64 | return self.timers[key].avg()
65 |
--------------------------------------------------------------------------------
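A minimal usage sketch of the Timers helper above; the key name and the sleep stand-in are illustrative only.

import time
from timers import Timers

timers = Timers()
for _ in range(3):
    timers.tic('forward step')
    time.sleep(0.01)               # stand-in for a network forward pass
    timers.toc('forward step')
timers.print('forward step')       # prints the average time over the three calls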
/code/tools/ICP.py:
--------------------------------------------------------------------------------
1 | """
2 | An implementation of ICP odometry using Open3D library for comparison in the paper
3 | # SPDX-FileCopyrightText: 2021 Binbin Xu
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | """
6 |
7 | import open3d as o3d
8 | import numpy as np
9 | import torch
10 | import copy
11 | import cv2
12 |
13 | class ICP_Odometry:
14 |
15 | def __init__(self, mode='Point2Plane'):
16 | self.mode = mode
17 | if mode == 'Point2Plane':
18 | print("Using Point-to-plane ICP")
19 | elif mode == 'Point2Point':
20 | print("Using Point-to-point ICP")
21 | elif mode == "ColorICP":
22 | print("using ColorICP")
23 | elif mode == 'Iter_Point2Plane':
24 | print("Using iterative Point-to-plane ICP")
25 | elif mode == "Iter_ColorICP":
26 | print("using iterative ColorICP")
27 | else:
28 | raise NotImplementedError()
29 |
30 | def set_K(self, K, width, height):
31 | fx, fy, cx, cy = K
32 | K = o3d.camera.PinholeCameraIntrinsic(width, height, fx, fy, cx, cy)
33 | return K
34 |
35 | def batch_track(self, batch_rgb0, batch_dpt0, batch_rgb1, batch_dpt1, batch_K,
36 | batch_objmask0=None, batch_objmask1=None, vis_pcd=False):
37 | assert batch_dpt0.ndim == 4
38 | B = batch_dpt0.shape[0]
39 | batch_R = []
40 | batch_t = []
41 | if batch_objmask0 is not None:
42 | batch_dpt0 = batch_dpt0 * batch_objmask0
43 | if batch_objmask1 is not None:
44 | batch_dpt1 = batch_dpt1 * batch_objmask1
45 | for i in range(B):
46 | rgb0 = batch_rgb0[i].permute(1, 2, 0).cpu().numpy()
47 | dpt0 = batch_dpt0[i].permute(1,2,0).cpu().numpy()
48 | rgb1 = batch_rgb1[i].permute(1, 2, 0).cpu().numpy()
49 | dpt1 = batch_dpt1[i].permute(1,2,0).cpu().numpy()
50 | K = batch_K[i].cpu().numpy().tolist()
51 | pose10 = self.track(rgb0, dpt0, rgb1, dpt1, K)
52 | batch_R.append(pose10[0])
53 | batch_t.append(pose10[1])
54 |
55 | batch_R = torch.tensor(batch_R).type_as(batch_K)
56 | batch_t = torch.tensor(batch_t).type_as(batch_K)
57 | return batch_R, batch_t
58 |
59 | def draw_registration_result(self, source, target, transformation, name='Open3D'):
60 | source_temp = copy.deepcopy(source)
61 | target_temp = copy.deepcopy(target)
62 | source_temp.paint_uniform_color([1, 0.706, 0])
63 | target_temp.paint_uniform_color([0, 0.651, 0.929])
64 | source_temp.transform(transformation)
65 | o3d.visualization.draw_geometries([source_temp, target_temp], window_name=name)
66 |
67 | def track(self, rgb0, dpt0, rgb1, dpt1, K, vis_pcd=True, odo_init=None):
68 | H, W, _ = dpt0.shape
69 | intrinsic = self.set_K(K, W, H) # set_K expects (width, height)
70 | # pcd_0 = o3d.geometry.PointCloud.create_from_depth_image(o3d.geometry.Image(dpt0),
71 | # intrinsic=intrinsic,
72 | # depth_scale=1.0,
73 | # depth_trunc=5.0)
74 | # pcd_1 = o3d.geometry.PointCloud.create_from_depth_image(o3d.geometry.Image(dpt1),
75 | # intrinsic=intrinsic,
76 | # depth_scale=1.0,
77 | # depth_trunc=5.0)
78 | rgbd_0 = o3d.geometry.RGBDImage.create_from_color_and_depth(
79 | o3d.geometry.Image(rgb0), o3d.geometry.Image(dpt0), depth_scale=1, depth_trunc=4.0)
80 | rgbd_1 = o3d.geometry.RGBDImage.create_from_color_and_depth(
81 | o3d.geometry.Image(rgb1), o3d.geometry.Image(dpt1), depth_scale=1, depth_trunc=4.0)
82 | pcd_0 = o3d.geometry.PointCloud.create_from_rgbd_image(
83 | rgbd_0, intrinsic)
84 | pcd_1 = o3d.geometry.PointCloud.create_from_rgbd_image(
85 | rgbd_1, intrinsic)
86 |
87 | if odo_init is None:
88 | odo_init = np.identity(4)
89 |
90 | # point-to-point ICP
91 | if self.mode == 'Point2Point':
92 | reg_p2p = o3d.registration.registration_icp(
93 | pcd_0, pcd_1, 0.02, odo_init,
94 | o3d.registration.TransformationEstimationPointToPoint())
95 | T_10 = reg_p2p.transformation
96 |
97 | # point-to-plane ICP
98 | elif self.mode == 'Point2Plane':
99 | # radius = 0.01
100 | # source_down = pcd_0.voxel_down_sample(radius)
101 | # target_down = pcd_1.voxel_down_sample(radius)
102 | #
103 | # # print("3-2. Estimate normal.")
104 | # source_down.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
105 | # target_down.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
106 | # reg_p2l = o3d.registration.registration_icp(source_down, target_down, 0.2, odo_init,
107 | # o3d.registration.TransformationEstimationPointToPlane(),
108 | # o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6,
109 | # relative_rmse=1e-6,
110 | # max_iteration=50)
111 | #
112 | # )
113 | iter = 10
114 | pcd_0.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(
115 | radius=0.1, max_nn=30))
116 | pcd_1.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(
117 | radius=0.1, max_nn=30))
118 | reg_p2l = o3d.registration.registration_icp(
119 | pcd_0, pcd_1, 0.4, odo_init,
120 | o3d.registration.TransformationEstimationPointToPlane(),
121 | o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6,
122 | relative_rmse=1e-6,
123 | max_iteration=iter)
124 | )
125 |
126 | T_10 = reg_p2l.transformation
127 |
128 | elif self.mode == 'ColorICP':
129 | pcd_0.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(
130 | radius=0.1, max_nn=30))
131 | pcd_1.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(
132 | radius=0.1, max_nn=30))
133 | reg_p2l = o3d.registration.registration_colored_icp(
134 | pcd_0, pcd_1, 0.02, odo_init,)
135 | T_10 = reg_p2l.transformation
136 |
137 | elif self.mode in ['Iter_Point2Plane', 'Iter_ColorICP']:
138 | voxel_radius = [0.04, 0.02, 0.01]
139 | max_iter = [50, 30, 14]
140 | T_10 = odo_init
141 | for scale in range(3):
142 | iter = max_iter[scale]
143 | radius = voxel_radius[scale]
144 |
145 | pcd0_down = pcd_0.voxel_down_sample(radius)
146 | pcd1_down = pcd_1.voxel_down_sample(radius)
147 |
148 | pcd0_down.estimate_normals(
149 | o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
150 | pcd1_down.estimate_normals(
151 | o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
152 |
153 | # point-to-plane ICP
154 | if self.mode == 'Iter_Point2Plane':
155 | result_icp = o3d.registration.registration_icp(
156 | pcd0_down, pcd1_down, radius, T_10,
157 | o3d.registration.TransformationEstimationPointToPlane(),
158 | o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6,
159 | relative_rmse=1e-6,
160 | max_iteration=iter)
161 | )
162 | elif self.mode == 'Iter_ColorICP':
163 | # colored ICP
164 | result_icp = o3d.registration.registration_colored_icp(
165 | pcd0_down, pcd1_down, radius*2, T_10,
166 | o3d.registration.ICPConvergenceCriteria(relative_fitness=1e-6,
167 | relative_rmse=1e-6,
168 | max_iteration=iter))
169 | else:
170 | raise NotImplementedError
171 | T_10 = result_icp.transformation
172 | else:
173 | raise NotImplementedError()
174 |
175 | # T_10 = result_icp.transformation
176 | trs = T_10[0:3, 3]
177 | rot = T_10[0:3, 0:3]
178 | pose10 = [rot, trs]
179 |
180 | if (trs > 1).sum():
181 | print('pose', T_10)
182 |
183 | # cv2.imshow('rgb0', rgb0)
184 | # cv2.imshow('rgb1', rgb1)
185 | # cv2.waitKey(0)
186 | # self.draw_registration_result(pcd_0, pcd_1, odo_init, name='init')
187 | # self.draw_registration_result(pcd_0, pcd_1, T_10, name='aligned')
188 |
189 | # T_10 = odo_init # debug leftover: uncommenting these four lines discards the ICP estimate and returns the initial pose instead
190 | # trs = T_10[0:3, 3]
191 | # rot = T_10[0:3, 0:3]
192 | # pose10 = [rot, trs]
193 |
194 |
195 | return pose10
--------------------------------------------------------------------------------
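The o3d.registration and o3d.odometry namespaces used by the ICP and RGB-D odometry baselines in code/tools correspond to older Open3D releases; newer versions moved these modules under o3d.pipelines. If the comparison code is run on a recent Open3D install, a small compatibility shim along the following lines may help; this is an assumption about the installed Open3D version, not part of the original code.

# Optional compatibility shim for newer Open3D releases.
import open3d as o3d

if not hasattr(o3d, 'registration') and hasattr(o3d, 'pipelines'):
    o3d.registration = o3d.pipelines.registration
if not hasattr(o3d, 'odometry') and hasattr(o3d, 'pipelines'):
    o3d.odometry = o3d.pipelines.odometry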
/code/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/smartroboticslab/deep_prob_feature_track/9e772871df320eeeff55c1398a0c34a0d2fc2fc5/code/tools/__init__.py
--------------------------------------------------------------------------------
/code/tools/display.py:
--------------------------------------------------------------------------------
1 | """
2 | visualisation tool for debugging and demo
3 | # SPDX-FileCopyrightText: 2021 Binbin Xu
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | """
6 |
7 | import cv2
8 | # cv2.setNumThreads(0)
9 |
10 | import math
11 | import sys
12 | import numpy as np
13 | import torch
14 |
15 | def convert_flow_for_display(flow):
16 | """
17 | Converts a 2D image (e.g. flow) to bgr
18 |
19 | :param flow:
20 | :type flow: optical flow of size [2, H, W]
21 | :return:
22 | :rtype:
23 | """
24 |
25 | ang = np.arctan2(flow[1, :, :], flow[0, :, :])
26 | ang[ang < 0] += 2 * np.pi
27 | ang /= 2 * np.pi
28 | mag = np.sqrt(flow[0, :, :] ** 2. + flow[1, :, :] ** 2.)
29 | mag = np.clip(mag / (np.percentile(mag, 99) + 1e-6), 0., 1.)
30 | hfill_hsv = np.stack([ang * 180, mag * 255, np.ones_like(ang) * 255], 2).astype(np.uint8)
31 | flow_rgb = cv2.cvtColor(hfill_hsv, cv2.COLOR_HSV2RGB) / 255
32 | return np.transpose(flow_rgb, [2, 0, 1])
33 |
34 |
35 | def single_image_tensor_mat(T): # [1, C, H, W]
36 | img_mat = T.squeeze(0).permute(1, 2, 0).numpy()
37 | show = cv2.cvtColor(img_mat, cv2.COLOR_BGR2RGB)
38 | return show
39 |
40 |
41 | def image_to_display(image, cmap=cv2.COLORMAP_JET, order='CHW', normalize=False):
42 | """
43 | accepts a [1xHxW] or [DxHxW] float image with values in range [0,1]
44 | => change it range of (0, 255) for visualization
45 | :param image:
46 | :type image:
47 | :param cmap: cv2.COLORMAP_BONE or cv2.COLORMAP_JET, or NORMAL(no-processing)
48 | :type cmap:
49 | :param order:
50 | :type order:
51 | :param normalize: if true, normalize to 0~1, otherwise clip to 0-1
52 | :type normalize:
53 | :return: a visible BGR image in range (0, 255), by default a colored heat map (in JET color map)
54 | :rtype: opencv mat [H, W, C]
55 | """
56 | if order == 'HWC' and len(image.shape) > 2:
57 | image = np.rollaxis(image, axis=2)
58 | # image = np.moveaxis(image, 2, 0)
59 | image = np.squeeze(image) # 2d or 3d
60 |
61 | if len(image.shape) == 3 and image.shape[0] == 2:
62 | image = convert_flow_for_display(image)
63 |
64 | if normalize:
65 | # handle nan pixels
66 | min_intensity = np.nanmin(image)
67 | max_intensity = np.nanmax(image)
68 | image = (image - min_intensity) / (max_intensity - min_intensity)
69 | image = np.uint8(image * 255)
70 | else:
71 | if image.dtype != np.uint8:
72 | image = np.clip(image, 0, 1)
73 | image = (image * 255).astype(np.uint8)
74 | # image = (image * 1).astype(np.uint8)
75 |
76 | if image.ndim == 3:
77 | if image.shape[0] == 3:
78 | image = np.transpose(image, [1, 2, 0])
79 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
80 | elif image.ndim == 2:
81 | if cmap == "NORMAL":
82 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
83 | else:
84 | image = cv2.applyColorMap(image, cmap)
85 | return image
86 |
87 |
88 | # image_array needs to be 2d
89 | def create_mosaic(image_array, cmap=None, points=None, order='CHW', normalize=False):
90 | """
91 | Stitch an array of images into one big concatenated image
92 |
93 | :param image_array: subimages to be displayed
94 | :type image_array: two-dimensional list (grid) of images; a flat 1D list will be reshaped back into a grid,
95 | each element is an image of [DxHxW]
96 | :param cmap: list of color map => common: cv2.COLORMAP_BONE or cv2.COLORMAP_JET or 'NORMAL'
97 | :type cmap:
98 | :param order:
99 | :type order:
100 | :param normalize: normalize to 0-1 range
101 | :type normalize: bool
102 | :return: image to be showed
103 | :rtype: numpy array
104 | """
105 | batch_version = (len(image_array[0].shape) == 4)
106 |
107 | if not isinstance(image_array[0], list): # if image_array is a stretch 1D list
108 | image_size = math.ceil(math.sqrt(len(image_array))) # stretch back to 2D list [N by N]
109 | image_array = [image_array[i:min(i + image_size, len(image_array))]
110 | for i in range(0, len(image_array), image_size)]
111 |
112 | max_cols = max([len(row) for row in image_array]) # because not every row (1st array) has the same size
113 | rows = []
114 |
115 | if cmap is None:
116 | cmap = [cv2.COLORMAP_JET]
117 | elif not isinstance(cmap, list):
118 | cmap = [cmap]
119 |
120 | if not isinstance(normalize, list):
121 | normalize = [normalize]
122 |
123 | if points is not None:
124 | if not isinstance(points, list):
125 | points = [points]
126 |
127 | i = 0
128 | for image_row in image_array:
129 | if len(image_row) == 0:
130 | continue
131 | image_row_processed = []
132 | for image in image_row:
133 | if torch.is_tensor(image):
134 | if batch_version:
135 | image = image[0:1, :, :, :]
136 | if len(image.shape) == 4: #[B. C, H, W]
137 | image = image.squeeze(0)
138 | if order == 'CHW':
139 | image = image.permute(1, 2, 0) # [H, W, C]
140 | if image.shape[2] not in(0, 3): # sum all channel features
141 | image = image.sum(dim=2)
142 | image = image.cpu().numpy()
143 | image_colorized = image_to_display(image, cmap[i % len(cmap)],
144 | order,
145 | normalize[i % len(normalize)])
146 | if points is not None:
147 | image_colorized = visualize_matches_on_image(image_colorized, points[i % len(points)])
148 | image_row_processed.append(image_colorized)
149 | i += 1
150 | nimages = len(image_row_processed)
151 | if nimages < max_cols: # padding zero(black) images in the empty areas
152 | image_row_processed += [np.zeros_like(image_row_processed[-1])] * (max_cols - nimages)
153 | rows.append(np.concatenate(image_row_processed, axis=1)) #horizontally
154 | return np.concatenate(rows, axis=0) # vertically
155 |
156 |
157 | def visualize_image_features(img_list: list, feat_list: list, feat_cmap=cv2.COLORMAP_WINTER):
158 | feat_sum_list = [feat[0, :, :, :].sum(dim=0) for feat in feat_list]
159 | if feat_cmap is not None and not isinstance(feat_cmap, list):
160 | feat_cmap = [feat_cmap] * len(feat_sum_list)
161 | img_cmap = ['NORMAL'] * len(img_list)
162 | cmap = img_cmap + feat_cmap
163 | img_show = create_mosaic(img_list + feat_sum_list, cmap=cmap)
164 |
165 | return img_show
166 |
167 |
168 | def visualize_matches_on_image(image, matches):
169 | """
170 | :param image: image to draw the matches on
171 | :type image: Union[torch.Tensor, numpy.ndarray]
172 | :param matches: pixel coordinates of the matches, size [2, #matches]
173 | :type matches: torch.Tensor
174 | :return: image with the matches drawn as translucent circles
175 | :rtype: numpy.ndarray
176 | """
177 | num_matches = matches.shape[1]
178 | if torch.is_tensor(image):
179 | image = image.detach().cpu().numpy()
180 | if torch.is_tensor(matches):
181 | matches = matches.detach().cpu().numpy()
182 | # just for visualization, round it:
183 | matches = matches.astype(int)
184 | output = image.copy()
185 | red = (0, 0, 255)
186 | alpha = 0.6
187 | radius = int(image.shape[1] / 64) # should be 10 when the image width is 640
188 | for i in range(num_matches):
189 | image = cv2.circle(image, (matches[0, i], matches[1, i]), radius, red, -1)
190 | #blend
191 | output = cv2.addWeighted(image, alpha, output, 1 - alpha, 0, )
192 | return output
193 |
194 |
195 | def visualize_feature_channels(feat_map, rgb=None, points=None, order='CHW', add_ftr_avg=True):
196 | """
197 | :param points: points to draw on images
198 | :type points: torch.Tensor [2, #samples]
199 | :param feat_map:
200 | :type feat_map: torch.Tensor, [B, C, H, W] or [B, H, W, C] depending on order
201 | :param rgb:
202 | :type rgb: numpy.ndarray [H, W, C] or [B, C, H, W]
203 | :param order: 'HWC' or 'CHW'
204 | :type order: str
205 | :return:
206 | :rtype: numpy.ndarray
207 | """
208 | assert len(feat_map.shape) == 4, "feature-map should be a 4-dim tensor"
209 | assert order in ['HWC', 'CHW']
210 |
211 | batch_version = (feat_map.shape[0] != 1)
212 | feat_map = feat_map.detach()
213 | if points is not None: points = points.detach()
214 | if not batch_version:
215 | feat_map = feat_map.squeeze(dim=0)
216 | if points is not None: points = points.squeeze()
217 | else:
218 | # if in batch, only visualize the 1st feature map
219 | feat_map = feat_map[0, :, :, :]
220 | if points is not None: points = points[0, :, :]
221 |
222 | if order == 'CHW':
223 | feat_map = feat_map.permute(1, 2, 0) # convert to [H, W, C]
224 | D = feat_map.shape[2]
225 | feat_map_sum = feat_map.sum(dim=2)
226 |
227 | if rgb is not None:
228 | if torch.is_tensor(rgb) and len(rgb.shape) == 4:
229 | rgb = rgb.detach()
230 | if not batch_version:
231 | rgb = rgb.squeeze()
232 | else:
233 | # if in batch, only visualize the 1st feature map
234 | rgb = rgb[0, :, :, :]
235 | rgb = rgb.permute(1, 2, 0) # convert to [H, W, C]
236 | if add_ftr_avg:
237 | feat_map_channel_list = [rgb, feat_map_sum]
238 | else:
239 | feat_map_channel_list = [rgb]
240 | else:
241 | if add_ftr_avg:
242 | feat_map_channel_list = [feat_map_sum]
243 | else:
244 | feat_map_channel_list = []
245 |
246 | for d in range(D):
247 | feat_map_channel = feat_map[:, :, d]
248 | feat_map_channel_list.append(feat_map_channel)
249 |
250 | cmap = [cv2.COLORMAP_JET] * (D + 1)
251 | if rgb is not None:
252 | cmap = ['NORMAL'] + cmap
253 | feature_channels = create_mosaic(feat_map_channel_list, cmap=cmap, points=points, order='HWC', normalize=True)
254 | return feature_channels
255 |
256 |
257 | def normalize_descriptor_channel_wise(res):
258 | """
259 | Normalizes the descriptor into RGB color space for each channel
260 | :param res: numpy.array [H,W,D]
261 | Output of the network, per-pixel dense descriptor
262 | :param stats: dict, with fields ['min', 'max', 'mean'], which are used to normalize descriptor
263 | :return: numpy.array
264 | normalized descriptor [H,W,D]
265 | """
266 |
267 | # get #channel
268 | D = np.shape(res)[-1]
269 | normed_res = np.zeros_like(res)
270 | eps = 1e-10
271 |
272 | for d in range(D):
273 | res_min = np.min(res[:, :, d])
274 | res_max = np.max(res[:, :, d])
275 | scale_factor = res_max - res_min + eps
276 | normed_res[:, :, d] = (res[:, :, d] - res_min) / scale_factor
277 |
278 | return normed_res
279 |
280 |
281 | def colorize(hue, lightness, normalize_hue=True, lightness_range=1.0):
282 | """
283 | Project feature maps (hue) onto input images (lightness)
284 | hue is normalized channel- and image-wise
285 |
286 | :param hue: features to be visualized
287 | :type hue: size of [#batch, #channel, H, W], with its value range supposed to be [-1.0, 1.0]
288 | :param lightness: input image (grey)
289 | :type lightness: size [#batch, 1, H, W], its value range is supposed to be [0, 1.0]
290 | :param normalize_hue: normalize hue to [0, 1]
291 | :type normalize_hue:
292 | :param lightness_range:
293 | :type lightness_range:
294 | :return: hue overlapped on the lightness
295 | :rtype: size of [#batch, #channel, 3, H, W]
296 | """
297 | # process the input value to be visualisation range
298 | lightness /= lightness_range
299 |
300 | out = np.zeros(list(hue.shape) + [3]) # now size become [#batch, #channel, H, W, 3]
301 | if normalize_hue:
302 | image_num = np.shape(hue)[0]
303 | normed_hue = np.zeros_like(hue)
304 | # for i in xrange(image_num):
305 | # hue_per_image = hue[i, :, :, :]
306 | # hue_per_image = np.transpose(hue_per_image, [1, 2, 0])
307 | # normalized_hue_image = normalize_descriptor_channel_wise(hue_per_image)
308 | # normalized_hue_image = np.transpose(normalized_hue_image, [2, 0, 1])
309 | # normalize_hue[i, :, :, :] = normalized_hue_image
310 | channel_num = np.shape(hue)[1]
311 | eps = 1e-10
312 |
313 | for i in range(image_num):
314 | for c in range(channel_num):
315 | hue_min = np.min(hue[i, c, :, :])
316 | hue_max = np.max(hue[i, c, :, :])
317 | scale_factor = hue_max - hue_min + eps
318 | normed_hue[i, c, :, :] = (hue[i, c, :, :] - hue_min) / scale_factor
319 | else:
320 | normed_hue = np.clip(hue, 0, 1.0) * 0.5 + 0.5
321 | out[:, :, :, :, 0] = normed_hue * 120.0 / 255.0
322 | out[:, :, :, :, 1] = (lightness - 0.5) * 0.5 + 0.5
323 | # out[:, :, :, :, 2] = np.ones(hue.shape) * (np.abs(np.clip(hue, -1.0, 1.0) * 1.0) + 0.0)
324 | out[:, :, :, :, 2] = np.ones(hue.shape) # * (normed_hue)
325 | out = np.reshape(out, [-1, hue.shape[3], 3]) # [#batch * #channel * H, W, 3]
326 | out = cv2.cvtColor((out * 255).astype(np.uint8), cv2.COLOR_HLS2RGB).astype(np.float32) / 255
327 | out = np.reshape(out, list(hue.shape) + [3]) # [#batch, #channel, H, W, 3]
328 | out = np.transpose(out, [0, 1, 4, 2, 3]) # [#batch, #channel, 3, H, W]. this is to meet the create_mosaic function
329 | return out
330 |
331 |
332 | def visualise_frames(mat, name, max_img_visual=None, max_channel_visual=None, step_image=1, step_channel=1,
333 | mosaic_save=None):
334 | """
335 | visualize batches of n-dimensional images
336 | :param mat: images to be shown
337 | :type mat: numpy array of [#batch, #channel, 3, H, W]
338 | :param name: opencv window name
339 | :type name: string
340 | :param max_img_visual: number of images to be shown
341 | :type max_img_visual: int
342 | :param max_channel_visual: number of channels to be shown
343 | :type max_channel_visual: int
344 | :param step_image: the step to skip image number
345 | :type step_image: int
346 | :param step_channel: the step to skip channel number
347 | :type step_channel: int
348 | :param mosaic_save: if not none, the directory to save the mosaic image
349 | :type mosaic_save: string
350 | :return: mosaic image -> deprecated currently
351 | :rtype:
352 | """
353 | array = list()
354 | max_img = mat.shape[0] if max_img_visual is None else min(max_img_visual, mat.shape[0])
355 | max_channel = mat.shape[1] if max_channel_visual is None else min(max_channel_visual, mat.shape[1])
356 | for i in range(0, max_img, step_image):
357 | sub = []
358 | for j in range(0, max_channel, step_channel):
359 | sub.append(mat[i, j])
360 | array.append(sub)
361 | mosaic = create_mosaic(array)
362 | cv2.namedWindow(name, cv2.WINDOW_NORMAL)
363 | cv2.imshow(name, mosaic)
364 | if mosaic_save is not None:
365 | cv2.imwrite(mosaic_save, mosaic)
366 | return mosaic
367 |
368 |
369 |
370 | class Toolbar:
371 | def reset(self, width, tot, title=None):
372 | self.width = max(int(min(width, tot)), 1)
373 | self.tot = int(max(tot, 1))
374 | self.current = 0
375 | if title is not None:
376 | print(title)
377 | sys.stdout.write("[%s]" % (" " * self.width))
378 | sys.stdout.flush()
379 | sys.stdout.write("\b" * (self.width + 1))
380 |
381 | def incr(self):
382 | n = self.current + 1
383 | if math.floor(n * self.width / self.tot) > math.floor(self.current * self.width / self.tot):
384 | sys.stdout.write("-")
385 | sys.stdout.flush()
386 | self.current = n
387 | if self.current == self.tot:
388 | sys.stdout.write("\n")
389 |
--------------------------------------------------------------------------------
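A tiny illustrative call of create_mosaic above, tiling an RGB tensor next to a single-channel map; the tensor values are random placeholders.

import torch
import cv2
from tools.display import create_mosaic

rgb = torch.rand(1, 3, 120, 160)                       # [B, C, H, W] in [0, 1]
err = torch.rand(1, 1, 120, 160)
mosaic = create_mosaic([rgb, err], cmap=['NORMAL', cv2.COLORMAP_JET], order='CHW')
print(mosaic.shape)                                    # (120, 320, 3) uint8 image
# cv2.imshow('mosaic', mosaic); cv2.waitKey(0)         # uncomment for an on-screen check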
/code/tools/rgbd_odometry.py:
--------------------------------------------------------------------------------
1 | """
2 | An implementation of RGBD odometry using Open3D library for comparison in the paper
3 | # SPDX-FileCopyrightText: 2021 Binbin Xu
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | """
6 |
7 | import open3d as o3d
8 | import numpy as np
9 | import torch
10 | import copy
11 |
12 | class RGBDOdometry():
13 |
14 | def __init__(self, mode='RGBD'):
15 |
16 | self.odo_opt = None
17 | if mode == "RGBD":
18 | print("Using RGB-D Odometry")
19 | self.odo_opt = o3d.odometry.RGBDOdometryJacobianFromColorTerm()
20 | elif mode == "COLOR_ICP":
21 | print("Using Hybrid RGB-D Odometry")
22 | self.odo_opt = o3d.odometry.RGBDOdometryJacobianFromHybridTerm()
23 | else:
24 | raise NotImplementedError()
25 |
26 | def set_K(self, K, width, height):
27 | fx, fy, cx, cy = K
28 | K = o3d.camera.PinholeCameraIntrinsic(width, height, fx, fy, cx, cy)
29 | return K
30 |
31 | def batch_track(self, batch_rgb0, batch_dpt0, batch_rgb1, batch_dpt1, batch_K,
32 | batch_objmask0=None, batch_objmask1=None, vis_pcd=True):
33 | assert batch_rgb0.ndim == 4
34 | B = batch_rgb0.shape[0]
35 | batch_R = []
36 | batch_t = []
37 | if batch_objmask0 is not None:
38 | batch_dpt0 = batch_dpt0 * batch_objmask0
39 | if batch_objmask1 is not None:
40 | batch_dpt1 = batch_dpt1 * batch_objmask1
41 | for i in range(B):
42 | rgb0 = batch_rgb0[i].permute(1,2,0).cpu().numpy()
43 | dpt0 = batch_dpt0[i].permute(1,2,0).cpu().numpy()
44 | rgb1 = batch_rgb1[i].permute(1,2,0).cpu().numpy()
45 | dpt1 = batch_dpt1[i].permute(1,2,0).cpu().numpy()
46 | K = batch_K[i].cpu().numpy().tolist()
47 | pose10, _ = self.track(rgb0, dpt0, rgb1, dpt1, K)
48 | batch_R.append(pose10[0])
49 | batch_t.append(pose10[1])
50 |
51 | batch_R = torch.tensor(batch_R).type_as(batch_K)
52 | batch_t = torch.tensor(batch_t).type_as(batch_K)
53 | return batch_R, batch_t
54 |
55 | def draw_registration_result(self, source, target, transformation, name='Open3D'):
56 | source_temp = copy.deepcopy(source)
57 | target_temp = copy.deepcopy(target)
58 | source_temp.paint_uniform_color([1, 0.706, 0])
59 | target_temp.paint_uniform_color([0, 0.651, 0.929])
60 | source_temp.transform(transformation)
61 | o3d.visualization.draw_geometries([source_temp, target_temp], window_name=name)
62 |
63 | def track(self, rgb0, dpt0, rgb1, dpt1, K, vis_pcd=True, odo_init=None):
64 | H, W, _ = rgb0.shape
65 | intrinsic = self.set_K(K, W, H) # set_K expects (width, height)
66 | rgbd_0 = o3d.geometry.RGBDImage.create_from_color_and_depth(
67 | o3d.geometry.Image(rgb0), o3d.geometry.Image(dpt0), depth_scale=1, depth_trunc=3.0)
68 | rgbd_1 = o3d.geometry.RGBDImage.create_from_color_and_depth(
69 | o3d.geometry.Image(rgb1), o3d.geometry.Image(dpt1), depth_scale=1, depth_trunc=3.0)
70 | if odo_init is None:
71 | odo_init = np.identity(4)
72 | if vis_pcd:
73 | pcd_0 = o3d.geometry.PointCloud.create_from_rgbd_image(
74 | rgbd_0, intrinsic)
75 | pcd_1 = o3d.geometry.PointCloud.create_from_rgbd_image(
76 | rgbd_1, intrinsic)
77 |
78 | # option = o3d.odometry.OdometryOption()
79 | option = o3d.odometry.OdometryOption(min_depth=0.01, max_depth_diff=1.0)
80 | # print(option)
81 |
82 | [is_success, T_10, info] = o3d.odometry.compute_rgbd_odometry(
83 | rgbd_0, rgbd_1, intrinsic,
84 | odo_init, self.odo_opt, option)
85 |
86 | trs = T_10[0:3, 3]
87 | if (trs>1).sum(): #is_success and vis_pcd:
88 | print(T_10)
89 | print(is_success)
90 | # pcd_0 = o3d.geometry.PointCloud.create_from_rgbd_image(
91 | # rgbd_0, intrinsic)
92 | # pcd_0.transform(T_10)
93 | # o3d.visualization.draw_geometries([pcd_1, pcd_0])
94 | self.draw_registration_result(pcd_0, pcd_1, odo_init, 'init')
95 | self.draw_registration_result(pcd_0, pcd_1, T_10, 'aligned')
96 |
97 | trs = T_10[0:3, 3]
98 | rot = T_10[0:3, 0:3]
99 | pose10 = [rot, trs]
100 |
101 | return pose10, is_success
--------------------------------------------------------------------------------
/code/train.py:
--------------------------------------------------------------------------------
1 | """
2 | The training script for Deep Probabilistic Feature-metric Tracking,
3 |
4 | # SPDX-FileCopyrightText: 2021 Binbin Xu
5 | # SPDX-License-Identifier: BSD-3-Clause
6 |
7 | @author: Zhaoyang Lv
8 | @date: March 2019
9 | """
10 |
11 | import os, sys, argparse, time
12 | import evaluate as eval_utils
13 | import models.LeastSquareTracking as ICtracking
14 | import models.criterions as criterions
15 | import models.geometry as geometry
16 | import train_utils
17 | import config
18 | from data.dataloader import load_data
19 | from Logger import log_git_revisions_hash
20 |
21 | import torch
22 | import torch.nn as nn
23 | import torch.utils.data as data
24 |
25 | from timers import Timers
26 | from tqdm import tqdm
27 |
28 |
29 | class data_prefetcher():
30 | def __init__(self, loader):
31 | self.loader = iter(loader)
32 | self.stream = torch.cuda.Stream()
33 | # self.mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).cuda().view(1,3,1,1)
34 | # self.std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).cuda().view(1,3,1,1)
35 | # With Amp, it isn't necessary to manually convert data to half.
36 | # if args.fp16:
37 | # self.mean = self.mean.half()
38 | # self.std = self.std.half()
39 | self.preload()
40 | #self.next_batch = None
41 |
42 | def preload(self):
43 | try:
44 | self.next_batch = next(self.loader)
45 | except StopIteration:
46 | self.next_batch = None
47 | return
48 | with torch.cuda.stream(self.stream):
49 | for k, val in enumerate(self.next_batch):
50 | if torch.is_tensor(val):
51 | self.next_batch[k] = self.next_batch[k].cuda(non_blocking=True)
52 |
53 | # With Amp, it isn't necessary to manually convert data to half.
54 | # if args.fp16:
55 | # self.next_input = self.next_input.half()
56 | # else:
57 | # self.next_input = self.next_input.float()
58 |
59 | def next(self):
60 | torch.cuda.current_stream().wait_stream(self.stream)
61 | next_batch = self.next_batch
62 | self.preload()
63 | return next_batch
64 |
65 | def create_train_eval_loaders(options, eval_type, keyframes,
66 | total_batch_size = 8,
67 | trajectory = ''):
68 | """ create the evaluation loader at different keyframes set-up
69 | """
70 | eval_loaders = {}
71 |
72 | for kf in keyframes:
73 | if options.image_resize is not None:
74 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory,
75 | image_resize=options.image_resize,
76 | options=options)
77 | else:
78 | np_loader = load_data(options.dataset, [kf], eval_type, trajectory, options=options)
79 | eval_loaders['{:}_keyframe_{:}'.format(trajectory, kf)] = data.DataLoader(np_loader,
80 | pin_memory=True,
81 | batch_size = int(total_batch_size),
82 | shuffle = False, num_workers = options.cpu_workers)
83 |
84 | return eval_loaders
85 |
86 | def train_one_epoch(options, dataloader, net, optim, epoch, logger, objectives,
87 | known_mask=False, timers=None):
88 |
89 | net.train()
90 |
91 | # prefetcher = data_prefetcher(dataloader)
92 |
93 | progress = tqdm(dataloader, ncols=100,
94 | desc = 'train deeper inverse compositional algorithm #epoch{:}'.format(epoch),
95 | total= len(dataloader))
96 |
97 | epoch_len = len(dataloader)
98 |
99 | if timers is None: timers_iter = Timers()
100 |
101 | if timers: timers.tic('one iteration')
102 | else: timers_iter.tic('one iteration')
103 |
104 | for batch_idx, batch in enumerate(progress):
105 |
106 | # batch = prefetcher.next()
107 | # batch_idx = 0
108 | # with tqdm(total=epoch_len) as pbar:
109 | # while batch is not None:
110 | # batch_idx += 1
111 | # if batch_idx >= epoch_len:
112 | # break
113 |
114 | iteration = epoch*epoch_len + batch_idx
115 | display_dict = {}
116 |
117 | optim.zero_grad()
118 |
119 | if timers: timers.tic('forward step')
120 |
121 | if known_mask: # for dataset that with mask or need mask
122 | color0, color1, depth0, depth1, Rt, K, obj_mask0, obj_mask1 = \
123 | train_utils.check_cuda(batch[:8])
124 | else:
125 | color0, color1, depth0, depth1, Rt, K = \
126 | train_utils.check_cuda(batch[:6])
127 | obj_mask0, obj_mask1 = None, None
128 |
129 | # A lazy way to bypass invalid pixels.
130 | invalid_mask = (depth0 == depth0.min()) | (depth0 == depth0.max())
131 | if obj_mask0 is not None:
132 | invalid_mask = ~obj_mask0 | invalid_mask
133 |
134 | if options.train_uncer_prop:
135 | if options.obj_only:
136 | Rs, ts, sigma_ksi = net.forward(color0, color1, depth0, depth1, K,
137 | obj_mask0=obj_mask0, obj_mask1=obj_mask1,
138 | logger=logger,
139 | vis=options.vis_feat,
140 | iteration=iteration)[:3]
141 | else:
142 | Rs, ts, sigma_ksi = net.forward(color0, color1, depth0, depth1, K,
143 | logger=logger,
144 | vis=options.vis_feat,
145 | iteration=iteration)[:3]
146 | else:
147 | if options.obj_only:
148 | Rs, ts = net.forward(color0, color1, depth0, depth1, K,
149 | obj_mask0=obj_mask0, obj_mask1=obj_mask1,
150 | logger=logger,
151 | vis=options.vis_feat,
152 | iteration=iteration)[:2]
153 | else:
154 | Rs, ts = net.forward(color0, color1, depth0, depth1, K,
155 | logger=logger,
156 | vis=options.vis_feat,
157 | iteration=iteration)[:2]
158 |
159 | if timers: timers.toc('forward step')
160 | if timers: timers.tic('calculate loss')
161 |
162 | R_gt, t_gt = Rt[:,:3,:3], Rt[:,:3,3]
163 |
164 | # assert(flow_loss) # the only loss used for training
165 | # we want to compute epe anyway
166 | flow_loss = criterions.compute_RT_EPE_loss
167 |
168 | epes3d = flow_loss(Rs, ts, R_gt, t_gt, depth0, K, invalid=invalid_mask).mean() * 1e2
169 | if 'EPE3D' in objectives:
170 | loss = epes3d
171 | elif 'RPE' in objectives:
172 | angle_error, trans_error = criterions.compute_RPE_loss(Rs, ts, R_gt, t_gt)
173 | loss = angle_error + trans_error
174 | elif 'URPE' in objectives:
175 | assert options.train_uncer_prop
176 | loss = criterions.compute_RPE_uncertainty(Rs, ts, R_gt, t_gt, sigma_ksi)
177 | elif 'UEPE' in objectives:
178 | loss = criterions.compute_RT_EPE_uncertainty_loss(Rs, ts, R_gt, t_gt, depth0, K, sigma_ksi=sigma_ksi, uncertainty_type=options.uncertainty, invalid=invalid_mask)[0].mean() # the function returns (loss, epe); reduce the loss to a scalar for backward()
179 |
180 | display_dict['train_epes3d'] = epes3d.item()
181 | display_dict['train_loss'] = loss.item()
182 |
183 | if timers: timers.toc('calculate loss')
184 | if timers: timers.tic('backward')
185 |
186 | loss.backward()
187 |
188 | if timers: timers.toc('backward')
189 | torch.nn.utils.clip_grad_norm_(net.parameters(), 5.0)
190 | # if options.uncertainty == 'gaussian':
191 | # torch.nn.utils.clip_grad_norm_(net.parameters(), 5.0)
192 | optim.step()
193 |
194 | lr = train_utils.get_learning_rate(optim)
195 | display_dict['lr'] = lr
196 |
197 | if timers:
198 | timers.toc('one iteration')
199 | batch_time = timers.get_avg('one iteration')
200 | timers.tic('one iteration')
201 | else:
202 | timers_iter.toc('one iteration')
203 | batch_time = timers_iter.get_avg('one iteration')
204 | timers_iter.tic('one iteration')
205 |
206 | logger.write_to_tensorboard(display_dict, iteration)
207 | logger.write_to_terminal(display_dict, epoch, batch_idx, epoch_len, batch_time, is_train=True)
208 |
209 | # batch = prefetcher.next()
210 | # pbar.update(1)
211 |
212 | def train(options):
213 |
214 | if options.time:
215 | timers = Timers()
216 | else:
217 | timers = None
218 |
219 |     total_batch_size = options.batch_per_gpu * max(1, torch.cuda.device_count())  # avoid a zero batch size on CPU-only machines
220 |
221 | checkpoint = train_utils.load_checkpoint_train(options)
222 |
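    |     # parse the comma-separated keyframe intervals, e.g. '1,2,4' -> [1, 2, 4], used by the data loader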
223 | keyframes = [int(x) for x in options.keyframes.split(',')]
224 | if options.image_resize is not None:
225 | train_loader = load_data(options.dataset, keyframes, load_type='train',
226 | image_resize=options.image_resize, options=options)
227 | else:
228 | train_loader = load_data(options.dataset, keyframes, load_type = 'train', options=options)
229 | train_loader = data.DataLoader(train_loader,
230 | batch_size = total_batch_size,
231 | pin_memory=True,
232 | shuffle = True, num_workers = options.cpu_workers)
233 | if options.dataset in ['BundleFusion', 'TUM_RGBD', 'ScanNet']:
234 | obj_has_mask = False
235 | else:
236 | obj_has_mask = True
237 |
238 | eval_loaders = create_train_eval_loaders(options, 'validation', keyframes, total_batch_size)
239 |
240 | logfile_name = '_'.join([
241 | options.prefix, # the current test version
242 | # options.network,
243 | options.encoder_name,
244 | options.mestimator,
245 | options.solver,
246 | options.dataset,
247 | 'obj', str(options.obj_only),
248 | 'uCh', str(options.uncertainty_channel),
249 | options.uncertainty,
250 | 'rmT', str(options.remove_tru_sigma),
251 | # options.direction,
252 | 'fCh', str(options.feature_channel),
253 | options.feature_extract,
254 | 'iP', options.init_pose,
255 | 'mH', options.multi_hypo,
256 | # 'resInput', str(options.res_input),
257 | # 'initScale', str(options.scale_init_pose),
258 | # 'uncer_prop', str(options.train_uncer_prop),
259 | 'wICP', str(options.combine_ICP),
260 | 's', options.scaler,
261 | 'lr', str(options.lr),
262 | 'batch', str(total_batch_size),
263 | # 'kf', options.keyframes
264 | ])
265 |
266 | print("Initialize and train the Deep Trust Region Network")
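    |     # build the learned least-squares tracker; the encoder, M-estimator, solver and
    |     # uncertainty model are all selected from the command-line options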
267 | net = ICtracking.LeastSquareTracking(
268 | encoder_name = options.encoder_name,
269 | uncertainty_type= options.uncertainty,
270 | direction = options.direction,
271 | max_iter_per_pyr= options.max_iter_per_pyr,
272 | mEst_type = options.mestimator,
273 | solver_type = options.solver,
274 | options = options,
275 | tr_samples = options.tr_samples,
276 | add_init_noise = options.add_init_pose_noise,
277 | no_weight_sharing = options.no_weight_sharing,
278 | timers = timers)
279 |
280 | if options.no_weight_sharing:
281 | logfile_name += '_no_weight_sharing'
282 | logger = train_utils.initialize_logger(options, logfile_name)
283 | log_git_revisions_hash(logger.log_dir)
284 | with open(os.path.join(logger.log_dir,'commandline_args.txt'), 'w') as f:
285 | f.write('\n'.join(sys.argv[1:]))
286 |
287 | if options.checkpoint:
288 | net.load_state_dict(checkpoint['state_dict'])
289 |
290 | if torch.cuda.is_available():
291 | net.cuda()
292 |
293 | net.train()
294 |
295 | if torch.cuda.device_count() > 1:
296 | print("Use", torch.cuda.device_count(), "GPUs for training!")
297 | # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
298 | net = nn.DataParallel(net)
299 |
300 | train_objective = [options.loss] # ['EPE3D'] # Note: we don't use RPE for training
301 | eval_objectives = ['EPE3D', 'RPE']
302 |
303 | num_params = train_utils.count_parameters(net)
304 |
305 | if num_params < 1:
306 |         print('There are no learnable parameters in this baseline.')
307 |         print('No training; only one iteration of evaluation.')
308 | no_training = True
309 | else:
310 |         print('There is a total of {:} learnable parameters'.format(num_params))
311 | no_training = False
312 | optim = train_utils.create_optim(options, net)
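    |         # decay the learning rate by lr_decay_ratio at each epoch listed in lr_decay_epochs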
313 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optim,
314 | milestones=options.lr_decay_epochs,
315 | gamma=options.lr_decay_ratio)
316 |
317 | freq = options.save_checkpoint_freq
318 | for epoch in range(options.start_epoch, options.epochs):
319 |
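    |         # every `freq` epochs: save a checkpoint and, unless options.no_val is set,
    |         # run the validation loaders before training continues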
320 | if epoch % freq == 0:
321 | checkpoint_name = 'checkpoint_epoch{:d}.pth.tar'.format(epoch)
322 | print('save {:}'.format(checkpoint_name))
323 | state_info = {'epoch': epoch, 'num_param': num_params}
324 | logger.save_checkpoint(net, state_info, filename=checkpoint_name)
325 |
326 | if options.no_val is False:
327 | for k, loader in eval_loaders.items():
328 |
329 | eval_name = '{:}_{:}'.format(options.dataset, k)
330 |
331 | eval_info = eval_utils.evaluate_trust_region(
332 | loader, net, eval_objectives,
333 | known_mask = obj_has_mask,
334 | eval_name = eval_name,
335 | timers = timers,
336 | logger=logger,
337 | obj_only=options.obj_only,
338 | epoch=epoch,
339 | tracker='learning_based',
340 | )
341 |
342 | display_dict = {"{:}_epe3d".format(eval_name): eval_info['epes'].mean(),
343 | "{:}_rpe_angular".format(eval_name): eval_info['angular_error'].mean(),
344 | "{:}_rpe_translation".format(eval_name): eval_info['translation_error'].mean()}
345 |
346 | logger.write_to_tensorboard(display_dict, epoch)
347 |
348 | if no_training: break
349 |
350 | train_one_epoch(options, train_loader, net, optim, epoch, logger,
351 | train_objective, known_mask=obj_has_mask, timers=timers)
352 |
353 | scheduler.step()
354 |
355 | if __name__ == '__main__':
356 |
357 | parser = argparse.ArgumentParser(description='Training the network')
358 |
359 | config.add_basics_config(parser)
360 | config.add_train_basics_config(parser)
361 | config.add_train_optim_config(parser)
362 | config.add_train_log_config(parser)
363 | config.add_train_loss_config(parser)
364 | config.add_tracking_config(parser)
365 |
366 | options = parser.parse_args()
367 |
368 | options.start_epoch = 0
369 |
370 | print('---------------------------------------')
371 | print_options = vars(options)
372 | for key in print_options.keys():
373 | print(key+': '+str(print_options[key]))
374 | print('---------------------------------------')
375 |
376 | # torch.backends.cudnn.benchmark = True
377 | torch.manual_seed(1)
378 | torch.cuda.manual_seed(1)
379 |
380 | print('Start training...')
381 | train(options)
382 |
--------------------------------------------------------------------------------
/code/train_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | The training utility functions
3 |
4 | @author: Zhaoyang Lv
5 | @date: March 2019
6 | """
7 |
8 | import os, sys
9 | from os.path import join
10 | import torch
11 | import torch.nn as nn
12 |
13 | def check_cuda(items):
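    |     """ Move every tensor (or numpy array) in `items` onto the GPU when CUDA is
    |         available; other entries, and CPU-only runs, are returned unchanged. """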
14 | if torch.cuda.is_available():
15 | to_cuda=[]
16 | for x in items:
17 | if torch.is_tensor(x):
18 | to_cuda.append(x.cuda())
19 | elif type(x).__module__ == 'numpy':
20 | to_cuda.append(torch.from_numpy(x).cuda())
21 | else:
22 | to_cuda.append(x)
23 | return to_cuda
24 | # return [x.cuda() if torch.is_tensor(x) else torch.from_numpy(x).cuda() for x in items]
25 | else:
26 | return items
27 |
28 | def initialize_logger(opt, logfile_name):
29 | """ Initialize the logger for the network
30 | """
31 | from Logger import TensorBoardLogger
32 | log_dir = opt.dataset
33 | # if opt.resume_training:
34 | # logfile_name = '_'.join([
35 | # logfile_name,
36 | # 'resume'])
37 |
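    |     # logs are written to logs/<dataset>/<checkpoint_folder>/<logfile_name>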
38 | log_dir = join('logs', log_dir, opt.checkpoint_folder, logfile_name)
39 | logger = TensorBoardLogger(log_dir, logfile_name)
40 | return logger
41 |
42 | def create_optim(config, network):
43 | """ Create the optimizer
44 | """
45 | if config.opt=='sgd':
46 | optim = torch.optim.SGD(network.parameters(),
47 | lr = config.lr,
48 | momentum = 0.9,
49 | weight_decay = 4e-4,
50 | nesterov=False)
51 | elif config.opt=='adam' or config.opt=='sgdr':
52 | optim = torch.optim.Adam(network.parameters(),
53 | lr = config.lr,
54 | weight_decay = 4e-4
55 | )
56 | elif config.opt=='rmsprop':
57 | optim = torch.optim.RMSprop(network.parameters(),
58 | lr = config.lr,
59 | weight_decay = 1e-4)
60 | else:
61 | raise NotImplementedError
62 |
63 | return optim
64 |
65 | def count_parameters(model):
66 | return sum(p.numel() for p in model.parameters() if p.requires_grad)
67 |
68 | def load_checkpoint_test(opt):
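    |     """ Load the checkpoint for testing; raises if the file does not exist. """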
69 | if os.path.isfile(opt.checkpoint):
70 | print('=> loading checkpoint '+ opt.checkpoint)
71 | checkpoint = torch.load(opt.checkpoint)
72 | else:
73 | raise Exception('=> no checkpoint found at '+opt.checkpoint)
74 | return checkpoint
75 |
76 | def load_checkpoint_train(opt):
77 |     """ Load the checkpoint file if specified
78 | """
79 | checkpoint = None
80 | if opt.checkpoint:
81 | if os.path.isfile(opt.checkpoint):
82 | print('=> loading checkpoint '+ opt.checkpoint)
83 |
84 | checkpoint = torch.load(opt.checkpoint)
85 | print('=> loaded checkpoint '+ opt.checkpoint+' epoch %d'%checkpoint['epoch'])
86 | if opt.resume_training:
87 | opt.start_epoch = checkpoint['epoch']
88 |                 print('resume training from the checkpoint')
89 | else:
90 | print('start new training...')
91 |
92 | # This is to configure the module loaded from multi-gpu
93 | #if opt.checkpoint_multigpu:
94 | # from collections import OrderedDict
95 | # state_dict_rename = OrderedDict()
96 | # for k, v in checkpoint['state_dict'].items():
97 | # name = k[7:] # remove `module.`
98 | # state_dict_rename[name] = v
99 | # checkpoint['state_dict'] = state_dict_rename
100 | else:
101 | print('=> no checkpoint found at '+opt.checkpoint)
102 | return checkpoint
103 |
104 | def set_learning_rate(optim, lr):
105 |     """ Manually set the learning rate for all parameter groups
106 | """
107 | for param_group in optim.param_groups:
108 | param_group['lr']=lr
109 |
110 | def get_learning_rate(optim, name=None):
111 | """ retrieve the current learning rate
112 | """
113 | if name is None:
114 |         # assume all parameter groups share the same learning rate
115 | return optim.param_groups[0]['lr']
116 |
117 | def adjust_learning_rate_manual(optim, epoch, lr, lr_decay_epochs, lr_decay_ratio):
118 |     """ Manually decay the learning rate at the epochs given in lr_decay_epochs
119 | """
120 | for e in lr_decay_epochs:
121 | if epoch