├── .gitignore ├── LICENSE ├── README.md ├── config ├── data.json ├── test.json └── train.json ├── data ├── __init__.py ├── build_datasets.py ├── build_train_test_dataset_loc.py ├── pose_graph_tools.py └── sample_data.py ├── docker ├── Dockerfile ├── docker_data.sh ├── docker_test.sh └── docker_train.sh ├── download_datasets.sh ├── download_networks.sh ├── extract_features.py ├── figures ├── box_plot_generalization.png ├── box_plot_lighting_change.png ├── conditions_generalization.png ├── conditions_lighting_change.png ├── network.png ├── overview.png └── pipeline.png ├── requirements.txt ├── src ├── __init__.py ├── dataset.py ├── model │ ├── __init__.py │ ├── keypoint_block.py │ ├── matcher_block.py │ ├── pipeline.py │ ├── ransac_block.py │ ├── svd_block.py │ ├── unet.py │ └── weight_block.py ├── test.py ├── train.py └── utils │ ├── __init__.py │ ├── early_stopping.py │ ├── keypoint_tools.py │ ├── lie_algebra.py │ ├── statistics.py │ ├── stereo_camera_model.py │ ├── transform.py │ └── utils.py └── visualization └── plots.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
97 | __pypackages__/
98 | 
99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 | 
103 | # SageMath parsed files
104 | *.sage.py
105 | 
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 | 
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 | 
119 | # Rope project settings
120 | .ropeproject
121 | 
122 | # mkdocs documentation
123 | /site
124 | 
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 | 
130 | # Pyre type checker
131 | .pyre/
132 | 
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 | 
3 | Copyright (c) 2021, ASRL - Autonomous Space Robotics Lab
4 | All rights reserved.
5 | 
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 | 
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Keeping an Eye on Things: Deep Learned Features for Long-Term Visual Localization
2 | 
3 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/overview.png?raw=true)
4 | 
5 | We learn visual features for localization across large appearance change and use them in Visual Teach and Repeat (VT&R) for closed-loop path-following on a robot outdoors. In VT&R, we manually drive the robot to teach a path (and build a map) that the robot subsequently repeats autonomously. We include three videos, two that detail the method and experiments ([video1](https://www.youtube.com/watch?v=26AVY1ITE3Q), [video2](https://youtu.be/JsEBs9Y5XVg)) and another that includes a [live demonstration](https://www.youtube.com/watch?v=KkG6TQOVXak) of localization between daytime and after dark.
6 | 
7 | We train a neural network to predict sparse keypoints with associated descriptors and scores that can be used together with a classical pose estimator for localization. Our training pipeline includes a differentiable pose estimator such that training can be supervised with ground truth poses from data collected earlier.
8 | 
9 | We insert the learned features into the existing VT&R pipeline to perform closed-loop path-following in unstructured outdoor environments. We show successful path following across all lighting conditions despite the robot's map being constructed using daylight conditions. In all, we validated the approach with 35.5 km of autonomous path-following experiments in challenging conditions.
10 | 
11 | The details of our method can be found in the [paper](https://arxiv.org/abs/2109.04041):
12 | ```
13 | @ARTICLE{gridseth_ral2022,
14 | author={Gridseth, Mona and Barfoot, Timothy D.},
15 | journal={IEEE Robotics and Automation Letters},
16 | title={Keeping an Eye on Things: Deep Learned Features for Long-Term Visual Localization},
17 | year={2022},
18 | volume={7},
19 | number={2},
20 | pages={1016-1023},
21 | doi={10.1109/LRA.2021.3136867}}
22 | ```
23 | 
24 | ## Learning Pipeline
25 | 
26 | Our method is inspired by Barnes et al. and their [paper](https://arxiv.org/abs/2001.10789) Under the Radar: Learning to Predict Robust Keypoints for Odometry Estimation and Metric Localisation in Radar. We have adapted a similar differentiable learning pipeline and network architecture to work for a stereo camera. The training pipeline takes a pair of stereo images and uses the neural network to extract keypoints, descriptors, and scores. We match keypoints using the descriptors and compute 3D points for each of the matched 2D keypoints. The 3D points are passed to a differentiable pose estimator that uses SVD. The pipeline is illustrated below:
27 | 
28 | ![Test](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/pipeline.png?raw=true)
29 | 
30 | The neural network consists of an encoder and two decoders. The descriptors are extracted from the encoder, while the two decoders provide the keypoint coordinates and scores; see the figure below.
31 | 
32 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/network.png?raw=true)
33 | 
34 | ## Training Data
35 | 
36 | We train the network with data collected during autonomous path-following with a Grizzly robot using Multi-Experience VT&R. VT&R stores images and relative poses between them in a spatio-temporal pose graph. Each time the robot repeats a path, new data is added to the pose graph. We can sample images and relative ground truth poses from a large range of appearance conditions from this pose graph. We train using data from the UTIAS Long-Term Localization and Mapping Dataset. The data is described in detail and can be downloaded from this [website](http://asrl.utias.utoronto.ca/datasets/2020-vtr-dataset/).
37 | 
38 | ## Getting Started
39 | 
40 | We explain how to run the code for generating datasets, training the model, testing the model, and extracting features. We provide a docker container that can be used to run the code. If running without docker is preferred, we provide a `requirements.txt` that lists the necessary dependencies, and we give the commands needed to run the Python scripts.
41 | 
42 | ### Docker
43 | 
44 | In the `docker` folder, we provide a Dockerfile that can be used to build an image that will run the code.
To build the docker image, run the following command:
45 | 
46 | ```
47 | cd docker
48 | docker image build --shm-size=64g -t <docker_image_name> .
49 | ```
50 | ### Extract Features
51 | 
52 | We provide a simple script that shows how keypoints, scores, and descriptors can be extracted from an image.
53 | 
54 | ```
55 | python extract_features.py
56 | ```
57 | 
58 | ### Generate Dataset
59 | 
60 | As mentioned above, we use data from the UTIAS Long-Term Localization and Mapping Dataset. We have written a script that will generate training, validation, and test datasets from the localization data. We sample image pairs and relative poses from the VT&R pose graph. The code for dataset generation is found under the `data` folder. Our code assumes that the data is structured the same way as described on the dataset [website](http://asrl.utias.utoronto.ca/datasets/2020-vtr-dataset/).
61 | 
62 | For the training and validation data we sample randomly from the pose graph for a chosen set of runs. For the test data we sample image pairs and relative poses sequentially along the whole run to emulate localization for each run. We localize each of the chosen test runs to each other.
63 | 
64 | With our code, we have generated one set of training, testing, and validation datasets using the UTIAS Multiseason data and another set using the UTIAS In-The-Dark data. These can be downloaded using the `download_datasets.sh` script. The downloaded files store the data sample ids and ground truth labels (poses) in PyTorch format. The images from the UTIAS Long-Term Localization and Mapping Dataset still need to be stored on the computer to be available to load.
65 | 
66 | Alternatively, new datasets can be generated from scratch by running a docker container as shown below. The `docker_data.sh` script has to be updated with the correct file paths where placeholders are present.
67 | 
68 | ```
69 | cd docker
70 | ./docker_data.sh
71 | ```
72 | 
73 | Finally, the code can be run without the use of docker by executing the following command:
74 | 
75 | ```
76 | python -m data.build_train_test_dataset_loc --config /deep_learned_visual_features/config/data.json
77 | ```
78 | 
79 | ### Train
80 | 
81 | The model can be trained by running the following docker container, provided that updates are made to the file path placeholders in `docker_train.sh`:
82 | 
83 | ```
84 | cd docker
85 | ./docker_train.sh
86 | ```
87 | 
88 | The command for running the training code without docker is the following:
89 | 
90 | ```
91 | python -m src.train --config /deep_learned_visual_features/config/train.json
92 | ```
93 | 
94 | ### Test
95 | 
96 | We provide model weights from training the network with the UTIAS Multiseason and UTIAS In-The-Dark datasets. They can be downloaded using the `download_networks.sh` script. We have the following model weight files:
97 | 
98 | `network_inthedark_layer32.pth`: the network was trained on data from the UTIAS In-The-Dark dataset and used in the lighting-change experiment described in the paper. The first layer of the network encoder is of size 32.
99 | 
100 | `network_multiseason_layer16.pth`: the network was trained on data from the UTIAS Multiseason dataset. The first layer of the network encoder is of size 16.
101 | 
102 | `network_mutiseason_inthedark_layer16.pth`: the network was trained on data from the UTIAS Multiseason dataset and then subsequently fine-tuned with data from the UTIAS In-The-Dark dataset. It was used in the generalization experiment described in the paper. The first layer of the network encoder is of size 16.
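As a quick sanity check after downloading the weights, the checkpoint files can be opened directly with PyTorch before they are used in the test configuration. The snippet below is only a hedged sketch: the `networks/` path prefix is an assumption about where `download_networks.sh` places the files, and the exact dictionary layout inside the checkpoint depends on how it was saved, so the code simply inspects what is stored rather than assuming particular keys.

```
import torch

# Hypothetical location of one of the downloaded weight files; adjust the path
# to wherever download_networks.sh stored them on your machine.
checkpoint_path = 'networks/network_multiseason_layer16.pth'

# Load on the CPU so no GPU is needed for this check.
checkpoint = torch.load(checkpoint_path, map_location='cpu')

if isinstance(checkpoint, dict):
    # Most PyTorch checkpoints are dictionaries; print the top-level keys
    # (these may be checkpoint fields such as a model state dict and optimizer
    # state, or parameter names if the file stores a state dict directly).
    print('Checkpoint keys:', list(checkpoint.keys()))
else:
    # Otherwise the file holds a single object, e.g. a serialized module.
    print('Loaded object of type:', type(checkpoint))
```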
103 | 
104 | The testing can be done using the docker container, provided that the placeholder file paths in `docker_test.sh` are updated:
105 | 
106 | ```
107 | cd docker
108 | ./docker_test.sh
109 | ```
110 | The command for running the Python test script is the following:
111 | 
112 | ```
113 | python -m src.test --config /deep_learned_visual_features/config/test.json
114 | ```
115 | ### Configurations
116 | 
117 | In the `config` folder we provide JSON files that set the configuration parameters for the different tasks.
118 | 
119 | #### Important note
120 | The UTIAS Multiseason and UTIAS In-The-Dark datasets were collected with two different cameras and hence their calibration parameters are different.
121 | 
122 | ```
123 | UTIAS Multiseason camera parameters:
124 | width: 512
125 | height: 384
126 | focal length: 388.425
127 | cx: 253.502
128 | cy: 196.822
129 | baseline: 0.239946
130 | ```
131 | ```
132 | UTIAS In-The-Dark camera parameters:
133 | width: 512
134 | height: 384
135 | focal length: 387.777
136 | cx: 257.446
137 | cy: 197.718
138 | baseline: 0.239965
139 | ```
140 | 
141 | The poses from the dataset are all given in the vehicle frame of the robot. In our code we have a transform `T_s_v` that is the vehicle-to-sensor transform for the robot. The sensor frame coincides with the left camera frame of the stereo camera. In the code we compute pose losses with the poses in the vehicle frame. When computing the pose from 3D points using SVD, the 3D points are in the sensor frame. Hence the initially computed pose is given in the sensor frame before we transform it back to the vehicle frame; see the code [here](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/src/model/svd_block.py#L71).
142 | 
143 | #### Generate Dataset
144 | 
145 | In `config/data.json`, we find the following configurations:
146 | 
147 | `dataset_name`: give the dataset a name to reference when loading it for training/testing. \
148 | `num_train_samples`: the number of training samples we want to generate. \
149 | `num_validation_samples`: the number of validation samples we want to generate. \
150 | `max_temporal_len`: when localizing live vertices to the map vertices, allow up to this topological distance between them. \
151 | `dataset`: image height and width. \
152 | `train_paths`: list of names of paths we want to include in the training data ('multiseason', 'inthedark'). Use the name you have stored the dataset under on your computer, i.e. `/path_to_data/dataset_name/run_000001/...`. \
153 | `test_paths`: list of names of paths we want to include in the test data. \
154 | `sampling_ratios_train`: for each path in the training data, which portion of the total samples it should contribute. If we use two paths ('multiseason', 'inthedark') we could for example sample 0.7 from 'multiseason' and 0.3 from 'inthedark'. \
155 | `sampling_ratios_valid`: same as above for validation data. \
156 | `train_runs`: the runs from the paths to include in the training data. \
157 | `validation_runs`: the runs from the paths to include in the validation data. \
158 | `test_runs`: the runs from the paths to include in the test data. \
159 | `temporal_len`: same topological distance as max_temporal_len above. Here we set one fixed distance for each test run (when the specific run is used as the teach/map run).
160 | 
161 | #### Train
162 | 
163 | In `config/train.json`, we find the following configurations:
164 | 
165 | `dataset_name`: name of the dataset to use. \
166 | `checkpoint_name`: name of the checkpoint used to store the model; alternatively, the checkpoint to load to resume training an existing model. \
167 | `training`: \
168 |     `start_pose_estimation`: we can train initially using only the keypoint loss. This sets the epoch at which to start pose estimation and using the pose loss. \
169 |     `max_epochs`: stop training after this many epochs. \
170 |     `patience`: how many epochs to run with validation loss not improving before stopping. \
171 | `network`: \
172 |     `num_channels`: number of input channels, 3 for one RGB image. \
173 |     `num_classes`: number of classes for the output of the decoders, should be 1. \
174 |     `layer_size`: the size of the first layer of the encoder. Subsequent layer sizes are determined automatically. \
175 | `pipeline`: height and width of the windows in which we detect a keypoint and a parameter for whether to do dense or sparse matching of descriptors. \
176 | `outlier_rejection`: \
177 |     `type`: outlier rejection is done with ground truth poses during training. \
178 |     `dim`: doing outlier rejection for 2D points, 3D points, or in the plane (only considering x, y, heading). \
179 |     `inlier_threshold`: ignored, only needed for RANSAC during testing. \
180 |     `error_tolerance`: error threshold to be considered an inlier. \
181 |     `num_iterations`: ignored, only needed for RANSAC during testing. \
182 | `data_loader`: configurations for the data loader. \
183 | `stereo`: camera calibration parameters. \
184 | `dataset`: parameters for the dataset such as image height, width and whether to normalize the images and include disparity. \
185 | `optimizer`: configurations for the optimizer. \
186 | `scheduler`: configurations for the scheduler. \
187 | `loss`: \
188 |     `types`: the loss types to use (`pose`, `pose_plane` (only SE(2)), `keypoint_2D` (error between 2D image coordinates), `keypoint_3D` (error between 3D points), `keypoint_plane` (only SE(2))). \
189 |     `weights`: weights for the different types of losses.
190 | 
191 | #### Test
192 | 
193 | In `config/test.json`, we find the following configurations:
194 | 
195 | `dataset_name`: name of the dataset to use. \
196 | `checkpoint_name`: name of the model checkpoint to load. \
197 | `network`: \
198 |     `num_channels`: number of input channels, 3 for one RGB image. \
199 |     `num_classes`: number of classes for the output of the decoders, should be 1. \
200 |     `layer_size`: the size of the first layer of the encoder. Subsequent layer sizes are determined automatically. \
201 | `pipeline`: height and width of the windows in which we detect a keypoint and a parameter for whether to do dense or sparse matching of descriptors. \
202 | `outlier_rejection`: \
203 |     `type`: outlier rejection is done with RANSAC during testing. \
204 |     `dim`: doing outlier rejection for 2D points or 3D points. \
205 |     `inlier_threshold`: minimum ratio of inliers required to stop RANSAC early. \
206 |     `error_tolerance`: error threshold to be considered an inlier. \
207 |     `num_iterations`: number of iterations to run RANSAC. \
208 | `data_loader`: configurations for the data loader. \
209 | `stereo`: camera calibration parameters. \
210 | `dataset`: parameters for the dataset such as image height, width and whether to normalize the images and include disparity. \
211 | `optimizer`: configurations for the optimizer. \
212 | `scheduler`: configurations for the scheduler. \
213 | `loss`: \
214 |     `types`: we include the pose loss type during testing so we know whether we are estimating the full SE(3) pose (`pose`) or only SE(2) (`pose_plane`).
215 | 
216 | ## Experimental Results
217 | 
218 | We use the learned features in the VT&R system so that we can test them in closed-loop operation on a robot outdoors. The code for VT&R is available open-source [here](https://utiasasrl.github.io/). The experiments are described in detail in our paper. To summarize, we conducted two experiments. In the lighting-change experiment we trained the network using the UTIAS In-The-Dark dataset from 2016 and taught a new path in the same location around noon in August 2021. We repeated the path across a full range of lighting change from 3 a.m. until 10:30 p.m. with a 100% autonomy rate. In the figures below, we show example images from different conditions and a box plot of the inlier feature matches for each run.
219 | 
220 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/conditions_lighting_change.png?raw=true)
221 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/box_plot_lighting_change.png?raw=true)
222 | 
223 | In the generalization experiment we trained the network using the UTIAS In-The-Dark dataset from 2016 and the UTIAS Multiseason dataset from 2017 and taught a new path in August 2021. In this path we included new areas that had not been seen in the training data to test whether the features can generalize outside of the training paths. We taught the path around 1:30 p.m. and repeated it across a full range of lighting change from 4 a.m. until 9 p.m. with a 100% autonomy rate. In the figures below, we show example images from different conditions and a box plot of the inlier feature matches for each run.
224 | 225 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/conditions_generalization.png?raw=true) 226 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/box_plot_generalization.png?raw=true) 227 | 228 | 229 | 230 | -------------------------------------------------------------------------------- /config/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "home_path": "/home/", 3 | 4 | "dataset_name": "multiseason", 5 | "num_train_samples": 100000, 6 | "num_validation_samples": 20000, 7 | "max_temporal_len": 3, 8 | 9 | "dataset":{ 10 | "height": 384, 11 | "width": 512 12 | }, 13 | 14 | "train_paths": ["multiseason"], 15 | "test_paths": ["multiseason"], 16 | 17 | "sampling_ratios_train": { 18 | "multiseason": 1.0, 19 | "inthedark": 0.0 20 | }, 21 | 22 | "sampling_ratios_valid": { 23 | "multiseason": 1.0, 24 | "inthedark": 0.0 25 | }, 26 | 27 | "train_runs": { 28 | "multiseason": [1, 2, 12, 14, 15, 16, 18, 22, 24, 26, 4, 7, 9, 10, 11, 75, 77, 101, 102, 28, 30, 32, 78, 80, 34, 35, 36, 37, 42, 44, 45, 47, 48, 49, 50, 51, 53, 54, 55, 56, 59, 60, 61, 62, 63, 65, 67, 68, 69, 70, 71, 72, 73, 74, 81, 83, 84, 87, 89, 93, 94, 95, 96, 97, 98, 104, 106, 107, 108, 110, 111, 113, 115, 116, 118, 119, 121, 123, 124, 126, 128, 129, 130, 131, 133, 134], 29 | "inthedark": [1, 5, 6, 8, 10, 13, 14, 18, 20, 21, 22, 24, 26, 27, 29, 30, 31, 33, 34, 36, 38] 30 | }, 31 | 32 | "validation_runs": { 33 | "multiseason": [13, 17, 23, 8, 100, 27, 33, 46, 52, 58, 66, 82, 85, 88, 90, 99, 103, 109, 112, 117, 120, 122, 127, 132], 34 | "inthedark": [3, 7, 9, 12, 15, 19, 25, 32, 37] 35 | }, 36 | 37 | "test_runs": { 38 | "multiseason": [3, 5, 29, 43, 64, 105, 125], 39 | "inthedark": [2, 11, 16, 17, 23, 28, 35] 40 | }, 41 | 42 | "temporal_len": { 43 | "multiseason": {"3":1, "5":1, "29":1, "43":1, "64":1, "105":1, "125":1}, 44 | "inthedark": {"2":1, "11":1, "16": 1, "17": 1, "23":1, "28":1, "35":1} 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /config/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "home_path": "/home/", 3 | 4 | "experiment_name": "test_model", 5 | "dataset_name": "dataset_multiseason", 6 | "checkpoint_name": "network_multiseason_layer16", 7 | 8 | "network": { 9 | "num_channels": 3, 10 | "num_classes": 1, 11 | "layer_size": 16 12 | }, 13 | 14 | "pipeline": { 15 | "window_h": 16, 16 | "window_w": 16, 17 | "dense_matching": true 18 | }, 19 | 20 | "outlier_rejection": { 21 | "on": true, 22 | "type": "ransac", 23 | "dim": ["3D"], 24 | "inlier_threshold": 0.6, 25 | "error_tolerance": {"3D": 4.0, "2D": 50.0}, 26 | "num_iterations": 15 27 | }, 28 | 29 | "data_loader": { 30 | "batch_size": 3, 31 | "num_workers": 3, 32 | "shuffle": false 33 | }, 34 | 35 | "stereo": { 36 | "cu": 253.502, 37 | "cv": 196.822, 38 | "f": 388.425, 39 | "b": 0.239946 40 | }, 41 | 42 | "dataset": { 43 | "height": 384, 44 | "width": 512, 45 | "use_normalization": false, 46 | "use_disparity": true 47 | }, 48 | 49 | "loss": { 50 | "types": [ 51 | "pose_plane" 52 | ] 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /config/train.json: -------------------------------------------------------------------------------- 1 | { 2 | "home_path": "/home/", 3 | 4 | "experiment_name": "train_model", 5 | "dataset_name": "dataset_multiseason", 6 | "checkpoint_name": "network_v1", 7 | 8 | 
"training": { 9 | "start_pose_estimation": 4, 10 | "max_epochs": 500, 11 | "patience": 50 12 | }, 13 | 14 | "network": { 15 | "num_channels": 3, 16 | "num_classes": 1, 17 | "layer_size": 16 18 | }, 19 | 20 | "pipeline": { 21 | "window_h": 16, 22 | "window_w": 16, 23 | "dense_matching": true 24 | }, 25 | 26 | "outlier_rejection": { 27 | "on": true, 28 | "type": "ground_truth", 29 | "dim": ["plane"], 30 | "inlier_threshold": 0.6, 31 | "error_tolerance": {"3D": 4.0, "2D": 50.0, "plane": 3.0}, 32 | "num_iterations": 15 33 | }, 34 | 35 | "data_loader": { 36 | "batch_size": 3, 37 | "num_workers": 3, 38 | "shuffle": false 39 | }, 40 | 41 | "stereo": { 42 | "cu": 253.502, 43 | "cv": 196.822, 44 | "f": 388.425, 45 | "b": 0.239946 46 | }, 47 | 48 | "dataset": { 49 | "height": 384, 50 | "width": 512, 51 | "use_normalization": false, 52 | "use_disparity": true 53 | }, 54 | 55 | "optimizer": { 56 | "type": "Adam", 57 | "lr": 0.0001 58 | }, 59 | 60 | "scheduler": { 61 | "type": "StepLR", 62 | "step_size": 50, 63 | "gamma": 1.0 64 | }, 65 | 66 | "loss": { 67 | "types": [ 68 | "pose_plane", 69 | "keypoint_plane" 70 | ], 71 | "weights": { 72 | "translation": 1.0, 73 | "rotation": 10.0, 74 | "translation_x": 1.0, 75 | "translation_y": 50.0, 76 | "rotation_heading": 10.0, 77 | "keypoint_2D": 0.01, 78 | "keypoint_3D": 1.0, 79 | "keypoint_plane": 2.5 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/data/__init__.py -------------------------------------------------------------------------------- /data/build_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from torch.utils import data 5 | import numpy as np 6 | 7 | from data.pose_graph_tools import Graph 8 | from data.sample_data import random_sample, sequential_sample 9 | 10 | def build_sub_graph(runs, data_dir): 11 | """ 12 | Build a pose graph from the stored data files. We build the pose graph using the tools provided in 13 | https://github.com/utiasASRL/vtr-dataset-tools. We use the data provided by the UTIAS Long-Term Localization 14 | and Mapping Dataset that can be found at http://asrl.utias.utoronto.ca/datasets/2020-vtr-dataset/index.html. 15 | 16 | Args: 17 | runs (list[int]): the ids of the runs from the dataset that we want to include in the pose graph. 18 | data_dir (string): the top-level directory that holds the data for the path for which we build a pose graph. 19 | 20 | Returns: 21 | graph (Graph): a pose graph built from the runs that was provided. 22 | """ 23 | teach_file = f'{data_dir}/run_000000/transforms_temporal.txt' 24 | repeat_files = [] 25 | 26 | for run in runs: 27 | if run != 0: 28 | run_str = str(run).zfill(6) 29 | run_file = f'{data_dir}/run_{run_str}/transforms_spatial.txt' 30 | if os.path.isfile(run_file): 31 | repeat_files.append(run_file) 32 | else: 33 | print(f'Building pose graph, run_file does not exist: {run_file}') 34 | exit(1) 35 | 36 | return Graph(teach_file, repeat_files) 37 | 38 | def build_random_loc_dataset(data_path, path_names, runs, num_samples, temporal_length): 39 | """ 40 | Build a dataset that localizes vertices of one run in the pose graph to another. We sample vertex pairs 41 | randomly from the pose graph. 
42 | 43 | Record the pose transforms as 4x4 matrices and the 6 DOF vector equivalents. The pose from vertex, v1, to 44 | vertex, v2, is given as T_v2_v1. Create sample ids from the vertex ids. A vertex id consists of the id of the 45 | run the vertex belongs to and the id of the pose along that run, i.e. vertex_id = (run_id, pose_id). The sample 46 | id corresponding to pose transform T_v2_v1 is on the form pathname_runid1_poseid1_runid2_poseid2, for 47 | instance: mutliseason_1_531_5_542. 48 | 49 | Args: 50 | data_path (string): path to where the different runs of data are stored. 51 | path_names (list[string]): the paths we will use for sampling. One pose graph is created for each path. 52 | runs (dict): map from the path names to a list of the runs to use for localization pose sampling for the 53 | given path. 54 | num_samples (dict): map from the path names to the number of samples to generate for the given paths. 55 | temporal_length (int): we can 'walk along' the pose graph to pair vertices that har further apart (i.e. 56 | not the closest pair). We set a fixed topological distance/steps we move away 57 | from the start vertex. 58 | 59 | Returns: 60 | samples (list[string]): list of all the sample ids. 61 | labels_se3 (dict): dictionary mapping sample id to pose transform 4x4 matrix provided as a torch.Tensor. 62 | labels_log (dict): dictionary mapping sample id to pose transform 6 DOF vector provided as a torch.Tensor. 63 | """ 64 | all_ids = [] 65 | labels_se3 = {} 66 | labels_log = {} 67 | 68 | for path_name in path_names: 69 | 70 | num_samples_path = num_samples[path_name] 71 | num_runs = len(runs[path_name]) 72 | 73 | print(f'\nRandom dataset from path: {path_name}') 74 | print(f'Sample from runs: {runs[path_name]}') 75 | print(f'Collect {num_samples_path} samples \n') 76 | 77 | data_dir = f'{data_path}/{path_name}' 78 | pose_graph = build_sub_graph(runs[path_name], data_dir) 79 | 80 | path_ids, path_labels_se3, path_labels_log = random_sample(path_name, pose_graph, runs[path_name], 81 | num_samples_path, temporal_length) 82 | 83 | all_ids = all_ids + path_ids 84 | labels_se3 = {**labels_se3, **path_labels_se3} 85 | labels_log = {**labels_log, **path_labels_log} 86 | 87 | print(f'\nRandom dataset total samples: {len(all_ids)}\n') 88 | 89 | return all_ids, labels_se3, labels_log 90 | 91 | def build_sequential_loc_dataset(data_path, path_name, map_run_id, live_run_ids, temporal_length): 92 | """ 93 | Build a dataset that localizes all the vertices of one or more runs in the pose graph to the vertices on one 94 | map (or teach) run. I.e. we localize one or more live (or repeat) runs to one run that we choose as the map 95 | run. We get relative pose transforms for each localized vertex in the order that the vertices were created when 96 | driving the robot during data collection. 97 | 98 | Record the pose transforms as 4x4 matrices and the 6 DOF vector equivalents. The pose from vertex, v1, to 99 | vertex, v2, is given as T_v2_v1. Create sample ids from the vertex ids. A vertex id consists of the id of the 100 | run the vertex belongs to and the id of the pose along that run, i.e. vertex_id = (run_id, pose_id). The sample 101 | id corresponding to pose transform T_v2_v1 is on the form pathname_runid1_poseid1_runid2_poseid2, for 102 | instance: mutliseason_1_531_5_542. 103 | 104 | Args: 105 | data_path (string): path to where the different runs of data are stored. 106 | path_name (string): name given to the path that the pose graph represents. 
107 | map_run_id (int): id of the run to localize to, i.e. compute the relative pose to vertices on this run. 108 | live_run_ids (list[int]): the runs we localize to the map run, i.e. compute relative pose from vertices on 109 | these runs. 110 | temporal_length (int): we can 'walk along' the pose graph to pair vertices that har further apart (i.e. 111 | not the closest pair). We set a fixed topological distance/steps we move away 112 | from the start vertex. 113 | 114 | Returns: 115 | samples (list[string]): list of all the sample ids. 116 | labels_se3 (dict): dictionary mapping sample id to pose transform 4x4 matrix provided as a torch.Tensor. 117 | labels_log (dict): dictionary mapping sample id to pose transform 6 DOF vector provided as a torch.Tensor. 118 | """ 119 | 120 | print(f'\nSequential dataset from path: {path_name}') 121 | print(f'Map (teach) run: {map_run_id}') 122 | print(f'Live (repeat) runs to localize: {live_run_ids} \n') 123 | 124 | data_dir = f'{data_path}/{path_name}' 125 | pose_graph = build_sub_graph([map_run_id] + live_run_ids, data_dir) 126 | 127 | return sequential_sample(path_name, pose_graph, map_run_id, live_run_ids, temporal_length) 128 | 129 | -------------------------------------------------------------------------------- /data/build_train_test_dataset_loc.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import math 4 | import os 5 | import pickle 6 | import random 7 | import sys 8 | import time 9 | 10 | import numpy as np 11 | import torch 12 | import torch.backends.cudnn as cudnn 13 | from torch.utils import data 14 | 15 | from data.build_datasets import build_random_loc_dataset, build_sequential_loc_dataset 16 | from src.utils.utils import MELData, compute_mean_std 17 | 18 | torch.backends.cudnn.benchmark = True 19 | 20 | def main(config): 21 | 22 | # Directory to store outputs to stdout/stderr. 23 | results_path = f"{config['home_path']}/results/dataset/{config['dataset_name']}/" 24 | # Directory where the data is stored (images, transforms text files). 25 | data_path = f"{config['home_path']}/data" 26 | # File to store the generated training dataset. 27 | dataset_path = f"{config['home_path']}/datasets/{config['dataset_name']}.pickle" 28 | 29 | if not os.path.exists(results_path): 30 | os.makedirs(results_path) 31 | 32 | # Print outputs to files 33 | orig_stdout = sys.stdout 34 | fl = open(f'{results_path}out_data.txt', 'w') 35 | sys.stdout = fl 36 | orig_stderr = sys.stderr 37 | fe = open(f'{results_path}err_data.txt', 'w') 38 | sys.stderr = fe 39 | 40 | # Record the parameter configurations. 
41 | print(config) 42 | 43 | # Set up device, using GPU 0 44 | device = torch.device('cuda:{}'.format(0) if torch.cuda.device_count() > 0 else 'cpu') 45 | torch.cuda.set_device(0) 46 | 47 | start = time.time() 48 | 49 | num_samples_train = {} 50 | for path_name in config['train_paths']: 51 | num_samples_train[path_name] = math.floor(config['sampling_ratios_train'][path_name] * config['num_train_samples']) 52 | 53 | num_samples_valid = {} 54 | for path_name in config['test_paths']: 55 | num_samples_valid[path_name] = math.floor(config['sampling_ratios_valid'][path_name] * config['num_validation_samples']) 56 | 57 | # Training and validation data 58 | train_ids, train_labels_se3, train_labels_log = build_random_loc_dataset(data_path, 59 | config['train_paths'], 60 | config['train_runs'], 61 | num_samples_train, 62 | config['max_temporal_len']) 63 | 64 | valid_ids, valid_labels_se3, valid_labels_log = build_random_loc_dataset(data_path, 65 | config['test_paths'], 66 | config['validation_runs'], 67 | num_samples_valid, 68 | config['max_temporal_len']) 69 | 70 | # Class for storing the data before we write it to a pickle file. 71 | mel_data = MELData() 72 | 73 | mel_data.train_ids = train_ids 74 | mel_data.train_labels_se3 = train_labels_se3 75 | mel_data.train_labels_log = train_labels_log 76 | 77 | mel_data.valid_ids = valid_ids 78 | mel_data.valid_labels_se3 = valid_labels_se3 79 | mel_data.valid_labels_log = valid_labels_log 80 | 81 | with open(dataset_path, 'wb') as f: 82 | pickle.dump(mel_data, f, pickle.HIGHEST_PROTOCOL) 83 | 84 | print(f'\nTraining data size: {len(train_ids)}') 85 | print(f'Validation data size: {len(valid_ids)}\n') 86 | print('Saved training and validation dataset', flush=True) 87 | print(f'Generating training and validation datasets took {time.time() - start} s\n') 88 | 89 | # Compute mean and std_dev for the training set to use for input normalization if desirable. 90 | mean, std_dev = compute_mean_std(mel_data, data_path, config['dataset']['height'], config['dataset']['width']) 91 | mel_data.mean = mean 92 | mel_data.std_dev = std_dev 93 | 94 | # Generate sequential datasets that can be used for testing. 95 | mel_data.paths = config['test_paths'] 96 | for path_name in config['test_paths']: 97 | 98 | test_runs = config['test_runs'][path_name] 99 | 100 | # We localize different runs of a path against each other and so store the resulting data in lists. 101 | mel_data.test_ids[path_name] = [] 102 | mel_data.test_labels_se3[path_name] = [] 103 | mel_data.test_labels_log[path_name] = [] 104 | 105 | # Localize each of the runs to each other (but not both ways or to themselves). 
106 | for i in range(len(test_runs) - 1): 107 | 108 | teach_run = test_runs[i] 109 | repeat_runs = test_runs[(i + 1):] 110 | temporal_len = config['temporal_len'][path_name][str(teach_run)] 111 | test_ids, test_labels_se3, test_labels_log = build_sequential_loc_dataset(data_path, 112 | path_name, 113 | teach_run, 114 | repeat_runs, 115 | temporal_len) 116 | 117 | mel_data.test_ids[path_name] = mel_data.test_ids[path_name] + [test_ids] 118 | mel_data.test_labels_se3[path_name] = mel_data.test_labels_se3[path_name] + [test_labels_se3] 119 | mel_data.test_labels_log[path_name] = mel_data.test_labels_log[path_name] + [test_labels_log] 120 | 121 | with open(dataset_path, 'wb') as f: 122 | pickle.dump(mel_data, f, pickle.HIGHEST_PROTOCOL) 123 | 124 | print('\nSaved test dataset', flush=True) 125 | print(f'Generating full training, validation, and test dataset took {time.time() - start} s') 126 | 127 | # Stop writing outputs to files. 128 | sys.stdout = orig_stdout 129 | fl.close() 130 | sys.stderr = orig_stderr 131 | fe.close() 132 | 133 | if __name__ == '__main__': 134 | parser = argparse.ArgumentParser() 135 | parser.add_argument('--config', default=None, type=str, 136 | help='config file path (default: None)') 137 | 138 | args = parser.parse_args() 139 | 140 | with open(args.config) as f: 141 | config = json.load(f) 142 | 143 | main(config) 144 | -------------------------------------------------------------------------------- /data/pose_graph_tools.py: -------------------------------------------------------------------------------- 1 | """Copied from vtr-dataset-tools: https://github.com/utiasASRL/vtr-dataset-tools/blob/master/tools.py""" 2 | 3 | import copy 4 | import numpy as np 5 | 6 | from src.utils.transform import Transform 7 | 8 | class Vertex: 9 | """ 10 | The Vertex object represents a vertex that is part of a pose graph. 11 | Args: 12 | run_id (int): Id of the run the vertex belongs to. 13 | pose_id (int): Id of the pose for the vertex. 14 | next_run_id (int): Id of the run of the vertex that this vertex is connected to, 15 | either spatially (from repeat to teach) or temporally (one teach vertex to another). 16 | next_pose_id (int): Id of the pose of the vertex that this vertex is connected to, 17 | either spatially (from repeat to teach) or temporally (one teach vertex to another). 18 | next_transform (Transform): Transform from this to the connected vertex. 19 | teach (bool): True if vertex belongs to a teach run, False if repeat run. 20 | prev_run_id (int): Id of the previous consecutive run (optional). 21 | prev_pose_id (int): Id of the previous consecutive pose (optional). 22 | timestamp (float): timestamp in seconds of the image associated with the vertex (optional). 23 | gps_time (float): timestamp in seconds of the GPS measurement associated with the vertex (optional). 24 | latitude (float): GPS measurement associated with the vertex (optional). 25 | longitude (float): GPS measurement associated with the vertex (optional). 26 | altitude (float): GPS measurement associated with the vertex (optional). 
27 | """ 28 | def __init__(self, run_id, pose_id, next_run_id, next_pose_id, next_transform, teach=False, prev_run_id=-1, 29 | prev_pose_id=-1, timestamp=None, gps_time=None, latitude=None, longitude=None, altitude=None): 30 | self.vertex_id = (int(run_id), int(pose_id)) 31 | self.next_id = (int(next_run_id), int(next_pose_id)) 32 | self.next_transform = next_transform 33 | self.prev_id = (int(prev_run_id), int(prev_pose_id)) 34 | self.teach = teach 35 | self.timestamp = timestamp 36 | self.gps_time = gps_time 37 | self.latitude = latitude 38 | self.longitude = longitude 39 | self.altitude = altitude 40 | 41 | 42 | class Graph: 43 | ''' 44 | The Graph object represents a pose graph with a teach run and several repeat runs. Each run has 45 | several sequentially connected vertices. Vertices can also be connected between different runs. 46 | Args: 47 | teach_file (str): Path to file containing temporal transforms for teach run. 48 | repeat_files (list of str): Paths to files containing spatial transformations for repeat runs. 49 | im_time_files (list of str): Paths to files containing image time stamps for runs. 50 | gps_files (list of str): Paths to files contaning gps data for runs. 51 | gps_time_files (list of str): Paths to files containing gps time stamps for runs. 52 | ''' 53 | def __init__(self, teach_file, repeat_files=None, im_time_files=None, gps_files=None, gps_time_files=None): 54 | if repeat_files is None: 55 | repeat_files = [] 56 | if im_time_files is None: 57 | im_time_files = {} 58 | if gps_files is None: 59 | gps_files = {} 60 | if gps_time_files is None: 61 | gps_time_files = {} 62 | 63 | # Read teach run transforms from file 64 | transforms_temporal = np.loadtxt(teach_file, delimiter=",") 65 | self.teach_id = int(transforms_temporal[0, 0]) 66 | self.vertices = {} 67 | self.matches = {} 68 | 69 | # Add teach run vertices 70 | for row in transforms_temporal: 71 | transform = Transform(np.array([row[4:7], row[8:11], row[12:15]]), np.array([row[7], row[11], row[15]])) 72 | self.add_vertex(row[0], row[1], row[2], row[3], transform, True) 73 | 74 | self.add_prev_ids() 75 | 76 | # If repeat run transforms, add those to graph 77 | for run_file in repeat_files: 78 | self.add_run(run_file) 79 | 80 | self.add_timestamps(im_time_files) 81 | self.add_gps(gps_files, gps_time_files) 82 | 83 | def get_vertex(self, vertex_id): 84 | """Returns a vertex in the graph. 85 | Args: 86 | vertex_id (tuple of ints): Id of the graph vertex object. The id is a tuple containing 87 | the run id and the the pose id. 88 | Returns: 89 | Vertex: The vertex object, None if the vertex does not exist. 90 | """ 91 | return self.vertices.get(vertex_id) 92 | 93 | def get_all_vertices(self): 94 | 95 | return self.vertices 96 | 97 | def is_vertex(self, vertex_id): 98 | """Returns whether a vertex object corresponding to the given vertex id exists in the graph. 99 | Args: 100 | vertex_id (tuple of ints): Id of the graph vertex object. The id is a tuple containing 101 | the run id and the the pose id. 102 | Returns: 103 | bool: True if vertex exists in graph, False otherwise. 104 | """ 105 | if vertex_id in self.vertices: 106 | return True 107 | else: 108 | return False 109 | 110 | def add_vertex(self, run_id, pose_id, next_run_id, next_pose_id, next_transform, teach=False): 111 | """Adds a new vertex to the graph. 112 | Args: 113 | run_id (int): Id of the run the vertex belongs to. 114 | pose_id (int): Id of the pose for the vertex. 
115 | next_run_id (int): Id of the run of the vertex that this vertex is connected to, 116 | either spatially (from repeat to teach) or temporally (one teach vertex to another). 117 | next_pose_id (int): Id of the pose of the vertex that this vertex is connected to, 118 | either spatially (from repeat to teach) or temporally (one teach vertex to another). 119 | next_transform (Transform): Transform from this to the connected vertex. 120 | """ 121 | v = Vertex(run_id, pose_id, next_run_id, next_pose_id, next_transform, teach) 122 | 123 | # Check if vertex already exists 124 | if self.is_vertex(v.vertex_id): 125 | print("Vertex {0} already exists in graph".format(v.vertex_id)) 126 | return 127 | 128 | # If teach vertex, create an empty list of repeat vertices that have matched, 129 | # otherwise check that teach vertex exists and add this vertex to its list of matches 130 | if teach: 131 | self.matches[v.vertex_id] = [] 132 | self.vertices[v.vertex_id] = v 133 | elif v.next_id in self.vertices: 134 | self.matches[v.next_id].append(v) 135 | self.vertices[v.vertex_id] = v 136 | else: 137 | print("Warning: teach vertex {0} not found in graph so vertex {1} was not added.".format(v.next_id, v.vertex_id)) 138 | 139 | def add_prev_ids(self): 140 | """Iterates over all the teach vertices and adds the index of the previous vertex as an 141 | attribute to each one. 142 | """ 143 | for v_id in self.vertices: 144 | v = self.get_vertex(v_id) 145 | if v.teach and v.next_id in self.vertices: 146 | self.vertices[v.next_id].prev_id = v_id 147 | 148 | def add_run(self, run_file): 149 | """Add a new repeat run to the graph. 150 | Args: 151 | run_file (str): Path to file containing spatial transformations for the repeat run. 152 | 153 | """ 154 | transforms_spatial = np.loadtxt(run_file, delimiter=",") 155 | for row in transforms_spatial: 156 | transform = Transform(np.array([row[4:7], row[8:11], row[12:15]]), np.array([row[7], row[11], row[15]])) 157 | self.add_vertex(row[0], row[1], row[2], row[3], transform, False) 158 | 159 | def add_timestamps(self, im_time_files): 160 | """Add image timestamps to vertices. 161 | Args: 162 | im_time_files (list of str): Paths to files containing image time stamps for runs. 163 | """ 164 | for run in im_time_files: 165 | run_times = np.loadtxt(im_time_files[run], delimiter=",") 166 | for row in run_times: 167 | if (run, int(row[0])) in self.vertices: 168 | self.vertices[(run, row[0])].timestamp = float(row[1]) * 10**-9 169 | else: 170 | print("Warning: attempted to add timestamp for vertex ({0}, {1}) not in graph.".format(run, int(row[0]))) 171 | 172 | def add_gps(self, gps_files, gps_time_files): 173 | """Add GPS measurements and the associated timestamps to vertices. 174 | Args: 175 | gps_files (list of str): Paths to files contaning gps data for runs. 176 | gps_time_files (list of str): Paths to files containing gps time stamps for runs. 
177 | """ 178 | for run in gps_files: 179 | run_times = np.loadtxt(gps_files[run], delimiter=",") 180 | for row in run_times: 181 | if (run, row[0]) in self.vertices: 182 | self.vertices[(run, row[1])].latitude = row[2] 183 | self.vertices[(run, row[1])].longitude = row[3] 184 | self.vertices[(run, row[1])].altitude = row[4] 185 | else: 186 | print("Warning: attempted to add GPS data for vertex ({0}, {1}) not in graph.".format(run, int(row[1]))) 187 | 188 | for run in gps_time_files: 189 | run_times = np.loadtxt(gps_time_files[run], delimiter=",") 190 | for row in run_times: 191 | if (run, int(row[0])) in self.vertices: 192 | self.vertices[(run, row[0])].gps_time = float(row[1]) * 10**-9 193 | 194 | def get_subgraph(self, start, end): 195 | """Returns a subgraph made from the teach vertices between (teach_id, start) and ( 196 | teach_id, end), where teach_id refers to the run id of the teach run. 197 | Args: 198 | start (int): pose id of the vertex that starts the subgraph. 199 | end (int): pose id of the vertex that ends the subgraph. 200 | Returns: 201 | Graph: subgraph made from teach vertices between start and end. Original graph if pose 202 | ids do not correspond to existing vertices. 203 | """ 204 | if self.is_vertex((self.teach_id, start)) and self.is_vertex((self.teach_id, end)): 205 | subgraph = copy.copy(self) 206 | subgraph.matches = {id: self.matches[id] for id in self.matches if start <= id[1] <= end} 207 | 208 | subgraph.vertices = {} 209 | for m in subgraph.matches: 210 | subgraph.vertices.update({m: self.get_vertex(m)}) 211 | subgraph.vertices.update({repeat_vertex.vertex_id: repeat_vertex for repeat_vertex in subgraph.matches[m]}) 212 | 213 | return subgraph 214 | else: 215 | print("Warning: invalid vertex chosen for subgraph, returning original graph.") 216 | return self 217 | 218 | def get_topological_dist(self, vertex_id1, vertex_id2): 219 | """Returns number of edges between two vertices in the graph. 220 | Args: 221 | vertex_id1 (tuple of ints): Id of the first vertex object. The id is a tuple containing 222 | the run id and the the pose id. 223 | vertex_id2 (tuple of ints): Id of the second vertex object. The id is a tuple containing 224 | the run id and the the pose id. 225 | Returns: 226 | int: The number of edges between the two vertices, -1 if no path exists between them. 227 | """ 228 | path, _ = self.get_path(vertex_id1, vertex_id2) 229 | return len(path) - 1 230 | 231 | def get_transform(self, vertex_id1, vertex_id2): 232 | """Returns the transform, T_21, from vertex 1 to vertex 2 in the graph. 233 | Args: 234 | vertex_id1 (tuple of ints): Id of the first vertex object. The id is a tuple 235 | containing the run id and the the pose id. 236 | vertex_id2 (tuple of ints): Id of the second vertex object. The id is a tuple 237 | containing the run id and the the pose id. 238 | Returns: 239 | Transform: The transform, T_21, from vertex 1 to vertex 2. Identity transform if no 240 | path exists between the vertices. 
241 | """ 242 | transform = Transform(np.eye(3), np.zeros((3,))) 243 | 244 | path, forward = self.get_path(vertex_id1, vertex_id2) 245 | 246 | if len(path) == 0: 247 | print("Warning: no path found between vertex {0} and vertex {1}, returning identity transform.".format(vertex_id1, vertex_id2)) 248 | return transform 249 | if len(path) == 1: 250 | return transform 251 | 252 | # Always calculate transform in forward direction then flip if backward 253 | if not forward: 254 | path.reverse() 255 | 256 | # Compose transforms 257 | for vertex in path[:-2]: 258 | transform = self.get_vertex(vertex).next_transform * transform 259 | 260 | # Check if last vertex in path is a teach vertex 261 | if self.get_vertex(path[-2]).next_id == path[-1]: 262 | transform = self.get_vertex(path[-2]).next_transform * transform 263 | else: 264 | transform = self.get_vertex(path[-1]).next_transform.inv() * transform 265 | 266 | if forward: 267 | return transform 268 | else: 269 | return transform.inv() 270 | 271 | def get_path(self, vertex_id1, vertex_id2): 272 | """Returns a list of vertices connecting vertex 1 and vertex 2. 273 | Traverses graph both ways and returns shorter path (topologically). 274 | Args: 275 | vertex_id1 (tuple of ints): Id of the first vertex object. The id is a tuple containing 276 | the run id and the the pose id. 277 | vertex_id2 (tuple of ints): Id of the second vertex object. The id is a tuple containing 278 | the run id and the the pose id. 279 | Returns: 280 | list of tuples of ints: List of vertex ids of vertices connecting vertex 1 and vertex 2. 281 | List of length one if vertex 1 and 2 are the same. Empty list if no path is found 282 | or if at least one of the vertices does not exist in the graph. 283 | bool: True if path is in the forward direction, otherwise False. 284 | """ 285 | if self.get_vertex(vertex_id1) is None or self.get_vertex(vertex_id2) is None: 286 | print("Warning: no path found. At least one of the vertices are invalid.") 287 | return [], False 288 | 289 | # Set to false if can't find path between vertices 290 | forward_valid = True 291 | backward_valid = True 292 | 293 | forward_path = [vertex_id1] 294 | 295 | # Check if vertex are the same 296 | if vertex_id1 == vertex_id2: 297 | return forward_path, True 298 | 299 | # Follow chain of vertices from vertex 1 to vertex 2 300 | start = self.get_vertex(vertex_id1) 301 | goal = self.get_vertex(vertex_id2) 302 | if not goal.teach: 303 | goal = self.get_vertex(goal.next_id) 304 | while start != goal: 305 | start = self.get_vertex(start.next_id) 306 | if start is None: 307 | forward_valid = False 308 | break 309 | else: 310 | forward_path.append(start.vertex_id) 311 | # If vertex 2 is repeat vertex add edge from teach path to vertex 2 at end 312 | if forward_path[-1] != vertex_id2: 313 | forward_path.append(vertex_id2) 314 | 315 | # Check other way around loop 316 | backward_path = [vertex_id2] 317 | start = self.get_vertex(vertex_id2) 318 | goal = self.get_vertex(vertex_id1) 319 | if not goal.teach: 320 | goal = self.get_vertex(goal.next_id) 321 | while start != goal: 322 | start = self.get_vertex(start.next_id) 323 | if start is None: 324 | backward_valid = False 325 | break 326 | else: 327 | backward_path.append(start.vertex_id) 328 | # If vertex 1 is repeat vertex add edge from teach path to vertex 1 at end 329 | if backward_path[-1] != vertex_id1: 330 | backward_path.append(vertex_id1) 331 | 332 | # Return shorter of the two. 
Returns boolean to indicate if path is in the forward direction 333 | if len(forward_path) <= len(backward_path) or not backward_valid and forward_valid: 334 | return forward_path, True 335 | elif backward_valid: 336 | backward_path.reverse() 337 | return backward_path, False 338 | else: 339 | print("Warning: no path found. Problem with graph.") 340 | return [], False 341 | 342 | def get_topo_neighbours(self, vertex_id, radius): 343 | """Returns a set of topological neighbour vertices within radius number of edges of the 344 | given vertex. 345 | Args: 346 | vertex_id (tuple of ints): Id of the first vertex object. The id is a tuple containing 347 | the run id and the the pose id. 348 | radius (int): Distance in number of edges used as the search radius. 349 | Returns: 350 | set: Set of vertex ids for neighbour vertices. Set of size 1 if radius is 0. 351 | Empty set if radius is negative or if the given vertex does not exist in the graph. 352 | """ 353 | if not self.is_vertex(vertex_id): 354 | print("Warning: no topo neighbours found. Vertex {0} does not exist.".format(vertex_id)) 355 | return set() 356 | 357 | if radius < 0: 358 | raise ValueError("Negative radius specified.") 359 | 360 | neighbours = {vertex_id} 361 | if radius == 0: 362 | return neighbours 363 | 364 | search_radius = 0 365 | 366 | v = self.get_vertex(vertex_id) 367 | if not v.teach: 368 | neighbours.add(v.next_id) 369 | left_bound = v.next_id 370 | right_bound = v.next_id 371 | search_radius += 1 372 | else: 373 | left_bound = vertex_id 374 | right_bound = vertex_id 375 | 376 | while search_radius < radius: 377 | neighbours.update(m.vertex_id for m in self.matches[left_bound]) 378 | neighbours.update(m.vertex_id for m in self.matches[right_bound]) 379 | left_bound = self.get_vertex(left_bound).prev_id 380 | right_bound = self.get_vertex(right_bound).next_id 381 | neighbours.add(left_bound) 382 | neighbours.add(right_bound) 383 | search_radius += 1 384 | 385 | return neighbours 386 | 387 | def get_metric_neighbours(self, vertex_id, radius): 388 | """Returns a set of metric neighbour vertices within radius meters of the given vertex. 389 | Args: 390 | vertex_id (tuple of ints): Id of the first vertex object. The id is a tuple containing 391 | the run id and the the pose id. 392 | radius (float): Distance in meters used as the search radius. 393 | Returns: 394 | set: Set of vertex ids for neighbour vertices. Empty set if radius is negative 395 | or if the given vertex does not exist in the graph. 396 | """ 397 | if not self.is_vertex(vertex_id): 398 | print("Warning: no metric neighbours found. 
Vertex {0} does not exist.".format(vertex_id)) 399 | return set() 400 | 401 | if radius < 0: 402 | raise ValueError("Negative radius specified.") 403 | 404 | neighbours = {vertex_id} 405 | v = self.get_vertex(vertex_id) 406 | if not v.teach: 407 | teach_v = self.get_vertex(v.next_id) 408 | teach_T = v.next_transform 409 | else: 410 | teach_v = v 411 | teach_T = Transform(np.eye(3), np.zeros((3,))) 412 | 413 | # Search forward direction 414 | query_v = teach_v 415 | query_T = teach_T 416 | while True: 417 | # Test repeat vertices localized to current teach 418 | for m in self.matches[query_v.vertex_id]: 419 | T = m.next_transform.inv() * query_T 420 | if np.linalg.norm(T.r_ab_inb) < radius: 421 | neighbours.add(m.vertex_id) 422 | 423 | # Update current teach 424 | if np.linalg.norm(query_T.r_ab_inb) < radius: 425 | neighbours.add(query_v.vertex_id) 426 | query_T = query_v.next_transform * query_T 427 | query_v = self.get_vertex(query_v.next_id) 428 | else: 429 | break 430 | 431 | # Search backward direction 432 | query_v = teach_v 433 | query_T = teach_T 434 | while True: 435 | # Update current teach. Order flipped to avoid checking closest teach vertex twice 436 | if np.linalg.norm(query_T.r_ab_inb) < radius: 437 | neighbours.add(query_v.vertex_id) 438 | query_v = self.get_vertex(query_v.prev_id) 439 | query_T = query_v.next_transform.inv() * query_T 440 | else: 441 | break 442 | # Test repeat vertices localized to current teach 443 | for m in self.matches[query_v.vertex_id]: 444 | T = m.next_transform.inv() * query_T 445 | if np.linalg.norm(T.r_ab_inb) < radius: 446 | neighbours.add(m.vertex_id) 447 | 448 | return neighbours 449 | -------------------------------------------------------------------------------- /data/sample_data.py: -------------------------------------------------------------------------------- 1 | from operator import itemgetter 2 | import re 3 | import random 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from src.utils.transform import Transform 9 | 10 | def random_sample(path_name, pose_graph, runs, num_samples, max_temporal_length): 11 | """ 12 | Sample relative pose transforms for localization randomly from the pose graph. Compute the pose between vertices 13 | from different experiences in the pose graph. Record the pose transform as a 4x4 matrix and the 6 DOF vector 14 | equivalent. The pose from vertex, v1, to vertex, v2, is given as T_v2_v1. 15 | 16 | Create sample ids the vertex ids. A vertex id consists of the id of the run the vertex belongs to and the id of 17 | the pose along that run, i.e. vertex_id = (run_id, pose_id). The sample id corresponding to pose transform 18 | T_v2_v1 is on the form pathname_runid1_poseid1_runid2_poseid2, for instance: mutliseason_1_531_5_542. 19 | 20 | Args: 21 | path_name (string): name given to the path that the pose graph represents. 22 | pose_graph (Graph): the pose graph. 23 | runs (list[int]): list of the run ids of runs to sample from. 24 | num_samples (int): the number of samples to collect. 25 | max_temporal_length (int): we can 'walk along' the pose graph to pair vertices that har further apart (i.e. 26 | not the closest pair). This is the max topological distance/steps we move away 27 | form the start vertex. 28 | 29 | Returns: 30 | samples (list[string]): list of all the sample ids. 31 | labels_se3 (dict): dictionary mapping sample id to pose transform 4x4 matrix provided as a torch.Tensor. 32 | labels_log (dict): dictionary mapping sample id to pose transform 6 DOF vector provided as a torch.Tensor. 
33 | """ 34 | sample_counter = 0 35 | sample_counter_identity = 0 36 | samples = [] 37 | labels_se3 = {} 38 | labels_log = {} 39 | 40 | # The ids of all the vertices in the pose graph. 41 | vertex_ids = pose_graph.get_all_vertices().keys() 42 | 43 | while sample_counter < num_samples: 44 | 45 | # Randomly choose a live index, live_id is on the form (run_id, pose_id) 46 | live_id = random.sample(vertex_ids, 1)[0] 47 | live_vertex = pose_graph.get_vertex(live_id) 48 | 49 | if live_vertex is None: 50 | print('live vertex is None') 51 | continue 52 | 53 | # The chosen vertex is not in one of the runs we want to sample from. 54 | if live_id[0] not in runs: 55 | continue 56 | 57 | # Get all the neighbouring vertices (on all runs/experiences) withing the specified distance. 58 | try: 59 | neighbour_ids = pose_graph.get_topo_neighbours(live_id, max_temporal_length + 1) 60 | except Exception as e: 61 | print(e) 62 | print(f'Could not get topological neighbours for live_id: {live_id}') 63 | continue 64 | 65 | if len(neighbour_ids) == 0: 66 | print(f'{path_name} - Random sampling: Could not find neighbours for vertex {live_id} within topological' \ 67 | f' distance {max_temporal_length}') 68 | continue 69 | 70 | # Randomly pick a target vertex from the neighbours. 71 | other_id = random.sample(neighbour_ids, 1)[0] 72 | 73 | if other_id[0] not in runs: 74 | print(f'{path_name} - Random sampling: other vertex is in run {other_id[0]}, which is not in desired runs') 75 | continue 76 | 77 | if live_id[0] == other_id[0]: 78 | print(f'{path_name} - Random sampling: do not want to localize to vertex on the same run') 79 | continue 80 | 81 | other_vertex = pose_graph.get_vertex(other_id) 82 | 83 | if other_vertex is None: 84 | print('other vertex is None') 85 | continue 86 | 87 | # Create a sample ID and check that it has not already been added. 88 | sample_id = f'{path_name}-{live_id[0]}-{live_id[1]}-{other_id[0]}-{other_id[1]}' 89 | 90 | if not sample_id in samples: 91 | 92 | T_other_live = pose_graph.get_transform(live_id, other_id) 93 | log_other_live = Transform.LogMap(T_other_live) 94 | 95 | samples.append(sample_id) 96 | labels_se3[sample_id] = torch.tensor(T_other_live.matrix, dtype=torch.float) 97 | labels_log[sample_id] = torch.tensor(log_other_live, dtype=torch.float) 98 | 99 | sample_counter += 1 100 | 101 | else: 102 | 103 | print(f'{path_name} - Random sampling: sample {sample_id} has already been added', flush=True) 104 | print(f'Number of sampled poses so far: {sample_counter}') 105 | 106 | return samples, labels_se3, labels_log 107 | 108 | 109 | def sequential_sample(path_name, pose_graph, map_run_id, live_runs, temporal_length): 110 | """ 111 | Sample relative pose transforms for localization sequentially from the pose graph. Compute the pose from 112 | vertices from each of the live runs to one map run. Compute the pose for each vertex on the live runs 113 | sequentially. Record the pose transform as a 4x4 matrix and the 6 DOF vector equivalent. The pose from vertex, 114 | v1, to vertex, v2, is given as T_v2_v1. 115 | 116 | Create sample ids the vertex ids. A vertex id consists of the id of the run the vertex belongs to and the id of 117 | the pose along that run, i.e. vertex_id = (run_id, pose_id). The sample id corresponding to pose transform 118 | T_v2_v1 is on the form pathname_runid1_poseid1_runid2_poseid2, for instance: mutliseason_1_531_5_542. 119 | 120 | Args: 121 | path_name (string): name given to the path that the pose graph represents. 122 | pose_graph (Graph): the pose graph. 
123 | map_run_id (int): id of the run to localize to, i.e. compute the relative pose to vertices on this run. 124 | live_runs (list[int]): the runs we localize to the map run, i.e. compute relative pose from vertices on 125 | these runs. 126 | temporal_length (int): we can 'walk along' the pose graph to pair vertices that har further apart (i.e. 127 | not the closest pair). We set a fixed topological distance/steps we move away 128 | from the start vertex. 129 | 130 | Returns: 131 | samples (list[string]): list of all the sample ids. 132 | labels_se3 (dict): dictionary mapping sample id to pose transform 4x4 matrix provided as a torch.Tensor. 133 | labels_log (dict): dictionary mapping sample id to pose transform 6 DOF vector provided as a torch.Tensor. 134 | """ 135 | samples = [] 136 | added_live = [] 137 | labels_se3 = {} 138 | labels_log = {} 139 | 140 | # Get the graph vertices in a sorted list by run_id and pose_id. The vertex ids are on the form (run_id, pose_id). 141 | # We retrieve all the vertices in the pose graph and filter out the correct ones later. 142 | vertex_ids = pose_graph.get_all_vertices().keys() 143 | vertex_ids = sorted(vertex_ids, key=itemgetter(0,1)) 144 | 145 | # Localize each vertex in the live run sequentially. 146 | for live_id in vertex_ids: 147 | 148 | live_vertex = pose_graph.get_vertex(live_id) 149 | 150 | if live_vertex is None: 151 | continue 152 | 153 | if (live_id[0] in live_runs) and (live_id[0] != map_run_id): 154 | 155 | # Vertices are connected via the teach run 0 in the pose graph, which adds one to the distance count between 156 | # them, i.e. if run5_pose50 is localized directly to run3_pose54, we get r5_p50 -> r0_p52 -> r3_p54 so a 157 | # radius of 2 instead of 1. 158 | radius = temporal_length + 1 159 | max_radius = 8 160 | map_pose_id = -1 161 | smallest_metric_dist = 1000 162 | chosen_topo_dist = 1000 163 | 164 | # Try to find a vertex on the map run to localize against. If we can find one at the specified topological 165 | # distance, then increase the search radius. 166 | while (map_pose_id == -1) and (radius <= max_radius): 167 | try: 168 | neighbour_ids = pose_graph.get_topo_neighbours(live_id, radius) 169 | except Exception as e: 170 | print(e) 171 | print(f'{path_name} - Sequential sampling: Could not localize {live_id} to map run, topological' \ 172 | f' neighbours failed') 173 | radius += 1 174 | continue 175 | 176 | # Find the closest vertex (on the map run) of the valid neighbours to localize to. 177 | for n_id in neighbour_ids: 178 | if n_id[0] == map_run_id: 179 | topo_dist = pose_graph.get_topological_dist(live_id, n_id) 180 | T_n_live = pose_graph.get_transform(live_id, n_id) 181 | metric_dist = np.linalg.norm(T_n_live.r_ab_inb) 182 | if (metric_dist < smallest_metric_dist) and (topo_dist >= temporal_length + 1): 183 | smallest_metric_dist = metric_dist 184 | chosen_topo_dist = topo_dist 185 | map_pose_id = n_id[1] 186 | 187 | radius += 1 188 | 189 | if map_pose_id == -1: 190 | print(f'{path_name} - Sequential sampling: Could not localize {live_id} to map run ' \ 191 | f'within {max_radius -1} edges.') 192 | continue 193 | 194 | if chosen_topo_dist != temporal_length + 1: 195 | print(f'{path_name} - Sequential sampling: Could not match {live_id} at topological distance ' \ 196 | f'{temporal_length}, matched at length {chosen_topo_dist - 1} instead.') 197 | 198 | # Get the map vertex we want to localize to. 
199 | map_id = (map_run_id, map_pose_id) 200 | map_vertex = pose_graph.get_vertex(map_id) 201 | 202 | if map_vertex is None: 203 | print(f'{path_name} - Sequential sampling: Could not localize {live_id} at topological distance ' \ 204 | f'{temporal_length} (map vertex is None).') 205 | continue 206 | 207 | # Create a sample ID and check that it has not already been added 208 | sample_id = f'{path_name}-{live_id[0]}-{live_id[1]}-{map_id[0]}-{map_id[1]}' 209 | 210 | if not sample_id in samples: 211 | 212 | T_map_live = pose_graph.get_transform(live_id, map_id) 213 | log_map_live = Transform.LogMap(T_map_live) 214 | 215 | samples.append(sample_id) 216 | labels_se3[sample_id] = torch.tensor(T_map_live.matrix, dtype=torch.float) 217 | labels_log[sample_id] = torch.tensor(log_map_live, dtype=torch.float) 218 | 219 | else: 220 | 221 | print(f'{path_name} - Sequential sampling: sample {sample_id} has already been added') 222 | 223 | return samples, labels_se3, labels_log -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # This example Dockerfile illustrates a method to install 3 | # additional packages on top of NVIDIA's PyTorch container image. 4 | # 5 | # To use this Dockerfile, use the `docker build` command. 6 | # See https://docs.docker.com/engine/reference/builder/ 7 | # for more information. 8 | # 9 | # Software available with the container version 20.11 is listed at: 10 | # https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html 11 | # 12 | FROM nvcr.io/nvidia/pytorch:20.11-py3 13 | 14 | # Python OpenCV should be available, if not, run the following. 15 | RUN apt-get update 16 | RUN apt-get upgrade -y --no-install-recommends 17 | RUN apt-get update 18 | 19 | RUN pip install --upgrade pip 20 | RUN pip install opencv-python==3.4.8.29 21 | 22 | # Add a user if desirable 23 | #RUN useradd -ms /bin/bash -u 24 | #USER 25 | #WORKDIR /home/ 26 | -------------------------------------------------------------------------------- /docker/docker_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "" 4 | echo "Run docker container detached, add data volumes, and execute python script." 5 | echo "" 6 | 7 | docker run --gpus "device=0" -d -it --rm --name create_vtr_dataset --shm-size=64g \ 8 | -v :/home//data/inthedark:ro \ 9 | -v :/home//data/multiseason:ro \ 10 | -v /home//code:/home//code \ 11 | -v /home//results:/home//results \ 12 | -v /home//networks:/home//networks \ 13 | -v /home//datasets:/home//datasets \ 14 | \ 15 | bash -c "cd /home//code/deep_learned_visual_features && python -m data.build_train_test_dataset_loc --config /home//code/deep_learned_visual_features/config/data.json" 16 | -------------------------------------------------------------------------------- /docker/docker_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "" 4 | echo "Run docker container detached, add data volumes, and execute python script." 
5 | echo "" 6 | 7 | docker run --gpus "device=0" -d -it --rm --name create_vtr_dataset --shm-size=64g \ 8 | -v :/home//data/inthedark:ro \ 9 | -v :/home//data/multiseason:ro \ 10 | -v /home//code:/home//code \ 11 | -v /home//results:/home//results \ 12 | -v /home//networks:/home//networks \ 13 | -v /home//datasets:/home//datasets \ 14 | \ 15 | bash -c "cd /home//code/deep_learned_visual_features && python -m src.test --config /home//code/deep_learned_visual_features/config/test.json" 16 | 17 | -------------------------------------------------------------------------------- /docker/docker_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "" 4 | echo "Run docker container detached, add data volumes, and execute python script." 5 | echo "" 6 | 7 | docker run --gpus "device=0" -d -it --rm --name train_model --shm-size=64g \ 8 | -v :/home//data/inthedark:ro \ 9 | -v :/home//data/multiseason:ro \ 10 | -v /home//code:/home//code \ 11 | -v /home//results:/home//results \ 12 | -v /home//networks:/home//networks \ 13 | -v /home//datasets:/home//datasets \ 14 | \ 15 | bash -c "cd /home//code/deep_learned_visual_features && python -m src.train --config /home//code/deep_learned_visual_features/config/train.json" 16 | 17 | -------------------------------------------------------------------------------- /download_datasets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget --load-cookies /tmp/cookies.txt "https://drive.google.com/uc?export=download&confirm=$(wget --quiet \ 3 | --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate \ 4 | 'https://drive.google.com/uc?export=download&id=10yOGwbGDhJEhbm0h4faiy-lO-7NPGJEQ' -O- | \ 5 | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=10yOGwbGDhJEhbm0h4faiy-lO-7NPGJEQ" -O datasets.zip \ 6 | && rm -rf /tmp/cookies.txt 7 | unzip datasets.zip 8 | rm datasets.zip -------------------------------------------------------------------------------- /download_networks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget --load-cookies /tmp/cookies.txt "https://drive.google.com/uc?export=download&confirm=$(wget --quiet \ 3 | --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate \ 4 | 'https://drive.google.com/uc?export=download&id=1xFGIJACmW6R_J2uzU6JAZKzQ6TBWDJ55' -O- | \ 5 | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1xFGIJACmW6R_J2uzU6JAZKzQ6TBWDJ55" -O networks.zip \ 6 | && rm -rf /tmp/cookies.txt 7 | unzip networks.zip 8 | rm networks.zip -------------------------------------------------------------------------------- /extract_features.py: -------------------------------------------------------------------------------- 1 | '''A script that shows how to pass an image to the network to get keypoints, descriptors and scrores. ''' 2 | 3 | import sys 4 | import os 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from src.model.unet import UNet 12 | from src.model.keypoint_block import KeypointBlock 13 | from src.utils.keypoint_tools import normalize_coords, get_norm_descriptors, get_scores 14 | 15 | 16 | def get_keypoint_info(kpt_2D, scores_map, descriptors_map): 17 | """ 18 | Gather information we need associated with each detected keypoint. Compute the normalized 19 | descriptor and the score for each keypoint. 
20 | 21 | Args: 22 | kpt_2D (torch.tensor): keypoint 2D image coordinates, (Bx2xN). 23 | scores_map (torch.tensor): scores for each pixel, (Bx1xHxW). 24 | descriptors_map (torch.tensor): descriptors for each pixel, (BxCxHxW). 25 | 26 | Returns: 27 | kpt_desc_norm (torch.tensor): Normalized descriptor for each keypoint, (BxCxN). 28 | kpt_scores (torch.tensor): score for each keypoint, (Bx1xN). 29 | 30 | """ 31 | batch_size, _, height, width = scores_map.size() 32 | 33 | kpt_2D_norm = normalize_coords(kpt_2D, batch_size, height, width).unsqueeze(1) # Bx1xNx2 34 | 35 | kpt_desc_norm = get_norm_descriptors(descriptors_map, True, kpt_2D_norm) 36 | 37 | kpt_scores = get_scores(scores_map, kpt_2D_norm) 38 | 39 | return kpt_desc_norm, kpt_scores 40 | 41 | 42 | class LearnedFeatureDetector(nn.Module): 43 | """ 44 | Class to detect learned features. 45 | """ 46 | def __init__(self, n_channels, layer_size, window_height, window_width, image_height, image_width, checkpoint_path, cuda): 47 | """ 48 | Set the variables needed to initialize the network. 49 | 50 | Args: 51 | num_channels (int): number of channels in the input image (we use 3 for one RGB image). 52 | layer_size (int): size of the first layer if the encoder. The size of the following layers are 53 | determined from this. 54 | window_height (int): height of window, inside which we detect one keypoint. 55 | window_width (int): width of window, inside which we detect one keypoint. 56 | image_height (int): height of the image. 57 | image_width (int): width of the image. 58 | checkpoint_path (string): path to where the network weights are stored. 59 | cuda (bool): true if using the GPU. 60 | """ 61 | super(LearnedFeatureDetector, self).__init__() 62 | 63 | self.cuda = cuda 64 | self.n_classes = 1 65 | self.n_channels = n_channels 66 | self.layer_size = layer_size 67 | self.window_h = window_height 68 | self.window_w = window_width 69 | self.height = image_height 70 | self.width = image_width 71 | 72 | # Load the network weights from a checkpoint. 73 | if os.path.exists(checkpoint_path): 74 | checkpoint = torch.load(checkpoint_path) 75 | else: 76 | raise RuntimeError(f'The specified checkpoint path does not exists: {checkpoint_path}') 77 | 78 | self.net = UNet(self.n_channels, self.n_classes, self.layer_size) 79 | # self.net = UNet(self.n_channels, self.n_classes, self.layer_size, self.height, self.width, checkpoint) 80 | self.net.load_state_dict(checkpoint['model_state_dict']) 81 | self.keypoint_block = KeypointBlock(self.window_h, self.window_w, self.height, self.width) 82 | self.sigmoid = nn.Sigmoid() 83 | 84 | if cuda: 85 | self.net.cuda() 86 | self.keypoint_block.cuda() 87 | 88 | self.net.eval() 89 | 90 | def run(self, image_tensor): 91 | """ 92 | Forward pass of network to get keypoint detector values, descriptors and, scores 93 | 94 | Args: 95 | image_tensor (torch.tensor, Bx3xHxW): RGB images to input to the network. 96 | 97 | Returns: 98 | keypoints (torch.tensor, Bx2xN): the detected keypoints, N=number of keypoints. 99 | descriptors (torch.tensor, BxCxN): descriptors for each keypoint, C=496 is length of descriptor. 100 | scores (torch.tensor, Bx1xN): an importance score for each keypoint. 
101 | 102 | """ 103 | if self.cuda: 104 | image_tensor = image_tensor.cuda() 105 | 106 | detector_scores, scores, descriptors = self.net(image_tensor) 107 | scores = self.sigmoid(scores) 108 | 109 | # Get 2D keypoint coordinates from detector scores, Bx2xN 110 | keypoints = self.keypoint_block(detector_scores) 111 | 112 | # Get one descriptor and scrore per keypoint, BxCxN, Bx1xN, C=496. 113 | point_descriptors_norm, point_scores = get_keypoint_info(keypoints, scores, descriptors) 114 | 115 | return keypoints.detach().cpu(), point_descriptors_norm.detach().cpu(), point_scores.detach().cpu() 116 | 117 | 118 | if __name__ == '__main__': 119 | 120 | cuda = False 121 | checkpoint = 'path_to_network/network_multiseason_inthedark_layer16.pth' 122 | learned_feature_detector = LearnedFeatureDetector(n_channels=3, 123 | layer_size=16, 124 | window_height=16, 125 | window_width=16, 126 | image_height=384, 127 | image_width=512, 128 | checkpoint_path=checkpoint, 129 | cuda=cuda) 130 | 131 | 132 | test_image = torch.rand(1, 3, 384, 512) 133 | keypoints, descriptors, scores = learned_feature_detector.run(test_image) 134 | 135 | print(keypoints.size()) 136 | print(descriptors.size()) 137 | print(scores.size()) 138 | 139 | -------------------------------------------------------------------------------- /figures/box_plot_generalization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/box_plot_generalization.png -------------------------------------------------------------------------------- /figures/box_plot_lighting_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/box_plot_lighting_change.png -------------------------------------------------------------------------------- /figures/conditions_generalization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/conditions_generalization.png -------------------------------------------------------------------------------- /figures/conditions_lighting_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/conditions_lighting_change.png -------------------------------------------------------------------------------- /figures/network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/network.png -------------------------------------------------------------------------------- /figures/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/overview.png -------------------------------------------------------------------------------- /figures/pipeline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/pipeline.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.3.2 2 | numpy==1.19.2 3 | opencv-python==3.4.8.29 4 | Pillow==7.0.0 5 | scikit-image==0.15.0 6 | scikit-learn==0.23.2 7 | scipy==1.5.2 8 | torch==1.8.0 9 | torchvision==0.8.0 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/src/__init__.py -------------------------------------------------------------------------------- /src/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import re 4 | 5 | import cv2 6 | import numpy as np 7 | from PIL import Image 8 | import torch 9 | from torch.utils import data 10 | from torchvision import transforms 11 | 12 | class Dataset(data.Dataset): 13 | """ 14 | Dataset for the VT&R localization data. 15 | """ 16 | 17 | def __init__(self, data_dir, height=384, width=512, use_normalization=False, use_disparity=True): 18 | """ 19 | Initialize the Dataset class with the necessary parameters. 20 | 21 | Args: 22 | data_dir (string): the directory where the images are stored. 23 | height (int): height of the images. 24 | width (int): width of the images. 25 | use_normalization (bool): whether to normalize the images. 26 | use_disparity (bool): whether to generate disparity for stereo pairs. 27 | """ 28 | 29 | self.list_ids = [] 30 | self.labels_se3 = {} 31 | self.labels_log = {} 32 | self.data_dir = data_dir 33 | self.height = height 34 | self.width = width 35 | self.use_normalization = use_normalization 36 | self.use_disparity = use_disparity 37 | 38 | # Create transforms to apply to images 39 | self.image_transforms = transforms.Compose([ 40 | transforms.ToTensor() 41 | ]) 42 | 43 | self.normalize_transforms = None 44 | 45 | def load_mel_data(self, mel_data, mode, path_name='', index=None): 46 | """ 47 | Load the dataset that we have created ahead of time. 48 | 49 | Args: 50 | mel_data (MELData): object that stores the data sample ids and labels. 51 | mode (string): a string with value 'training', 'validation', or 'testing'. 52 | path_name (string, optional): if the data is used for testing, the name of the path we want to test. 53 | index (int, optional): if the data is used for testing, we have several test paths, the index indicates 54 | which one to use. 55 | """ 56 | 57 | # Set the mean and std_dev if we want to normalize the data.
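# (Assumption for clarity: mel_data.mean and mel_data.std_dev are expected to be per-channel image statistics
# computed when the dataset was built; the exact values are supplied by the MELData object, not hard-coded here.)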
58 | self.normalize_transforms = transforms.Compose([ 59 | transforms.ToTensor(), 60 | transforms.Normalize(mel_data.mean, mel_data.std_dev) 61 | ]) 62 | 63 | if mode == 'training': 64 | self.list_ids = mel_data.train_ids 65 | self.labels_se3 = mel_data.train_labels_se3 66 | self.labels_log = mel_data.train_labels_log 67 | 68 | elif mode == 'validation': 69 | self.list_ids = mel_data.valid_ids 70 | self.labels_se3 = mel_data.valid_labels_se3 71 | self.labels_log = mel_data.valid_labels_log 72 | 73 | elif mode == 'testing': 74 | self.list_ids = mel_data.test_ids[path_name][index] 75 | self.labels_se3 = mel_data.test_labels_se3[path_name][index] 76 | self.labels_log = mel_data.test_labels_log[path_name][index] 77 | 78 | else: 79 | raise ValueError('Dataset, load_mel_data: mode must be set to training, validatation, or testing.') 80 | 81 | def __len__(self): 82 | """ 83 | The number of samples in the dataset. 84 | """ 85 | return len(self.list_ids) 86 | 87 | def __getitem__(self, index): 88 | """ 89 | Load a sample from the dataset 90 | 91 | Args: 92 | index (int): index of the sample to load. 93 | 94 | Returns: 95 | X (torch.tensor): two RGB stereo pairs of images stacked together, one source image pair and one target 96 | image pair (Bx12xHxW). 97 | D (torch.tensor): two disparity images, computed from the two pairs of stereo images (Bx2xHW). 98 | sample_id (string): the id for the data sample on the form 99 | pathname_runidsrc_poseidsrc_runidtrg_poseidtrg. 100 | y_se3 (torch.tensor): the relative pose from the source to target, T_trg_src, given as a 4x4 matrix 101 | (Bx4x4). 102 | y_log (torch.tensor): the relative pose from the source to target given as a length 6 vector (Bx6). 103 | """ 104 | 105 | # Select sample 106 | sample_id = self.list_ids[index] 107 | 108 | # Get the path name, run id, and pose id for the sample. Combining the run and pose id gives us the id of 109 | # a given vertex in the pose graph that the data was sampled from. 110 | data_info = re.findall('\w+', sample_id) 111 | path_name = data_info[0] 112 | run_ids = data_info[1::2] 113 | pose_ids = data_info[2::2] 114 | 115 | num_frames = 2 # One source and one target frame. 116 | num_channels = num_frames * 6 # Two RGB stereo pairs, 2 * 2 * 3. 117 | 118 | X = torch.zeros(num_channels, self.height, self.width) # Images 119 | D = torch.zeros(num_frames, self.height, self.width) # Disparity 120 | 121 | self.add_images(X, 0, path_name, run_ids[0], pose_ids[0], self.use_normalization) # Source image 122 | self.add_images(X, 1, path_name, run_ids[1], pose_ids[1], self.use_normalization) # Target image 123 | 124 | if self.use_disparity: 125 | self.add_disparity(D, 0, path_name, run_ids[0], pose_ids[0]) # Source disparity 126 | self.add_disparity(D, 1, path_name, run_ids[1], pose_ids[1]) # Target disparity 127 | 128 | # Pose transforms 129 | y_se3 = self.labels_se3[sample_id] 130 | y_log = self.labels_log[sample_id] 131 | 132 | return X, D, sample_id, y_se3, y_log 133 | 134 | def add_images(self, X, ind, path_name, run_id, pose_id, normalize_img): 135 | """ 136 | Add a stereo pair of images to the images tensor. 137 | 138 | Args: 139 | X (torch.tensor): the tensor to hold the RGB stereo pairs of images (Bx12xHxW). 140 | ind (int): index indicating which image to add. 141 | path_name (string): name of the path the images are taken from. 142 | run_id (int): id of the run the images are taken from. 143 | pose_id (int): id of the pose along the given run that the images are taken from. 
144 | normalize_img (bool): whether to normalize the image. 145 | """ 146 | # Stereo pair of RGB images (2 x 3 channels). 147 | start = ind * 6 148 | self.add_image(X, start, 'left', path_name, run_id, pose_id, normalize_img) 149 | self.add_image(X, start + 3, 'right', path_name, run_id, pose_id, normalize_img) 150 | 151 | def add_image(self, X, start_ind, loc, path_name, run_id, pose_id, normalize_img): 152 | """ 153 | Add one image to the images tensor. 154 | 155 | Args: 156 | X (torch.tensor): the tensor to hold the RGB stereo pairs of images (Bx12xHxW). 157 | start_ind (int): the index of the tensor where we start adding the images channels. 158 | loc (string): 'left' or 'right' insicating which image in the stereo pair we are adding. 159 | path_name (string): name of the path the images are taken from. 160 | run_id (int): id of the run the images are taken from. 161 | pose_id (int): id of the pose along the given run that the images are taken from. 162 | normalize_img (bool): whether to normalize the image. 163 | """ 164 | # Generate the image file name based on the id of the vertex the image belongs to. 165 | img_file = f"{self.data_dir}/{path_name}/run_{run_id.zfill(6)}/" \ 166 | f"images/{loc}/{pose_id.zfill(6)}.png" 167 | 168 | img = Image.open(img_file) 169 | 170 | # Turn the image into a tensor and normalize if required. 171 | if normalize_img: 172 | X[start_ind:start_ind + 3, :, :] = self.normalize_transforms(img) 173 | else: 174 | X[start_ind:start_ind + 3, :, :] = self.image_transforms(img) 175 | 176 | def get_disparity(self, left_img, right_img): 177 | """ 178 | Create the disparity image using functions from OpenCV. 179 | 180 | Args: 181 | left_img (numpy.uint8): left stereo image. 182 | right_img (numpy.uint8): right stereo image. 183 | """ 184 | stereo = cv2.StereoSGBM_create(minDisparity = 0, 185 | numDisparities = 48, 186 | blockSize = 5, 187 | preFilterCap = 30, 188 | uniquenessRatio = 20, 189 | P1 = 200, 190 | P2 = 800, 191 | speckleWindowSize = 200, 192 | speckleRange = 1, 193 | disp12MaxDiff = -1) 194 | 195 | disp = stereo.compute(left_img, right_img) 196 | disp = disp.astype(np.float32) / 16.0 197 | 198 | # Adjust values close to or equal to zero, which would cause problems for depth calculation. 199 | disp[(disp < 1e-4) & (disp >= 0.0)] = 1e-4 200 | 201 | return torch.from_numpy(disp) 202 | 203 | def add_disparity(self, D, index, path_name, run_id, pose_id): 204 | """ 205 | Add a disparity image to the tensor holding the disparity images. 206 | 207 | Args: 208 | D (torch.tensor): the tensor holding the disparity images. 209 | index (int): the index of the channel where we will add the disparity image in D. 210 | path_name (string): name of the path the images are taken from. 211 | run_id (int): id of the run the images are taken from. 212 | pose_id (int): id of the pose along the given run that the images are taken from. 213 | """ 214 | # Generate the image file names based on the id of the vertex the image belongs to. 215 | img_file_left = f"{self.data_dir}/{path_name}/run_{run_id.zfill(6)}/images/" \ 216 | f"left/{pose_id.zfill(6)}.png" 217 | img_file_right = f"{self.data_dir}/{path_name}/run_{run_id.zfill(6)}/images/" \ 218 | f"right/{pose_id.zfill(6)}.png" 219 | 220 | left_img = np.uint8(cv2.imread(img_file_left, 0)) 221 | right_img = np.uint8(cv2.imread(img_file_right, 0)) 222 | 223 | # Compute disparity using OpenCV. 
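# For a rectified stereo pair, depth = f * b / disparity (f: focal length in pixels, b: baseline in meters),
# which is why get_disparity() above clamps near-zero disparities to 1e-4 before they are used for depth
# computation downstream.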
224 | disparity = self.get_disparity(left_img, right_img) 225 | 226 | D[index, :, :] = disparity -------------------------------------------------------------------------------- /src/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/src/model/__init__.py -------------------------------------------------------------------------------- /src/model/keypoint_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class KeypointBlock(nn.Module): 6 | """ 7 | Given the dense detector values from the UNet decoder, compute the 2D keypoints. 8 | """ 9 | def __init__(self, window_height, window_width, image_height, image_width): 10 | """ 11 | Initialize by setting the height and width of windows where keypoints should be detected. One keypoint is 12 | detected per window and hence the size of the windows determines the number of possible keypoints. 13 | 14 | Args: 15 | window_height (int): height of window, inside which we detect one keypoint. 16 | window_width (int): width of window, inside which we detect one keypoint. 17 | image_height (int): height of the image. 18 | image_width (int): width of the image. 19 | """ 20 | super(KeypointBlock, self).__init__() 21 | self.window_height = window_height 22 | self.window_width = window_width 23 | self.temperature = 1.0 24 | 25 | v_coords, u_coords = torch.meshgrid([torch.arange(0, image_height), torch.arange(0, image_width)]) 26 | v_coords = v_coords.unsqueeze(0).float() # 1 x H x W 27 | u_coords = u_coords.unsqueeze(0).float() 28 | 29 | self.register_buffer('v_coords', v_coords) 30 | self.register_buffer('u_coords', u_coords) 31 | 32 | def forward(self, detector_values): 33 | """ 34 | Given a tensor of detector values (same width/height as the original image), divide the tensor into 35 | windows and use a spatial softmax over each window. The 2D coordinates of one keypoint is estimated for each 36 | window. 37 | 38 | Args: 39 | detector_values (torch.tensor, Bx1xHxW): Tensor of detector values from the network decoder. 40 | Returns: 41 | keypoints_2D (torch.tensor, Bx2xN): Keypoint coordinates. 
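Note: N equals the number of non-overlapping windows, (image_height / window_height) * (image_width / window_width). With the 384x512 images and 16x16 windows used elsewhere in this repository, N = 24 * 32 = 768 keypoints per image.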
42 | """ 43 | 44 | batch_size, _, height, width = detector_values.size() 45 | 46 | v_windows = F.unfold(self.v_coords.expand(batch_size, 1, height, width), 47 | kernel_size=(self.window_height, self.window_width), 48 | stride=(self.window_height, self.window_width)) # B x n_window_elements x n_windows 49 | u_windows = F.unfold(self.u_coords.expand(batch_size, 1, height, width), 50 | kernel_size=(self.window_height, self.window_width), 51 | stride=(self.window_height, self.window_width)) 52 | 53 | detector_values_windows = F.unfold(detector_values, 54 | kernel_size=(self.window_height, self.window_width), 55 | stride=(self.window_height, self.window_width)) # B x n_wind_elements x n_windows 56 | 57 | softmax_attention = F.softmax(detector_values_windows / self.temperature, dim=1) # B x n_wind_elements x n_windows 58 | 59 | expected_v = torch.sum(v_windows * softmax_attention, dim=1) # B x n_windows 60 | expected_u = torch.sum(u_windows * softmax_attention, dim=1) 61 | keypoints_2D = torch.stack([expected_u, expected_v], dim=2).transpose(2, 1) # B x 2 x n_windows 62 | 63 | return keypoints_2D 64 | -------------------------------------------------------------------------------- /src/model/matcher_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class MatcherBlock(nn.Module): 6 | """ 7 | Match the descriptors corresponding to the source keypoints to all descriptors in the target frame. Use the 8 | values from descriptor matching to compute the coordinates of the matched keypoints in the target frame, which 9 | we call pseudo-points. 10 | """ 11 | 12 | def __init__(self): 13 | super(MatcherBlock, self).__init__() 14 | 15 | def forward(self, kpt_2D_src, kpt_2D_trg, kpt_desc_norm_src, kpt_desc_norm_trg): 16 | """ 17 | Compute coordinates for the matches target keypoints, which we refer to as pseudo-points. 18 | 19 | Args: 20 | kpt_2D_src (torch.tensor, Bx2xN): sparse keypoints detected in the source image. 21 | kpt_2D_trg (torch.tensor, Bx2x(N or HW)): coordinates of target keypoints to match. If we are doing 22 | dense matching these will be the coordinates of all the pixels 23 | in the target image (Bx2xWH). If we are doing sparse matching, 24 | they will be the coordinates of the sparse detected keypoints 25 | in the target image (Bx2xN). 26 | kpt_desc_norm_src (torch.tensor, BxCxN): Normalized descriptors corresponding to the source keypoints. 27 | (C is the length of the descriptor). 28 | kpt_desc_norm_trg (torch.tensor, BxCx(N or HW)): Normalized descriptors corresponding to the target 29 | keypoints. BxCxHW for dense matching and BxCxN for 30 | sparse matching. 31 | 32 | Returns: 33 | kpt_2D_pseudo (torch.tensor, Bx2xN): keypoint coordinates of the matched pseudo-points in the target 34 | frame. 35 | """ 36 | 37 | batch_size, _, n_points = kpt_2D_src.size() 38 | 39 | # For all source keypoints, compute match value between each source point and all points in target frame. 40 | # Apply softmax for each source point along the dimension of the target points. 
41 | match_vals = torch.matmul(kpt_desc_norm_src.transpose(2, 1), kpt_desc_norm_trg) \ 42 | / float(kpt_desc_norm_trg.size(1)) # BxNx(HW or N) 43 | 44 | match_vals = (match_vals + 1.0) # [-1, 1] -> [0, 2] 45 | temperature = 0.01 46 | soft_match_vals = F.softmax(match_vals / 0.01, dim=2) # BxNx(HW or N) 47 | 48 | # Compute pseudo-point as weighted sum of point coordinates from target image, Bx2xN 49 | kpt_2D_pseudo = torch.matmul(kpt_2D_trg, soft_match_vals.transpose(2, 1)) 50 | 51 | return kpt_2D_pseudo 52 | 53 | -------------------------------------------------------------------------------- /src/model/pipeline.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from src.model.keypoint_block import KeypointBlock 6 | from src.model.matcher_block import MatcherBlock 7 | from src.model.ransac_block import RANSACBlock 8 | from src.model.svd_block import SVDBlock 9 | from src.model.unet import UNet 10 | from src.model.weight_block import WeightBlock 11 | from src.utils.keypoint_tools import get_keypoint_info, get_norm_descriptors 12 | from src.utils.lie_algebra import se3_inv, se3_log 13 | from src.utils.stereo_camera_model import StereoCameraModel 14 | 15 | class Pipeline(nn.Module): 16 | """ 17 | The Pipeline class implements the training pipeline. It takes a source and target image pair and the ground 18 | truth relative pose between them. It calls functions to execute the steps of the training pipeline such as 19 | keypoint detection, descriptor and score computation, keypoint matching, 3D point computation and, finally, 20 | pose computation. It also computes the loss functions for training. 21 | """ 22 | def __init__(self, config): 23 | """ 24 | Set up variables needed in the training pipeline. 25 | 26 | Args: 27 | config (dict): dictionary with the configuration parameters for the training pipeline. 28 | """ 29 | super(Pipeline, self).__init__() 30 | 31 | self.config = config 32 | 33 | self.window_h = config['pipeline']['window_h'] 34 | self.window_w = config['pipeline']['window_w'] 35 | self.dense_matching = config['pipeline']['dense_matching'] 36 | 37 | # Image height and width 38 | self.height = config['dataset']['height'] 39 | self.width = config['dataset']['width'] 40 | 41 | # Transform from sensor to vehicle. 42 | T_s_v = torch.tensor([[0.000796327, -1.0, 0.0, 0.119873], 43 | [-0.330472, -0.000263164, -0.943816, 1.49473], 44 | [0.943815, 0.000751586, -0.330472, 0.354804], 45 | [0.0, 0.0, 0.0, 1.0]]) 46 | self.register_buffer('T_s_v', T_s_v) 47 | 48 | # Set up the different blocks needed in the pipeline 49 | self.keypoint_block = KeypointBlock(self.window_h, self.window_w, self.height, self.width) 50 | self.matcher_block = MatcherBlock() 51 | self.weight_block = WeightBlock() 52 | self.svd_block = SVDBlock(self.T_s_v) 53 | self.ransac_block = RANSACBlock(config, self.T_s_v) 54 | 55 | self.stereo_cam = StereoCameraModel(config['stereo']['cu'], config['stereo']['cv'], 56 | config['stereo']['f'], config['stereo']['b']) 57 | 58 | # Set up the coordinates for the image pixels, only need to do this once. 
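# image_coords ends up as a 2xHW tensor of (u, v) pixel coordinates; it is reused on every forward pass as the
# dense set of candidate target coordinates when dense matching is enabled.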
59 | v_coord, u_coord = torch.meshgrid([torch.arange(0, self.height), torch.arange(0, self.width)]) 60 | v_coord = v_coord.reshape(self.height * self.width).float() # HW 61 | u_coord = u_coord.reshape(self.height * self.width).float() 62 | image_coords = torch.stack((u_coord, v_coord), dim=0) # 2xHW 63 | self.register_buffer('image_coords', image_coords) 64 | 65 | def forward(self, net, images, disparities, pose_se3, pose_log, epoch, test=False): 66 | """ 67 | A forward pass of the training piepline to estimate the relative pose given a source and target image 68 | pair. Also computes losses for training. 69 | 70 | Args: 71 | net (torch.nn.Module): neural network module. 72 | images (torch.tensor): a stereo pair of RGB source and target images stacked together (Bx12xHxW). 73 | disparities (torch.tensor): a pair of source and target disparity images stacked together (Bx2xHxW). 74 | The disparity is estimated from the left and right stereo pair. 75 | pose_se3 (torch.tensor): a 4x4 matrix representing the ground truth relative pose transformation from 76 | the source to target frame, T_trg_src (Bx4x4). 77 | pose_log (torch.tensor): a length 6 vector representing the ground truth relative pose transformation 78 | from the source to target frame (Bx6). 79 | epoch (int): the current training epoch. 80 | test (bool): True if we are running a test, False if training or validation. 81 | 82 | Returns: 83 | losses (dict): a dictionary mapping the type of loss to its value. Also includes the weighted sum of 84 | the individual losses. 85 | T_trg_src (torch.tensor): a 4x4 matrix representing the estimated relative pose transformation from 86 | the source to target frame, T_trg_src (Bx4x4). 87 | """ 88 | 89 | 90 | ################################################################################################################ 91 | # Setup of variables 92 | ################################################################################################################ 93 | 94 | batch_size = images.size(0) 95 | 96 | im_channels = [0, 1, 2, 6, 7, 8] # Picking out the left images from the four stereo src and trg images. 97 | images = images[:, im_channels, :, :].cuda() 98 | disparities = disparities.cuda() 99 | 100 | pose_se3 = pose_se3.cuda() 101 | pose_log = pose_log.cuda() 102 | 103 | ################################################################################################################ 104 | # Get keypoints and associated info for the source and target frames 105 | ################################################################################################################ 106 | 107 | # Pass the images through the UNet (Bx1xHxW, Bx1xHxW, BxCxHxW) 108 | detector_scores_src, scores_src, descriptors_src = net(images[:, :3, :, :]) 109 | detector_scores_trg, scores_trg, descriptors_trg = net(images[:, 3:, :, :]) 110 | 111 | # Get 2D keypoint coordinates from detector scores, Bx2xN 112 | kpt_2D_src = self.keypoint_block(detector_scores_src) 113 | kpt_2D_trg = self.keypoint_block(detector_scores_trg) 114 | 115 | # Get 3D point coordinates, normalized descriptors, and scores associated with each individual keypoint 116 | # (Bx4xN, BxCxN, Bx1xN). 
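# Unlike the lighter helper of the same name in extract_features.py, this get_keypoint_info also receives the
# disparity map and stereo camera model, so it can back-project each 2D keypoint to a 3D homogeneous point
# (Bx4xN) and return a validity mask (kpt_valid_*) marking keypoints that are safe to use for pose computation.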
117 | kpt_3D_src, kpt_valid_src, kpt_desc_norm_src, kpt_scores_src = get_keypoint_info(kpt_2D_src, 118 | scores_src, 119 | descriptors_src, 120 | disparities[:, 0, :, :], 121 | self.stereo_cam) 122 | 123 | kpt_3D_trg, kpt_valid_trg, kpt_desc_norm_trg, kpt_scores_trg = get_keypoint_info(kpt_2D_trg, 124 | scores_trg, 125 | descriptors_trg, 126 | disparities[:, 0, :, :], 127 | self.stereo_cam) 128 | 129 | 130 | ################################################################################################################ 131 | # Match keypoints from the source and target frames 132 | ################################################################################################################ 133 | 134 | if self.config['pipeline']['dense_matching']: 135 | 136 | # Match against descriptors for each pixel in the target. 137 | desc_norm_trg_dense = get_norm_descriptors(descriptors_trg) 138 | kpt_2D_trg_dense = self.image_coords.unsqueeze(0).expand(batch_size, 2, self.height * self.width) 139 | 140 | # Compute the coordinates of the matched keypoints in the target frame, which we refer to as pseudo points. 141 | kpt_2D_pseudo = self.matcher_block(kpt_2D_src, kpt_2D_trg_dense, kpt_desc_norm_src, desc_norm_trg_dense) 142 | 143 | else: 144 | 145 | # Match only against descriptors associated with detected keypoints in the target frame. 146 | kpt_2D_pseudo = self.matcher_block(kpt_2D_src, kpt_2D_trg, kpt_desc_norm_src, kpt_desc_norm_trg) 147 | 148 | # Get 3D point coordinates, normalized descriptors, and scores associated with each individual matched pseudo 149 | # point in the target frame (Bx4xN, BxCxN, Bx1xN). 150 | kpt_3D_pseudo, kpt_valid_pseudo, kpt_desc_norm_pseudo, kpt_scores_pseudo = get_keypoint_info(kpt_2D_pseudo, 151 | scores_trg, 152 | descriptors_trg, 153 | disparities[:,1,:,:], 154 | self.stereo_cam) 155 | 156 | # Compute the weight associated with each matched point pair. They will be used when computing the pose. 157 | weights = self.weight_block(kpt_desc_norm_src, kpt_desc_norm_pseudo, kpt_scores_src, kpt_scores_pseudo) 158 | 159 | 160 | ################################################################################################################ 161 | # Outlier rejection 162 | ################################################################################################################ 163 | 164 | # Find the inliers either by using the ground truth pose (training) or RANSAC (inference). 165 | valid_inliers = torch.ones(kpt_valid_src.size()).type_as(kpt_valid_src) 166 | 167 | if self.config['outlier_rejection']['on'] and (self.config['outlier_rejection']['type'] == 'ground_truth'): 168 | 169 | # For training, we use the ground truth pose to find the ground truth location of the target points by 170 | # transforming them from the source frame to the target frames. 171 | 172 | T_trg_src_gt_inl = pose_se3 # Ground truth transform to use to find inliers. 173 | 174 | if 'plane' in self.config['outlier_rejection']['dim']: 175 | # If we use only part of the ground truth pose (x, y, heading) we transform the points in the plane 176 | # only and adjust the ground truth transform accordingly. 
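# Concretely, with x = log_pose[:, 0], y = log_pose[:, 1] and theta = log_pose[:, 5] (heading), the code below
# assembles, column by column, the planar transform
#   T_trg_src_gt_inl = [[cos(theta), -sin(theta), 0, x],
#                       [sin(theta),  cos(theta), 0, y],
#                       [0,           0,          1, 0],
#                       [0,           0,          0, 1]]
# so that only the in-plane DOF of the ground truth pose are used when checking for inliers.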
177 | log_pose = pose_log 178 | 179 | one = torch.ones(batch_size).cuda() # B 180 | zero = torch.zeros(batch_size).cuda() # B 181 | 182 | rot_col1 = torch.stack([torch.cos(log_pose[:, 5]), torch.sin(log_pose[:, 5]), 183 | zero.clone(), zero.clone()], dim=1) # Bx4 184 | rot_col2 = torch.stack([-torch.sin(log_pose[:, 5]), torch.cos(log_pose[:, 5]), 185 | zero.clone(), zero.clone()], dim=1) # Bx4 186 | rot_col3 = torch.stack([zero.clone(), zero.clone(), one.clone(), zero.clone()], dim=1) # Bx4 187 | trans_col = torch.stack([log_pose[:, 0], log_pose[:, 1], zero.clone(), one.clone()], dim=1) # Bx4 188 | 189 | T_trg_src_gt_inl = torch.stack([rot_col1, rot_col2, rot_col3, trans_col], dim=2) # Bx4x4 190 | 191 | T_s_v = self.T_s_v.expand(batch_size, 4, 4) 192 | T_trg_src_gt_inl_sensor = T_s_v.bmm(T_trg_src_gt_inl).bmm(se3_inv(T_s_v)) # Transform pose to sensor frame. 193 | 194 | # The ground truth target points, which will be compared to the matched target points. 195 | kpt_3D_pseudo_gt = T_trg_src_gt_inl_sensor.bmm(kpt_3D_src) # Bx4xN 196 | kpt_2D_pseudo_gt = self.stereo_cam.camera_model(kpt_3D_pseudo_gt)[:, 0:2, :] # Bx2xN 197 | 198 | # Find the residual between the ground truth and matched point coordinates and determine the inliers. 199 | if '3D' in self.config['outlier_rejection']['dim']: 200 | err = torch.norm(kpt_3D_pseudo - kpt_3D_pseudo_gt, dim=1) # B x N 201 | inliers_3D = err < self.config['outlier_rejection']['error_tolerance']['3D'] 202 | valid_inliers = valid_inliers & (inliers_3D.unsqueeze(1)) 203 | 204 | if ('2D' in self.config['outlier_rejection']['dim']): 205 | err = torch.norm(kpt_2D_pseudo - kpt_2D_pseudo_gt, dim=1) # B x N 206 | inliers_2D = err < self.config['outlier_rejection']['error_tolerance']['2D'] 207 | valid_inliers = valid_inliers & (inliers_2D.unsqueeze(1)) 208 | 209 | if 'plane' in self.config['outlier_rejection']['dim']: 210 | # Do comparison in the vehicle frame, not the sensor frame. 211 | kpt_3D_pseudo_gt_vehicle = T_trg_src_gt_inl.bmm(se3_inv(T_s_v).bmm(kpt_3D_src)) # Bx4xN 212 | kpt_3D_pseudo_vehicle = se3_inv(T_s_v).bmm(kpt_3D_pseudo) 213 | err = torch.norm(kpt_3D_pseudo_vehicle[:, 0:2, :] - kpt_3D_pseudo_gt_vehicle[:, 0:2, :], dim=1) # BxN 214 | inliers_plane = err < self.config['outlier_rejection']['error_tolerance']['plane'] 215 | valid_inliers = valid_inliers & (inliers_plane.unsqueeze(1)) 216 | 217 | if self.config['outlier_rejection']['on'] and (self.config['outlier_rejection']['type'] == 'ransac'): 218 | # Find outlier based on RANSAC. 219 | ransac_inliers = self.ransac_block(kpt_3D_src, 220 | kpt_3D_pseudo, 221 | kpt_2D_pseudo, 222 | kpt_valid_src, 223 | kpt_valid_pseudo, 224 | weights, 225 | self.config['outlier_rejection']['dim'][0]) 226 | 227 | valid_inliers = ransac_inliers.unsqueeze(1) 228 | 229 | 230 | ################################################################################################################ 231 | # Compute the pose 232 | ################################################################################################################ 233 | 234 | # We can choose to use just the keypoint loss and not compute pose for the first few epochs. 235 | if test or (epoch >= self.config['training']['start_pose_estimation']): 236 | 237 | # Check that we have enough inliers for all example sin the bach to compute pose. 
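# (A rigid transform is only well determined from at least 3 non-degenerate point pairs; requiring 6 leaves
# margin for points that end up with zero weight, mirroring the 6-point sampling used in the RANSAC block.)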
238 | valid = kpt_valid_src & kpt_valid_pseudo & valid_inliers 239 | 240 | num_inliers = torch.sum(valid.squeeze(1), dim=1)[0] 241 | if torch.any(num_inliers < 6): 242 | raise RuntimeError('Too few inliers to compute pose: {}'.format(num_inliers)) 243 | 244 | weights[valid == 0] = 0.0 245 | T_trg_src = self.svd_block(kpt_3D_src, kpt_3D_pseudo, weights) 246 | 247 | # If we are testing, return the pose. 248 | if test: 249 | return T_trg_src 250 | 251 | ################################################################################################################ 252 | # Compute the losses 253 | ################################################################################################################ 254 | 255 | # Variables to store the loss 256 | losses = {'total': torch.zeros(1).cuda()} 257 | mse_loss_fn = torch.nn.MSELoss() 258 | loss_types = self.config['loss']['types'] 259 | loss_weights = self.config['loss']['weights'] 260 | 261 | # Keypoint loss in different versions depending on using 2D coordinates, 3D coordinates, or a subset of the 262 | # ground truth pose to transform the target points in the plane. 263 | if 'keypoint_3D' in loss_types: 264 | valid = kpt_valid_pseudo & kpt_valid_src & valid_inliers # B x 1 x N 265 | valid = valid.expand(batch_size, 4, valid.size(2)) # B x 4 x N 266 | keypoint_loss = mse_loss_fn(kpt_3D_pseudo[valid], kpt_3D_pseudo_gt[valid]) 267 | keypoint_loss *= loss_weights['keypoint_3D'] 268 | losses['keypoint_3D'] = keypoint_loss 269 | losses['total'] += keypoint_loss 270 | 271 | if 'keypoint_2D' in loss_types: 272 | valid = kpt_valid_pseudo & kpt_valid_src & valid_inliers # B x 1 x N 273 | valid = valid.expand(batch_size, 2, valid.size(2)) # B x 2 x N 274 | keypoint_loss = mse_loss_fn(kpt_2D_pseudo[valid], kpt_2D_pseudo_gt[valid]) 275 | keypoint_loss *= loss_weights['keypoint_2D'] 276 | losses['keypoint_2D'] = keypoint_loss 277 | losses['total'] += keypoint_loss 278 | 279 | if ('keypoint_plane' in loss_types): 280 | valid = kpt_valid_pseudo & kpt_valid_src & valid_inliers # B x 1 x N 281 | valid = valid.expand(batch_size, 2, valid.size(2)) # B x 4 x N 282 | kpt_3D_pseudo_gt_vehicle = T_trg_src_gt_inl.bmm(se3_inv(T_s_v).bmm(kpt_3D_src)) # Bx4xN 283 | kpt_3D_pseudo_vehicle = se3_inv(T_s_v).bmm(kpt_3D_pseudo) 284 | keypoint_loss = mse_loss_fn(kpt_3D_pseudo_vehicle[:, 0:2, :][valid], 285 | kpt_3D_pseudo_gt_vehicle[:, 0:2, :][valid]) 286 | keypoint_loss *= loss_weights['keypoint_plane'] 287 | losses['keypoint_plane'] = keypoint_loss 288 | losses['total'] += keypoint_loss 289 | 290 | # Pose loss either using the full 6DOF pose or a subset (x, y, heading). 
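# For the planar variant below, this reduces to a weighted quadratic penalty on the pose error expressed in
# Lie-algebra coordinates: with xi = se3_log(T_est * T_gt^-1) and W = diag(w_x, w_y, 0, 0, 0, w_heading),
#   pose_loss = (1 / batch_size) * sum_b xi_b^T W xi_b,
# so only the x, y and heading components of the error contribute.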
291 | if test or (epoch >= self.config['training']['start_pose_estimation']): 292 | if ('pose' in loss_types): 293 | T_trg_src_gt = pose_se3 294 | rot_loss, trans_loss = self.pose_loss(T_trg_src_gt, T_trg_src, mse_loss_fn) 295 | rotation_loss = loss_weights['rotation'] * rot_loss 296 | translation_loss = loss_weights['translation'] * trans_loss 297 | pose_loss = rotation_loss + translation_loss 298 | losses['rotation'] = rotation_loss 299 | losses['translation'] = translation_loss 300 | 301 | elif ('pose_plane' in loss_types): 302 | w = torch.zeros(6) 303 | w[0] = loss_weights['translation_x'] 304 | w[1] = loss_weights['translation_y'] 305 | w[5] = loss_weights['rotation_heading'] 306 | w = torch.diag(w).unsqueeze(0).expand(batch_size, 6, 6).cuda() 307 | 308 | T_trg_src_gt = pose_se3 309 | log_pose_err = se3_log(T_trg_src.bmm(torch.inverse(T_trg_src_gt))).unsqueeze(2) 310 | pose_loss = (1.0 / batch_size) * torch.sum(log_pose_err.transpose(2, 1).bmm(w).bmm(log_pose_err)) 311 | 312 | losses['pose'] = pose_loss 313 | losses['total'] += pose_loss 314 | 315 | ################################################################################################################ 316 | # Return the estimated pose and the loss 317 | ################################################################################################################ 318 | 319 | if epoch >= self.config['training']['start_pose_estimation']: 320 | return losses, T_trg_src 321 | else: 322 | # Haven't computed the pose yet, just the keypoint loss. 323 | return losses, pose_se3 324 | 325 | 326 | def pose_loss(self, T, T_est, loss_fn): 327 | """ 328 | Compute the pose loss using all DOF. 329 | 330 | Args: 331 | T (torch.tensor): the ground truth pose represented as a 4x4 matrix (Bx4x4). 332 | T_est (torch.tensor): the estimated pose represented as a 4x4 matrix (Bx4x4). 333 | loss_fn (): the loss function to use. 334 | 335 | Returns: 336 | rot_loss (float): the pose loss for the rotational DOF. 337 | trans_loss (float): the pose loss for the translational DOF. 338 | """ 339 | batch_size = T.size(0) 340 | 341 | R_est = T_est[:, 0:3, 0:3] 342 | R = T[:, 0:3, 0:3] 343 | identity = torch.eye(3).unsqueeze(0).expand(batch_size, 3, 3).cuda() 344 | 345 | rot_loss = loss_fn(R_est.transpose(2, 1).bmm(R), identity) 346 | trans_loss = loss_fn(T_est[:, 0:3, 3], T[:, 0:3, 3]) 347 | 348 | return rot_loss, trans_loss 349 | 350 | 351 | 352 | 353 | 354 | 355 | -------------------------------------------------------------------------------- /src/model/ransac_block.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from src.model.svd_block import SVDBlock 8 | from src.utils.lie_algebra import se3_log, se3_inv 9 | from src.utils.stereo_camera_model import StereoCameraModel 10 | 11 | class RANSACBlock(nn.Module): 12 | """ 13 | Use RANSAC for outlier rejection during inference. 14 | """ 15 | 16 | def __init__(self, config, T_s_v): 17 | """ 18 | Initialize RANSAC by setting values used to determine how many iterations to run. 19 | 20 | Args: 21 | config (dict): configuration parameters. 22 | T_s_v (torch.tensor): 4x4 transformation matrix providing the transform from the vehicle frame to the 23 | sensor frame. 24 | """ 25 | super(RANSACBlock, self).__init__() 26 | 27 | # Transform from vehicle to sensor frame. 
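# Poses estimated in the vehicle frame are moved into the sensor (camera) frame by conjugation,
# T_cam = T_s_v * T_vehicle * T_s_v^-1, as done in forward() before reprojecting points with the camera model.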
28 | self.register_buffer('T_s_v', T_s_v) 29 | 30 | self.stereo_cam = StereoCameraModel(config['stereo']['cu'], config['stereo']['cv'], 31 | config['stereo']['f'], config['stereo']['b']) 32 | 33 | # The minimum number of inliers we want, quit RANSAC when this is achieved. 34 | self.inlier_threshold = config['outlier_rejection']['inlier_threshold'] 35 | 36 | # Error must be smaller than or equal to threshold to this be considered inlier. 37 | dim_key = config['outlier_rejection']['dim'][0] 38 | self.error_tolerance = config['outlier_rejection']['error_tolerance'][dim_key] 39 | 40 | # Maximum number of iterations to run before giving up. 41 | self.num_iterations = config['outlier_rejection']['num_iterations'] 42 | 43 | # SVD is used to estimate pose. 44 | self.svd = SVDBlock(T_s_v) 45 | 46 | def forward(self, keypoints_3D_src, keypoints_3D_trg, keypoints_2D_trg, valid_pts_src, valid_pts_trg, weights, dim): 47 | """ 48 | Outlier rejection with RANSAC. 49 | 50 | Args: 51 | keypoints_3D_src (torch,tensor, Bx4xN): 3D point coordinates of keypoints from source frame. 52 | keypoints_3D_trg (torch,tensor, Bx4xN): 3D point coordinates of keypoints from target frame. 53 | keypoints_2D_trg (torch,tensor, Bx2xN): 2D image coordinates of keypoints from source frame. 54 | valid_pts_src (torch.tensor, Bx1xN): Values (0 or 1) to indicate if a keypoint in source frame is valid 55 | (i.e. can be used for pose computation). 56 | valid_pts_trg (torch.tensor, Bx1xN): Values (0 or 1) to indicate if a keypoint in target frame is valid 57 | (i.e. can be used for pose computation). 58 | weights (torch.tensor, Bx1xN): weights in range (0, 1) associated with the matched source and 59 | target points. 60 | dim (str): '2D' or '3D' to specify if error should be taken between 2D image coordinates or 3D point 61 | coordinates. 62 | 63 | Returns: 64 | inliers (torch.tensor, BxN): 65 | """ 66 | batch_size, _, n_points = keypoints_3D_src.size() 67 | 68 | valid = valid_pts_src & valid_pts_trg 69 | weights_svd = copy.deepcopy(weights) 70 | weights_svd[valid == 0] = 0.0 71 | 72 | pts_3D_src = keypoints_3D_src.detach() 73 | pts_3D_trg = keypoints_3D_trg.detach() 74 | pts_2D_trg = keypoints_2D_trg.detach() 75 | 76 | max_num_inliers = torch.zeros(batch_size).type_as(pts_3D_src) # Keep track of highest number of inliers so far. 77 | inliers = torch.zeros(batch_size, n_points, dtype=torch.bool).cuda() 78 | 79 | i = 0 80 | ransac_complete = torch.zeros(batch_size).type_as(pts_3D_src).int() 81 | 82 | while (i < self.num_iterations) and (torch.sum(ransac_complete) < batch_size): 83 | 84 | # Pick a random subset of 6 point pairs (3 sufficient, but some points will have weight 0 so pick a 85 | # few more than needed to increase probability of getting points with rank 3). 
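# Note that torch.randint draws the 6 indices independently (i.e. with replacement), so duplicates are
# possible; if a degenerate draw makes the SVD fail, the except branch below prints the point ranks and repeats
# the iteration without counting it towards num_iterations.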
86 | rand_index = torch.randint(0, n_points, size=(batch_size, 6)).type_as(pts_3D_src).long() 87 | rand_index = rand_index.unsqueeze(1) 88 | rand_pts_3D_src = torch.gather(pts_3D_src, dim=2, index=rand_index.expand(batch_size, 4, 6)) # 1x4xM 89 | rand_pts_3D_trg = torch.gather(pts_3D_trg, dim=2, index=rand_index.expand(batch_size, 4, 6)) # 1x4xM 90 | rand_weights = torch.gather(weights.detach(), dim=2, index=rand_index) # 1x1xM 91 | 92 | # Run SVD 93 | try: 94 | T_trg_src = self.svd(rand_pts_3D_src, rand_pts_3D_trg, rand_weights) # pose in vehicle frame 95 | except RuntimeError as e: 96 | print(e) 97 | print("RANSAC SVD did not converge, re-doing iteration {}".format(i)) 98 | print("weights: {}".format(rand_weights[0, 0, :])) 99 | print("rank src pts: {}".format(torch.matrix_rank(rand_pts_3D_src[0, 0:3, :]))) 100 | print("rank trg pts: {}".format(torch.matrix_rank(rand_pts_3D_trg[0, 0:3, :])), flush=True) 101 | continue 102 | 103 | # Find number of inliers 104 | T_s_v = self.T_s_v.expand(batch_size, 4, 4) 105 | T_trg_src_cam = T_s_v.bmm(T_trg_src).bmm(se3_inv(T_s_v)) # pose in camera frame 106 | pts_3D_trg_est = T_trg_src_cam.bmm(pts_3D_src) 107 | pts_2D_trg_est = self.stereo_cam.camera_model(pts_3D_trg_est)[:, 0:2, :] 108 | 109 | if dim == '2D': 110 | err_pts = torch.norm(pts_2D_trg - pts_2D_trg_est, dim=1) # BxN 111 | else: 112 | err_pts = torch.norm(pts_3D_trg - pts_3D_trg_est, dim=1) # BxN 113 | 114 | err_pts_small = err_pts < self.error_tolerance 115 | err_pts_small[valid_pts_src[:, 0, :] == 0] = 0 116 | err_pts_small[valid_pts_trg[:, 0, :] == 0] = 0 117 | 118 | num_inliers = torch.sum(err_pts_small, dim=1) 119 | 120 | fraction_inliers = num_inliers.float() / n_points 121 | enough_inliers = fraction_inliers > self.inlier_threshold 122 | ransac_complete = ransac_complete | enough_inliers 123 | 124 | for b in range(batch_size): 125 | if num_inliers[b] > max_num_inliers[b]: 126 | max_num_inliers[b] = num_inliers[b] 127 | inliers[b, :] = err_pts_small[b, :] 128 | 129 | i += 1 130 | 131 | return inliers -------------------------------------------------------------------------------- /src/model/svd_block.py: -------------------------------------------------------------------------------- 1 | """ Some of this code is based on Code from: https://github.com/WangYueFt/dcp/blob/master/model.py """ 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from src.utils.lie_algebra import se3_log, se3_inv 7 | 8 | class SVDBlock(nn.Module): 9 | """ 10 | Compute the relative pose between the source and target frames. 11 | """ 12 | def __init__(self, T_s_v): 13 | """ 14 | Initialize the SVD class. 15 | 16 | Args: 17 | T_s_v (torch.tensor): 4x4 transformation matrix providing the transform from the vehicle frame to the 18 | sensor frame. 19 | """ 20 | super(SVDBlock, self).__init__() 21 | 22 | self.register_buffer('T_s_v', T_s_v) 23 | 24 | def forward(self, keypoints_3D_src, keypoints_3D_trg, weights): 25 | """ 26 | Compute the pose, T_trg_src, from the source to the target frame. 27 | 28 | Args: 29 | keypoints_3D_src (torch,tensor, Bx4xN): 3D point coordinates of keypoints from source frame. 30 | keypoints_3D_trg (torch,tensor, Bx4xN): 3D point coordinates of keypoints from target frame. 31 | weights (torch.tensor, Bx1xN): weights in range (0, 1) associated with the matched source and target 32 | points. 33 | 34 | Returns: 35 | T_trg_src (torch.tensor, Bx4x4): relative transform from the source to the target frame. 
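        Illustrative usage (hypothetical values, not taken from the repository's tests): with
        identical source and target points and an identity vehicle-to-sensor transform, the
        recovered pose should be close to identity:

            svd = SVDBlock(torch.eye(4))
            pts = torch.rand(1, 4, 10)
            pts[:, 3, :] = 1.0                 # Bx4xN homogeneous points
            weights = torch.ones(1, 1, 10)     # Bx1xN uniform weights
            T = svd(pts, pts, weights)         # approximately torch.eye(4)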
36 | """ 37 | batch_size, _, n_points = keypoints_3D_src.size() 38 | 39 | # Compute weighted centroids (elementwise multiplication/division) 40 | centroid_src = torch.sum(keypoints_3D_src[:, 0:3, :] * weights, dim=2, keepdim=True) / torch.sum(weights, dim=2, 41 | keepdim=True) # Bx3x1 42 | centroid_trg = torch.sum(keypoints_3D_trg[:, 0:3, :] * weights, dim=2, keepdim=True) / torch.sum(weights, dim=2, 43 | keepdim=True) 44 | 45 | src_centered = keypoints_3D_src[:, 0:3, :] - centroid_src # Bx3xN 46 | trg_centered = keypoints_3D_trg[:, 0:3, :] - centroid_trg 47 | 48 | W = torch.diag_embed(weights.reshape(batch_size, n_points)) # BxNxN 49 | w = torch.sum(weights, dim=2).unsqueeze(2) # Bx1x1 50 | 51 | H = (1.0 / w) * torch.bmm(trg_centered, torch.bmm(W, src_centered.transpose(2, 1).contiguous())) # Bx3x3 52 | 53 | U, S, V = torch.svd(H) 54 | 55 | det_UV = torch.det(U) * torch.det(V) 56 | ones = torch.ones(batch_size, 2).type_as(V) 57 | diag = torch.diag_embed(torch.cat((ones, det_UV.unsqueeze(1)), dim=1)) # Bx3x3 58 | 59 | # Compute rotation and translation (T_trg_src in sensor frame) 60 | R_trg_src = torch.bmm(U, torch.bmm(diag, V.transpose(2, 1).contiguous())) # Bx3x3 61 | t_trg_src_insrc = centroid_src - torch.bmm(R_trg_src.transpose(2, 1).contiguous(), centroid_trg) # Bx3x1 62 | t_src_trg_intrg = -R_trg_src.bmm(t_trg_src_insrc) # Translation from trg to src given in src frame. 63 | 64 | # Create translation matrix 65 | zeros = torch.zeros(batch_size, 1, 3).type_as(V) # Bx1x3 66 | one = torch.ones(batch_size, 1, 1).type_as(V) # Bx1x1 67 | trans_cols = torch.cat([t_src_trg_intrg, one], dim=1) # Bx4x1 68 | rot_cols = torch.cat([R_trg_src, zeros], dim=1) # Bx4x3 69 | T_trg_src = torch.cat([rot_cols, trans_cols], dim=2) # Bx4x4 70 | 71 | # Convert from sensor to vehicle frame 72 | T_s_v = self.T_s_v.expand(batch_size, 4, 4) 73 | T_trg_src = se3_inv(T_s_v).bmm(T_trg_src).bmm(T_s_v) 74 | 75 | return T_trg_src -------------------------------------------------------------------------------- /src/model/unet.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torchvision import transforms 8 | import torchvision 9 | 10 | """ Code for the components of the U-Net model is taken from: 11 | https://github.com/milesial/Pytorch-UNet/blob/master/unet/unet_parts.py""" 12 | class DoubleConv(nn.Module): 13 | """(convolution => [BN] => ReLU) * 2""" 14 | 15 | def __init__(self, in_channels, out_channels): 16 | super().__init__() 17 | self.double_conv = nn.Sequential( 18 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), 19 | nn.BatchNorm2d(out_channels), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), 22 | nn.BatchNorm2d(out_channels), 23 | nn.ReLU(inplace=True) 24 | ) 25 | 26 | def forward(self, x): 27 | return self.double_conv(x) 28 | 29 | class Down(nn.Module): 30 | """Downscaling with maxpool then double conv""" 31 | 32 | def __init__(self, in_channels, out_channels): 33 | super().__init__() 34 | self.maxpool_conv = nn.Sequential( 35 | nn.MaxPool2d(2), 36 | DoubleConv(in_channels, out_channels) 37 | ) 38 | 39 | def forward(self, x): 40 | return self.maxpool_conv(x) 41 | 42 | class Up(nn.Module): 43 | """Upscaling then double or tiple conv""" 44 | 45 | def __init__(self, in_channels, out_channels, bilinear=True, double=True): 46 | super().__init__() 47 | 48 | # if bilinear, use the 
normal convolutions to reduce the number of channels 49 | if bilinear: 50 | self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 51 | else: 52 | self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2) 53 | 54 | self.conv = DoubleConv(in_channels, out_channels) if double else TripleConv(in_channels, out_channels) 55 | 56 | def forward(self, x1, x2): 57 | x1 = self.up(x1) 58 | 59 | # input is CHW 60 | diffY = torch.tensor([x2.size()[2] - x1.size()[2]]) 61 | diffX = torch.tensor([x2.size()[3] - x1.size()[3]]) 62 | 63 | x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, 64 | diffY // 2, diffY - diffY // 2]) 65 | 66 | # if you have padding issues, see 67 | # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a 68 | # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd 69 | x = torch.cat([x2, x1], dim=1) 70 | 71 | return self.conv(x) 72 | 73 | class OutConv(nn.Module): 74 | def __init__(self, in_channels, out_channels, ks=1): 75 | super(OutConv, self).__init__() 76 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=ks) 77 | 78 | def forward(self, x): 79 | return self.conv(x) 80 | 81 | """ The UNet network is based on code from: https://github.com/milesial/Pytorch-UNet/blob/master/unet/unet_model.py, 82 | but includes changes/adaptions.""" 83 | class UNet(nn.Module): 84 | """ 85 | Unet network to compute keypoint detector values, descriptors, and scores. 86 | """ 87 | def __init__(self, n_channels, n_classes, layer_size): 88 | """ 89 | Initialize the network with one encoder and two decoders. 90 | 91 | Args: 92 | num_channels (int): number of channels in the input image (we use 3 for one RGB image). 93 | num_classes (int): number of classes (output channels from the decoder), 1 in our case. 94 | layer_size (int): size of the first layer if the encoder. The size of the following layers are 95 | determined from this. 96 | """ 97 | super(UNet, self).__init__() 98 | self.n_channels = n_channels 99 | self.n_classes = n_classes 100 | bilinear = True 101 | m = layer_size 102 | 103 | self.inc = DoubleConv(n_channels, m) # 384 x 512 (height/width after layer given 384 x 512 input image) 104 | self.down1 = Down(m, m * 2) # 192 x 256 105 | self.down2 = Down(m * 2, m * 4) # 96 x 128 106 | self.down3 = Down(m * 4, m * 8) # 48 x 64 107 | self.down4 = Down(m * 8, m * 16) # 24 x 32 108 | 109 | self.up1_pts = Up(m * 24, m * 8, bilinear) 110 | self.up2_pts = Up(m * 12, m * 4, bilinear) 111 | self.up3_pts = Up(m * 6, m * 2, bilinear) 112 | self.up4_pts = Up(m * 3, m, bilinear) 113 | self.outc_pts = OutConv(m, n_classes) 114 | 115 | self.up1_score = Up(m * 24, m * 8, bilinear) 116 | self.up2_score = Up(m * 12, m * 4, bilinear) 117 | self.up3_score = Up(m * 6, m * 2, bilinear) 118 | self.up4_score = Up(m * 3, m, bilinear) 119 | self.outc_score = OutConv(m, n_classes) 120 | 121 | self.sigmoid = nn.Sigmoid() 122 | 123 | def forward(self, x): 124 | """ 125 | Forward pass of network to get keypoint detector values, descriptors and, scores 126 | 127 | Args: 128 | x (torch.tensor, Bx3xHxW): RGB images to input to the network. 129 | 130 | Returns: 131 | logit_pts (torch.tensor, Bx1xHxW): detector values for each pixel, which will be used to compute the 132 | final keypoint coordinates. 133 | score (torch.tensor, Bx1xHxW): an importance score for each pixel. 134 | descriptors (torch.tensor, BxCxHxW): descriptors for each pixel, C is length of descriptor. 
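        Illustrative shape check (the sizes are hypothetical, not the repository's configuration):

            net = UNet(n_channels=3, n_classes=1, layer_size=16)
            x = torch.rand(2, 3, 384, 512)
            logits_pts, score, descriptors = net(x)
            # logits_pts: 2x1x384x512, score: 2x1x384x512 (in (0, 1) after the sigmoid),
            # descriptors: 2x496x384x512 since C = 16 * (1 + 2 + 4 + 8 + 16) = 496.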
135 | 136 | """ 137 | batch_size, _, height, width = x.size() 138 | 139 | x1 = self.inc(x) 140 | x2 = self.down1(x1) 141 | x3 = self.down2(x2) 142 | x4 = self.down3(x3) 143 | x5 = self.down4(x4) 144 | 145 | x4_up_pts = self.up1_pts(x5, x4) 146 | x3_up_pts = self.up2_pts(x4_up_pts, x3) 147 | x2_up_pts = self.up3_pts(x3_up_pts, x2) 148 | x1_up_pts = self.up4_pts(x2_up_pts, x1) 149 | logits_pts = self.outc_pts(x1_up_pts) 150 | 151 | x4_up_score = self.up1_score(x5, x4) 152 | x3_up_score = self.up2_score(x4_up_score, x3) 153 | x2_up_score = self.up3_score(x3_up_score, x2) 154 | x1_up_score = self.up4_score(x2_up_score, x1) 155 | score = self.outc_score(x1_up_score) 156 | score = self.sigmoid(score) 157 | 158 | # Resize outputs of each encoder layer to the size of the original image. Features are interpolated using 159 | # bilinear interpolation to get gradients for back-prop. Concatenate along the feature channel to get 160 | # pixel-wise descriptors of size BxCxHxW. 161 | f1 = F.interpolate(x1, size=(height, width), mode='bilinear') 162 | f2 = F.interpolate(x2, size=(height, width), mode='bilinear') 163 | f3 = F.interpolate(x3, size=(height, width), mode='bilinear') 164 | f4 = F.interpolate(x4, size=(height, width), mode='bilinear') 165 | f5 = F.interpolate(x5, size=(height, width), mode='bilinear') 166 | 167 | feature_list = [f1, f2, f3, f4, f5] 168 | descriptors = torch.cat(feature_list, dim=1) 169 | 170 | return logits_pts, score, descriptors -------------------------------------------------------------------------------- /src/model/weight_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class WeightBlock(nn.Module): 6 | """ 7 | Computes the weights for matched points pairs that will be used when computing the pose. 8 | """ 9 | def __init__(self): 10 | super(WeightBlock, self).__init__() 11 | 12 | def forward(self, kpt_desc_norm_src, kpt_desc_norm_pseudo, kpt_scores_src, kpt_scores_pseudo): 13 | """ 14 | Compute the weights for matched point pairs. 15 | 16 | Args: 17 | kpt_desc_norm_src (torch.tensor, BxCxN): normalized descriptors for the source keypoints. 18 | kpt_desc_norm_pseudo (torch.tensor, BxCxN): normalized descriptors for the target pseudo keypoints. 19 | kpt_scores_src (torch.tensor, Bx1xN): scores for the source keypoints. 20 | kpt_scores_pseudo (torch.tensor, Bx1xN): scores for the target pseudo keypoints. 21 | 22 | Returns: 23 | weights (torch.tensor, Bx1xN): weights for each matched point pair in range [0, 1] to be used when 24 | computing the pose. 
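        In short, with zncc denoting the (approximately) zero-normalized cross-correlation
        between a pair of matched descriptors (range [-1, 1]), the weight computed below is

            w = 0.5 * (zncc + 1) * score_src * score_pseudo,

        so, for example, zncc = 0.8, score_src = 0.9 and score_pseudo = 0.5 give
        w = 0.5 * 1.8 * 0.9 * 0.5 = 0.405.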
25 | """ 26 | batch_size, channels, n_points = kpt_desc_norm_src.size() 27 | 28 | # Get the zncc between each matched point pair 29 | desc_src = kpt_desc_norm_src.transpose(2, 1).reshape(batch_size * n_points, channels) # BNxC 30 | desc_pseudo = kpt_desc_norm_pseudo.transpose(2, 1).reshape(batch_size * n_points, channels) # BNxC 31 | 32 | desc_match_val = torch.matmul(desc_src.unsqueeze(1), desc_pseudo.unsqueeze(2)) / desc_pseudo.size(1) # BNx1 33 | 34 | desc_match_val = desc_match_val.reshape(batch_size, 1, n_points) + 1.0 # Range [0, 2] Bx1xN 35 | 36 | weights = 0.5 * desc_match_val * kpt_scores_src * kpt_scores_pseudo # Range [0, 1] 37 | 38 | return weights -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import pickle 5 | import re 6 | import sys 7 | import time 8 | 9 | import numpy as np 10 | import torch 11 | import torch.backends.cudnn as cudnn 12 | import torch.nn as nn 13 | import torch.optim as optim 14 | from torch.utils import data 15 | from torch.utils.data.sampler import RandomSampler 16 | 17 | from src.dataset import Dataset 18 | from src.model.pipeline import Pipeline 19 | from src.model.unet import UNet 20 | from src.utils.lie_algebra import se3_log 21 | from src.utils.statistics import Statistics 22 | from visualization.plots import Plotting 23 | 24 | torch.backends.cudnn.benchmark = True 25 | 26 | def rmse(outputs_se3, targets_se3): 27 | """ 28 | Compute the rotation and translation RMSE for the SE(3) poses provided. Compute RMSE for ich live run 29 | individually. 30 | 31 | Args: 32 | outputs_se3 (dict): map from id of the localized live run to a list of estimated pose transforms 33 | represented as 4x4 numpy arrays. 34 | outputs_se3 (dict): map from id of the localized live run to a list of ground truth pose transforms 35 | represented as 4x4 numpy arrays. 36 | """ 37 | 38 | for live_run_id in outputs_se3.keys(): 39 | 40 | out_mat = torch.from_numpy(np.stack(outputs_se3[live_run_id], axis=0)) 41 | trg_mat = torch.from_numpy(np.stack(targets_se3[live_run_id], axis=0)) 42 | 43 | # Get the difference in pose by T_diff = T_trg * inv(T_src) 44 | diff_mat = trg_mat.bmm(out_mat.inverse()) 45 | diff_R = diff_mat[:, 0:3, 0:3] 46 | diff_tr = diff_mat[:, 0:3, 3].numpy() 47 | 48 | err_tr_sq = (diff_tr[:, 0] * diff_tr[:, 0]) + (diff_tr[:, 1] * diff_tr[:, 1]) + (diff_tr[:, 2] * diff_tr[:, 2]) 49 | rmse_tr = np.sqrt(np.mean(err_tr_sq, axis=0)) 50 | 51 | d = torch.acos((0.5 * (diff_R[:, 0, 0] + diff_R[:, 1, 1] + diff_R[:, 2, 2] - 1.0)).clamp(-1 + 1e-6, 1 - 1e-6)) 52 | rmse_rot = np.sqrt(np.mean(np.rad2deg(d.numpy())**2, axis=0)) 53 | 54 | print(f'RMSE, live_run_id {live_run_id}, translation: {rmse_tr}') 55 | print(f'RMSE, live_run_id {live_run_id}, rotation: {rmse_rot}\n') 56 | 57 | def test_model(pipeline, net, data_loader, dof): 58 | """ 59 | Run the test. 60 | 61 | Args: 62 | pipeline (Pipeline): the training pipeline object that performs a forward pass of the training pipeline and 63 | computes pose and losses. 64 | net (torch.nn.Module): neural network module. 65 | data_loader (torch.utils.data.DataLoader): data loader for training data. 66 | dof (list[int]): indices of the pose DOF (out of the 6 DOF) that we have learned. 67 | """ 68 | start_time = time.time() 69 | 70 | net.eval() 71 | 72 | # Object to keep track of test errors. 
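# As used below, Statistics accumulates results per live run: the sample ids, the id of the
# map run being localized against, and the estimated/ground-truth poses both as 4x4 SE(3)
# matrices and as 6-DOF se(3) log vectors, so the errors can be summarized and plotted
# after the loop.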
73 | stats = Statistics() 74 | 75 | errors = None 76 | # We want to print out the avg./max target pose absolute values for reference together with the pose errors. 77 | targets_total = torch.zeros(6) 78 | targets_max = torch.zeros(6) 79 | batch_size = -1 80 | num_batches, num_examples = 0, 0 81 | 82 | with torch.no_grad(): 83 | for images, disparities, ids, pose_se3, pose_log in data_loader: 84 | 85 | batch_size = images.size(0) 86 | 87 | try: 88 | # Compute the output poses (we use -1 a placeholder as epoch is not relevant). 89 | output_se3 = pipeline.forward(net, images, disparities, pose_se3, pose_log, epoch=-1, test=True) 90 | 91 | except Exception as e: 92 | print(e) 93 | print("Ids: {}".format(ids)) 94 | continue 95 | 96 | num_batches += 1 97 | num_examples += batch_size 98 | 99 | # Get the error in each of the 6 pose DOF. 100 | diff = se3_log(output_se3.inverse().bmm(pose_se3.cuda())).detach().cpu().numpy() 101 | if num_batches == 1: 102 | errors = diff 103 | else: 104 | errors = np.concatenate((errors, diff), axis=0) 105 | 106 | targets_total += torch.sum(torch.abs(pose_log), dim=0).detach().cpu() 107 | targets_max = torch.max(targets_max, torch.max(torch.abs(pose_log.detach().cpu()), dim=0)[0]) 108 | 109 | # Collect the poses so that we can plot the results later. 110 | output_log_np = se3_log(output_se3).detach().cpu().numpy() # Bx6 111 | target_log_np = se3_log(pose_se3).detach().cpu().numpy() 112 | output_se3_np = output_se3.detach().cpu().numpy() # Bx4x4 113 | target_se3_np = pose_se3.detach().cpu().numpy() 114 | # kpt_inliers = torch.sum(saved_data['valid_inliers'][(0, 1)], dim=2).detach().cpu().numpy() # B x 1 115 | 116 | # Loop over each datas sample in the batch. 117 | for k in range(output_se3_np.shape[0]): 118 | sample_id = ids[k] 119 | _, live_run_id, _, map_run_id, _ = re.findall('\w+', sample_id) 120 | 121 | stats.add_sample_id(live_run_id, sample_id) 122 | stats.add_live_run_id(live_run_id) 123 | stats.set_map_run_id(map_run_id) 124 | stats.add_outputs_targets_se3(live_run_id, output_se3_np[k, :, :], target_se3_np[k, :, :]) 125 | stats.add_outputs_targets_log(live_run_id, output_log_np[k, :], target_log_np[k, :]) 126 | # stats.add_kpt_inliers(live_run_id, kpt_inliers[k, 0]) 127 | 128 | # Compute errors. 129 | # RMSE for each pose DOF. 130 | errors[:, 3:6] = np.rad2deg(errors[:, 3:6]) 131 | test_errors = np.sqrt(np.mean(errors ** 2, axis=0)).reshape(1, 6) 132 | 133 | targets_avg = (targets_total * (1.0 / num_examples)).detach().cpu().numpy() 134 | targets_avg[3:6] = np.rad2deg(targets_avg[3:6]) 135 | targets_max = targets_max.detach().cpu().numpy() 136 | targets_max[3:6] = np.rad2deg(targets_max[3:6]) 137 | 138 | print(f'\nMap run id: {stats.get_map_run_id()}') 139 | print(f'Live run ids: {stats.get_live_run_ids()}') 140 | print(f'Pose RMSE by DOF: {test_errors[:, np.array(dof)]}') 141 | print(f'Average pose targets by DOF: {targets_avg[np.array(dof)]}') 142 | print(f'Max pose targets by DOF: {targets_max[np.array(dof)]}') 143 | 144 | print(f'Path test duration: {time.time() - start_time} seconds.\n') 145 | 146 | return stats 147 | 148 | def test(pipeline, net, test_loaders, results_path): 149 | """ 150 | Test the model by localizing different runs to each other from one or more paths. Print the pose errors and 151 | losses and plot the target and estimated poses for each DOF for each test. 152 | 153 | Args: 154 | pipeline (Pipeline): the training pipeline object that performs a forward pass of the training pipeline and 155 | computes pose and losses. 
156 | net (torch.nn.Module): neural network module. 157 | test_loaders (dict): maps path names to data loaders for test data. 158 | results_path (string): directory for storing results (outputs, plots). 159 | """ 160 | # Helper class for plotting results. 161 | plotting = Plotting(results_path) 162 | 163 | # Find which of the DOF of the 6 DOF pose we want to learn. Record the indices to keep in the pose vector. 164 | dof = [0, 1, 5] if 'pose_plane' in config['loss']['types'] else [0, 1, 2, 3, 4, 5] 165 | 166 | # The data loaders are stored in a dict that maps from path name to a list of data loaders as we may test using 167 | # data from more than one path. Also, for each path we localize different runs against each other. One run is used 168 | # as the map and then one or more other runs are localized to this map run. One individual data loader contains all 169 | # the samples for all live localized back to one map run. 170 | for path_name in test_loaders.keys(): 171 | 172 | print(f'\nTesting path: {path_name}') 173 | 174 | start_time = time.time() 175 | 176 | # Loop over each data loader (one data loader per map run we localize to). 177 | for i in range(len(test_loaders[path_name])): 178 | 179 | path_stats = test_model(pipeline, net, test_loaders[path_name][i], dof) 180 | 181 | outputs_log = path_stats.get_outputs_log() 182 | targets_log = path_stats.get_targets_log() 183 | map_run_id = path_stats.get_map_run_id() 184 | live_run_ids = path_stats.get_live_run_ids() 185 | sample_ids = path_stats.get_sample_ids() 186 | 187 | # Plot each DOF of the estimated and target poses for each pair of live run localized to map run. 188 | plotting.plot_outputs(outputs_log, targets_log, path_name, map_run_id, dof) 189 | 190 | # Compute the RMSE for translation and rotation if we are using all 6 DOF. 191 | # TODO: provide the same for SE(2). 192 | if len(dof) == 6: 193 | outputs_se3 = path_stats.get_outputs_se3() 194 | targets_se3 = path_stats.get_targets_se3() 195 | rmse(outputs_se3, targets_se3) 196 | 197 | print(f'Test time for path {path_name}: {time.time() - start_time}') 198 | 199 | def main(config): 200 | """ 201 | Set up everything needed for training and call the function to run the training loop. 202 | 203 | Args: 204 | config (dict): configurations for training the network. 205 | """ 206 | results_path = f"{config['home_path']}/results/{config['experiment_name']}/" 207 | checkpoints_path = f"{config['home_path']}/networks" 208 | data_path = f"{config['home_path']}/data" 209 | datasets_path = f"{config['home_path']}/datasets" 210 | 211 | checkpoint_name = config['checkpoint_name'] 212 | dataset_name = config['dataset_name'] 213 | 214 | dataset_path = f'{datasets_path}/{dataset_name}.pickle' 215 | checkpoint_path = f'{checkpoints_path}/{checkpoint_name}.pth' 216 | 217 | if not os.path.exists(results_path): 218 | os.makedirs(results_path) 219 | 220 | # Print outputs to files 221 | orig_stdout = sys.stdout 222 | fl = open(f'{results_path}out_test.txt', 'w') 223 | sys.stdout = fl 224 | orig_stderr = sys.stderr 225 | fe = open(f'{results_path}err_test.txt', 'w') 226 | sys.stderr = fe 227 | 228 | # Record the config settings. 229 | print(config) 230 | 231 | # Load the data. 
232 | dataloader_params = config['data_loader'] 233 | dataset_params = config['dataset'] 234 | dataset_params['data_dir'] = data_path 235 | 236 | localization_data = None 237 | with open(dataset_path, 'rb') as handle: 238 | localization_data = pickle.load(handle) 239 | 240 | # The localization testing may contain data from different paths. 241 | path_loaders = {} 242 | for path_name in localization_data.paths: 243 | 244 | # We localize a set of runs against each other. One run is used as thep map and several other runs are used as 245 | # 'live' runs and localized to the map run. We create one data loader for each map run. Hence, the loader will 246 | # hold all data samples of localizing each live run to the map run. 247 | num_map_runs = len(localization_data.test_ids[path_name]) # The number of data loaders we will need. 248 | path_loaders[path_name] = [] 249 | 250 | index = 0 251 | while index < num_map_runs: 252 | 253 | # Check that the dataset is not empty before adding. 254 | if len(localization_data.test_ids[path_name][index]) > 0: 255 | 256 | test_set = Dataset(**dataset_params) 257 | test_set.load_mel_data(localization_data, 'testing', path_name, index) 258 | test_loader = data.DataLoader(test_set, **dataloader_params) 259 | path_loaders[path_name] += [test_loader] 260 | 261 | index += 1 262 | 263 | # Set up device, using GPU 0. 264 | device = torch.device('cuda:{}'.format(0) if torch.cuda.device_count() > 0 else 'cpu') 265 | torch.cuda.set_device(0) 266 | 267 | # Set training pipeline. 268 | testing_pipeline = Pipeline(config) 269 | testing_pipeline = testing_pipeline.cuda() 270 | 271 | # Set up the network. 272 | net = UNet(config['network']['num_channels'], 273 | config['network']['num_classes'], 274 | config['network']['layer_size']) 275 | 276 | # Load the network weights from a checkpoint. 277 | if os.path.exists(checkpoint_path): 278 | checkpoint = torch.load(checkpoint_path) 279 | net.load_state_dict(checkpoint['model_state_dict']) 280 | else: 281 | raise RuntimeError(f'The specified checkpoint path does not exists: {checkpoint_path}') 282 | 283 | net.cuda() 284 | 285 | test(testing_pipeline, net, path_loaders, results_path) 286 | 287 | # Stop writing outputs to file. 
288 | sys.stdout = orig_stdout 289 | fl.close() 290 | sys.stderr = orig_stderr 291 | fe.close() 292 | 293 | if __name__ == '__main__': 294 | parser = argparse.ArgumentParser() 295 | parser.add_argument('--config', default=None, type=str, 296 | help='config file path (default: None)') 297 | 298 | args = parser.parse_args() 299 | 300 | with open(args.config) as f: 301 | config = json.load(f) 302 | 303 | main(config) -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import pickle 5 | import sys 6 | import time 7 | 8 | import numpy as np 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | from torch.utils import data 14 | from torch.utils.data.sampler import RandomSampler 15 | 16 | from src.dataset import Dataset 17 | from src.model.pipeline import Pipeline 18 | from src.model.unet import UNet 19 | from src.utils.early_stopping import EarlyStopping 20 | from src.utils.lie_algebra import se3_log 21 | from src.utils.statistics import Statistics 22 | from visualization.plots import Plotting 23 | 24 | torch.backends.cudnn.benchmark = True 25 | 26 | def execute_epoch(pipeline, net, data_loader, stats, epoch, mode, config, dof, optimizer=None, scheduler=None): 27 | """ 28 | Train with data for one epoch. 29 | 30 | Args: 31 | pipeline (Pipeline): the training pipeline object that performs a forward pass of the training pipeline and 32 | computes pose and losses. 33 | net (torch.nn.Module): neural network module. 34 | data_loader (torch.utils.data.DataLoader): data loader for training data. 35 | stats (Statistics): an object for keeping track of losses. 36 | epoch (int): current epoch. 37 | mode (string): 'training' or 'validation' 38 | config (dict): parameter configurations. 39 | dof (list[int]): indices of the pose DOF (out of the 6 DOF) that we want to learn. 40 | optimizer (torch.optim.Optimizer or None): optimizer if training or None if validation. 41 | scheduler(torch.optim.lr_scheduler or None): scheduler if training or None if validation. 42 | 43 | Returns: 44 | avg_loss (float): the average loss for the epoch computed over all batches. The average for the weighted 45 | sum of all loss types. 46 | stats (Statistics): object with recorded losses for all processed data samples. 47 | """ 48 | start_time = time.time() 49 | 50 | if mode == 'training': 51 | net.train() 52 | else: 53 | net.eval() 54 | 55 | epoch_losses = {} 56 | errors = None 57 | # We want to print out the avg./max target pose absolute values for reference together with the pose errors. 58 | targets_total = torch.zeros(6) 59 | targets_max = torch.zeros(6) 60 | batch_size = -1 61 | num_batches, num_examples = 0, 0 62 | 63 | with torch.set_grad_enabled(mode == 'training'): 64 | for images, disparities, ids, pose_se3, pose_log in data_loader: 65 | 66 | losses = {} 67 | batch_size = images.size(0) 68 | if mode == 'training': 69 | optimizer.zero_grad() 70 | 71 | try: 72 | # Compute the loss and the output pose. 
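# `losses` is a dict of scalar tensors keyed by loss type; it always contains 'total' (the
# weighted sum that is backpropagated below) and, once epoch >= start_pose_estimation, the
# pose terms (e.g. 'rotation'/'translation' or 'pose'). `output_se3` is the Bx4x4 T_trg_src:
# the estimate once pose estimation has started, otherwise the pipeline returns the
# ground-truth pose as a placeholder.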
73 | losses, output_se3 = pipeline.forward(net, images, disparities, pose_se3, pose_log, epoch) 74 | 75 | if mode == 'training': 76 | losses['total'].backward() 77 | 78 | except Exception as e: 79 | print(e) 80 | print("Ids: {}".format(ids)) 81 | continue 82 | 83 | if mode == 'training': 84 | torch.nn.utils.clip_grad_norm(net.parameters(), max_norm=2.0, norm_type=2) 85 | optimizer.step() 86 | 87 | num_batches += 1 88 | num_examples += batch_size 89 | 90 | # Get the error in each of the 6 pose DOF. 91 | diff = se3_log(output_se3.inverse().bmm(pose_se3.cuda())).detach().cpu().numpy() 92 | if num_batches == 1: 93 | errors = diff 94 | else: 95 | errors = np.concatenate((errors, diff), axis=0) 96 | 97 | # Save the losses during the epoch. 98 | for loss_type in losses.keys(): 99 | if loss_type in epoch_losses: 100 | epoch_losses[loss_type] += losses[loss_type].item() 101 | else: 102 | epoch_losses[loss_type] = losses[loss_type].item() 103 | 104 | targets_total += torch.sum(torch.abs(pose_log), dim=0).detach().cpu() 105 | targets_max = torch.max(targets_max, torch.max(torch.abs(pose_log.detach().cpu()), dim=0)[0]) 106 | 107 | # Compute and print the statistics for the finished epoch. 108 | print(f'{mode}, epoch {epoch}') 109 | 110 | # Compute the average training losses. 111 | for loss_type in epoch_losses.keys(): 112 | epoch_losses[loss_type] = epoch_losses[loss_type] / float(num_batches) 113 | print(f'Average {mode} loss, {loss_type}: {epoch_losses[loss_type]:.6f}') 114 | 115 | # Only store and print pose errors if we have started computing pose (and are not just computing keypoint loss). 116 | if epoch >= config['training']['start_pose_estimation']: 117 | 118 | # RMSE for each pose DOF. 119 | errors[:, 3:6] = np.rad2deg(errors[:, 3:6]) 120 | epoch_errors = np.sqrt(np.mean(errors ** 2, axis=0)).reshape(1, 6) 121 | 122 | stats.add_epoch_stats(epoch_losses, epoch_errors) 123 | 124 | targets_avg = (targets_total * (1.0 / num_examples)).detach().cpu().numpy() 125 | targets_avg[3:6] = np.rad2deg(targets_avg[3:6]) 126 | targets_max = targets_max.detach().cpu().numpy() 127 | targets_max[3:6] = np.rad2deg(targets_max[3:6]) 128 | 129 | # Only print the results for the relevant DOF that we are learning. 130 | print(f'Pose RMSE by DOF: {epoch_errors[:, np.array(dof)]}') 131 | print(f'Average pose targets by DOF: {targets_avg[np.array(dof)]}') 132 | print(f'Max pose targets by DOF: {targets_max[np.array(dof)]}') 133 | 134 | print(f'Epoch duration: {time.time() - start_time} seconds. \n') 135 | 136 | # Run if we are training, and we are using a scheduler. 137 | if mode == 'training': 138 | scheduler.step() 139 | 140 | return epoch_losses['total'], stats 141 | 142 | 143 | def plot_epoch_statistics(train_stats, validation_stats, plotting, dof): 144 | """ 145 | Plot the losses from training and validation over epochs. 146 | 147 | Args: 148 | train_stats (Statistics): an object for keeping track of losses and errors for the training data. 149 | validation_stats (Statistics): an object for keeping track of losses and errors for the validation data. 150 | plotting (Plotting): an object to handle plotting. 151 | dof (list[int]): indices of the DOF to plot. 
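        Called once per epoch from train() below, e.g.
        plot_epoch_statistics(train_stats, validation_stats, plotting, [0, 1, 5]) when only
        the planar DOF (x, y, heading) are being learned.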
152 | """ 153 | train_epoch_stats = train_stats.get_epoch_stats() 154 | valid_epoch_stats = validation_stats.get_epoch_stats() 155 | 156 | plotting.plot_epoch_losses(train_epoch_stats[0], valid_epoch_stats[0]) 157 | plotting.plot_epoch_errors(train_epoch_stats[1], valid_epoch_stats[1], dof) 158 | 159 | def train(pipeline, net, optimizer, scheduler, train_loader, validation_loader, start_epoch, min_validation_loss, 160 | train_stats, validation_stats, config, results_path, checkpoint_path): 161 | """ 162 | Run the training loop. 163 | 164 | Args: 165 | pipeline (Pipeline): the training pipeline object that performs a forward pass of the training pipeline and 166 | computes pose and losses. 167 | net (torch.nn.Module): neural network module. 168 | optimizer (torch.optim.Optimizer): training optimizer. 169 | scheduler (torch.optim.lr_scheduler): training scheduler. 170 | train_loader (torch.utils.data.DataLoader): data loader for training data. 171 | validation_loader (torch.utils.data.DataLoader): data loader for validation data. 172 | start_epoch (int): epoch we start training from, 0 if starting from scratch. 173 | min_validation_loss (float): the minimum validation loss during training. 174 | train_stats (Statistics): an object for keeping track of losses for the training data. 175 | validation_stats (Statistics): an object for keeping track of losses for the validation data. 176 | config (dict): configurations for model training. 177 | results_path (string): directory for storing results (outputs, plots). 178 | checkpoint_path (string): file path to store the checkpoint. 179 | """ 180 | # Helper class for plotting results. 181 | plotting = Plotting(results_path) 182 | 183 | # Early stopping keeps track of when we stop training (validation loss does not decrease) and when we store a 184 | # checkpoint (each time validation loss decreases below or is equal to the minimum value). 185 | last_save_epoch = start_epoch - 1 if start_epoch > 0 else 0 186 | early_stopping = EarlyStopping(config['training']['patience'], min_validation_loss, last_save_epoch) 187 | 188 | # Find which of the DOF of the 6 DOF pose we want to learn. Record the indices to keep in the pose vector. 189 | dof = [0, 1, 5] if 'pose_plane' in config['loss']['types'] else [0, 1, 2, 3, 4, 5] 190 | 191 | # The training loop. 192 | for epoch in range(start_epoch, config['training']['max_epochs']): 193 | 194 | train_loss, train_stats = execute_epoch(pipeline, 195 | net, 196 | train_loader, 197 | train_stats, 198 | epoch, 199 | 'training', 200 | config, 201 | dof, 202 | optimizer, 203 | scheduler) 204 | 205 | validation_loss, validation_stats = execute_epoch(pipeline, 206 | net, 207 | validation_loader, 208 | validation_stats, 209 | epoch, 210 | 'validation', 211 | config, 212 | dof) 213 | 214 | # Only start checking the loss after we start computing the pose loss. Sometimes we only compute keypoint loss 215 | # for the first few epochs. 216 | if epoch >= config['training']['start_pose_estimation']: 217 | 218 | # Plot the losses during training and validations. 219 | plot_epoch_statistics(train_stats, validation_stats, plotting, dof) 220 | 221 | stop, min_validation_loss = early_stopping.check_stop(validation_loss, 222 | net, 223 | optimizer, 224 | checkpoint_path, 225 | train_stats, 226 | validation_stats, 227 | epoch) 228 | 229 | # Stop the training loop if we have exceeded the patience. 
230 | if stop: 231 | break 232 | 233 | 234 | def main(config): 235 | """ 236 | Set up everything needed for training and call the function to run the training loop. 237 | 238 | Args: 239 | config (dict): configurations for training the network. 240 | """ 241 | results_path = f"{config['home_path']}/results/{config['experiment_name']}/" 242 | checkpoints_path = f"{config['home_path']}/networks" 243 | data_path = f"{config['home_path']}/data" 244 | datasets_path = f"{config['home_path']}/datasets" 245 | 246 | checkpoint_name = config['checkpoint_name'] 247 | dataset_name = config['dataset_name'] 248 | 249 | dataset_path = f'{datasets_path}/{dataset_name}.pickle' 250 | checkpoint_path = f'{checkpoints_path}/{checkpoint_name}' 251 | 252 | if not os.path.exists(results_path): 253 | os.makedirs(results_path) 254 | 255 | if not os.path.exists(checkpoints_path): 256 | os.makedirs(checkpoint_path) 257 | 258 | # Print outputs to files 259 | orig_stdout = sys.stdout 260 | fl = open(f'{results_path}out_train.txt', 'w') 261 | sys.stdout = fl 262 | orig_stderr = sys.stderr 263 | fe = open(f'{results_path}err_train.txt', 'w') 264 | sys.stderr = fe 265 | 266 | # Record the config settings. 267 | print(f'\nTraining parameter configurations: \n{config}\n') 268 | 269 | # Load the data. 270 | dataloader_params = config['data_loader'] 271 | dataset_params = config['dataset'] 272 | dataset_params['data_dir'] = data_path 273 | 274 | localization_data = None 275 | with open(dataset_path, 'rb') as handle: 276 | localization_data = pickle.load(handle) 277 | 278 | # Training data generator (randomly sample a subset of the full dataset for each epoch). 279 | train_set = Dataset(**dataset_params) 280 | train_sampler = RandomSampler(train_set, replacement=True, num_samples=10000) 281 | train_set.load_mel_data(localization_data, 'training') 282 | train_loader = data.DataLoader(train_set, sampler=train_sampler, **dataloader_params) 283 | 284 | # Validation data generator (randomly sample a subset of the full dataset for each epoch). 285 | validation_set = Dataset(**dataset_params) 286 | validation_sampler = RandomSampler(validation_set, replacement=True, num_samples=2500) 287 | validation_set.load_mel_data(localization_data, 'validation') 288 | validation_loader = data.DataLoader(validation_set, sampler=validation_sampler, **dataloader_params) 289 | 290 | # Set up device, using GPU 0 291 | device = torch.device('cuda:{}'.format(0) if torch.cuda.device_count() > 0 else 'cpu') 292 | torch.cuda.set_device(0) 293 | 294 | # Set training pipeline 295 | training_pipeline = Pipeline(config) 296 | training_pipeline = training_pipeline.cuda() 297 | 298 | # Set up the network, optimizer, and scheduler 299 | net = UNet(config['network']['num_channels'], 300 | config['network']['num_classes'], 301 | config['network']['layer_size']) 302 | 303 | optimizer = torch.optim.Adam(net.parameters(), lr=config['optimizer']['lr']) 304 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 305 | step_size=config['scheduler']['step_size'], 306 | gamma=config['scheduler']['gamma']) 307 | 308 | # Variables for keeping track of training progress. 
309 | start_epoch = 0 310 | min_validation_loss = 10e9 311 | train_stats = Statistics() 312 | validation_stats = Statistics() 313 | 314 | # Resume training from checkpoint if available 315 | if os.path.exists(f'{checkpoint_path}.pth'): 316 | checkpoint = torch.load(f'{checkpoint_path}.pth') 317 | net.load_state_dict(checkpoint['model_state_dict']) 318 | 319 | if 'optimizer_state_dict' in checkpoint.keys(): 320 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 321 | 322 | for state in optimizer.state.values(): 323 | for k, v in state.items(): 324 | if isinstance(v, torch.Tensor): 325 | state[k] = v.cuda() 326 | 327 | start_epoch = checkpoint['epoch'] + 1 if 'epoch' in checkpoint.keys() else 0 328 | min_validation_loss = checkpoint['val_loss'] if 'val_loss' in checkpoint.keys() else 10e9 329 | train_stats = checkpoint['train_stats'] if 'train_stats' in checkpoint.keys() else Statistics() 330 | validation_stats = checkpoint['valid_stats'] if 'valid_stats' in checkpoint.keys() else Statistics() 331 | 332 | net.cuda() 333 | 334 | train(training_pipeline, net, optimizer, scheduler, train_loader, validation_loader, start_epoch, 335 | min_validation_loss, train_stats, validation_stats, config, results_path, checkpoint_path) 336 | 337 | # Stop writing outputs to file. 338 | sys.stdout = orig_stdout 339 | fl.close() 340 | sys.stderr = orig_stderr 341 | fe.close() 342 | 343 | if __name__ == '__main__': 344 | parser = argparse.ArgumentParser() 345 | parser.add_argument('--config', default=None, type=str, 346 | help='config file path (default: None)') 347 | 348 | args = parser.parse_args() 349 | 350 | with open(args.config) as f: 351 | config = json.load(f) 352 | 353 | main(config) -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/src/utils/__init__.py -------------------------------------------------------------------------------- /src/utils/early_stopping.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | class EarlyStopping: 5 | """Based on https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py 6 | Early stops the training if validation loss doesn't improve after a given patience. 7 | """ 8 | def __init__(self, patience, loss_min, epoch_min): 9 | """ 10 | Initialize by setting the chosen patience and set the minimum loss. 11 | 12 | Args: 13 | patience (int): How long to wait after last time validation loss improved. 14 | Default: 7 15 | """ 16 | self.patience = patience 17 | self.counter = 0 18 | self.loss_min = loss_min 19 | self.epoch_min = epoch_min 20 | 21 | def check_stop(self, loss, model, optimizer, checkpoint_path, train_stats, validation_stats, epoch): 22 | """ 23 | Check if model training needs to stop based on the validation loss. Save the model of the validation loss 24 | improves (or stays at minimum). 25 | 26 | Args: 27 | loss (float): validation loss. 28 | model (torch.nn.Module): neural network model. 29 | optimizer (torch.optim.Optimizer): training optimizer. 30 | checkpoint_path (string name): file path for saving the model. 31 | train_stats (float): validation loss. TODO 32 | validation_stats (float): validation loss. TODO 33 | epoch (int): training epoch. 
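        Returns:
            stop (bool): True if the validation loss has not improved for `patience` epochs
                and training should stop, False otherwise.
            loss_min (float): the minimum validation loss observed so far.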
34 | """ 35 | 36 | if loss <= self.loss_min: 37 | 38 | print(f'Validation loss decreased ({self.loss_min:.6f} --> {loss:.6f}). Saving model at epoch {epoch}.\n') 39 | 40 | self.counter = 0 41 | self.loss_min = loss 42 | self.epoch_min = epoch 43 | 44 | torch.save({'epoch': epoch, 45 | 'model_state_dict': model.state_dict(), 46 | 'optimizer_state_dict': optimizer.state_dict(), 47 | 'loss': loss, 48 | 'train_stats': train_stats, 49 | 'valid_stats': validation_stats, 50 | }, '{}_{}.pth'.format(checkpoint_path, epoch)) # Add epoch so we don't overwrite existing file. 51 | 52 | elif loss > self.loss_min: 53 | 54 | self.counter += 1 55 | 56 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 57 | print(f'Current validation loss: {loss:.10f}, min validation loss: {self.loss_min:.10f}\n') 58 | 59 | if self.counter >= self.patience: 60 | print('Early stopping: final model saved at epoch {}'.format(self.epoch_min)) 61 | return True, self.loss_min 62 | 63 | return False, self.loss_min -------------------------------------------------------------------------------- /src/utils/keypoint_tools.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | def normalize_coords(coords_2D, batch_size, height, width): 6 | """ 7 | Normalize 2D keypoint coordinates to lie in the range [-1, 1]. 8 | 9 | Args: 10 | coords_2D (torch.tensor): 2D image coordinates store in order (u, v), (Bx2xN). 11 | batch_size (int): batch size. 12 | height (int): image height. 13 | width (int): image width. 14 | 15 | Returns: 16 | coords_2D_norm (torch.tensor): coordinates normalized to range [-1, 1] and stored in order (u, v), (BxNx2). 17 | """ 18 | u_norm = (2 * coords_2D[:, 0, :].reshape(batch_size, -1) / (width - 1)) - 1 19 | v_norm = (2 * coords_2D[:, 1, :].reshape(batch_size, -1) / (height - 1)) - 1 20 | 21 | return torch.stack([u_norm, v_norm], dim=2) 22 | 23 | def get_norm_descriptors(descriptor_map, sample=False, keypoints_norm=None): 24 | """ 25 | Normalize the descriptors. 26 | 27 | Args: 28 | descriptor_map (torch.tensor): the descriptor map, either dense with one descriptor for each image pixel, 29 | BxCxHxW, or sparse with one descriptor for each keypoint, BxCxN. 30 | sample (bool): whether to sample descriptors for keypoints in the image or use descriptors for each 31 | pixel. 32 | keypoints_norm (torch.tensor or None): normalized keypoint coordinates if we are sampling, otherwise None. 33 | 34 | Returns: 35 | descriptors_norm (torch.tensor): the normalized descriptors, (BxCx(N or HW)) 36 | """ 37 | 38 | if len(descriptor_map.size()) == 4: 39 | # The descriptor map has dense descriptors, one for each pixel. 40 | batch_size, channels, height, width = descriptor_map.size() 41 | 42 | if sample: 43 | # Sample descriptors for the given keypoints. 44 | descriptors = F.grid_sample(descriptor_map, keypoints_norm, mode='bilinear') # BxCx1xN 45 | descriptors = descriptors.reshape(batch_size, channels, keypoints_norm.size(2)) # BxCxN 46 | else: 47 | descriptors = descriptor_map.reshape(batch_size, channels, height * width) # BxCxHW 48 | else: 49 | # The descriptor map has sparse descriptors, one for each keypoint. 
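# The normalization below standardizes each descriptor over its channel dimension (zero
# mean, unit standard deviation), so a dot product between two normalized descriptors acts
# like a correlation score. Minimal numeric illustration (hypothetical values):
#
#   d = torch.tensor([[[1.0], [3.0], [5.0]]])   # BxCxN with B=1, C=3, N=1
#   d_norm = (d - d.mean(dim=1, keepdim=True)) / d.std(dim=1, keepdim=True)
#   # mean = 3.0, std = 2.0 (unbiased), so d_norm = [[[-1.0], [0.0], [1.0]]]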
50 | descriptors = descriptor_map 51 | 52 | descriptors_mean = torch.mean(descriptors, dim=1, keepdim=True) # Bx1x(N or HW) 53 | descriptors_std = torch.std(descriptors, dim=1, keepdim=True) # Bx1x(N or HW) 54 | descriptors_norm = (descriptors - descriptors_mean) / descriptors_std # BxCx(N or HW) 55 | 56 | return descriptors_norm 57 | 58 | def get_scores(scores_map, keypoints_norm): 59 | """ 60 | Sample scores for keypoints. 61 | 62 | Args: 63 | scores_map (torch.tensor): the scores for each pixel in the image, (Bx1xHxW). 64 | keypoints_norm (torch.tensor): normalized keypoint coordinates for sampling, (BxNx2). 65 | 66 | Returns: 67 | kpt_scores (torch.tensor): a score for each keypoint, (Bx1xN). 68 | """ 69 | batch_size, _, num_points, _ = keypoints_norm.size() 70 | 71 | kpt_scores = F.grid_sample(scores_map, keypoints_norm, mode='bilinear') # Bx1x1xN 72 | kpt_scores = kpt_scores.reshape(batch_size, 1, num_points) 73 | 74 | return kpt_scores 75 | 76 | def get_keypoint_info(kpt_2D, scores_map, descriptors_map, disparity, stereo_cam): 77 | """ 78 | Gather information we need associated with each detected keypoint. Compute the 3D point coordinates, determine 79 | which keypoints have valid 3D coordinates, get the normalized descriptor for each keypoint, and the score for 80 | each keypoint. 81 | 82 | Args: 83 | kpt_2D (torch.tensor): keypoint 2D image coordinates, (Bx2xN). 84 | scores_map (torch.tensor): scores for each pixel, (Bx1xHxW). 85 | descriptors_map (torch.tensor): descriptors for each pixel, (BxCxHxW). 86 | disparity (torch.tensor): disparity for one stereo pair, (BxHxW). 87 | stereo_cam (StereoCameraModel): stereo camera model. 88 | 89 | Returns: 90 | kpt_3D (torch.tensor): keypoint 3D coordinates in the sensor frame (left camera frame) given in 91 | homogeneous coordinates, (Bx4xN). 92 | valid (torch.tensor): 1 if the keypoint 3D coordinate is valid (i.e. falls in the accepted depth range) and 93 | 0 otherwise, (Bx1xN). 94 | kpt_desc_norm (torch.tensor): Normalized descriptor for each keypoint, (BxCxN). 95 | kpt_scores (torch.tensor): score for each keypoint, (Bx1xN). 
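        Illustrative shapes (hypothetical batch of 2 images with 400 keypoints each): kpt_2D
        of size 2x2x400 on a 384x512 image yields kpt_3D: 2x4x400, valid: 2x1x400,
        kpt_desc_norm: 2xCx400 and kpt_scores: 2x1x400.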
96 | 97 | """ 98 | batch_size, _, height, width = scores_map.size() 99 | 100 | kpt_2D_norm = normalize_coords(kpt_2D, batch_size, height, width).unsqueeze(1) # Bx1xNx2 101 | 102 | kpt_desc_norm = get_norm_descriptors(descriptors_map, True, kpt_2D_norm) 103 | 104 | kpt_scores = get_scores(scores_map, kpt_2D_norm) 105 | 106 | kpt_3D, valid = stereo_cam.inverse_camera_model(kpt_2D, disparity) 107 | 108 | return kpt_3D, valid, kpt_desc_norm, kpt_scores -------------------------------------------------------------------------------- /src/utils/lie_algebra.py: -------------------------------------------------------------------------------- 1 | """Code is copied from https://github.com/utiasSTARS/dpc-net/blob/master/lie_algebra.py with some adjustments made.""" 2 | 3 | import torch 4 | global EPS 5 | EPS=1e-6 # Was 1e-12 in original code 6 | 7 | def so3_wedge(phi): 8 | #Returns Nx3x3 tensor with each 1x3 row vector in phi wedge'd 9 | 10 | if phi.dim() < 2: 11 | phi = phi.unsqueeze(0) 12 | 13 | Phi = phi.new(phi.size(0), 3, 3).zero_() 14 | 15 | Phi[:, 0, 1] = -phi[:, 2] 16 | Phi[:, 1, 0] = phi[:, 2] 17 | Phi[:, 0, 2] = phi[:, 1] 18 | Phi[:, 2, 0] = -phi[:, 1] 19 | Phi[:, 1, 2] = -phi[:, 0] 20 | Phi[:, 2, 1] = phi[:, 0] 21 | return Phi 22 | 23 | def so3_vee(Phi): 24 | #Returns Nx3 tensor with each 3x3 lie algebra element converted to a 1x3 coordinate vector 25 | phi = Phi.new(Phi.size(0), 3).zero_() 26 | phi[:, 0] = Phi[:, 2, 1] 27 | phi[:, 1] = Phi[:, 0, 2] 28 | phi[:, 2] = Phi[:, 1, 0] 29 | return phi 30 | 31 | def batch_trace(R): 32 | #Takes in Nx3x3, computes trace of each 3x3 matrix, outputs Nx1 vector with traces 33 | I = R.new(3,3).zero_() 34 | I[0,0] = I[1,1] = I[2,2] = 1.0 35 | return (R*I.expand_as(R)).sum(1, keepdim=True).sum(2, keepdim=True).view(R.size(0),-1) 36 | 37 | def batch_outer_prod(vecs): 38 | #Input: NxD vectors 39 | #Output: NxDxD outer products 40 | N = vecs.size(0) 41 | D = vecs.size(1) 42 | return vecs.unsqueeze(2).expand(N,D,D)*vecs.unsqueeze(1).expand(N,D,D) 43 | 44 | def so3_log(R): 45 | # input: R 64x3x3 46 | # output: log(R) 64x3 47 | 48 | batch_size = R.size(0) 49 | 50 | # The rotation axis (not unit-length) is given by 51 | axes = R.new(batch_size, 3).zero_() 52 | 53 | axes[:, 0] = R[:, 2, 1] - R[:, 1, 2] 54 | axes[:, 1] = R[:, 0, 2] - R[:, 2, 0] 55 | axes[:, 2] = R[:, 1, 0] - R[:, 0, 1] 56 | 57 | # NOTE: clamp ensures that we don't get any nan's due to out of range numerical errors 58 | angles = torch.acos((0.5 * batch_trace(R) - 0.5).clamp(-1 + EPS, 1 - EPS)) 59 | # angles = torch.acos((0.5 * batch_trace(R) - 0.5).clamp(-1., 1.)) 60 | sin_angles = torch.sin(angles) 61 | 62 | # If angle is close to zero, use first-order Taylor expansion 63 | small_angles_mask = angles.lt(EPS).view(-1) 64 | small_angles_num = small_angles_mask.sum() 65 | # This tensor is used to extract the 3x3 R's that correspond to small angles 66 | small_angles_indices = small_angles_mask.nonzero().squeeze() 67 | 68 | large_angles_mask = small_angles_mask.logical_not() 69 | large_angles_num = large_angles_mask.sum() 70 | large_angles_indices = large_angles_mask.nonzero().squeeze() 71 | 72 | logs = R.new_empty(batch_size, 3) 73 | 74 | if small_angles_num > 0: 75 | I = R.new(3, 3).zero_() 76 | I[0, 0] = I[1, 1] = I[2, 2] = 1.0 77 | I = I.expand(small_angles_num, 3, 3) # I is now small_angles_numx3x3 78 | logs[small_angles_indices, :] = so3_vee(R[small_angles_indices] - I) 79 | 80 | if large_angles_num > 0: 81 | ax_sin = axes[large_angles_indices] / 
sin_angles[large_angles_indices].expand_as(axes[large_angles_indices]) 82 | logs[large_angles_indices, :] = 0.5 * angles[large_angles_indices].expand_as(ax_sin) * ax_sin 83 | 84 | return logs 85 | 86 | def so3_exp(phi): 87 | #input: phi Nx3 88 | #output: perturbation Nx3x3 89 | 90 | if phi.dim() < 2: 91 | phi = phi.unsqueeze(0) 92 | 93 | batch_size = phi.size(0) 94 | 95 | #Take the norms of each row 96 | angles = vec_norms(phi) 97 | 98 | I = phi.new(3, 3).zero_() 99 | I[0,0] = I[1,1] = I[2,2] = 1.0 100 | I = I.expand(batch_size, 3,3) #I is now num_samplesx3x3 101 | 102 | # If angle is close to zero, use first-order Taylor expansion 103 | small_angles_mask = angles.lt(EPS).view(-1) 104 | small_angles_num = small_angles_mask.sum() 105 | small_angles_indices = small_angles_mask.nonzero().squeeze() 106 | 107 | if small_angles_num == batch_size: 108 | #Taylor expansion 109 | I = I.expand(batch_size, 3,3) 110 | phi_w = so3_wedge(phi) 111 | return I + phi_w 112 | 113 | axes = phi / angles.expand(batch_size, 3) 114 | s = torch.sin(angles) 115 | c = torch.cos(angles) 116 | 117 | #This piece of magic multiplies each 3x3 matrix by each element in c 118 | 119 | c = c.view(-1,1,1).expand_as(I) 120 | s = s.view(-1,1,1).expand_as(I) 121 | 122 | outer_prod_axes = batch_outer_prod(axes) 123 | R = c * I + (1 - c) * outer_prod_axes + s * so3_wedge(axes) 124 | 125 | if 0 < small_angles_num < batch_size: 126 | I_small = I.expand(small_angles_num, 3,3) 127 | phi_w = so3_wedge(phi[small_angles_indices]) 128 | small_exp = I + phi_w 129 | R[small_angles_indices] = small_exp 130 | 131 | 132 | return R 133 | 134 | 135 | def vec_norms(input): 136 | #Takes Nx3 tensor of row vectors an outputs Nx1 tensor with 2-norms of each row 137 | norms = input.pow(2).sum(dim=1, keepdim=True).sqrt() 138 | return norms 139 | 140 | def vec_square_norms(input): 141 | #Takes Nx3 tensor of row vectors an outputs Nx1 tensor with squared 2-norms of each row 142 | sq_norms = input.pow(2).sum(dim=1, keepdim=True) 143 | return sq_norms 144 | 145 | def so3_inv_left_jacobian(phi): 146 | """Inverse left SO(3) Jacobian (see Barfoot). 147 | """ 148 | if phi.dim() < 2: 149 | phi = phi.unsqueeze(0) 150 | 151 | invJ = phi.new_empty(phi.shape[0], 3, 3) 152 | 153 | angles = vec_norms(phi) 154 | I = phi.new(3, 3).zero_() 155 | I[0, 0] = I[1, 1] = I[2, 2] = 1.0 156 | batch_size = phi.size(0) 157 | 158 | # If angle is close to zero, use first-order Taylor expansion 159 | small_angles_mask = angles.lt(EPS).view(-1) 160 | small_angles_num = small_angles_mask.sum() 161 | small_angles_indices = small_angles_mask.nonzero().squeeze() 162 | 163 | large_angles_mask = small_angles_mask.logical_not() 164 | large_angles_num = large_angles_mask.sum() 165 | large_angles_indices = large_angles_mask.nonzero().squeeze() 166 | 167 | if small_angles_num > 0: 168 | I_small = I.expand(small_angles_num, 3, 3) 169 | small_invJ = I_small - 0.5 * so3_wedge(phi[small_angles_indices]) 170 | invJ[small_angles_indices] = small_invJ 171 | 172 | if large_angles_num > 0: 173 | angles_large = angles[large_angles_indices] 174 | if angles_large.dim() < 2: 175 | angles_large = angles_large.unsqueeze(0) 176 | 177 | phi_large = phi[large_angles_indices] 178 | if phi_large.dim() < 2: 179 | phi_large = phi_large.unsqueeze(0) 180 | 181 | axes = phi_large / angles_large.expand(large_angles_num, 3) 182 | half_angles = 0.5 * angles_large 183 | cot_half_angles = 1. 
/ torch.tan(half_angles) 184 | I_full = I.expand(large_angles_num, 3, 3) # I is now num_samplesx3x3 185 | 186 | # Compute outer products of the Nx3 axes vectors, and put them into a Nx3x3 tensor 187 | outer_prod_axes = batch_outer_prod(axes) 188 | 189 | # This piece of magic changes the vector so that it can multiply each 3x3 matrix of a Nx3x3 tensor 190 | h_a_cot_a = (half_angles * cot_half_angles).view(-1, 1, 1).expand_as(I_full) 191 | h_a = half_angles.view(-1, 1, 1).expand_as(I_full) 192 | invJ[large_angles_indices] = h_a_cot_a * I_full + (1 - h_a_cot_a) * outer_prod_axes - (h_a * so3_wedge(axes)) 193 | 194 | return invJ 195 | 196 | def so3_left_jacobian(phi): 197 | """Inverse left SO(3) Jacobian (see Barfoot). 198 | """ 199 | 200 | angles = vec_norms(phi) 201 | I = phi.new(3, 3).zero_() 202 | I[0,0] = I[1,1] = I[2,2] = 1.0 203 | batch_size = phi.size(0) 204 | 205 | # If angle is close to zero, use first-order Taylor expansion 206 | small_angles_mask = angles.lt(EPS).view(-1) 207 | small_angles_num = small_angles_mask.sum() 208 | small_angles_indices = small_angles_mask.nonzero().squeeze() 209 | 210 | if small_angles_num == batch_size: 211 | I = I.expand(batch_size, 3,3) 212 | return I + 0.5*so3_wedge(phi) 213 | 214 | axes = phi / angles.expand(batch_size, 3) 215 | #Compute outer products of the Nx3 axes vectors, and put them into a Nx3x3 tensor 216 | outer_prod_axes = batch_outer_prod(axes) 217 | 218 | sin_ph = torch.sin(angles) 219 | cos_ph = torch.cos(angles) 220 | 221 | I_full = I.expand(batch_size, 3,3) 222 | t1 = sin_ph/angles 223 | t2 = 1 - t1 224 | t3 = (1 - cos_ph)/angles 225 | 226 | t1 = t1.view(-1,1,1).expand_as(I_full) 227 | t2 = t2.view(-1,1,1).expand_as(I_full) 228 | t3 = t3.view(-1,1,1).expand_as(I_full) 229 | 230 | J = t1 * I_full + t2 * outer_prod_axes + t3 * so3_wedge(axes) 231 | 232 | if 0 < small_angles_num < batch_size: 233 | I_small = I.expand(small_angles_num, 3,3) 234 | small_J = I_small + 0.5*so3_wedge(phi[small_angles_indices]) 235 | J[small_angles_indices] = small_J 236 | 237 | return J 238 | 239 | 240 | 241 | def so3_to_rpy(rot): 242 | """Convert a rotation matrix to RPY Euler angles.""" 243 | #Nx3x3 -> 3xN 244 | 245 | PI = 3.14159265358979323846 246 | 247 | pitch = torch.atan2(-rot[:, 2, 0], 248 | torch.sqrt(rot[:, 0, 0]**2 + rot[:, 1, 0]**2)) 249 | 250 | sec_pitch = 1. / torch.cos(pitch) 251 | yaw = torch.atan2(rot[:, 1, 0] * sec_pitch, 252 | rot[:, 0, 0] * sec_pitch) 253 | roll = torch.atan2(rot[:, 2, 1] * sec_pitch, 254 | rot[:, 2, 2] * sec_pitch) 255 | 256 | pospi2_mask = torch.abs(pitch - PI/2.0).lt(EPS).view(-1) 257 | pospi2_angles_num = pospi2_mask.sum() 258 | pospi2_indices = pospi2_mask.nonzero().squeeze() 259 | 260 | negpi2_mask = torch.abs(pitch + PI/2.0).lt(EPS).view(-1) 261 | negpi2_angles_num = negpi2_mask.sum() 262 | negpi2_indices = negpi2_mask.nonzero().squeeze() 263 | 264 | if pospi2_angles_num > 0: 265 | yaw[pospi2_indices] = 0. 266 | roll[pospi2_indices] = torch.atan2(rot[pospi2_indices, 0, 1], rot[pospi2_indices, 1, 1]) 267 | 268 | if negpi2_angles_num > 0: 269 | yaw[pospi2_indices] = 0. 
270 | roll[negpi2_indices] = -torch.atan2(rot[negpi2_indices, 0, 1], rot[negpi2_indices, 1, 1]) 271 | 272 | 273 | return torch.cat((roll.view(-1,1), pitch.view(-1,1), yaw.view(-1,1)), 1) 274 | 275 | def rpy_to_so3(rpy): 276 | """Convert RPY Euler angles to a rotation matrix.""" 277 | #Nx3 -> Nx3x3 278 | roll = rpy[:,0].view(-1,1,1) 279 | pitch = rpy[:,1].view(-1,1,1) 280 | yaw = rpy[:,2].view(-1,1,1) 281 | 282 | c_r = torch.cos(roll) 283 | s_r = torch.sin(roll) 284 | 285 | c_p = torch.cos(pitch) 286 | s_p = torch.sin(pitch) 287 | 288 | c_y = torch.cos(yaw) 289 | s_y = torch.sin(yaw) 290 | 291 | rotz = rpy.new(rpy.size(0), 3, 3).zero_() 292 | rotz[:,2,2] = 1.0 293 | rotz[:,0,0] = rotz[:,1,1] = c_y 294 | rotz[:,0,1] = -s_y 295 | rotz[:,1,0] = s_y 296 | 297 | roty = rpy.new(rpy.size(0), 3, 3).zero_() 298 | roty[:,1,1] = 1.0 299 | roty[:,0,0] = roty[:,2,2] = c_p 300 | roty[:,0,2] = s_p 301 | roty[:,2,0] = -s_p 302 | 303 | rotx = rpy.new(rpy.size(0), 3, 3).zero_() 304 | rotx[:,0,0] = 1.0 305 | rotx[:,1,1] = rotx[:,2,2] = c_r 306 | rotx[:,1,2] = -s_r 307 | rotx[:,2,1] = s_r 308 | 309 | return rotz.bmm(roty.bmm(rotx)) 310 | 311 | #================================SE(3)====================================# 312 | def se3_wedge(xi): 313 | 314 | if xi.dim() < 2: 315 | xi = xi.unsqueeze(0) 316 | 317 | #Returns Nx4x4 tensor with each 1x6 row vector in xi SE(3) wedge'd 318 | Xi = xi.new(xi.size(0), 4, 4).zero_() 319 | rho = xi[:, :3] 320 | phi = xi[:, 3:] 321 | Phi = so3_wedge(phi) 322 | 323 | Xi[:, :3, :3] = Phi 324 | Xi[:, :3, 3] = rho 325 | 326 | return Xi 327 | 328 | def se3_curly_wedge(xi): 329 | #Returns Nx6x6 tensor with each 1x6 row vector in xi SE(3) curly wedge'd 330 | Xi = xi.new(xi.size(0), 6, 6).zero_() 331 | rho = xi[:, 0:3] 332 | phi = xi[:, 3:6] 333 | Phi = so3_wedge(phi) 334 | Rho = so3_wedge(rho) 335 | 336 | Xi[:, 0:3, 0:3] = Phi 337 | Xi[:, 0:3, 3:6] = Rho 338 | Xi[:, 3:6, 3:6] = Phi 339 | 340 | return Xi 341 | 342 | def se3_log(T): 343 | 344 | """Logarithmic map for SE(3) 345 | Computes an SE(3) tangent vector from a transformation 346 | matrix. 347 | This is the inverse operation to exp. 348 | #input: T Nx4x4 349 | #output: log(T) Nx6 350 | """ 351 | if T.dim() < 3: 352 | T = T.unsqueeze(0) 353 | 354 | R = T[:,0:3,0:3] 355 | t = T[:,0:3,3:4] 356 | sample_size = t.size(0) 357 | phi = so3_log(R) 358 | invl_js = so3_inv_left_jacobian(phi) 359 | rho = (invl_js.bmm(t)).view(sample_size, 3) 360 | xi = torch.cat((rho, phi), 1) 361 | 362 | return xi 363 | 364 | def se3_exp(xi): 365 | #input: xi Nx6 366 | #output: T Nx4x4 367 | #New efficient way without having to compute Q!
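#Closed form used below: T = I + xi^ + ((1 - cos(a))/a^2) * (xi^)^2 + ((a - sin(a))/a^3) * (xi^)^3, with a = ||phi||.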
368 | 369 | if xi.dim() < 2: 370 | xi = xi.unsqueeze(0) 371 | 372 | batch_size = xi.size(0) 373 | phi = vec_norms(xi[:, 3:6]) 374 | 375 | I = xi.new(4, 4).zero_() 376 | I[0,0] = I[1,1] = I[2,2] = I[3,3] = 1.0 377 | 378 | # If angle is close to zero, use first-order Taylor expansion 379 | small_angles_mask = phi.lt(EPS).view(-1) 380 | small_angles_num = small_angles_mask.sum() 381 | small_angles_indices = small_angles_mask.nonzero().squeeze() 382 | 383 | 384 | if small_angles_num == batch_size: 385 | #Taylor expansion 386 | I = I.expand(batch_size, 4,4) 387 | xi_w = se3_wedge(xi) 388 | return I + xi_w 389 | 390 | xi_w = se3_wedge(xi) 391 | xi_w2 = xi_w.bmm(xi_w) 392 | xi_w3 = xi_w2.bmm(xi_w) 393 | 394 | phi2 = phi.pow(2) 395 | phi3 = phi.pow(3) 396 | 397 | cos_phi = torch.cos(phi) 398 | sin_phi = torch.sin(phi) 399 | 400 | I_full = I.expand(batch_size, 4,4) 401 | t2 = (1 - cos_phi)/phi2 402 | t3 = (phi - sin_phi)/phi3 403 | t2 = t2.view(-1,1,1).expand_as(I_full) 404 | t3 = t3.view(-1,1,1).expand_as(I_full) 405 | 406 | T = I_full + xi_w + t2*xi_w2 + t3*xi_w3 407 | 408 | if 0 < small_angles_num < batch_size: 409 | I_small = I.expand(small_angles_num, 4,4) 410 | xi_w = se3_wedge(xi[small_angles_indices]) 411 | small_exp = I + xi_w 412 | T[small_angles_indices] = small_exp 413 | 414 | return T 415 | 416 | 417 | def se3_inv(T): 418 | 419 | if T.dim() < 3: 420 | T = T.unsqueeze(0) 421 | 422 | #Batch invert Nx4x4 SE(3) matrices 423 | Rt = T[:, 0:3, 0:3].transpose(1,2) 424 | t = T[:, 0:3, 3:4] 425 | 426 | T_inv = T.clone() 427 | #T_inv = T.new(T.size()).zero_() 428 | #T_inv[:,3,3] = 1.0 429 | 430 | T_inv[:, 0:3, 0:3] = Rt 431 | T_inv[:, 0:3, 3:4] = -Rt.bmm(t) 432 | 433 | return T_inv 434 | 435 | def se3_Q(rho, phi): 436 | #SE(3) Q function 437 | #Used in the SE(3) jacobians 438 | #See b 439 | 440 | ph = vec_norms(phi) 441 | 442 | ph_test = phi.norm(p=2, dim=1) 443 | 444 | ph2 = ph*ph 445 | ph3 = ph2*ph 446 | ph4 = ph3*ph 447 | ph5 = ph4*ph 448 | 449 | rx = so3_wedge(rho) 450 | px = so3_wedge(phi) 451 | 452 | #Turn Nx1 into a Nx3x3 (with each 3x3 having 9 identical numbers) 453 | cph = torch.cos(ph).view(-1,1,1).expand_as(rx) 454 | sph = torch.sin(ph).view(-1,1,1).expand_as(rx) 455 | ph = ph.view(-1,1,1).expand_as(rx) 456 | ph2 = ph2.view(-1,1,1).expand_as(rx) 457 | ph3 = ph3.view(-1,1,1).expand_as(rx) 458 | ph4 = ph4.view(-1,1,1).expand_as(rx) 459 | ph5 = ph5.view(-1,1,1).expand_as(rx) 460 | 461 | m1 = 0.5 462 | m2 = (ph - sph)/ph3 463 | m3 = (ph2 + 2. * cph - 2.)/(2.*ph4) 464 | m4 = (2.*ph - 3.*sph + ph*cph)/(2.*ph5) 465 | 466 | t1 = m1 * rx 467 | t2 = m2 * (px.bmm(rx) + rx.bmm(px) + px.bmm(rx).bmm(px)) 468 | t3 = m3 * (px.bmm(px).bmm(rx) + rx.bmm(px).bmm(px) - 3. 
* px.bmm(rx).bmm(px)) 469 | t4 = m4 * (px.bmm(rx).bmm(px).bmm(px) + px.bmm(px).bmm(rx).bmm(px)) 470 | 471 | Q = t1 + t2 + t3 + t4 472 | 473 | return Q 474 | 475 | def se3_left_jacobian(xi): 476 | """Computes SE(3) left jacobian of N xi vectors (arranged into NxD tensor)""" 477 | rho = xi[:, 0:3] 478 | phi = xi[:, 3:6] 479 | 480 | batch_size = xi.size(0) 481 | angles = vec_norms(xi[:, 3:6]) 482 | 483 | # If angle is close to zero, use first-order Taylor expansion 484 | small_angles_mask = angles.lt(EPS).view(-1) 485 | small_angles_num = small_angles_mask.sum() 486 | small_angles_indices = small_angles_mask.nonzero().squeeze() 487 | 488 | if small_angles_num == batch_size: 489 | #Taylor expansion 490 | I = xi.new(6, 6).zero_() 491 | I[0,0] = I[1,1] = I[2,2] = I[3,3] = I[4,4] = I[5,5] = 1.0 492 | I = I.expand(batch_size, 6,6) 493 | return I + 0.5*se3_curly_wedge(xi) 494 | 495 | 496 | J = so3_left_jacobian(phi) 497 | Q = se3_Q(rho, phi) 498 | zero_mat = xi.new(3, 3).zero_().expand(batch_size, 3, 3) 499 | 500 | upper_rows = torch.cat((J, Q), 2) 501 | lower_rows = torch.cat((zero_mat, J), 2) 502 | 503 | J = torch.cat((upper_rows, lower_rows), 1) 504 | 505 | if 0 < small_angles_num < batch_size: 506 | I = xi.new(6, 6).zero_() 507 | I[0,0] = I[1,1] = I[2,2] = I[3,3] = I[4,4] = I[5,5] = 1.0 508 | I = I.expand(small_angles_num, 6,6) 509 | small_J = I + 0.5*se3_curly_wedge(xi[small_angles_indices]) 510 | J[small_angles_indices] = small_J 511 | 512 | return J 513 | 514 | 515 | 516 | def se3_inv_left_jacobian(xi): 517 | """Computes SE(3) inverse left jacobian of N xi vectors (arranged into NxD tensor)""" 518 | rho = xi[:, 0:3] 519 | phi = xi[:, 3:6] 520 | 521 | batch_size = xi.size(0) 522 | angles = vec_norms(xi[:, 3:6]) 523 | 524 | # If angle is close to zero, use first-order Taylor expansion 525 | small_angles_mask = angles.lt(EPS).view(-1) 526 | small_angles_num = small_angles_mask.sum() 527 | small_angles_indices = small_angles_mask.nonzero().squeeze() 528 | 529 | if small_angles_num == batch_size: 530 | #Taylor expansion 531 | I = xi.new(6, 6).zero_() 532 | I[0,0] = I[1,1] = I[2,2] = I[3,3] = I[4,4] = I[5,5] = 1.0 533 | I = I.expand(batch_size, 6,6) 534 | return I - 0.5*se3_curly_wedge(xi) 535 | 536 | invl_j = so3_inv_left_jacobian(phi) 537 | Q = se3_Q(rho, phi) 538 | zero_mat = xi.new(3, 3).zero_().expand(batch_size, 3, 3) 539 | 540 | upper_rows = torch.cat((invl_j, -invl_j.bmm(Q).bmm(invl_j)), 2) 541 | lower_rows = torch.cat((zero_mat, invl_j), 2) 542 | 543 | inv_J = torch.cat((upper_rows, lower_rows), 1) 544 | 545 | if 0 < small_angles_num < batch_size: 546 | I = xi.new(6, 6).zero_() 547 | I[0,0] = I[1,1] = I[2,2] = I[3,3] = I[4,4] = I[5,5] = 1.0 548 | I = I.expand(small_angles_num, 6,6) 549 | small_inv_J = I - 0.5*se3_curly_wedge(xi[small_angles_indices]) 550 | inv_J[small_angles_indices] = small_inv_J 551 | 552 | 553 | return inv_J 554 | 555 | 556 | def se3_adjoint(T): 557 | if T.dim() < 2: 558 | T = T.unsqueeze(dim=0) 559 | 560 | C = T[:, :3, :3] 561 | Jrho_wedge = so3_wedge(T[:, :3, 3].view(-1, 3)) 562 | 563 | adj_T = T.new_zeros((T.shape[0], 6,6)) 564 | adj_T[:, :3, :3] = C 565 | adj_T[:, :3, 3:] = Jrho_wedge.bmm(C) 566 | adj_T[:, 3:, 3:] = C 567 | 568 | return adj_T 569 | 570 | def se3_inv_adjoint(T): 571 | if T.dim() < 2: 572 | T = T.unsqueeze(dim=0) 573 | 574 | C = T[:, :3, :3] 575 | C_T = torch.transpose(C, 1, 2) 576 | Jrho_wedge = so3_wedge(T[:, :3, 3].view(-1, 3)) 577 | 578 | inv_adj_T = T.new_zeros((T.shape[0], 6,6)) 579 | inv_adj_T[:, :3, :3] = C_T 580 | inv_adj_T[:, :3, 3:] = 
-C_T.bmm(Jrho_wedge) 581 | inv_adj_T[:, 3:, 3:] = C_T 582 | 583 | return inv_adj_T 584 | -------------------------------------------------------------------------------- /src/utils/statistics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Statistics: 4 | """ 5 | A class for keeping track of poses, errors, and losses. 6 | """ 7 | def __init__(self): 8 | self.epoch_losses = {} 9 | self.epoch_errors = np.empty(0) 10 | self.outputs_se3 = {} 11 | self.targets_se3 = {} 12 | self.outputs_log = {} 13 | self.targets_log = {} 14 | self.live_run_ids = [] 15 | self.map_run_id = None 16 | self.sample_ids = {} 17 | 18 | def get_epoch_stats(self): 19 | return self.epoch_losses, self.epoch_errors 20 | 21 | def get_outputs_se3(self): 22 | return self.outputs_se3 23 | 24 | def get_targets_se3(self): 25 | return self.targets_se3 26 | 27 | def get_outputs_log(self): 28 | return self.outputs_log 29 | 30 | def get_targets_log(self): 31 | return self.targets_log 32 | 33 | def get_sample_ids(self): 34 | return self.sample_ids 35 | 36 | def get_live_run_ids(self): 37 | return self.live_run_ids 38 | 39 | def get_map_run_id(self): 40 | return self.map_run_id 41 | 42 | def add_epoch_stats(self, losses, errors): 43 | for loss_type in losses.keys(): 44 | if loss_type in self.epoch_losses: 45 | self.epoch_losses[loss_type] += [losses[loss_type]] 46 | else: 47 | self.epoch_losses[loss_type] = [losses[loss_type]] 48 | 49 | if self.epoch_errors.shape[0] == 0: 50 | self.epoch_errors = errors 51 | else: 52 | self.epoch_errors = np.concatenate((self.epoch_errors, errors), axis=0) 53 | 54 | def add_outputs_targets_se3(self, live_run_id, output, target): 55 | if live_run_id not in self.outputs_se3.keys(): 56 | self.outputs_se3[live_run_id] = [output] 57 | self.targets_se3[live_run_id] = [target] 58 | else: 59 | self.outputs_se3[live_run_id] = self.outputs_se3[live_run_id] + [output] 60 | self.targets_se3[live_run_id] = self.targets_se3[live_run_id] + [target] 61 | 62 | def add_outputs_targets_log(self, live_run_id, outputs, targets): 63 | num_dof = outputs.shape[0] 64 | 65 | if live_run_id not in self.outputs_log.keys(): 66 | self.outputs_log[live_run_id] = outputs.reshape(1, num_dof) 67 | self.targets_log[live_run_id] = targets.reshape(1, num_dof) 68 | else: 69 | self.outputs_log[live_run_id] = np.concatenate((self.outputs_log[live_run_id], outputs.reshape(1, num_dof)), axis=0) 70 | self.targets_log[live_run_id] = np.concatenate((self.targets_log[live_run_id], targets.reshape(1, num_dof)), axis=0) 71 | 72 | def add_sample_id(self, live_run_id, sample_id): 73 | if live_run_id not in self.sample_ids.keys(): 74 | self.sample_ids[live_run_id] = [sample_id] 75 | else: 76 | self.sample_ids[live_run_id] += [sample_id] 77 | 78 | def add_live_run_id(self, live_run_id): 79 | if live_run_id not in self.live_run_ids: 80 | self.live_run_ids.append(live_run_id) 81 | 82 | def set_map_run_id(self, map_run_id): 83 | if self.map_run_id is None: 84 | self.map_run_id = map_run_id 85 | -------------------------------------------------------------------------------- /src/utils/stereo_camera_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class StereoCameraModel(nn.Module): 6 | """ 7 | The stereo camera model. 8 | """ 9 | 10 | def __init__(self, cu, cv, f, b): 11 | """ 12 | Set up the stereo camera model with the stereo camera parameters.
13 | 14 | Args: 15 | cu (float): optical centre u coordinate. 16 | cv (float): optical centre v coordinate. 17 | f (float): focal length. 18 | b (float): stereo camera base line. 19 | """ 20 | super(StereoCameraModel, self).__init__() 21 | 22 | self.cu = cu 23 | self.cv = cv 24 | self.f = f 25 | self.b = b 26 | 27 | # Matrices for camera model needed to projecting/reprojecting between the camera and image frames 28 | M, Q = self.set_camera_model_matrices(self.cu, self.cv, self.f, self.b) 29 | self.register_buffer('M', M) 30 | self.register_buffer('Q', Q) 31 | 32 | def set_camera_model_matrices(self, cu, cv, f, b): 33 | """ 34 | Create the matrices needed to project 3D camera coordinates into 2D image coordinates (M) and compute 35 | 3D camera coordinates from 2D image coordinates (Q). 36 | 37 | Args: 38 | cu (float): optical centre u coordinate. 39 | cv (float): optical centre v coordinate. 40 | f (float): focal length. 41 | b (float): stereo camera base line. 42 | 43 | Returns: 44 | M (torch.tensor): matrix used to project 3D camera coordinates to 2D image coordinates, (4x4). 45 | Q (torch.tensor): matrix used to compute 3D camera coordinates from 2D image coordinates, (4x4). 46 | """ 47 | # Matrix needed to project 3D points into stereo camera coordinates. 48 | # [ul, vl, ur, vr]^T = (1/z) * M * [x, y, z, 1]^T (using left camera model) 49 | # 50 | # [f, 0, cu, 0] 51 | # [0, f, cv, 0] 52 | # [f, 0, cu, -f * b] 53 | # [0, f, cv, 0] 54 | # 55 | M = torch.tensor([[self.f, 0.0, self.cu, 0.0], 56 | [0.0, self.f, self.cv, 0.0], 57 | [self.f, 0.0, self.cu, -(self.f * self.b)], 58 | [0.0, self.f, self.cv, 0.0]]) 59 | 60 | # Matrix needed to transform image coordinates into 3D points. 61 | # [x, y, z, 1] = (1/d) * Q * [ul, vl, d, 1]^T 62 | # 63 | # [b, 0, 0, -b * cu] 64 | # [0, b, 0, -b * cv] 65 | # [0, 0, 0, f * b] 66 | # [0, 0, 1, 0] 67 | # 68 | Q = torch.tensor([[self.b, 0.0, 0.0, -(self.b * self.cu)], 69 | [0.0, self.b, 0.0, -(self.b * self.cv)], 70 | [0.0, 0.0, 0.0, self.f * self.b], 71 | [0.0, 0.0, 1.0, 0.0]]) 72 | 73 | return M, Q 74 | 75 | def normalize_coords(self, coords_2D, batch_size, height, width): 76 | """ 77 | Normalize 2D keypoint coordinates to lie in the range [-1, 1]. 78 | 79 | Args: 80 | coords_2D (torch.tensor): 2D image coordinates store in order (u, v), (Bx2xN). 81 | batch_size (int): batch size. 82 | height (int): image height. 83 | width (int): image width. 84 | 85 | Returns: 86 | coords_2D_norm (torch.tensor): coordinates normalized to range [-1, 1] and stored in order (u, v), (BxNx2). 87 | """ 88 | u_norm = (2 * coords_2D[:, 0, :].reshape(batch_size, -1) / (width - 1)) - 1 89 | v_norm = (2 * coords_2D[:, 1, :].reshape(batch_size, -1) / (height - 1)) - 1 90 | 91 | return torch.stack([u_norm, v_norm], dim=2) 92 | 93 | def check_valid_disparity(self, disparity): 94 | """ 95 | Check if disparity falls withing our chosen accepted range (0.1m - 400.0m). 96 | 97 | Args: 98 | disparity (torch.tensor): the disparity map for the image, (Bx1xN). 99 | 100 | Returns: 101 | valid_disparity (torch.tensor): True for valid disparity values, False otherwise. 102 | """ 103 | disp_min = (self.f * self.b) / 400.0 # Farthest point 400 m 104 | disp_max = (self.f * self.b) / 0.1 # Closest point 0.1 m 105 | 106 | return (disparity >= disp_min) & (disparity <= disp_max) 107 | 108 | def camera_to_image(self, cam_coords, M): 109 | """ 110 | Project 3D points given in the camera frame into 2D image coordinates. 
111 | 112 | Args: 113 | cam_coords (torch.tensor): 3D camera coordinates given as homogeneous coordinates, (Bx4xN). 114 | M (torch.tensor): matrix for projecting points into image, (Bx4x4). 115 | 116 | Returns: 117 | img_coords (torch.tensor): 2D image coordinates given in order (ul, vl, ur, vr), (Bx4xN). 118 | """ 119 | batch_size, _, num_points = cam_coords.size() 120 | 121 | # [Ul, Vl, Ur, Vr] = M * [x, y, z, 1]^T 122 | img_coords = M.bmm(cam_coords) 123 | 124 | inv_z = 1.0 / (cam_coords[:, 2, :].reshape(batch_size, 1, num_points)) # Bx1xN, elementwise division. 125 | img_coords = img_coords * inv_z # Bx4xN, elementwise multiplication. 126 | 127 | return img_coords 128 | 129 | def camera_model(self, cam_coords): 130 | """ 131 | Project 3D points given in the camera frame into image coordinates. 132 | 133 | Args: 134 | cam_coords (torch.tensor): 3D camera coordinates given as homogeneous coordinates, (Bx4xN). 135 | 136 | Returns: 137 | img_coords (torch.tensor): 2D image coordinates given in order (ul, vl, ur, vr), (Bx4xN). 138 | """ 139 | batch_size = cam_coords.size(0) 140 | 141 | # Expand fixed matrix to the correct batch size. 142 | M = self.M.expand(batch_size, 4, 4).cuda() 143 | 144 | # Get the image coordinates. 145 | img_coords = self.camera_to_image(cam_coords, M) 146 | 147 | if (torch.sum(torch.isnan(img_coords)) > 0) or (torch.sum(torch.isinf(img_coords)) > 0): 148 | print('Warning: Nan or Inf values in image coordinate tensor.') 149 | raise ValueError("Nan or Inf in image coordinates") 150 | 151 | return img_coords 152 | 153 | def image_to_camera(self, img_coords, disparity, Q): 154 | """ 155 | Compute 3D point coordinates (in the camera frame) for the given 2D image coordinates. 156 | 157 | Args: 158 | img_coords (torch.tensor): 2D image coordinates given in order (u, v), (Bx2xN). 159 | disparity (torch.tensor): the disparity map for the image, (BxHxW). 160 | Q (torch.tensor): matrix for reprojecting image coordinates, (Bx4x4). 161 | 162 | Returns: 163 | cam_coords (torch.tensor): 3D points in camera frame given as homogeneous coordinates, (Bx4xN). 164 | valid_points (torch.tensor): value 1 for each valid 3D point with depth in accepted range, otherwise 0, 165 | (Bx1xN). 166 | """ 167 | batch_size, height, width = disparity.size() 168 | disparity = disparity.unsqueeze(1) 169 | num_points = img_coords.size(2) 170 | 171 | if (torch.sum(disparity == 0.0) > 0): 172 | print('Warning: 0.0 in disparities.') 173 | 174 | # Sample disparity for the image coordinates. 
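# grid_sample expects sampling locations normalized to [-1, 1], so the keypoint coordinates are normalized first; nearest-neighbour interpolation then picks the disparity value at each keypoint.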
175 | img_coords_norm = self.normalize_coords(img_coords, batch_size, height, width).unsqueeze(1) # Bx1xNx2 176 | 177 | point_disparities = F.grid_sample(disparity, img_coords_norm, mode='nearest', padding_mode='border') # Bx1x1xN 178 | point_disparities = point_disparities.reshape(batch_size, 1, num_points) # Bxx1xN 179 | valid_points = self.check_valid_disparity(point_disparities) # Bx1xN 180 | 181 | # Create the [ul, vl, d, 1] vector 182 | ones = torch.ones(batch_size, num_points).type_as(disparity) 183 | uvd1_pixel_coords = torch.stack((img_coords[:, 0, :], 184 | img_coords[:, 1, :], 185 | point_disparities[:, 0, :], 186 | ones), 187 | dim=1) # Bx4xN 188 | 189 | # [X, Y, Z, d]^T = Q * [ul, vl, d, 1]^T 190 | cam_coords = Q.bmm(uvd1_pixel_coords) # Bx4xN 191 | 192 | # [x, y, z, 1]^T = (1/d) * [X, Y, Z, d]^T 193 | inv_disparity = (1.0 / point_disparities) # Elementwise division 194 | cam_coords = cam_coords * inv_disparity # Elementwise multiplication 195 | 196 | return cam_coords, valid_points 197 | 198 | def inverse_camera_model(self, img_coords, disparity): 199 | """ 200 | Compute 3D point coordinates (in the camera frame) from the given 2D image coordinates. 201 | 202 | Args: 203 | img_coords (torch.tensor): 2D image coordinates given in order (u, v), (Bx2xN). 204 | disparity (torch.tensor): the disparity map for the image, (BxHxW). 205 | 206 | Returns: 207 | cam_coords (torch.tensor): 3D points in camera frame given as homogeneous coordinates, (Bx4xN). 208 | valid_points (torch.tensor): value 1 for each valid 3D point with depth in accepted range, otherwise 0, 209 | (Bx1xN). 210 | """ 211 | batch_size, height, width = disparity.size() 212 | 213 | # Expand fixed matrix to the correct batch size 214 | Q = self.Q.expand(batch_size, 4, 4).cuda() 215 | 216 | # Get the 3D camera coordinates. 217 | cam_coords, valid_points = self.image_to_camera(img_coords, disparity, Q) 218 | 219 | if (torch.sum(torch.isnan(cam_coords)) > 0) or (torch.sum(torch.isinf(cam_coords)) > 0): 220 | print('Warning: Nan or Inf values in camera coordinate tensor.') 221 | raise ValueError("Nan or Inf in camera coordinates") 222 | 223 | return cam_coords, valid_points 224 | 225 | 226 | 227 | -------------------------------------------------------------------------------- /src/utils/transform.py: -------------------------------------------------------------------------------- 1 | """Code to handle pose transforms. Some overlap with code in 2 | https://github.com/utiasASRL/vtr-dataset-tools/blob/master/transform.py. 
3 | """ 4 | 5 | import numpy as np 6 | 7 | class Transform: 8 | def __init__(self, C_ba, r_ab_inb): 9 | self.C_ba = C_ba 10 | self.r_ab_inb = r_ab_inb 11 | 12 | def __mul__(self, other): 13 | if other.__class__ == Transform: 14 | return Transform(self.C_ba.dot(other.C_ba), self.r_ab_inb + self.C_ba.dot(other.r_ab_inb)) 15 | elif other.__class__ == np.ndarray: 16 | if other.shape in [(3, 1), (3,)]: 17 | return self.C_ba.dot(other) + self.r_ab_inb 18 | elif other.shape == (1, 3): 19 | return self.C_ba.dot(other.T) + self.r_ab_inb 20 | else: 21 | raise NotImplementedError("Cannot multiply with array of shape " + str(other.shape)) 22 | else: 23 | raise NotImplementedError("Cannot multiply with type " + str(other.__class__)) 24 | 25 | def inv(self): 26 | return Transform(self.C_ba.T, -self.C_ba.T.dot(self.r_ab_inb)) 27 | 28 | @property 29 | def matrix(self): 30 | rval = np.eye(4) 31 | rval[:3, :3] = self.C_ba 32 | rval[:3, 3] = self.r_ab_inb 33 | 34 | return rval 35 | 36 | def __repr__(self): 37 | return str(self.matrix) 38 | 39 | @property 40 | def phi(self): 41 | # Get angle 42 | phi_ba = np.arccos(0.5*(self.C_ba.trace()-1.0)) 43 | sinphi_ba = np.sin(phi_ba) 44 | 45 | if abs(sinphi_ba) > 1e-9: 46 | 47 | # General case, angle is NOT near 0, pi, or 2*pi 48 | axis = np.array([self.C_ba[2, 1] - self.C_ba[1, 2], 49 | self.C_ba[0, 2] - self.C_ba[2, 0], 50 | self.C_ba[1, 0] - self.C_ba[0, 1]]) 51 | return (0.5*phi_ba/sinphi_ba)*axis 52 | 53 | elif abs(phi_ba) > 1e-9: 54 | # Angle is near pi or 2*pi 55 | # ** Note with this method we do not know the sign of 'phi', however since we know phi is 56 | # close to pi or 2*pi, the sign is unimportant.. 57 | 58 | # Find the eigenvalues and eigenvectors 59 | y, v = np.linalg.eig(self.C_ba) 60 | 61 | # Try each eigenvalue 62 | for i in range(3): 63 | # Check if eigen value is near +1.0 64 | if abs(y[i] - 1.0) < 1e-6: 65 | # Get corresponding angle-axis 66 | return phi_ba*v[i] 67 | 68 | # Runtime error 69 | raise RuntimeError("so3 logarithmic map failed to find an axis-angle, " 70 | "angle was near pi, or 2*pi, but no eigenvalues were near 1") 71 | 72 | else: 73 | # Angle is near zero 74 | return np.zeros((3,)) 75 | 76 | @staticmethod 77 | def hat(v): 78 | if len(v) == 4: v = v[:3] / v[3] 79 | skv = np.roll(np.roll(np.diag(v.flatten()), 1, 1), -1, 0) 80 | return skv - skv.T 81 | # return np.array([[ 0.0, -v[2], v[1]], 82 | # [ v[2], 0.0, -v[0]], 83 | # [-v[1], v[0], 0.0]]) 84 | 85 | @staticmethod 86 | def SO3JacInv(aaxis_ba): 87 | ## separate the angle/axis 88 | angle_ba = np.linalg.norm(aaxis_ba) 89 | axis_ba = aaxis_ba / angle_ba 90 | 91 | # if its small then return identity 92 | if angle_ba < 1e-12: 93 | return np.eye(3, 3) 94 | 95 | halfphi = 0.5 * angle_ba 96 | cotanTerm = halfphi / np.tan(halfphi) 97 | 98 | ## jacinv = phi/2 * cot(phi/2)eye + (1-phi/2*cot(phi/2))aa^T - phi/2 * a^ 99 | jacinv = np.eye(3, 3) * cotanTerm + (1.0 - cotanTerm) * \ 100 | axis_ba * axis_ba.transpose() - halfphi * Transform.hat(axis_ba) 101 | 102 | return jacinv 103 | 104 | @staticmethod 105 | def LogMap(tf): 106 | # Get the SO3 inverse jacobian 107 | jacinv = Transform.SO3JacInv(tf.phi) 108 | # multiply by translation to get rho 109 | rho_ab_inb = np.matmul(jacinv, tf.r_ab_inb.reshape((3, 1))) 110 | # return 6x1 vector 111 | return np.concatenate((rho_ab_inb.flatten(), tf.phi), axis=0) 112 | 113 | @classmethod 114 | def ExpMap(cls, xi): 115 | phi_ba = np.linalg.norm(xi[3:]) 116 | if abs(phi_ba) < 1e-9: 117 | # If angle is very small, return identity rotation with linear offset 
118 | return Transform(np.eye(3), xi[:3]) 119 | 120 | axis = xi[3:] / phi_ba 121 | sin_term = np.sin(phi_ba) / phi_ba 122 | cos_term = (1.0 - np.cos(phi_ba)) / phi_ba 123 | 124 | A = cls.hat(axis) 125 | 126 | J = sin_term * np.eye(3) + (1.0 - sin_term) * np.outer(axis, axis) + cos_term * A 127 | return Transform(np.eye(3) + phi_ba * A.dot(J), J.dot(xi[:3])) -------------------------------------------------------------------------------- /src/utils/utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | import torch 5 | from torch.utils import data 6 | 7 | from src.dataset import Dataset 8 | 9 | class MELData(): 10 | """ 11 | Class for storing the training, validation, and test data that we generate from the VT&R Multi-Experience 12 | Localization (MEL) data. 13 | """ 14 | 15 | def __init__(self): 16 | self.mean = [0.0, 0.0, 0.0] 17 | self.std_dev = [0.0, 0.0, 0.0] 18 | 19 | self.train_labels_se3 = {} 20 | self.train_labels_log = {} 21 | self.train_ids = [] 22 | 23 | self.valid_labels_se3 = {} 24 | self.valid_labels_log = {} 25 | self.valid_ids = [] 26 | 27 | self.paths = [] 28 | self.test_labels_se3 = {} 29 | self.test_labels_log = {} 30 | self.test_ids = {} 31 | 32 | def compute_mean_std(mel_data, data_dir, height, width): 33 | """ 34 | Compute the mean and standard deviation for each channel of the images in a dataset. 35 | 36 | Args: 37 | mel_data (MELData): the object storing the images. 38 | data_dir (string): the directory where the images are stored 39 | height (int): image height. 40 | width (int): image width. 41 | 42 | Returns: 43 | pop_mean_final (list[float]): the mean for each of the three RGB channels for the images. 44 | pop_std_final (list[float]): the standard deviation for each of the three RGB channels for the images. 
45 | """ 46 | 47 | start = time.time() 48 | 49 | image_params = {'data_dir': data_dir, 50 | 'height': height, 51 | 'width': width, 52 | 'use_normalization': False, 53 | 'use_disparity': False, 54 | } 55 | 56 | num_channels = 12 57 | batch_size = 128 58 | 59 | params = {'batch_size': batch_size, 60 | 'shuffle': False, 61 | 'num_workers': 12} 62 | 63 | training_set = Dataset(**image_params) 64 | training_set.load_mel_data(mel_data, 'training') 65 | training_generator = data.DataLoader(training_set, **params) 66 | 67 | fst_moment = torch.zeros(num_channels) 68 | snd_moment = torch.zeros(num_channels) 69 | 70 | fst_moment = fst_moment.cuda() 71 | snd_moment = snd_moment.cuda() 72 | 73 | i = 0 74 | cnt = 0 75 | for images, _, _, _, _ in training_generator: 76 | i += 1 77 | 78 | images = images[:, 0:num_channels, :, :].cuda() 79 | 80 | b, c, h, w = images.size() 81 | nb_pixels = b * h * w 82 | sum_ = torch.sum(images, dim=[0,2,3]) 83 | sum_of_square = torch.sum(images ** 2, dim=[0,2,3]) 84 | fst_moment = ((cnt * fst_moment) + sum_) / (cnt + nb_pixels) 85 | snd_moment = ((cnt * snd_moment) + sum_of_square) / (cnt + nb_pixels) 86 | 87 | cnt += nb_pixels 88 | 89 | pop_mean = fst_moment 90 | pop_std = torch.sqrt(snd_moment - (fst_moment ** 2)) 91 | 92 | pop_mean_final = [0, 0, 0] 93 | pop_std_final = [0, 0, 0] 94 | 95 | num_comp = int(num_channels / 3.0) 96 | 97 | for i in range(3): 98 | for j in range(num_comp): 99 | pop_mean_final[i] += pop_mean[(j * 3) + i].item() 100 | pop_std_final[i] += pop_std[(j * 3) + i].item() 101 | 102 | pop_mean_final[i] = pop_mean_final[i] / num_comp 103 | pop_std_final[i] = pop_std_final[i] / num_comp 104 | 105 | print(f'Training images mean: {pop_mean_final}') 106 | print(f'Training images std: {pop_std_final}') 107 | print(f'Computing mean/std took: {time.time() - start}') 108 | 109 | return pop_mean_final, pop_std_final 110 | -------------------------------------------------------------------------------- /visualization/plots.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | import matplotlib.pyplot as plt 7 | 8 | class Plotting: 9 | """ 10 | Class for plotting results. 11 | """ 12 | 13 | def __init__(self, results_dir): 14 | """ 15 | Initialize plotting. 16 | 17 | Args: 18 | results_dir (string): the directory in which to store the plots. 19 | """ 20 | self.results_dir = results_dir 21 | 22 | def plot_epoch_losses(self, epoch_losses_train, epoch_losses_valid): 23 | """ 24 | Plot the average training and validation loss for each epoch. Plot each individual type of loss and also 25 | the weighted sum of the losses. 26 | 27 | Args: 28 | epoch_losses_train (dict): the average training losses for each epoch. 29 | epoch_losses_valid (dict): the average training losses for each epoch. 
30 | """ 31 | for loss_type in epoch_losses_train.keys(): 32 | 33 | plt.figure() 34 | p1 = plt.plot(epoch_losses_train[loss_type]) 35 | p2 = plt.plot(epoch_losses_valid[loss_type]) 36 | 37 | plt.legend((p1[0], p2[0]), ('training', 'validation')) 38 | plt.ylabel('Loss') 39 | plt.xlabel('Epoch') 40 | plt.title(f'Loss for each epoch, {loss_type}') 41 | 42 | plt.savefig(f'{self.results_dir}loss_epoch_{loss_type}.png', format='png') 43 | plt.savefig(f'{self.results_dir}loss_epoch_{loss_type}.pdf', format='pdf') 44 | plt.close() 45 | 46 | plt.figure() 47 | p1 = plt.plot(np.log(epoch_losses_train[loss_type])) 48 | p2 = plt.plot(np.log(epoch_losses_valid[loss_type])) 49 | 50 | plt.legend((p1[0], p2[0]), ('training', 'validation')) 51 | plt.ylabel('Log of loss') 52 | plt.xlabel('Epoch') 53 | plt.title('Log of loss for each epoch, {}'.format(loss_type)) 54 | 55 | plt.savefig(f'{self.results_dir}log_loss_epoch_{loss_type}.png', format='png') 56 | plt.savefig(f'{self.results_dir}log_loss_epoch_{loss_type}.pdf', format='pdf') 57 | plt.close() 58 | 59 | def plot_epoch_errors(self, epoch_error_train, epoch_error_valid, dof): 60 | """ 61 | Plot the average error for each specified pose DOF for each epoch for training and validation. 62 | 63 | Args: 64 | epoch_error_train (dict): the average pose errors for each DOF for each epoch. 65 | epoch_error_valid (dict): the average pose errors for each DOF for each epoch. 66 | dof (list[int]): indices of the DOF to plot. 67 | """ 68 | dof_str = ['x', 'y', 'z', 'roll', 'pitch', 'yaw'] 69 | 70 | for i in range(len(dof)): 71 | dof_index = dof[i] 72 | 73 | plt.figure() 74 | p1 = plt.plot(epoch_error_train[:, dof_index]) 75 | p2 = plt.plot(epoch_error_valid[:, dof_index]) 76 | 77 | plt.legend((p1[0], p2[0]), ('training', 'validation')) 78 | plt.ylabel('RMSE') 79 | plt.xlabel('Epoch') 80 | plt.title(f'Error for each epoch, {dof_str[dof_index]}') 81 | 82 | plt.savefig(f'{self.results_dir}error_epoch_{dof_str[dof_index]}.png', format='png') 83 | plt.savefig(f'{self.results_dir}error_epoch_{dof_str[dof_index]}.pdf', format='pdf') 84 | plt.close() 85 | 86 | def plot_outputs(self, outputs_log, targets_log, path_name, map_run_id, dof): 87 | """ 88 | Plot estimated and target poses. Plot each of the estimated DOF separately. 89 | 90 | Args: 91 | outputs_log (dict): a map from the live run id to the estimated poses for all localized vertices on 92 | that run provided as length 6 vectors stacked in a numpy array. 93 | targets_log (dict): a map from the live run id to the ground truth target poses for all localized 94 | vertices on that run provided as length 6 vectors stacked in a numpy array. 95 | path_name (string): name of the path. 96 | map_run_id (int): the id of the run used as the map, i.e. which all the other runs are localized to. 97 | dof (list[int]): indices of the DOF to plot. 98 | """ 99 | # Store the plots of all runs localized to the map run in the same directory. 100 | directory = f'{self.results_dir}/{path_name}/map_run_{map_run_id}' 101 | if not os.path.exists(directory): 102 | os.makedirs(directory) 103 | 104 | dof_str = ['x', 'y', 'z', 'roll', 'pitch', 'yaw'] 105 | 106 | for live_run_id in outputs_log.keys(): 107 | 108 | for dof_ind in dof: 109 | 110 | # Convert to degrees if rotation. 
111 | targets_plot = np.rad2deg(targets_log[live_run_id][:, dof_ind]) if dof_ind > 2 else targets_log[live_run_id][:, dof_ind] 112 | outputs_plot = np.rad2deg(outputs_log[live_run_id][:, dof_ind]) if dof_ind > 2 else outputs_log[live_run_id][:, dof_ind] 113 | 114 | f = plt.figure(figsize=(18,6)) 115 | f.tight_layout(rect=[0, 0.03, 1, 0.95]) 116 | p1 = plt.plot(targets_plot, 'C1') 117 | p2 = plt.plot(outputs_plot, 'C0') 118 | plt.legend((p1[0], p2[0]), ('ground truth', 'estimated')) 119 | 120 | ylabel = 'Degrees' if dof_ind > 2 else 'Metres' 121 | plt.ylabel(ylabel) 122 | plt.xlabel('Vertex') 123 | plt.title(f'Pose - {dof_str[dof_ind]}') 124 | 125 | plt.savefig(f'{directory}/pose_{dof_str[dof_ind]}_live_run_{live_run_id}.png', format='png') 126 | plt.savefig(f'{directory}/pose_{dof_str[dof_ind]}_live_run_{live_run_id}.pdf', format='pdf') 127 | plt.close() --------------------------------------------------------------------------------
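The batched Lie-group helpers in `src/utils/lie_algebra.py` are easiest to sanity-check through their inverse relationships. The snippet below is a minimal sketch, not a file from the repository; it assumes PyTorch is installed and the repository root is on `PYTHONPATH`, and checks that `se3_exp` and `se3_log` invert each other and that `se3_inv` produces valid batched inverses.

```python
import torch

from src.utils.lie_algebra import se3_exp, se3_inv, se3_log

# A small batch of random SE(3) tangent vectors xi = [rho, phi], shape Nx6.
xi = 0.1 * torch.randn(4, 6)

# The exponential and logarithmic maps should be inverses (loose float32 tolerance).
T = se3_exp(xi)       # Nx4x4 transformation matrices
xi_back = se3_log(T)  # Nx6 tangent vectors
assert torch.allclose(xi, xi_back, atol=1e-4)

# Each transformation composed with its inverse should give the identity.
I = torch.eye(4).expand(4, 4, 4)
assert torch.allclose(T.bmm(se3_inv(T)), I, atol=1e-4)
```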