├── .gitignore ├── LICENSE ├── README.md ├── config ├── data.json ├── test.json └── train.json ├── data ├── __init__.py ├── build_datasets.py ├── build_train_test_dataset_loc.py ├── pose_graph_tools.py └── sample_data.py ├── docker ├── Dockerfile ├── docker_data.sh ├── docker_test.sh └── docker_train.sh ├── download_datasets.sh ├── download_networks.sh ├── extract_features.py ├── figures ├── box_plot_generalization.png ├── box_plot_lighting_change.png ├── conditions_generalization.png ├── conditions_lighting_change.png ├── network.png ├── overview.png └── pipeline.png ├── requirements.txt ├── src ├── __init__.py ├── dataset.py ├── model │ ├── __init__.py │ ├── keypoint_block.py │ ├── matcher_block.py │ ├── pipeline.py │ ├── ransac_block.py │ ├── svd_block.py │ ├── unet.py │ └── weight_block.py ├── test.py ├── train.py └── utils │ ├── __init__.py │ ├── early_stopping.py │ ├── keypoint_tools.py │ ├── lie_algebra.py │ ├── statistics.py │ ├── stereo_camera_model.py │ ├── transform.py │ └── utils.py └── visualization └── plots.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
97 | __pypackages__/
98 | 
99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 | 
103 | # SageMath parsed files
104 | *.sage.py
105 | 
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 | 
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 | 
119 | # Rope project settings
120 | .ropeproject
121 | 
122 | # mkdocs documentation
123 | /site
124 | 
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 | 
130 | # Pyre type checker
131 | .pyre/
132 | 
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 | 
3 | Copyright (c) 2021, ASRL - Autonomous Space Robotics Lab
4 | All rights reserved.
5 | 
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 | 
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Keeping an Eye on Things: Deep Learned Features for Long-Term Visual Localization
2 | 
3 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/overview.png?raw=true)
4 | 
5 | We learn visual features for localization across large appearance change and use them in Visual Teach and Repeat (VT&R) for closed-loop path-following on a robot outdoors. In VT&R, we manually drive the robot to teach a path (and build a map) that the robot subsequently repeats autonomously. We include three videos, two that detail the method and experiments ([video1](https://www.youtube.com/watch?v=26AVY1ITE3Q), [video2](https://youtu.be/JsEBs9Y5XVg)) and another that includes a [live demonstration](https://www.youtube.com/watch?v=KkG6TQOVXak) of localization between daytime and after dark.
6 | 
7 | We train a neural network to predict sparse keypoints with associated descriptors and scores that can be used together with a classical pose estimator for localization. Our training pipeline includes a differentiable pose estimator such that training can be supervised with ground truth poses from data collected earlier.
8 | 
9 | We insert the learned features into the existing VT&R pipeline to perform closed-loop path-following in unstructured outdoor environments. We show successful path following across all lighting conditions despite the robot's map being constructed using daylight conditions. In all, we validated the approach with 35.5 km of autonomous path-following experiments in challenging conditions.
10 | 
11 | The details of our method can be found in the [paper](https://arxiv.org/abs/2109.04041):
12 | ```
13 | @ARTICLE{gridseth_ral2022,
14 | author={Gridseth, Mona and Barfoot, Timothy D.},
15 | journal={IEEE Robotics and Automation Letters},
16 | title={Keeping an Eye on Things: Deep Learned Features for Long-Term Visual Localization},
17 | year={2022},
18 | volume={7},
19 | number={2},
20 | pages={1016-1023},
21 | doi={10.1109/LRA.2021.3136867}}
22 | ```
23 | 
24 | ## Learning Pipeline
25 | 
26 | Our method is inspired by Barnes et al. and their [paper](https://arxiv.org/abs/2001.10789) Under the Radar: Learning to Predict Robust Keypoints for Odometry Estimation and Metric Localisation in Radar. We have adapted a similar differentiable learning pipeline and network architecture to work for a stereo camera. The training pipeline takes a pair of stereo images and uses the neural network to extract keypoints, descriptors, and scores. We match keypoints using the descriptors and compute 3D points for each of the matched 2D keypoints. The 3D points are passed to a differentiable pose estimator that uses SVD. The pipeline is illustrated below:
27 | 
28 | ![Test](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/pipeline.png?raw=true)
29 | 
30 | The neural network consists of an encoder and two decoders. The descriptors are extracted from the encoder, while the two decoders provide the keypoint coordinates and scores; see the figure below.
31 | 
32 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/network.png?raw=true)
33 | 
34 | ## Training Data
35 | 
36 | We train the network with data collected during autonomous path-following with a Grizzly robot using Multi-Experience VT&R. VT&R stores images and relative poses between them in a spatio-temporal pose graph. Each time the robot repeats a path, new data is added to the pose graph. We can sample images and relative ground truth poses from a large range of appearance conditions from this pose graph. We train using data from the UTIAS Long-Term Localization and Mapping Dataset. The data is described in detail and can be downloaded from this [website](http://asrl.utias.utoronto.ca/datasets/2020-vtr-dataset/).
37 | 
38 | ## Getting Started
39 | 
40 | We explain how to run the code for generating datasets, training the model, testing the model, and extracting features. We provide a docker container that can be used to run the code. If running without docker is preferred, we provide a `requirements.txt` that lists the necessary dependencies, and we give the commands needed to run the Python scripts.
41 | 
42 | ### Docker
43 | 
44 | In the `docker` folder, we provide a Dockerfile that can be used to build an image that will run the code.
To build the docker image, run the following command:
45 | 
46 | ```
47 | cd docker
48 | docker image build --shm-size=64g -t <docker_image_name> .
49 | ```
50 | ### Extract Features
51 | 
52 | We provide a simple script that shows how keypoints, scores, and descriptors can be extracted from an image.
53 | 
54 | ```
55 | python extract_features.py
56 | ```
57 | 
58 | ### Generate Dataset
59 | 
60 | As mentioned above, we use data from the UTIAS Long-Term Localization and Mapping Dataset. We have written a script that will generate training, validation, and test datasets from the localization data. We sample image pairs and relative poses from the VT&R pose graph. The code for dataset generation is found under the `data` folder. Our code assumes that the data is structured the same way as described on the dataset [website](http://asrl.utias.utoronto.ca/datasets/2020-vtr-dataset/).
61 | 
62 | For the training and validation data we sample randomly from the pose graph for a chosen set of runs. For the test data we sample image pairs and relative poses sequentially along the whole run to emulate localization for each run. We localize each of the chosen test runs to each other.
63 | 
64 | With our code, we have generated one set of training, testing, and validation datasets using the UTIAS Multiseason data and another set using the UTIAS In-The-Dark data. These can be downloaded using the `download_datasets.sh` script. The downloaded files store the data sample ids and ground truth labels (poses) in PyTorch format. The images from the UTIAS Long-Term Localization and Mapping Dataset still need to be stored on the computer to be available to load.
65 | 
66 | Alternatively, new datasets can be generated from scratch by running a docker container as shown below. The `docker_data.sh` script has to be updated with the correct file paths where placeholders are present.
67 | 
68 | ```
69 | cd docker
70 | ./docker_data.sh
71 | ```
72 | 
73 | Finally, the code can be run without the use of docker by executing the following command:
74 | 
75 | ```
76 | python -m data.build_train_test_dataset_loc --config /deep_learned_visual_features/config/data.json
77 | ```
78 | 
79 | ### Train
80 | 
81 | The model can be trained by running the following docker container, provided that updates are made to the file path placeholders in `docker_train.sh`:
82 | 
83 | ```
84 | cd docker
85 | ./docker_train.sh
86 | ```
87 | 
88 | The command for running the training code without docker is the following:
89 | 
90 | ```
91 | python -m src.train --config /deep_learned_visual_features/config/train.json
92 | ```
93 | 
94 | ### Test
95 | 
96 | We provide model weights from training the network with the UTIAS Multiseason and UTIAS In-The-Dark datasets. They can be downloaded using the `download_networks.sh` script. We have the following model weight files:
97 | 
98 | `network_inthedark_layer32.pth`: the network was trained on data from the UTIAS In-The-Dark dataset and used in the lighting-change experiment described in the paper. The first layer of the network encoder is of size 32.
99 | 
100 | `network_multiseason_layer16.pth`: the network was trained on data from the UTIAS Multiseason dataset. The first layer of the network encoder is of size 16.
101 | 
102 | `network_mutiseason_inthedark_layer16.pth`: the network was trained on data from the UTIAS Multiseason dataset and then subsequently fine-tuned with data from the UTIAS In-The-Dark dataset. It was used in the generalization experiment described in the paper. The first layer of the network encoder is of size 16.
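As a quick sanity check after downloading the weights, the checkpoint files can be opened directly with PyTorch before they are used in the test configuration. The snippet below is only a hedged sketch: the `networks/` path prefix is an assumption about where `download_networks.sh` places the files, and the exact dictionary layout inside the checkpoint depends on how it was saved, so the code simply inspects what is stored rather than assuming particular keys.

```
import torch

# Hypothetical location of one of the downloaded weight files; adjust the path
# to wherever download_networks.sh stored them on your machine.
checkpoint_path = 'networks/network_multiseason_layer16.pth'

# Load on the CPU so no GPU is needed for this check.
checkpoint = torch.load(checkpoint_path, map_location='cpu')

if isinstance(checkpoint, dict):
    # Most PyTorch checkpoints are dictionaries; print the top-level keys
    # (these may be checkpoint fields such as a model state dict and optimizer
    # state, or parameter names if the file stores a state dict directly).
    print('Checkpoint keys:', list(checkpoint.keys()))
else:
    # Otherwise the file holds a single object, e.g. a serialized module.
    print('Loaded object of type:', type(checkpoint))
```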
103 | 
104 | The testing can be done using the docker container, provided that the placeholder file paths in `docker_test.sh` are updated:
105 | 
106 | ```
107 | cd docker
108 | ./docker_test.sh
109 | ```
110 | The command for running the Python test script is the following:
111 | 
112 | ```
113 | python -m src.test --config /deep_learned_visual_features/config/test.json
114 | ```
115 | ### Configurations
116 | 
117 | In the `config` folder we provide JSON files that set the configuration parameters for the different tasks.
118 | 
119 | #### Important note
120 | The UTIAS Multiseason and UTIAS In-The-Dark datasets were collected with two different cameras and hence their calibration parameters are different.
121 | 
122 | ```
123 | UTIAS Multiseason camera parameters:
124 | width: 512
125 | height: 384
126 | focal length: 388.425
127 | cx: 253.502
128 | cy: 196.822
129 | baseline: 0.239946
130 | ```
131 | ```
132 | UTIAS In-The-Dark camera parameters:
133 | width: 512
134 | height: 384
135 | focal length: 387.777
136 | cx: 257.446
137 | cy: 197.718
138 | baseline: 0.239965
139 | ```
140 | 
141 | The poses from the dataset are all given in the vehicle frame of the robot. In our code we have a transform `T_s_v` that is the vehicle-to-sensor transform for the robot. The sensor frame coincides with the left camera frame of the stereo camera. In the code we compute pose losses with the poses in the vehicle frame. When computing the pose from 3D points using SVD, the 3D points are in the sensor frame. Hence the initially computed pose is given in the sensor frame before we transform it back to the vehicle frame; see the code [here](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/src/model/svd_block.py#L71).
142 | 
143 | #### Generate Dataset
144 | 
145 | In `config/data.json`, we find the following configurations:
146 | 
147 | `dataset_name`: give the dataset a name to reference when loading it for training/testing. \
148 | `num_train_samples`: the number of training samples we want to generate. \
149 | `num_validation_samples`: the number of validation samples we want to generate. \
150 | `max_temporal_len`: when localizing live vertices to the map vertices, allow up to this topological distance between them. \
151 | `dataset`: image height and width. \
152 | `train_paths`: list of names of paths we want to include in the training data ('multiseason', 'inthedark'). Use the name you have stored the dataset under on your computer, i.e. `/path_to_data/dataset_name/run_000001/...`. \
153 | `test_paths`: list of names of paths we want to include in the test data. \
154 | `sampling_ratios_train`: for each path in the training data, which portion of the total samples it should contribute. If we use two paths ('multiseason', 'inthedark') we could for example sample 0.7 from 'multiseason' and 0.3 from 'inthedark'. \
155 | `sampling_ratios_valid`: same as above for validation data. \
156 | `train_runs`: the runs from the paths to include in the training data. \
157 | `validation_runs`: the runs from the paths to include in the validation data. \
158 | `test_runs`: the runs from the paths to include in the test data. \
159 | `temporal_len`: same topological distance as max_temporal_len above. Here we set one fixed distance for each test run (when the specific run is used as the teach/map run).
160 | 
161 | #### Train
162 | 
163 | In `config/train.json`, we find the following configurations:
164 | 
165 | `dataset_name`: name of the dataset to use. \
166 | `checkpoint_name`: name of the checkpoint used to store the model; alternatively, the checkpoint to load to resume training an existing model. \
167 | `training`: \
168 |     `start_pose_estimation`: we can train initially using only the keypoint loss. This sets the epoch at which to start pose estimation and using the pose loss. \
169 |     `max_epochs`: stop training after this many epochs. \
170 |     `patience`: how many epochs to run with validation loss not improving before stopping. \
171 | `network`: \
172 |     `num_channels`: number of input channels, 3 for one RGB image. \
173 |     `num_classes`: number of classes for the output of the decoders, should be 1. \
174 |     `layer_size`: the size of the first layer of the encoder. Subsequent layer sizes are determined automatically. \
175 | `pipeline`: height and width of the windows in which we detect a keypoint and a parameter for whether to do dense or sparse matching of descriptors. \
176 | `outlier_rejection`: \
177 |     `type`: outlier rejection is done with ground truth poses during training. \
178 |     `dim`: doing outlier rejection for 2D points, 3D points, or in the plane (only considering x, y, heading). \
179 |     `inlier_threshold`: ignored, only needed for RANSAC during testing. \
180 |     `error_tolerance`: error threshold to be considered an inlier. \
181 |     `num_iterations`: ignored, only needed for RANSAC during testing. \
182 | `data_loader`: configurations for the data loader. \
183 | `stereo`: camera calibration parameters. \
184 | `dataset`: parameters for the dataset such as image height, width and whether to normalize the images and include disparity. \
185 | `optimizer`: configurations for the optimizer. \
186 | `scheduler`: configurations for the scheduler. \
187 | `loss`: \
188 |     `types`: the loss types to use (`pose`, `pose_plane` (only SE(2)), `keypoint_2D` (error between 2D image coordinates), `keypoint_3D` (error between 3D points), `keypoint_plane` (only SE(2))). \
189 |     `weights`: weights for the different types of losses.
190 | 
191 | #### Test
192 | 
193 | In `config/test.json`, we find the following configurations:
194 | 
195 | `dataset_name`: name of the dataset to use. \
196 | `checkpoint_name`: name of the model checkpoint to load. \
197 | `network`: \
198 |     `num_channels`: number of input channels, 3 for one RGB image. \
199 |     `num_classes`: number of classes for the output of the decoders, should be 1. \
200 |     `layer_size`: the size of the first layer of the encoder. Subsequent layer sizes are determined automatically. \
201 | `pipeline`: height and width of the windows in which we detect a keypoint and a parameter for whether to do dense or sparse matching of descriptors. \
202 | `outlier_rejection`: \
203 |     `type`: outlier rejection is done with RANSAC during testing. \
204 |     `dim`: doing outlier rejection for 2D points or 3D points. \
205 |     `inlier_threshold`: minimum ratio of inliers required to stop RANSAC early. \
206 |     `error_tolerance`: error threshold to be considered an inlier. \
207 |     `num_iterations`: number of iterations to run RANSAC. \
208 | `data_loader`: configurations for the data loader. \
209 | `stereo`: camera calibration parameters. \
210 | `dataset`: parameters for the dataset such as image height, width and whether to normalize the images and include disparity. \
211 | `optimizer`: configurations for the optimizer. \
212 | `scheduler`: configurations for the scheduler. \
213 | `loss`: \
214 |     `types`: we include the pose loss type during testing so we know whether we are estimating the full SE(3) pose (`pose`) or only SE(2) (`pose_plane`).
215 | 
216 | ## Experimental Results
217 | 
218 | We use the learned features in the VT&R system so that we can test them in closed-loop operation on a robot outdoors. The code for VT&R is available open-source [here](https://utiasasrl.github.io/). The experiments are described in detail in our paper. To summarize, we conducted two experiments. In the lighting-change experiment we trained the network using the UTIAS In-The-Dark dataset from 2016 and taught a new path in the same location around noon in August 2021. We repeated the path across a full range of lighting change from 3 a.m. until 10:30 p.m. with a 100% autonomy rate. In the figures below, we show example images from different conditions and a box plot of the inlier feature matches for each run.
219 | 
220 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/conditions_lighting_change.png?raw=true)
221 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/box_plot_lighting_change.png?raw=true)
222 | 
223 | In the generalization experiment we trained the network using the UTIAS In-The-Dark dataset from 2016 and the UTIAS Multiseason dataset from 2017 and taught a new path in August 2021. In this path we included new areas that had not been seen in the training data to test whether the features can generalize outside of the training paths. We taught the path around 1:30 p.m. and repeated it across a full range of lighting change from 4 a.m. until 9 p.m. with a 100% autonomy rate. In the figures below, we show example images from different conditions and a box plot of the inlier feature matches for each run.
224 | 225 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/conditions_generalization.png?raw=true) 226 | ![](https://github.com/utiasASRL/deep_learned_visual_features/blob/main/figures/box_plot_generalization.png?raw=true) 227 | 228 | 229 | 230 | -------------------------------------------------------------------------------- /config/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "home_path": "/home/", 3 | 4 | "dataset_name": "multiseason", 5 | "num_train_samples": 100000, 6 | "num_validation_samples": 20000, 7 | "max_temporal_len": 3, 8 | 9 | "dataset":{ 10 | "height": 384, 11 | "width": 512 12 | }, 13 | 14 | "train_paths": ["multiseason"], 15 | "test_paths": ["multiseason"], 16 | 17 | "sampling_ratios_train": { 18 | "multiseason": 1.0, 19 | "inthedark": 0.0 20 | }, 21 | 22 | "sampling_ratios_valid": { 23 | "multiseason": 1.0, 24 | "inthedark": 0.0 25 | }, 26 | 27 | "train_runs": { 28 | "multiseason": [1, 2, 12, 14, 15, 16, 18, 22, 24, 26, 4, 7, 9, 10, 11, 75, 77, 101, 102, 28, 30, 32, 78, 80, 34, 35, 36, 37, 42, 44, 45, 47, 48, 49, 50, 51, 53, 54, 55, 56, 59, 60, 61, 62, 63, 65, 67, 68, 69, 70, 71, 72, 73, 74, 81, 83, 84, 87, 89, 93, 94, 95, 96, 97, 98, 104, 106, 107, 108, 110, 111, 113, 115, 116, 118, 119, 121, 123, 124, 126, 128, 129, 130, 131, 133, 134], 29 | "inthedark": [1, 5, 6, 8, 10, 13, 14, 18, 20, 21, 22, 24, 26, 27, 29, 30, 31, 33, 34, 36, 38] 30 | }, 31 | 32 | "validation_runs": { 33 | "multiseason": [13, 17, 23, 8, 100, 27, 33, 46, 52, 58, 66, 82, 85, 88, 90, 99, 103, 109, 112, 117, 120, 122, 127, 132], 34 | "inthedark": [3, 7, 9, 12, 15, 19, 25, 32, 37] 35 | }, 36 | 37 | "test_runs": { 38 | "multiseason": [3, 5, 29, 43, 64, 105, 125], 39 | "inthedark": [2, 11, 16, 17, 23, 28, 35] 40 | }, 41 | 42 | "temporal_len": { 43 | "multiseason": {"3":1, "5":1, "29":1, "43":1, "64":1, "105":1, "125":1}, 44 | "inthedark": {"2":1, "11":1, "16": 1, "17": 1, "23":1, "28":1, "35":1} 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /config/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "home_path": "/home/", 3 | 4 | "experiment_name": "test_model", 5 | "dataset_name": "dataset_multiseason", 6 | "checkpoint_name": "network_multiseason_layer16", 7 | 8 | "network": { 9 | "num_channels": 3, 10 | "num_classes": 1, 11 | "layer_size": 16 12 | }, 13 | 14 | "pipeline": { 15 | "window_h": 16, 16 | "window_w": 16, 17 | "dense_matching": true 18 | }, 19 | 20 | "outlier_rejection": { 21 | "on": true, 22 | "type": "ransac", 23 | "dim": ["3D"], 24 | "inlier_threshold": 0.6, 25 | "error_tolerance": {"3D": 4.0, "2D": 50.0}, 26 | "num_iterations": 15 27 | }, 28 | 29 | "data_loader": { 30 | "batch_size": 3, 31 | "num_workers": 3, 32 | "shuffle": false 33 | }, 34 | 35 | "stereo": { 36 | "cu": 253.502, 37 | "cv": 196.822, 38 | "f": 388.425, 39 | "b": 0.239946 40 | }, 41 | 42 | "dataset": { 43 | "height": 384, 44 | "width": 512, 45 | "use_normalization": false, 46 | "use_disparity": true 47 | }, 48 | 49 | "loss": { 50 | "types": [ 51 | "pose_plane" 52 | ] 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /config/train.json: -------------------------------------------------------------------------------- 1 | { 2 | "home_path": "/home/", 3 | 4 | "experiment_name": "train_model", 5 | "dataset_name": "dataset_multiseason", 6 | "checkpoint_name": "network_v1", 7 | 8 | 
"training": { 9 | "start_pose_estimation": 4, 10 | "max_epochs": 500, 11 | "patience": 50 12 | }, 13 | 14 | "network": { 15 | "num_channels": 3, 16 | "num_classes": 1, 17 | "layer_size": 16 18 | }, 19 | 20 | "pipeline": { 21 | "window_h": 16, 22 | "window_w": 16, 23 | "dense_matching": true 24 | }, 25 | 26 | "outlier_rejection": { 27 | "on": true, 28 | "type": "ground_truth", 29 | "dim": ["plane"], 30 | "inlier_threshold": 0.6, 31 | "error_tolerance": {"3D": 4.0, "2D": 50.0, "plane": 3.0}, 32 | "num_iterations": 15 33 | }, 34 | 35 | "data_loader": { 36 | "batch_size": 3, 37 | "num_workers": 3, 38 | "shuffle": false 39 | }, 40 | 41 | "stereo": { 42 | "cu": 253.502, 43 | "cv": 196.822, 44 | "f": 388.425, 45 | "b": 0.239946 46 | }, 47 | 48 | "dataset": { 49 | "height": 384, 50 | "width": 512, 51 | "use_normalization": false, 52 | "use_disparity": true 53 | }, 54 | 55 | "optimizer": { 56 | "type": "Adam", 57 | "lr": 0.0001 58 | }, 59 | 60 | "scheduler": { 61 | "type": "StepLR", 62 | "step_size": 50, 63 | "gamma": 1.0 64 | }, 65 | 66 | "loss": { 67 | "types": [ 68 | "pose_plane", 69 | "keypoint_plane" 70 | ], 71 | "weights": { 72 | "translation": 1.0, 73 | "rotation": 10.0, 74 | "translation_x": 1.0, 75 | "translation_y": 50.0, 76 | "rotation_heading": 10.0, 77 | "keypoint_2D": 0.01, 78 | "keypoint_3D": 1.0, 79 | "keypoint_plane": 2.5 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/data/__init__.py -------------------------------------------------------------------------------- /data/build_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | from torch.utils import data 5 | import numpy as np 6 | 7 | from data.pose_graph_tools import Graph 8 | from data.sample_data import random_sample, sequential_sample 9 | 10 | def build_sub_graph(runs, data_dir): 11 | """ 12 | Build a pose graph from the stored data files. We build the pose graph using the tools provided in 13 | https://github.com/utiasASRL/vtr-dataset-tools. We use the data provided by the UTIAS Long-Term Localization 14 | and Mapping Dataset that can be found at http://asrl.utias.utoronto.ca/datasets/2020-vtr-dataset/index.html. 15 | 16 | Args: 17 | runs (list[int]): the ids of the runs from the dataset that we want to include in the pose graph. 18 | data_dir (string): the top-level directory that holds the data for the path for which we build a pose graph. 19 | 20 | Returns: 21 | graph (Graph): a pose graph built from the runs that was provided. 22 | """ 23 | teach_file = f'{data_dir}/run_000000/transforms_temporal.txt' 24 | repeat_files = [] 25 | 26 | for run in runs: 27 | if run != 0: 28 | run_str = str(run).zfill(6) 29 | run_file = f'{data_dir}/run_{run_str}/transforms_spatial.txt' 30 | if os.path.isfile(run_file): 31 | repeat_files.append(run_file) 32 | else: 33 | print(f'Building pose graph, run_file does not exist: {run_file}') 34 | exit(1) 35 | 36 | return Graph(teach_file, repeat_files) 37 | 38 | def build_random_loc_dataset(data_path, path_names, runs, num_samples, temporal_length): 39 | """ 40 | Build a dataset that localizes vertices of one run in the pose graph to another. We sample vertex pairs 41 | randomly from the pose graph. 
42 | 43 | Record the pose transforms as 4x4 matrices and the 6 DOF vector equivalents. The pose from vertex, v1, to 44 | vertex, v2, is given as T_v2_v1. Create sample ids from the vertex ids. A vertex id consists of the id of the 45 | run the vertex belongs to and the id of the pose along that run, i.e. vertex_id = (run_id, pose_id). The sample 46 | id corresponding to pose transform T_v2_v1 is on the form pathname_runid1_poseid1_runid2_poseid2, for 47 | instance: mutliseason_1_531_5_542. 48 | 49 | Args: 50 | data_path (string): path to where the different runs of data are stored. 51 | path_names (list[string]): the paths we will use for sampling. One pose graph is created for each path. 52 | runs (dict): map from the path names to a list of the runs to use for localization pose sampling for the 53 | given path. 54 | num_samples (dict): map from the path names to the number of samples to generate for the given paths. 55 | temporal_length (int): we can 'walk along' the pose graph to pair vertices that har further apart (i.e. 56 | not the closest pair). We set a fixed topological distance/steps we move away 57 | from the start vertex. 58 | 59 | Returns: 60 | samples (list[string]): list of all the sample ids. 61 | labels_se3 (dict): dictionary mapping sample id to pose transform 4x4 matrix provided as a torch.Tensor. 62 | labels_log (dict): dictionary mapping sample id to pose transform 6 DOF vector provided as a torch.Tensor. 63 | """ 64 | all_ids = [] 65 | labels_se3 = {} 66 | labels_log = {} 67 | 68 | for path_name in path_names: 69 | 70 | num_samples_path = num_samples[path_name] 71 | num_runs = len(runs[path_name]) 72 | 73 | print(f'\nRandom dataset from path: {path_name}') 74 | print(f'Sample from runs: {runs[path_name]}') 75 | print(f'Collect {num_samples_path} samples \n') 76 | 77 | data_dir = f'{data_path}/{path_name}' 78 | pose_graph = build_sub_graph(runs[path_name], data_dir) 79 | 80 | path_ids, path_labels_se3, path_labels_log = random_sample(path_name, pose_graph, runs[path_name], 81 | num_samples_path, temporal_length) 82 | 83 | all_ids = all_ids + path_ids 84 | labels_se3 = {**labels_se3, **path_labels_se3} 85 | labels_log = {**labels_log, **path_labels_log} 86 | 87 | print(f'\nRandom dataset total samples: {len(all_ids)}\n') 88 | 89 | return all_ids, labels_se3, labels_log 90 | 91 | def build_sequential_loc_dataset(data_path, path_name, map_run_id, live_run_ids, temporal_length): 92 | """ 93 | Build a dataset that localizes all the vertices of one or more runs in the pose graph to the vertices on one 94 | map (or teach) run. I.e. we localize one or more live (or repeat) runs to one run that we choose as the map 95 | run. We get relative pose transforms for each localized vertex in the order that the vertices were created when 96 | driving the robot during data collection. 97 | 98 | Record the pose transforms as 4x4 matrices and the 6 DOF vector equivalents. The pose from vertex, v1, to 99 | vertex, v2, is given as T_v2_v1. Create sample ids from the vertex ids. A vertex id consists of the id of the 100 | run the vertex belongs to and the id of the pose along that run, i.e. vertex_id = (run_id, pose_id). The sample 101 | id corresponding to pose transform T_v2_v1 is on the form pathname_runid1_poseid1_runid2_poseid2, for 102 | instance: mutliseason_1_531_5_542. 103 | 104 | Args: 105 | data_path (string): path to where the different runs of data are stored. 106 | path_name (string): name given to the path that the pose graph represents. 
107 | map_run_id (int): id of the run to localize to, i.e. compute the relative pose to vertices on this run. 108 | live_run_ids (list[int]): the runs we localize to the map run, i.e. compute relative pose from vertices on 109 | these runs. 110 | temporal_length (int): we can 'walk along' the pose graph to pair vertices that har further apart (i.e. 111 | not the closest pair). We set a fixed topological distance/steps we move away 112 | from the start vertex. 113 | 114 | Returns: 115 | samples (list[string]): list of all the sample ids. 116 | labels_se3 (dict): dictionary mapping sample id to pose transform 4x4 matrix provided as a torch.Tensor. 117 | labels_log (dict): dictionary mapping sample id to pose transform 6 DOF vector provided as a torch.Tensor. 118 | """ 119 | 120 | print(f'\nSequential dataset from path: {path_name}') 121 | print(f'Map (teach) run: {map_run_id}') 122 | print(f'Live (repeat) runs to localize: {live_run_ids} \n') 123 | 124 | data_dir = f'{data_path}/{path_name}' 125 | pose_graph = build_sub_graph([map_run_id] + live_run_ids, data_dir) 126 | 127 | return sequential_sample(path_name, pose_graph, map_run_id, live_run_ids, temporal_length) 128 | 129 | -------------------------------------------------------------------------------- /data/build_train_test_dataset_loc.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import math 4 | import os 5 | import pickle 6 | import random 7 | import sys 8 | import time 9 | 10 | import numpy as np 11 | import torch 12 | import torch.backends.cudnn as cudnn 13 | from torch.utils import data 14 | 15 | from data.build_datasets import build_random_loc_dataset, build_sequential_loc_dataset 16 | from src.utils.utils import MELData, compute_mean_std 17 | 18 | torch.backends.cudnn.benchmark = True 19 | 20 | def main(config): 21 | 22 | # Directory to store outputs to stdout/stderr. 23 | results_path = f"{config['home_path']}/results/dataset/{config['dataset_name']}/" 24 | # Directory where the data is stored (images, transforms text files). 25 | data_path = f"{config['home_path']}/data" 26 | # File to store the generated training dataset. 27 | dataset_path = f"{config['home_path']}/datasets/{config['dataset_name']}.pickle" 28 | 29 | if not os.path.exists(results_path): 30 | os.makedirs(results_path) 31 | 32 | # Print outputs to files 33 | orig_stdout = sys.stdout 34 | fl = open(f'{results_path}out_data.txt', 'w') 35 | sys.stdout = fl 36 | orig_stderr = sys.stderr 37 | fe = open(f'{results_path}err_data.txt', 'w') 38 | sys.stderr = fe 39 | 40 | # Record the parameter configurations. 
41 | print(config) 42 | 43 | # Set up device, using GPU 0 44 | device = torch.device('cuda:{}'.format(0) if torch.cuda.device_count() > 0 else 'cpu') 45 | torch.cuda.set_device(0) 46 | 47 | start = time.time() 48 | 49 | num_samples_train = {} 50 | for path_name in config['train_paths']: 51 | num_samples_train[path_name] = math.floor(config['sampling_ratios_train'][path_name] * config['num_train_samples']) 52 | 53 | num_samples_valid = {} 54 | for path_name in config['test_paths']: 55 | num_samples_valid[path_name] = math.floor(config['sampling_ratios_valid'][path_name] * config['num_validation_samples']) 56 | 57 | # Training and validation data 58 | train_ids, train_labels_se3, train_labels_log = build_random_loc_dataset(data_path, 59 | config['train_paths'], 60 | config['train_runs'], 61 | num_samples_train, 62 | config['max_temporal_len']) 63 | 64 | valid_ids, valid_labels_se3, valid_labels_log = build_random_loc_dataset(data_path, 65 | config['test_paths'], 66 | config['validation_runs'], 67 | num_samples_valid, 68 | config['max_temporal_len']) 69 | 70 | # Class for storing the data before we write it to a pickle file. 71 | mel_data = MELData() 72 | 73 | mel_data.train_ids = train_ids 74 | mel_data.train_labels_se3 = train_labels_se3 75 | mel_data.train_labels_log = train_labels_log 76 | 77 | mel_data.valid_ids = valid_ids 78 | mel_data.valid_labels_se3 = valid_labels_se3 79 | mel_data.valid_labels_log = valid_labels_log 80 | 81 | with open(dataset_path, 'wb') as f: 82 | pickle.dump(mel_data, f, pickle.HIGHEST_PROTOCOL) 83 | 84 | print(f'\nTraining data size: {len(train_ids)}') 85 | print(f'Validation data size: {len(valid_ids)}\n') 86 | print('Saved training and validation dataset', flush=True) 87 | print(f'Generating training and validation datasets took {time.time() - start} s\n') 88 | 89 | # Compute mean and std_dev for the training set to use for input normalization if desirable. 90 | mean, std_dev = compute_mean_std(mel_data, data_path, config['dataset']['height'], config['dataset']['width']) 91 | mel_data.mean = mean 92 | mel_data.std_dev = std_dev 93 | 94 | # Generate sequential datasets that can be used for testing. 95 | mel_data.paths = config['test_paths'] 96 | for path_name in config['test_paths']: 97 | 98 | test_runs = config['test_runs'][path_name] 99 | 100 | # We localize different runs of a path against each other and so store the resulting data in lists. 101 | mel_data.test_ids[path_name] = [] 102 | mel_data.test_labels_se3[path_name] = [] 103 | mel_data.test_labels_log[path_name] = [] 104 | 105 | # Localize each of the runs to each other (but not both ways or to themselves). 
106 | for i in range(len(test_runs) - 1): 107 | 108 | teach_run = test_runs[i] 109 | repeat_runs = test_runs[(i + 1):] 110 | temporal_len = config['temporal_len'][path_name][str(teach_run)] 111 | test_ids, test_labels_se3, test_labels_log = build_sequential_loc_dataset(data_path, 112 | path_name, 113 | teach_run, 114 | repeat_runs, 115 | temporal_len) 116 | 117 | mel_data.test_ids[path_name] = mel_data.test_ids[path_name] + [test_ids] 118 | mel_data.test_labels_se3[path_name] = mel_data.test_labels_se3[path_name] + [test_labels_se3] 119 | mel_data.test_labels_log[path_name] = mel_data.test_labels_log[path_name] + [test_labels_log] 120 | 121 | with open(dataset_path, 'wb') as f: 122 | pickle.dump(mel_data, f, pickle.HIGHEST_PROTOCOL) 123 | 124 | print('\nSaved test dataset', flush=True) 125 | print(f'Generating full training, validation, and test dataset took {time.time() - start} s') 126 | 127 | # Stop writing outputs to files. 128 | sys.stdout = orig_stdout 129 | fl.close() 130 | sys.stderr = orig_stderr 131 | fe.close() 132 | 133 | if __name__ == '__main__': 134 | parser = argparse.ArgumentParser() 135 | parser.add_argument('--config', default=None, type=str, 136 | help='config file path (default: None)') 137 | 138 | args = parser.parse_args() 139 | 140 | with open(args.config) as f: 141 | config = json.load(f) 142 | 143 | main(config) 144 | -------------------------------------------------------------------------------- /data/pose_graph_tools.py: -------------------------------------------------------------------------------- 1 | """Copied from vtr-dataset-tools: https://github.com/utiasASRL/vtr-dataset-tools/blob/master/tools.py""" 2 | 3 | import copy 4 | import numpy as np 5 | 6 | from src.utils.transform import Transform 7 | 8 | class Vertex: 9 | """ 10 | The Vertex object represents a vertex that is part of a pose graph. 11 | Args: 12 | run_id (int): Id of the run the vertex belongs to. 13 | pose_id (int): Id of the pose for the vertex. 14 | next_run_id (int): Id of the run of the vertex that this vertex is connected to, 15 | either spatially (from repeat to teach) or temporally (one teach vertex to another). 16 | next_pose_id (int): Id of the pose of the vertex that this vertex is connected to, 17 | either spatially (from repeat to teach) or temporally (one teach vertex to another). 18 | next_transform (Transform): Transform from this to the connected vertex. 19 | teach (bool): True if vertex belongs to a teach run, False if repeat run. 20 | prev_run_id (int): Id of the previous consecutive run (optional). 21 | prev_pose_id (int): Id of the previous consecutive pose (optional). 22 | timestamp (float): timestamp in seconds of the image associated with the vertex (optional). 23 | gps_time (float): timestamp in seconds of the GPS measurement associated with the vertex (optional). 24 | latitude (float): GPS measurement associated with the vertex (optional). 25 | longitude (float): GPS measurement associated with the vertex (optional). 26 | altitude (float): GPS measurement associated with the vertex (optional). 
27 | """ 28 | def __init__(self, run_id, pose_id, next_run_id, next_pose_id, next_transform, teach=False, prev_run_id=-1, 29 | prev_pose_id=-1, timestamp=None, gps_time=None, latitude=None, longitude=None, altitude=None): 30 | self.vertex_id = (int(run_id), int(pose_id)) 31 | self.next_id = (int(next_run_id), int(next_pose_id)) 32 | self.next_transform = next_transform 33 | self.prev_id = (int(prev_run_id), int(prev_pose_id)) 34 | self.teach = teach 35 | self.timestamp = timestamp 36 | self.gps_time = gps_time 37 | self.latitude = latitude 38 | self.longitude = longitude 39 | self.altitude = altitude 40 | 41 | 42 | class Graph: 43 | ''' 44 | The Graph object represents a pose graph with a teach run and several repeat runs. Each run has 45 | several sequentially connected vertices. Vertices can also be connected between different runs. 46 | Args: 47 | teach_file (str): Path to file containing temporal transforms for teach run. 48 | repeat_files (list of str): Paths to files containing spatial transformations for repeat runs. 49 | im_time_files (list of str): Paths to files containing image time stamps for runs. 50 | gps_files (list of str): Paths to files contaning gps data for runs. 51 | gps_time_files (list of str): Paths to files containing gps time stamps for runs. 52 | ''' 53 | def __init__(self, teach_file, repeat_files=None, im_time_files=None, gps_files=None, gps_time_files=None): 54 | if repeat_files is None: 55 | repeat_files = [] 56 | if im_time_files is None: 57 | im_time_files = {} 58 | if gps_files is None: 59 | gps_files = {} 60 | if gps_time_files is None: 61 | gps_time_files = {} 62 | 63 | # Read teach run transforms from file 64 | transforms_temporal = np.loadtxt(teach_file, delimiter=",") 65 | self.teach_id = int(transforms_temporal[0, 0]) 66 | self.vertices = {} 67 | self.matches = {} 68 | 69 | # Add teach run vertices 70 | for row in transforms_temporal: 71 | transform = Transform(np.array([row[4:7], row[8:11], row[12:15]]), np.array([row[7], row[11], row[15]])) 72 | self.add_vertex(row[0], row[1], row[2], row[3], transform, True) 73 | 74 | self.add_prev_ids() 75 | 76 | # If repeat run transforms, add those to graph 77 | for run_file in repeat_files: 78 | self.add_run(run_file) 79 | 80 | self.add_timestamps(im_time_files) 81 | self.add_gps(gps_files, gps_time_files) 82 | 83 | def get_vertex(self, vertex_id): 84 | """Returns a vertex in the graph. 85 | Args: 86 | vertex_id (tuple of ints): Id of the graph vertex object. The id is a tuple containing 87 | the run id and the the pose id. 88 | Returns: 89 | Vertex: The vertex object, None if the vertex does not exist. 90 | """ 91 | return self.vertices.get(vertex_id) 92 | 93 | def get_all_vertices(self): 94 | 95 | return self.vertices 96 | 97 | def is_vertex(self, vertex_id): 98 | """Returns whether a vertex object corresponding to the given vertex id exists in the graph. 99 | Args: 100 | vertex_id (tuple of ints): Id of the graph vertex object. The id is a tuple containing 101 | the run id and the the pose id. 102 | Returns: 103 | bool: True if vertex exists in graph, False otherwise. 104 | """ 105 | if vertex_id in self.vertices: 106 | return True 107 | else: 108 | return False 109 | 110 | def add_vertex(self, run_id, pose_id, next_run_id, next_pose_id, next_transform, teach=False): 111 | """Adds a new vertex to the graph. 112 | Args: 113 | run_id (int): Id of the run the vertex belongs to. 114 | pose_id (int): Id of the pose for the vertex. 
115 | next_run_id (int): Id of the run of the vertex that this vertex is connected to, 116 | either spatially (from repeat to teach) or temporally (one teach vertex to another). 117 | next_pose_id (int): Id of the pose of the vertex that this vertex is connected to, 118 | either spatially (from repeat to teach) or temporally (one teach vertex to another). 119 | next_transform (Transform): Transform from this to the connected vertex. 120 | """ 121 | v = Vertex(run_id, pose_id, next_run_id, next_pose_id, next_transform, teach) 122 | 123 | # Check if vertex already exists 124 | if self.is_vertex(v.vertex_id): 125 | print("Vertex {0} already exists in graph".format(v.vertex_id)) 126 | return 127 | 128 | # If teach vertex, create an empty list of repeat vertices that have matched, 129 | # otherwise check that teach vertex exists and add this vertex to its list of matches 130 | if teach: 131 | self.matches[v.vertex_id] = [] 132 | self.vertices[v.vertex_id] = v 133 | elif v.next_id in self.vertices: 134 | self.matches[v.next_id].append(v) 135 | self.vertices[v.vertex_id] = v 136 | else: 137 | print("Warning: teach vertex {0} not found in graph so vertex {1} was not added.".format(v.next_id, v.vertex_id)) 138 | 139 | def add_prev_ids(self): 140 | """Iterates over all the teach vertices and adds the index of the previous vertex as an 141 | attribute to each one. 142 | """ 143 | for v_id in self.vertices: 144 | v = self.get_vertex(v_id) 145 | if v.teach and v.next_id in self.vertices: 146 | self.vertices[v.next_id].prev_id = v_id 147 | 148 | def add_run(self, run_file): 149 | """Add a new repeat run to the graph. 150 | Args: 151 | run_file (str): Path to file containing spatial transformations for the repeat run. 152 | 153 | """ 154 | transforms_spatial = np.loadtxt(run_file, delimiter=",") 155 | for row in transforms_spatial: 156 | transform = Transform(np.array([row[4:7], row[8:11], row[12:15]]), np.array([row[7], row[11], row[15]])) 157 | self.add_vertex(row[0], row[1], row[2], row[3], transform, False) 158 | 159 | def add_timestamps(self, im_time_files): 160 | """Add image timestamps to vertices. 161 | Args: 162 | im_time_files (list of str): Paths to files containing image time stamps for runs. 163 | """ 164 | for run in im_time_files: 165 | run_times = np.loadtxt(im_time_files[run], delimiter=",") 166 | for row in run_times: 167 | if (run, int(row[0])) in self.vertices: 168 | self.vertices[(run, row[0])].timestamp = float(row[1]) * 10**-9 169 | else: 170 | print("Warning: attempted to add timestamp for vertex ({0}, {1}) not in graph.".format(run, int(row[0]))) 171 | 172 | def add_gps(self, gps_files, gps_time_files): 173 | """Add GPS measurements and the associated timestamps to vertices. 174 | Args: 175 | gps_files (list of str): Paths to files contaning gps data for runs. 176 | gps_time_files (list of str): Paths to files containing gps time stamps for runs. 
177 | """ 178 | for run in gps_files: 179 | run_times = np.loadtxt(gps_files[run], delimiter=",") 180 | for row in run_times: 181 | if (run, row[0]) in self.vertices: 182 | self.vertices[(run, row[1])].latitude = row[2] 183 | self.vertices[(run, row[1])].longitude = row[3] 184 | self.vertices[(run, row[1])].altitude = row[4] 185 | else: 186 | print("Warning: attempted to add GPS data for vertex ({0}, {1}) not in graph.".format(run, int(row[1]))) 187 | 188 | for run in gps_time_files: 189 | run_times = np.loadtxt(gps_time_files[run], delimiter=",") 190 | for row in run_times: 191 | if (run, int(row[0])) in self.vertices: 192 | self.vertices[(run, row[0])].gps_time = float(row[1]) * 10**-9 193 | 194 | def get_subgraph(self, start, end): 195 | """Returns a subgraph made from the teach vertices between (teach_id, start) and ( 196 | teach_id, end), where teach_id refers to the run id of the teach run. 197 | Args: 198 | start (int): pose id of the vertex that starts the subgraph. 199 | end (int): pose id of the vertex that ends the subgraph. 200 | Returns: 201 | Graph: subgraph made from teach vertices between start and end. Original graph if pose 202 | ids do not correspond to existing vertices. 203 | """ 204 | if self.is_vertex((self.teach_id, start)) and self.is_vertex((self.teach_id, end)): 205 | subgraph = copy.copy(self) 206 | subgraph.matches = {id: self.matches[id] for id in self.matches if start <= id[1] <= end} 207 | 208 | subgraph.vertices = {} 209 | for m in subgraph.matches: 210 | subgraph.vertices.update({m: self.get_vertex(m)}) 211 | subgraph.vertices.update({repeat_vertex.vertex_id: repeat_vertex for repeat_vertex in subgraph.matches[m]}) 212 | 213 | return subgraph 214 | else: 215 | print("Warning: invalid vertex chosen for subgraph, returning original graph.") 216 | return self 217 | 218 | def get_topological_dist(self, vertex_id1, vertex_id2): 219 | """Returns number of edges between two vertices in the graph. 220 | Args: 221 | vertex_id1 (tuple of ints): Id of the first vertex object. The id is a tuple containing 222 | the run id and the the pose id. 223 | vertex_id2 (tuple of ints): Id of the second vertex object. The id is a tuple containing 224 | the run id and the the pose id. 225 | Returns: 226 | int: The number of edges between the two vertices, -1 if no path exists between them. 227 | """ 228 | path, _ = self.get_path(vertex_id1, vertex_id2) 229 | return len(path) - 1 230 | 231 | def get_transform(self, vertex_id1, vertex_id2): 232 | """Returns the transform, T_21, from vertex 1 to vertex 2 in the graph. 233 | Args: 234 | vertex_id1 (tuple of ints): Id of the first vertex object. The id is a tuple 235 | containing the run id and the the pose id. 236 | vertex_id2 (tuple of ints): Id of the second vertex object. The id is a tuple 237 | containing the run id and the the pose id. 238 | Returns: 239 | Transform: The transform, T_21, from vertex 1 to vertex 2. Identity transform if no 240 | path exists between the vertices. 
241 | """ 242 | transform = Transform(np.eye(3), np.zeros((3,))) 243 | 244 | path, forward = self.get_path(vertex_id1, vertex_id2) 245 | 246 | if len(path) == 0: 247 | print("Warning: no path found between vertex {0} and vertex {1}, returning identity transform.".format(vertex_id1, vertex_id2)) 248 | return transform 249 | if len(path) == 1: 250 | return transform 251 | 252 | # Always calculate transform in forward direction then flip if backward 253 | if not forward: 254 | path.reverse() 255 | 256 | # Compose transforms 257 | for vertex in path[:-2]: 258 | transform = self.get_vertex(vertex).next_transform * transform 259 | 260 | # Check if last vertex in path is a teach vertex 261 | if self.get_vertex(path[-2]).next_id == path[-1]: 262 | transform = self.get_vertex(path[-2]).next_transform * transform 263 | else: 264 | transform = self.get_vertex(path[-1]).next_transform.inv() * transform 265 | 266 | if forward: 267 | return transform 268 | else: 269 | return transform.inv() 270 | 271 | def get_path(self, vertex_id1, vertex_id2): 272 | """Returns a list of vertices connecting vertex 1 and vertex 2. 273 | Traverses graph both ways and returns shorter path (topologically). 274 | Args: 275 | vertex_id1 (tuple of ints): Id of the first vertex object. The id is a tuple containing 276 | the run id and the the pose id. 277 | vertex_id2 (tuple of ints): Id of the second vertex object. The id is a tuple containing 278 | the run id and the the pose id. 279 | Returns: 280 | list of tuples of ints: List of vertex ids of vertices connecting vertex 1 and vertex 2. 281 | List of length one if vertex 1 and 2 are the same. Empty list if no path is found 282 | or if at least one of the vertices does not exist in the graph. 283 | bool: True if path is in the forward direction, otherwise False. 284 | """ 285 | if self.get_vertex(vertex_id1) is None or self.get_vertex(vertex_id2) is None: 286 | print("Warning: no path found. At least one of the vertices are invalid.") 287 | return [], False 288 | 289 | # Set to false if can't find path between vertices 290 | forward_valid = True 291 | backward_valid = True 292 | 293 | forward_path = [vertex_id1] 294 | 295 | # Check if vertex are the same 296 | if vertex_id1 == vertex_id2: 297 | return forward_path, True 298 | 299 | # Follow chain of vertices from vertex 1 to vertex 2 300 | start = self.get_vertex(vertex_id1) 301 | goal = self.get_vertex(vertex_id2) 302 | if not goal.teach: 303 | goal = self.get_vertex(goal.next_id) 304 | while start != goal: 305 | start = self.get_vertex(start.next_id) 306 | if start is None: 307 | forward_valid = False 308 | break 309 | else: 310 | forward_path.append(start.vertex_id) 311 | # If vertex 2 is repeat vertex add edge from teach path to vertex 2 at end 312 | if forward_path[-1] != vertex_id2: 313 | forward_path.append(vertex_id2) 314 | 315 | # Check other way around loop 316 | backward_path = [vertex_id2] 317 | start = self.get_vertex(vertex_id2) 318 | goal = self.get_vertex(vertex_id1) 319 | if not goal.teach: 320 | goal = self.get_vertex(goal.next_id) 321 | while start != goal: 322 | start = self.get_vertex(start.next_id) 323 | if start is None: 324 | backward_valid = False 325 | break 326 | else: 327 | backward_path.append(start.vertex_id) 328 | # If vertex 1 is repeat vertex add edge from teach path to vertex 1 at end 329 | if backward_path[-1] != vertex_id1: 330 | backward_path.append(vertex_id1) 331 | 332 | # Return shorter of the two. 
Returns boolean to indicate if path is in the forward direction 333 | if len(forward_path) <= len(backward_path) or not backward_valid and forward_valid: 334 | return forward_path, True 335 | elif backward_valid: 336 | backward_path.reverse() 337 | return backward_path, False 338 | else: 339 | print("Warning: no path found. Problem with graph.") 340 | return [], False 341 | 342 | def get_topo_neighbours(self, vertex_id, radius): 343 | """Returns a set of topological neighbour vertices within radius number of edges of the 344 | given vertex. 345 | Args: 346 | vertex_id (tuple of ints): Id of the first vertex object. The id is a tuple containing 347 | the run id and the the pose id. 348 | radius (int): Distance in number of edges used as the search radius. 349 | Returns: 350 | set: Set of vertex ids for neighbour vertices. Set of size 1 if radius is 0. 351 | Empty set if radius is negative or if the given vertex does not exist in the graph. 352 | """ 353 | if not self.is_vertex(vertex_id): 354 | print("Warning: no topo neighbours found. Vertex {0} does not exist.".format(vertex_id)) 355 | return set() 356 | 357 | if radius < 0: 358 | raise ValueError("Negative radius specified.") 359 | 360 | neighbours = {vertex_id} 361 | if radius == 0: 362 | return neighbours 363 | 364 | search_radius = 0 365 | 366 | v = self.get_vertex(vertex_id) 367 | if not v.teach: 368 | neighbours.add(v.next_id) 369 | left_bound = v.next_id 370 | right_bound = v.next_id 371 | search_radius += 1 372 | else: 373 | left_bound = vertex_id 374 | right_bound = vertex_id 375 | 376 | while search_radius < radius: 377 | neighbours.update(m.vertex_id for m in self.matches[left_bound]) 378 | neighbours.update(m.vertex_id for m in self.matches[right_bound]) 379 | left_bound = self.get_vertex(left_bound).prev_id 380 | right_bound = self.get_vertex(right_bound).next_id 381 | neighbours.add(left_bound) 382 | neighbours.add(right_bound) 383 | search_radius += 1 384 | 385 | return neighbours 386 | 387 | def get_metric_neighbours(self, vertex_id, radius): 388 | """Returns a set of metric neighbour vertices within radius meters of the given vertex. 389 | Args: 390 | vertex_id (tuple of ints): Id of the first vertex object. The id is a tuple containing 391 | the run id and the the pose id. 392 | radius (float): Distance in meters used as the search radius. 393 | Returns: 394 | set: Set of vertex ids for neighbour vertices. Empty set if radius is negative 395 | or if the given vertex does not exist in the graph. 396 | """ 397 | if not self.is_vertex(vertex_id): 398 | print("Warning: no metric neighbours found. 
Vertex {0} does not exist.".format(vertex_id)) 399 | return set() 400 | 401 | if radius < 0: 402 | raise ValueError("Negative radius specified.") 403 | 404 | neighbours = {vertex_id} 405 | v = self.get_vertex(vertex_id) 406 | if not v.teach: 407 | teach_v = self.get_vertex(v.next_id) 408 | teach_T = v.next_transform 409 | else: 410 | teach_v = v 411 | teach_T = Transform(np.eye(3), np.zeros((3,))) 412 | 413 | # Search forward direction 414 | query_v = teach_v 415 | query_T = teach_T 416 | while True: 417 | # Test repeat vertices localized to current teach 418 | for m in self.matches[query_v.vertex_id]: 419 | T = m.next_transform.inv() * query_T 420 | if np.linalg.norm(T.r_ab_inb) < radius: 421 | neighbours.add(m.vertex_id) 422 | 423 | # Update current teach 424 | if np.linalg.norm(query_T.r_ab_inb) < radius: 425 | neighbours.add(query_v.vertex_id) 426 | query_T = query_v.next_transform * query_T 427 | query_v = self.get_vertex(query_v.next_id) 428 | else: 429 | break 430 | 431 | # Search backward direction 432 | query_v = teach_v 433 | query_T = teach_T 434 | while True: 435 | # Update current teach. Order flipped to avoid checking closest teach vertex twice 436 | if np.linalg.norm(query_T.r_ab_inb) < radius: 437 | neighbours.add(query_v.vertex_id) 438 | query_v = self.get_vertex(query_v.prev_id) 439 | query_T = query_v.next_transform.inv() * query_T 440 | else: 441 | break 442 | # Test repeat vertices localized to current teach 443 | for m in self.matches[query_v.vertex_id]: 444 | T = m.next_transform.inv() * query_T 445 | if np.linalg.norm(T.r_ab_inb) < radius: 446 | neighbours.add(m.vertex_id) 447 | 448 | return neighbours 449 | -------------------------------------------------------------------------------- /data/sample_data.py: -------------------------------------------------------------------------------- 1 | from operator import itemgetter 2 | import re 3 | import random 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from src.utils.transform import Transform 9 | 10 | def random_sample(path_name, pose_graph, runs, num_samples, max_temporal_length): 11 | """ 12 | Sample relative pose transforms for localization randomly from the pose graph. Compute the pose between vertices 13 | from different experiences in the pose graph. Record the pose transform as a 4x4 matrix and the 6 DOF vector 14 | equivalent. The pose from vertex, v1, to vertex, v2, is given as T_v2_v1. 15 | 16 | Create sample ids the vertex ids. A vertex id consists of the id of the run the vertex belongs to and the id of 17 | the pose along that run, i.e. vertex_id = (run_id, pose_id). The sample id corresponding to pose transform 18 | T_v2_v1 is on the form pathname_runid1_poseid1_runid2_poseid2, for instance: mutliseason_1_531_5_542. 19 | 20 | Args: 21 | path_name (string): name given to the path that the pose graph represents. 22 | pose_graph (Graph): the pose graph. 23 | runs (list[int]): list of the run ids of runs to sample from. 24 | num_samples (int): the number of samples to collect. 25 | max_temporal_length (int): we can 'walk along' the pose graph to pair vertices that har further apart (i.e. 26 | not the closest pair). This is the max topological distance/steps we move away 27 | form the start vertex. 28 | 29 | Returns: 30 | samples (list[string]): list of all the sample ids. 31 | labels_se3 (dict): dictionary mapping sample id to pose transform 4x4 matrix provided as a torch.Tensor. 32 | labels_log (dict): dictionary mapping sample id to pose transform 6 DOF vector provided as a torch.Tensor. 
33 | """ 34 | sample_counter = 0 35 | sample_counter_identity = 0 36 | samples = [] 37 | labels_se3 = {} 38 | labels_log = {} 39 | 40 | # The ids of all the vertices in the pose graph. 41 | vertex_ids = pose_graph.get_all_vertices().keys() 42 | 43 | while sample_counter < num_samples: 44 | 45 | # Randomly choose a live index, live_id is on the form (run_id, pose_id) 46 | live_id = random.sample(vertex_ids, 1)[0] 47 | live_vertex = pose_graph.get_vertex(live_id) 48 | 49 | if live_vertex is None: 50 | print('live vertex is None') 51 | continue 52 | 53 | # The chosen vertex is not in one of the runs we want to sample from. 54 | if live_id[0] not in runs: 55 | continue 56 | 57 | # Get all the neighbouring vertices (on all runs/experiences) withing the specified distance. 58 | try: 59 | neighbour_ids = pose_graph.get_topo_neighbours(live_id, max_temporal_length + 1) 60 | except Exception as e: 61 | print(e) 62 | print(f'Could not get topological neighbours for live_id: {live_id}') 63 | continue 64 | 65 | if len(neighbour_ids) == 0: 66 | print(f'{path_name} - Random sampling: Could not find neighbours for vertex {live_id} within topological' \ 67 | f' distance {max_temporal_length}') 68 | continue 69 | 70 | # Randomly pick a target vertex from the neighbours. 71 | other_id = random.sample(neighbour_ids, 1)[0] 72 | 73 | if other_id[0] not in runs: 74 | print(f'{path_name} - Random sampling: other vertex is in run {other_id[0]}, which is not in desired runs') 75 | continue 76 | 77 | if live_id[0] == other_id[0]: 78 | print(f'{path_name} - Random sampling: do not want to localize to vertex on the same run') 79 | continue 80 | 81 | other_vertex = pose_graph.get_vertex(other_id) 82 | 83 | if other_vertex is None: 84 | print('other vertex is None') 85 | continue 86 | 87 | # Create a sample ID and check that it has not already been added. 88 | sample_id = f'{path_name}-{live_id[0]}-{live_id[1]}-{other_id[0]}-{other_id[1]}' 89 | 90 | if not sample_id in samples: 91 | 92 | T_other_live = pose_graph.get_transform(live_id, other_id) 93 | log_other_live = Transform.LogMap(T_other_live) 94 | 95 | samples.append(sample_id) 96 | labels_se3[sample_id] = torch.tensor(T_other_live.matrix, dtype=torch.float) 97 | labels_log[sample_id] = torch.tensor(log_other_live, dtype=torch.float) 98 | 99 | sample_counter += 1 100 | 101 | else: 102 | 103 | print(f'{path_name} - Random sampling: sample {sample_id} has already been added', flush=True) 104 | print(f'Number of sampled poses so far: {sample_counter}') 105 | 106 | return samples, labels_se3, labels_log 107 | 108 | 109 | def sequential_sample(path_name, pose_graph, map_run_id, live_runs, temporal_length): 110 | """ 111 | Sample relative pose transforms for localization sequentially from the pose graph. Compute the pose from 112 | vertices from each of the live runs to one map run. Compute the pose for each vertex on the live runs 113 | sequentially. Record the pose transform as a 4x4 matrix and the 6 DOF vector equivalent. The pose from vertex, 114 | v1, to vertex, v2, is given as T_v2_v1. 115 | 116 | Create sample ids the vertex ids. A vertex id consists of the id of the run the vertex belongs to and the id of 117 | the pose along that run, i.e. vertex_id = (run_id, pose_id). The sample id corresponding to pose transform 118 | T_v2_v1 is on the form pathname_runid1_poseid1_runid2_poseid2, for instance: mutliseason_1_531_5_542. 119 | 120 | Args: 121 | path_name (string): name given to the path that the pose graph represents. 122 | pose_graph (Graph): the pose graph. 
123 | map_run_id (int): id of the run to localize to, i.e. compute the relative pose to vertices on this run. 124 | live_runs (list[int]): the runs we localize to the map run, i.e. compute relative pose from vertices on 125 | these runs. 126 | temporal_length (int): we can 'walk along' the pose graph to pair vertices that har further apart (i.e. 127 | not the closest pair). We set a fixed topological distance/steps we move away 128 | from the start vertex. 129 | 130 | Returns: 131 | samples (list[string]): list of all the sample ids. 132 | labels_se3 (dict): dictionary mapping sample id to pose transform 4x4 matrix provided as a torch.Tensor. 133 | labels_log (dict): dictionary mapping sample id to pose transform 6 DOF vector provided as a torch.Tensor. 134 | """ 135 | samples = [] 136 | added_live = [] 137 | labels_se3 = {} 138 | labels_log = {} 139 | 140 | # Get the graph vertices in a sorted list by run_id and pose_id. The vertex ids are on the form (run_id, pose_id). 141 | # We retrieve all the vertices in the pose graph and filter out the correct ones later. 142 | vertex_ids = pose_graph.get_all_vertices().keys() 143 | vertex_ids = sorted(vertex_ids, key=itemgetter(0,1)) 144 | 145 | # Localize each vertex in the live run sequentially. 146 | for live_id in vertex_ids: 147 | 148 | live_vertex = pose_graph.get_vertex(live_id) 149 | 150 | if live_vertex is None: 151 | continue 152 | 153 | if (live_id[0] in live_runs) and (live_id[0] != map_run_id): 154 | 155 | # Vertices are connected via the teach run 0 in the pose graph, which adds one to the distance count between 156 | # them, i.e. if run5_pose50 is localized directly to run3_pose54, we get r5_p50 -> r0_p52 -> r3_p54 so a 157 | # radius of 2 instead of 1. 158 | radius = temporal_length + 1 159 | max_radius = 8 160 | map_pose_id = -1 161 | smallest_metric_dist = 1000 162 | chosen_topo_dist = 1000 163 | 164 | # Try to find a vertex on the map run to localize against. If we can find one at the specified topological 165 | # distance, then increase the search radius. 166 | while (map_pose_id == -1) and (radius <= max_radius): 167 | try: 168 | neighbour_ids = pose_graph.get_topo_neighbours(live_id, radius) 169 | except Exception as e: 170 | print(e) 171 | print(f'{path_name} - Sequential sampling: Could not localize {live_id} to map run, topological' \ 172 | f' neighbours failed') 173 | radius += 1 174 | continue 175 | 176 | # Find the closest vertex (on the map run) of the valid neighbours to localize to. 177 | for n_id in neighbour_ids: 178 | if n_id[0] == map_run_id: 179 | topo_dist = pose_graph.get_topological_dist(live_id, n_id) 180 | T_n_live = pose_graph.get_transform(live_id, n_id) 181 | metric_dist = np.linalg.norm(T_n_live.r_ab_inb) 182 | if (metric_dist < smallest_metric_dist) and (topo_dist >= temporal_length + 1): 183 | smallest_metric_dist = metric_dist 184 | chosen_topo_dist = topo_dist 185 | map_pose_id = n_id[1] 186 | 187 | radius += 1 188 | 189 | if map_pose_id == -1: 190 | print(f'{path_name} - Sequential sampling: Could not localize {live_id} to map run ' \ 191 | f'within {max_radius -1} edges.') 192 | continue 193 | 194 | if chosen_topo_dist != temporal_length + 1: 195 | print(f'{path_name} - Sequential sampling: Could not match {live_id} at topological distance ' \ 196 | f'{temporal_length}, matched at length {chosen_topo_dist - 1} instead.') 197 | 198 | # Get the map vertex we want to localize to. 
199 | map_id = (map_run_id, map_pose_id) 200 | map_vertex = pose_graph.get_vertex(map_id) 201 | 202 | if map_vertex is None: 203 | print(f'{path_name} - Sequential sampling: Could not localize {live_id} at topological distance ' \ 204 | f'{temporal_length} (map vertex is None).') 205 | continue 206 | 207 | # Create a sample ID and check that it has not already been added 208 | sample_id = f'{path_name}-{live_id[0]}-{live_id[1]}-{map_id[0]}-{map_id[1]}' 209 | 210 | if not sample_id in samples: 211 | 212 | T_map_live = pose_graph.get_transform(live_id, map_id) 213 | log_map_live = Transform.LogMap(T_map_live) 214 | 215 | samples.append(sample_id) 216 | labels_se3[sample_id] = torch.tensor(T_map_live.matrix, dtype=torch.float) 217 | labels_log[sample_id] = torch.tensor(log_map_live, dtype=torch.float) 218 | 219 | else: 220 | 221 | print(f'{path_name} - Sequential sampling: sample {sample_id} has already been added') 222 | 223 | return samples, labels_se3, labels_log -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # This example Dockerfile illustrates a method to install 3 | # additional packages on top of NVIDIA's PyTorch container image. 4 | # 5 | # To use this Dockerfile, use the `docker build` command. 6 | # See https://docs.docker.com/engine/reference/builder/ 7 | # for more information. 8 | # 9 | # Software available with the container version 20.11 is listed at: 10 | # https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html 11 | # 12 | FROM nvcr.io/nvidia/pytorch:20.11-py3 13 | 14 | # Python OpenCV should be available, if not, run the following. 15 | RUN apt-get update 16 | RUN apt-get upgrade -y --no-install-recommends 17 | RUN apt-get update 18 | 19 | RUN pip install --upgrade pip 20 | RUN pip install opencv-python==3.4.8.29 21 | 22 | # Add a user if desirable 23 | #RUN useradd -ms /bin/bash -u 24 | #USER 25 | #WORKDIR /home/ 26 | -------------------------------------------------------------------------------- /docker/docker_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "" 4 | echo "Run docker container detached, add data volumes, and execute python script." 5 | echo "" 6 | 7 | docker run --gpus "device=0" -d -it --rm --name create_vtr_dataset --shm-size=64g \ 8 | -v :/home//data/inthedark:ro \ 9 | -v :/home//data/multiseason:ro \ 10 | -v /home//code:/home//code \ 11 | -v /home//results:/home//results \ 12 | -v /home//networks:/home//networks \ 13 | -v /home//datasets:/home//datasets \ 14 | \ 15 | bash -c "cd /home//code/deep_learned_visual_features && python -m data.build_train_test_dataset_loc --config /home//code/deep_learned_visual_features/config/data.json" 16 | -------------------------------------------------------------------------------- /docker/docker_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "" 4 | echo "Run docker container detached, add data volumes, and execute python script." 
5 | echo "" 6 | 7 | docker run --gpus "device=0" -d -it --rm --name create_vtr_dataset --shm-size=64g \ 8 | -v :/home//data/inthedark:ro \ 9 | -v :/home//data/multiseason:ro \ 10 | -v /home//code:/home//code \ 11 | -v /home//results:/home//results \ 12 | -v /home//networks:/home//networks \ 13 | -v /home//datasets:/home//datasets \ 14 | \ 15 | bash -c "cd /home//code/deep_learned_visual_features && python -m src.test --config /home//code/deep_learned_visual_features/config/test.json" 16 | 17 | -------------------------------------------------------------------------------- /docker/docker_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "" 4 | echo "Run docker container detached, add data volumes, and execute python script." 5 | echo "" 6 | 7 | docker run --gpus "device=0" -d -it --rm --name train_model --shm-size=64g \ 8 | -v :/home//data/inthedark:ro \ 9 | -v :/home//data/multiseason:ro \ 10 | -v /home//code:/home//code \ 11 | -v /home//results:/home//results \ 12 | -v /home//networks:/home//networks \ 13 | -v /home//datasets:/home//datasets \ 14 | \ 15 | bash -c "cd /home//code/deep_learned_visual_features && python -m src.train --config /home//code/deep_learned_visual_features/config/train.json" 16 | 17 | -------------------------------------------------------------------------------- /download_datasets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget --load-cookies /tmp/cookies.txt "https://drive.google.com/uc?export=download&confirm=$(wget --quiet \ 3 | --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate \ 4 | 'https://drive.google.com/uc?export=download&id=10yOGwbGDhJEhbm0h4faiy-lO-7NPGJEQ' -O- | \ 5 | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=10yOGwbGDhJEhbm0h4faiy-lO-7NPGJEQ" -O datasets.zip \ 6 | && rm -rf /tmp/cookies.txt 7 | unzip datasets.zip 8 | rm datasets.zip -------------------------------------------------------------------------------- /download_networks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget --load-cookies /tmp/cookies.txt "https://drive.google.com/uc?export=download&confirm=$(wget --quiet \ 3 | --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate \ 4 | 'https://drive.google.com/uc?export=download&id=1xFGIJACmW6R_J2uzU6JAZKzQ6TBWDJ55' -O- | \ 5 | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1xFGIJACmW6R_J2uzU6JAZKzQ6TBWDJ55" -O networks.zip \ 6 | && rm -rf /tmp/cookies.txt 7 | unzip networks.zip 8 | rm networks.zip -------------------------------------------------------------------------------- /extract_features.py: -------------------------------------------------------------------------------- 1 | '''A script that shows how to pass an image to the network to get keypoints, descriptors and scrores. ''' 2 | 3 | import sys 4 | import os 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from src.model.unet import UNet 12 | from src.model.keypoint_block import KeypointBlock 13 | from src.utils.keypoint_tools import normalize_coords, get_norm_descriptors, get_scores 14 | 15 | 16 | def get_keypoint_info(kpt_2D, scores_map, descriptors_map): 17 | """ 18 | Gather information we need associated with each detected keypoint. Compute the normalized 19 | descriptor and the score for each keypoint. 
20 | 21 | Args: 22 | kpt_2D (torch.tensor): keypoint 2D image coordinates, (Bx2xN). 23 | scores_map (torch.tensor): scores for each pixel, (Bx1xHxW). 24 | descriptors_map (torch.tensor): descriptors for each pixel, (BxCxHxW). 25 | 26 | Returns: 27 | kpt_desc_norm (torch.tensor): Normalized descriptor for each keypoint, (BxCxN). 28 | kpt_scores (torch.tensor): score for each keypoint, (Bx1xN). 29 | 30 | """ 31 | batch_size, _, height, width = scores_map.size() 32 | 33 | kpt_2D_norm = normalize_coords(kpt_2D, batch_size, height, width).unsqueeze(1) # Bx1xNx2 34 | 35 | kpt_desc_norm = get_norm_descriptors(descriptors_map, True, kpt_2D_norm) 36 | 37 | kpt_scores = get_scores(scores_map, kpt_2D_norm) 38 | 39 | return kpt_desc_norm, kpt_scores 40 | 41 | 42 | class LearnedFeatureDetector(nn.Module): 43 | """ 44 | Class to detect learned features. 45 | """ 46 | def __init__(self, n_channels, layer_size, window_height, window_width, image_height, image_width, checkpoint_path, cuda): 47 | """ 48 | Set the variables needed to initialize the network. 49 | 50 | Args: 51 | num_channels (int): number of channels in the input image (we use 3 for one RGB image). 52 | layer_size (int): size of the first layer if the encoder. The size of the following layers are 53 | determined from this. 54 | window_height (int): height of window, inside which we detect one keypoint. 55 | window_width (int): width of window, inside which we detect one keypoint. 56 | image_height (int): height of the image. 57 | image_width (int): width of the image. 58 | checkpoint_path (string): path to where the network weights are stored. 59 | cuda (bool): true if using the GPU. 60 | """ 61 | super(LearnedFeatureDetector, self).__init__() 62 | 63 | self.cuda = cuda 64 | self.n_classes = 1 65 | self.n_channels = n_channels 66 | self.layer_size = layer_size 67 | self.window_h = window_height 68 | self.window_w = window_width 69 | self.height = image_height 70 | self.width = image_width 71 | 72 | # Load the network weights from a checkpoint. 73 | if os.path.exists(checkpoint_path): 74 | checkpoint = torch.load(checkpoint_path) 75 | else: 76 | raise RuntimeError(f'The specified checkpoint path does not exists: {checkpoint_path}') 77 | 78 | self.net = UNet(self.n_channels, self.n_classes, self.layer_size) 79 | # self.net = UNet(self.n_channels, self.n_classes, self.layer_size, self.height, self.width, checkpoint) 80 | self.net.load_state_dict(checkpoint['model_state_dict']) 81 | self.keypoint_block = KeypointBlock(self.window_h, self.window_w, self.height, self.width) 82 | self.sigmoid = nn.Sigmoid() 83 | 84 | if cuda: 85 | self.net.cuda() 86 | self.keypoint_block.cuda() 87 | 88 | self.net.eval() 89 | 90 | def run(self, image_tensor): 91 | """ 92 | Forward pass of network to get keypoint detector values, descriptors and, scores 93 | 94 | Args: 95 | image_tensor (torch.tensor, Bx3xHxW): RGB images to input to the network. 96 | 97 | Returns: 98 | keypoints (torch.tensor, Bx2xN): the detected keypoints, N=number of keypoints. 99 | descriptors (torch.tensor, BxCxN): descriptors for each keypoint, C=496 is length of descriptor. 100 | scores (torch.tensor, Bx1xN): an importance score for each keypoint. 
101 | 102 | """ 103 | if self.cuda: 104 | image_tensor = image_tensor.cuda() 105 | 106 | detector_scores, scores, descriptors = self.net(image_tensor) 107 | scores = self.sigmoid(scores) 108 | 109 | # Get 2D keypoint coordinates from detector scores, Bx2xN 110 | keypoints = self.keypoint_block(detector_scores) 111 | 112 | # Get one descriptor and scrore per keypoint, BxCxN, Bx1xN, C=496. 113 | point_descriptors_norm, point_scores = get_keypoint_info(keypoints, scores, descriptors) 114 | 115 | return keypoints.detach().cpu(), point_descriptors_norm.detach().cpu(), point_scores.detach().cpu() 116 | 117 | 118 | if __name__ == '__main__': 119 | 120 | cuda = False 121 | checkpoint = 'path_to_network/network_multiseason_inthedark_layer16.pth' 122 | learned_feature_detector = LearnedFeatureDetector(n_channels=3, 123 | layer_size=16, 124 | window_height=16, 125 | window_width=16, 126 | image_height=384, 127 | image_width=512, 128 | checkpoint_path=checkpoint, 129 | cuda=cuda) 130 | 131 | 132 | test_image = torch.rand(1, 3, 384, 512) 133 | keypoints, descriptors, scores = learned_feature_detector.run(test_image) 134 | 135 | print(keypoints.size()) 136 | print(descriptors.size()) 137 | print(scores.size()) 138 | 139 | -------------------------------------------------------------------------------- /figures/box_plot_generalization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/box_plot_generalization.png -------------------------------------------------------------------------------- /figures/box_plot_lighting_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/box_plot_lighting_change.png -------------------------------------------------------------------------------- /figures/conditions_generalization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/conditions_generalization.png -------------------------------------------------------------------------------- /figures/conditions_lighting_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/conditions_lighting_change.png -------------------------------------------------------------------------------- /figures/network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/network.png -------------------------------------------------------------------------------- /figures/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/overview.png -------------------------------------------------------------------------------- /figures/pipeline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/figures/pipeline.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.3.2 2 | numpy==1.19.2 3 | opencv-python==3.4.8.29 4 | Pillow==7.0.0 5 | scikit-image==0.15.0 6 | scikit-learn==0.23.2 7 | scipy==1.5.2 8 | torch==1.8.0 9 | torchvision==0.8.0 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/src/__init__.py -------------------------------------------------------------------------------- /src/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import re 4 | 5 | import cv2 6 | import numpy as np 7 | from PIL import Image 8 | import torch 9 | from torch.utils import data 10 | from torchvision import transforms 11 | 12 | class Dataset(data.Dataset): 13 | """ 14 | Dataset for the VT&R localization data. 15 | """ 16 | 17 | def __init__(self, data_dir, height=384, width=512, use_normalization=False, use_disparity=True): 18 | """ 19 | Initialize the Dataset class with the necessary parameters. 20 | 21 | Args: 22 | data_dir (string): the directory where the images are stored. 23 | height (int): height of the images. 24 | width (int): width of the images. 25 | use_normalization (bool): whether to normalize the images. 26 | use_disparity (bool): whether to generate disparity for stereo pairs. 27 | """ 28 | 29 | self.list_ids = [] 30 | self.labels_se3 = {} 31 | self.labels_log = {} 32 | self.data_dir = data_dir 33 | self.height = height 34 | self.width = width 35 | self.use_normalization = use_normalization 36 | self.use_disparity = use_disparity 37 | 38 | # Create transforms to apply to images 39 | self.image_transforms = transforms.Compose([ 40 | transforms.ToTensor() 41 | ]) 42 | 43 | self.normalize_transforms = None 44 | 45 | def load_mel_data(self, mel_data, mode, path_name='', index=None): 46 | """ 47 | Load the dataset that we have created ahead of time. 48 | 49 | Args: 50 | mel_data (MELData): object that stores the data sample ids and labels. 51 | mode (string): a string with value 'training', 'validation', or 'testing'. 52 | path_name (string, optional): if the data is used for testing, the name of the path we want to test. 53 | index (int, optional): if the data is used for testing, we have several test paths, the index indicates 54 | which one to use. 55 | """ 56 | 57 | # Set the mean and std_dev if we want to normalize the data.
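# (Assumption for clarity: mel_data.mean and mel_data.std_dev are expected to be per-channel image statistics
# computed when the dataset was built; the exact values are supplied by the MELData object, not hard-coded here.)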
58 | self.normalize_transforms = transforms.Compose([ 59 | transforms.ToTensor(), 60 | transforms.Normalize(mel_data.mean, mel_data.std_dev) 61 | ]) 62 | 63 | if mode == 'training': 64 | self.list_ids = mel_data.train_ids 65 | self.labels_se3 = mel_data.train_labels_se3 66 | self.labels_log = mel_data.train_labels_log 67 | 68 | elif mode == 'validation': 69 | self.list_ids = mel_data.valid_ids 70 | self.labels_se3 = mel_data.valid_labels_se3 71 | self.labels_log = mel_data.valid_labels_log 72 | 73 | elif mode == 'testing': 74 | self.list_ids = mel_data.test_ids[path_name][index] 75 | self.labels_se3 = mel_data.test_labels_se3[path_name][index] 76 | self.labels_log = mel_data.test_labels_log[path_name][index] 77 | 78 | else: 79 | raise ValueError('Dataset, load_mel_data: mode must be set to training, validatation, or testing.') 80 | 81 | def __len__(self): 82 | """ 83 | The number of samples in the dataset. 84 | """ 85 | return len(self.list_ids) 86 | 87 | def __getitem__(self, index): 88 | """ 89 | Load a sample from the dataset 90 | 91 | Args: 92 | index (int): index of the sample to load. 93 | 94 | Returns: 95 | X (torch.tensor): two RGB stereo pairs of images stacked together, one source image pair and one target 96 | image pair (Bx12xHxW). 97 | D (torch.tensor): two disparity images, computed from the two pairs of stereo images (Bx2xHW). 98 | sample_id (string): the id for the data sample on the form 99 | pathname_runidsrc_poseidsrc_runidtrg_poseidtrg. 100 | y_se3 (torch.tensor): the relative pose from the source to target, T_trg_src, given as a 4x4 matrix 101 | (Bx4x4). 102 | y_log (torch.tensor): the relative pose from the source to target given as a length 6 vector (Bx6). 103 | """ 104 | 105 | # Select sample 106 | sample_id = self.list_ids[index] 107 | 108 | # Get the path name, run id, and pose id for the sample. Combining the run and pose id gives us the id of 109 | # a given vertex in the pose graph that the data was sampled from. 110 | data_info = re.findall('\w+', sample_id) 111 | path_name = data_info[0] 112 | run_ids = data_info[1::2] 113 | pose_ids = data_info[2::2] 114 | 115 | num_frames = 2 # One source and one target frame. 116 | num_channels = num_frames * 6 # Two RGB stereo pairs, 2 * 2 * 3. 117 | 118 | X = torch.zeros(num_channels, self.height, self.width) # Images 119 | D = torch.zeros(num_frames, self.height, self.width) # Disparity 120 | 121 | self.add_images(X, 0, path_name, run_ids[0], pose_ids[0], self.use_normalization) # Source image 122 | self.add_images(X, 1, path_name, run_ids[1], pose_ids[1], self.use_normalization) # Target image 123 | 124 | if self.use_disparity: 125 | self.add_disparity(D, 0, path_name, run_ids[0], pose_ids[0]) # Source disparity 126 | self.add_disparity(D, 1, path_name, run_ids[1], pose_ids[1]) # Target disparity 127 | 128 | # Pose transforms 129 | y_se3 = self.labels_se3[sample_id] 130 | y_log = self.labels_log[sample_id] 131 | 132 | return X, D, sample_id, y_se3, y_log 133 | 134 | def add_images(self, X, ind, path_name, run_id, pose_id, normalize_img): 135 | """ 136 | Add a stereo pair of images to the images tensor. 137 | 138 | Args: 139 | X (torch.tensor): the tensor to hold the RGB stereo pairs of images (Bx12xHxW). 140 | ind (int): index indicating which image to add. 141 | path_name (string): name of the path the images are taken from. 142 | run_id (int): id of the run the images are taken from. 143 | pose_id (int): id of the pose along the given run that the images are taken from. 
144 | normalize_img (bool): whether to normalize the image. 145 | """ 146 | # Stereo pair of RGB images (2 x 3 channels). 147 | start = ind * 6 148 | self.add_image(X, start, 'left', path_name, run_id, pose_id, normalize_img) 149 | self.add_image(X, start + 3, 'right', path_name, run_id, pose_id, normalize_img) 150 | 151 | def add_image(self, X, start_ind, loc, path_name, run_id, pose_id, normalize_img): 152 | """ 153 | Add one image to the images tensor. 154 | 155 | Args: 156 | X (torch.tensor): the tensor to hold the RGB stereo pairs of images (Bx12xHxW). 157 | start_ind (int): the index of the tensor where we start adding the images channels. 158 | loc (string): 'left' or 'right' insicating which image in the stereo pair we are adding. 159 | path_name (string): name of the path the images are taken from. 160 | run_id (int): id of the run the images are taken from. 161 | pose_id (int): id of the pose along the given run that the images are taken from. 162 | normalize_img (bool): whether to normalize the image. 163 | """ 164 | # Generate the image file name based on the id of the vertex the image belongs to. 165 | img_file = f"{self.data_dir}/{path_name}/run_{run_id.zfill(6)}/" \ 166 | f"images/{loc}/{pose_id.zfill(6)}.png" 167 | 168 | img = Image.open(img_file) 169 | 170 | # Turn the image into a tensor and normalize if required. 171 | if normalize_img: 172 | X[start_ind:start_ind + 3, :, :] = self.normalize_transforms(img) 173 | else: 174 | X[start_ind:start_ind + 3, :, :] = self.image_transforms(img) 175 | 176 | def get_disparity(self, left_img, right_img): 177 | """ 178 | Create the disparity image using functions from OpenCV. 179 | 180 | Args: 181 | left_img (numpy.uint8): left stereo image. 182 | right_img (numpy.uint8): right stereo image. 183 | """ 184 | stereo = cv2.StereoSGBM_create(minDisparity = 0, 185 | numDisparities = 48, 186 | blockSize = 5, 187 | preFilterCap = 30, 188 | uniquenessRatio = 20, 189 | P1 = 200, 190 | P2 = 800, 191 | speckleWindowSize = 200, 192 | speckleRange = 1, 193 | disp12MaxDiff = -1) 194 | 195 | disp = stereo.compute(left_img, right_img) 196 | disp = disp.astype(np.float32) / 16.0 197 | 198 | # Adjust values close to or equal to zero, which would cause problems for depth calculation. 199 | disp[(disp < 1e-4) & (disp >= 0.0)] = 1e-4 200 | 201 | return torch.from_numpy(disp) 202 | 203 | def add_disparity(self, D, index, path_name, run_id, pose_id): 204 | """ 205 | Add a disparity image to the tensor holding the disparity images. 206 | 207 | Args: 208 | D (torch.tensor): the tensor holding the disparity images. 209 | index (int): the index of the channel where we will add the disparity image in D. 210 | path_name (string): name of the path the images are taken from. 211 | run_id (int): id of the run the images are taken from. 212 | pose_id (int): id of the pose along the given run that the images are taken from. 213 | """ 214 | # Generate the image file names based on the id of the vertex the image belongs to. 215 | img_file_left = f"{self.data_dir}/{path_name}/run_{run_id.zfill(6)}/images/" \ 216 | f"left/{pose_id.zfill(6)}.png" 217 | img_file_right = f"{self.data_dir}/{path_name}/run_{run_id.zfill(6)}/images/" \ 218 | f"right/{pose_id.zfill(6)}.png" 219 | 220 | left_img = np.uint8(cv2.imread(img_file_left, 0)) 221 | right_img = np.uint8(cv2.imread(img_file_right, 0)) 222 | 223 | # Compute disparity using OpenCV. 
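# For a rectified stereo pair, depth = f * b / disparity (f: focal length in pixels, b: baseline in meters),
# which is why get_disparity() above clamps near-zero disparities to 1e-4 before they are used for depth
# computation downstream.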
224 | disparity = self.get_disparity(left_img, right_img) 225 | 226 | D[index, :, :] = disparity -------------------------------------------------------------------------------- /src/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/src/model/__init__.py -------------------------------------------------------------------------------- /src/model/keypoint_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class KeypointBlock(nn.Module): 6 | """ 7 | Given the dense detector values from the UNet decoder, compute the 2D keypoints. 8 | """ 9 | def __init__(self, window_height, window_width, image_height, image_width): 10 | """ 11 | Initialize by setting the height and width of windows where keypoints should be detected. One keypoint is 12 | detected per window and hence the size of the windows determines the number of possible keypoints. 13 | 14 | Args: 15 | window_height (int): height of window, inside which we detect one keypoint. 16 | window_width (int): width of window, inside which we detect one keypoint. 17 | image_height (int): height of the image. 18 | image_width (int): width of the image. 19 | """ 20 | super(KeypointBlock, self).__init__() 21 | self.window_height = window_height 22 | self.window_width = window_width 23 | self.temperature = 1.0 24 | 25 | v_coords, u_coords = torch.meshgrid([torch.arange(0, image_height), torch.arange(0, image_width)]) 26 | v_coords = v_coords.unsqueeze(0).float() # 1 x H x W 27 | u_coords = u_coords.unsqueeze(0).float() 28 | 29 | self.register_buffer('v_coords', v_coords) 30 | self.register_buffer('u_coords', u_coords) 31 | 32 | def forward(self, detector_values): 33 | """ 34 | Given a tensor of detector values (same width/height as the original image), divide the tensor into 35 | windows and use a spatial softmax over each window. The 2D coordinates of one keypoint is estimated for each 36 | window. 37 | 38 | Args: 39 | detector_values (torch.tensor, Bx1xHxW): Tensor of detector values from the network decoder. 40 | Returns: 41 | keypoints_2D (torch.tensor, Bx2xN): Keypoint coordinates. 
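Note: N equals the number of non-overlapping windows, (image_height / window_height) * (image_width / window_width). With the 384x512 images and 16x16 windows used elsewhere in this repository, N = 24 * 32 = 768 keypoints per image.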
42 | """ 43 | 44 | batch_size, _, height, width = detector_values.size() 45 | 46 | v_windows = F.unfold(self.v_coords.expand(batch_size, 1, height, width), 47 | kernel_size=(self.window_height, self.window_width), 48 | stride=(self.window_height, self.window_width)) # B x n_window_elements x n_windows 49 | u_windows = F.unfold(self.u_coords.expand(batch_size, 1, height, width), 50 | kernel_size=(self.window_height, self.window_width), 51 | stride=(self.window_height, self.window_width)) 52 | 53 | detector_values_windows = F.unfold(detector_values, 54 | kernel_size=(self.window_height, self.window_width), 55 | stride=(self.window_height, self.window_width)) # B x n_wind_elements x n_windows 56 | 57 | softmax_attention = F.softmax(detector_values_windows / self.temperature, dim=1) # B x n_wind_elements x n_windows 58 | 59 | expected_v = torch.sum(v_windows * softmax_attention, dim=1) # B x n_windows 60 | expected_u = torch.sum(u_windows * softmax_attention, dim=1) 61 | keypoints_2D = torch.stack([expected_u, expected_v], dim=2).transpose(2, 1) # B x 2 x n_windows 62 | 63 | return keypoints_2D 64 | -------------------------------------------------------------------------------- /src/model/matcher_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class MatcherBlock(nn.Module): 6 | """ 7 | Match the descriptors corresponding to the source keypoints to all descriptors in the target frame. Use the 8 | values from descriptor matching to compute the coordinates of the matched keypoints in the target frame, which 9 | we call pseudo-points. 10 | """ 11 | 12 | def __init__(self): 13 | super(MatcherBlock, self).__init__() 14 | 15 | def forward(self, kpt_2D_src, kpt_2D_trg, kpt_desc_norm_src, kpt_desc_norm_trg): 16 | """ 17 | Compute coordinates for the matches target keypoints, which we refer to as pseudo-points. 18 | 19 | Args: 20 | kpt_2D_src (torch.tensor, Bx2xN): sparse keypoints detected in the source image. 21 | kpt_2D_trg (torch.tensor, Bx2x(N or HW)): coordinates of target keypoints to match. If we are doing 22 | dense matching these will be the coordinates of all the pixels 23 | in the target image (Bx2xWH). If we are doing sparse matching, 24 | they will be the coordinates of the sparse detected keypoints 25 | in the target image (Bx2xN). 26 | kpt_desc_norm_src (torch.tensor, BxCxN): Normalized descriptors corresponding to the source keypoints. 27 | (C is the length of the descriptor). 28 | kpt_desc_norm_trg (torch.tensor, BxCx(N or HW)): Normalized descriptors corresponding to the target 29 | keypoints. BxCxHW for dense matching and BxCxN for 30 | sparse matching. 31 | 32 | Returns: 33 | kpt_2D_pseudo (torch.tensor, Bx2xN): keypoint coordinates of the matched pseudo-points in the target 34 | frame. 35 | """ 36 | 37 | batch_size, _, n_points = kpt_2D_src.size() 38 | 39 | # For all source keypoints, compute match value between each source point and all points in target frame. 40 | # Apply softmax for each source point along the dimension of the target points. 
41 | match_vals = torch.matmul(kpt_desc_norm_src.transpose(2, 1), kpt_desc_norm_trg) \ 42 | / float(kpt_desc_norm_trg.size(1)) # BxNx(HW or N) 43 | 44 | match_vals = (match_vals + 1.0) # [-1, 1] -> [0, 2] 45 | temperature = 0.01 46 | soft_match_vals = F.softmax(match_vals / 0.01, dim=2) # BxNx(HW or N) 47 | 48 | # Compute pseudo-point as weighted sum of point coordinates from target image, Bx2xN 49 | kpt_2D_pseudo = torch.matmul(kpt_2D_trg, soft_match_vals.transpose(2, 1)) 50 | 51 | return kpt_2D_pseudo 52 | 53 | -------------------------------------------------------------------------------- /src/model/pipeline.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from src.model.keypoint_block import KeypointBlock 6 | from src.model.matcher_block import MatcherBlock 7 | from src.model.ransac_block import RANSACBlock 8 | from src.model.svd_block import SVDBlock 9 | from src.model.unet import UNet 10 | from src.model.weight_block import WeightBlock 11 | from src.utils.keypoint_tools import get_keypoint_info, get_norm_descriptors 12 | from src.utils.lie_algebra import se3_inv, se3_log 13 | from src.utils.stereo_camera_model import StereoCameraModel 14 | 15 | class Pipeline(nn.Module): 16 | """ 17 | The Pipeline class implements the training pipeline. It takes a source and target image pair and the ground 18 | truth relative pose between them. It calls functions to execute the steps of the training pipeline such as 19 | keypoint detection, descriptor and score computation, keypoint matching, 3D point computation and, finally, 20 | pose computation. It also computes the loss functions for training. 21 | """ 22 | def __init__(self, config): 23 | """ 24 | Set up variables needed in the training pipeline. 25 | 26 | Args: 27 | config (dict): dictionary with the configuration parameters for the training pipeline. 28 | """ 29 | super(Pipeline, self).__init__() 30 | 31 | self.config = config 32 | 33 | self.window_h = config['pipeline']['window_h'] 34 | self.window_w = config['pipeline']['window_w'] 35 | self.dense_matching = config['pipeline']['dense_matching'] 36 | 37 | # Image height and width 38 | self.height = config['dataset']['height'] 39 | self.width = config['dataset']['width'] 40 | 41 | # Transform from sensor to vehicle. 42 | T_s_v = torch.tensor([[0.000796327, -1.0, 0.0, 0.119873], 43 | [-0.330472, -0.000263164, -0.943816, 1.49473], 44 | [0.943815, 0.000751586, -0.330472, 0.354804], 45 | [0.0, 0.0, 0.0, 1.0]]) 46 | self.register_buffer('T_s_v', T_s_v) 47 | 48 | # Set up the different blocks needed in the pipeline 49 | self.keypoint_block = KeypointBlock(self.window_h, self.window_w, self.height, self.width) 50 | self.matcher_block = MatcherBlock() 51 | self.weight_block = WeightBlock() 52 | self.svd_block = SVDBlock(self.T_s_v) 53 | self.ransac_block = RANSACBlock(config, self.T_s_v) 54 | 55 | self.stereo_cam = StereoCameraModel(config['stereo']['cu'], config['stereo']['cv'], 56 | config['stereo']['f'], config['stereo']['b']) 57 | 58 | # Set up the coordinates for the image pixels, only need to do this once. 
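# image_coords ends up as a 2xHW tensor of (u, v) pixel coordinates; it is reused on every forward pass as the
# dense set of candidate target coordinates when dense matching is enabled.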
59 | v_coord, u_coord = torch.meshgrid([torch.arange(0, self.height), torch.arange(0, self.width)]) 60 | v_coord = v_coord.reshape(self.height * self.width).float() # HW 61 | u_coord = u_coord.reshape(self.height * self.width).float() 62 | image_coords = torch.stack((u_coord, v_coord), dim=0) # 2xHW 63 | self.register_buffer('image_coords', image_coords) 64 | 65 | def forward(self, net, images, disparities, pose_se3, pose_log, epoch, test=False): 66 | """ 67 | A forward pass of the training piepline to estimate the relative pose given a source and target image 68 | pair. Also computes losses for training. 69 | 70 | Args: 71 | net (torch.nn.Module): neural network module. 72 | images (torch.tensor): a stereo pair of RGB source and target images stacked together (Bx12xHxW). 73 | disparities (torch.tensor): a pair of source and target disparity images stacked together (Bx2xHxW). 74 | The disparity is estimated from the left and right stereo pair. 75 | pose_se3 (torch.tensor): a 4x4 matrix representing the ground truth relative pose transformation from 76 | the source to target frame, T_trg_src (Bx4x4). 77 | pose_log (torch.tensor): a length 6 vector representing the ground truth relative pose transformation 78 | from the source to target frame (Bx6). 79 | epoch (int): the current training epoch. 80 | test (bool): True if we are running a test, False if training or validation. 81 | 82 | Returns: 83 | losses (dict): a dictionary mapping the type of loss to its value. Also includes the weighted sum of 84 | the individual losses. 85 | T_trg_src (torch.tensor): a 4x4 matrix representing the estimated relative pose transformation from 86 | the source to target frame, T_trg_src (Bx4x4). 87 | """ 88 | 89 | 90 | ################################################################################################################ 91 | # Setup of variables 92 | ################################################################################################################ 93 | 94 | batch_size = images.size(0) 95 | 96 | im_channels = [0, 1, 2, 6, 7, 8] # Picking out the left images from the four stereo src and trg images. 97 | images = images[:, im_channels, :, :].cuda() 98 | disparities = disparities.cuda() 99 | 100 | pose_se3 = pose_se3.cuda() 101 | pose_log = pose_log.cuda() 102 | 103 | ################################################################################################################ 104 | # Get keypoints and associated info for the source and target frames 105 | ################################################################################################################ 106 | 107 | # Pass the images through the UNet (Bx1xHxW, Bx1xHxW, BxCxHxW) 108 | detector_scores_src, scores_src, descriptors_src = net(images[:, :3, :, :]) 109 | detector_scores_trg, scores_trg, descriptors_trg = net(images[:, 3:, :, :]) 110 | 111 | # Get 2D keypoint coordinates from detector scores, Bx2xN 112 | kpt_2D_src = self.keypoint_block(detector_scores_src) 113 | kpt_2D_trg = self.keypoint_block(detector_scores_trg) 114 | 115 | # Get 3D point coordinates, normalized descriptors, and scores associated with each individual keypoint 116 | # (Bx4xN, BxCxN, Bx1xN). 
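# Unlike the lighter helper of the same name in extract_features.py, this get_keypoint_info also receives the
# disparity map and stereo camera model, so it can back-project each 2D keypoint to a 3D homogeneous point
# (Bx4xN) and return a validity mask (kpt_valid_*) marking keypoints that are safe to use for pose computation.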
117 | kpt_3D_src, kpt_valid_src, kpt_desc_norm_src, kpt_scores_src = get_keypoint_info(kpt_2D_src, 118 | scores_src, 119 | descriptors_src, 120 | disparities[:, 0, :, :], 121 | self.stereo_cam) 122 | 123 | kpt_3D_trg, kpt_valid_trg, kpt_desc_norm_trg, kpt_scores_trg = get_keypoint_info(kpt_2D_trg, 124 | scores_trg, 125 | descriptors_trg, 126 | disparities[:, 0, :, :], 127 | self.stereo_cam) 128 | 129 | 130 | ################################################################################################################ 131 | # Match keypoints from the source and target frames 132 | ################################################################################################################ 133 | 134 | if self.config['pipeline']['dense_matching']: 135 | 136 | # Match against descriptors for each pixel in the target. 137 | desc_norm_trg_dense = get_norm_descriptors(descriptors_trg) 138 | kpt_2D_trg_dense = self.image_coords.unsqueeze(0).expand(batch_size, 2, self.height * self.width) 139 | 140 | # Compute the coordinates of the matched keypoints in the target frame, which we refer to as pseudo points. 141 | kpt_2D_pseudo = self.matcher_block(kpt_2D_src, kpt_2D_trg_dense, kpt_desc_norm_src, desc_norm_trg_dense) 142 | 143 | else: 144 | 145 | # Match only against descriptors associated with detected keypoints in the target frame. 146 | kpt_2D_pseudo = self.matcher_block(kpt_2D_src, kpt_2D_trg, kpt_desc_norm_src, kpt_desc_norm_trg) 147 | 148 | # Get 3D point coordinates, normalized descriptors, and scores associated with each individual matched pseudo 149 | # point in the target frame (Bx4xN, BxCxN, Bx1xN). 150 | kpt_3D_pseudo, kpt_valid_pseudo, kpt_desc_norm_pseudo, kpt_scores_pseudo = get_keypoint_info(kpt_2D_pseudo, 151 | scores_trg, 152 | descriptors_trg, 153 | disparities[:,1,:,:], 154 | self.stereo_cam) 155 | 156 | # Compute the weight associated with each matched point pair. They will be used when computing the pose. 157 | weights = self.weight_block(kpt_desc_norm_src, kpt_desc_norm_pseudo, kpt_scores_src, kpt_scores_pseudo) 158 | 159 | 160 | ################################################################################################################ 161 | # Outlier rejection 162 | ################################################################################################################ 163 | 164 | # Find the inliers either by using the ground truth pose (training) or RANSAC (inference). 165 | valid_inliers = torch.ones(kpt_valid_src.size()).type_as(kpt_valid_src) 166 | 167 | if self.config['outlier_rejection']['on'] and (self.config['outlier_rejection']['type'] == 'ground_truth'): 168 | 169 | # For training, we use the ground truth pose to find the ground truth location of the target points by 170 | # transforming them from the source frame to the target frames. 171 | 172 | T_trg_src_gt_inl = pose_se3 # Ground truth transform to use to find inliers. 173 | 174 | if 'plane' in self.config['outlier_rejection']['dim']: 175 | # If we use only part of the ground truth pose (x, y, heading) we transform the points in the plane 176 | # only and adjust the ground truth transform accordingly. 
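# Concretely, with x = log_pose[:, 0], y = log_pose[:, 1] and theta = log_pose[:, 5] (heading), the code below
# assembles, column by column, the planar transform
#   T_trg_src_gt_inl = [[cos(theta), -sin(theta), 0, x],
#                       [sin(theta),  cos(theta), 0, y],
#                       [0,           0,          1, 0],
#                       [0,           0,          0, 1]]
# so that only the in-plane DOF of the ground truth pose are used when checking for inliers.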
177 | log_pose = pose_log 178 | 179 | one = torch.ones(batch_size).cuda() # B 180 | zero = torch.zeros(batch_size).cuda() # B 181 | 182 | rot_col1 = torch.stack([torch.cos(log_pose[:, 5]), torch.sin(log_pose[:, 5]), 183 | zero.clone(), zero.clone()], dim=1) # Bx4 184 | rot_col2 = torch.stack([-torch.sin(log_pose[:, 5]), torch.cos(log_pose[:, 5]), 185 | zero.clone(), zero.clone()], dim=1) # Bx4 186 | rot_col3 = torch.stack([zero.clone(), zero.clone(), one.clone(), zero.clone()], dim=1) # Bx4 187 | trans_col = torch.stack([log_pose[:, 0], log_pose[:, 1], zero.clone(), one.clone()], dim=1) # Bx4 188 | 189 | T_trg_src_gt_inl = torch.stack([rot_col1, rot_col2, rot_col3, trans_col], dim=2) # Bx4x4 190 | 191 | T_s_v = self.T_s_v.expand(batch_size, 4, 4) 192 | T_trg_src_gt_inl_sensor = T_s_v.bmm(T_trg_src_gt_inl).bmm(se3_inv(T_s_v)) # Transform pose to sensor frame. 193 | 194 | # The ground truth target points, which will be compared to the matched target points. 195 | kpt_3D_pseudo_gt = T_trg_src_gt_inl_sensor.bmm(kpt_3D_src) # Bx4xN 196 | kpt_2D_pseudo_gt = self.stereo_cam.camera_model(kpt_3D_pseudo_gt)[:, 0:2, :] # Bx2xN 197 | 198 | # Find the residual between the ground truth and matched point coordinates and determine the inliers. 199 | if '3D' in self.config['outlier_rejection']['dim']: 200 | err = torch.norm(kpt_3D_pseudo - kpt_3D_pseudo_gt, dim=1) # B x N 201 | inliers_3D = err < self.config['outlier_rejection']['error_tolerance']['3D'] 202 | valid_inliers = valid_inliers & (inliers_3D.unsqueeze(1)) 203 | 204 | if ('2D' in self.config['outlier_rejection']['dim']): 205 | err = torch.norm(kpt_2D_pseudo - kpt_2D_pseudo_gt, dim=1) # B x N 206 | inliers_2D = err < self.config['outlier_rejection']['error_tolerance']['2D'] 207 | valid_inliers = valid_inliers & (inliers_2D.unsqueeze(1)) 208 | 209 | if 'plane' in self.config['outlier_rejection']['dim']: 210 | # Do comparison in the vehicle frame, not the sensor frame. 211 | kpt_3D_pseudo_gt_vehicle = T_trg_src_gt_inl.bmm(se3_inv(T_s_v).bmm(kpt_3D_src)) # Bx4xN 212 | kpt_3D_pseudo_vehicle = se3_inv(T_s_v).bmm(kpt_3D_pseudo) 213 | err = torch.norm(kpt_3D_pseudo_vehicle[:, 0:2, :] - kpt_3D_pseudo_gt_vehicle[:, 0:2, :], dim=1) # BxN 214 | inliers_plane = err < self.config['outlier_rejection']['error_tolerance']['plane'] 215 | valid_inliers = valid_inliers & (inliers_plane.unsqueeze(1)) 216 | 217 | if self.config['outlier_rejection']['on'] and (self.config['outlier_rejection']['type'] == 'ransac'): 218 | # Find outlier based on RANSAC. 219 | ransac_inliers = self.ransac_block(kpt_3D_src, 220 | kpt_3D_pseudo, 221 | kpt_2D_pseudo, 222 | kpt_valid_src, 223 | kpt_valid_pseudo, 224 | weights, 225 | self.config['outlier_rejection']['dim'][0]) 226 | 227 | valid_inliers = ransac_inliers.unsqueeze(1) 228 | 229 | 230 | ################################################################################################################ 231 | # Compute the pose 232 | ################################################################################################################ 233 | 234 | # We can choose to use just the keypoint loss and not compute pose for the first few epochs. 235 | if test or (epoch >= self.config['training']['start_pose_estimation']): 236 | 237 | # Check that we have enough inliers for all example sin the bach to compute pose. 
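# (A rigid transform is only well determined from at least 3 non-degenerate point pairs; requiring 6 leaves
# margin for points that end up with zero weight, mirroring the 6-point sampling used in the RANSAC block.)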
238 | valid = kpt_valid_src & kpt_valid_pseudo & valid_inliers 239 | 240 | num_inliers = torch.sum(valid.squeeze(1), dim=1)[0] 241 | if torch.any(num_inliers < 6): 242 | raise RuntimeError('Too few inliers to compute pose: {}'.format(num_inliers)) 243 | 244 | weights[valid == 0] = 0.0 245 | T_trg_src = self.svd_block(kpt_3D_src, kpt_3D_pseudo, weights) 246 | 247 | # If we are testing, return the pose. 248 | if test: 249 | return T_trg_src 250 | 251 | ################################################################################################################ 252 | # Compute the losses 253 | ################################################################################################################ 254 | 255 | # Variables to store the loss 256 | losses = {'total': torch.zeros(1).cuda()} 257 | mse_loss_fn = torch.nn.MSELoss() 258 | loss_types = self.config['loss']['types'] 259 | loss_weights = self.config['loss']['weights'] 260 | 261 | # Keypoint loss in different versions depending on using 2D coordinates, 3D coordinates, or a subset of the 262 | # ground truth pose to transform the target points in the plane. 263 | if 'keypoint_3D' in loss_types: 264 | valid = kpt_valid_pseudo & kpt_valid_src & valid_inliers # B x 1 x N 265 | valid = valid.expand(batch_size, 4, valid.size(2)) # B x 4 x N 266 | keypoint_loss = mse_loss_fn(kpt_3D_pseudo[valid], kpt_3D_pseudo_gt[valid]) 267 | keypoint_loss *= loss_weights['keypoint_3D'] 268 | losses['keypoint_3D'] = keypoint_loss 269 | losses['total'] += keypoint_loss 270 | 271 | if 'keypoint_2D' in loss_types: 272 | valid = kpt_valid_pseudo & kpt_valid_src & valid_inliers # B x 1 x N 273 | valid = valid.expand(batch_size, 2, valid.size(2)) # B x 2 x N 274 | keypoint_loss = mse_loss_fn(kpt_2D_pseudo[valid], kpt_2D_pseudo_gt[valid]) 275 | keypoint_loss *= loss_weights['keypoint_2D'] 276 | losses['keypoint_2D'] = keypoint_loss 277 | losses['total'] += keypoint_loss 278 | 279 | if ('keypoint_plane' in loss_types): 280 | valid = kpt_valid_pseudo & kpt_valid_src & valid_inliers # B x 1 x N 281 | valid = valid.expand(batch_size, 2, valid.size(2)) # B x 4 x N 282 | kpt_3D_pseudo_gt_vehicle = T_trg_src_gt_inl.bmm(se3_inv(T_s_v).bmm(kpt_3D_src)) # Bx4xN 283 | kpt_3D_pseudo_vehicle = se3_inv(T_s_v).bmm(kpt_3D_pseudo) 284 | keypoint_loss = mse_loss_fn(kpt_3D_pseudo_vehicle[:, 0:2, :][valid], 285 | kpt_3D_pseudo_gt_vehicle[:, 0:2, :][valid]) 286 | keypoint_loss *= loss_weights['keypoint_plane'] 287 | losses['keypoint_plane'] = keypoint_loss 288 | losses['total'] += keypoint_loss 289 | 290 | # Pose loss either using the full 6DOF pose or a subset (x, y, heading). 
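# For the planar variant below, this reduces to a weighted quadratic penalty on the pose error expressed in
# Lie-algebra coordinates: with xi = se3_log(T_est * T_gt^-1) and W = diag(w_x, w_y, 0, 0, 0, w_heading),
#   pose_loss = (1 / batch_size) * sum_b xi_b^T W xi_b,
# so only the x, y and heading components of the error contribute.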
291 | if test or (epoch >= self.config['training']['start_pose_estimation']): 292 | if ('pose' in loss_types): 293 | T_trg_src_gt = pose_se3 294 | rot_loss, trans_loss = self.pose_loss(T_trg_src_gt, T_trg_src, mse_loss_fn) 295 | rotation_loss = loss_weights['rotation'] * rot_loss 296 | translation_loss = loss_weights['translation'] * trans_loss 297 | pose_loss = rotation_loss + translation_loss 298 | losses['rotation'] = rotation_loss 299 | losses['translation'] = translation_loss 300 | 301 | elif ('pose_plane' in loss_types): 302 | w = torch.zeros(6) 303 | w[0] = loss_weights['translation_x'] 304 | w[1] = loss_weights['translation_y'] 305 | w[5] = loss_weights['rotation_heading'] 306 | w = torch.diag(w).unsqueeze(0).expand(batch_size, 6, 6).cuda() 307 | 308 | T_trg_src_gt = pose_se3 309 | log_pose_err = se3_log(T_trg_src.bmm(torch.inverse(T_trg_src_gt))).unsqueeze(2) 310 | pose_loss = (1.0 / batch_size) * torch.sum(log_pose_err.transpose(2, 1).bmm(w).bmm(log_pose_err)) 311 | 312 | losses['pose'] = pose_loss 313 | losses['total'] += pose_loss 314 | 315 | ################################################################################################################ 316 | # Return the estimated pose and the loss 317 | ################################################################################################################ 318 | 319 | if epoch >= self.config['training']['start_pose_estimation']: 320 | return losses, T_trg_src 321 | else: 322 | # Haven't computed the pose yet, just the keypoint loss. 323 | return losses, pose_se3 324 | 325 | 326 | def pose_loss(self, T, T_est, loss_fn): 327 | """ 328 | Compute the pose loss using all DOF. 329 | 330 | Args: 331 | T (torch.tensor): the ground truth pose represented as a 4x4 matrix (Bx4x4). 332 | T_est (torch.tensor): the estimated pose represented as a 4x4 matrix (Bx4x4). 333 | loss_fn (): the loss function to use. 334 | 335 | Returns: 336 | rot_loss (float): the pose loss for the rotational DOF. 337 | trans_loss (float): the pose loss for the translational DOF. 338 | """ 339 | batch_size = T.size(0) 340 | 341 | R_est = T_est[:, 0:3, 0:3] 342 | R = T[:, 0:3, 0:3] 343 | identity = torch.eye(3).unsqueeze(0).expand(batch_size, 3, 3).cuda() 344 | 345 | rot_loss = loss_fn(R_est.transpose(2, 1).bmm(R), identity) 346 | trans_loss = loss_fn(T_est[:, 0:3, 3], T[:, 0:3, 3]) 347 | 348 | return rot_loss, trans_loss 349 | 350 | 351 | 352 | 353 | 354 | 355 | -------------------------------------------------------------------------------- /src/model/ransac_block.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from src.model.svd_block import SVDBlock 8 | from src.utils.lie_algebra import se3_log, se3_inv 9 | from src.utils.stereo_camera_model import StereoCameraModel 10 | 11 | class RANSACBlock(nn.Module): 12 | """ 13 | Use RANSAC for outlier rejection during inference. 14 | """ 15 | 16 | def __init__(self, config, T_s_v): 17 | """ 18 | Initialize RANSAC by setting values used to determine how many iterations to run. 19 | 20 | Args: 21 | config (dict): configuration parameters. 22 | T_s_v (torch.tensor): 4x4 transformation matrix providing the transform from the vehicle frame to the 23 | sensor frame. 24 | """ 25 | super(RANSACBlock, self).__init__() 26 | 27 | # Transform from vehicle to sensor frame. 
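# Poses estimated in the vehicle frame are moved into the sensor (camera) frame by conjugation,
# T_cam = T_s_v * T_vehicle * T_s_v^-1, as done in forward() before reprojecting points with the camera model.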
28 | self.register_buffer('T_s_v', T_s_v) 29 | 30 | self.stereo_cam = StereoCameraModel(config['stereo']['cu'], config['stereo']['cv'], 31 | config['stereo']['f'], config['stereo']['b']) 32 | 33 | # The minimum number of inliers we want, quit RANSAC when this is achieved. 34 | self.inlier_threshold = config['outlier_rejection']['inlier_threshold'] 35 | 36 | # Error must be smaller than or equal to threshold to this be considered inlier. 37 | dim_key = config['outlier_rejection']['dim'][0] 38 | self.error_tolerance = config['outlier_rejection']['error_tolerance'][dim_key] 39 | 40 | # Maximum number of iterations to run before giving up. 41 | self.num_iterations = config['outlier_rejection']['num_iterations'] 42 | 43 | # SVD is used to estimate pose. 44 | self.svd = SVDBlock(T_s_v) 45 | 46 | def forward(self, keypoints_3D_src, keypoints_3D_trg, keypoints_2D_trg, valid_pts_src, valid_pts_trg, weights, dim): 47 | """ 48 | Outlier rejection with RANSAC. 49 | 50 | Args: 51 | keypoints_3D_src (torch,tensor, Bx4xN): 3D point coordinates of keypoints from source frame. 52 | keypoints_3D_trg (torch,tensor, Bx4xN): 3D point coordinates of keypoints from target frame. 53 | keypoints_2D_trg (torch,tensor, Bx2xN): 2D image coordinates of keypoints from source frame. 54 | valid_pts_src (torch.tensor, Bx1xN): Values (0 or 1) to indicate if a keypoint in source frame is valid 55 | (i.e. can be used for pose computation). 56 | valid_pts_trg (torch.tensor, Bx1xN): Values (0 or 1) to indicate if a keypoint in target frame is valid 57 | (i.e. can be used for pose computation). 58 | weights (torch.tensor, Bx1xN): weights in range (0, 1) associated with the matched source and 59 | target points. 60 | dim (str): '2D' or '3D' to specify if error should be taken between 2D image coordinates or 3D point 61 | coordinates. 62 | 63 | Returns: 64 | inliers (torch.tensor, BxN): 65 | """ 66 | batch_size, _, n_points = keypoints_3D_src.size() 67 | 68 | valid = valid_pts_src & valid_pts_trg 69 | weights_svd = copy.deepcopy(weights) 70 | weights_svd[valid == 0] = 0.0 71 | 72 | pts_3D_src = keypoints_3D_src.detach() 73 | pts_3D_trg = keypoints_3D_trg.detach() 74 | pts_2D_trg = keypoints_2D_trg.detach() 75 | 76 | max_num_inliers = torch.zeros(batch_size).type_as(pts_3D_src) # Keep track of highest number of inliers so far. 77 | inliers = torch.zeros(batch_size, n_points, dtype=torch.bool).cuda() 78 | 79 | i = 0 80 | ransac_complete = torch.zeros(batch_size).type_as(pts_3D_src).int() 81 | 82 | while (i < self.num_iterations) and (torch.sum(ransac_complete) < batch_size): 83 | 84 | # Pick a random subset of 6 point pairs (3 sufficient, but some points will have weight 0 so pick a 85 | # few more than needed to increase probability of getting points with rank 3). 
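# Note that torch.randint draws the 6 indices independently (i.e. with replacement), so duplicates are
# possible; if a degenerate draw makes the SVD fail, the except branch below prints the point ranks and repeats
# the iteration without counting it towards num_iterations.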
86 | rand_index = torch.randint(0, n_points, size=(batch_size, 6)).type_as(pts_3D_src).long() 87 | rand_index = rand_index.unsqueeze(1) 88 | rand_pts_3D_src = torch.gather(pts_3D_src, dim=2, index=rand_index.expand(batch_size, 4, 6)) # 1x4xM 89 | rand_pts_3D_trg = torch.gather(pts_3D_trg, dim=2, index=rand_index.expand(batch_size, 4, 6)) # 1x4xM 90 | rand_weights = torch.gather(weights.detach(), dim=2, index=rand_index) # 1x1xM 91 | 92 | # Run SVD 93 | try: 94 | T_trg_src = self.svd(rand_pts_3D_src, rand_pts_3D_trg, rand_weights) # pose in vehicle frame 95 | except RuntimeError as e: 96 | print(e) 97 | print("RANSAC SVD did not converge, re-doing iteration {}".format(i)) 98 | print("weights: {}".format(rand_weights[0, 0, :])) 99 | print("rank src pts: {}".format(torch.matrix_rank(rand_pts_3D_src[0, 0:3, :]))) 100 | print("rank trg pts: {}".format(torch.matrix_rank(rand_pts_3D_trg[0, 0:3, :])), flush=True) 101 | continue 102 | 103 | # Find number of inliers 104 | T_s_v = self.T_s_v.expand(batch_size, 4, 4) 105 | T_trg_src_cam = T_s_v.bmm(T_trg_src).bmm(se3_inv(T_s_v)) # pose in camera frame 106 | pts_3D_trg_est = T_trg_src_cam.bmm(pts_3D_src) 107 | pts_2D_trg_est = self.stereo_cam.camera_model(pts_3D_trg_est)[:, 0:2, :] 108 | 109 | if dim == '2D': 110 | err_pts = torch.norm(pts_2D_trg - pts_2D_trg_est, dim=1) # BxN 111 | else: 112 | err_pts = torch.norm(pts_3D_trg - pts_3D_trg_est, dim=1) # BxN 113 | 114 | err_pts_small = err_pts < self.error_tolerance 115 | err_pts_small[valid_pts_src[:, 0, :] == 0] = 0 116 | err_pts_small[valid_pts_trg[:, 0, :] == 0] = 0 117 | 118 | num_inliers = torch.sum(err_pts_small, dim=1) 119 | 120 | fraction_inliers = num_inliers.float() / n_points 121 | enough_inliers = fraction_inliers > self.inlier_threshold 122 | ransac_complete = ransac_complete | enough_inliers 123 | 124 | for b in range(batch_size): 125 | if num_inliers[b] > max_num_inliers[b]: 126 | max_num_inliers[b] = num_inliers[b] 127 | inliers[b, :] = err_pts_small[b, :] 128 | 129 | i += 1 130 | 131 | return inliers -------------------------------------------------------------------------------- /src/model/svd_block.py: -------------------------------------------------------------------------------- 1 | """ Some of this code is based on Code from: https://github.com/WangYueFt/dcp/blob/master/model.py """ 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from src.utils.lie_algebra import se3_log, se3_inv 7 | 8 | class SVDBlock(nn.Module): 9 | """ 10 | Compute the relative pose between the source and target frames. 11 | """ 12 | def __init__(self, T_s_v): 13 | """ 14 | Initialize the SVD class. 15 | 16 | Args: 17 | T_s_v (torch.tensor): 4x4 transformation matrix providing the transform from the vehicle frame to the 18 | sensor frame. 19 | """ 20 | super(SVDBlock, self).__init__() 21 | 22 | self.register_buffer('T_s_v', T_s_v) 23 | 24 | def forward(self, keypoints_3D_src, keypoints_3D_trg, weights): 25 | """ 26 | Compute the pose, T_trg_src, from the source to the target frame. 27 | 28 | Args: 29 | keypoints_3D_src (torch,tensor, Bx4xN): 3D point coordinates of keypoints from source frame. 30 | keypoints_3D_trg (torch,tensor, Bx4xN): 3D point coordinates of keypoints from target frame. 31 | weights (torch.tensor, Bx1xN): weights in range (0, 1) associated with the matched source and target 32 | points. 33 | 34 | Returns: 35 | T_trg_src (torch.tensor, Bx4x4): relative transform from the source to the target frame. 
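        Illustrative usage (hypothetical values, not taken from the repository's tests): with
        identical source and target points and an identity vehicle-to-sensor transform, the
        recovered pose should be close to identity:

            svd = SVDBlock(torch.eye(4))
            pts = torch.rand(1, 4, 10)
            pts[:, 3, :] = 1.0                 # Bx4xN homogeneous points
            weights = torch.ones(1, 1, 10)     # Bx1xN uniform weights
            T = svd(pts, pts, weights)         # approximately torch.eye(4)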
36 | """ 37 | batch_size, _, n_points = keypoints_3D_src.size() 38 | 39 | # Compute weighted centroids (elementwise multiplication/division) 40 | centroid_src = torch.sum(keypoints_3D_src[:, 0:3, :] * weights, dim=2, keepdim=True) / torch.sum(weights, dim=2, 41 | keepdim=True) # Bx3x1 42 | centroid_trg = torch.sum(keypoints_3D_trg[:, 0:3, :] * weights, dim=2, keepdim=True) / torch.sum(weights, dim=2, 43 | keepdim=True) 44 | 45 | src_centered = keypoints_3D_src[:, 0:3, :] - centroid_src # Bx3xN 46 | trg_centered = keypoints_3D_trg[:, 0:3, :] - centroid_trg 47 | 48 | W = torch.diag_embed(weights.reshape(batch_size, n_points)) # BxNxN 49 | w = torch.sum(weights, dim=2).unsqueeze(2) # Bx1x1 50 | 51 | H = (1.0 / w) * torch.bmm(trg_centered, torch.bmm(W, src_centered.transpose(2, 1).contiguous())) # Bx3x3 52 | 53 | U, S, V = torch.svd(H) 54 | 55 | det_UV = torch.det(U) * torch.det(V) 56 | ones = torch.ones(batch_size, 2).type_as(V) 57 | diag = torch.diag_embed(torch.cat((ones, det_UV.unsqueeze(1)), dim=1)) # Bx3x3 58 | 59 | # Compute rotation and translation (T_trg_src in sensor frame) 60 | R_trg_src = torch.bmm(U, torch.bmm(diag, V.transpose(2, 1).contiguous())) # Bx3x3 61 | t_trg_src_insrc = centroid_src - torch.bmm(R_trg_src.transpose(2, 1).contiguous(), centroid_trg) # Bx3x1 62 | t_src_trg_intrg = -R_trg_src.bmm(t_trg_src_insrc) # Translation from trg to src given in src frame. 63 | 64 | # Create translation matrix 65 | zeros = torch.zeros(batch_size, 1, 3).type_as(V) # Bx1x3 66 | one = torch.ones(batch_size, 1, 1).type_as(V) # Bx1x1 67 | trans_cols = torch.cat([t_src_trg_intrg, one], dim=1) # Bx4x1 68 | rot_cols = torch.cat([R_trg_src, zeros], dim=1) # Bx4x3 69 | T_trg_src = torch.cat([rot_cols, trans_cols], dim=2) # Bx4x4 70 | 71 | # Convert from sensor to vehicle frame 72 | T_s_v = self.T_s_v.expand(batch_size, 4, 4) 73 | T_trg_src = se3_inv(T_s_v).bmm(T_trg_src).bmm(T_s_v) 74 | 75 | return T_trg_src -------------------------------------------------------------------------------- /src/model/unet.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torchvision import transforms 8 | import torchvision 9 | 10 | """ Code for the components of the U-Net model is taken from: 11 | https://github.com/milesial/Pytorch-UNet/blob/master/unet/unet_parts.py""" 12 | class DoubleConv(nn.Module): 13 | """(convolution => [BN] => ReLU) * 2""" 14 | 15 | def __init__(self, in_channels, out_channels): 16 | super().__init__() 17 | self.double_conv = nn.Sequential( 18 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), 19 | nn.BatchNorm2d(out_channels), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), 22 | nn.BatchNorm2d(out_channels), 23 | nn.ReLU(inplace=True) 24 | ) 25 | 26 | def forward(self, x): 27 | return self.double_conv(x) 28 | 29 | class Down(nn.Module): 30 | """Downscaling with maxpool then double conv""" 31 | 32 | def __init__(self, in_channels, out_channels): 33 | super().__init__() 34 | self.maxpool_conv = nn.Sequential( 35 | nn.MaxPool2d(2), 36 | DoubleConv(in_channels, out_channels) 37 | ) 38 | 39 | def forward(self, x): 40 | return self.maxpool_conv(x) 41 | 42 | class Up(nn.Module): 43 | """Upscaling then double or tiple conv""" 44 | 45 | def __init__(self, in_channels, out_channels, bilinear=True, double=True): 46 | super().__init__() 47 | 48 | # if bilinear, use the 
normal convolutions to reduce the number of channels 49 | if bilinear: 50 | self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 51 | else: 52 | self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2) 53 | 54 | self.conv = DoubleConv(in_channels, out_channels) if double else TripleConv(in_channels, out_channels) 55 | 56 | def forward(self, x1, x2): 57 | x1 = self.up(x1) 58 | 59 | # input is CHW 60 | diffY = torch.tensor([x2.size()[2] - x1.size()[2]]) 61 | diffX = torch.tensor([x2.size()[3] - x1.size()[3]]) 62 | 63 | x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, 64 | diffY // 2, diffY - diffY // 2]) 65 | 66 | # if you have padding issues, see 67 | # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a 68 | # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd 69 | x = torch.cat([x2, x1], dim=1) 70 | 71 | return self.conv(x) 72 | 73 | class OutConv(nn.Module): 74 | def __init__(self, in_channels, out_channels, ks=1): 75 | super(OutConv, self).__init__() 76 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=ks) 77 | 78 | def forward(self, x): 79 | return self.conv(x) 80 | 81 | """ The UNet network is based on code from: https://github.com/milesial/Pytorch-UNet/blob/master/unet/unet_model.py, 82 | but includes changes/adaptions.""" 83 | class UNet(nn.Module): 84 | """ 85 | Unet network to compute keypoint detector values, descriptors, and scores. 86 | """ 87 | def __init__(self, n_channels, n_classes, layer_size): 88 | """ 89 | Initialize the network with one encoder and two decoders. 90 | 91 | Args: 92 | num_channels (int): number of channels in the input image (we use 3 for one RGB image). 93 | num_classes (int): number of classes (output channels from the decoder), 1 in our case. 94 | layer_size (int): size of the first layer if the encoder. The size of the following layers are 95 | determined from this. 96 | """ 97 | super(UNet, self).__init__() 98 | self.n_channels = n_channels 99 | self.n_classes = n_classes 100 | bilinear = True 101 | m = layer_size 102 | 103 | self.inc = DoubleConv(n_channels, m) # 384 x 512 (height/width after layer given 384 x 512 input image) 104 | self.down1 = Down(m, m * 2) # 192 x 256 105 | self.down2 = Down(m * 2, m * 4) # 96 x 128 106 | self.down3 = Down(m * 4, m * 8) # 48 x 64 107 | self.down4 = Down(m * 8, m * 16) # 24 x 32 108 | 109 | self.up1_pts = Up(m * 24, m * 8, bilinear) 110 | self.up2_pts = Up(m * 12, m * 4, bilinear) 111 | self.up3_pts = Up(m * 6, m * 2, bilinear) 112 | self.up4_pts = Up(m * 3, m, bilinear) 113 | self.outc_pts = OutConv(m, n_classes) 114 | 115 | self.up1_score = Up(m * 24, m * 8, bilinear) 116 | self.up2_score = Up(m * 12, m * 4, bilinear) 117 | self.up3_score = Up(m * 6, m * 2, bilinear) 118 | self.up4_score = Up(m * 3, m, bilinear) 119 | self.outc_score = OutConv(m, n_classes) 120 | 121 | self.sigmoid = nn.Sigmoid() 122 | 123 | def forward(self, x): 124 | """ 125 | Forward pass of network to get keypoint detector values, descriptors and, scores 126 | 127 | Args: 128 | x (torch.tensor, Bx3xHxW): RGB images to input to the network. 129 | 130 | Returns: 131 | logit_pts (torch.tensor, Bx1xHxW): detector values for each pixel, which will be used to compute the 132 | final keypoint coordinates. 133 | score (torch.tensor, Bx1xHxW): an importance score for each pixel. 134 | descriptors (torch.tensor, BxCxHxW): descriptors for each pixel, C is length of descriptor. 
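        Illustrative shape check (the sizes are hypothetical, not the repository's configuration):

            net = UNet(n_channels=3, n_classes=1, layer_size=16)
            x = torch.rand(2, 3, 384, 512)
            logits_pts, score, descriptors = net(x)
            # logits_pts: 2x1x384x512, score: 2x1x384x512 (in (0, 1) after the sigmoid),
            # descriptors: 2x496x384x512 since C = 16 * (1 + 2 + 4 + 8 + 16) = 496.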
135 | 136 | """ 137 | batch_size, _, height, width = x.size() 138 | 139 | x1 = self.inc(x) 140 | x2 = self.down1(x1) 141 | x3 = self.down2(x2) 142 | x4 = self.down3(x3) 143 | x5 = self.down4(x4) 144 | 145 | x4_up_pts = self.up1_pts(x5, x4) 146 | x3_up_pts = self.up2_pts(x4_up_pts, x3) 147 | x2_up_pts = self.up3_pts(x3_up_pts, x2) 148 | x1_up_pts = self.up4_pts(x2_up_pts, x1) 149 | logits_pts = self.outc_pts(x1_up_pts) 150 | 151 | x4_up_score = self.up1_score(x5, x4) 152 | x3_up_score = self.up2_score(x4_up_score, x3) 153 | x2_up_score = self.up3_score(x3_up_score, x2) 154 | x1_up_score = self.up4_score(x2_up_score, x1) 155 | score = self.outc_score(x1_up_score) 156 | score = self.sigmoid(score) 157 | 158 | # Resize outputs of each encoder layer to the size of the original image. Features are interpolated using 159 | # bilinear interpolation to get gradients for back-prop. Concatenate along the feature channel to get 160 | # pixel-wise descriptors of size BxCxHxW. 161 | f1 = F.interpolate(x1, size=(height, width), mode='bilinear') 162 | f2 = F.interpolate(x2, size=(height, width), mode='bilinear') 163 | f3 = F.interpolate(x3, size=(height, width), mode='bilinear') 164 | f4 = F.interpolate(x4, size=(height, width), mode='bilinear') 165 | f5 = F.interpolate(x5, size=(height, width), mode='bilinear') 166 | 167 | feature_list = [f1, f2, f3, f4, f5] 168 | descriptors = torch.cat(feature_list, dim=1) 169 | 170 | return logits_pts, score, descriptors -------------------------------------------------------------------------------- /src/model/weight_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class WeightBlock(nn.Module): 6 | """ 7 | Computes the weights for matched points pairs that will be used when computing the pose. 8 | """ 9 | def __init__(self): 10 | super(WeightBlock, self).__init__() 11 | 12 | def forward(self, kpt_desc_norm_src, kpt_desc_norm_pseudo, kpt_scores_src, kpt_scores_pseudo): 13 | """ 14 | Compute the weights for matched point pairs. 15 | 16 | Args: 17 | kpt_desc_norm_src (torch.tensor, BxCxN): normalized descriptors for the source keypoints. 18 | kpt_desc_norm_pseudo (torch.tensor, BxCxN): normalized descriptors for the target pseudo keypoints. 19 | kpt_scores_src (torch.tensor, Bx1xN): scores for the source keypoints. 20 | kpt_scores_pseudo (torch.tensor, Bx1xN): scores for the target pseudo keypoints. 21 | 22 | Returns: 23 | weights (torch.tensor, Bx1xN): weights for each matched point pair in range [0, 1] to be used when 24 | computing the pose. 
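        In short, with zncc denoting the (approximately) zero-normalized cross-correlation
        between a pair of matched descriptors (range [-1, 1]), the weight computed below is

            w = 0.5 * (zncc + 1) * score_src * score_pseudo,

        so, for example, zncc = 0.8, score_src = 0.9 and score_pseudo = 0.5 give
        w = 0.5 * 1.8 * 0.9 * 0.5 = 0.405.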
25 | """ 26 | batch_size, channels, n_points = kpt_desc_norm_src.size() 27 | 28 | # Get the zncc between each matched point pair 29 | desc_src = kpt_desc_norm_src.transpose(2, 1).reshape(batch_size * n_points, channels) # BNxC 30 | desc_pseudo = kpt_desc_norm_pseudo.transpose(2, 1).reshape(batch_size * n_points, channels) # BNxC 31 | 32 | desc_match_val = torch.matmul(desc_src.unsqueeze(1), desc_pseudo.unsqueeze(2)) / desc_pseudo.size(1) # BNx1 33 | 34 | desc_match_val = desc_match_val.reshape(batch_size, 1, n_points) + 1.0 # Range [0, 2] Bx1xN 35 | 36 | weights = 0.5 * desc_match_val * kpt_scores_src * kpt_scores_pseudo # Range [0, 1] 37 | 38 | return weights -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import pickle 5 | import re 6 | import sys 7 | import time 8 | 9 | import numpy as np 10 | import torch 11 | import torch.backends.cudnn as cudnn 12 | import torch.nn as nn 13 | import torch.optim as optim 14 | from torch.utils import data 15 | from torch.utils.data.sampler import RandomSampler 16 | 17 | from src.dataset import Dataset 18 | from src.model.pipeline import Pipeline 19 | from src.model.unet import UNet 20 | from src.utils.lie_algebra import se3_log 21 | from src.utils.statistics import Statistics 22 | from visualization.plots import Plotting 23 | 24 | torch.backends.cudnn.benchmark = True 25 | 26 | def rmse(outputs_se3, targets_se3): 27 | """ 28 | Compute the rotation and translation RMSE for the SE(3) poses provided. Compute RMSE for ich live run 29 | individually. 30 | 31 | Args: 32 | outputs_se3 (dict): map from id of the localized live run to a list of estimated pose transforms 33 | represented as 4x4 numpy arrays. 34 | outputs_se3 (dict): map from id of the localized live run to a list of ground truth pose transforms 35 | represented as 4x4 numpy arrays. 36 | """ 37 | 38 | for live_run_id in outputs_se3.keys(): 39 | 40 | out_mat = torch.from_numpy(np.stack(outputs_se3[live_run_id], axis=0)) 41 | trg_mat = torch.from_numpy(np.stack(targets_se3[live_run_id], axis=0)) 42 | 43 | # Get the difference in pose by T_diff = T_trg * inv(T_src) 44 | diff_mat = trg_mat.bmm(out_mat.inverse()) 45 | diff_R = diff_mat[:, 0:3, 0:3] 46 | diff_tr = diff_mat[:, 0:3, 3].numpy() 47 | 48 | err_tr_sq = (diff_tr[:, 0] * diff_tr[:, 0]) + (diff_tr[:, 1] * diff_tr[:, 1]) + (diff_tr[:, 2] * diff_tr[:, 2]) 49 | rmse_tr = np.sqrt(np.mean(err_tr_sq, axis=0)) 50 | 51 | d = torch.acos((0.5 * (diff_R[:, 0, 0] + diff_R[:, 1, 1] + diff_R[:, 2, 2] - 1.0)).clamp(-1 + 1e-6, 1 - 1e-6)) 52 | rmse_rot = np.sqrt(np.mean(np.rad2deg(d.numpy())**2, axis=0)) 53 | 54 | print(f'RMSE, live_run_id {live_run_id}, translation: {rmse_tr}') 55 | print(f'RMSE, live_run_id {live_run_id}, rotation: {rmse_rot}\n') 56 | 57 | def test_model(pipeline, net, data_loader, dof): 58 | """ 59 | Run the test. 60 | 61 | Args: 62 | pipeline (Pipeline): the training pipeline object that performs a forward pass of the training pipeline and 63 | computes pose and losses. 64 | net (torch.nn.Module): neural network module. 65 | data_loader (torch.utils.data.DataLoader): data loader for training data. 66 | dof (list[int]): indices of the pose DOF (out of the 6 DOF) that we have learned. 67 | """ 68 | start_time = time.time() 69 | 70 | net.eval() 71 | 72 | # Object to keep track of test errors. 
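# As used below, Statistics accumulates results per live run: the sample ids, the id of the
# map run being localized against, and the estimated/ground-truth poses both as 4x4 SE(3)
# matrices and as 6-DOF se(3) log vectors, so the errors can be summarized and plotted
# after the loop.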
73 | stats = Statistics() 74 | 75 | errors = None 76 | # We want to print out the avg./max target pose absolute values for reference together with the pose errors. 77 | targets_total = torch.zeros(6) 78 | targets_max = torch.zeros(6) 79 | batch_size = -1 80 | num_batches, num_examples = 0, 0 81 | 82 | with torch.no_grad(): 83 | for images, disparities, ids, pose_se3, pose_log in data_loader: 84 | 85 | batch_size = images.size(0) 86 | 87 | try: 88 | # Compute the output poses (we use -1 a placeholder as epoch is not relevant). 89 | output_se3 = pipeline.forward(net, images, disparities, pose_se3, pose_log, epoch=-1, test=True) 90 | 91 | except Exception as e: 92 | print(e) 93 | print("Ids: {}".format(ids)) 94 | continue 95 | 96 | num_batches += 1 97 | num_examples += batch_size 98 | 99 | # Get the error in each of the 6 pose DOF. 100 | diff = se3_log(output_se3.inverse().bmm(pose_se3.cuda())).detach().cpu().numpy() 101 | if num_batches == 1: 102 | errors = diff 103 | else: 104 | errors = np.concatenate((errors, diff), axis=0) 105 | 106 | targets_total += torch.sum(torch.abs(pose_log), dim=0).detach().cpu() 107 | targets_max = torch.max(targets_max, torch.max(torch.abs(pose_log.detach().cpu()), dim=0)[0]) 108 | 109 | # Collect the poses so that we can plot the results later. 110 | output_log_np = se3_log(output_se3).detach().cpu().numpy() # Bx6 111 | target_log_np = se3_log(pose_se3).detach().cpu().numpy() 112 | output_se3_np = output_se3.detach().cpu().numpy() # Bx4x4 113 | target_se3_np = pose_se3.detach().cpu().numpy() 114 | # kpt_inliers = torch.sum(saved_data['valid_inliers'][(0, 1)], dim=2).detach().cpu().numpy() # B x 1 115 | 116 | # Loop over each datas sample in the batch. 117 | for k in range(output_se3_np.shape[0]): 118 | sample_id = ids[k] 119 | _, live_run_id, _, map_run_id, _ = re.findall('\w+', sample_id) 120 | 121 | stats.add_sample_id(live_run_id, sample_id) 122 | stats.add_live_run_id(live_run_id) 123 | stats.set_map_run_id(map_run_id) 124 | stats.add_outputs_targets_se3(live_run_id, output_se3_np[k, :, :], target_se3_np[k, :, :]) 125 | stats.add_outputs_targets_log(live_run_id, output_log_np[k, :], target_log_np[k, :]) 126 | # stats.add_kpt_inliers(live_run_id, kpt_inliers[k, 0]) 127 | 128 | # Compute errors. 129 | # RMSE for each pose DOF. 130 | errors[:, 3:6] = np.rad2deg(errors[:, 3:6]) 131 | test_errors = np.sqrt(np.mean(errors ** 2, axis=0)).reshape(1, 6) 132 | 133 | targets_avg = (targets_total * (1.0 / num_examples)).detach().cpu().numpy() 134 | targets_avg[3:6] = np.rad2deg(targets_avg[3:6]) 135 | targets_max = targets_max.detach().cpu().numpy() 136 | targets_max[3:6] = np.rad2deg(targets_max[3:6]) 137 | 138 | print(f'\nMap run id: {stats.get_map_run_id()}') 139 | print(f'Live run ids: {stats.get_live_run_ids()}') 140 | print(f'Pose RMSE by DOF: {test_errors[:, np.array(dof)]}') 141 | print(f'Average pose targets by DOF: {targets_avg[np.array(dof)]}') 142 | print(f'Max pose targets by DOF: {targets_max[np.array(dof)]}') 143 | 144 | print(f'Path test duration: {time.time() - start_time} seconds.\n') 145 | 146 | return stats 147 | 148 | def test(pipeline, net, test_loaders, results_path): 149 | """ 150 | Test the model by localizing different runs to each other from one or more paths. Print the pose errors and 151 | losses and plot the target and estimated poses for each DOF for each test. 152 | 153 | Args: 154 | pipeline (Pipeline): the training pipeline object that performs a forward pass of the training pipeline and 155 | computes pose and losses. 
156 | net (torch.nn.Module): neural network module. 157 | test_loaders (dict): maps path names to data loaders for test data. 158 | results_path (string): directory for storing results (outputs, plots). 159 | """ 160 | # Helper class for plotting results. 161 | plotting = Plotting(results_path) 162 | 163 | # Find which of the DOF of the 6 DOF pose we want to learn. Record the indices to keep in the pose vector. 164 | dof = [0, 1, 5] if 'pose_plane' in config['loss']['types'] else [0, 1, 2, 3, 4, 5] 165 | 166 | # The data loaders are stored in a dict that maps from path name to a list of data loaders as we may test using 167 | # data from more than one path. Also, for each path we localize different runs against each other. One run is used 168 | # as the map and then one or more other runs are localized to this map run. One individual data loader contains all 169 | # the samples for all live localized back to one map run. 170 | for path_name in test_loaders.keys(): 171 | 172 | print(f'\nTesting path: {path_name}') 173 | 174 | start_time = time.time() 175 | 176 | # Loop over each data loader (one data loader per map run we localize to). 177 | for i in range(len(test_loaders[path_name])): 178 | 179 | path_stats = test_model(pipeline, net, test_loaders[path_name][i], dof) 180 | 181 | outputs_log = path_stats.get_outputs_log() 182 | targets_log = path_stats.get_targets_log() 183 | map_run_id = path_stats.get_map_run_id() 184 | live_run_ids = path_stats.get_live_run_ids() 185 | sample_ids = path_stats.get_sample_ids() 186 | 187 | # Plot each DOF of the estimated and target poses for each pair of live run localized to map run. 188 | plotting.plot_outputs(outputs_log, targets_log, path_name, map_run_id, dof) 189 | 190 | # Compute the RMSE for translation and rotation if we are using all 6 DOF. 191 | # TODO: provide the same for SE(2). 192 | if len(dof) == 6: 193 | outputs_se3 = path_stats.get_outputs_se3() 194 | targets_se3 = path_stats.get_targets_se3() 195 | rmse(outputs_se3, targets_se3) 196 | 197 | print(f'Test time for path {path_name}: {time.time() - start_time}') 198 | 199 | def main(config): 200 | """ 201 | Set up everything needed for training and call the function to run the training loop. 202 | 203 | Args: 204 | config (dict): configurations for training the network. 205 | """ 206 | results_path = f"{config['home_path']}/results/{config['experiment_name']}/" 207 | checkpoints_path = f"{config['home_path']}/networks" 208 | data_path = f"{config['home_path']}/data" 209 | datasets_path = f"{config['home_path']}/datasets" 210 | 211 | checkpoint_name = config['checkpoint_name'] 212 | dataset_name = config['dataset_name'] 213 | 214 | dataset_path = f'{datasets_path}/{dataset_name}.pickle' 215 | checkpoint_path = f'{checkpoints_path}/{checkpoint_name}.pth' 216 | 217 | if not os.path.exists(results_path): 218 | os.makedirs(results_path) 219 | 220 | # Print outputs to files 221 | orig_stdout = sys.stdout 222 | fl = open(f'{results_path}out_test.txt', 'w') 223 | sys.stdout = fl 224 | orig_stderr = sys.stderr 225 | fe = open(f'{results_path}err_test.txt', 'w') 226 | sys.stderr = fe 227 | 228 | # Record the config settings. 229 | print(config) 230 | 231 | # Load the data. 
232 | dataloader_params = config['data_loader'] 233 | dataset_params = config['dataset'] 234 | dataset_params['data_dir'] = data_path 235 | 236 | localization_data = None 237 | with open(dataset_path, 'rb') as handle: 238 | localization_data = pickle.load(handle) 239 | 240 | # The localization testing may contain data from different paths. 241 | path_loaders = {} 242 | for path_name in localization_data.paths: 243 | 244 | # We localize a set of runs against each other. One run is used as thep map and several other runs are used as 245 | # 'live' runs and localized to the map run. We create one data loader for each map run. Hence, the loader will 246 | # hold all data samples of localizing each live run to the map run. 247 | num_map_runs = len(localization_data.test_ids[path_name]) # The number of data loaders we will need. 248 | path_loaders[path_name] = [] 249 | 250 | index = 0 251 | while index < num_map_runs: 252 | 253 | # Check that the dataset is not empty before adding. 254 | if len(localization_data.test_ids[path_name][index]) > 0: 255 | 256 | test_set = Dataset(**dataset_params) 257 | test_set.load_mel_data(localization_data, 'testing', path_name, index) 258 | test_loader = data.DataLoader(test_set, **dataloader_params) 259 | path_loaders[path_name] += [test_loader] 260 | 261 | index += 1 262 | 263 | # Set up device, using GPU 0. 264 | device = torch.device('cuda:{}'.format(0) if torch.cuda.device_count() > 0 else 'cpu') 265 | torch.cuda.set_device(0) 266 | 267 | # Set training pipeline. 268 | testing_pipeline = Pipeline(config) 269 | testing_pipeline = testing_pipeline.cuda() 270 | 271 | # Set up the network. 272 | net = UNet(config['network']['num_channels'], 273 | config['network']['num_classes'], 274 | config['network']['layer_size']) 275 | 276 | # Load the network weights from a checkpoint. 277 | if os.path.exists(checkpoint_path): 278 | checkpoint = torch.load(checkpoint_path) 279 | net.load_state_dict(checkpoint['model_state_dict']) 280 | else: 281 | raise RuntimeError(f'The specified checkpoint path does not exists: {checkpoint_path}') 282 | 283 | net.cuda() 284 | 285 | test(testing_pipeline, net, path_loaders, results_path) 286 | 287 | # Stop writing outputs to file. 
288 | sys.stdout = orig_stdout 289 | fl.close() 290 | sys.stderr = orig_stderr 291 | fe.close() 292 | 293 | if __name__ == '__main__': 294 | parser = argparse.ArgumentParser() 295 | parser.add_argument('--config', default=None, type=str, 296 | help='config file path (default: None)') 297 | 298 | args = parser.parse_args() 299 | 300 | with open(args.config) as f: 301 | config = json.load(f) 302 | 303 | main(config) -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import pickle 5 | import sys 6 | import time 7 | 8 | import numpy as np 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | from torch.utils import data 14 | from torch.utils.data.sampler import RandomSampler 15 | 16 | from src.dataset import Dataset 17 | from src.model.pipeline import Pipeline 18 | from src.model.unet import UNet 19 | from src.utils.early_stopping import EarlyStopping 20 | from src.utils.lie_algebra import se3_log 21 | from src.utils.statistics import Statistics 22 | from visualization.plots import Plotting 23 | 24 | torch.backends.cudnn.benchmark = True 25 | 26 | def execute_epoch(pipeline, net, data_loader, stats, epoch, mode, config, dof, optimizer=None, scheduler=None): 27 | """ 28 | Train with data for one epoch. 29 | 30 | Args: 31 | pipeline (Pipeline): the training pipeline object that performs a forward pass of the training pipeline and 32 | computes pose and losses. 33 | net (torch.nn.Module): neural network module. 34 | data_loader (torch.utils.data.DataLoader): data loader for training data. 35 | stats (Statistics): an object for keeping track of losses. 36 | epoch (int): current epoch. 37 | mode (string): 'training' or 'validation' 38 | config (dict): parameter configurations. 39 | dof (list[int]): indices of the pose DOF (out of the 6 DOF) that we want to learn. 40 | optimizer (torch.optim.Optimizer or None): optimizer if training or None if validation. 41 | scheduler(torch.optim.lr_scheduler or None): scheduler if training or None if validation. 42 | 43 | Returns: 44 | avg_loss (float): the average loss for the epoch computed over all batches. The average for the weighted 45 | sum of all loss types. 46 | stats (Statistics): object with recorded losses for all processed data samples. 47 | """ 48 | start_time = time.time() 49 | 50 | if mode == 'training': 51 | net.train() 52 | else: 53 | net.eval() 54 | 55 | epoch_losses = {} 56 | errors = None 57 | # We want to print out the avg./max target pose absolute values for reference together with the pose errors. 58 | targets_total = torch.zeros(6) 59 | targets_max = torch.zeros(6) 60 | batch_size = -1 61 | num_batches, num_examples = 0, 0 62 | 63 | with torch.set_grad_enabled(mode == 'training'): 64 | for images, disparities, ids, pose_se3, pose_log in data_loader: 65 | 66 | losses = {} 67 | batch_size = images.size(0) 68 | if mode == 'training': 69 | optimizer.zero_grad() 70 | 71 | try: 72 | # Compute the loss and the output pose. 
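# `losses` is a dict of scalar tensors keyed by loss type; it always contains 'total' (the
# weighted sum that is backpropagated below) and, once epoch >= start_pose_estimation, the
# pose terms (e.g. 'rotation'/'translation' or 'pose'). `output_se3` is the Bx4x4 T_trg_src:
# the estimate once pose estimation has started, otherwise the pipeline returns the
# ground-truth pose as a placeholder.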
73 | losses, output_se3 = pipeline.forward(net, images, disparities, pose_se3, pose_log, epoch) 74 | 75 | if mode == 'training': 76 | losses['total'].backward() 77 | 78 | except Exception as e: 79 | print(e) 80 | print("Ids: {}".format(ids)) 81 | continue 82 | 83 | if mode == 'training': 84 | torch.nn.utils.clip_grad_norm(net.parameters(), max_norm=2.0, norm_type=2) 85 | optimizer.step() 86 | 87 | num_batches += 1 88 | num_examples += batch_size 89 | 90 | # Get the error in each of the 6 pose DOF. 91 | diff = se3_log(output_se3.inverse().bmm(pose_se3.cuda())).detach().cpu().numpy() 92 | if num_batches == 1: 93 | errors = diff 94 | else: 95 | errors = np.concatenate((errors, diff), axis=0) 96 | 97 | # Save the losses during the epoch. 98 | for loss_type in losses.keys(): 99 | if loss_type in epoch_losses: 100 | epoch_losses[loss_type] += losses[loss_type].item() 101 | else: 102 | epoch_losses[loss_type] = losses[loss_type].item() 103 | 104 | targets_total += torch.sum(torch.abs(pose_log), dim=0).detach().cpu() 105 | targets_max = torch.max(targets_max, torch.max(torch.abs(pose_log.detach().cpu()), dim=0)[0]) 106 | 107 | # Compute and print the statistics for the finished epoch. 108 | print(f'{mode}, epoch {epoch}') 109 | 110 | # Compute the average training losses. 111 | for loss_type in epoch_losses.keys(): 112 | epoch_losses[loss_type] = epoch_losses[loss_type] / float(num_batches) 113 | print(f'Average {mode} loss, {loss_type}: {epoch_losses[loss_type]:.6f}') 114 | 115 | # Only store and print pose errors if we have started computing pose (and are not just computing keypoint loss). 116 | if epoch >= config['training']['start_pose_estimation']: 117 | 118 | # RMSE for each pose DOF. 119 | errors[:, 3:6] = np.rad2deg(errors[:, 3:6]) 120 | epoch_errors = np.sqrt(np.mean(errors ** 2, axis=0)).reshape(1, 6) 121 | 122 | stats.add_epoch_stats(epoch_losses, epoch_errors) 123 | 124 | targets_avg = (targets_total * (1.0 / num_examples)).detach().cpu().numpy() 125 | targets_avg[3:6] = np.rad2deg(targets_avg[3:6]) 126 | targets_max = targets_max.detach().cpu().numpy() 127 | targets_max[3:6] = np.rad2deg(targets_max[3:6]) 128 | 129 | # Only print the results for the relevant DOF that we are learning. 130 | print(f'Pose RMSE by DOF: {epoch_errors[:, np.array(dof)]}') 131 | print(f'Average pose targets by DOF: {targets_avg[np.array(dof)]}') 132 | print(f'Max pose targets by DOF: {targets_max[np.array(dof)]}') 133 | 134 | print(f'Epoch duration: {time.time() - start_time} seconds. \n') 135 | 136 | # Run if we are training, and we are using a scheduler. 137 | if mode == 'training': 138 | scheduler.step() 139 | 140 | return epoch_losses['total'], stats 141 | 142 | 143 | def plot_epoch_statistics(train_stats, validation_stats, plotting, dof): 144 | """ 145 | Plot the losses from training and validation over epochs. 146 | 147 | Args: 148 | train_stats (Statistics): an object for keeping track of losses and errors for the training data. 149 | validation_stats (Statistics): an object for keeping track of losses and errors for the validation data. 150 | plotting (Plotting): an object to handle plotting. 151 | dof (list[int]): indices of the DOF to plot. 
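        Called once per epoch from train() below, e.g.
        plot_epoch_statistics(train_stats, validation_stats, plotting, [0, 1, 5]) when only
        the planar DOF (x, y, heading) are being learned.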
152 | """ 153 | train_epoch_stats = train_stats.get_epoch_stats() 154 | valid_epoch_stats = validation_stats.get_epoch_stats() 155 | 156 | plotting.plot_epoch_losses(train_epoch_stats[0], valid_epoch_stats[0]) 157 | plotting.plot_epoch_errors(train_epoch_stats[1], valid_epoch_stats[1], dof) 158 | 159 | def train(pipeline, net, optimizer, scheduler, train_loader, validation_loader, start_epoch, min_validation_loss, 160 | train_stats, validation_stats, config, results_path, checkpoint_path): 161 | """ 162 | Run the training loop. 163 | 164 | Args: 165 | pipeline (Pipeline): the training pipeline object that performs a forward pass of the training pipeline and 166 | computes pose and losses. 167 | net (torch.nn.Module): neural network module. 168 | optimizer (torch.optim.Optimizer): training optimizer. 169 | scheduler (torch.optim.lr_scheduler): training scheduler. 170 | train_loader (torch.utils.data.DataLoader): data loader for training data. 171 | validation_loader (torch.utils.data.DataLoader): data loader for validation data. 172 | start_epoch (int): epoch we start training from, 0 if starting from scratch. 173 | min_validation_loss (float): the minimum validation loss during training. 174 | train_stats (Statistics): an object for keeping track of losses for the training data. 175 | validation_stats (Statistics): an object for keeping track of losses for the validation data. 176 | config (dict): configurations for model training. 177 | results_path (string): directory for storing results (outputs, plots). 178 | checkpoint_path (string): file path to store the checkpoint. 179 | """ 180 | # Helper class for plotting results. 181 | plotting = Plotting(results_path) 182 | 183 | # Early stopping keeps track of when we stop training (validation loss does not decrease) and when we store a 184 | # checkpoint (each time validation loss decreases below or is equal to the minimum value). 185 | last_save_epoch = start_epoch - 1 if start_epoch > 0 else 0 186 | early_stopping = EarlyStopping(config['training']['patience'], min_validation_loss, last_save_epoch) 187 | 188 | # Find which of the DOF of the 6 DOF pose we want to learn. Record the indices to keep in the pose vector. 189 | dof = [0, 1, 5] if 'pose_plane' in config['loss']['types'] else [0, 1, 2, 3, 4, 5] 190 | 191 | # The training loop. 192 | for epoch in range(start_epoch, config['training']['max_epochs']): 193 | 194 | train_loss, train_stats = execute_epoch(pipeline, 195 | net, 196 | train_loader, 197 | train_stats, 198 | epoch, 199 | 'training', 200 | config, 201 | dof, 202 | optimizer, 203 | scheduler) 204 | 205 | validation_loss, validation_stats = execute_epoch(pipeline, 206 | net, 207 | validation_loader, 208 | validation_stats, 209 | epoch, 210 | 'validation', 211 | config, 212 | dof) 213 | 214 | # Only start checking the loss after we start computing the pose loss. Sometimes we only compute keypoint loss 215 | # for the first few epochs. 216 | if epoch >= config['training']['start_pose_estimation']: 217 | 218 | # Plot the losses during training and validations. 219 | plot_epoch_statistics(train_stats, validation_stats, plotting, dof) 220 | 221 | stop, min_validation_loss = early_stopping.check_stop(validation_loss, 222 | net, 223 | optimizer, 224 | checkpoint_path, 225 | train_stats, 226 | validation_stats, 227 | epoch) 228 | 229 | # Stop the training loop if we have exceeded the patience. 
230 | if stop: 231 | break 232 | 233 | 234 | def main(config): 235 | """ 236 | Set up everything needed for training and call the function to run the training loop. 237 | 238 | Args: 239 | config (dict): configurations for training the network. 240 | """ 241 | results_path = f"{config['home_path']}/results/{config['experiment_name']}/" 242 | checkpoints_path = f"{config['home_path']}/networks" 243 | data_path = f"{config['home_path']}/data" 244 | datasets_path = f"{config['home_path']}/datasets" 245 | 246 | checkpoint_name = config['checkpoint_name'] 247 | dataset_name = config['dataset_name'] 248 | 249 | dataset_path = f'{datasets_path}/{dataset_name}.pickle' 250 | checkpoint_path = f'{checkpoints_path}/{checkpoint_name}' 251 | 252 | if not os.path.exists(results_path): 253 | os.makedirs(results_path) 254 | 255 | if not os.path.exists(checkpoints_path): 256 | os.makedirs(checkpoint_path) 257 | 258 | # Print outputs to files 259 | orig_stdout = sys.stdout 260 | fl = open(f'{results_path}out_train.txt', 'w') 261 | sys.stdout = fl 262 | orig_stderr = sys.stderr 263 | fe = open(f'{results_path}err_train.txt', 'w') 264 | sys.stderr = fe 265 | 266 | # Record the config settings. 267 | print(f'\nTraining parameter configurations: \n{config}\n') 268 | 269 | # Load the data. 270 | dataloader_params = config['data_loader'] 271 | dataset_params = config['dataset'] 272 | dataset_params['data_dir'] = data_path 273 | 274 | localization_data = None 275 | with open(dataset_path, 'rb') as handle: 276 | localization_data = pickle.load(handle) 277 | 278 | # Training data generator (randomly sample a subset of the full dataset for each epoch). 279 | train_set = Dataset(**dataset_params) 280 | train_sampler = RandomSampler(train_set, replacement=True, num_samples=10000) 281 | train_set.load_mel_data(localization_data, 'training') 282 | train_loader = data.DataLoader(train_set, sampler=train_sampler, **dataloader_params) 283 | 284 | # Validation data generator (randomly sample a subset of the full dataset for each epoch). 285 | validation_set = Dataset(**dataset_params) 286 | validation_sampler = RandomSampler(validation_set, replacement=True, num_samples=2500) 287 | validation_set.load_mel_data(localization_data, 'validation') 288 | validation_loader = data.DataLoader(validation_set, sampler=validation_sampler, **dataloader_params) 289 | 290 | # Set up device, using GPU 0 291 | device = torch.device('cuda:{}'.format(0) if torch.cuda.device_count() > 0 else 'cpu') 292 | torch.cuda.set_device(0) 293 | 294 | # Set training pipeline 295 | training_pipeline = Pipeline(config) 296 | training_pipeline = training_pipeline.cuda() 297 | 298 | # Set up the network, optimizer, and scheduler 299 | net = UNet(config['network']['num_channels'], 300 | config['network']['num_classes'], 301 | config['network']['layer_size']) 302 | 303 | optimizer = torch.optim.Adam(net.parameters(), lr=config['optimizer']['lr']) 304 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 305 | step_size=config['scheduler']['step_size'], 306 | gamma=config['scheduler']['gamma']) 307 | 308 | # Variables for keeping track of training progress. 
309 | start_epoch = 0 310 | min_validation_loss = 10e9 311 | train_stats = Statistics() 312 | validation_stats = Statistics() 313 | 314 | # Resume training from checkpoint if available 315 | if os.path.exists(f'{checkpoint_path}.pth'): 316 | checkpoint = torch.load(f'{checkpoint_path}.pth') 317 | net.load_state_dict(checkpoint['model_state_dict']) 318 | 319 | if 'optimizer_state_dict' in checkpoint.keys(): 320 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 321 | 322 | for state in optimizer.state.values(): 323 | for k, v in state.items(): 324 | if isinstance(v, torch.Tensor): 325 | state[k] = v.cuda() 326 | 327 | start_epoch = checkpoint['epoch'] + 1 if 'epoch' in checkpoint.keys() else 0 328 | min_validation_loss = checkpoint['val_loss'] if 'val_loss' in checkpoint.keys() else 10e9 329 | train_stats = checkpoint['train_stats'] if 'train_stats' in checkpoint.keys() else Statistics() 330 | validation_stats = checkpoint['valid_stats'] if 'valid_stats' in checkpoint.keys() else Statistics() 331 | 332 | net.cuda() 333 | 334 | train(training_pipeline, net, optimizer, scheduler, train_loader, validation_loader, start_epoch, 335 | min_validation_loss, train_stats, validation_stats, config, results_path, checkpoint_path) 336 | 337 | # Stop writing outputs to file. 338 | sys.stdout = orig_stdout 339 | fl.close() 340 | sys.stderr = orig_stderr 341 | fe.close() 342 | 343 | if __name__ == '__main__': 344 | parser = argparse.ArgumentParser() 345 | parser.add_argument('--config', default=None, type=str, 346 | help='config file path (default: None)') 347 | 348 | args = parser.parse_args() 349 | 350 | with open(args.config) as f: 351 | config = json.load(f) 352 | 353 | main(config) -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utiasASRL/deep_learned_visual_features/f4710364993ca56f797ecc936b252e6267910ec1/src/utils/__init__.py -------------------------------------------------------------------------------- /src/utils/early_stopping.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | class EarlyStopping: 5 | """Based on https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py 6 | Early stops the training if validation loss doesn't improve after a given patience. 7 | """ 8 | def __init__(self, patience, loss_min, epoch_min): 9 | """ 10 | Initialize by setting the chosen patience and set the minimum loss. 11 | 12 | Args: 13 | patience (int): How long to wait after last time validation loss improved. 14 | Default: 7 15 | """ 16 | self.patience = patience 17 | self.counter = 0 18 | self.loss_min = loss_min 19 | self.epoch_min = epoch_min 20 | 21 | def check_stop(self, loss, model, optimizer, checkpoint_path, train_stats, validation_stats, epoch): 22 | """ 23 | Check if model training needs to stop based on the validation loss. Save the model of the validation loss 24 | improves (or stays at minimum). 25 | 26 | Args: 27 | loss (float): validation loss. 28 | model (torch.nn.Module): neural network model. 29 | optimizer (torch.optim.Optimizer): training optimizer. 30 | checkpoint_path (string name): file path for saving the model. 31 | train_stats (float): validation loss. TODO 32 | validation_stats (float): validation loss. TODO 33 | epoch (int): training epoch. 
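        Returns:
            stop (bool): True if the validation loss has not improved for `patience` epochs
                and training should stop, False otherwise.
            loss_min (float): the minimum validation loss observed so far.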
34 | """ 35 | 36 | if loss <= self.loss_min: 37 | 38 | print(f'Validation loss decreased ({self.loss_min:.6f} --> {loss:.6f}). Saving model at epoch {epoch}.\n') 39 | 40 | self.counter = 0 41 | self.loss_min = loss 42 | self.epoch_min = epoch 43 | 44 | torch.save({'epoch': epoch, 45 | 'model_state_dict': model.state_dict(), 46 | 'optimizer_state_dict': optimizer.state_dict(), 47 | 'loss': loss, 48 | 'train_stats': train_stats, 49 | 'valid_stats': validation_stats, 50 | }, '{}_{}.pth'.format(checkpoint_path, epoch)) # Add epoch so we don't overwrite existing file. 51 | 52 | elif loss > self.loss_min: 53 | 54 | self.counter += 1 55 | 56 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 57 | print(f'Current validation loss: {loss:.10f}, min validation loss: {self.loss_min:.10f}\n') 58 | 59 | if self.counter >= self.patience: 60 | print('Early stopping: final model saved at epoch {}'.format(self.epoch_min)) 61 | return True, self.loss_min 62 | 63 | return False, self.loss_min -------------------------------------------------------------------------------- /src/utils/keypoint_tools.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | def normalize_coords(coords_2D, batch_size, height, width): 6 | """ 7 | Normalize 2D keypoint coordinates to lie in the range [-1, 1]. 8 | 9 | Args: 10 | coords_2D (torch.tensor): 2D image coordinates store in order (u, v), (Bx2xN). 11 | batch_size (int): batch size. 12 | height (int): image height. 13 | width (int): image width. 14 | 15 | Returns: 16 | coords_2D_norm (torch.tensor): coordinates normalized to range [-1, 1] and stored in order (u, v), (BxNx2). 17 | """ 18 | u_norm = (2 * coords_2D[:, 0, :].reshape(batch_size, -1) / (width - 1)) - 1 19 | v_norm = (2 * coords_2D[:, 1, :].reshape(batch_size, -1) / (height - 1)) - 1 20 | 21 | return torch.stack([u_norm, v_norm], dim=2) 22 | 23 | def get_norm_descriptors(descriptor_map, sample=False, keypoints_norm=None): 24 | """ 25 | Normalize the descriptors. 26 | 27 | Args: 28 | descriptor_map (torch.tensor): the descriptor map, either dense with one descriptor for each image pixel, 29 | BxCxHxW, or sparse with one descriptor for each keypoint, BxCxN. 30 | sample (bool): whether to sample descriptors for keypoints in the image or use descriptors for each 31 | pixel. 32 | keypoints_norm (torch.tensor or None): normalized keypoint coordinates if we are sampling, otherwise None. 33 | 34 | Returns: 35 | descriptors_norm (torch.tensor): the normalized descriptors, (BxCx(N or HW)) 36 | """ 37 | 38 | if len(descriptor_map.size()) == 4: 39 | # The descriptor map has dense descriptors, one for each pixel. 40 | batch_size, channels, height, width = descriptor_map.size() 41 | 42 | if sample: 43 | # Sample descriptors for the given keypoints. 44 | descriptors = F.grid_sample(descriptor_map, keypoints_norm, mode='bilinear') # BxCx1xN 45 | descriptors = descriptors.reshape(batch_size, channels, keypoints_norm.size(2)) # BxCxN 46 | else: 47 | descriptors = descriptor_map.reshape(batch_size, channels, height * width) # BxCxHW 48 | else: 49 | # The descriptor map has sparse descriptors, one for each keypoint. 
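# The normalization below standardizes each descriptor over its channel dimension (zero
# mean, unit standard deviation), so a dot product between two normalized descriptors acts
# like a correlation score. Minimal numeric illustration (hypothetical values):
#
#   d = torch.tensor([[[1.0], [3.0], [5.0]]])   # BxCxN with B=1, C=3, N=1
#   d_norm = (d - d.mean(dim=1, keepdim=True)) / d.std(dim=1, keepdim=True)
#   # mean = 3.0, std = 2.0 (unbiased), so d_norm = [[[-1.0], [0.0], [1.0]]]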
50 | descriptors = descriptor_map 51 | 52 | descriptors_mean = torch.mean(descriptors, dim=1, keepdim=True) # Bx1x(N or HW) 53 | descriptors_std = torch.std(descriptors, dim=1, keepdim=True) # Bx1x(N or HW) 54 | descriptors_norm = (descriptors - descriptors_mean) / descriptors_std # BxCx(N or HW) 55 | 56 | return descriptors_norm 57 | 58 | def get_scores(scores_map, keypoints_norm): 59 | """ 60 | Sample scores for keypoints. 61 | 62 | Args: 63 | scores_map (torch.tensor): the scores for each pixel in the image, (Bx1xHxW). 64 | keypoints_norm (torch.tensor): normalized keypoint coordinates for sampling, (BxNx2). 65 | 66 | Returns: 67 | kpt_scores (torch.tensor): a score for each keypoint, (Bx1xN). 68 | """ 69 | batch_size, _, num_points, _ = keypoints_norm.size() 70 | 71 | kpt_scores = F.grid_sample(scores_map, keypoints_norm, mode='bilinear') # Bx1x1xN 72 | kpt_scores = kpt_scores.reshape(batch_size, 1, num_points) 73 | 74 | return kpt_scores 75 | 76 | def get_keypoint_info(kpt_2D, scores_map, descriptors_map, disparity, stereo_cam): 77 | """ 78 | Gather information we need associated with each detected keypoint. Compute the 3D point coordinates, determine 79 | which keypoints have valid 3D coordinates, get the normalized descriptor for each keypoint, and the score for 80 | each keypoint. 81 | 82 | Args: 83 | kpt_2D (torch.tensor): keypoint 2D image coordinates, (Bx2xN). 84 | scores_map (torch.tensor): scores for each pixel, (Bx1xHxW). 85 | descriptors_map (torch.tensor): descriptors for each pixel, (BxCxHxW). 86 | disparity (torch.tensor): disparity for one stereo pair, (BxHxW). 87 | stereo_cam (StereoCameraModel): stereo camera model. 88 | 89 | Returns: 90 | kpt_3D (torch.tensor): keypoint 3D coordinates in the sensor frame (left camera frame) given in 91 | homogeneous coordinates, (Bx4xN). 92 | valid (torch.tensor): 1 if the keypoint 3D coordinate is valid (i.e. falls in the accepted depth range) and 93 | 0 otherwise, (Bx1xN). 94 | kpt_desc_norm (torch.tensor): Normalized descriptor for each keypoint, (BxCxN). 95 | kpt_scores (torch.tensor): score for each keypoint, (Bx1xN). 
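        Illustrative shapes (hypothetical batch of 2 images with 400 keypoints each): kpt_2D
        of size 2x2x400 on a 384x512 image yields kpt_3D: 2x4x400, valid: 2x1x400,
        kpt_desc_norm: 2xCx400 and kpt_scores: 2x1x400.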
96 | 97 | """ 98 | batch_size, _, height, width = scores_map.size() 99 | 100 | kpt_2D_norm = normalize_coords(kpt_2D, batch_size, height, width).unsqueeze(1) # Bx1xNx2 101 | 102 | kpt_desc_norm = get_norm_descriptors(descriptors_map, True, kpt_2D_norm) 103 | 104 | kpt_scores = get_scores(scores_map, kpt_2D_norm) 105 | 106 | kpt_3D, valid = stereo_cam.inverse_camera_model(kpt_2D, disparity) 107 | 108 | return kpt_3D, valid, kpt_desc_norm, kpt_scores -------------------------------------------------------------------------------- /src/utils/lie_algebra.py: -------------------------------------------------------------------------------- 1 | """Code is copied from https://github.com/utiasSTARS/dpc-net/blob/master/lie_algebra.py with some adjustments made.""" 2 | 3 | import torch 4 | global EPS 5 | EPS=1e-6 # Was 1e-12 in original code 6 | 7 | def so3_wedge(phi): 8 | #Returns Nx3x3 tensor with each 1x3 row vector in phi wedge'd 9 | 10 | if phi.dim() < 2: 11 | phi = phi.unsqueeze(0) 12 | 13 | Phi = phi.new(phi.size(0), 3, 3).zero_() 14 | 15 | Phi[:, 0, 1] = -phi[:, 2] 16 | Phi[:, 1, 0] = phi[:, 2] 17 | Phi[:, 0, 2] = phi[:, 1] 18 | Phi[:, 2, 0] = -phi[:, 1] 19 | Phi[:, 1, 2] = -phi[:, 0] 20 | Phi[:, 2, 1] = phi[:, 0] 21 | return Phi 22 | 23 | def so3_vee(Phi): 24 | #Returns Nx3 tensor with each 3x3 lie algebra element converted to a 1x3 coordinate vector 25 | phi = Phi.new(Phi.size(0), 3).zero_() 26 | phi[:, 0] = Phi[:, 2, 1] 27 | phi[:, 1] = Phi[:, 0, 2] 28 | phi[:, 2] = Phi[:, 1, 0] 29 | return phi 30 | 31 | def batch_trace(R): 32 | #Takes in Nx3x3, computes trace of each 3x3 matrix, outputs Nx1 vector with traces 33 | I = R.new(3,3).zero_() 34 | I[0,0] = I[1,1] = I[2,2] = 1.0 35 | return (R*I.expand_as(R)).sum(1, keepdim=True).sum(2, keepdim=True).view(R.size(0),-1) 36 | 37 | def batch_outer_prod(vecs): 38 | #Input: NxD vectors 39 | #Output: NxDxD outer products 40 | N = vecs.size(0) 41 | D = vecs.size(1) 42 | return vecs.unsqueeze(2).expand(N,D,D)*vecs.unsqueeze(1).expand(N,D,D) 43 | 44 | def so3_log(R): 45 | # input: R 64x3x3 46 | # output: log(R) 64x3 47 | 48 | batch_size = R.size(0) 49 | 50 | # The rotation axis (not unit-length) is given by 51 | axes = R.new(batch_size, 3).zero_() 52 | 53 | axes[:, 0] = R[:, 2, 1] - R[:, 1, 2] 54 | axes[:, 1] = R[:, 0, 2] - R[:, 2, 0] 55 | axes[:, 2] = R[:, 1, 0] - R[:, 0, 1] 56 | 57 | # NOTE: clamp ensures that we don't get any nan's due to out of range numerical errors 58 | angles = torch.acos((0.5 * batch_trace(R) - 0.5).clamp(-1 + EPS, 1 - EPS)) 59 | # angles = torch.acos((0.5 * batch_trace(R) - 0.5).clamp(-1., 1.)) 60 | sin_angles = torch.sin(angles) 61 | 62 | # If angle is close to zero, use first-order Taylor expansion 63 | small_angles_mask = angles.lt(EPS).view(-1) 64 | small_angles_num = small_angles_mask.sum() 65 | # This tensor is used to extract the 3x3 R's that correspond to small angles 66 | small_angles_indices = small_angles_mask.nonzero().squeeze() 67 | 68 | large_angles_mask = small_angles_mask.logical_not() 69 | large_angles_num = large_angles_mask.sum() 70 | large_angles_indices = large_angles_mask.nonzero().squeeze() 71 | 72 | logs = R.new_empty(batch_size, 3) 73 | 74 | if small_angles_num > 0: 75 | I = R.new(3, 3).zero_() 76 | I[0, 0] = I[1, 1] = I[2, 2] = 1.0 77 | I = I.expand(small_angles_num, 3, 3) # I is now small_angles_numx3x3 78 | logs[small_angles_indices, :] = so3_vee(R[small_angles_indices] - I) 79 | 80 | if large_angles_num > 0: 81 | ax_sin = axes[large_angles_indices] / 
sin_angles[large_angles_indices].expand_as(axes[large_angles_indices]) 82 | logs[large_angles_indices, :] = 0.5 * angles[large_angles_indices].expand_as(ax_sin) * ax_sin 83 | 84 | return logs 85 | 86 | def so3_exp(phi): 87 | #input: phi Nx3 88 | #output: perturbation Nx3x3 89 | 90 | if phi.dim() < 2: 91 | phi = phi.unsqueeze(0) 92 | 93 | batch_size = phi.size(0) 94 | 95 | #Take the norms of each row 96 | angles = vec_norms(phi) 97 | 98 | I = phi.new(3, 3).zero_() 99 | I[0,0] = I[1,1] = I[2,2] = 1.0 100 | I = I.expand(batch_size, 3,3) #I is now num_samplesx3x3 101 | 102 | # If angle is close to zero, use first-order Taylor expansion 103 | small_angles_mask = angles.lt(EPS).view(-1) 104 | small_angles_num = small_angles_mask.sum() 105 | small_angles_indices = small_angles_mask.nonzero().squeeze() 106 | 107 | if small_angles_num == batch_size: 108 | #Taylor expansion 109 | I = I.expand(batch_size, 3,3) 110 | phi_w = so3_wedge(phi) 111 | return I + phi_w 112 | 113 | axes = phi / angles.expand(batch_size, 3) 114 | s = torch.sin(angles) 115 | c = torch.cos(angles) 116 | 117 | #This piece of magic multiplies each 3x3 matrix by each element in c 118 | 119 | c = c.view(-1,1,1).expand_as(I) 120 | s = s.view(-1,1,1).expand_as(I) 121 | 122 | outer_prod_axes = batch_outer_prod(axes) 123 | R = c * I + (1 - c) * outer_prod_axes + s * so3_wedge(axes) 124 | 125 | if 0 < small_angles_num < batch_size: 126 | I_small = I.expand(small_angles_num, 3,3) 127 | phi_w = so3_wedge(phi[small_angles_indices]) 128 | small_exp = I + phi_w 129 | R[small_angles_indices] = small_exp 130 | 131 | 132 | return R 133 | 134 | 135 | def vec_norms(input): 136 | #Takes Nx3 tensor of row vectors an outputs Nx1 tensor with 2-norms of each row 137 | norms = input.pow(2).sum(dim=1, keepdim=True).sqrt() 138 | return norms 139 | 140 | def vec_square_norms(input): 141 | #Takes Nx3 tensor of row vectors an outputs Nx1 tensor with squared 2-norms of each row 142 | sq_norms = input.pow(2).sum(dim=1, keepdim=True) 143 | return sq_norms 144 | 145 | def so3_inv_left_jacobian(phi): 146 | """Inverse left SO(3) Jacobian (see Barfoot). 147 | """ 148 | if phi.dim() < 2: 149 | phi = phi.unsqueeze(0) 150 | 151 | invJ = phi.new_empty(phi.shape[0], 3, 3) 152 | 153 | angles = vec_norms(phi) 154 | I = phi.new(3, 3).zero_() 155 | I[0, 0] = I[1, 1] = I[2, 2] = 1.0 156 | batch_size = phi.size(0) 157 | 158 | # If angle is close to zero, use first-order Taylor expansion 159 | small_angles_mask = angles.lt(EPS).view(-1) 160 | small_angles_num = small_angles_mask.sum() 161 | small_angles_indices = small_angles_mask.nonzero().squeeze() 162 | 163 | large_angles_mask = small_angles_mask.logical_not() 164 | large_angles_num = large_angles_mask.sum() 165 | large_angles_indices = large_angles_mask.nonzero().squeeze() 166 | 167 | if small_angles_num > 0: 168 | I_small = I.expand(small_angles_num, 3, 3) 169 | small_invJ = I_small - 0.5 * so3_wedge(phi[small_angles_indices]) 170 | invJ[small_angles_indices] = small_invJ 171 | 172 | if large_angles_num > 0: 173 | angles_large = angles[large_angles_indices] 174 | if angles_large.dim() < 2: 175 | angles_large = angles_large.unsqueeze(0) 176 | 177 | phi_large = phi[large_angles_indices] 178 | if phi_large.dim() < 2: 179 | phi_large = phi_large.unsqueeze(0) 180 | 181 | axes = phi_large / angles_large.expand(large_angles_num, 3) 182 | half_angles = 0.5 * angles_large 183 | cot_half_angles = 1. 
/ torch.tan(half_angles) 184 | I_full = I.expand(large_angles_num, 3, 3) # I is now num_samplesx3x3 185 | 186 | # Compute outer products of the Nx3 axes vectors, and put them into a Nx3x3 tensor 187 | outer_prod_axes = batch_outer_prod(axes) 188 | 189 | # This piece of magic changes the vector so that it can multiply each 3x3 matrix of a Nx3x3 tensor 190 | h_a_cot_a = (half_angles * cot_half_angles).view(-1, 1, 1).expand_as(I_full) 191 | h_a = half_angles.view(-1, 1, 1).expand_as(I_full) 192 | invJ[large_angles_indices] = h_a_cot_a * I_full + (1 - h_a_cot_a) * outer_prod_axes - (h_a * so3_wedge(axes)) 193 | 194 | return invJ 195 | 196 | def so3_left_jacobian(phi): 197 | """Inverse left SO(3) Jacobian (see Barfoot). 198 | """ 199 | 200 | angles = vec_norms(phi) 201 | I = phi.new(3, 3).zero_() 202 | I[0,0] = I[1,1] = I[2,2] = 1.0 203 | batch_size = phi.size(0) 204 | 205 | # If angle is close to zero, use first-order Taylor expansion 206 | small_angles_mask = angles.lt(EPS).view(-1) 207 | small_angles_num = small_angles_mask.sum() 208 | small_angles_indices = small_angles_mask.nonzero().squeeze() 209 | 210 | if small_angles_num == batch_size: 211 | I = I.expand(batch_size, 3,3) 212 | return I + 0.5*so3_wedge(phi) 213 | 214 | axes = phi / angles.expand(batch_size, 3) 215 | #Compute outer products of the Nx3 axes vectors, and put them into a Nx3x3 tensor 216 | outer_prod_axes = batch_outer_prod(axes) 217 | 218 | sin_ph = torch.sin(angles) 219 | cos_ph = torch.cos(angles) 220 | 221 | I_full = I.expand(batch_size, 3,3) 222 | t1 = sin_ph/angles 223 | t2 = 1 - t1 224 | t3 = (1 - cos_ph)/angles 225 | 226 | t1 = t1.view(-1,1,1).expand_as(I_full) 227 | t2 = t2.view(-1,1,1).expand_as(I_full) 228 | t3 = t3.view(-1,1,1).expand_as(I_full) 229 | 230 | J = t1 * I_full + t2 * outer_prod_axes + t3 * so3_wedge(axes) 231 | 232 | if 0 < small_angles_num < batch_size: 233 | I_small = I.expand(small_angles_num, 3,3) 234 | small_J = I_small + 0.5*so3_wedge(phi[small_angles_indices]) 235 | J[small_angles_indices] = small_J 236 | 237 | return J 238 | 239 | 240 | 241 | def so3_to_rpy(rot): 242 | """Convert a rotation matrix to RPY Euler angles.""" 243 | #Nx3x3 -> 3xN 244 | 245 | PI = 3.14159265358979323846 246 | 247 | pitch = torch.atan2(-rot[:, 2, 0], 248 | torch.sqrt(rot[:, 0, 0]**2 + rot[:, 1, 0]**2)) 249 | 250 | sec_pitch = 1. / torch.cos(pitch) 251 | yaw = torch.atan2(rot[:, 1, 0] * sec_pitch, 252 | rot[:, 0, 0] * sec_pitch) 253 | roll = torch.atan2(rot[:, 2, 1] * sec_pitch, 254 | rot[:, 2, 2] * sec_pitch) 255 | 256 | pospi2_mask = torch.abs(pitch - PI/2.0).lt(EPS).view(-1) 257 | pospi2_angles_num = pospi2_mask.sum() 258 | pospi2_indices = pospi2_mask.nonzero().squeeze() 259 | 260 | negpi2_mask = torch.abs(pitch + PI/2.0).lt(EPS).view(-1) 261 | negpi2_angles_num = negpi2_mask.sum() 262 | negpi2_indices = negpi2_mask.nonzero().squeeze() 263 | 264 | if pospi2_angles_num > 0: 265 | yaw[pospi2_indices] = 0. 266 | roll[pospi2_indices] = torch.atan2(rot[pospi2_indices, 0, 1], rot[pospi2_indices, 1, 1]) 267 | 268 | if negpi2_angles_num > 0: 269 | yaw[pospi2_indices] = 0. 
270 | roll[negpi2_indices] = -torch.atan2(rot[negpi2_indices, 0, 1], rot[negpi2_indices, 1, 1]) 271 | 272 | 273 | return torch.cat((roll.view(-1,1), pitch.view(-1,1), yaw.view(-1,1)), 1) 274 | 275 | def rpy_to_so3(rpy): 276 | """Convert RPY Euler angles to a rotation matrix.""" 277 | #Nx3 -> Nx3x3 278 | roll = rpy[:,0].view(-1,1,1) 279 | pitch = rpy[:,1].view(-1,1,1) 280 | yaw = rpy[:,2].view(-1,1,1) 281 | 282 | c_r = torch.cos(roll) 283 | s_r = torch.sin(roll) 284 | 285 | c_p = torch.cos(pitch) 286 | s_p = torch.sin(pitch) 287 | 288 | c_y = torch.cos(yaw) 289 | s_y = torch.sin(yaw) 290 | 291 | rotz = rpy.new(rpy.size(0), 3, 3).zero_() 292 | rotz[:,2,2] = 1.0 293 | rotz[:,0,0] = rotz[:,1,1] = c_y 294 | rotz[:,0,1] = -s_y 295 | rotz[:,1,0] = s_y 296 | 297 | roty = rpy.new(rpy.size(0), 3, 3).zero_() 298 | roty[:,1,1] = 1.0 299 | roty[:,0,0] = roty[:,2,2] = c_p 300 | roty[:,0,2] = s_p 301 | roty[:,2,0] = -s_p 302 | 303 | rotx = rpy.new(rpy.size(0), 3, 3).zero_() 304 | rotx[:,0,0] = 1.0 305 | rotx[:,1,1] = rotx[:,2,2] = c_r 306 | rotx[:,1,2] = -s_r 307 | rotx[:,2,1] = s_r 308 | 309 | return rotz.bmm(roty.bmm(rotx)) 310 | 311 | #================================SE(3)====================================# 312 | def se3_wedge(xi): 313 | 314 | if xi.dim() < 2: 315 | xi = xi.unsqueeze(0) 316 | 317 | #Returns Nx4x4 tensor with each 1x6 row vector in xi SE(3) wedge'd 318 | Xi = xi.new(xi.size(0), 4, 4).zero_() 319 | rho = xi[:, :3] 320 | phi = xi[:, 3:] 321 | Phi = so3_wedge(phi) 322 | 323 | Xi[:, :3, :3] = Phi 324 | Xi[:, :3, 3] = rho 325 | 326 | return Xi 327 | 328 | def se3_curly_wedge(xi): 329 | #Returns Nx6x6 tensor with each 1x6 row vector in xi SE(3) curly wedge'd 330 | Xi = xi.new(xi.size(0), 6, 6).zero_() 331 | rho = xi[:, 0:3] 332 | phi = xi[:, 3:6] 333 | Phi = so3_wedge(phi) 334 | Rho = so3_wedge(rho) 335 | 336 | Xi[:, 0:3, 0:3] = Phi 337 | Xi[:, 0:3, 3:6] = Rho 338 | Xi[:, 3:6, 3:6] = Phi 339 | 340 | return Xi 341 | 342 | def se3_log(T): 343 | 344 | """Logarithmic map for SE(3) 345 | Computes an SE(3) tangent vector from a transformation 346 | matrix. 347 | This is the inverse operation to exp. 348 | #input: T Nx4x4 349 | #output: log(T) Nx6 350 | """ 351 | if T.dim() < 3: 352 | T = T.unsqueeze(0) 353 | 354 | R = T[:,0:3,0:3] 355 | t = T[:,0:3,3:4] 356 | sample_size = t.size(0) 357 | phi = so3_log(R) 358 | invl_js = so3_inv_left_jacobian(phi) 359 | rho = (invl_js.bmm(t)).view(sample_size, 3) 360 | xi = torch.cat((rho, phi), 1) 361 | 362 | return xi 363 | 364 | def se3_exp(xi): 365 | #input: xi Nx6 366 | #output: T Nx4x4 367 | #New efficient way without having to compute Q!
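#Closed form used below: T = I + xi^ + ((1 - cos(a))/a^2) * (xi^)^2 + ((a - sin(a))/a^3) * (xi^)^3, with a = ||phi||.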
368 | 369 | if xi.dim() < 2: 370 | xi = xi.unsqueeze(0) 371 | 372 | batch_size = xi.size(0) 373 | phi = vec_norms(xi[:, 3:6]) 374 | 375 | I = xi.new(4, 4).zero_() 376 | I[0,0] = I[1,1] = I[2,2] = I[3,3] = 1.0 377 | 378 | # If angle is close to zero, use first-order Taylor expansion 379 | small_angles_mask = phi.lt(EPS).view(-1) 380 | small_angles_num = small_angles_mask.sum() 381 | small_angles_indices = small_angles_mask.nonzero().squeeze() 382 | 383 | 384 | if small_angles_num == batch_size: 385 | #Taylor expansion 386 | I = I.expand(batch_size, 4,4) 387 | xi_w = se3_wedge(xi) 388 | return I + xi_w 389 | 390 | xi_w = se3_wedge(xi) 391 | xi_w2 = xi_w.bmm(xi_w) 392 | xi_w3 = xi_w2.bmm(xi_w) 393 | 394 | phi2 = phi.pow(2) 395 | phi3 = phi.pow(3) 396 | 397 | cos_phi = torch.cos(phi) 398 | sin_phi = torch.sin(phi) 399 | 400 | I_full = I.expand(batch_size, 4,4) 401 | t2 = (1 - cos_phi)/phi2 402 | t3 = (phi - sin_phi)/phi3 403 | t2 = t2.view(-1,1,1).expand_as(I_full) 404 | t3 = t3.view(-1,1,1).expand_as(I_full) 405 | 406 | T = I_full + xi_w + t2*xi_w2 + t3*xi_w3 407 | 408 | if 0 < small_angles_num < batch_size: 409 | I_small = I.expand(small_angles_num, 4,4) 410 | xi_w = se3_wedge(xi[small_angles_indices]) 411 | small_exp = I + xi_w 412 | T[small_angles_indices] = small_exp 413 | 414 | return T 415 | 416 | 417 | def se3_inv(T): 418 | 419 | if T.dim() < 3: 420 | T = T.unsqueeze(0) 421 | 422 | #Batch invert Nx4x4 SE(3) matrices 423 | Rt = T[:, 0:3, 0:3].transpose(1,2) 424 | t = T[:, 0:3, 3:4] 425 | 426 | T_inv = T.clone() 427 | #T_inv = T.new(T.size()).zero_() 428 | #T_inv[:,3,3] = 1.0 429 | 430 | T_inv[:, 0:3, 0:3] = Rt 431 | T_inv[:, 0:3, 3:4] = -Rt.bmm(t) 432 | 433 | return T_inv 434 | 435 | def se3_Q(rho, phi): 436 | #SE(3) Q function 437 | #Used in the SE(3) jacobians 438 | #See b 439 | 440 | ph = vec_norms(phi) 441 | 442 | ph_test = phi.norm(p=2, dim=1) 443 | 444 | ph2 = ph*ph 445 | ph3 = ph2*ph 446 | ph4 = ph3*ph 447 | ph5 = ph4*ph 448 | 449 | rx = so3_wedge(rho) 450 | px = so3_wedge(phi) 451 | 452 | #Turn Nx1 into a Nx3x3 (with each 3x3 having 9 identical numbers) 453 | cph = torch.cos(ph).view(-1,1,1).expand_as(rx) 454 | sph = torch.sin(ph).view(-1,1,1).expand_as(rx) 455 | ph = ph.view(-1,1,1).expand_as(rx) 456 | ph2 = ph2.view(-1,1,1).expand_as(rx) 457 | ph3 = ph3.view(-1,1,1).expand_as(rx) 458 | ph4 = ph4.view(-1,1,1).expand_as(rx) 459 | ph5 = ph5.view(-1,1,1).expand_as(rx) 460 | 461 | m1 = 0.5 462 | m2 = (ph - sph)/ph3 463 | m3 = (ph2 + 2. * cph - 2.)/(2.*ph4) 464 | m4 = (2.*ph - 3.*sph + ph*cph)/(2.*ph5) 465 | 466 | t1 = m1 * rx 467 | t2 = m2 * (px.bmm(rx) + rx.bmm(px) + px.bmm(rx).bmm(px)) 468 | t3 = m3 * (px.bmm(px).bmm(rx) + rx.bmm(px).bmm(px) - 3. 
* px.bmm(rx).bmm(px)) 469 | t4 = m4 * (px.bmm(rx).bmm(px).bmm(px) + px.bmm(px).bmm(rx).bmm(px)) 470 | 471 | Q = t1 + t2 + t3 + t4 472 | 473 | return Q 474 | 475 | def se3_left_jacobian(xi): 476 | """Computes SE(3) left jacobian of N xi vectors (arranged into NxD tensor)""" 477 | rho = xi[:, 0:3] 478 | phi = xi[:, 3:6] 479 | 480 | batch_size = xi.size(0) 481 | angles = vec_norms(xi[:, 3:6]) 482 | 483 | # If angle is close to zero, use first-order Taylor expansion 484 | small_angles_mask = angles.lt(EPS).view(-1) 485 | small_angles_num = small_angles_mask.sum() 486 | small_angles_indices = small_angles_mask.nonzero().squeeze() 487 | 488 | if small_angles_num == batch_size: 489 | #Taylor expansion 490 | I = xi.new(6, 6).zero_() 491 | I[0,0] = I[1,1] = I[2,2] = I[3,3] = I[4,4] = I[5,5] = 1.0 492 | I = I.expand(batch_size, 6,6) 493 | return I + 0.5*se3_curly_wedge(xi) 494 | 495 | 496 | J = so3_left_jacobian(phi) 497 | Q = se3_Q(rho, phi) 498 | zero_mat = xi.new(3, 3).zero_().expand(batch_size, 3, 3) 499 | 500 | upper_rows = torch.cat((J, Q), 2) 501 | lower_rows = torch.cat((zero_mat, J), 2) 502 | 503 | J = torch.cat((upper_rows, lower_rows), 1) 504 | 505 | if 0 < small_angles_num < batch_size: 506 | I = xi.new(6, 6).zero_() 507 | I[0,0] = I[1,1] = I[2,2] = I[3,3] = I[4,4] = I[5,5] = 1.0 508 | I = I.expand(small_angles_num, 6,6) 509 | small_J = I + 0.5*se3_curly_wedge(xi[small_angles_indices]) 510 | J[small_angles_indices] = small_J 511 | 512 | return J 513 | 514 | 515 | 516 | def se3_inv_left_jacobian(xi): 517 | """Computes SE(3) inverse left jacobian of N xi vectors (arranged into NxD tensor)""" 518 | rho = xi[:, 0:3] 519 | phi = xi[:, 3:6] 520 | 521 | batch_size = xi.size(0) 522 | angles = vec_norms(xi[:, 3:6]) 523 | 524 | # If angle is close to zero, use first-order Taylor expansion 525 | small_angles_mask = angles.lt(EPS).view(-1) 526 | small_angles_num = small_angles_mask.sum() 527 | small_angles_indices = small_angles_mask.nonzero().squeeze() 528 | 529 | if small_angles_num == batch_size: 530 | #Taylor expansion 531 | I = xi.new(6, 6).zero_() 532 | I[0,0] = I[1,1] = I[2,2] = I[3,3] = I[4,4] = I[5,5] = 1.0 533 | I = I.expand(batch_size, 6,6) 534 | return I - 0.5*se3_curly_wedge(xi) 535 | 536 | invl_j = so3_inv_left_jacobian(phi) 537 | Q = se3_Q(rho, phi) 538 | zero_mat = xi.new(3, 3).zero_().expand(batch_size, 3, 3) 539 | 540 | upper_rows = torch.cat((invl_j, -invl_j.bmm(Q).bmm(invl_j)), 2) 541 | lower_rows = torch.cat((zero_mat, invl_j), 2) 542 | 543 | inv_J = torch.cat((upper_rows, lower_rows), 1) 544 | 545 | if 0 < small_angles_num < batch_size: 546 | I = xi.new(6, 6).zero_() 547 | I[0,0] = I[1,1] = I[2,2] = I[3,3] = I[4,4] = I[5,5] = 1.0 548 | I = I.expand(small_angles_num, 6,6) 549 | small_inv_J = I - 0.5*se3_curly_wedge(xi[small_angles_indices]) 550 | inv_J[small_angles_indices] = small_inv_J 551 | 552 | 553 | return inv_J 554 | 555 | 556 | def se3_adjoint(T): 557 | if T.dim() < 2: 558 | T = T.unsqueeze(dim=0) 559 | 560 | C = T[:, :3, :3] 561 | Jrho_wedge = so3_wedge(T[:, :3, 3].view(-1, 3)) 562 | 563 | adj_T = T.new_zeros((T.shape[0], 6,6)) 564 | adj_T[:, :3, :3] = C 565 | adj_T[:, :3, 3:] = Jrho_wedge.bmm(C) 566 | adj_T[:, 3:, 3:] = C 567 | 568 | return adj_T 569 | 570 | def se3_inv_adjoint(T): 571 | if T.dim() < 2: 572 | T = T.unsqueeze(dim=0) 573 | 574 | C = T[:, :3, :3] 575 | C_T = torch.transpose(C, 1, 2) 576 | Jrho_wedge = so3_wedge(T[:, :3, 3].view(-1, 3)) 577 | 578 | inv_adj_T = T.new_zeros((T.shape[0], 6,6)) 579 | inv_adj_T[:, :3, :3] = C_T 580 | inv_adj_T[:, :3, 3:] = 
-C_T.bmm(Jrho_wedge) 581 | inv_adj_T[:, 3:, 3:] = C_T 582 | 583 | return inv_adj_T 584 | -------------------------------------------------------------------------------- /src/utils/statistics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Statistics: 4 | """ 5 | A class for keeping track of poses, errors, and losses. 6 | """ 7 | def __init__(self): 8 | self.epoch_losses = {} 9 | self.epoch_errors = np.empty(0) 10 | self.outputs_se3 = {} 11 | self.targets_se3 = {} 12 | self.outputs_log = {} 13 | self.targets_log = {} 14 | self.live_run_ids = [] 15 | self.map_run_id = None 16 | self.sample_ids = {} 17 | 18 | def get_epoch_stats(self): 19 | return self.epoch_losses, self.epoch_errors 20 | 21 | def get_outputs_se3(self): 22 | return self.outputs_se3 23 | 24 | def get_targets_se3(self): 25 | return self.targets_se3 26 | 27 | def get_outputs_log(self): 28 | return self.outputs_log 29 | 30 | def get_targets_log(self): 31 | return self.targets_log 32 | 33 | def get_sample_ids(self): 34 | return self.sample_ids 35 | 36 | def get_live_run_ids(self): 37 | return self.live_run_ids 38 | 39 | def get_map_run_id(self): 40 | return self.map_run_id 41 | 42 | def add_epoch_stats(self, losses, errors): 43 | for loss_type in losses.keys(): 44 | if loss_type in self.epoch_losses: 45 | self.epoch_losses[loss_type] += [losses[loss_type]] 46 | else: 47 | self.epoch_losses[loss_type] = [losses[loss_type]] 48 | 49 | if self.epoch_errors.shape[0] == 0: 50 | self.epoch_errors = errors 51 | else: 52 | self.epoch_errors = np.concatenate((self.epoch_errors, errors), axis=0) 53 | 54 | def add_outputs_targets_se3(self, live_run_id, output, target): 55 | if live_run_id not in self.outputs_se3.keys(): 56 | self.outputs_se3[live_run_id] = [output] 57 | self.targets_se3[live_run_id] = [target] 58 | else: 59 | self.outputs_se3[live_run_id] = self.outputs_se3[live_run_id] + [output] 60 | self.targets_se3[live_run_id] = self.targets_se3[live_run_id] + [target] 61 | 62 | def add_outputs_targets_log(self, live_run_id, outputs, targets): 63 | num_dof = outputs.shape[0] 64 | 65 | if live_run_id not in self.outputs_log.keys(): 66 | self.outputs_log[live_run_id] = outputs.reshape(1, num_dof) 67 | self.targets_log[live_run_id] = targets.reshape(1, num_dof) 68 | else: 69 | self.outputs_log[live_run_id] = np.concatenate((self.outputs_log[live_run_id], outputs.reshape(1, num_dof)), axis=0) 70 | self.targets_log[live_run_id] = np.concatenate((self.targets_log[live_run_id], targets.reshape(1, num_dof)), axis=0) 71 | 72 | def add_sample_id(self, live_run_id, sample_id): 73 | if live_run_id not in self.sample_ids.keys(): 74 | self.sample_ids[live_run_id] = [sample_id] 75 | else: 76 | self.sample_ids[live_run_id] += [sample_id] 77 | 78 | def add_live_run_id(self, live_run_id): 79 | if live_run_id not in self.live_run_ids: 80 | self.live_run_ids.append(live_run_id) 81 | 82 | def set_map_run_id(self, map_run_id): 83 | if self.map_run_id is None: 84 | self.map_run_id = map_run_id 85 | -------------------------------------------------------------------------------- /src/utils/stereo_camera_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class StereoCameraModel(nn.Module): 6 | """ 7 | The stereo camera model. 8 | """ 9 | 10 | def __init__(self, cu, cv, f, b): 11 | """ 12 | Set up the stereo camera model with the stereo camera parameters.
13 | 14 | Args: 15 | cu (float): optical centre u coordinate. 16 | cv (float): optical centre v coordinate. 17 | f (float): focal length. 18 | b (float): stereo camera base line. 19 | """ 20 | super(StereoCameraModel, self).__init__() 21 | 22 | self.cu = cu 23 | self.cv = cv 24 | self.f = f 25 | self.b = b 26 | 27 | # Matrices for camera model needed to projecting/reprojecting between the camera and image frames 28 | M, Q = self.set_camera_model_matrices(self.cu, self.cv, self.f, self.b) 29 | self.register_buffer('M', M) 30 | self.register_buffer('Q', Q) 31 | 32 | def set_camera_model_matrices(self, cu, cv, f, b): 33 | """ 34 | Create the matrices needed to project 3D camera coordinates into 2D image coordinates (M) and compute 35 | 3D camera coordinates from 2D image coordinates (Q). 36 | 37 | Args: 38 | cu (float): optical centre u coordinate. 39 | cv (float): optical centre v coordinate. 40 | f (float): focal length. 41 | b (float): stereo camera base line. 42 | 43 | Returns: 44 | M (torch.tensor): matrix used to project 3D camera coordinates to 2D image coordinates, (4x4). 45 | Q (torch.tensor): matrix used to compute 3D camera coordinates from 2D image coordinates, (4x4). 46 | """ 47 | # Matrix needed to project 3D points into stereo camera coordinates. 48 | # [ul, vl, ur, vr]^T = (1/z) * M * [x, y, z, 1]^T (using left camera model) 49 | # 50 | # [f, 0, cu, 0] 51 | # [0, f, cv, 0] 52 | # [f, 0, cu, -f * b] 53 | # [0, f, cv, 0] 54 | # 55 | M = torch.tensor([[self.f, 0.0, self.cu, 0.0], 56 | [0.0, self.f, self.cv, 0.0], 57 | [self.f, 0.0, self.cu, -(self.f * self.b)], 58 | [0.0, self.f, self.cv, 0.0]]) 59 | 60 | # Matrix needed to transform image coordinates into 3D points. 61 | # [x, y, z, 1] = (1/d) * Q * [ul, vl, d, 1]^T 62 | # 63 | # [b, 0, 0, -b * cu] 64 | # [0, b, 0, -b * cv] 65 | # [0, 0, 0, f * b] 66 | # [0, 0, 1, 0] 67 | # 68 | Q = torch.tensor([[self.b, 0.0, 0.0, -(self.b * self.cu)], 69 | [0.0, self.b, 0.0, -(self.b * self.cv)], 70 | [0.0, 0.0, 0.0, self.f * self.b], 71 | [0.0, 0.0, 1.0, 0.0]]) 72 | 73 | return M, Q 74 | 75 | def normalize_coords(self, coords_2D, batch_size, height, width): 76 | """ 77 | Normalize 2D keypoint coordinates to lie in the range [-1, 1]. 78 | 79 | Args: 80 | coords_2D (torch.tensor): 2D image coordinates store in order (u, v), (Bx2xN). 81 | batch_size (int): batch size. 82 | height (int): image height. 83 | width (int): image width. 84 | 85 | Returns: 86 | coords_2D_norm (torch.tensor): coordinates normalized to range [-1, 1] and stored in order (u, v), (BxNx2). 87 | """ 88 | u_norm = (2 * coords_2D[:, 0, :].reshape(batch_size, -1) / (width - 1)) - 1 89 | v_norm = (2 * coords_2D[:, 1, :].reshape(batch_size, -1) / (height - 1)) - 1 90 | 91 | return torch.stack([u_norm, v_norm], dim=2) 92 | 93 | def check_valid_disparity(self, disparity): 94 | """ 95 | Check if disparity falls withing our chosen accepted range (0.1m - 400.0m). 96 | 97 | Args: 98 | disparity (torch.tensor): the disparity map for the image, (Bx1xN). 99 | 100 | Returns: 101 | valid_disparity (torch.tensor): True for valid disparity values, False otherwise. 102 | """ 103 | disp_min = (self.f * self.b) / 400.0 # Farthest point 400 m 104 | disp_max = (self.f * self.b) / 0.1 # Closest point 0.1 m 105 | 106 | return (disparity >= disp_min) & (disparity <= disp_max) 107 | 108 | def camera_to_image(self, cam_coords, M): 109 | """ 110 | Project 3D points given in the camera frame into 2D image coordinates. 
111 | 112 | Args: 113 | cam_coords (torch.tensor): 3D camera coordinates given as homogeneous coordinates, (Bx4xN). 114 | M (torch.tensor): matrix for projecting points into image, (Bx4x4). 115 | 116 | Returns: 117 | img_coords (torch.tensor): 2D image coordinates given in order (ul, vl, ur, vr), (Bx4xN). 118 | """ 119 | batch_size, _, num_points = cam_coords.size() 120 | 121 | # [Ul, Vl, Ur, Vr] = M * [x, y, z, 1]^T 122 | img_coords = M.bmm(cam_coords) 123 | 124 | inv_z = 1.0 / (cam_coords[:, 2, :].reshape(batch_size, 1, num_points)) # Bx1xN, elementwise division. 125 | img_coords = img_coords * inv_z # Bx4xN, elementwise multiplication. 126 | 127 | return img_coords 128 | 129 | def camera_model(self, cam_coords): 130 | """ 131 | Project 3D points given in the camera frame into image coordinates. 132 | 133 | Args: 134 | cam_coords (torch.tensor): 3D camera coordinates given as homogeneous coordinates, (Bx4xN). 135 | 136 | Returns: 137 | img_coords (torch.tensor): 2D image coordinates given in order (ul, vl, ur, vr), (Bx4xN). 138 | """ 139 | batch_size = cam_coords.size(0) 140 | 141 | # Expand fixed matrix to the correct batch size. 142 | M = self.M.expand(batch_size, 4, 4).cuda() 143 | 144 | # Get the image coordinates. 145 | img_coords = self.camera_to_image(cam_coords, M) 146 | 147 | if (torch.sum(torch.isnan(img_coords)) > 0) or (torch.sum(torch.isinf(img_coords)) > 0): 148 | print('Warning: Nan or Inf values in image coordinate tensor.') 149 | raise ValueError("Nan or Inf in image coordinates") 150 | 151 | return img_coords 152 | 153 | def image_to_camera(self, img_coords, disparity, Q): 154 | """ 155 | Compute 3D point coordinates (in the camera frame) for the given 2D image coordinates. 156 | 157 | Args: 158 | img_coords (torch.tensor): 2D image coordinates given in order (u, v), (Bx2xN). 159 | disparity (torch.tensor): the disparity map for the image, (BxHxW). 160 | Q (torch.tensor): matrix for reprojecting image coordinates, (Bx4x4). 161 | 162 | Returns: 163 | cam_coords (torch.tensor): 3D points in camera frame given as homogeneous coordinates, (Bx4xN). 164 | valid_points (torch.tensor): value 1 for each valid 3D point with depth in accepted range, otherwise 0, 165 | (Bx1xN). 166 | """ 167 | batch_size, height, width = disparity.size() 168 | disparity = disparity.unsqueeze(1) 169 | num_points = img_coords.size(2) 170 | 171 | if (torch.sum(disparity == 0.0) > 0): 172 | print('Warning: 0.0 in disparities.') 173 | 174 | # Sample disparity for the image coordinates. 
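# grid_sample expects sampling locations normalized to [-1, 1], so the keypoint coordinates are normalized first; nearest-neighbour interpolation then picks the disparity value at each keypoint.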
175 | img_coords_norm = self.normalize_coords(img_coords, batch_size, height, width).unsqueeze(1) # Bx1xNx2 176 | 177 | point_disparities = F.grid_sample(disparity, img_coords_norm, mode='nearest', padding_mode='border') # Bx1x1xN 178 | point_disparities = point_disparities.reshape(batch_size, 1, num_points) # Bxx1xN 179 | valid_points = self.check_valid_disparity(point_disparities) # Bx1xN 180 | 181 | # Create the [ul, vl, d, 1] vector 182 | ones = torch.ones(batch_size, num_points).type_as(disparity) 183 | uvd1_pixel_coords = torch.stack((img_coords[:, 0, :], 184 | img_coords[:, 1, :], 185 | point_disparities[:, 0, :], 186 | ones), 187 | dim=1) # Bx4xN 188 | 189 | # [X, Y, Z, d]^T = Q * [ul, vl, d, 1]^T 190 | cam_coords = Q.bmm(uvd1_pixel_coords) # Bx4xN 191 | 192 | # [x, y, z, 1]^T = (1/d) * [X, Y, Z, d]^T 193 | inv_disparity = (1.0 / point_disparities) # Elementwise division 194 | cam_coords = cam_coords * inv_disparity # Elementwise multiplication 195 | 196 | return cam_coords, valid_points 197 | 198 | def inverse_camera_model(self, img_coords, disparity): 199 | """ 200 | Compute 3D point coordinates (in the camera frame) from the given 2D image coordinates. 201 | 202 | Args: 203 | img_coords (torch.tensor): 2D image coordinates given in order (u, v), (Bx2xN). 204 | disparity (torch.tensor): the disparity map for the image, (BxHxW). 205 | 206 | Returns: 207 | cam_coords (torch.tensor): 3D points in camera frame given as homogeneous coordinates, (Bx4xN). 208 | valid_points (torch.tensor): value 1 for each valid 3D point with depth in accepted range, otherwise 0, 209 | (Bx1xN). 210 | """ 211 | batch_size, height, width = disparity.size() 212 | 213 | # Expand fixed matrix to the correct batch size 214 | Q = self.Q.expand(batch_size, 4, 4).cuda() 215 | 216 | # Get the 3D camera coordinates. 217 | cam_coords, valid_points = self.image_to_camera(img_coords, disparity, Q) 218 | 219 | if (torch.sum(torch.isnan(cam_coords)) > 0) or (torch.sum(torch.isinf(cam_coords)) > 0): 220 | print('Warning: Nan or Inf values in camera coordinate tensor.') 221 | raise ValueError("Nan or Inf in camera coordinates") 222 | 223 | return cam_coords, valid_points 224 | 225 | 226 | 227 | -------------------------------------------------------------------------------- /src/utils/transform.py: -------------------------------------------------------------------------------- 1 | """Code to handle pose transforms. Some overlap with code in 2 | https://github.com/utiasASRL/vtr-dataset-tools/blob/master/transform.py. 
3 | """ 4 | 5 | import numpy as np 6 | 7 | class Transform: 8 | def __init__(self, C_ba, r_ab_inb): 9 | self.C_ba = C_ba 10 | self.r_ab_inb = r_ab_inb 11 | 12 | def __mul__(self, other): 13 | if other.__class__ == Transform: 14 | return Transform(self.C_ba.dot(other.C_ba), self.r_ab_inb + self.C_ba.dot(other.r_ab_inb)) 15 | elif other.__class__ == np.ndarray: 16 | if other.shape in [(3, 1), (3,)]: 17 | return self.C_ba.dot(other) + self.r_ab_inb 18 | elif other.shape == (1, 3): 19 | return self.C_ba.dot(other.T) + self.r_ab_inb 20 | else: 21 | raise NotImplementedError("Cannot multiply with array of shape " + str(other.shape)) 22 | else: 23 | raise NotImplementedError("Cannot multiply with type " + str(other.__class__)) 24 | 25 | def inv(self): 26 | return Transform(self.C_ba.T, -self.C_ba.T.dot(self.r_ab_inb)) 27 | 28 | @property 29 | def matrix(self): 30 | rval = np.eye(4) 31 | rval[:3, :3] = self.C_ba 32 | rval[:3, 3] = self.r_ab_inb 33 | 34 | return rval 35 | 36 | def __repr__(self): 37 | return str(self.matrix) 38 | 39 | @property 40 | def phi(self): 41 | # Get angle 42 | phi_ba = np.arccos(0.5*(self.C_ba.trace()-1.0)) 43 | sinphi_ba = np.sin(phi_ba) 44 | 45 | if abs(sinphi_ba) > 1e-9: 46 | 47 | # General case, angle is NOT near 0, pi, or 2*pi 48 | axis = np.array([self.C_ba[2, 1] - self.C_ba[1, 2], 49 | self.C_ba[0, 2] - self.C_ba[2, 0], 50 | self.C_ba[1, 0] - self.C_ba[0, 1]]) 51 | return (0.5*phi_ba/sinphi_ba)*axis 52 | 53 | elif abs(phi_ba) > 1e-9: 54 | # Angle is near pi or 2*pi 55 | # ** Note with this method we do not know the sign of 'phi', however since we know phi is 56 | # close to pi or 2*pi, the sign is unimportant.. 57 | 58 | # Find the eigenvalues and eigenvectors 59 | y, v = np.linalg.eig(self.C_ba) 60 | 61 | # Try each eigenvalue 62 | for i in range(3): 63 | # Check if eigen value is near +1.0 64 | if abs(y[i] - 1.0) < 1e-6: 65 | # Get corresponding angle-axis 66 | return phi_ba*v[i] 67 | 68 | # Runtime error 69 | raise RuntimeError("so3 logarithmic map failed to find an axis-angle, " 70 | "angle was near pi, or 2*pi, but no eigenvalues were near 1") 71 | 72 | else: 73 | # Angle is near zero 74 | return np.zeros((3,)) 75 | 76 | @staticmethod 77 | def hat(v): 78 | if len(v) == 4: v = v[:3] / v[3] 79 | skv = np.roll(np.roll(np.diag(v.flatten()), 1, 1), -1, 0) 80 | return skv - skv.T 81 | # return np.array([[ 0.0, -v[2], v[1]], 82 | # [ v[2], 0.0, -v[0]], 83 | # [-v[1], v[0], 0.0]]) 84 | 85 | @staticmethod 86 | def SO3JacInv(aaxis_ba): 87 | ## separate the angle/axis 88 | angle_ba = np.linalg.norm(aaxis_ba) 89 | axis_ba = aaxis_ba / angle_ba 90 | 91 | # if its small then return identity 92 | if angle_ba < 1e-12: 93 | return np.eye(3, 3) 94 | 95 | halfphi = 0.5 * angle_ba 96 | cotanTerm = halfphi / np.tan(halfphi) 97 | 98 | ## jacinv = phi/2 * cot(phi/2)eye + (1-phi/2*cot(phi/2))aa^T - phi/2 * a^ 99 | jacinv = np.eye(3, 3) * cotanTerm + (1.0 - cotanTerm) * \ 100 | axis_ba * axis_ba.transpose() - halfphi * Transform.hat(axis_ba) 101 | 102 | return jacinv 103 | 104 | @staticmethod 105 | def LogMap(tf): 106 | # Get the SO3 inverse jacobian 107 | jacinv = Transform.SO3JacInv(tf.phi) 108 | # multiply by translation to get rho 109 | rho_ab_inb = np.matmul(jacinv, tf.r_ab_inb.reshape((3, 1))) 110 | # return 6x1 vector 111 | return np.concatenate((rho_ab_inb.flatten(), tf.phi), axis=0) 112 | 113 | @classmethod 114 | def ExpMap(cls, xi): 115 | phi_ba = np.linalg.norm(xi[3:]) 116 | if abs(phi_ba) < 1e-9: 117 | # If angle is very small, return identity rotation with linear offset 
118 | return Transform(np.eye(3), xi[:3]) 119 | 120 | axis = xi[3:] / phi_ba 121 | sin_term = np.sin(phi_ba) / phi_ba 122 | cos_term = (1.0 - np.cos(phi_ba)) / phi_ba 123 | 124 | A = cls.hat(axis) 125 | 126 | J = sin_term * np.eye(3) + (1.0 - sin_term) * np.outer(axis, axis) + cos_term * A 127 | return Transform(np.eye(3) + phi_ba * A.dot(J), J.dot(xi[:3])) -------------------------------------------------------------------------------- /src/utils/utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | import torch 5 | from torch.utils import data 6 | 7 | from src.dataset import Dataset 8 | 9 | class MELData(): 10 | """ 11 | Class for storing the training, validation, and test data that we generate from the VT&R Multi-Experience 12 | Localization (MEL) data. 13 | """ 14 | 15 | def __init__(self): 16 | self.mean = [0.0, 0.0, 0.0] 17 | self.std_dev = [0.0, 0.0, 0.0] 18 | 19 | self.train_labels_se3 = {} 20 | self.train_labels_log = {} 21 | self.train_ids = [] 22 | 23 | self.valid_labels_se3 = {} 24 | self.valid_labels_log = {} 25 | self.valid_ids = [] 26 | 27 | self.paths = [] 28 | self.test_labels_se3 = {} 29 | self.test_labels_log = {} 30 | self.test_ids = {} 31 | 32 | def compute_mean_std(mel_data, data_dir, height, width): 33 | """ 34 | Compute the mean and standard deviation for each channel of the images in a dataset. 35 | 36 | Args: 37 | mel_data (MELData): the object storing the images. 38 | data_dir (string): the directory where the images are stored 39 | height (int): image height. 40 | width (int): image width. 41 | 42 | Returns: 43 | pop_mean_final (list[float]): the mean for each of the three RGB channels for the images. 44 | pop_std_final (list[float]): the standard deviation for each of the three RGB channels for the images. 
45 | """ 46 | 47 | start = time.time() 48 | 49 | image_params = {'data_dir': data_dir, 50 | 'height': height, 51 | 'width': width, 52 | 'use_normalization': False, 53 | 'use_disparity': False, 54 | } 55 | 56 | num_channels = 12 57 | batch_size = 128 58 | 59 | params = {'batch_size': batch_size, 60 | 'shuffle': False, 61 | 'num_workers': 12} 62 | 63 | training_set = Dataset(**image_params) 64 | training_set.load_mel_data(mel_data, 'training') 65 | training_generator = data.DataLoader(training_set, **params) 66 | 67 | fst_moment = torch.zeros(num_channels) 68 | snd_moment = torch.zeros(num_channels) 69 | 70 | fst_moment = fst_moment.cuda() 71 | snd_moment = snd_moment.cuda() 72 | 73 | i = 0 74 | cnt = 0 75 | for images, _, _, _, _ in training_generator: 76 | i += 1 77 | 78 | images = images[:, 0:num_channels, :, :].cuda() 79 | 80 | b, c, h, w = images.size() 81 | nb_pixels = b * h * w 82 | sum_ = torch.sum(images, dim=[0,2,3]) 83 | sum_of_square = torch.sum(images ** 2, dim=[0,2,3]) 84 | fst_moment = ((cnt * fst_moment) + sum_) / (cnt + nb_pixels) 85 | snd_moment = ((cnt * snd_moment) + sum_of_square) / (cnt + nb_pixels) 86 | 87 | cnt += nb_pixels 88 | 89 | pop_mean = fst_moment 90 | pop_std = torch.sqrt(snd_moment - (fst_moment ** 2)) 91 | 92 | pop_mean_final = [0, 0, 0] 93 | pop_std_final = [0, 0, 0] 94 | 95 | num_comp = int(num_channels / 3.0) 96 | 97 | for i in range(3): 98 | for j in range(num_comp): 99 | pop_mean_final[i] += pop_mean[(j * 3) + i].item() 100 | pop_std_final[i] += pop_std[(j * 3) + i].item() 101 | 102 | pop_mean_final[i] = pop_mean_final[i] / num_comp 103 | pop_std_final[i] = pop_std_final[i] / num_comp 104 | 105 | print(f'Training images mean: {pop_mean_final}') 106 | print(f'Training images std: {pop_std_final}') 107 | print(f'Computing mean/std took: {time.time() - start}') 108 | 109 | return pop_mean_final, pop_std_final 110 | -------------------------------------------------------------------------------- /visualization/plots.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | import matplotlib.pyplot as plt 7 | 8 | class Plotting: 9 | """ 10 | Class for plotting results. 11 | """ 12 | 13 | def __init__(self, results_dir): 14 | """ 15 | Initialize plotting. 16 | 17 | Args: 18 | results_dir (string): the directory in which to store the plots. 19 | """ 20 | self.results_dir = results_dir 21 | 22 | def plot_epoch_losses(self, epoch_losses_train, epoch_losses_valid): 23 | """ 24 | Plot the average training and validation loss for each epoch. Plot each individual type of loss and also 25 | the weighted sum of the losses. 26 | 27 | Args: 28 | epoch_losses_train (dict): the average training losses for each epoch. 29 | epoch_losses_valid (dict): the average training losses for each epoch. 
30 | """ 31 | for loss_type in epoch_losses_train.keys(): 32 | 33 | plt.figure() 34 | p1 = plt.plot(epoch_losses_train[loss_type]) 35 | p2 = plt.plot(epoch_losses_valid[loss_type]) 36 | 37 | plt.legend((p1[0], p2[0]), ('training', 'validation')) 38 | plt.ylabel('Loss') 39 | plt.xlabel('Epoch') 40 | plt.title(f'Loss for each epoch, {loss_type}') 41 | 42 | plt.savefig(f'{self.results_dir}loss_epoch_{loss_type}.png', format='png') 43 | plt.savefig(f'{self.results_dir}loss_epoch_{loss_type}.pdf', format='pdf') 44 | plt.close() 45 | 46 | plt.figure() 47 | p1 = plt.plot(np.log(epoch_losses_train[loss_type])) 48 | p2 = plt.plot(np.log(epoch_losses_valid[loss_type])) 49 | 50 | plt.legend((p1[0], p2[0]), ('training', 'validation')) 51 | plt.ylabel('Log of loss') 52 | plt.xlabel('Epoch') 53 | plt.title('Log of loss for each epoch, {}'.format(loss_type)) 54 | 55 | plt.savefig(f'{self.results_dir}log_loss_epoch_{loss_type}.png', format='png') 56 | plt.savefig(f'{self.results_dir}log_loss_epoch_{loss_type}.pdf', format='pdf') 57 | plt.close() 58 | 59 | def plot_epoch_errors(self, epoch_error_train, epoch_error_valid, dof): 60 | """ 61 | Plot the average error for each specified pose DOF for each epoch for training and validation. 62 | 63 | Args: 64 | epoch_error_train (dict): the average pose errors for each DOF for each epoch. 65 | epoch_error_valid (dict): the average pose errors for each DOF for each epoch. 66 | dof (list[int]): indices of the DOF to plot. 67 | """ 68 | dof_str = ['x', 'y', 'z', 'roll', 'pitch', 'yaw'] 69 | 70 | for i in range(len(dof)): 71 | dof_index = dof[i] 72 | 73 | plt.figure() 74 | p1 = plt.plot(epoch_error_train[:, dof_index]) 75 | p2 = plt.plot(epoch_error_valid[:, dof_index]) 76 | 77 | plt.legend((p1[0], p2[0]), ('training', 'validation')) 78 | plt.ylabel('RMSE') 79 | plt.xlabel('Epoch') 80 | plt.title(f'Error for each epoch, {dof_str[dof_index]}') 81 | 82 | plt.savefig(f'{self.results_dir}error_epoch_{dof_str[dof_index]}.png', format='png') 83 | plt.savefig(f'{self.results_dir}error_epoch_{dof_str[dof_index]}.pdf', format='pdf') 84 | plt.close() 85 | 86 | def plot_outputs(self, outputs_log, targets_log, path_name, map_run_id, dof): 87 | """ 88 | Plot estimated and target poses. Plot each of the estimated DOF separately. 89 | 90 | Args: 91 | outputs_log (dict): a map from the live run id to the estimated poses for all localized vertices on 92 | that run provided as length 6 vectors stacked in a numpy array. 93 | targets_log (dict): a map from the live run id to the ground truth target poses for all localized 94 | vertices on that run provided as length 6 vectors stacked in a numpy array. 95 | path_name (string): name of the path. 96 | map_run_id (int): the id of the run used as the map, i.e. which all the other runs are localized to. 97 | dof (list[int]): indices of the DOF to plot. 98 | """ 99 | # Store the plots of all runs localized to the map run in the same directory. 100 | directory = f'{self.results_dir}/{path_name}/map_run_{map_run_id}' 101 | if not os.path.exists(directory): 102 | os.makedirs(directory) 103 | 104 | dof_str = ['x', 'y', 'z', 'roll', 'pitch', 'yaw'] 105 | 106 | for live_run_id in outputs_log.keys(): 107 | 108 | for dof_ind in dof: 109 | 110 | # Convert to degrees if rotation. 
111 | targets_plot = np.rad2deg(targets_log[live_run_id][:, dof_ind]) if dof_ind > 2 else targets_log[live_run_id][:, dof_ind] 112 | outputs_plot = np.rad2deg(outputs_log[live_run_id][:, dof_ind]) if dof_ind > 2 else outputs_log[live_run_id][:, dof_ind] 113 | 114 | f = plt.figure(figsize=(18,6)) 115 | f.tight_layout(rect=[0, 0.03, 1, 0.95]) 116 | p1 = plt.plot(targets_plot, 'C1') 117 | p2 = plt.plot(outputs_plot, 'C0') 118 | plt.legend((p1[0], p2[0]), ('ground truth', 'estimated')) 119 | 120 | ylabel = 'Degrees' if dof_ind > 2 else 'Metres' 121 | plt.ylabel(ylabel) 122 | plt.xlabel('Vertex') 123 | plt.title(f'Pose - {dof_str[dof_ind]}') 124 | 125 | plt.savefig(f'{directory}/pose_{dof_str[dof_ind]}_live_run_{live_run_id}.png', format='png') 126 | plt.savefig(f'{directory}/pose_{dof_str[dof_ind]}_live_run_{live_run_id}.pdf', format='pdf') 127 | plt.close() --------------------------------------------------------------------------------
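The batched Lie-group helpers in `src/utils/lie_algebra.py` are easiest to sanity-check through their inverse relationships. The snippet below is a minimal sketch, not a file from the repository; it assumes PyTorch is installed and the repository root is on `PYTHONPATH`, and checks that `se3_exp` and `se3_log` invert each other and that `se3_inv` produces valid batched inverses.

```python
import torch

from src.utils.lie_algebra import se3_exp, se3_inv, se3_log

# A small batch of random SE(3) tangent vectors xi = [rho, phi], shape Nx6.
xi = 0.1 * torch.randn(4, 6)

# The exponential and logarithmic maps should be inverses (loose float32 tolerance).
T = se3_exp(xi)       # Nx4x4 transformation matrices
xi_back = se3_log(T)  # Nx6 tangent vectors
assert torch.allclose(xi, xi_back, atol=1e-4)

# Each transformation composed with its inverse should give the identity.
I = torch.eye(4).expand(4, 4, 4)
assert torch.allclose(T.bmm(se3_inv(T)), I, atol=1e-4)
```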