├── .gitmodules
├── LICENSE
├── README.md
├── configs
│   ├── interiornet
│   │   ├── interiornet_cv_distribution.yaml
│   │   └── interiornet_cv_distribution_overlap.yaml
│   ├── interiornetT
│   │   ├── interiornetT_cv_distribution.yaml
│   │   └── interiornetT_cv_distribution_overlap.yaml
│   ├── streetlearn
│   │   ├── streetlearn_cv_distribution.yaml
│   │   └── streetlearn_cv_distribution_overlap.yaml
│   ├── streetlearnT
│   │   ├── streetlearnT_cv_distribution.yaml
│   │   └── streetlearnT_cv_distribution_overlap.yaml
│   └── sun360
│       ├── sun360_cv_distribution.yaml
│       ├── sun360_cv_distribution_overlap.yaml
│       ├── sun360_reg_4d_256.yaml
│       ├── sun360_reg_4d_256_overlap.yaml
│       ├── sun360_reg_6d_128.yaml
│       ├── sun360_reg_6d_128_overlap.yaml
│       ├── sun360_reg_6d_256.yaml
│       ├── sun360_reg_6d_256_overlap.yaml
│       ├── sun360_reg_euler_256.yaml
│       └── sun360_reg_euler_256_overlap.yaml
├── datasets
│   └── pairwise_dataloader.py
├── evaluation
│   ├── __pycache__
│   │   └── evaluation_metrics.cpython-36.pyc
│   └── evaluation_metrics.py
├── models
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-36.pyc
│   ├── encoder
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   └── mlp_cat_pers_fc_equi.cpython-36.pyc
│   │   ├── resnet_encoder.py
│   │   └── resunet_encoder.py
│   ├── preact_resnet.py
│   └── rotationnet
│       ├── __init__.py
│       ├── rotation_cv_net.py
│       └── rotation_net.py
├── test.py
├── tools
│   └── environment.yml
├── train.py
└── trainers
    ├── __pycache__
    │   ├── base_trainer.cpython-36.pyc
    │   └── rotation_trainer.cpython-36.pyc
    ├── base_trainer.py
    ├── rotation_trainer.py
    └── utils
        ├── __pycache__
        │   ├── compute_utils.cpython-36.pyc
        │   └── loss_utils.cpython-36.pyc
        ├── compute_utils.py
        └── loss_utils.py

/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "PanoBasic"]
2 | path = PanoBasic
3 | url = https://github.com/RuojinCai/PanoBasic.git
4 | 

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 Ruojin Cai
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Extreme Rotation Estimation using Dense Correlation Volumes
2 | 
3 | This repository contains a PyTorch implementation of the paper:
4 | 
5 | [*Extreme Rotation Estimation using Dense Correlation Volumes*](https://ruojincai.github.io/ExtremeRotation/)
6 | [[Project page]](https://ruojincai.github.io/ExtremeRotation/)
7 | [[arXiv]](https://arxiv.org/abs/2104.13530)
8 | 
9 | [Ruojin Cai](http://www.cs.cornell.edu/~ruojin/),
10 | [Bharath Hariharan](http://home.bharathh.info/),
11 | [Noah Snavely](http://www.cs.cornell.edu/~snavely/),
12 | [Hadar Averbuch-Elor](http://www.cs.cornell.edu/~hadarelor/)
13 | 
14 | CVPR 2021
15 | 
16 | ## Introduction
17 | We present a technique for estimating the relative 3D rotation of an RGB image pair in an extreme setting, where the images have little or no overlap. We observe that, even when images do not overlap, there may be rich hidden cues as to their geometric relationship, such as light source directions, vanishing points, and symmetries present in the scene. We propose a network design that can automatically learn such implicit cues by comparing all pairs of points between the two input images. Our method therefore constructs dense feature correlation volumes and processes these to predict relative 3D rotations. Our predictions are formed over a fine-grained discretization of rotations, bypassing difficulties associated with regressing 3D rotations. We demonstrate our approach on a large variety of extreme RGB image pairs, including indoor and outdoor images captured under different lighting conditions and geographic locations. Our evaluation shows that our model can successfully estimate relative rotations among non-overlapping images without compromising performance over overlapping image pairs.
18 | 
19 | #### Overview of our Method:
20 | ![Overview](https://ruojincai.github.io/ExtremeRotation/assets/overview.png)
21 | 
22 | Given a pair of images, a shared-weight Siamese encoder extracts feature maps. We compute a 4D correlation volume using the inner product of features, from which our model predicts the relative rotation (here, as distributions over Euler angles).
23 | 
24 | 
25 | ## Dependencies
26 | ```bash
27 | # Create conda environment with python 3.6, torch 1.3.1 and CUDA 10.0
28 | conda env create -f ./tools/environment.yml
29 | conda activate rota
30 | ```
31 | 
32 | ## Dataset
33 | 
34 | Perspective images with a resolution of 256 × 256 and a 90° FoV are randomly sampled from the panoramas.
35 | We sample images with yaw angles distributed uniformly over [−180°, 180°].
36 | To avoid generating textureless images that focus on the ceiling/sky or the floor, we limit the range of pitch angles to [−30°, 30°] for the indoor datasets and [−45°, 45°] for the outdoor dataset.
37 | 
38 | Download the [InteriorNet](https://interiornet.org/), [SUN360](https://vision.cs.princeton.edu/projects/2012/SUN360/data/), and [StreetLearn](https://sites.google.com/view/streetlearn/dataset) datasets to obtain the full panoramas.
39 | 
40 | Metadata files about the training and test image pairs are available in the following Google Drive folder: [link](https://drive.google.com/drive/folders/1xA6O-FYAKWj0Ed2E3qIu-tKnw29C9q1Z?usp=sharing).
41 | Download the `metadata.zip` file, unzip it and put it under the project root directory.
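For reference, the viewpoint sampling described above can be sketched in a few lines of Python. This is purely illustrative and is not part of the repository: the actual training and test pairs are the ones listed in the metadata files, and the crops themselves are produced by the MATLAB toolbox described below.

```python
import numpy as np

def sample_viewpoint(indoor=True):
    """Sample one perspective viewpoint (yaw, pitch) in degrees.

    Yaw is uniform over [-180, 180); pitch is limited to [-30, 30] for the
    indoor datasets and [-45, 45] for the outdoor dataset, matching the
    ranges described above.
    """
    yaw = np.random.uniform(-180.0, 180.0)
    pitch_limit = 30.0 if indoor else 45.0
    pitch = np.random.uniform(-pitch_limit, pitch_limit)
    return yaw, pitch

# A pair is two viewpoints sampled from the same panorama; each viewpoint is
# then rendered as a 256x256 perspective crop with a 90-degree FoV.
yaw1, pitch1 = sample_viewpoint(indoor=True)
yaw2, pitch2 = sample_viewpoint(indoor=True)
```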
42 | 
43 | We build on this MATLAB [Toolbox](https://github.com/yindaz/PanoBasic) to extract perspective images from an input panorama.
44 | Before running `PanoBasic/pano2perspective_script.m`, you need to modify the paths to the datasets and metadata files in the script.
45 | 
46 | ## Pretrained Model
47 | 
48 | Pretrained models are available in the following Google Drive folder: [link](https://drive.google.com/drive/folders/1xA6O-FYAKWj0Ed2E3qIu-tKnw29C9q1Z?usp=sharing).
49 | To use the pretrained models, download the `pretrained.zip` file, unzip it and put it under the project root directory.
50 | 
51 | #### Testing the pretrained model:
52 | The following commands test the performance of the pretrained models in the rotation estimation task.
53 | The commands output the mean and median geodesic error, and the percentage of pairs with a relative rotation error under 10° for different levels of overlap on the test set.
54 | ```bash
55 | # Usage:
56 | # python test.py <config> --pretrained <checkpoint_filename>
57 | 
58 | python test.py configs/sun360/sun360_cv_distribution.yaml \
59 | --pretrained pretrained/sun360_cv_distribution.pt
60 | 
61 | python test.py configs/interiornet/interiornet_cv_distribution.yaml \
62 | --pretrained pretrained/interiornet_cv_distribution.pt
63 | 
64 | python test.py configs/interiornetT/interiornetT_cv_distribution.yaml \
65 | --pretrained pretrained/interiornetT_cv_distribution.pt
66 | 
67 | python test.py configs/streetlearn/streetlearn_cv_distribution.yaml \
68 | --pretrained pretrained/streetlearn_cv_distribution.pt
69 | 
70 | python test.py configs/streetlearnT/streetlearnT_cv_distribution.yaml \
71 | --pretrained pretrained/streetlearnT_cv_distribution.pt
72 | ```
73 | 
74 | Rotation estimation evaluation of the pretrained models is as follows; the Large/Small/None overlap levels correspond to ground-truth relative rotation angles below 45°, between 45° and 90°, and above 90°, respectively (see `evaluation/evaluation_metrics.py`):
75 | | | | InteriorNet | | | | InteriorNet-T | | | | SUN360 | | | | StreetLearn | | | | StreetLearn-T | |
76 | |-------|:------:|:-----------:|:------:|---|:------:|:-------------:|:------:|---|:------:|:------:|:------:|---|:------:|:-----------:|:------:|---|:------:|:-------------:|:------:|
77 | | | Avg(°) | Med(°) | 10° | | Avg(°) | Med(°) | 10° | | Avg(°) | Med(°) | 10° | | Avg(°) | Med(°) | 10° | | Avg(°) | Med(°) | 10° |
78 | | Large | 1.82 | 0.88 | 98.76% | | 8.86 | 1.86 | 93.13% | | 1.37 | 1.09 | 99.51% | | 1.38 | 1.12 | 100.00%| | 24.98 | 2.50 | 78.95% |
79 | | Small | 4.31 | 1.16 | 96.58% | | 30.43 | 2.63 | 74.07% | | 6.13 | 1.77 | 95.86% | | 3.25 | 1.41 | 98.34% | | 27.84 | 3.19 | 74.76% |
80 | | None | 37.69 | 3.15 | 61.97% | | 49.44 | 4.17 | 58.36% | | 34.92 | 4.43 | 61.39% | | 5.46 | 1.65 | 96.60% | | 32.43 | 3.64 | 72.69% |
81 | | All | 13.49 | 1.18 | 86.90% | | 29.68 | 2.58 | 75.10% | | 20.45 | 2.23 | 78.30% | | 4.10 | 1.46 | 97.70% | | 29.85 | 3.19 | 74.30% |
82 | 
83 | 
84 | ## Training
85 | 
86 | ```bash
87 | # Usage:
88 | # python train.py <config>
89 | 
90 | python train.py configs/interiornet/interiornet_cv_distribution.yaml
91 | 
92 | python train.py configs/interiornetT/interiornetT_cv_distribution.yaml
93 | 
94 | python train.py configs/sun360/sun360_cv_distribution_overlap.yaml
95 | python train.py configs/sun360/sun360_cv_distribution.yaml --resume --pretrained <checkpoint_filename>
96 | 
97 | python train.py configs/streetlearn/streetlearn_cv_distribution_overlap.yaml
98 | python train.py configs/streetlearn/streetlearn_cv_distribution.yaml --resume --pretrained <checkpoint_filename>
99 | 
100 | python train.py configs/streetlearnT/streetlearnT_cv_distribution_overlap.yaml
101 | python train.py configs/streetlearnT/streetlearnT_cv_distribution.yaml --resume --pretrained <checkpoint_filename>
102 | ```
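For reference, the geodesic error reported by `test.py` (and in the table above) is the rotation angle of R_pred^T R_gt, i.e. the angular distance between the predicted and ground-truth rotation matrices. The repository computes it in `trainers/utils/compute_utils.py`; the snippet below is only an illustrative NumPy equivalent, not the code used by the evaluation scripts.

```python
import numpy as np

def geodesic_error_deg(R_pred, R_gt):
    """Angle, in degrees, of the relative rotation R_pred^T @ R_gt."""
    cos_angle = (np.trace(R_pred.T @ R_gt) - 1.0) / 2.0
    cos_angle = np.clip(cos_angle, -1.0, 1.0)  # guard against numerical drift
    return np.degrees(np.arccos(cos_angle))
```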
103 | 
104 | For the SUN360 and StreetLearn datasets, finetune from the pretrained model that was trained with only overlapping pairs, using its checkpoint at epoch 10.
105 | More configs for the baselines can be found in the folder `configs/sun360`.
106 | 
107 | ## Cite
108 | Please cite our work if you find it useful:
109 | ```bibtex
110 | @inproceedings{Cai2021Extreme,
111 | title={Extreme Rotation Estimation using Dense Correlation Volumes},
112 | author={Cai, Ruojin and Hariharan, Bharath and Snavely, Noah and Averbuch-Elor, Hadar},
113 | booktitle={IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
114 | year={2021}
115 | }
116 | ```
117 | 
118 | #### Acknowledgment
119 | This work was supported in part by the National Science Foundation (IIS-2008313) and by the generosity of Eric and Wendy Schmidt by recommendation of the Schmidt Futures program and the Zuckerman STEM leadership program.
120 | 
121 | 

--------------------------------------------------------------------------------
/configs/interiornet/interiornet_cv_distribution.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | type: datasets.pairwise_dataloader
3 | num_workers: 10
4 | height: 128
5 | train:
6 | batch_size: 20
7 | path: "data/interiornet/"
8 | pairs_file: "metadata/interiornet/train_pair_rotation.npy"
9 | val:
10 | batch_size: 20
11 | path: "data/interiornet/"
12 | pairs_file: "metadata/interiornet/test_pair_rotation.npy"
13 | 
14 | viz:
15 | log_freq: 10
16 | viz_freq: 75000
17 | save_freq: 30000
18 | val_freq: 10000
19 | 
20 | # validation options
21 | val:
22 | run_val_during_training: True
23 | save_val_results: False
24 | eval_classification: False
25 | eval_sampling: False
26 | batch_size: 100
27 | 
28 | trainer:
29 | type: trainers.rotation_trainer
30 | epochs: 20
31 | seed: 100
32 | pairwise_type: 'cost_volume'
33 | classification: True
34 | opt:
35 | type: 'adam'
36 | lr: 5e-4 # use default learning rate
37 | beta1: 0.9
38 | beta2: 0.999
39 | momentum: 0.9 # for SGD
40 | weight_decay: 0.
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 1024 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'distribution' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 1024 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'distribution' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 1024 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'distribution' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/interiornet/interiornet_cv_distribution_overlap.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/interiornet/" 8 | pairs_file: "metadata/interiornet/train_pair_rotation_overlap.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/interiornet/" 12 | pairs_file: "metadata/interiornet/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'cost_volume' 33 | classification: True 34 | opt: 35 | type: 'adam' 36 | lr: 5e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 1024 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'distribution' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 1024 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'distribution' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 1024 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'distribution' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/interiornetT/interiornetT_cv_distribution.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/interiornet/" 8 | pairs_file: "metadata/interiornetT/train_pair_translation.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/interiornet/" 12 | pairs_file: "metadata/interiornetT/test_pair_translation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'cost_volume' 33 | classification: True 34 | opt: 35 | type: 'adam' 36 | lr: 5e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 1024 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'distribution' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 1024 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'distribution' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 1024 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'distribution' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/interiornetT/interiornetT_cv_distribution_overlap.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/interiornet/" 8 | pairs_file: "metadata/interiornetT/train_pair_translation_overlap.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/interiornet/" 12 | pairs_file: "metadata/interiornetT/test_pair_translation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'cost_volume' 33 | classification: True 34 | opt: 35 | type: 'adam' 36 | lr: 5e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 1024 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'distribution' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 1024 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'distribution' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 1024 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'distribution' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/streetlearn/streetlearn_cv_distribution.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/streetlearn/" 8 | pairs_file: "metadata/streetlearn/train_pair_rotation.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/streetlearn/" 12 | pairs_file: "metadata/streetlearn/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'cost_volume' 33 | classification: True 34 | opt: 35 | type: 'adam' 36 | lr: 5e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 1024 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'distribution' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 1024 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'distribution' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 1024 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'distribution' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/streetlearn/streetlearn_cv_distribution_overlap.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/streetlearn/" 8 | pairs_file: "metadata/streetlearn/train_pair_rotation_overlap.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/streetlearn/" 12 | pairs_file: "metadata/streetlearn/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'cost_volume' 33 | classification: True 34 | opt: 35 | type: 'adam' 36 | lr: 5e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 1024 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'distribution' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 1024 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'distribution' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 1024 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'distribution' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/streetlearnT/streetlearnT_cv_distribution.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/streetlearn/" 8 | pairs_file: "metadata/streetlearnT/train_pair_translation.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/streetlearn/" 12 | pairs_file: "metadata/streetlearnT/test_pair_translation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'cost_volume' 33 | classification: True 34 | opt: 35 | type: 'adam' 36 | lr: 5e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 1024 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'distribution' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 1024 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'distribution' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 1024 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'distribution' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/streetlearnT/streetlearnT_cv_distribution_overlap.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/streetlearn/" 8 | pairs_file: "metadata/streetlearnT/train_pair_translation_overlap.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/streetlearn/" 12 | pairs_file: "metadata/streetlearnT/test_pair_translation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'cost_volume' 33 | classification: True 34 | opt: 35 | type: 'adam' 36 | lr: 5e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 1024 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'distribution' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 1024 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'distribution' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 1024 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'distribution' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/sun360/sun360_cv_distribution.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/sun360" 8 | pairs_file: "metadata/sun360/train_pair_rotation.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/sun360" 12 | pairs_file: "metadata/sun360/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'cost_volume' 33 | classification: True 34 | opt: 35 | type: 'adam' 36 | lr: 5e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 1024 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'distribution' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 1024 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'distribution' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 1024 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'distribution' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/sun360/sun360_cv_distribution_overlap.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/sun360" 8 | pairs_file: "metadata/sun360/train_pair_rotation_overlap.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/sun360" 12 | pairs_file: "metadata/sun360/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'cost_volume' 33 | classification: True 34 | opt: 35 | type: 'adam' 36 | lr: 5e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 1024 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'distribution' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 1024 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'distribution' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 1024 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'distribution' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/sun360/sun360_reg_4d_256.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 256 5 | train: 6 | batch_size: 20 7 | path: "data/sun360" 8 | pairs_file: "metadata/sun360/train_pair_rotation.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/sun360" 12 | pairs_file: "metadata/sun360/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'concat' 33 | classification: False 34 | opt: 35 | type: 'adam' 36 | lr: 1e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resnet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | rotationnet: 50 | type: models.rotationnet.rotation_net 51 | block: 1 # 0 basic, 1 bottleneck 52 | num_blocks: 1-1 53 | zdim: 512 54 | out_rotation_mode: 'Quaternion' 55 | rotationnet_y: 56 | type: models.rotationnet.rotation_net 57 | block: 1 # 0 basic, 1 bottleneck 58 | num_blocks: 1-1 59 | zdim: 512 60 | out_rotation_mode: 'Quaternion' 61 | rotationnet_z: 62 | type: models.rotationnet.rotation_net 63 | block: 1 # 0 basic, 1 bottleneck 64 | num_blocks: 1-1 65 | zdim: 512 66 | out_rotation_mode: 'Quaternion' 67 | 68 | 69 | -------------------------------------------------------------------------------- /configs/sun360/sun360_reg_4d_256_overlap.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 256 5 | train: 6 | batch_size: 20 7 | path: "data/sun360" 8 | pairs_file: "metadata/sun360/train_pair_rotation_overlap.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/sun360" 12 | pairs_file: "metadata/sun360/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'concat' 33 | classification: False 34 | opt: 35 | type: 'adam' 36 | lr: 1e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resnet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | rotationnet: 50 | type: models.rotationnet.rotation_net 51 | block: 1 # 0 basic, 1 bottleneck 52 | num_blocks: 1-1 53 | zdim: 512 54 | out_rotation_mode: 'Quaternion' 55 | rotationnet_y: 56 | type: models.rotationnet.rotation_net 57 | block: 1 # 0 basic, 1 bottleneck 58 | num_blocks: 1-1 59 | zdim: 512 60 | out_rotation_mode: 'Quaternion' 61 | rotationnet_z: 62 | type: models.rotationnet.rotation_net 63 | block: 1 # 0 basic, 1 bottleneck 64 | num_blocks: 1-1 65 | zdim: 512 66 | out_rotation_mode: 'Quaternion' 67 | 68 | 69 | -------------------------------------------------------------------------------- /configs/sun360/sun360_reg_6d_128.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/sun360" 8 | pairs_file: "metadata/sun360/train_pair_rotation.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/sun360" 12 | pairs_file: "metadata/sun360/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'concat' 33 | classification: False 34 | opt: 35 | type: 'adam' 36 | lr: 1e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 64 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'ortho6d' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 64 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'ortho6d' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 64 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'ortho6d' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/sun360/sun360_reg_6d_128_overlap.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 128 5 | train: 6 | batch_size: 20 7 | path: "data/sun360" 8 | pairs_file: "metadata/sun360/train_pair_rotation_overlap.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/sun360" 12 | pairs_file: "metadata/sun360/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'concat' 33 | classification: False 34 | opt: 35 | type: 'adam' 36 | lr: 1e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resunet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | not_concat: True 50 | num_out_layers: 32 51 | rotationnet: 52 | type: models.rotationnet.rotation_cv_net 53 | block: 1 # 0 basic, 1 bottleneck 54 | num_blocks: 1-1 55 | zdim: 512 56 | in_planes: 64 57 | width: 32 58 | height: 32 59 | out_rotation_mode: 'ortho6d' 60 | rotationnet_y: 61 | type: models.rotationnet.rotation_cv_net 62 | block: 1 # 0 basic, 1 bottleneck 63 | num_blocks: 1-1 64 | zdim: 512 65 | in_planes: 64 66 | width: 32 67 | height: 32 68 | out_rotation_mode: 'ortho6d' 69 | rotationnet_z: 70 | type: models.rotationnet.rotation_cv_net 71 | block: 1 # 0 basic, 1 bottleneck 72 | num_blocks: 1-1 73 | zdim: 512 74 | in_planes: 64 75 | width: 32 76 | height: 32 77 | out_rotation_mode: 'ortho6d' 78 | 79 | 80 | -------------------------------------------------------------------------------- /configs/sun360/sun360_reg_6d_256.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 256 5 | train: 6 | batch_size: 20 7 | path: "data/sun360" 8 | pairs_file: "metadata/sun360/train_pair_rotation.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/sun360" 12 | pairs_file: "metadata/sun360/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'concat' 33 | classification: False 34 | opt: 35 | type: 'adam' 36 | lr: 1e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resnet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | rotationnet: 50 | type: models.rotationnet.rotation_net 51 | block: 1 # 0 basic, 1 bottleneck 52 | num_blocks: 1-1 53 | zdim: 512 54 | out_rotation_mode: 'ortho6d' 55 | rotationnet_y: 56 | type: models.rotationnet.rotation_net 57 | block: 1 # 0 basic, 1 bottleneck 58 | num_blocks: 1-1 59 | zdim: 512 60 | out_rotation_mode: 'ortho6d' 61 | rotationnet_z: 62 | type: models.rotationnet.rotation_net 63 | block: 1 # 0 basic, 1 bottleneck 64 | num_blocks: 1-1 65 | zdim: 512 66 | out_rotation_mode: 'ortho6d' 67 | 68 | 69 | -------------------------------------------------------------------------------- /configs/sun360/sun360_reg_6d_256_overlap.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 256 5 | train: 6 | batch_size: 20 7 | path: "data/sun360" 8 | pairs_file: "metadata/sun360/train_pair_rotation_overlap.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/sun360" 12 | pairs_file: "metadata/sun360/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'concat' 33 | classification: False 34 | opt: 35 | type: 'adam' 36 | lr: 1e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resnet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | rotationnet: 50 | type: models.rotationnet.rotation_net 51 | block: 1 # 0 basic, 1 bottleneck 52 | num_blocks: 1-1 53 | zdim: 512 54 | out_rotation_mode: 'ortho6d' 55 | rotationnet_y: 56 | type: models.rotationnet.rotation_net 57 | block: 1 # 0 basic, 1 bottleneck 58 | num_blocks: 1-1 59 | zdim: 512 60 | out_rotation_mode: 'ortho6d' 61 | rotationnet_z: 62 | type: models.rotationnet.rotation_net 63 | block: 1 # 0 basic, 1 bottleneck 64 | num_blocks: 1-1 65 | zdim: 512 66 | out_rotation_mode: 'ortho6d' 67 | 68 | 69 | -------------------------------------------------------------------------------- /configs/sun360/sun360_reg_euler_256.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 256 5 | train: 6 | batch_size: 20 7 | path: "data/sun360" 8 | pairs_file: "metadata/sun360/train_pair_rotation.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/sun360" 12 | pairs_file: "metadata/sun360/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'concat' 33 | classification: False 34 | opt: 35 | type: 'adam' 36 | lr: 1e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resnet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | rotationnet: 50 | type: models.rotationnet.rotation_net 51 | block: 1 # 0 basic, 1 bottleneck 52 | num_blocks: 1-1 53 | zdim: 512 54 | out_rotation_mode: 'euler' 55 | rotationnet_y: 56 | type: models.rotationnet.rotation_net 57 | block: 1 # 0 basic, 1 bottleneck 58 | num_blocks: 1-1 59 | zdim: 512 60 | out_rotation_mode: 'euler' 61 | rotationnet_z: 62 | type: models.rotationnet.rotation_net 63 | block: 1 # 0 basic, 1 bottleneck 64 | num_blocks: 1-1 65 | zdim: 512 66 | out_rotation_mode: 'euler' 67 | 68 | 69 | -------------------------------------------------------------------------------- /configs/sun360/sun360_reg_euler_256_overlap.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | type: datasets.pairwise_dataloader 3 | num_workers: 10 4 | height: 256 5 | train: 6 | batch_size: 20 7 | path: "data/sun360" 8 | pairs_file: "metadata/sun360/train_pair_rotation_overlap.npy" 9 | val: 10 | batch_size: 20 11 | path: "data/sun360" 12 | pairs_file: "metadata/sun360/test_pair_rotation.npy" 13 | 14 | viz: 15 | log_freq: 10 16 | viz_freq: 75000 17 | save_freq: 30000 18 | val_freq: 10000 19 | 20 | # validation options 21 | val: 22 | run_val_during_training: True 23 | save_val_results: False 24 | eval_classification: False 25 | eval_sampling: False 26 | batch_size: 100 27 | 28 | trainer: 29 | type: trainers.rotation_trainer 30 | epochs: 20 31 | seed: 100 32 | pairwise_type: 'concat' 33 | classification: False 34 | opt: 35 | type: 'adam' 36 | lr: 1e-4 # use default learning rate 37 | beta1: 0.9 38 | beta2: 0.999 39 | momentum: 0.9 # for SGD 40 | weight_decay: 0. 
41 | scheduler: 'linear' 42 | step_epoch: 10 43 | 44 | models: 45 | encoder: 46 | type: models.encoder.resnet_encoder 47 | block: 1 # 0 basic, 1 bottleneck 48 | num_blocks: 3-3-3 49 | rotationnet: 50 | type: models.rotationnet.rotation_net 51 | block: 1 # 0 basic, 1 bottleneck 52 | num_blocks: 1-1 53 | zdim: 512 54 | out_rotation_mode: 'euler' 55 | rotationnet_y: 56 | type: models.rotationnet.rotation_net 57 | block: 1 # 0 basic, 1 bottleneck 58 | num_blocks: 1-1 59 | zdim: 512 60 | out_rotation_mode: 'euler' 61 | rotationnet_z: 62 | type: models.rotationnet.rotation_net 63 | block: 1 # 0 basic, 1 bottleneck 64 | num_blocks: 1-1 65 | zdim: 512 66 | out_rotation_mode: 'euler' 67 | 68 | 69 | -------------------------------------------------------------------------------- /datasets/pairwise_dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch.utils import data 4 | from torchvision.datasets.vision import VisionDataset 5 | from PIL import Image 6 | import os 7 | import os.path 8 | from torchvision import transforms 9 | from PIL import ImageFile 10 | 11 | ImageFile.LOAD_TRUNCATED_IMAGES = True 12 | 13 | class RotationDataset(VisionDataset): 14 | def __init__(self, root, loader, extensions=None, height=None, pairs_file=None, transform=None, 15 | target_transform=None, Train=True): 16 | super(RotationDataset, self).__init__(root, transform=transform, 17 | target_transform=target_transform) 18 | self.pairs = np.load(pairs_file, allow_pickle=True).item() 19 | self.loader = loader 20 | self.extensions = extensions 21 | self.train = Train 22 | self.height = height 23 | 24 | def __getitem__(self, index): 25 | """ 26 | Args: 27 | index (int): Index 28 | 29 | Returns: 30 | disctionary: img1, img2, rotation_x1, rotation_x2, rotation_y1, rotation_y2, path, path2 31 | """ 32 | img1 = self.pairs[index]['img1'] 33 | img2 = self.pairs[index]['img2'] 34 | path = os.path.join(self.root, img1['path']) 35 | rotation_x1, rotation_y1 = img1['x'], img1['y'] 36 | image1 = self.loader(path) 37 | if self.target_transform is not None: 38 | image1 = self.target_transform(image1) 39 | path2 = os.path.join(self.root, img2['path']) 40 | rotation_x2, rotation_y2 = img2['x'], img2['y'] 41 | image2 = self.loader(path2) 42 | if self.target_transform is not None: 43 | image2 = self.target_transform(image2) 44 | 45 | return { 46 | 'img1': image1, 47 | 'rotation_x1': rotation_x1, 48 | 'rotation_y1': rotation_y1, 49 | 'img2': image2, 50 | 'rotation_x2': rotation_x2, 51 | 'rotation_y2': rotation_y2, 52 | 'path': path, 53 | 'path2': path2, 54 | } 55 | 56 | def __len__(self): 57 | if len(self.pairs) > 1000 and not self.train: 58 | return 1000 59 | return len(self.pairs) 60 | 61 | 62 | IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp') 63 | 64 | 65 | def pil_loader(path): 66 | # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) 67 | with open(path, 'rb') as f: 68 | img = Image.open(f) 69 | return img.convert('RGB') 70 | 71 | 72 | def accimage_loader(path): 73 | import accimage 74 | try: 75 | return accimage.Image(path) 76 | except IOError: 77 | # Potentially a decoding problem, fall back to PIL.Image 78 | return pil_loader(path) 79 | 80 | 81 | def default_loader(path): 82 | from torchvision import get_image_backend 83 | if get_image_backend() == 'accimage': 84 | return accimage_loader(path) 85 | else: 86 | return pil_loader(path) 87 | 88 | 89 | def 
get_datasets(cfg): 90 | tr_dataset = RotationDataset(cfg.train.path, default_loader, '.png', height=cfg.height, 91 | pairs_file=cfg.train.pairs_file, 92 | transform=transforms.Compose( 93 | [transforms.Resize((int(cfg.height), int(cfg.height))), transforms.ToTensor(), 94 | transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 95 | ]), 96 | target_transform=transforms.Compose( 97 | [transforms.Resize((int(cfg.height), int(cfg.height))), transforms.ToTensor(), 98 | transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 99 | ]) 100 | ) 101 | te_dataset = RotationDataset(cfg.val.path, default_loader, '.png', height=cfg.height, pairs_file=cfg.val.pairs_file, 102 | transform=transforms.Compose( 103 | [transforms.Resize((int(cfg.height), int(cfg.height))), transforms.ToTensor(), 104 | transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 105 | ]), 106 | target_transform=transforms.Compose( 107 | [transforms.Resize((int(cfg.height), int(cfg.height))), transforms.ToTensor(), 108 | transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), 109 | ]), 110 | Train=False) 111 | return tr_dataset, te_dataset 112 | 113 | 114 | def init_np_seed(worker_id): 115 | seed = torch.initial_seed() 116 | np.random.seed(seed % 4294967296) 117 | 118 | 119 | def get_data_loaders(cfg): 120 | tr_dataset, te_dataset = get_datasets(cfg) 121 | train_loader = data.DataLoader( 122 | dataset=tr_dataset, batch_size=cfg.train.batch_size, 123 | shuffle=True, num_workers=cfg.num_workers, drop_last=True, 124 | worker_init_fn=init_np_seed) 125 | test_loader = data.DataLoader( 126 | dataset=te_dataset, batch_size=cfg.val.batch_size, 127 | shuffle=False, num_workers=cfg.num_workers, drop_last=False, 128 | worker_init_fn=init_np_seed) 129 | 130 | loaders = { 131 | "test_loader": test_loader, 132 | 'train_loader': train_loader, 133 | } 134 | return loaders 135 | 136 | 137 | if __name__ == "__main__": 138 | pass 139 | -------------------------------------------------------------------------------- /evaluation/__pycache__/evaluation_metrics.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuojinCai/ExtremeRotation_code/61edf0dc5ec2c34782437e7960145a419c280236/evaluation/__pycache__/evaluation_metrics.cpython-36.pyc -------------------------------------------------------------------------------- /evaluation/evaluation_metrics.py: -------------------------------------------------------------------------------- 1 | from trainers.utils.compute_utils import * 2 | 3 | 4 | def evaluation_metric_rotation(predict_rotation, gt_rotation): 5 | geodesic_loss = compute_geodesic_distance_from_two_matrices(predict_rotation.view(-1, 3, 3), 6 | gt_rotation.view(-1, 3, 3)) / pi * 180 7 | gt_distance = compute_angle_from_r_matrices(gt_rotation.view(-1, 3, 3)) 8 | 9 | geodesic_loss_overlap_none = geodesic_loss[gt_distance.view(-1) > (pi / 2)] 10 | geodesic_loss_overlap_large = geodesic_loss[gt_distance.view(-1) < (pi / 4)] 11 | geodesic_loss_overlap_small = geodesic_loss[(gt_distance.view(-1) >= pi / 4) & (gt_distance.view(-1) < pi / 2)] 12 | 13 | res_error = { 14 | "gt_angle": gt_distance / pi * 180, 15 | "rotation_geodesic_error_overlap_large": geodesic_loss_overlap_large, 16 | "rotation_geodesic_error_overlap_small": geodesic_loss_overlap_small, 17 | "rotation_geodesic_error_overlap_none": geodesic_loss_overlap_none, 18 | "rotation_geodesic_error": geodesic_loss, 19 | } 20 | return 
res_error 21 | 22 | 23 | def evaluation_metric_rotation_angle(predict_rotation, gt_rotation, gt_rmat1_array, out_rmat1_array): 24 | batch = predict_rotation.size(0) 25 | # _, gt_pitch1 = compute_viewpoint_from_rotation_matrix(gt_rmat1_array, batch) 26 | # gt_rmat2_array = compute_rotation_matrix_from_two_matrices(gt_rotation, gt_rmat1_array.transpose(1,2)) 27 | # gt_yaw, gt_pitch2 = compute_viewpoint_from_rotation_matrix(gt_rmat2_array, batch) 28 | # gt_pitch = gt_pitch2 - gt_pitch1 29 | 30 | gt_yaw1, gt_pitch1 = compute_viewpoint_from_rotation_matrix(gt_rmat1_array, batch) 31 | gt_rmat2_array = compute_rotation_matrix_from_two_matrices(gt_rotation, gt_rmat1_array.transpose(1,2)) 32 | gt_yaw2, gt_pitch2 = compute_viewpoint_from_rotation_matrix(gt_rmat2_array, batch) 33 | gt_yaw = gt_yaw2 - gt_yaw1 34 | gt_pitch = gt_pitch2 - gt_pitch1 35 | 36 | if out_rmat1_array is None: 37 | predict_yaw1, predict_pitch1 = compute_viewpoint_from_rotation_matrix(gt_rmat1_array, batch) 38 | predict_rmat2_array = compute_rotation_matrix_from_two_matrices(predict_rotation, gt_rmat1_array.transpose(1,2)) 39 | else: 40 | predict_yaw1, predict_pitch1 = compute_viewpoint_from_rotation_matrix(out_rmat1_array, batch) 41 | predict_rmat2_array = compute_rotation_matrix_from_two_matrices(predict_rotation, out_rmat1_array.transpose(1,2)) 42 | predict_yaw2, predict_pitch2 = compute_viewpoint_from_rotation_matrix(predict_rmat2_array, batch) 43 | predict_yaw = predict_yaw2 - predict_yaw1 44 | predict_pitch = predict_pitch2 - predict_pitch1 45 | 46 | def angle_range(angle): 47 | while (angle[angle>=pi].size(0)!=0) or (angle[angle<-pi].size(0)!=0): 48 | angle[angle>=pi] -= 2*pi 49 | angle[angle<-pi] += 2*pi 50 | return angle 51 | yaw_error = torch.abs(angle_range(gt_yaw - predict_yaw))/ pi * 180 52 | pitch_error = torch.abs(angle_range(gt_pitch - predict_pitch))/ pi * 180 53 | 54 | gt_distance = compute_angle_from_r_matrices(gt_rotation.view(-1, 3, 3)) 55 | 56 | yaw_error_overlap_none = yaw_error[gt_distance.view(-1) > (pi / 2)] 57 | yaw_error_overlap_large = yaw_error[gt_distance.view(-1) < (pi / 4)] 58 | yaw_error_overlap_small = yaw_error[(gt_distance.view(-1) >= pi / 4) & (gt_distance.view(-1) < pi / 2)] 59 | 60 | pitch_error_overlap_none = pitch_error[gt_distance.view(-1) > (pi / 2)] 61 | pitch_error_overlap_large = pitch_error[gt_distance.view(-1) < (pi / 4)] 62 | pitch_error_overlap_small = pitch_error[(gt_distance.view(-1) >= pi / 4) & (gt_distance.view(-1) < pi / 2)] 63 | 64 | res_error = { 65 | "rotation_yaw_error_overlap_large": yaw_error_overlap_large, 66 | "rotation_yaw_error_overlap_small": yaw_error_overlap_small, 67 | "rotation_yaw_error_overlap_none": yaw_error_overlap_none, 68 | "rotation_yaw_error": yaw_error, 69 | "rotation_pitch_error_overlap_large": pitch_error_overlap_large, 70 | "rotation_pitch_error_overlap_small": pitch_error_overlap_small, 71 | "rotation_pitch_error_overlap_none": pitch_error_overlap_none, 72 | "rotation_pitch_error": pitch_error 73 | } 74 | return res_error 75 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuojinCai/ExtremeRotation_code/61edf0dc5ec2c34782437e7960145a419c280236/models/__init__.py -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RuojinCai/ExtremeRotation_code/61edf0dc5ec2c34782437e7960145a419c280236/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /models/encoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuojinCai/ExtremeRotation_code/61edf0dc5ec2c34782437e7960145a419c280236/models/encoder/__init__.py -------------------------------------------------------------------------------- /models/encoder/__pycache__/mlp_cat_pers_fc_equi.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuojinCai/ExtremeRotation_code/61edf0dc5ec2c34782437e7960145a419c280236/models/encoder/__pycache__/mlp_cat_pers_fc_equi.cpython-36.pyc -------------------------------------------------------------------------------- /models/encoder/resnet_encoder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from models.preact_resnet import PreActBlock, PreActBottleneck 4 | 5 | class ImageEncoder(nn.Module): 6 | def __init__(self, cfgmodel): 7 | super(ImageEncoder, self).__init__() 8 | block_type = [PreActBlock, PreActBottleneck] 9 | block = block_type[cfgmodel.block] 10 | num_blocks = [int(x) for x in cfgmodel.num_blocks.strip().split("-")] 11 | self.in_planes = 64 12 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=1, bias=False) 13 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 14 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 15 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 16 | def _make_layer(self, block, planes, num_blocks, stride): 17 | strides = [stride] + [1] * (num_blocks - 1) 18 | layers = [] 19 | for stride in strides: 20 | layers.append(block(self.in_planes, planes, stride)) 21 | self.in_planes = planes * block.expansion 22 | return nn.Sequential(*layers) 23 | 24 | def forward(self, x): 25 | # todo recheck 26 | out = self.conv1(x) 27 | out = self.layer1(out) 28 | out = F.avg_pool2d(out, 2) 29 | out = self.layer2(out) 30 | out = F.avg_pool2d(out, 2) 31 | out = self.layer3(out) 32 | out = F.avg_pool2d(out, 2) 33 | return out 34 | -------------------------------------------------------------------------------- /models/encoder/resunet_encoder.py: -------------------------------------------------------------------------------- 1 | '''ResUNet in PyTorch. 2 | https://github.com/qianqianwang68/caps/blob/master/CAPS/network.py 3 | Reference: 4 | [1] Zhengxin Zhang, Qingjie Liu 5 | Road Extraction by Deep Residual U-Net. 
arXiv:1711.10684 6 | ''' 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import sys 12 | 13 | sys.path.append('../') 14 | from models.preact_resnet import PreActBlock, PreActBottleneck 15 | 16 | class conv(nn.Module): 17 | def __init__(self, num_in_layers, num_out_layers, kernel_size, stride): 18 | super(conv, self).__init__() 19 | self.kernel_size = kernel_size 20 | self.conv = nn.Conv2d(num_in_layers, num_out_layers, kernel_size=kernel_size, stride=stride, 21 | padding=(self.kernel_size - 1) // 2) 22 | self.normalize = nn.BatchNorm2d(num_out_layers) 23 | 24 | def forward(self, x): 25 | x = self.conv(x) 26 | x = self.normalize(x) 27 | return F.elu(x, inplace=True) 28 | 29 | 30 | class upconv(nn.Module): 31 | def __init__(self, num_in_layers, num_out_layers, kernel_size, scale): 32 | super(upconv, self).__init__() 33 | self.scale = scale 34 | self.conv1 = conv(num_in_layers, num_out_layers, kernel_size, 1) 35 | 36 | def forward(self, x): 37 | x = nn.functional.interpolate(x, scale_factor=self.scale, mode='bilinear', align_corners=True) 38 | return self.conv1(x) 39 | 40 | 41 | class ImageEncoder(nn.Module): 42 | def __init__(self, cfgmodel, encoder='resnet50', pretrained=True, num_in_layers=3): 43 | super(ImageEncoder, self).__init__() 44 | self.pretrained = pretrained 45 | assert encoder in ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'], "Incorrect encoder type" 46 | filters = [256, 512, 1024, 2048] 47 | self.in_planes = 64 48 | if num_in_layers != 3: # Number of input channels 49 | self.firstconv = nn.Conv2d(num_in_layers, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) 50 | else: 51 | self.firstconv = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) # H/2 52 | self.firstbn = nn.BatchNorm2d(64) 53 | self.firstrelu = nn.ReLU(inplace=True) 54 | self.firstmaxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) # H/4 55 | # encoder 56 | block_type = [PreActBlock, PreActBottleneck] 57 | block = block_type[cfgmodel.block] 58 | num_blocks = [int(x) for x in cfgmodel.num_blocks.strip().split("-")] 59 | self.encoder1 = self._make_layer(block, 64, num_blocks[0], stride=1) # H/4 60 | self.encoder2 = self._make_layer(block, 128, num_blocks[1], stride=2) # H/8 61 | self.encoder3 = self._make_layer(block, 256, num_blocks[2], stride=2) # H/16 62 | 63 | # decoder 64 | self.not_concat = getattr(cfgmodel, "not_concat", False) 65 | self.upconv4 = upconv(filters[2], 512, 3, 2) 66 | if not self.not_concat: 67 | self.iconv4 = conv(filters[1] + 512, 512, 3, 1) 68 | else: 69 | self.iconv4 = conv(512, 512, 3, 1) 70 | 71 | self.upconv3 = upconv(512, 256, 3, 2) 72 | if not self.not_concat: 73 | self.iconv3 = conv(filters[0] + 256, 256, 3, 1) 74 | else: 75 | self.iconv3 = conv(256, 256, 3, 1) 76 | 77 | num_out_layers = getattr(cfgmodel, "num_out_layers", 128) 78 | self.outconv = conv(256, num_out_layers, 1, 1) 79 | 80 | def _make_layer(self, block, planes, num_blocks, stride): 81 | strides = [stride] + [1] * (num_blocks - 1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | return nn.Sequential(*layers) 87 | 88 | def skipconnect(self, x1, x2): 89 | diffY = x2.size()[2] - x1.size()[2] 90 | diffX = x2.size()[3] - x1.size()[3] 91 | 92 | x1 = F.pad(x1, (diffX // 2, diffX - diffX // 2, 93 | diffY // 2, diffY - diffY // 2)) 94 | 95 | # for padding issues, see 96 | # 
https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a 97 | # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd 98 | 99 | x = torch.cat([x2, x1], dim=1) 100 | return x 101 | 102 | def forward(self, x): 103 | # encoding 104 | x1 = self.firstconv(x) 105 | x1 = self.firstbn(x1) 106 | x1 = self.firstrelu(x1) 107 | x1 = self.firstmaxpool(x1) 108 | 109 | x2 = self.encoder1(x1) 110 | x3 = self.encoder2(x2) 111 | x4 = self.encoder3(x3) 112 | 113 | # decoding 114 | x = self.upconv4(x4) 115 | if not self.not_concat: 116 | x = self.skipconnect(x3, x) 117 | x = self.iconv4(x) 118 | 119 | x = self.upconv3(x) 120 | if not self.not_concat: 121 | x = self.skipconnect(x2, x) 122 | x = self.iconv3(x) 123 | 124 | x = self.outconv(x) 125 | return x 126 | -------------------------------------------------------------------------------- /models/preact_resnet.py: -------------------------------------------------------------------------------- 1 | '''Pre-activation ResNet in PyTorch. 2 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/preact_resnet.py 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Identity Mappings in Deep Residual Networks. arXiv:1603.05027 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | class PreActBlock(nn.Module): 12 | '''Pre-activation version of the BasicBlock.''' 13 | expansion = 1 14 | 15 | def __init__(self, in_planes, planes, stride=1): 16 | super(PreActBlock, self).__init__() 17 | self.bn1 = nn.BatchNorm2d(in_planes) 18 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 19 | self.bn2 = nn.BatchNorm2d(planes) 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 21 | 22 | if stride != 1 or in_planes != self.expansion*planes: 23 | self.shortcut = nn.Sequential( 24 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) 25 | ) 26 | 27 | def forward(self, x): 28 | out = F.relu(self.bn1(x)) 29 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 30 | out = self.conv1(out) 31 | out = self.conv2(F.relu(self.bn2(out))) 32 | out += shortcut 33 | return out 34 | 35 | 36 | class PreActBottleneck(nn.Module): 37 | '''Pre-activation version of the original Bottleneck module.''' 38 | expansion = 4 39 | 40 | def __init__(self, in_planes, planes, stride=1): 41 | super(PreActBottleneck, self).__init__() 42 | self.bn1 = nn.BatchNorm2d(in_planes) 43 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 44 | self.bn2 = nn.BatchNorm2d(planes) 45 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 46 | self.bn3 = nn.BatchNorm2d(planes) 47 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 48 | 49 | if stride != 1 or in_planes != self.expansion*planes: 50 | self.shortcut = nn.Sequential( 51 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) 52 | ) 53 | 54 | def forward(self, x): 55 | out = F.relu(self.bn1(x)) 56 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 57 | out = self.conv1(out) 58 | out = self.conv2(F.relu(self.bn2(out))) 59 | out = self.conv3(F.relu(self.bn3(out))) 60 | out += shortcut 61 | return out 62 | 63 | 64 | class PreActBottleneck_depthwise(nn.Module): 65 | '''Pre-activation version of the original Bottleneck module.''' 66 | 
expansion = 4 67 | 68 | def __init__(self, in_planes, planes, stride=1): 69 | super(PreActBottleneck_depthwise, self).__init__() 70 | self.bn1 = nn.BatchNorm2d(in_planes) 71 | self.group_num = in_planes if in_planes < planes else planes 72 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False, groups=self.group_num) 73 | self.bn2 = nn.BatchNorm2d(planes) 74 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False, groups=self.group_num) 75 | self.bn3 = nn.BatchNorm2d(planes) 76 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False, groups=self.group_num) 77 | 78 | if stride != 1 or in_planes != self.expansion*planes: 79 | self.shortcut = nn.Sequential( 80 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False, groups=self.group_num) 81 | ) 82 | 83 | def forward(self, x): 84 | out = F.relu(self.bn1(x)) 85 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 86 | out = self.conv1(out) 87 | out = self.conv2(F.relu(self.bn2(out))) 88 | out = self.conv3(F.relu(self.bn3(out))) 89 | out += shortcut 90 | return out 91 | -------------------------------------------------------------------------------- /models/rotationnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuojinCai/ExtremeRotation_code/61edf0dc5ec2c34782437e7960145a419c280236/models/rotationnet/__init__.py -------------------------------------------------------------------------------- /models/rotationnet/rotation_cv_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from trainers.utils.compute_utils import * 5 | from models.preact_resnet import PreActBlock, PreActBottleneck, PreActBottleneck_depthwise 6 | 7 | 8 | class RotationNet(nn.Module): 9 | def __init__(self, cfgmodel): 10 | super(RotationNet, self).__init__() 11 | block_type = [PreActBlock, PreActBottleneck, PreActBottleneck_depthwise] 12 | block = block_type[cfgmodel.block] 13 | num_blocks = [int(x) for x in cfgmodel.num_blocks.strip().split("-")] 14 | if hasattr(cfgmodel, "in_planes"): 15 | self.in_planes = int(cfgmodel.in_planes) 16 | else: 17 | self.in_planes = int(cfgmodel.width) 18 | self.zdim = cfgmodel.zdim 19 | self.out_rotation_mode = cfgmodel.out_rotation_mode 20 | if (self.out_rotation_mode == "Quaternion"): 21 | self.out_dim = 4 22 | elif (self.out_rotation_mode == "ortho6d"): 23 | self.out_dim = 6 24 | elif (self.out_rotation_mode == "ortho5d"): 25 | self.out_dim = 5 26 | elif (self.out_rotation_mode == "axisAngle"): 27 | self.out_dim = 4 28 | elif (self.out_rotation_mode == "euler"): 29 | self.out_dim = 3 30 | elif self.out_rotation_mode == "angle": 31 | self.out_dim = 1 32 | elif self.out_rotation_mode == "distribution": 33 | dist_out = getattr(cfgmodel, "out_dim", 360) 34 | self.out_dim = dist_out 35 | self.layer1 = self._make_layer(block, 128, num_blocks[0], stride=2) 36 | self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2) 37 | self.linear = nn.Linear(64 * block.expansion * int(cfgmodel.width/16) * int(cfgmodel.height/16), self.zdim) 38 | self.linear2 = nn.Linear(self.zdim, self.out_dim) 39 | 40 | def _make_layer(self, block, planes, num_blocks, stride): 41 | strides = [stride] + [1] * (num_blocks - 1) 42 | layers = [] 43 | for stride in strides: 44 | layers.append(block(self.in_planes, planes, stride)) 45 | self.in_planes 
= planes * block.expansion 46 | return nn.Sequential(*layers) 47 | 48 | def forward(self, x): 49 | batch = x.shape[0] 50 | out = self.layer1(x) 51 | out = F.avg_pool2d(out, 2) 52 | out = self.layer2(out) 53 | out = F.avg_pool2d(out, 2) 54 | out = out.view(out.size(0), -1) 55 | out = self.linear(out) 56 | out_rotation = self.linear2(out) 57 | 58 | if (self.out_rotation_mode == "Quaternion"): 59 | out_r_mat = compute_rotation_matrix_from_quaternion(out_rotation) 60 | elif (self.out_rotation_mode == "ortho6d"): 61 | out_r_mat = compute_rotation_matrix_from_ortho6d(out_rotation) 62 | elif (self.out_rotation_mode == "ortho5d"): 63 | out_r_mat = compute_rotation_matrix_from_ortho5d(out_rotation) 64 | elif (self.out_rotation_mode == "axisAngle"): 65 | out_r_mat = compute_rotation_matrix_from_axisAngle(out_rotation) 66 | elif (self.out_rotation_mode == "euler"): 67 | out_r_mat = compute_rotation_matrix_from_euler(out_rotation) 68 | elif self.out_rotation_mode == "angle": 69 | out_r_mat = compute_rotation_matrix_from_viewpoint(out_rotation, torch.zeros(out_rotation.size(), requires_grad=True).cuda(), 70 | out_rotation.size(0)) 71 | elif self.out_rotation_mode == "distribution": 72 | _, rotation_x = torch.topk(out_rotation, 1, dim=-1) 73 | out_r_mat = compute_rotation_matrix_from_viewpoint(rotation_x.float()/self.out_dim*2*pi, torch.zeros(rotation_x.size(), requires_grad=True).cuda(), 74 | rotation_x.size(0)) 75 | return out_r_mat.cuda(), out_rotation 76 | -------------------------------------------------------------------------------- /models/rotationnet/rotation_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from trainers.utils.compute_utils import * 4 | from models.preact_resnet import PreActBlock, PreActBottleneck 5 | 6 | 7 | class RotationNet(nn.Module): 8 | def __init__(self, cfgmodel): 9 | super(RotationNet, self).__init__() 10 | block_type = [PreActBlock, PreActBottleneck] 11 | block = block_type[cfgmodel.block] 12 | num_blocks = [int(x) for x in cfgmodel.num_blocks.strip().split("-")] 13 | self.zdim = cfgmodel.zdim 14 | if hasattr(cfgmodel, "in_planes"): 15 | self.in_planes = int(cfgmodel.in_planes) 16 | else: 17 | self.in_planes = 2048 18 | self.out_rotation_mode = cfgmodel.out_rotation_mode 19 | if (self.out_rotation_mode == "Quaternion"): 20 | self.out_dim = 4 21 | elif (self.out_rotation_mode == "ortho6d"): 22 | self.out_dim = 6 23 | elif (self.out_rotation_mode == "ortho5d"): 24 | self.out_dim = 5 25 | elif (self.out_rotation_mode == "axisAngle"): 26 | self.out_dim = 4 27 | elif (self.out_rotation_mode == "euler"): 28 | self.out_dim = 3 29 | elif self.out_rotation_mode == "angle": 30 | self.out_dim = 1 31 | elif self.out_rotation_mode == "distribution": 32 | dist_out = getattr(cfgmodel, "out_dim", 360) 33 | self.out_dim = dist_out 34 | self.layer1 = self._make_layer(block, 512, num_blocks[0], stride=2) 35 | self.layer2 = self._make_layer(block, 512, num_blocks[1], stride=2) 36 | self.linear = nn.Linear(512 * block.expansion, self.zdim) 37 | self.linear2 = nn.Linear(self.zdim, self.out_dim) 38 | 39 | def _make_layer(self, block, planes, num_blocks, stride): 40 | strides = [stride] + [1] * (num_blocks - 1) 41 | layers = [] 42 | for stride in strides: 43 | layers.append(block(self.in_planes, planes, stride)) 44 | self.in_planes = planes * block.expansion 45 | return nn.Sequential(*layers) 46 | 47 | def forward(self, x): 48 | batch = x.shape[0] 49 | out = self.layer1(x) 50 | out = self.layer2(out) 51 | out =
out.view(out.size(0), -1) 52 | out = self.linear(out) 53 | out_rotation = self.linear2(out) 54 | 55 | if (self.out_rotation_mode == "Quaternion"): 56 | out_r_mat = compute_rotation_matrix_from_quaternion(out_rotation) 57 | elif (self.out_rotation_mode == "ortho6d"): 58 | out_r_mat = compute_rotation_matrix_from_ortho6d(out_rotation) 59 | elif (self.out_rotation_mode == "ortho5d"): 60 | out_r_mat = compute_rotation_matrix_from_ortho5d(out_rotation) 61 | elif (self.out_rotation_mode == "axisAngle"): 62 | out_r_mat = compute_rotation_matrix_from_axisAngle(out_rotation) 63 | elif (self.out_rotation_mode == "euler"): 64 | out_r_mat = compute_rotation_matrix_from_euler(out_rotation) 65 | elif self.out_rotation_mode == "angle": 66 | out_r_mat = compute_rotation_matrix_from_viewpoint(out_rotation, torch.zeros(out_rotation.size(), requires_grad=True).cuda(), 67 | out_rotation.size(0)) 68 | elif self.out_rotation_mode == "distribution": 69 | _, rotation_x = torch.topk(out_rotation, 1, dim=-1) 70 | out_r_mat = compute_rotation_matrix_from_viewpoint(rotation_x.float()/self.out_dim*2*pi, torch.zeros(rotation_x.size(), requires_grad=True).cuda(), 71 | rotation_x.size(0)) 72 | return out_r_mat.cuda(), out_rotation 73 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import time 4 | import torch 5 | import argparse 6 | import importlib 7 | import torch.distributed 8 | from torch.backends import cudnn 9 | from shutil import copy2 10 | from pprint import pprint 11 | from tensorboardX import SummaryWriter 12 | import re 13 | 14 | 15 | def get_args(): 16 | # command line args 17 | parser = argparse.ArgumentParser(description='Test') 18 | parser.add_argument('config', type=str, 19 | help='The configuration file.') 20 | 21 | # distributed training 22 | parser.add_argument('--world_size', default=1, type=int, 23 | help='Number of distributed nodes.') 24 | parser.add_argument('--dist_url', default='tcp://127.0.0.1:9991', type=str, 25 | help='url used to set up distributed training') 26 | parser.add_argument('--dist_backend', default='nccl', type=str, 27 | help='distributed backend') 28 | parser.add_argument('--distributed', action='store_true', 29 | help='Use multi-processing distributed training to ' 30 | 'launch N processes per node, which has N GPUs. ' 31 | 'This is the fastest way to use PyTorch for ' 32 | 'either single node or multi node data parallel ' 33 | 'training') 34 | parser.add_argument('--rank', default=0, type=int, 35 | help='node rank for distributed training') 36 | parser.add_argument('--gpu', default=None, type=int, 37 | help='GPU id to use. 
None means using all ' 38 | 'available GPUs.') 39 | 40 | # Resume: 41 | parser.add_argument('--pretrained', default=None, type=str, 42 | help="Pretrained cehckpoint") 43 | parser.add_argument('--val_angle', default=False, action='store_true', 44 | help="Evaluate yaw and pitch error") 45 | args = parser.parse_args() 46 | 47 | def dict2namespace(config): 48 | namespace = argparse.Namespace() 49 | for key, value in config.items(): 50 | if isinstance(value, dict): 51 | new_value = dict2namespace(value) 52 | else: 53 | new_value = value 54 | setattr(namespace, key, new_value) 55 | return namespace 56 | 57 | # parse config file 58 | 59 | with open(args.config, 'r') as f: 60 | config = yaml.load(f) 61 | config = dict2namespace(config) 62 | 63 | # # Create log_name 64 | cfg_file_name = os.path.splitext(os.path.basename(args.config))[0] 65 | run_time = time.strftime('%Y-%b-%d-%H-%M-%S') 66 | # Currently save dir and log_dir are the same 67 | config.log_name = "val_logs/%s_val_%s" % (cfg_file_name, run_time) 68 | config.save_dir = "val_logs/%s_val_%s" % (cfg_file_name, run_time) 69 | config.log_dir = "val_logs/%s_val_%s" % (cfg_file_name, run_time) 70 | os.makedirs(config.log_dir + '/config') 71 | copy2(args.config, config.log_dir + '/config') 72 | return args, config 73 | 74 | 75 | def main_worker(gpu, ngpus_per_node, cfg, args): 76 | # basic setup 77 | cudnn.benchmark = True 78 | writer = SummaryWriter(logdir=cfg.log_name) 79 | 80 | data_lib = importlib.import_module(cfg.data.type) 81 | loaders = data_lib.get_data_loaders(cfg.data) 82 | test_loader = loaders['test_loader'] 83 | trainer_lib = importlib.import_module(cfg.trainer.type) 84 | trainer = trainer_lib.Trainer(cfg, args) 85 | trainer.resume(args.pretrained, test=True) 86 | val_info = trainer.validate(test_loader, epoch=-1, val_angle=args.val_angle) 87 | trainer.log_val(val_info, writer=writer, step=-1) 88 | print("Test done:") 89 | writer.close() 90 | 91 | 92 | def main(): 93 | # command line args 94 | args, cfg = get_args() 95 | 96 | print("Arguments:") 97 | print(args) 98 | 99 | print("Configuration:") 100 | print(cfg) 101 | 102 | ngpus_per_node = torch.cuda.device_count() 103 | main_worker(args.gpu, ngpus_per_node, cfg, args) 104 | 105 | 106 | if __name__ == '__main__': 107 | main() 108 | -------------------------------------------------------------------------------- /tools/environment.yml: -------------------------------------------------------------------------------- 1 | name: rota 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - _pytorch_select=0.2=gpu_0 7 | - blas=1.0=mkl 8 | - ca-certificates=2020.1.1=0 9 | - certifi=2020.4.5.1=py36_0 10 | - cffi=1.14.0=py36h2e261b9_0 11 | - cudatoolkit=10.0.130=0 12 | - cudnn=7.6.5=cuda10.0_0 13 | - cycler=0.10.0=py36_0 14 | - dbus=1.13.12=h746ee38_0 15 | - expat=2.2.6=he6710b0_0 16 | - fontconfig=2.13.0=h9420a91_0 17 | - freetype=2.9.1=h8a8886c_1 18 | - glib=2.63.1=h5a9c865_0 19 | - gst-plugins-base=1.14.0=hbbd80ab_1 20 | - gstreamer=1.14.0=hb453b48_1 21 | - icu=58.2=h9c2bf20_1 22 | - imageio=2.8.0=py_0 23 | - intel-openmp=2020.0=166 24 | - joblib=0.14.1=py_0 25 | - jpeg=9b=h024ee3a_2 26 | - kiwisolver=1.1.0=py36he6710b0_0 27 | - ld_impl_linux-64=2.33.1=h53a641e_7 28 | - libedit=3.1.20181209=hc058e9b_0 29 | - libffi=3.2.1=hd88cf55_4 30 | - libgcc-ng=9.1.0=hdf63c60_0 31 | - libgfortran-ng=7.3.0=hdf63c60_0 32 | - libpng=1.6.37=hbc83047_0 33 | - libstdcxx-ng=9.1.0=hdf63c60_0 34 | - libtiff=4.1.0=h2733197_0 35 | - libuuid=1.0.3=h1bed415_2 36 | - 
libxcb=1.13=h1bed415_1 37 | - libxml2=2.9.9=hea5a465_1 38 | - matplotlib=3.1.3=py36_0 39 | - matplotlib-base=3.1.3=py36hef1b27d_0 40 | - mkl=2020.0=166 41 | - mkl-service=2.3.0=py36he904b0f_0 42 | - mkl_fft=1.0.15=py36ha843d7b_0 43 | - mkl_random=1.1.0=py36hd6b4f25_0 44 | - ncurses=6.2=he6710b0_0 45 | - ninja=1.9.0=py36hfd86e86_0 46 | - numpy=1.18.1=py36h4f9e942_0 47 | - numpy-base=1.18.1=py36hde5b4d6_1 48 | - olefile=0.46=py36_0 49 | - openssl=1.1.1f=h7b6447c_0 50 | - pcre=8.43=he6710b0_0 51 | - pillow=6.1.0=py36h34e0f95_0 52 | - pip=20.0.2=py36_1 53 | - pycparser=2.20=py_0 54 | - pyparsing=2.4.6=py_0 55 | - pyqt=5.9.2=py36h05f1152_2 56 | - python=3.6.10=hcf32534_1 57 | - python-dateutil=2.8.1=py_0 58 | - pytorch=1.3.1=cuda100py36h53c1284_0 59 | - qt=5.9.7=h5867ecd_1 60 | - readline=8.0=h7b6447c_0 61 | - scikit-learn=0.22.1=py36hd81dba3_0 62 | - scipy=1.4.1=py36h0b6359f_0 63 | - setuptools=46.1.3=py36_0 64 | - sip=4.19.8=py36hf484d3e_0 65 | - six=1.14.0=py36_0 66 | - sqlite=3.31.1=h7b6447c_0 67 | - tk=8.6.8=hbc83047_0 68 | - torchvision=0.4.2=cuda100py36hecfc37a_0 69 | - tornado=6.0.4=py36h7b6447c_1 70 | - tqdm=4.44.1=py_0 71 | - wheel=0.34.2=py36_0 72 | - xz=5.2.5=h7b6447c_0 73 | - zlib=1.2.11=h7b6447c_3 74 | - zstd=1.3.7=h0b5b093_0 75 | - pip: 76 | - absl-py==0.9.0 77 | - astor==0.8.1 78 | - gast==0.3.3 79 | - grpcio==1.28.1 80 | - h5py==2.10.0 81 | - keras-applications==1.0.8 82 | - keras-preprocessing==1.1.0 83 | - markdown==3.2.1 84 | - mock==4.0.2 85 | - plyfile==0.7.2 86 | - protobuf==3.11.3 87 | - pyyaml==5.3.1 88 | - tensorboard==1.13.1 89 | - tensorboardx==1.7 90 | - tensorflow-estimator==1.13.0 91 | - tensorflow-gpu==1.13.1 92 | - termcolor==1.1.0 93 | - torchdiffeq==0.0.1 94 | - werkzeug==1.0.1 95 | 96 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import time 4 | import torch 5 | import argparse 6 | import importlib 7 | import torch.distributed 8 | from torch.backends import cudnn 9 | from tensorboardX import SummaryWriter 10 | from shutil import copy2 11 | 12 | 13 | def get_args(): 14 | # command line args 15 | parser = argparse.ArgumentParser( 16 | description='Flow-based Point Cloud Generation Experiment') 17 | parser.add_argument('config', type=str, 18 | help='The configuration file.') 19 | 20 | # distributed training 21 | parser.add_argument('--world_size', default=1, type=int, 22 | help='Number of distributed nodes.') 23 | parser.add_argument('--dist_url', default='tcp://127.0.0.1:9991', type=str, 24 | help='url used to set up distributed training') 25 | parser.add_argument('--dist_backend', default='nccl', type=str, 26 | help='distributed backend') 27 | parser.add_argument('--distributed', action='store_true', 28 | help='Use multi-processing distributed training to ' 29 | 'launch N processes per node, which has N GPUs. ' 30 | 'This is the fastest way to use PyTorch for ' 31 | 'either single node or multi node data parallel ' 32 | 'training') 33 | parser.add_argument('--rank', default=0, type=int, 34 | help='node rank for distributed training') 35 | parser.add_argument('--gpu', default=None, type=int, 36 | help='GPU id to use. 
None means using all ' 37 | 'available GPUs.') 38 | 39 | # Resume: 40 | parser.add_argument('--resume', default=False, action='store_true') 41 | parser.add_argument('--pretrained', default=None, type=str, 42 | help="Pretrained cehckpoint") 43 | 44 | # Test run: 45 | parser.add_argument('--test_run', default=False, action='store_true') 46 | args = parser.parse_args() 47 | 48 | def dict2namespace(config): 49 | namespace = argparse.Namespace() 50 | for key, value in config.items(): 51 | if isinstance(value, dict): 52 | new_value = dict2namespace(value) 53 | else: 54 | new_value = value 55 | setattr(namespace, key, new_value) 56 | return namespace 57 | 58 | # parse config file 59 | with open(args.config, 'r') as f: 60 | config = yaml.load(f) 61 | config = dict2namespace(config) 62 | 63 | # Create log_name 64 | cfg_file_name = os.path.splitext(os.path.basename(args.config))[0] 65 | run_time = time.strftime('%Y-%b-%d-%H-%M-%S') 66 | # Currently save dir and log_dir are the same 67 | config.log_name = "logs/%s_%s" % (cfg_file_name, run_time) 68 | config.save_dir = "logs/%s_%s" % (cfg_file_name, run_time) 69 | config.log_dir = "logs/%s_%s" % (cfg_file_name, run_time) 70 | os.makedirs(config.log_dir+'/config') 71 | copy2(args.config, config.log_dir+'/config') 72 | return args, config 73 | 74 | 75 | def main_worker(gpu, ngpus_per_node, cfg, args): 76 | # basic setup 77 | cudnn.benchmark = True 78 | 79 | writer = SummaryWriter(logdir=cfg.log_name) 80 | data_lib = importlib.import_module(cfg.data.type) 81 | loaders = data_lib.get_data_loaders(cfg.data) 82 | train_loader = loaders['train_loader'] 83 | test_loader = loaders['test_loader'] 84 | trainer_lib = importlib.import_module(cfg.trainer.type) 85 | trainer = trainer_lib.Trainer(cfg, args) 86 | 87 | start_epoch = 0 88 | start_time = time.time() 89 | 90 | if args.resume: 91 | if args.pretrained is not None: 92 | start_epoch = trainer.resume(args.pretrained) + 1 93 | else: 94 | start_epoch = trainer.resume(cfg.resume.dir) + 1 95 | 96 | # If test run, go through the validation loop first 97 | if args.test_run: 98 | trainer.save(epoch=-1, step=-1) 99 | val_info = trainer.validate(test_loader, epoch=-1) 100 | trainer.log_val(val_info, writer=writer, epoch=-1) 101 | 102 | # main training loop 103 | print("Start epoch: %d End epoch: %d" % (start_epoch, cfg.trainer.epochs)) 104 | step = 0 105 | for epoch in range(start_epoch, cfg.trainer.epochs): 106 | # train for one epoch 107 | for bidx, data in enumerate(train_loader): 108 | step = bidx + len(train_loader) * epoch + 1 109 | logs_info = trainer.update(data) 110 | if step % int(cfg.viz.log_freq) == 0: 111 | duration = time.time() - start_time 112 | start_time = time.time() 113 | print("Epoch %d Batch [%2d/%2d] Time [%3.2fs] Loss %2.5f" 114 | % (epoch, bidx, len(train_loader), duration, 115 | logs_info['loss'])) 116 | visualize = step % int(cfg.viz.viz_freq) == 0 117 | trainer.log_train( 118 | logs_info, data, 119 | writer=writer, epoch=epoch, step=step, visualize=visualize) 120 | 121 | # Save first so that even if the visualization bugged, 122 | # we still have something 123 | #if (step + 1) % int(cfg.viz.save_freq) == 0: 124 | # trainer.save(epoch=epoch, step=step) 125 | 126 | if (step + 1) % int(cfg.viz.val_freq) == 0: 127 | val_info = trainer.validate(test_loader, epoch=epoch) 128 | trainer.log_val(val_info, writer=writer, epoch=epoch, step=step) 129 | trainer.save(epoch=epoch, step=step) 130 | val_info = trainer.validate(test_loader, epoch=epoch) 131 | trainer.log_val(val_info, writer=writer, 
epoch=epoch, step=step) 132 | 133 | # Signal the trainer to cleanup now that an epoch has ended 134 | trainer.epoch_end(epoch, writer=writer) 135 | writer.close() 136 | 137 | def main(): 138 | # command line args 139 | args, cfg = get_args() 140 | 141 | print("Arguments:") 142 | print(args) 143 | 144 | print("Configuration:") 145 | print(cfg) 146 | 147 | ngpus_per_node = torch.cuda.device_count() 148 | main_worker(args.gpu, ngpus_per_node, cfg, args) 149 | 150 | 151 | if __name__ == '__main__': 152 | main() 153 | -------------------------------------------------------------------------------- /trainers/__pycache__/base_trainer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuojinCai/ExtremeRotation_code/61edf0dc5ec2c34782437e7960145a419c280236/trainers/__pycache__/base_trainer.cpython-36.pyc -------------------------------------------------------------------------------- /trainers/__pycache__/rotation_trainer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuojinCai/ExtremeRotation_code/61edf0dc5ec2c34782437e7960145a419c280236/trainers/__pycache__/rotation_trainer.cpython-36.pyc -------------------------------------------------------------------------------- /trainers/base_trainer.py: -------------------------------------------------------------------------------- 1 | 2 | class BaseTrainer(): 3 | 4 | def __init__(self, cfg, args): 5 | pass 6 | 7 | def update(self, data): 8 | raise NotImplementedError("Trainer [update] not implemented.") 9 | 10 | def epoch_end(self, epoch, writer=None, **kwargs): 11 | # Signal now that the epoch ends.... 12 | pass 13 | 14 | def multi_gpu_wrapper(self, wrapper): 15 | raise NotImplementedError("Trainer [multi_gpu_wrapper] not implemented.") 16 | 17 | def log_train(self, train_info, train_data, 18 | writer=None, step=None, epoch=None, visualize=False): 19 | raise NotImplementedError("Trainer [log_train] not implemented.") 20 | 21 | def validate(self, test_loader): 22 | raise NotImplementedError("Trainer [validate] not implemented.") 23 | 24 | def log_val(self, val_info, writer=None, step=None, epoch=None): 25 | if writer is not None: 26 | for k, v in val_info.items(): 27 | if step is not None: 28 | writer.add_scalar(k, v, step) 29 | else: 30 | writer.add_scalar(k, v, epoch) 31 | 32 | def save(self, epoch=None, step=None, appendix=None): 33 | raise NotImplementedError("Trainer [save] not implemented.") 34 | 35 | def resume(self, path, strict=True, **args): 36 | raise NotImplementedError("Trainer [resume] not implemented.") 37 | 38 | def test(self, opt, *arg, **kwargs): 39 | raise NotImplementedError("Trainer [test] not implemented.") 40 | -------------------------------------------------------------------------------- /trainers/rotation_trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tqdm 3 | import torch 4 | import importlib 5 | import numpy as np 6 | from trainers.base_trainer import BaseTrainer 7 | from trainers.utils.loss_utils import * 8 | from evaluation.evaluation_metrics import * 9 | 10 | 11 | class Trainer(BaseTrainer): 12 | def __init__(self, cfg, args): 13 | self.cfg = cfg 14 | self.args = args 15 | 16 | encoder_lib = importlib.import_module(cfg.models.encoder.type) 17 | self.encoder = encoder_lib.ImageEncoder(cfg.models.encoder) 18 | self.encoder.cuda() 19 | print("Encoder:") 20 | print(self.encoder) 21 | 22 | dn_lib 
= importlib.import_module(cfg.models.rotationnet.type) 23 | self.rotation_net = dn_lib.RotationNet(cfg.models.rotationnet) 24 | self.rotation_net.cuda() 25 | print("rotationnet:") 26 | print(self.rotation_net) 27 | 28 | dn_lib_y = importlib.import_module(cfg.models.rotationnet_y.type) 29 | self.rotation_net_y = dn_lib_y.RotationNet(cfg.models.rotationnet_y) 30 | self.rotation_net_y.cuda() 31 | print("rotationnet_y:") 32 | print(self.rotation_net_y) 33 | 34 | dn_lib_z = importlib.import_module(cfg.models.rotationnet_z.type) 35 | self.rotation_net_z = dn_lib_z.RotationNet(cfg.models.rotationnet_z) 36 | self.rotation_net_z.cuda() 37 | print("rotationnet_z:") 38 | print(self.rotation_net_z) 39 | 40 | # The optimizer 41 | if not (hasattr(self.cfg.trainer, "opt_enc") and 42 | hasattr(self.cfg.trainer, "opt_dn")): 43 | self.cfg.trainer.opt_enc = self.cfg.trainer.opt 44 | self.cfg.trainer.opt_dn = self.cfg.trainer.opt 45 | 46 | if getattr(self.cfg.trainer.opt_enc, "scheduler", None) is not None: 47 | self.opt_enc, self.scheduler_enc = get_opt( 48 | self.encoder.parameters(), self.cfg.trainer.opt_enc) 49 | else: 50 | self.opt_enc = get_opt( 51 | self.encoder.parameters(), self.cfg.trainer.opt_enc) 52 | self.scheduler_enc = None 53 | 54 | if getattr(self.cfg.trainer.opt_dn, "scheduler", None) is not None: 55 | self.opt_dn, self.scheduler_dn = get_opt( 56 | list(self.rotation_net.parameters()) + list(self.rotation_net_y.parameters()) + 57 | list(self.rotation_net_z.parameters()), self.cfg.trainer.opt_dn) 58 | else: 59 | self.opt_dn = get_opt( 60 | list(self.rotation_net.parameters()) + list(self.rotation_net_y.parameters()) + 61 | list(self.rotation_net_z.parameters()), self.cfg.trainer.opt_dn) 62 | self.scheduler_dn = None 63 | 64 | self.classification = getattr(self.cfg.trainer, "classification", True) 65 | self.pairwise_type = getattr(self.cfg.trainer, "pairwise_type", "concat") 66 | self.rotation_parameterization = getattr(self.cfg.trainer, "rotation_parameterization", True) 67 | 68 | # Prepare save directory 69 | os.makedirs(os.path.join(cfg.save_dir, "checkpoints"), exist_ok=True) 70 | 71 | def epoch_end(self, epoch, writer=None): 72 | if self.scheduler_dn is not None: 73 | self.scheduler_dn.step(epoch=epoch) 74 | if writer is not None: 75 | writer.add_scalar( 76 | 'train/opt_dn_lr', self.scheduler_dn.get_lr()[0], epoch) 77 | if self.scheduler_enc is not None: 78 | self.scheduler_enc.step(epoch=epoch) 79 | if writer is not None: 80 | writer.add_scalar( 81 | 'train/opt_enc_lr', self.scheduler_enc.get_lr()[0], epoch) 82 | 83 | def update(self, data_full, no_update=False): 84 | img1 = data_full['img1'].cuda() 85 | img2 = data_full['img2'].cuda() 86 | rotation_x1 = data_full['rotation_x1'] 87 | rotation_y1 = data_full['rotation_y1'] 88 | rotation_x2 = data_full['rotation_x2'] 89 | rotation_y2 = data_full['rotation_y2'] 90 | if not no_update: 91 | self.encoder.train() 92 | self.rotation_net.train() 93 | self.rotation_net_y.train() 94 | self.rotation_net_z.train() 95 | self.opt_enc.zero_grad() 96 | self.opt_dn.zero_grad() 97 | 98 | batch_size = img1.size(0) 99 | gt_rmat = compute_gt_rmat(rotation_x1, rotation_y1, rotation_x2, rotation_y2, batch_size) 100 | if self.rotation_parameterization: 101 | angle_x, angle_y, angle_z = compute_angle(rotation_x1, rotation_x2, rotation_y1, rotation_y2) 102 | else: 103 | angle_x, angle_y, angle_z = compute_euler_angles_from_rotation_matrices(gt_rmat) 104 | 105 | image_feature_map1 = self.encoder(img1) 106 | image_feature_map2 = self.encoder(img2) 107 | 108 | # 
pairwise operation 109 | if self.pairwise_type == "concat": 110 | pairwise_feature = torch.cat([image_feature_map1, image_feature_map2], dim=1) 111 | elif self.pairwise_type == "cost_volume": 112 | pairwise_feature = compute_correlation_volume_pairwise(image_feature_map1, image_feature_map2, num_levels=1) 113 | elif self.pairwise_type == "correlation_volume": 114 | pairwise_feature = compute_correlation_volume_pairwise(image_feature_map1, image_feature_map2, num_levels=4) 115 | 116 | # loss type 117 | if not self.classification: 118 | # regression loss 119 | out_rmat, out_rotation = self.rotation_net(pairwise_feature) 120 | res1 = rotation_loss_reg(out_rmat, gt_rmat) 121 | loss = res1['loss'] 122 | else: 123 | # classification loss 124 | _, out_rotation_x = self.rotation_net(pairwise_feature) 125 | _, out_rotation_y = self.rotation_net_y(pairwise_feature) 126 | _, out_rotation_z = self.rotation_net_z(pairwise_feature) 127 | _, rotation_x = torch.topk(out_rotation_x, 1, dim=-1) 128 | _, rotation_y = torch.topk(out_rotation_y, 1, dim=-1) 129 | _, rotation_z = torch.topk(out_rotation_z, 1, dim=-1) 130 | loss_x = rotation_loss_class(out_rotation_x, angle_x) 131 | loss_y = rotation_loss_class(out_rotation_y, angle_y) 132 | loss_z = rotation_loss_class(out_rotation_z, angle_z) 133 | 134 | loss = loss_x + loss_y + loss_z 135 | res1 = {"loss": loss, "loss_x": loss_x, "loss_y": loss_y, "loss_z": loss_z} 136 | 137 | if not no_update: 138 | loss.backward() 139 | self.opt_enc.step() 140 | self.opt_dn.step() 141 | else: 142 | self.opt_enc.zero_grad() 143 | self.opt_dn.zero_grad() 144 | train_info = {} 145 | train_info.update(res1) 146 | train_info.update({"loss": loss}) 147 | return train_info 148 | 149 | def log_train(self, train_info, train_data, writer=None, 150 | step=None, epoch=None, visualize=False): 151 | if writer is not None: 152 | for k, v in train_info.items(): 153 | if not ('loss' in k) and not ('Error' in k): 154 | continue 155 | if step is not None: 156 | writer.add_scalar('train/' + k, v, step) 157 | else: 158 | assert epoch is not None 159 | writer.add_scalar('train/' + k, v, epoch) 160 | 161 | def validate(self, test_loader, epoch, val_angle=False): 162 | print("Validation") 163 | out_rmat_array = None 164 | gt_rmat_array = None 165 | gt_rmat1_array = None 166 | out_rmat1_array = None 167 | all_res = {} 168 | 169 | with torch.no_grad(): 170 | self.encoder.eval() 171 | self.rotation_net.eval() 172 | self.rotation_net_y.eval() 173 | self.rotation_net_z.eval() 174 | for data_full in tqdm.tqdm(test_loader): 175 | img1 = data_full['img1'].cuda() 176 | img2 = data_full['img2'].cuda() 177 | rotation_x1 = data_full['rotation_x1'] 178 | rotation_y1 = data_full['rotation_y1'] 179 | rotation_x2 = data_full['rotation_x2'] 180 | rotation_y2 = data_full['rotation_y2'] 181 | 182 | batch_size = img1.size(0) 183 | gt_rmat = compute_gt_rmat(rotation_x1, rotation_y1, rotation_x2, rotation_y2, batch_size) 184 | if self.rotation_parameterization: 185 | angle_x, angle_y, angle_z = compute_angle(rotation_x1, rotation_x2, rotation_y1, rotation_y2) 186 | else: 187 | angle_x, angle_y, angle_z = compute_euler_angles_from_rotation_matrices(gt_rmat) 188 | 189 | image_feature_map1 = self.encoder(img1) 190 | image_feature_map2 = self.encoder(img2) 191 | 192 | if self.pairwise_type == "concat": 193 | pairwise_feature = torch.cat([image_feature_map1, image_feature_map2], dim=1) 194 | elif self.pairwise_type == "cost_volume": 195 | pairwise_feature = compute_correlation_volume_pairwise(image_feature_map1, 
image_feature_map2, 196 | num_levels=1) 197 | elif self.pairwise_type == "correlation_volume": 198 | pairwise_feature = compute_correlation_volume_pairwise(image_feature_map1, image_feature_map2, 199 | num_levels=4) 200 | 201 | if not self.classification: 202 | out_rmat, _ = self.rotation_net(pairwise_feature) 203 | out_rmat1 = None 204 | else: 205 | _, out_rotation_x = self.rotation_net(pairwise_feature) 206 | _, out_rotation_y = self.rotation_net_y(pairwise_feature) 207 | _, out_rotation_z = self.rotation_net_z(pairwise_feature) 208 | if self.rotation_parameterization: 209 | out_rmat, out_rmat1 = compute_out_rmat(out_rotation_x, out_rotation_y, out_rotation_z, batch_size) 210 | else: 211 | out_rmat, out_rmat1 = compute_out_rmat_from_euler(out_rotation_x, out_rotation_y, out_rotation_z, batch_size) 212 | 213 | if gt_rmat_array is None: 214 | gt_rmat_array = gt_rmat 215 | else: 216 | gt_rmat_array = torch.cat((gt_rmat_array, gt_rmat)) 217 | if out_rmat_array is None: 218 | out_rmat_array = out_rmat 219 | else: 220 | out_rmat_array = torch.cat((out_rmat_array, out_rmat)) 221 | if val_angle: 222 | gt_rmat1 = compute_rotation_matrix_from_viewpoint(rotation_x1, rotation_y1, batch_size).view(batch_size, 3, 3).cuda() 223 | if gt_rmat1_array is None: 224 | gt_rmat1_array = gt_rmat1 225 | else: 226 | gt_rmat1_array = torch.cat((gt_rmat1_array, gt_rmat1)) 227 | if out_rmat1_array is None: 228 | out_rmat1_array = out_rmat1 229 | else: 230 | out_rmat1_array = torch.cat((out_rmat1_array, out_rmat1)) 231 | 232 | res_error = evaluation_metric_rotation(out_rmat_array, gt_rmat_array) 233 | if val_angle: 234 | angle_error = evaluation_metric_rotation_angle(out_rmat_array, gt_rmat_array, gt_rmat1_array, out_rmat1_array) 235 | res_error.update(angle_error) 236 | 237 | # mean, median, max, std, 10deg 238 | for k, v in res_error.items(): 239 | v = v.view(-1).detach().cpu().numpy() 240 | if k == "gt_angle" or v.size == 0: 241 | continue 242 | mean = np.mean(v) 243 | median = np.median(v) 244 | error_max = np.max(v) 245 | std = np.std(v) 246 | count_10 = (v <= 10).sum(axis=0) 247 | percent_10 = np.true_divide(count_10, v.shape[0]) 248 | all_res.update({k + '/mean': mean, k + '/median': median, k + '/max': error_max, k + '/std': std, 249 | k + '/10deg': percent_10}) 250 | print("Validation Epoch:%d " % epoch, all_res) 251 | return all_res 252 | 253 | def log_val(self, val_info, writer=None, step=None, epoch=None): 254 | if writer is not None: 255 | for k, v in val_info.items(): 256 | if step is not None: 257 | if 'vis' in k: 258 | writer.add_image(k, v, step) 259 | else: 260 | writer.add_scalar(k, v, step) 261 | else: 262 | if 'vis' in k: 263 | writer.add_image(k, v, epoch) 264 | else: 265 | writer.add_scalar(k, v, epoch) 266 | 267 | def save(self, epoch=None, step=None, appendix=None): 268 | d = { 269 | 'opt_enc': self.opt_enc.state_dict(), 270 | 'opt_dn': self.opt_dn.state_dict(), 271 | 'dn': self.rotation_net.state_dict(), 272 | 'dny': self.rotation_net_y.state_dict(), 273 | 'dnz': self.rotation_net_z.state_dict(), 274 | 'enc': self.encoder.state_dict(), 275 | 'epoch': epoch, 276 | 'step': step 277 | } 278 | if appendix is not None: 279 | d.update(appendix) 280 | #save_name = "epoch_%s_iters_%s.pt" % (epoch, step) 281 | save_name = "epoch_%s.pt" % (epoch) 282 | path = os.path.join(self.cfg.save_dir, "checkpoints", save_name) 283 | torch.save(d, path) 284 | remove_name = "epoch_%s.pt" % (epoch-1) 285 | remove_path = os.path.join(self.cfg.save_dir, "checkpoints", remove_name) 286 | if 
os.path.exists(remove_path): 287 | os.remove(remove_path) 288 | 289 | def resume(self, path, strict=True, resume_encoder=False, test=False, **args): 290 | ckpt = torch.load(path) 291 | self.encoder.load_state_dict(ckpt['enc'], strict=strict) 292 | if not resume_encoder: 293 | self.rotation_net.load_state_dict(ckpt['dn'], strict=strict) 294 | self.rotation_net_y.load_state_dict(ckpt['dny'], strict=strict) 295 | self.rotation_net_z.load_state_dict(ckpt['dnz'], strict=strict) 296 | if not test: 297 | self.opt_enc.load_state_dict(ckpt['opt_enc']) 298 | self.opt_dn.load_state_dict(ckpt['opt_dn']) 299 | start_epoch = ckpt['epoch'] 300 | else: 301 | start_epoch = 0 302 | return start_epoch 303 | 304 | def test(self, opt, *arg, **kwargs): 305 | raise NotImplementedError("Trainer [test] not implemented.") 306 | -------------------------------------------------------------------------------- /trainers/utils/__pycache__/compute_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuojinCai/ExtremeRotation_code/61edf0dc5ec2c34782437e7960145a419c280236/trainers/utils/__pycache__/compute_utils.cpython-36.pyc -------------------------------------------------------------------------------- /trainers/utils/__pycache__/loss_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuojinCai/ExtremeRotation_code/61edf0dc5ec2c34782437e7960145a419c280236/trainers/utils/__pycache__/loss_utils.cpython-36.pyc -------------------------------------------------------------------------------- /trainers/utils/compute_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from math import pi 4 | import numpy as np 5 | 6 | def compute_gt_rmat(rotation_x1, rotation_y1, rotation_x2, rotation_y2, batch_size): 7 | gt_mtx1 = compute_rotation_matrix_from_viewpoint(rotation_x1, rotation_y1, batch_size).view(batch_size, 3, 3) 8 | gt_mtx2 = compute_rotation_matrix_from_viewpoint(rotation_x2, rotation_y2, batch_size).view(batch_size, 3, 3) 9 | gt_rmat_matrix = compute_rotation_matrix_from_two_matrices(gt_mtx2, gt_mtx1).view(batch_size, 3, 3) 10 | return gt_rmat_matrix.cuda() 11 | 12 | 13 | def compute_angle(rotation_x1, rotation_x2, rotation_y1, rotation_y2): 14 | delta_x = rotation_x2 - rotation_x1 15 | delta_x[delta_x >= pi] -= 2 * pi 16 | delta_x[delta_x < -pi] += 2 * pi 17 | delta_y = rotation_y1 18 | delta_y[delta_y >= pi] -= 2 * pi 19 | delta_y[delta_y < -pi] += 2 * pi 20 | delta_z = rotation_y2 21 | delta_z[delta_z >= pi] -= 2 * pi 22 | delta_z[delta_z < -pi] += 2 * pi 23 | return delta_x, delta_y, delta_z 24 | 25 | def compute_out_rmat(out_rotation_x, out_rotation_y, out_rotation_z, batch_size): 26 | if out_rotation_x.size(-1) == 1: 27 | rt1 = compute_rotation_matrix_from_viewpoint(torch.zeros(out_rotation_x.size()).to(out_rotation_x), 28 | out_rotation_y.float(), 29 | batch_size).view(batch_size, 3, 3) 30 | rt2 = compute_rotation_matrix_from_viewpoint(out_rotation_x.float(), 31 | out_rotation_z.float(), 32 | batch_size).view(batch_size, 3, 3) 33 | else: 34 | _, rotation_x = torch.topk(out_rotation_x, 1, dim=-1) 35 | _, rotation_y = torch.topk(out_rotation_y, 1, dim=-1) 36 | _, rotation_z = torch.topk(out_rotation_z, 1, dim=-1) 37 | rt1 = compute_rotation_matrix_from_viewpoint(torch.zeros(rotation_x.size()).to(rotation_x), 38 | rotation_y.float() / 
out_rotation_y.size(-1) * 2 * pi - pi, 39 | batch_size).view(batch_size, 3, 3) 40 | rt2 = compute_rotation_matrix_from_viewpoint(rotation_x.float() / out_rotation_x.size(-1) * 2 * pi - pi, 41 | rotation_z.float() / out_rotation_z.size(-1) * 2 * pi - pi, 42 | batch_size).view(batch_size, 3, 3) 43 | out_rmat = compute_rotation_matrix_from_two_matrices(rt2, rt1).view(batch_size, 3, 3).cuda() 44 | return out_rmat, rt1.cuda() 45 | 46 | def compute_out_rmat_from_euler(out_rotation_x, out_rotation_y, out_rotation_z, batch_size): 47 | if out_rotation_x.size(-1) == 1: 48 | out_rmat = compute_rotation_matrix_from_euler_angle(out_rotation_x.float(), out_rotation_y.float(), out_rotation_z.float(), batch_size) 49 | else: 50 | _, rotation_x = torch.topk(out_rotation_x, 1, dim=-1) 51 | _, rotation_y = torch.topk(out_rotation_y, 1, dim=-1) 52 | _, rotation_z = torch.topk(out_rotation_z, 1, dim=-1) 53 | out_rmat = compute_rotation_matrix_from_euler_angle(rotation_x.float() / out_rotation_x.size(-1) * 2 * pi - pi, 54 | rotation_y.float() / out_rotation_y.size(-1) * 2 * pi - pi, 55 | rotation_z.float() / out_rotation_z.size(-1) * 2 * pi - pi, batch_size) 56 | return out_rmat.view(batch_size, 3, 3).cuda(), None 57 | 58 | # compute rotation matrix from view point with azimuth and elevation, (roll=0 here) 59 | # output cuda batch*3*3 matrices in the rotation order of ZYZ euler angle 60 | def compute_rotation_matrix_from_viewpoint(rotation_x, rotation_y, batch): 61 | rotax = rotation_x.view(batch, 1).type(torch.FloatTensor) 62 | rotay = - rotation_y.view(batch, 1).type(torch.FloatTensor) 63 | # rotaz = torch.zeros(batch, 1) 64 | 65 | c1 = torch.cos(rotax).view(batch, 1) # batch*1 66 | s1 = torch.sin(rotax).view(batch, 1) # batch*1 67 | c2 = torch.cos(rotay).view(batch, 1) # batch*1 68 | s2 = torch.sin(rotay).view(batch, 1) # batch*1 69 | 70 | # pitch --> yaw 71 | row1 = torch.cat((c2, s1 * s2, c1 * s2), 1).view(-1, 1, 3) # batch*1*3 72 | row2 = torch.cat((torch.autograd.Variable(torch.zeros(s2.size())), c1, -s1), 1).view(-1, 1, 3) # batch*1*3 73 | row3 = torch.cat((-s2, s1 * c2, c1 * c2), 1).view(-1, 1, 3) # batch*1*3 74 | 75 | matrix = torch.cat((row1, row2, row3), 1) # batch*3*3 76 | 77 | return matrix 78 | 79 | 80 | def compute_viewpoint_from_rotation_matrix(rotation_matrix, batch): 81 | # pitch --> yaw 82 | s1 = - rotation_matrix.view(batch, 3, 3)[:, 1, 2] 83 | c1 = rotation_matrix.view(batch, 3, 3)[:, 1, 1] 84 | s2 = - rotation_matrix.view(batch, 3, 3)[:, 2, 0] 85 | c2 = rotation_matrix.view(batch, 3, 3)[:, 0, 0] 86 | # rotation_x = torch.asin(s1).view(batch, 1) 87 | rotation_x = torch.acos(c1).view(batch, 1) 88 | index = torch.nonzero(s1.view(-1) < 0, as_tuple=True) 89 | rotation_x[index] = -rotation_x[index] 90 | rotation_y = torch.acos(c2).view(batch, 1) 91 | indexy = torch.nonzero(s2.view(-1) < 0, as_tuple=True) 92 | rotation_y[indexy] = -rotation_y[indexy] 93 | rotation_y = - rotation_y 94 | return rotation_x, rotation_y 95 | 96 | 97 | def compute_rotation_matrix_from_two_matrices(m1, m2): 98 | batch = m1.shape[0] 99 | m = torch.bmm(m1, m2.transpose(1, 2)) # batch*3*3 100 | return m 101 | 102 | 103 | def compute_correlation_volume_pairwise(fmap1, fmap2, num_levels): 104 | batch, dim, ht, wd = fmap1.shape 105 | fmap1 = fmap1.view(batch, dim, ht * wd) 106 | fmap2 = fmap2.view(batch, dim, ht * wd) 107 | 108 | corr = torch.matmul(fmap1.transpose(1, 2), fmap2) 109 | corr = corr.view(batch, ht, wd, 1, ht, wd) 110 | corr = corr / torch.sqrt(torch.tensor(dim).float()) 111 | 112 | batch2, h1, w1, dim2, h2, w2 = 
corr.shape 113 | corr = corr.reshape(batch2 * h1 * w1, dim2, h2, w2) 114 | corr_pyramid = [] 115 | corr_pyramid.append(corr) 116 | for i in range(num_levels - 1): 117 | corr = F.avg_pool2d(corr, 2, stride=2) 118 | corr_pyramid.append(corr) 119 | 120 | out_pyramid = [] 121 | for i in range(num_levels): 122 | corr = corr_pyramid[i] 123 | corr = corr.view(batch2, h1, w1, -1) 124 | out_pyramid.append(corr) 125 | out = torch.cat(out_pyramid, dim=-1) 126 | return out.permute(0, 3, 1, 2).contiguous().float() 127 | 128 | 129 | '''Tools in RotationContinuity. 130 | https://github.com/papagina/RotationContinuity/blob/master/shapenet/code/tools.py 131 | Reference: 132 | [1] Yi Zhou, Connelly Barnes, Jingwan Lu, Jimei Yang, Hao Li 133 | On The Continuity of Rotation Representations in Neural Networks. arXiv:1812.07035 134 | ''' 135 | 136 | # batch*n 137 | def normalize_vector(v): 138 | batch = v.shape[0] 139 | v_mag = torch.sqrt(v.pow(2).sum(1)) # batch 140 | v_mag = torch.max(v_mag, torch.autograd.Variable(torch.FloatTensor([1e-8]).cuda())) 141 | v_mag = v_mag.view(batch, 1).expand(batch, v.shape[1]) 142 | v = v / v_mag 143 | return v 144 | 145 | 146 | # u, v batch*n 147 | def cross_product(u, v): 148 | batch = u.shape[0] 149 | # print (u.shape) 150 | # print (v.shape) 151 | i = u[:, 1] * v[:, 2] - u[:, 2] * v[:, 1] 152 | j = u[:, 2] * v[:, 0] - u[:, 0] * v[:, 2] 153 | k = u[:, 0] * v[:, 1] - u[:, 1] * v[:, 0] 154 | 155 | out = torch.cat((i.view(batch, 1), j.view(batch, 1), k.view(batch, 1)), 1) # batch*3 156 | 157 | return out 158 | 159 | 160 | # poses batch*6 161 | # poses 162 | def compute_rotation_matrix_from_ortho6d(poses): 163 | x_raw = poses[:, 0:3] # batch*3 164 | y_raw = poses[:, 3:6] # batch*3 165 | 166 | x = normalize_vector(x_raw) # batch*3 167 | z = cross_product(x, y_raw) # batch*3 168 | z = normalize_vector(z) # batch*3 169 | y = cross_product(z, x) # batch*3 170 | 171 | x = x.view(-1, 3, 1) 172 | y = y.view(-1, 3, 1) 173 | z = z.view(-1, 3, 1) 174 | matrix = torch.cat((x, y, z), 2) # batch*3*3 175 | return matrix 176 | 177 | 178 | # in a batch*5, axis int 179 | def stereographic_unproject(a, axis=None): 180 | """ 181 | Inverse of stereographic projection: increases dimension by one. 
182 | """ 183 | batch = a.shape[0] 184 | if axis is None: 185 | axis = a.shape[1] 186 | s2 = torch.pow(a, 2).sum(1) # batch 187 | ans = torch.autograd.Variable(torch.zeros(batch, a.shape[1] + 1).cuda()) # batch*6 188 | unproj = 2 * a / (s2 + 1).view(batch, 1).repeat(1, a.shape[1]) # batch*5 189 | if (axis > 0): 190 | ans[:, :axis] = unproj[:, :axis] # batch*(axis-0) 191 | ans[:, axis] = (s2 - 1) / (s2 + 1) # batch 192 | ans[:, axis + 1:] = unproj[:, 193 | axis:] # batch*(5-axis) # Note that this is a no-op if the default option (last axis) is used 194 | return ans 195 | 196 | 197 | # a batch*5 198 | # out batch*3*3 199 | def compute_rotation_matrix_from_ortho5d(a): 200 | batch = a.shape[0] 201 | proj_scale_np = np.array([np.sqrt(2) + 1, np.sqrt(2) + 1, np.sqrt(2)]) # 3 202 | proj_scale = torch.autograd.Variable(torch.FloatTensor(proj_scale_np).cuda()).view(1, 3).repeat(batch, 1) # batch,3 203 | 204 | u = stereographic_unproject(a[:, 2:5] * proj_scale, axis=0) # batch*4 205 | norm = torch.sqrt(torch.pow(u[:, 1:], 2).sum(1)) # batch 206 | u = u / norm.view(batch, 1).repeat(1, u.shape[1]) # batch*4 207 | b = torch.cat((a[:, 0:2], u), 1) # batch*6 208 | matrix = compute_rotation_matrix_from_ortho6d(b) 209 | return matrix 210 | 211 | 212 | # quaternion batch*4 213 | def compute_rotation_matrix_from_quaternion(quaternion): 214 | batch = quaternion.shape[0] 215 | 216 | quat = normalize_vector(quaternion) 217 | 218 | qw = quat[..., 0].view(batch, 1) 219 | qx = quat[..., 1].view(batch, 1) 220 | qy = quat[..., 2].view(batch, 1) 221 | qz = quat[..., 3].view(batch, 1) 222 | 223 | # Unit quaternion rotation matrices computatation 224 | xx = qx * qx 225 | yy = qy * qy 226 | zz = qz * qz 227 | xy = qx * qy 228 | xz = qx * qz 229 | yz = qy * qz 230 | xw = qx * qw 231 | yw = qy * qw 232 | zw = qz * qw 233 | 234 | row0 = torch.cat((1 - 2 * yy - 2 * zz, 2 * xy - 2 * zw, 2 * xz + 2 * yw), 1) # batch*3 235 | row1 = torch.cat((2 * xy + 2 * zw, 1 - 2 * xx - 2 * zz, 2 * yz - 2 * xw), 1) # batch*3 236 | row2 = torch.cat((2 * xz - 2 * yw, 2 * yz + 2 * xw, 1 - 2 * xx - 2 * yy), 1) # batch*3 237 | 238 | matrix = torch.cat((row0.view(batch, 1, 3), row1.view(batch, 1, 3), row2.view(batch, 1, 3)), 1) # batch*3*3 239 | 240 | return matrix 241 | 242 | 243 | # axisAngle batch*4 angle, x,y,z 244 | def compute_rotation_matrix_from_axisAngle(axisAngle): 245 | batch = axisAngle.shape[0] 246 | 247 | theta = torch.tanh(axisAngle[:, 0]) * np.pi # [-180, 180] 248 | sin = torch.sin(theta) 249 | axis = normalize_vector(axisAngle[:, 1:4]) # batch*3 250 | qw = torch.cos(theta) 251 | qx = axis[:, 0] * sin 252 | qy = axis[:, 1] * sin 253 | qz = axis[:, 2] * sin 254 | 255 | # Unit quaternion rotation matrices computatation 256 | xx = (qx * qx).view(batch, 1) 257 | yy = (qy * qy).view(batch, 1) 258 | zz = (qz * qz).view(batch, 1) 259 | xy = (qx * qy).view(batch, 1) 260 | xz = (qx * qz).view(batch, 1) 261 | yz = (qy * qz).view(batch, 1) 262 | xw = (qx * qw).view(batch, 1) 263 | yw = (qy * qw).view(batch, 1) 264 | zw = (qz * qw).view(batch, 1) 265 | 266 | row0 = torch.cat((1 - 2 * yy - 2 * zz, 2 * xy - 2 * zw, 2 * xz + 2 * yw), 1) # batch*3 267 | row1 = torch.cat((2 * xy + 2 * zw, 1 - 2 * xx - 2 * zz, 2 * yz - 2 * xw), 1) # batch*3 268 | row2 = torch.cat((2 * xz - 2 * yw, 2 * yz + 2 * xw, 1 - 2 * xx - 2 * yy), 1) # batch*3 269 | 270 | matrix = torch.cat((row0.view(batch, 1, 3), row1.view(batch, 1, 3), row2.view(batch, 1, 3)), 1) # batch*3*3 271 | 272 | return matrix 273 | 274 | 275 | # euler batch*4 276 | # output cuda batch*3*3 matrices in 
the rotation order of XZ'Y'' (intrinsic) or YZX (extrinsic) 277 | def compute_rotation_matrix_from_euler(euler): 278 | batch = euler.shape[0] 279 | 280 | c1 = torch.cos(euler[:, 0]).view(batch, 1) # batch*1 281 | s1 = torch.sin(euler[:, 0]).view(batch, 1) # batch*1 282 | c2 = torch.cos(euler[:, 2]).view(batch, 1) # batch*1 283 | s2 = torch.sin(euler[:, 2]).view(batch, 1) # batch*1 284 | c3 = torch.cos(euler[:, 1]).view(batch, 1) # batch*1 285 | s3 = torch.sin(euler[:, 1]).view(batch, 1) # batch*1 286 | 287 | row1 = torch.cat((c2 * c3, -s2, c2 * s3), 1).view(-1, 1, 3) # batch*1*3 288 | row2 = torch.cat((c1 * s2 * c3 + s1 * s3, c1 * c2, c1 * s2 * s3 - s1 * c3), 1).view(-1, 1, 3) # batch*1*3 289 | row3 = torch.cat((s1 * s2 * c3 - c1 * s3, s1 * c2, s1 * s2 * s3 + c1 * c3), 1).view(-1, 1, 3) # batch*1*3 290 | 291 | matrix = torch.cat((row1, row2, row3), 1) # batch*3*3 292 | 293 | return matrix 294 | 295 | 296 | # matrices batch*3*3 297 | # both matrix are orthogonal rotation matrices 298 | # out theta between 0 to 180 degree batch 299 | def compute_geodesic_distance_from_two_matrices(m1, m2): 300 | batch = m1.shape[0] 301 | m = torch.bmm(m1, m2.transpose(1, 2)) # batch*3*3 302 | 303 | cos = (m[:, 0, 0] + m[:, 1, 1] + m[:, 2, 2] - 1) / 2 304 | cos = torch.min(cos, torch.autograd.Variable(torch.ones(batch).cuda())) 305 | cos = torch.max(cos, torch.autograd.Variable(torch.ones(batch).cuda()) * -1) 306 | 307 | theta = torch.acos(cos) 308 | 309 | return theta 310 | 311 | 312 | # matrices batch*3*3 313 | # both matrix are orthogonal rotation matrices 314 | # out theta between 0 to pi batch 315 | def compute_angle_from_r_matrices(m): 316 | batch = m.shape[0] 317 | 318 | cos = (m[:, 0, 0] + m[:, 1, 1] + m[:, 2, 2] - 1) / 2 319 | cos = torch.min(cos, torch.autograd.Variable(torch.ones(batch).cuda())) 320 | cos = torch.max(cos, torch.autograd.Variable(torch.ones(batch).cuda()) * -1) 321 | 322 | theta = torch.acos(cos) 323 | 324 | return theta 325 | 326 | 327 | # input batch*4*4 or batch*3*3 328 | # output torch batch*3 x, y, z in radiant 329 | # the rotation is in the sequence of x,y,z 330 | def compute_euler_angles_from_rotation_matrices(rotation_matrices): 331 | batch = rotation_matrices.shape[0] 332 | R = rotation_matrices 333 | sy = torch.sqrt(R[:, 0, 0] * R[:, 0, 0] + R[:, 1, 0] * R[:, 1, 0]) 334 | singular = sy < 1e-6 335 | singular = singular.float() 336 | 337 | x = torch.atan2(R[:, 2, 1], R[:, 2, 2]) 338 | y = torch.atan2(-R[:, 2, 0], sy) 339 | z = torch.atan2(R[:, 1, 0], R[:, 0, 0]) 340 | 341 | xs = torch.atan2(-R[:, 1, 2], R[:, 1, 1]) 342 | ys = torch.atan2(-R[:, 2, 0], sy) 343 | zs = R[:, 1, 0] * 0 344 | 345 | rotation_x = x * (1 - singular) + xs * singular 346 | rotation_y = y * (1 - singular) + ys * singular 347 | rotation_z = z * (1 - singular) + zs * singular 348 | 349 | return rotation_x, rotation_y, rotation_z 350 | 351 | 352 | def compute_rotation_matrix_from_euler_angle(rotation_x, rotation_y, rotation_z=None, batch=None): 353 | rotax = rotation_x.view(batch, 1).type(torch.FloatTensor) 354 | rotay = rotation_y.view(batch, 1).type(torch.FloatTensor) 355 | if rotation_z is None: 356 | rotaz = torch.zeros(batch, 1) 357 | else: 358 | rotaz = rotation_z.view(batch, 1).type(torch.FloatTensor) 359 | 360 | c3 = torch.cos(rotax).view(batch, 1) 361 | s3 = torch.sin(rotax).view(batch, 1) 362 | c2 = torch.cos(rotay).view(batch, 1) 363 | s2 = torch.sin(rotay).view(batch, 1) 364 | c1 = torch.cos(rotaz).view(batch, 1) 365 | s1 = torch.sin(rotaz).view(batch, 1) 366 | 367 | row1 = torch.cat((c1 * c2, 
c1 * s2 * s3 - s1 * c3, c1 * s2 * c3 + s1 * s3), 1).view(-1, 1, 3) # batch*1*3 368 | row2 = torch.cat((s1 * c2, s1 * s2 * s3 + c1 * c3, s1 * s2 * c3 - c1 * s3), 1).view(-1, 1, 3) # batch*1*3 369 | row3 = torch.cat((-s2, c2 * s3, c2 * c3), 1).view(-1, 1, 3) # batch*1*3 370 | 371 | matrix = torch.cat((row1, row2, row3), 1) # batch*3*3 372 | 373 | return matrix -------------------------------------------------------------------------------- /trainers/utils/loss_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch import optim 4 | from trainers.utils.compute_utils import * 5 | import numpy as np 6 | 7 | 8 | def get_opt(params, cfgopt): 9 | if cfgopt.type == 'adam': 10 | optimizer = optim.Adam(params, lr=float(cfgopt.lr), 11 | betas=(cfgopt.beta1, cfgopt.beta2), 12 | weight_decay=cfgopt.weight_decay) 13 | elif cfgopt.type == 'sgd': 14 | optimizer = torch.optim.SGD( 15 | params, lr=float(cfgopt.lr), momentum=cfgopt.momentum) 16 | else: 17 | assert 0, "Optimizer type should be either 'adam' or 'sgd'" 18 | 19 | scheduler = None 20 | scheduler_type = getattr(cfgopt, "scheduler", None) 21 | if scheduler_type is not None: 22 | if scheduler_type == 'exponential': 23 | decay = float(getattr(cfgopt, "step_decay", 0.1)) 24 | scheduler = optim.lr_scheduler.ExponentialLR(optimizer, decay) 25 | elif scheduler_type == 'step': 26 | step_size = int(getattr(cfgopt, "step_epoch", 500)) 27 | decay = float(getattr(cfgopt, "step_decay", 0.1)) 28 | scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=decay) 29 | elif scheduler_type == 'linear': 30 | step_size = int(getattr(cfgopt, "step_epoch", 2000)) 31 | 32 | def lambda_rule(ep): 33 | lr_l = 1.0 - min(1, max(0, ep - 0.5 * step_size) / float(0.45 * step_size)) * (1 - 0.01) 34 | return lr_l 35 | 36 | scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule) 37 | else: 38 | assert 0, "args.schedulers should be either 'exponential' or 'linear' or 'step'" 39 | return optimizer, scheduler 40 | else: 41 | return optimizer 42 | 43 | 44 | def rotation_loss_class(out_rotation_x, angle_x): 45 | length = out_rotation_x.size(-1) 46 | label = ((angle_x.view(-1).cuda() + pi) / 2 / np.pi * length) 47 | label[label < 0] += length 48 | label[label >= length] -= length 49 | if out_rotation_x.size(-1) == 1: 50 | loss_x = ((out_rotation_x - angle_x.view(-1).cuda()) ** 2).mean() 51 | elif out_rotation_x.size(-1) == length: 52 | criterion = nn.CrossEntropyLoss() 53 | loss_x = criterion(out_rotation_x, label.long()) 54 | else: 55 | assert False 56 | return loss_x 57 | 58 | 59 | def rotation_loss_reg(predict_rotation, gt_rotation): 60 | l2_loss = ((predict_rotation.view(-1).cuda() - gt_rotation.view(-1).cuda()) ** 2) 61 | loss = l2_loss.mean() 62 | res = { 63 | "loss": loss, 64 | "rotation_l2_error": l2_loss, 65 | "rotation_l2_loss": l2_loss.mean(), 66 | } 67 | return res 68 | --------------------------------------------------------------------------------
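Note on the angle discretization used by the classification heads and losses above: `rotation_loss_class` (trainers/utils/loss_utils.py) converts a relative Euler angle in [-pi, pi) into one of `out_dim` class labels via `(angle + pi) / (2*pi) * out_dim`, and `compute_out_rmat` (trainers/utils/compute_utils.py) maps a predicted bin index back to radians via `index / out_dim * 2*pi - pi` before assembling the rotation matrix. The snippet below is a minimal, self-contained sketch of that round trip; it does not import the repository code, the helper names `angle_to_bin`, `bin_to_angle`, and `NUM_BINS` are illustrative only, and 360 bins is simply the default `out_dim` of the distribution heads.

```python
import torch
from math import pi

NUM_BINS = 360  # default out_dim of the distribution heads in this repo

def angle_to_bin(angle):
    # Mirrors the label computation in rotation_loss_class:
    # shift [-pi, pi) to [0, 2*pi), scale to NUM_BINS classes, then truncate.
    label = (angle + pi) / (2 * pi) * NUM_BINS
    label[label < 0] += NUM_BINS
    label[label >= NUM_BINS] -= NUM_BINS
    return label.long()

def bin_to_angle(index):
    # Mirrors the bin-to-radians mapping in compute_out_rmat.
    return index.float() / NUM_BINS * 2 * pi - pi

angles = torch.tensor([-3.0, -1.2, 0.4, 2.9])   # relative angles in radians
bins = angle_to_bin(angles)                      # class labels in [0, NUM_BINS)
recovered = bin_to_angle(bins)                   # quantized angles in [-pi, pi)
print(bins.tolist())
print((recovered - angles).abs().max())          # below one bin width, 2*pi/NUM_BINS
```

The same convention explains the fallback in `rotation_loss_class`: when a head outputs a single value (`out_dim == 1`), the angle is regressed directly and a plain L2 loss is used instead of cross-entropy over bins.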