├── .gitignore ├── LICENSE ├── README.md ├── assets ├── p.png ├── test.png └── test_disp.png ├── config ├── __init__.py ├── cfg_cityscape.py ├── cfg_eth3d_autoencoder.py ├── cfg_eth3d_fm.py ├── cfg_euroc_autoencoder.py ├── cfg_euroc_fm.py ├── cfg_folder.py ├── cfg_kitti_autoencoder.py ├── cfg_kitti_fm.py ├── cfg_kitti_fm_joint.py ├── cfg_kitti_fm_refine.py ├── cfg_make3d_fm.py └── cfg_odom_fm.py ├── mono ├── __init__.py ├── apis │ ├── __init__.py │ ├── env.py │ └── trainer.py ├── core │ ├── __init__.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── eval_hooks.py │ │ └── pixel_error.py │ └── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py ├── datasets │ ├── __init__.py │ ├── cityscape_dataset.py │ ├── eth3d_dataset.py │ ├── euroc_dataset.py │ ├── folder_dataset.py │ ├── get_dataset.py │ ├── gt_pose │ │ ├── 00.txt │ │ ├── 01.txt │ │ ├── 02.txt │ │ ├── 03.txt │ │ ├── 04.txt │ │ ├── 05.txt │ │ ├── 06.txt │ │ ├── 07.txt │ │ ├── 08.txt │ │ ├── 09.txt │ │ ├── 10.txt │ │ └── 12.txt │ ├── kitti_dataset.py │ ├── kitti_utils.py │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── sampler.py │ ├── mono_dataset.py │ ├── splits │ │ ├── __init__.py │ │ ├── benchmark │ │ │ ├── eigen_to_benchmark_ids.npy │ │ │ ├── test_files.txt │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ │ ├── cityscape │ │ │ ├── gen_cityscape_split.py │ │ │ ├── test.txt │ │ │ ├── train.txt │ │ │ ├── train_files.txt │ │ │ ├── val.txt │ │ │ └── val_files.txt │ │ ├── eigen_benchmark │ │ │ └── test_files.txt │ │ ├── eigen_full │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ │ ├── exp │ │ │ ├── __init__.py │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ │ ├── kitti_archives_to_download.txt │ │ ├── kitti_shot_sequence │ │ │ ├── gen_split.py │ │ │ └── val_files.txt │ │ ├── odom │ │ │ ├── test_files_09.txt │ │ │ ├── test_files_10.txt │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ │ ├── short │ │ │ ├── __init__.py │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ │ └── test │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ └── utils.py ├── model │ ├── __init__.py │ ├── mono_autoencoder │ │ ├── __init__.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── layers.py │ │ ├── net.py │ │ └── resnet.py │ ├── mono_baseline │ │ ├── __init__.py │ │ ├── depth_decoder.py │ │ ├── depth_encoder.py │ │ ├── layers.py │ │ ├── net.py │ │ ├── pose_decoder.py │ │ ├── pose_encoder.py │ │ └── resnet.py │ ├── mono_fm │ │ ├── __init__.py │ │ ├── depth_decoder.py │ │ ├── depth_encoder.py │ │ ├── layers.py │ │ ├── net.py │ │ ├── pose_decoder.py │ │ ├── pose_encoder.py │ │ └── resnet.py │ ├── mono_fm_joint │ │ ├── __init__.py │ │ ├── decoder.py │ │ ├── depth_decoder.py │ │ ├── depth_encoder.py │ │ ├── encoder.py │ │ ├── layers.py │ │ ├── net.py │ │ ├── pose_decoder.py │ │ ├── pose_encoder.py │ │ └── resnet.py │ └── registry.py └── tools │ ├── __init__.py │ ├── file_interface.py │ ├── geometry.py │ ├── kitti_evaluation_toolkit.py │ ├── lie_algebra.py │ ├── pose_evaluation_utils.py │ ├── trajectory.py │ └── transformations.py ├── requirements.txt ├── run.py ├── scripts ├── __init__.py ├── draw_odometry.py ├── eval_depth.py ├── eval_depth_pp.py ├── eval_pose.py ├── infer.py └── infer_singleimage.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | .idea 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 
| 3 | Copyright (c) 2020 sconly 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # feature_metric_depth 2 | This is the official code for the method described in 3 | > **Feature-metric Loss for Self-supervised Learning of Depth and Egomotion** 4 | > 5 | > [ECCV 2020](https://arxiv.org/pdf/2007.10603.pdf) 6 | 7 |

8 | [performance figure] 9 |

10 | 11 | If you find our work useful in your research, please consider citing our paper: 12 | 13 | ``` 14 | @inproceedings{shu2020featdepth, 15 | title={Feature-metric Loss for Self-supervised Learning of Depth and Egomotion}, 16 | author={Shu, Chang and Yu, Kun and Duan, Zhixiang and Yang, Kuiyuan}, 17 | booktitle={ECCV}, 18 | year={2020} 19 | } 20 | ``` 21 | 22 | ## Setup 23 | 24 | ### Requirements: 25 | - PyTorch 1.1+, Python 3.5+, CUDA 10.0+ 26 | - mmcv==0.4.4 27 | 28 | Our code is based on mmcv for distributed training. 29 | To make it convenient for you to train and test our code, we provide our [anaconda environment](https://drive.google.com/file/d/1NSoGxhP8UpyW-whzpqP3WIB6u2mgGP49/view?usp=sharing); 30 | you only need to download it, extract it to the folder of your anaconda environments, and use the python in it to run our code. 31 | 32 | If you would like to set up your anaconda environment yourself, you can do so as follows: 33 | ```bash 34 | # first, make sure that your conda is set up properly with the right environment 35 | # for that, check that `which conda`, `which pip` and `which python` point to the 36 | # right paths. From a clean conda env, this is what you need to do 37 | 38 | conda create --name featdepth python=3.7 39 | conda activate featdepth 40 | 41 | # this installs the right pip and dependencies for the fresh python 42 | conda install ipython 43 | conda install pip 44 | 45 | # install required packages from requirements.txt 46 | pip install -r requirements.txt 47 | ``` 48 | 49 | ## KITTI training data 50 | 51 | Our training data is the same as that used by other self-supervised monocular depth estimation methods; please refer to [monodepth2](https://github.com/nianticlabs/monodepth2) to prepare the training data. 52 | 53 | ## Pretrained weights 54 | 55 | We provide weights for: 56 | (1) [AutoEncoder trained on the kitti raw data](https://drive.google.com/file/d/1ncAWUMvLq2ETMpG-7eI9qfILce_cPPfy/view?usp=sharing); 57 | (2) [FeatDepth trained on the kitti raw data](https://drive.google.com/file/d/1HlAubfuja5nBKpfNU3fQs-3m3Zaiu9RI/view?usp=sharing); 58 | (3) [FeatDepth finetuned on the test split of kitti raw data by using online refinement](https://drive.google.com/file/d/1CfCtz55s4QHya3y3UslxsuD_0cxNlA-D/view?usp=sharing); 59 | (4) [FeatDepth trained on kitti odometry](https://drive.google.com/file/d/1vQJbiyPXv_XNQYpyVocDB3-LKwx2LVka/view?usp=sharing); 60 | (5) [FeatDepth trained on Euroc](https://drive.google.com/file/d/1IMIAKpHXmqyUxiUIiqqp5qI-nJXDUSmj/view?usp=sharing); 61 | (6) [FeatDepth trained on NYU](https://drive.google.com/file/d/1Mo050P-DgG-jrNXWww07GXXyst5h5Q74/view?usp=sharing). 62 | 63 | ## API 64 | We provide an API interface for you to predict depth and pose from an image sequence and visualize some results. 65 | The scripts are stored in the 'scripts' folder. 66 | ``` 67 | draw_odometry.py is used to provide several analytical curves and obtain standard kitti odometry evaluation results. 68 | ``` 69 | 70 | ``` 71 | eval_pose.py is used to obtain kitti odometry evaluation results. 72 | ``` 73 | 74 | ``` 75 | eval_depth.py is used to obtain kitti depth evaluation results. 76 | ``` 77 | 78 | ``` 79 | infer.py is used to generate depth maps from given models. 80 | ``` 81 | 82 | ``` 83 | infer_singleimage.py is used to test a single image for viewing.
84 | ``` 85 | ## Training 86 | You can use the following command to launch distributed training of our model: 87 | ```shell 88 | /path/to/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=1 train.py --config /path/to/cfg_kitti_fm.py --work_dir /dir/for/saving/weights/and/logs 89 | ``` 90 | Here nproc_per_node refers to the number of GPUs you want to use. 91 | 92 | ## Configurations 93 | We provide a variety of config files for training on different datasets. 94 | They are stored in the config folder. 95 | 96 | For example: 97 | (1) 'cfg_kitti_fm.py' is used to train our model on the kitti dataset, where the autoencoder weights are loaded from the pretrained weights we provide and kept fixed during training. 98 | This mode is preferred when your GPU memory is less than 16 GB; 99 | (2) 'cfg_kitti_fm_joint.py' is used to train our model on the kitti dataset, where the autoencoder is jointly trained with depthnet and posenet. 100 | We rescale the input resolution of our model so that training fits in 12 GB of GPU memory, which slightly reduces the performance. 101 | You can modify the input resolution according to your computational resources. 102 | 103 | For guidance on modifying config files, please refer to the comments in cfg_kitti_fm.py. 104 | 105 | ## Online refinement 106 | We provide a config file for online refinement: you can use cfg_kitti_fm_refine.py to refine a model trained on kitti raw data by continuing training on the test data. 107 | For the settings of online refinement, please refer to the details in cfg_kitti_fm_refine.py in the config folder. 108 | 109 | ## Finetuning 110 | If you want to finetune from given weights, you can modify the 'finetune' term in the config files from 'None' to an existing path to a pre-trained weight. 111 | 112 | ## Resuming 113 | If you want to reproduce the training state of a certain pretrained weight, you can modify the 'resume_from' term in the config files from 'None' to an existing path to a pre-trained weight. 114 | The program will continue training from where the pretrained weight ends. 115 | Note that you have to increase the 'total_epochs' value to make sure that the training has enough epochs left to continue. 116 | 117 | ## Notes 118 | Our model predicts inverse depths. 119 | If you want to get real depth when training a stereo model, you have to convert the inverse depth to depth and then multiply it by 36.
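The snippet below is a minimal sketch of that conversion, assuming `disp` is a single disparity map predicted by the network with values in [0, 1]; the helper mirrors `disp_to_depth` in mono/core/evaluation/pixel_error.py, and the random input array is only a placeholder for an actual prediction.

```python
import numpy as np

def disp_to_depth(disp, min_depth=0.1, max_depth=100.0):
    # Map the network output in [0, 1] to an inverse depth in
    # [1/max_depth, 1/min_depth], then invert it to obtain depth.
    min_disp = 1.0 / max_depth
    max_disp = 1.0 / min_depth
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1.0 / scaled_disp
    return scaled_disp, depth

# placeholder prediction; in practice this is the sigmoid output of the depth decoder
disp = np.random.rand(320, 1024).astype(np.float32)
_, depth = disp_to_depth(disp, min_depth=0.1, max_depth=100.0)
metric_depth = depth * 36.0  # stereo-trained models: scale to metric depth as noted above
```

For monocular-only training the predicted depth is defined only up to scale, so the fixed factor of 36 applies to the stereo-trained setting described above.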
120 | -------------------------------------------------------------------------------- /assets/p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/assets/p.png -------------------------------------------------------------------------------- /assets/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/assets/test.png -------------------------------------------------------------------------------- /assets/test_disp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/assets/test_disp.png -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/config/__init__.py -------------------------------------------------------------------------------- /config/cfg_cityscape.py: -------------------------------------------------------------------------------- 1 | split = 'cityscape' 2 | dataset = 'cityscape' 3 | 4 | height = 384 5 | width = 768 6 | disparity_smoothness = 1e-3 7 | scales = [0, 1, 2, 3, 4] 8 | min_depth = 0.1 9 | max_depth = 100.0 10 | frame_ids = [0, -1, 1] 11 | learning_rate = 1e-4 12 | 13 | depth_num_layers = 50 14 | pose_num_layers = 50 15 | total_epochs = 45 16 | device_ids = range(8) 17 | 18 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(depth_num_layers) 19 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(pose_num_layers) 20 | 21 | in_path = '/ssd/Cityscapes' 22 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz' 23 | checkpoint_path = '/node01_data5/monodepth2-test/model/refine/smallfigure.pth' 24 | 25 | imgs_per_gpu = 2 26 | workers_per_gpu = 2 27 | 28 | validate = False 29 | 30 | png = True 31 | scale_invariant = False 32 | plane_fitting = False 33 | finetune = False 34 | perception = False 35 | focus_loss = False 36 | 37 | scale_invariant_weight = 0.01 38 | plane_fitting_weight = 0.0001 39 | perceptional_weight = 0.001 40 | 41 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 42 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 43 | # learning policy 44 | lr_config = dict( 45 | policy='step', 46 | warmup='linear', 47 | warmup_iters=500, 48 | warmup_ratio=1.0 / 3, 49 | step=[15,25,35], 50 | gamma=0.5, 51 | ) 52 | 53 | checkpoint_config = dict(interval=1) 54 | # yapf:disable 55 | log_config = dict(interval=50, 56 | hooks=[dict(type='TextLoggerHook'),]) 57 | # yapf:enable 58 | # runtime settings 59 | dist_params = dict(backend='nccl') 60 | log_level = 'INFO' 61 | load_from = None 62 | resume_from = None 63 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_eth3d_autoencoder.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0] 4 | IMGS_PER_GPU = 3 5 | HEIGHT = 448 6 | WIDTH = 736 7 | 8 | data = dict( 9 | name = 'eth3d', 10 | split = 'exp', 11 | height = 
HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/ssd/ETH3D/slam/cables_4', 15 | gt_depth_path = None, 16 | png = True, 17 | stereo_scale = False, 18 | ) 19 | 20 | model = dict( 21 | name = 'autoencoder', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | automask = True, 34 | disp_norm = True, 35 | use_min_construct = True, 36 | dis=0.001, 37 | cvt=0.001, 38 | ) 39 | 40 | 41 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 42 | resume_from = None 43 | finetune = None 44 | total_epochs = 30 45 | imgs_per_gpu = IMGS_PER_GPU 46 | learning_rate = 1e-4 47 | workers_per_gpu = 4 48 | validate = False 49 | 50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 52 | lr_config = dict( 53 | policy='step', 54 | warmup='linear', 55 | warmup_iters=500, 56 | warmup_ratio=1.0 / 3, 57 | step=[10,20], 58 | gamma=0.5, 59 | ) 60 | 61 | checkpoint_config = dict(interval=1) 62 | log_config = dict(interval=50, 63 | hooks=[dict(type='TextLoggerHook'),]) 64 | dist_params = dict(backend='nccl') 65 | log_level = 'INFO' 66 | load_from = None 67 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_eth3d_fm.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, -1, 1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 448 6 | WIDTH = 736 7 | 8 | data = dict( 9 | name = 'eth3d', 10 | split = 'exp', 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/ssd/ETH3D/slam/cables_4', 15 | gt_depth_path = None, 16 | png = True, 17 | stereo_scale = True if 's' in FRAME_IDS else False, 18 | ) 19 | 20 | model = dict( 21 | name = 'mono_fm', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder_eth_1/epoch_30.pth', 34 | automask = False if 's' in FRAME_IDS else True, 35 | disp_norm = False if 's' in FRAME_IDS else True, 36 | perception_weight = 0, 37 | smoothness_weight = 1e-3, 38 | ) 39 | 40 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 41 | resume_from = None 42 | finetune = None 43 | total_epochs = 40 44 | imgs_per_gpu = IMGS_PER_GPU 45 | learning_rate = 1e-4 46 | workers_per_gpu = 4 47 | validate = False 48 | 49 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 51 | lr_config = dict( 52 | policy='step', 53 | warmup='linear', 54 | warmup_iters=500, 55 | warmup_ratio=1.0 / 3, 56 | step=[20,30], 57 | gamma=0.5, 58 | ) 
59 | 60 | checkpoint_config = dict(interval=1) 61 | log_config = dict(interval=5, 62 | hooks=[dict(type='TextLoggerHook'),]) 63 | dist_params = dict(backend='nccl') 64 | log_level = 'INFO' 65 | load_from = None 66 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_euroc_autoencoder.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0] 4 | IMGS_PER_GPU = 3 5 | HEIGHT = 480 6 | WIDTH = 768 7 | 8 | data = dict( 9 | name = 'euroc', 10 | split = 'exp', 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/ssd/EuRoc/MH_04_difficult', 15 | gt_depth_path = None, 16 | png = True, 17 | stereo_scale = False, 18 | ) 19 | 20 | model = dict( 21 | name = 'autoencoder', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | automask = True, 34 | disp_norm = True, 35 | use_min_construct = True, 36 | dis=0.001, 37 | cvt=0.001, 38 | ) 39 | 40 | 41 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 42 | resume_from = None 43 | finetune = None 44 | total_epochs = 30 45 | imgs_per_gpu = IMGS_PER_GPU 46 | learning_rate = 1e-4 47 | workers_per_gpu = 4 48 | validate = False 49 | 50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 52 | lr_config = dict( 53 | policy='step', 54 | warmup='linear', 55 | warmup_iters=500, 56 | warmup_ratio=1.0 / 3, 57 | step=[10,20], 58 | gamma=0.5, 59 | ) 60 | 61 | checkpoint_config = dict(interval=1) 62 | log_config = dict(interval=50, 63 | hooks=[dict(type='TextLoggerHook'),]) 64 | dist_params = dict(backend='nccl') 65 | log_level = 'INFO' 66 | load_from = None 67 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_euroc_fm.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, -1, 1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 480 6 | WIDTH = 768 7 | 8 | data = dict( 9 | name = 'euroc', 10 | split = 'exp', 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/ssd/EuRoc/MH_02_easy',#'/ssd/EuRoc/MH_02_easy','/ssd/EuRoc/MH_04_difficult' 15 | gt_depth_path = None, 16 | png = True, 17 | stereo_scale = True if 's' in FRAME_IDS else False, 18 | ) 19 | 20 | model = dict( 21 | name = 'mono_fm', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 50.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder_euroc/epoch_30.pth', 34 | automask = False, 35 | disp_norm = 
False, 36 | perception_weight = 1e-3, 37 | smoothness_weight = 1e-3, 38 | ) 39 | 40 | # resume_from = '/node01/jobs/io/out/changshu/fm_euroc/epoch_40.pth' 41 | resume_from = None 42 | finetune = None 43 | total_epochs = 80 44 | imgs_per_gpu = IMGS_PER_GPU 45 | learning_rate = 1e-4 46 | workers_per_gpu = 4 47 | validate = False 48 | 49 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 51 | lr_config = dict( 52 | policy='step', 53 | warmup='linear', 54 | warmup_iters=500, 55 | warmup_ratio=1.0 / 3, 56 | step=[20,30], 57 | gamma=0.5, 58 | ) 59 | 60 | checkpoint_config = dict(interval=1) 61 | log_config = dict(interval=5, 62 | hooks=[dict(type='TextLoggerHook'),]) 63 | dist_params = dict(backend='nccl') 64 | log_level = 'INFO' 65 | load_from = None 66 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_folder.py: -------------------------------------------------------------------------------- 1 | split = 'exp' 2 | dataset = 'folder' 3 | height = 320 4 | width = 640 5 | disparity_smoothness = 1e-3 6 | scales = [0, 1, 2, 3, 4] 7 | min_depth = 0.1 8 | max_depth = 100.0 9 | frame_ids = [0, -1, 1] 10 | learning_rate = 1e-4 11 | depth_num_layers = 50 12 | pose_num_layers = 50 13 | total_epochs = 45 14 | device_ids = range(8) 15 | 16 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(depth_num_layers) 17 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(pose_num_layers) 18 | 19 | in_path = '/ssd/avp/soho_garage3/keyframe_underground' 20 | gt_depth_path = '' 21 | checkpoint_path = '/node01_data5/monodepth2-test/model/refine/smallfigure.pth' 22 | 23 | imgs_per_gpu = 2 24 | workers_per_gpu = 4 25 | 26 | validate = False 27 | 28 | png = False 29 | scale_invariant = False 30 | plane_fitting = False 31 | finetune = False 32 | perception = False 33 | focus_loss = False 34 | 35 | scale_invariant_weight = 0.01 36 | plane_fitting_weight = 0.0001 37 | perceptional_weight = 0.001 38 | 39 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 40 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 41 | # learning policy 42 | lr_config = dict( 43 | policy='step', 44 | warmup='linear', 45 | warmup_iters=500, 46 | warmup_ratio=1.0 / 3, 47 | step=[15,25,35], 48 | gamma=0.5, 49 | ) 50 | 51 | checkpoint_config = dict(interval=1) 52 | # yapf:disable 53 | log_config = dict(interval=50, 54 | hooks=[dict(type='TextLoggerHook'),]) 55 | # yapf:enable 56 | # runtime settings 57 | dist_params = dict(backend='nccl') 58 | log_level = 'INFO' 59 | load_from = None 60 | resume_from = None 61 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_kitti_autoencoder.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0] 4 | IMGS_PER_GPU = 5 5 | HEIGHT = 256 6 | WIDTH = 800 7 | 8 | data = dict( 9 | name = 'kitti', 10 | split = 'exp', 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/node01_data5/kitti_raw', 15 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz', 16 | png = False, 17 | stereo_scale = False, 18 | ) 19 | 20 | model = dict( 21 | name = 'autoencoder', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = 
FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | automask = True, 34 | disp_norm = True, 35 | use_min_construct = True, 36 | dis=0.001, 37 | cvt=0.001, 38 | ) 39 | 40 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 41 | resume_from = None 42 | finetune = None 43 | total_epochs = 30 44 | imgs_per_gpu = IMGS_PER_GPU 45 | learning_rate = 1e-4 46 | workers_per_gpu = 4 47 | validate = False 48 | 49 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 51 | lr_config = dict( 52 | policy='step', 53 | warmup='linear', 54 | warmup_iters=500, 55 | warmup_ratio=1.0 / 3, 56 | step=[10,20], 57 | gamma=0.5, 58 | ) 59 | 60 | checkpoint_config = dict(interval=1) 61 | log_config = dict(interval=50, 62 | hooks=[dict(type='TextLoggerHook'),]) 63 | dist_params = dict(backend='nccl') 64 | log_level = 'INFO' 65 | load_from = None 66 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_kitti_fm.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50#resnet50 2 | POSE_LAYERS = 18#resnet18 3 | FRAME_IDS = [0, -1, 1, 's']#0 refers to current frame, -1 and 1 refer to temperally adjacent frames, 's' refers to stereo adjacent frame. 4 | IMGS_PER_GPU = 2 #the number of images fed to each GPU 5 | HEIGHT = 320#input image height 6 | WIDTH = 1024#input image width 7 | 8 | data = dict( 9 | name = 'kitti',#dataset name 10 | split = 'exp',#training split name 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/media/sconly/harddisk/data/kitti/kitti_raw/rawdata',#path to raw data 15 | gt_depth_path = '/media/sconly/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz',#path to gt data 16 | png = False,#image format 17 | stereo_scale = True if 's' in FRAME_IDS else False, 18 | ) 19 | 20 | model = dict( 21 | name = 'mono_fm',# select a model by name 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3],# output different scales of depth maps 29 | min_depth = 0.1, # minimum of predicted depth value 30 | max_depth = 100.0, # maximum of predicted depth value 31 | depth_pretrained_path = '/media/sconly/harddisk/weight/resnet/resnet{}.pth'.format(DEPTH_LAYERS),# pretrained weights for resnet 32 | pose_pretrained_path = '/media/sconly/harddisk/weight/resnet/resnet{}.pth'.format(POSE_LAYERS),# pretrained weights for resnet 33 | extractor_pretrained_path = '/media/sconly/harddisk/weight/autoencoder.pth',# pretrained weights for autoencoder 34 | automask = False if 's' in FRAME_IDS else True, 35 | disp_norm = False if 's' in FRAME_IDS else True, 36 | perception_weight = 1e-3, 37 | smoothness_weight = 1e-3, 38 | ) 39 | 40 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth'#directly start training from provide weights 41 | resume_from = None 42 | finetune = None 43 | total_epochs = 40 44 | imgs_per_gpu = IMGS_PER_GPU 45 | learning_rate = 1e-4 46 | workers_per_gpu = 4 47 | validate = True 48 | 49 
| optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 51 | lr_config = dict( 52 | policy='step', 53 | warmup='linear', 54 | warmup_iters=500, 55 | warmup_ratio=1.0 / 3, 56 | step=[20,30], 57 | gamma=0.5, 58 | ) 59 | 60 | checkpoint_config = dict(interval=1) 61 | log_config = dict(interval=50, 62 | hooks=[dict(type='TextLoggerHook'),]) 63 | dist_params = dict(backend='nccl') 64 | log_level = 'INFO' 65 | load_from = None 66 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_kitti_fm_joint.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, -1, 1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 192#320 6 | WIDTH = 640#1024 7 | 8 | data = dict( 9 | name = 'kitti', 10 | split = 'exp', 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/media/user/harddisk/data/kitti/kitti_raw/rawdata', 15 | gt_depth_path = '/media/user/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz', 16 | png = False, 17 | stereo_scale = True if 's' in FRAME_IDS else False, 18 | ) 19 | 20 | model = dict( 21 | name = 'mono_fm_joint', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/media/user/harddisk/weight/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/media/user/harddisk/weight/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | extractor_pretrained_path = '/media/user/harddisk/weight/autoencoder.pth', 34 | automask = False if 's' in FRAME_IDS else True, 35 | disp_norm = False if 's' in FRAME_IDS else True, 36 | dis=1e-3, 37 | cvt=1e-3, 38 | perception_weight = 1e-3, 39 | smoothness_weight = 1e-3, 40 | ) 41 | 42 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 43 | resume_from = None 44 | finetune = None 45 | total_epochs = 40 46 | imgs_per_gpu = IMGS_PER_GPU 47 | learning_rate = 1e-4 48 | workers_per_gpu = 4 49 | validate = True 50 | 51 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 52 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 53 | lr_config = dict( 54 | policy='step', 55 | warmup='linear', 56 | warmup_iters=500, 57 | warmup_ratio=1.0 / 3, 58 | step=[20,30], 59 | gamma=0.5, 60 | ) 61 | 62 | checkpoint_config = dict(interval=1) 63 | log_config = dict(interval=50, 64 | hooks=[dict(type='TextLoggerHook'),]) 65 | dist_params = dict(backend='nccl') 66 | log_level = 'INFO' 67 | load_from = None 68 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_kitti_fm_refine.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, -1, 1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 320 6 | WIDTH = 1024 7 | 8 | data = dict( 9 | name = 'kitti', 10 | split = 'test',#the split contains the list of testing data 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/node01_data5/kitti_raw',#path to kitti raw data 15 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz',#path to kitti depth ground truth 16 | png = False, 17 | stereo_scale=True if 's' in 
FRAME_IDS else False, 18 | ) 19 | 20 | model = dict( 21 | name = 'mono_fm', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS),#path to pre-trained resnet weights 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS),#path to pre-trained resnet weights 33 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder3/epoch_30.pth', 34 | automask=False if 's' in FRAME_IDS else True, 35 | disp_norm=False if 's' in FRAME_IDS else True, 36 | perception_weight=1e-3, 37 | smoothness_weight=1e-3, 38 | ) 39 | 40 | #path to the weights trained on the kitti raw data training split 41 | resume_from = '/node01_data5/monodepth2-test/model/wow_320_1024/epoch_40.pth'#we will resume from current epoch for further online refinement 42 | total_epochs = 60# this value must be bigger than the epochs of the weight you resume from 43 | #for example, you have trained 40 epoches on kitti raw data, and use this weight for resuming. 44 | #When resuming, the program will start from epoch 41 and finish the rest of epoches (total_epochs - 40) 45 | imgs_per_gpu = IMGS_PER_GPU 46 | learning_rate = 1e-4 47 | workers_per_gpu = 4 48 | validate = True 49 | 50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 52 | lr_config = dict( 53 | policy='step', 54 | warmup='linear', 55 | warmup_iters=500, 56 | warmup_ratio=1.0 / 3, 57 | step=[50], 58 | gamma=0.5, 59 | ) 60 | 61 | checkpoint_config = dict(interval=1) 62 | log_config = dict(interval=5, 63 | hooks=[dict(type='TextLoggerHook'),]) 64 | dist_params = dict(backend='nccl') 65 | log_level = 'INFO' 66 | load_from = None 67 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_make3d_fm.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, -1, 1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 320 6 | WIDTH = 1024 7 | data = dict( 8 | name = 'folder', 9 | split = 'exp', 10 | height = HEIGHT, 11 | width = WIDTH, 12 | frame_ids = FRAME_IDS, 13 | in_path = '/node01_data5/monodepth2-test/make3d', 14 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz', 15 | png = False, 16 | stereo_scale = True if 's' in FRAME_IDS else False, 17 | ) 18 | 19 | model = dict( 20 | name = 'mono_fm', 21 | depth_num_layers = DEPTH_LAYERS, 22 | pose_num_layers = POSE_LAYERS, 23 | frame_ids = FRAME_IDS, 24 | imgs_per_gpu = IMGS_PER_GPU, 25 | height = HEIGHT, 26 | width = WIDTH, 27 | scales = [0, 1, 2, 3], 28 | min_depth = 0.1, 29 | max_depth = 100.0, 30 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 31 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 32 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder3/epoch_30.pth', 33 | automask = False if 's' in FRAME_IDS else True, 34 | disp_norm = False if 's' in FRAME_IDS else True, 35 | perception_weight = 1e-3, 36 | smoothness_weight = 1e-3, 37 | ) 38 | 39 | # resume_from = 
'/node01_data5/monodepth2-test/model/ms/ms.pth' 40 | resume_from = None 41 | finetune = None 42 | total_epochs = 40 43 | imgs_per_gpu = IMGS_PER_GPU 44 | learning_rate = 1e-4 45 | workers_per_gpu = 4 46 | validate = True 47 | 48 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 49 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 50 | lr_config = dict( 51 | policy='step', 52 | warmup='linear', 53 | warmup_iters=500, 54 | warmup_ratio=1.0 / 3, 55 | step=[20,30], 56 | gamma=0.5, 57 | ) 58 | 59 | checkpoint_config = dict(interval=1) 60 | log_config = dict(interval=50, 61 | hooks=[dict(type='TextLoggerHook'),]) 62 | dist_params = dict(backend='nccl') 63 | log_level = 'INFO' 64 | load_from = None 65 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_odom_fm.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, 1, -1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 320 6 | WIDTH = 1024 7 | 8 | 9 | data = dict( 10 | name = 'kitti_odom', 11 | split = 'odom', 12 | height = HEIGHT, 13 | width = WIDTH, 14 | frame_ids = FRAME_IDS, 15 | in_path = '/node01/odo/dataset', 16 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz', 17 | png = True, 18 | stereo_scale = True if 's' in FRAME_IDS else False, 19 | ) 20 | 21 | model = dict( 22 | name = 'mono_fm', 23 | depth_num_layers = DEPTH_LAYERS, 24 | pose_num_layers = POSE_LAYERS, 25 | frame_ids = FRAME_IDS, 26 | imgs_per_gpu = IMGS_PER_GPU, 27 | height = HEIGHT, 28 | width = WIDTH, 29 | scales = [0, 1, 2, 3], 30 | min_depth = 0.1, 31 | max_depth = 100.0, 32 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 33 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 34 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder3/epoch_30.pth', 35 | automask = False if 's' in FRAME_IDS else True, 36 | disp_norm = False if 's' in FRAME_IDS else True, 37 | perception_weight=1e-3, 38 | smoothness_weight=1e-3, 39 | ) 40 | 41 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 42 | resume_from = None 43 | finetune = None 44 | total_epochs = 40 45 | imgs_per_gpu = IMGS_PER_GPU 46 | learning_rate = 1e-4 47 | workers_per_gpu = 4 48 | validate = False 49 | 50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 52 | lr_config = dict( 53 | policy='step', 54 | warmup='linear', 55 | warmup_iters=500, 56 | warmup_ratio=1.0 / 3, 57 | step=[25, 30], 58 | gamma=0.5, 59 | ) 60 | 61 | checkpoint_config = dict(interval=1) 62 | log_config = dict(interval=50, 63 | hooks=[dict(type='TextLoggerHook'),]) 64 | dist_params = dict(backend='nccl') 65 | log_level = 'INFO' 66 | load_from = None 67 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /mono/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/__init__.py -------------------------------------------------------------------------------- /mono/apis/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: 
Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from .trainer import train_mono 6 | from .env import init_dist, get_root_logger, set_random_seed -------------------------------------------------------------------------------- /mono/apis/env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | import logging 6 | import os 7 | import random 8 | import subprocess 9 | 10 | import numpy as np 11 | import torch 12 | import torch.distributed as dist 13 | import torch.multiprocessing as mp 14 | from mmcv.runner import get_dist_info 15 | 16 | 17 | def init_dist(launcher, backend='nccl', **kwargs): 18 | if mp.get_start_method(allow_none=True) is None: 19 | mp.set_start_method('spawn') 20 | if launcher == 'pytorch': 21 | _init_dist_pytorch(backend, **kwargs) 22 | elif launcher == 'mpi': 23 | _init_dist_mpi(backend, **kwargs) 24 | elif launcher == 'slurm': 25 | _init_dist_slurm(backend, **kwargs) 26 | else: 27 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 28 | 29 | 30 | def _init_dist_pytorch(backend, **kwargs): 31 | # TODO: use local_rank instead of rank % num_gpus 32 | rank = int(os.environ['RANK']) 33 | num_gpus = torch.cuda.device_count() 34 | torch.cuda.set_device(rank % num_gpus) 35 | dist.init_process_group(backend=backend, **kwargs) 36 | 37 | 38 | def _init_dist_mpi(backend, **kwargs): 39 | raise NotImplementedError 40 | 41 | 42 | def _init_dist_slurm(backend, port=29500, **kwargs): 43 | proc_id = int(os.environ['SLURM_PROCID']) 44 | ntasks = int(os.environ['SLURM_NTASKS']) 45 | node_list = os.environ['SLURM_NODELIST'] 46 | num_gpus = torch.cuda.device_count() 47 | torch.cuda.set_device(proc_id % num_gpus) 48 | addr = subprocess.getoutput( 49 | 'scontrol show hostname {} | head -n1'.format(node_list)) 50 | os.environ['MASTER_PORT'] = str(port) 51 | os.environ['MASTER_ADDR'] = addr 52 | os.environ['WORLD_SIZE'] = str(ntasks) 53 | os.environ['RANK'] = str(proc_id) 54 | dist.init_process_group(backend=backend) 55 | 56 | 57 | def set_random_seed(seed): 58 | random.seed(seed) 59 | np.random.seed(seed) 60 | torch.manual_seed(seed) 61 | torch.cuda.manual_seed_all(seed) 62 | 63 | 64 | def get_root_logger(log_level=logging.INFO): 65 | logger = logging.getLogger() 66 | if not logger.hasHandlers(): 67 | logging.basicConfig( 68 | format='%(asctime)s - %(levelname)s - %(message)s', 69 | level=log_level) 70 | rank, _ = get_dist_info() 71 | if rank != 0: 72 | logger.setLevel('ERROR') 73 | return logger 74 | -------------------------------------------------------------------------------- /mono/core/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from .evaluation import NonDistEvalHook, DistEvalMonoHook 6 | from .utils import DistOptimizerHook -------------------------------------------------------------------------------- /mono/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from .eval_hooks import NonDistEvalHook, DistEvalMonoHook -------------------------------------------------------------------------------- /mono/core/evaluation/pixel_error.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | import numpy as np 6 | 7 | class AverageMeter(object): 8 | """ 9 | Computes and stores the average and current value 10 | """ 11 | def __init__(self): 12 | self.reset() 13 | 14 | def reset(self): 15 | self.val=0 16 | self.avg=0 17 | self.sum=0 18 | self.count=0 19 | 20 | def update(self, val, n=1): 21 | self.val=val 22 | self.sum+=val*n 23 | self.count+=n 24 | self.avg=self.sum/self.count 25 | 26 | 27 | def compute_errors(gt, pred): 28 | """Computation of error metrics between predicted and ground truth depths 29 | """ 30 | thresh = np.maximum((gt / pred), (pred / gt)) 31 | a1 = (thresh < 1.25 ).mean() 32 | a2 = (thresh < 1.25 ** 2).mean() 33 | a3 = (thresh < 1.25 ** 3).mean() 34 | rmse = (gt - pred) ** 2 35 | rmse = np.sqrt(rmse.mean()) 36 | rmse_log = (np.log(gt) - np.log(pred)) ** 2 37 | rmse_log = np.sqrt(rmse_log.mean()) 38 | abs_rel = np.mean(np.abs(gt - pred) / gt) 39 | sq_rel = np.mean(((gt - pred) ** 2) / gt) 40 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 41 | 42 | 43 | def disp_to_depth(disp, min_depth = 0.1, max_depth=100): 44 | min_disp = 1 / max_depth #0.01 45 | max_disp = 1 / min_depth #10 46 | scaled_disp = min_disp + (max_disp - min_disp) * disp #(10-0.01)*disp+0.01 47 | depth = 1 / scaled_disp 48 | return scaled_disp, depth 49 | 50 | -------------------------------------------------------------------------------- /mono/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from .dist_utils import allreduce_grads, DistOptimizerHook 6 | from .misc import tensor2imgs, unmap, multi_apply 7 | 8 | __all__ = [ 9 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 10 | 'multi_apply' 11 | ] 12 | -------------------------------------------------------------------------------- /mono/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | from collections import OrderedDict 5 | 6 | import torch.distributed as dist 7 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, 8 | _take_tensors) 9 | from mmcv.runner import OptimizerHook 10 | 11 | 12 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 13 | if bucket_size_mb > 0: 14 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 15 | buckets = _take_tensors(tensors, bucket_size_bytes) 16 | else: 17 | buckets = OrderedDict() 18 | for tensor in tensors: 19 | tp = tensor.type() 20 | if tp not in buckets: 21 | buckets[tp] = [] 22 | buckets[tp].append(tensor) 23 | buckets = buckets.values() 24 | 25 | for bucket in buckets: 26 | flat_tensors = _flatten_dense_tensors(bucket) 27 | dist.all_reduce(flat_tensors) 28 | flat_tensors.div_(world_size) 29 | for tensor, synced in zip( 30 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 31 | tensor.copy_(synced) 32 | 33 | 34 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): 35 | grads = [ 36 | param.grad.data for param in model.parameters() 37 | if param.requires_grad and param.grad is not None 38 | ] 39 | world_size = dist.get_world_size() 40 | if coalesce: 41 | _allreduce_coalesced(grads, world_size, 
bucket_size_mb) 42 | else: 43 | for tensor in grads: 44 | dist.all_reduce(tensor.div_(world_size)) 45 | 46 | 47 | class DistOptimizerHook(OptimizerHook): 48 | 49 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 50 | self.grad_clip = grad_clip 51 | self.coalesce = coalesce 52 | self.bucket_size_mb = bucket_size_mb 53 | 54 | def after_train_iter(self, runner): 55 | runner.optimizer.zero_grad() 56 | runner.outputs['loss'].backward() 57 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 58 | if self.grad_clip is not None: 59 | self.clip_grads(runner.model.parameters()) 60 | runner.optimizer.step() 61 | -------------------------------------------------------------------------------- /mono/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from functools import partial 6 | 7 | import mmcv 8 | import numpy as np 9 | from six.moves import map, zip 10 | 11 | 12 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 13 | num_imgs = tensor.size(0) 14 | mean = np.array(mean, dtype=np.float32) 15 | std = np.array(std, dtype=np.float32) 16 | imgs = [] 17 | for img_id in range(num_imgs): 18 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 19 | img = mmcv.imdenormalize( 20 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 21 | imgs.append(np.ascontiguousarray(img)) 22 | return imgs 23 | 24 | 25 | def multi_apply(func, *args, **kwargs): 26 | pfunc = partial(func, **kwargs) if kwargs else func 27 | map_results = map(pfunc, *args) 28 | return tuple(map(list, zip(*map_results))) 29 | 30 | 31 | def unmap(data, count, inds, fill=0): 32 | """ Unmap a subset of item (data) back to the original set of items (of 33 | size count) """ 34 | if data.dim() == 1: 35 | ret = data.new_full((count, ), fill) 36 | ret[inds] = data 37 | else: 38 | new_size = (count, ) + data.size()[1:] 39 | ret = data.new_full(new_size, fill) 40 | ret[inds, :] = data 41 | return ret 42 | -------------------------------------------------------------------------------- /mono/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .loader import build_dataloader -------------------------------------------------------------------------------- /mono/datasets/eth3d_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import random 3 | import numpy as np 4 | from PIL import Image # using pillow-simd for increased speed 5 | import os 6 | 7 | import torch 8 | import torch.utils.data as data 9 | from torchvision import transforms 10 | 11 | 12 | def pil_loader(filename): 13 | # open path as file to avoid ResourceWarning 14 | # (https://github.com/python-pillow/Pillow/issues/835) 15 | with open(filename, 'rb') as f: 16 | with Image.open(f) as img: 17 | return img.convert('RGB') 18 | 19 | 20 | class FolderDataset(data.Dataset): 21 | """Superclass for monocular dataloaders 22 | 23 | Args: 24 | data_path 25 | filenames 26 | height 27 | width 28 | frame_idxs 29 | num_scales 30 | is_train 31 | img_ext 32 | """ 33 | def __init__(self, 34 | data_path, 35 | filenames, 36 | height, 37 | width, 38 | frame_idxs, 39 | is_train=False, 40 | img_ext='.jpg', 41 | gt_depth_path = None): 42 | super(FolderDataset, self).__init__() 43 | 44 | self.data_path = data_path 45 | 
self.filenames = sorted(os.listdir(os.path.join(data_path, 'rgb')))[1:-2] 46 | self.height = height 47 | self.width = width 48 | self.interp = Image.ANTIALIAS 49 | self.is_train = is_train 50 | self.frame_idxs = frame_idxs 51 | self.loader = pil_loader 52 | self.to_tensor = transforms.ToTensor() 53 | #726.28741455078 726.28741455078 354.6496887207 186.46566772461 54 | #w=739,h=458 55 | self.K = np.array([[0.9832, 0, 0.5, 0], 56 | [0, 1.58578, 0.5, 0], 57 | [0, 0, 1, 0], 58 | [0, 0, 0, 1]], dtype=np.float32) 59 | 60 | # Need to specify augmentations differently in pytorch 1.0 compared with 0.4 61 | if int(torch.__version__.split('.')[0]) > 0: 62 | self.brightness = (0.8, 1.2) 63 | self.contrast = (0.8, 1.2) 64 | self.saturation = (0.8, 1.2) 65 | self.hue = (-0.1, 0.1) 66 | else: 67 | self.brightness = 0.2 68 | self.contrast = 0.2 69 | self.saturation = 0.2 70 | self.hue = 0.1 71 | 72 | self.resize = transforms.Resize((self.height, self.width), interpolation=self.interp) 73 | 74 | self.flag = np.zeros(self.__len__(), dtype=np.int64) 75 | 76 | def preprocess(self, inputs, color_aug): 77 | """Resize colour images to the required scales and augment if required 78 | 79 | We create the color_aug object in advance and apply the same augmentation to all 80 | images in this item. This ensures that all images input to the pose network receive the 81 | same augmentation. 82 | """ 83 | for k in list(inputs): 84 | if "color" in k: 85 | n, im, i = k 86 | inputs[(n, im, 0)] = self.resize(inputs[(n, im, - 1)]) 87 | 88 | for k in list(inputs): 89 | if "color" in k: 90 | f = inputs[k] 91 | n, im, i = k 92 | inputs[(n, im, i)] = self.to_tensor(f) 93 | if i == 0: 94 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f)) 95 | 96 | def __len__(self): 97 | return len(self.filenames)-2 98 | 99 | def __getitem__(self, index): 100 | """Returns a single training item from the dataset as a dictionary. 101 | 102 | Values correspond to torch tensors. 103 | Keys in the dictionary are either strings or tuples: 104 | 105 | ("color", , ) for raw colour images, 106 | ("color_aug", , ) for augmented colour images, 107 | ("K", scale) or ("inv_K", scale) for camera intrinsics, 108 | "stereo_T" for camera extrinsics, and 109 | "depth_gt" for ground truth depth maps. 110 | 111 | is either: 112 | an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index', 113 | or 114 | "s" for the opposite image in the stereo pair. 
115 | 116 | is an integer representing the scale of the image relative to the fullsize image: 117 | -1 images at native resolution as loaded from disk 118 | 0 images resized to (self.width, self.height ) 119 | 1 images resized to (self.width // 2, self.height // 2) 120 | 2 images resized to (self.width // 4, self.height // 4) 121 | 3 images resized to (self.width // 8, self.height // 8) 122 | """ 123 | inputs = {} 124 | 125 | index = index+1 126 | 127 | do_color_aug = self.is_train and random.random() > 0.5 128 | do_flip = self.is_train and random.random() > 0.5 129 | 130 | for i in self.frame_idxs: 131 | if i=='s': 132 | filename = os.path.join('rgb2', self.filenames[index]) 133 | else: 134 | filename = os.path.join('rgb', self.filenames[index+i]) 135 | 136 | inputs[("color", i, -1)] = self.get_color(filename, do_flip) 137 | 138 | # adjusting intrinsics to match each scale in the pyramid 139 | K = self.K.copy() 140 | K[0, :] *= self.width 141 | K[1, :] *= self.height 142 | inv_K = np.linalg.pinv(K) 143 | 144 | inputs[("K")] = torch.from_numpy(K) 145 | inputs[("inv_K")] = torch.from_numpy(inv_K) 146 | 147 | if do_color_aug: 148 | color_aug = transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue) 149 | else: 150 | color_aug = (lambda x: x) 151 | 152 | self.preprocess(inputs, color_aug) 153 | 154 | for i in self.frame_idxs: 155 | del inputs[("color", i, -1)] 156 | 157 | if "s" in self.frame_idxs: 158 | stereo_T = np.eye(4, dtype=np.float32) 159 | baseline_sign = -1 if do_flip else 1 160 | side_sign = -1 161 | stereo_T[0, 3] = side_sign * baseline_sign * 0.1 162 | inputs["stereo_T"] = torch.from_numpy(stereo_T) 163 | 164 | return inputs 165 | 166 | def get_color(self, filename, do_flip): 167 | color = self.loader(os.path.join(self.data_path, filename)) 168 | 169 | if do_flip: 170 | color = color.transpose(Image.FLIP_LEFT_RIGHT) 171 | 172 | return color -------------------------------------------------------------------------------- /mono/datasets/euroc_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import random 3 | import numpy as np 4 | from PIL import Image # using pillow-simd for increased speed 5 | import os 6 | 7 | import torch 8 | import torch.utils.data as data 9 | from torchvision import transforms 10 | 11 | 12 | def pil_loader(filename): 13 | # open path as file to avoid ResourceWarning 14 | # (https://github.com/python-pillow/Pillow/issues/835) 15 | with open(filename, 'rb') as f: 16 | with Image.open(f) as img: 17 | return img.convert('RGB') 18 | 19 | 20 | class FolderDataset(data.Dataset): 21 | """Superclass for monocular dataloaders 22 | 23 | Args: 24 | data_path 25 | filenames 26 | height 27 | width 28 | frame_idxs 29 | num_scales 30 | is_train 31 | img_ext 32 | """ 33 | def __init__(self, 34 | data_path, 35 | filenames, 36 | height, 37 | width, 38 | frame_idxs, 39 | is_train=False, 40 | img_ext='.jpg', 41 | gt_depth_path = None): 42 | super(FolderDataset, self).__init__() 43 | 44 | self.data_path = data_path 45 | # self.filenames = sorted(os.listdir(os.path.join(data_path, 'cam0', 'data')))[1:-2]#420-1940 46 | self.filenames = sorted(os.listdir(os.path.join(data_path, 'cam0', 'data'))) # 420-1940 47 | self.height = height 48 | self.width = width 49 | self.interp = Image.ANTIALIAS 50 | self.is_train = is_train 51 | self.frame_idxs = frame_idxs 52 | self.loader = pil_loader 53 | self.to_tensor = transforms.ToTensor() 54 | 55 
| fx = 435.2047 56 | fy = 435.2047 57 | w = 752 58 | h = 480 59 | self.K = np.array([[fx/w, 0, 0.5, 0], 60 | [0, fy/h, 0.5, 0], 61 | [0, 0, 1, 0], 62 | [0, 0, 0, 1]], dtype=np.float32) 63 | 64 | # Need to specify augmentations differently in pytorch 1.0 compared with 0.4 65 | if int(torch.__version__.split('.')[0]) > 0: 66 | self.brightness = (0.8, 1.2) 67 | self.contrast = (0.8, 1.2) 68 | self.saturation = (0.8, 1.2) 69 | self.hue = (-0.1, 0.1) 70 | else: 71 | self.brightness = 0.2 72 | self.contrast = 0.2 73 | self.saturation = 0.2 74 | self.hue = 0.1 75 | 76 | self.resize = transforms.Resize((self.height, self.width), interpolation=self.interp) 77 | 78 | self.flag = np.zeros(self.__len__(), dtype=np.int64) 79 | 80 | def preprocess(self, inputs, color_aug): 81 | """Resize colour images to the required scales and augment if required 82 | 83 | We create the color_aug object in advance and apply the same augmentation to all 84 | images in this item. This ensures that all images input to the pose network receive the 85 | same augmentation. 86 | """ 87 | for k in list(inputs): 88 | if "color" in k: 89 | n, im, i = k 90 | inputs[(n, im, 0)] = self.resize(inputs[(n, im, - 1)]) 91 | 92 | for k in list(inputs): 93 | if "color" in k: 94 | f = inputs[k] 95 | n, im, i = k 96 | inputs[(n, im, i)] = self.to_tensor(f) 97 | if i == 0: 98 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f)) 99 | 100 | def __len__(self): 101 | return len(self.filenames)-1 102 | 103 | def __getitem__(self, index): 104 | """Returns a single training item from the dataset as a dictionary. 105 | 106 | Values correspond to torch tensors. 107 | Keys in the dictionary are either strings or tuples: 108 | 109 | ("color", , ) for raw colour images, 110 | ("color_aug", , ) for augmented colour images, 111 | ("K", scale) or ("inv_K", scale) for camera intrinsics, 112 | "stereo_T" for camera extrinsics, and 113 | "depth_gt" for ground truth depth maps. 114 | 115 | is either: 116 | an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index', 117 | or 118 | "s" for the opposite image in the stereo pair. 
119 | 120 | is an integer representing the scale of the image relative to the fullsize image: 121 | -1 images at native resolution as loaded from disk 122 | 0 images resized to (self.width, self.height ) 123 | 1 images resized to (self.width // 2, self.height // 2) 124 | 2 images resized to (self.width // 4, self.height // 4) 125 | 3 images resized to (self.width // 8, self.height // 8) 126 | """ 127 | inputs = {} 128 | 129 | do_color_aug = self.is_train and random.random() > 0.5 130 | do_flip = self.is_train and random.random() > 0.5 131 | 132 | for i in self.frame_idxs: 133 | if i=='s': 134 | filename = os.path.join('cam1', 'data', self.filenames[index]) 135 | else: 136 | filename = os.path.join('cam0', 'data', self.filenames[index+i]) 137 | 138 | inputs[("color", i, -1)] = self.get_color(filename, do_flip) 139 | 140 | # adjusting intrinsics to match each scale in the pyramid 141 | K = self.K.copy() 142 | K[0, :] *= self.width 143 | K[1, :] *= self.height 144 | inv_K = np.linalg.pinv(K) 145 | 146 | inputs[("K")] = torch.from_numpy(K) 147 | inputs[("inv_K")] = torch.from_numpy(inv_K) 148 | 149 | if do_color_aug: 150 | color_aug = transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue) 151 | else: 152 | color_aug = (lambda x: x) 153 | 154 | self.preprocess(inputs, color_aug) 155 | 156 | for i in self.frame_idxs: 157 | del inputs[("color", i, -1)] 158 | 159 | if "s" in self.frame_idxs: 160 | stereo_T = np.eye(4, dtype=np.float32) 161 | baseline_sign = -1 if do_flip else 1 162 | side_sign = -1 163 | stereo_T[0, 3] = side_sign * baseline_sign * 0.1 164 | inputs["stereo_T"] = torch.from_numpy(stereo_T) 165 | 166 | return inputs 167 | 168 | def get_color(self, filename, do_flip): 169 | color = self.loader(os.path.join(self.data_path, filename)) 170 | 171 | if do_flip: 172 | color = color.transpose(Image.FLIP_LEFT_RIGHT) 173 | 174 | return color -------------------------------------------------------------------------------- /mono/datasets/folder_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import random 3 | import numpy as np 4 | from PIL import Image # using pillow-simd for increased speed 5 | import os 6 | 7 | import torch 8 | import torch.utils.data as data 9 | from torchvision import transforms 10 | 11 | 12 | def pil_loader(filename): 13 | # open path as file to avoid ResourceWarning 14 | # (https://github.com/python-pillow/Pillow/issues/835) 15 | with open(filename, 'rb') as f: 16 | with Image.open(f) as img: 17 | return img.convert('RGB') 18 | 19 | 20 | class FolderDataset(data.Dataset): 21 | """Superclass for monocular dataloaders 22 | 23 | Args: 24 | data_path 25 | filenames 26 | height 27 | width 28 | frame_idxs 29 | num_scales 30 | is_train 31 | img_ext 32 | """ 33 | def __init__(self, 34 | data_path, 35 | filenames, 36 | height, 37 | width, 38 | frame_idxs, 39 | is_train=False, 40 | img_ext='.jpg', 41 | gt_depth_path = None): 42 | super(FolderDataset, self).__init__() 43 | 44 | self.data_path = data_path 45 | self.filenames = sorted(os.listdir(data_path)) 46 | self.height = height 47 | self.width = width 48 | self.interp = Image.ANTIALIAS 49 | self.is_train = is_train 50 | self.frame_idxs = frame_idxs 51 | self.loader = pil_loader 52 | self.to_tensor = transforms.ToTensor() 53 | self.K = np.array([[0.9765, 0, 0.5, 0], 54 | [0, 1.736, 0.5, 0], 55 | [0, 0, 1, 0], 56 | [0, 0, 0, 1]], dtype=np.float32) 57 | 58 | # Need to specify 
augmentations differently in pytorch 1.0 compared with 0.4 59 | if int(torch.__version__.split('.')[0]) > 0: 60 | self.brightness = (0.8, 1.2) 61 | self.contrast = (0.8, 1.2) 62 | self.saturation = (0.8, 1.2) 63 | self.hue = (-0.1, 0.1) 64 | else: 65 | self.brightness = 0.2 66 | self.contrast = 0.2 67 | self.saturation = 0.2 68 | self.hue = 0.1 69 | 70 | self.resize = transforms.Resize((self.height, self.width), interpolation=self.interp) 71 | 72 | self.flag = np.zeros(self.__len__(), dtype=np.int64) 73 | 74 | def preprocess(self, inputs, color_aug): 75 | """Resize colour images to the required scales and augment if required 76 | 77 | We create the color_aug object in advance and apply the same augmentation to all 78 | images in this item. This ensures that all images input to the pose network receive the 79 | same augmentation. 80 | """ 81 | for k in list(inputs): 82 | if "color" in k: 83 | n, im, i = k 84 | inputs[(n, im, 0)] = self.resize(inputs[(n, im, - 1)]) 85 | 86 | for k in list(inputs): 87 | if "color" in k: 88 | f = inputs[k] 89 | n, im, i = k 90 | inputs[(n, im, i)] = self.to_tensor(f) 91 | if i == 0: 92 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f)) 93 | 94 | def __len__(self): 95 | return len(self.filenames) 96 | 97 | def __getitem__(self, index): 98 | """Returns a single training item from the dataset as a dictionary. 99 | 100 | Values correspond to torch tensors. 101 | Keys in the dictionary are either strings or tuples: 102 | 103 | ("color", <frame_id>, <scale>) for raw colour images, 104 | ("color_aug", <frame_id>, <scale>) for augmented colour images, 105 | ("K", scale) or ("inv_K", scale) for camera intrinsics, 106 | "stereo_T" for camera extrinsics, and 107 | "depth_gt" for ground truth depth maps. 108 | 109 | <frame_id> is either: 110 | an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index', 111 | or 112 | "s" for the opposite image in the stereo pair.
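For example, ("K", 0) holds the intrinsics already rescaled to pixels for the (self.width, self.height) image: the normalised matrix self.K from __init__ (fx = 0.9765, fy = 1.736, in units of image width and height) has its first row multiplied by self.width and its second row by self.height, and ("inv_K", 0) stores the pseudo-inverse of that result.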
113 | 114 | is an integer representing the scale of the image relative to the fullsize image: 115 | -1 images at native resolution as loaded from disk 116 | 0 images resized to (self.width, self.height ) 117 | 1 images resized to (self.width // 2, self.height // 2) 118 | 2 images resized to (self.width // 4, self.height // 4) 119 | 3 images resized to (self.width // 8, self.height // 8) 120 | """ 121 | inputs = {} 122 | 123 | do_color_aug = self.is_train and random.random() > 0.5 124 | do_flip = self.is_train and random.random() > 0.5 125 | 126 | for i in self.frame_idxs: 127 | try: 128 | filename = self.filenames[index+i] 129 | except: 130 | filename = self.filenames[index] 131 | 132 | inputs[("color", i, -1)] = self.get_color(filename, do_flip) 133 | 134 | # adjusting intrinsics to match each scale in the pyramid 135 | K = self.K.copy() 136 | K[0, :] *= self.width 137 | K[1, :] *= self.height 138 | inv_K = np.linalg.pinv(K) 139 | 140 | inputs[("K", 0)] = torch.from_numpy(K) 141 | inputs[("inv_K", 0)] = torch.from_numpy(inv_K) 142 | 143 | if do_color_aug: 144 | color_aug = transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue) 145 | else: 146 | color_aug = (lambda x: x) 147 | 148 | self.preprocess(inputs, color_aug) 149 | 150 | for i in self.frame_idxs: 151 | del inputs[("color", i, -1)] 152 | 153 | return inputs 154 | 155 | def get_color(self, filename, do_flip): 156 | color = self.loader(os.path.join(self.data_path, filename)) 157 | 158 | if do_flip: 159 | color = color.transpose(Image.FLIP_LEFT_RIGHT) 160 | 161 | return color -------------------------------------------------------------------------------- /mono/datasets/get_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | import os 6 | from .utils import readlines, sec_to_hm_str 7 | 8 | 9 | def get_dataset(cfg, training=True): 10 | dataset_name = cfg['name'] 11 | if dataset_name == 'kitti': 12 | from .kitti_dataset import KITTIRAWDataset as dataset 13 | elif dataset_name == 'kitti_odom': 14 | from .kitti_dataset import KITTIOdomDataset as dataset 15 | elif dataset_name == 'cityscape': 16 | from .cityscape_dataset import CityscapeDataset as dataset 17 | elif dataset_name == 'folder': 18 | from .folder_dataset import FolderDataset as dataset 19 | elif dataset_name == 'eth3d': 20 | from .eth3d_dataset import FolderDataset as dataset 21 | elif dataset_name == 'euroc': 22 | from .euroc_dataset import FolderDataset as dataset 23 | 24 | fpath = os.path.join(os.path.dirname(__file__), "splits", cfg.split, "{}_files.txt") 25 | filenames = readlines(fpath.format("train")) if training else readlines(fpath.format('val')) 26 | img_ext = '.png' if cfg.png == True else '.jpg' 27 | 28 | dataset = dataset(cfg.in_path, 29 | filenames, 30 | cfg.height, 31 | cfg.width, 32 | cfg.frame_ids if training else [0], 33 | is_train=training, 34 | img_ext=img_ext, 35 | gt_depth_path=cfg.gt_depth_path) 36 | return dataset -------------------------------------------------------------------------------- /mono/datasets/kitti_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import scipy.misc 5 | import numpy as np 6 | import PIL.Image as pil 7 | import datetime 8 | 9 | from .kitti_utils import generate_depth_map, read_calib_file, 
transform_from_rot_trans, pose_from_oxts_packet 10 | from .mono_dataset import MonoDataset 11 | 12 | 13 | class KITTIDataset(MonoDataset): 14 | """Superclass for different types of KITTI dataset loaders 15 | """ 16 | def __init__(self, *args, **kwargs): 17 | super(KITTIDataset, self).__init__(*args, **kwargs) 18 | 19 | self.K = np.array([[0.58, 0, 0.5, 0], 20 | [0, 1.92, 0.5, 0], 21 | [0, 0, 1, 0], 22 | [0, 0, 0, 1]], dtype=np.float32) 23 | 24 | self.full_res_shape = (1242, 375) 25 | self.side_map = {"2": 2, "3": 3, "l": 2, "r": 3} 26 | 27 | def check_depth(self): 28 | line = self.filenames[0].split() 29 | scene_name = line[0] 30 | frame_index = int(line[1]) 31 | 32 | velo_filename = os.path.join( 33 | self.data_path, 34 | scene_name, 35 | "velodyne_points/data/{:010d}.bin".format(int(frame_index))) 36 | 37 | return os.path.isfile(velo_filename) 38 | 39 | def get_color(self, folder, frame_index, side, do_flip): 40 | color = self.loader(self.get_image_path(folder, frame_index, side)) 41 | 42 | if do_flip: 43 | color = color.transpose(pil.FLIP_LEFT_RIGHT) 44 | 45 | return color 46 | 47 | 48 | class KITTIRAWDataset(KITTIDataset): 49 | """KITTI dataset which loads the original velodyne depth maps for ground truth 50 | """ 51 | def __init__(self, *args, **kwargs): 52 | super(KITTIRAWDataset, self).__init__(*args, **kwargs) 53 | 54 | def get_image_path(self, folder, frame_index, side): 55 | f_str = "{:010d}{}".format(frame_index, self.img_ext) 56 | image_path = os.path.join( 57 | self.data_path, folder, "image_0{}/data".format(self.side_map[side]), f_str) 58 | return image_path 59 | 60 | def get_depth(self, folder, frame_index, side, do_flip): 61 | calib_path = os.path.join(self.data_path, folder.split("/")[0]) 62 | 63 | velo_filename = os.path.join( 64 | self.data_path, 65 | folder, 66 | "velodyne_points/data/{:010d}.bin".format(int(frame_index))) 67 | 68 | depth_gt = generate_depth_map(calib_path, velo_filename, self.side_map[side]) 69 | depth_gt = scipy.misc.imresize(depth_gt, self.full_res_shape[::-1], "nearest") 70 | 71 | if do_flip: 72 | depth_gt = np.fliplr(depth_gt) 73 | 74 | return depth_gt 75 | 76 | def get_pose(self, folder, frame_index, offset): 77 | oxts_root = os.path.join(self.data_path, folder, 'oxts') 78 | with open(os.path.join(oxts_root, 'timestamps.txt')) as f: 79 | timestamps = np.array([datetime.datetime.strptime(ts[:-3], "%Y-%m-%d %H:%M:%S.%f").timestamp() 80 | for ts in f.read().splitlines()]) 81 | 82 | speed0 = np.genfromtxt(os.path.join(oxts_root, 'data', '{:010d}.txt'.format(frame_index)))[[8, 9, 10]] 83 | # speed1 = np.genfromtxt(os.path.join(oxts_root, 'data', '{:010d}.txt'.format(frame_index+offset)))[[8, 9, 10]] 84 | 85 | timestamp0 = timestamps[frame_index] 86 | timestamp1 = timestamps[frame_index+offset] 87 | # displacement = 0.5 * (speed0 + speed1) * (timestamp1 - timestamp0) 88 | displacement = speed0 * (timestamp1 - timestamp0) 89 | 90 | imu2velo = read_calib_file(os.path.join(self.data_path, os.path.dirname(folder), 'calib_imu_to_velo.txt')) 91 | velo2cam = read_calib_file(os.path.join(self.data_path, os.path.dirname(folder), 'calib_velo_to_cam.txt')) 92 | cam2cam = read_calib_file(os.path.join(self.data_path, os.path.dirname(folder), 'calib_cam_to_cam.txt')) 93 | 94 | velo2cam_mat = transform_from_rot_trans(velo2cam['R'], velo2cam['T']) 95 | imu2velo_mat = transform_from_rot_trans(imu2velo['R'], imu2velo['T']) 96 | cam_2rect_mat = transform_from_rot_trans(cam2cam['R_rect_00'], np.zeros(3)) 97 | 98 | imu2cam = cam_2rect_mat @ velo2cam_mat @ imu2velo_mat 99 | 
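# The step below rotates the IMU-frame displacement into the rectified camera frame and
# adds the IMU-to-camera offset, i.e. odo_pose = R_imu2cam @ displacement + t_imu2cam,
# giving a translation-only odometry target between the two frames. A minimal standalone
# sketch of the same operation, with made-up numbers rather than real calibration data:
#     import numpy as np
#     imu2cam = np.eye(4)                        # hypothetical 4x4 IMU-to-camera transform
#     displacement = np.array([1.2, 0.0, 0.0])   # metres travelled in the IMU frame
#     odo_pose = imu2cam[:3, :3] @ displacement + imu2cam[:3, 3]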
100 | odo_pose = imu2cam[:3,:3] @ displacement + imu2cam[:3,3] 101 | 102 | return odo_pose 103 | 104 | 105 | class KITTIOdomDataset(KITTIDataset): 106 | """KITTI dataset for odometry training and testing 107 | """ 108 | def __init__(self, *args, **kwargs): 109 | super(KITTIOdomDataset, self).__init__(*args, **kwargs) 110 | 111 | def get_image_path(self, folder, frame_index, side): 112 | f_str = "{:06d}{}".format(frame_index, self.img_ext) 113 | side_map = {"l": 0, "r": 1} 114 | image_path = os.path.join( 115 | self.data_path, 116 | "sequences/{:02d}".format(int(folder)), 117 | "image_{}".format(side_map[side]), 118 | f_str) 119 | return image_path 120 | 121 | 122 | class KITTIDepthDataset(KITTIDataset): 123 | """KITTI dataset which uses the updated ground truth depth maps 124 | """ 125 | def __init__(self, *args, **kwargs): 126 | super(KITTIDepthDataset, self).__init__(*args, **kwargs) 127 | 128 | def get_image_path(self, folder, frame_index, side): 129 | f_str = "{:010d}{}".format(frame_index, self.img_ext) 130 | image_path = os.path.join( 131 | self.data_path, 132 | folder, 133 | "image_0{}/data".format(self.side_map[side]), 134 | f_str) 135 | return image_path 136 | 137 | def get_depth(self, folder, frame_index, side, do_flip): 138 | f_str = "{:010d}.png".format(frame_index) 139 | depth_path = os.path.join( 140 | self.data_path, 141 | folder, 142 | "proj_depth/groundtruth/image_0{}".format(self.side_map[side]), 143 | f_str) 144 | 145 | depth_gt = pil.open(depth_path) 146 | depth_gt = depth_gt.resize(self.full_res_shape, pil.NEAREST) 147 | depth_gt = np.array(depth_gt).astype(np.float32) / 256 148 | 149 | if do_flip: 150 | depth_gt = np.fliplr(depth_gt) 151 | 152 | return depth_gt 153 | -------------------------------------------------------------------------------- /mono/datasets/kitti_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from __future__ import absolute_import, division, print_function 6 | 7 | import os 8 | import numpy as np 9 | from collections import Counter 10 | 11 | 12 | def load_velodyne_points(filename): 13 | """Load 3D point cloud from KITTI file format 14 | (adapted from https://github.com/hunse/kitti) 15 | """ 16 | points = np.fromfile(filename, dtype=np.float32).reshape(-1, 4) 17 | points[:, 3] = 1.0 # homogeneous 18 | return points 19 | 20 | 21 | def read_calib_file(path): 22 | """Read KITTI calibration file 23 | (from https://github.com/hunse/kitti) 24 | """ 25 | float_chars = set("0123456789.e+- ") 26 | data = {} 27 | with open(path, 'r') as f: 28 | for line in f.readlines(): 29 | key, value = line.split(':', 1) 30 | value = value.strip() 31 | data[key] = value 32 | if float_chars.issuperset(value): 33 | # try to cast to float array 34 | try: 35 | data[key] = np.array(list(map(float, value.split(' ')))) 36 | except ValueError: 37 | # casting error: data[key] already eq. 
value, so pass 38 | pass 39 | 40 | return data 41 | 42 | 43 | def sub2ind(matrixSize, rowSub, colSub): 44 | """Convert row, col matrix subscripts to linear indices 45 | """ 46 | m, n = matrixSize 47 | return rowSub * (n-1) + colSub - 1 48 | 49 | 50 | def generate_depth_map(calib_dir, velo_filename, cam=2, vel_depth=False): 51 | """Generate a depth map from velodyne data 52 | """ 53 | # load calibration files 54 | cam2cam = read_calib_file(os.path.join(calib_dir, 'calib_cam_to_cam.txt')) 55 | velo2cam = read_calib_file(os.path.join(calib_dir, 'calib_velo_to_cam.txt')) 56 | velo2cam = np.hstack((velo2cam['R'].reshape(3, 3), velo2cam['T'][..., np.newaxis])) 57 | velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0]))) 58 | 59 | # get image shape 60 | im_shape = cam2cam["S_rect_02"][::-1].astype(np.int32) 61 | 62 | # compute projection matrix velodyne->image plane 63 | R_cam2rect = np.eye(4) 64 | R_cam2rect[:3, :3] = cam2cam['R_rect_00'].reshape(3, 3) 65 | P_rect = cam2cam['P_rect_0'+str(cam)].reshape(3, 4) 66 | P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam) 67 | 68 | # load velodyne points and remove all behind image plane (approximation) 69 | # each row of the velodyne data is forward, left, up, reflectance 70 | velo = load_velodyne_points(velo_filename) 71 | velo = velo[velo[:, 0] >= 0, :] 72 | 73 | # project the points to the camera 74 | velo_pts_im = np.dot(P_velo2im, velo.T).T 75 | velo_pts_im[:, :2] = velo_pts_im[:, :2] / velo_pts_im[:, 2][..., np.newaxis] 76 | 77 | if vel_depth: 78 | velo_pts_im[:, 2] = velo[:, 0] 79 | 80 | # check if in bounds 81 | # use minus 1 to get the exact same value as KITTI matlab code 82 | velo_pts_im[:, 0] = np.round(velo_pts_im[:, 0]) - 1 83 | velo_pts_im[:, 1] = np.round(velo_pts_im[:, 1]) - 1 84 | val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0) 85 | val_inds = val_inds & (velo_pts_im[:, 0] < im_shape[1]) & (velo_pts_im[:, 1] < im_shape[0]) 86 | velo_pts_im = velo_pts_im[val_inds, :] 87 | 88 | # project to image 89 | depth = np.zeros((im_shape[:2])) 90 | depth[velo_pts_im[:, 1].astype(np.int), velo_pts_im[:, 0].astype(np.int)] = velo_pts_im[:, 2] 91 | 92 | # find the duplicate points and choose the closest depth 93 | inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0]) 94 | dupe_inds = [item for item, count in Counter(inds).items() if count > 1] 95 | for dd in dupe_inds: 96 | pts = np.where(inds == dd)[0] 97 | x_loc = int(velo_pts_im[pts[0], 0]) 98 | y_loc = int(velo_pts_im[pts[0], 1]) 99 | depth[y_loc, x_loc] = velo_pts_im[pts, 2].min() 100 | depth[depth < 0] = 0 101 | 102 | return depth 103 | 104 | 105 | def rotx(t): 106 | """Rotation about the x-axis.""" 107 | c = np.cos(t) 108 | s = np.sin(t) 109 | return np.array([[1, 0, 0], 110 | [0, c, -s], 111 | [0, s, c]]) 112 | 113 | 114 | def roty(t): 115 | """Rotation about the y-axis.""" 116 | c = np.cos(t) 117 | s = np.sin(t) 118 | return np.array([[c, 0, s], 119 | [0, 1, 0], 120 | [-s, 0, c]]) 121 | 122 | 123 | def rotz(t): 124 | """Rotation about the z-axis.""" 125 | c = np.cos(t) 126 | s = np.sin(t) 127 | return np.array([[c, -s, 0], 128 | [s, c, 0], 129 | [0, 0, 1]]) 130 | 131 | 132 | def pose_from_oxts_packet(metadata, scale): 133 | 134 | lat, lon, alt, roll, pitch, yaw = metadata 135 | """Helper method to compute a SE(3) pose matrix from an OXTS packet. 136 | Taken from https://github.com/utiasSTARS/pykitti 137 | """ 138 | 139 | er = 6378137. # earth radius (approx.) 
in meters 140 | # Use a Mercator projection to get the translation vector 141 | 142 | tx = scale * lon * np.pi * er / 180. 143 | ty = scale * er * \ 144 | np.log(np.tan((90. + lat) * np.pi / 360.)) 145 | tz = alt 146 | t = np.array([tx, ty, tz]).reshape(-1,1) 147 | 148 | # Use the Euler angles to get the rotation matrix 149 | Rx = rotx(roll) 150 | Ry = roty(pitch) 151 | Rz = rotz(yaw) 152 | R = Rz.dot(Ry.dot(Rx)) 153 | return transform_from_rot_trans(R, t) 154 | 155 | 156 | def transform_from_rot_trans(R, t): 157 | """Transforation matrix from rotation matrix and translation vector.""" 158 | R = R.reshape(3, 3) 159 | t = t.reshape(3, 1) 160 | return np.vstack((np.hstack([R, t]), [0, 0, 0, 1])) 161 | 162 | 163 | -------------------------------------------------------------------------------- /mono/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from .build_loader import build_dataloader -------------------------------------------------------------------------------- /mono/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from functools import partial 6 | 7 | from mmcv.runner import get_dist_info 8 | from mmcv.parallel import collate 9 | from torch.utils.data import DataLoader 10 | from .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler 11 | 12 | # https://github.com/pytorch/pytorch/issues/973 13 | import resource 14 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 15 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 16 | 17 | 18 | def build_dataloader(dataset, 19 | imgs_per_gpu, 20 | workers_per_gpu, 21 | num_gpus=1, 22 | dist=True, 23 | **kwargs): 24 | shuffle = kwargs.get('shuffle', True) 25 | if dist: 26 | rank, world_size = get_dist_info() 27 | if shuffle: 28 | sampler = DistributedGroupSampler(dataset, 29 | imgs_per_gpu, 30 | world_size, 31 | rank) 32 | else: 33 | sampler = DistributedSampler(dataset, 34 | world_size, 35 | rank, 36 | shuffle=False) 37 | batch_size = imgs_per_gpu 38 | num_workers = workers_per_gpu 39 | else: 40 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 41 | batch_size = num_gpus * imgs_per_gpu 42 | num_workers = num_gpus * workers_per_gpu 43 | 44 | data_loader = DataLoader(dataset, 45 | batch_size=batch_size, 46 | sampler=sampler, 47 | num_workers=num_workers, 48 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 49 | pin_memory=False, 50 | **kwargs, 51 | drop_last=True 52 | ) 53 | 54 | return data_loader 55 | -------------------------------------------------------------------------------- /mono/datasets/loader/sampler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from __future__ import division 6 | 7 | import math 8 | import torch 9 | import numpy as np 10 | 11 | from torch.distributed import get_world_size, get_rank 12 | from torch.utils.data import Sampler 13 | from torch.utils.data import DistributedSampler as _DistributedSampler 14 | 15 | 16 | class DistributedSampler(_DistributedSampler): 17 | 18 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 19 | 
super().__init__(dataset, num_replicas=num_replicas, rank=rank) 20 | self.shuffle = shuffle 21 | 22 | def __iter__(self): 23 | # deterministically shuffle based on epoch 24 | if self.shuffle: 25 | g = torch.Generator() 26 | g.manual_seed(self.epoch) 27 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 28 | else: 29 | indices = torch.arange(len(self.dataset)).tolist() 30 | 31 | # add extra samples to make it evenly divisible 32 | indices += indices[:(self.total_size - len(indices))] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | indices = indices[self.rank:self.total_size:self.num_replicas] 37 | assert len(indices) == self.num_samples 38 | 39 | return iter(indices) 40 | 41 | 42 | class GroupSampler(Sampler): 43 | 44 | def __init__(self, dataset, samples_per_gpu=1): 45 | assert hasattr(dataset, 'flag') 46 | self.dataset = dataset 47 | self.samples_per_gpu = samples_per_gpu 48 | self.flag = dataset.flag.astype(np.int64) 49 | self.group_sizes = np.bincount(self.flag) 50 | self.num_samples = 0 51 | for i, size in enumerate(self.group_sizes): 52 | self.num_samples += int(np.ceil( 53 | size / self.samples_per_gpu)) * self.samples_per_gpu 54 | 55 | def __iter__(self): 56 | indices = [] 57 | for i, size in enumerate(self.group_sizes): 58 | if size == 0: 59 | continue 60 | indice = np.where(self.flag == i)[0] 61 | assert len(indice) == size 62 | np.random.shuffle(indice) 63 | num_extra = int(np.ceil(size / self.samples_per_gpu) 64 | ) * self.samples_per_gpu - len(indice) 65 | indice = np.concatenate([indice, indice[:num_extra]]) 66 | indices.append(indice) 67 | indices = np.concatenate(indices) 68 | indices = [ 69 | indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu] 70 | for i in np.random.permutation( 71 | range(len(indices) // self.samples_per_gpu)) 72 | ] 73 | indices = np.concatenate(indices) 74 | indices = torch.from_numpy(indices).long() 75 | assert len(indices) == self.num_samples 76 | return iter(indices) 77 | 78 | def __len__(self): 79 | return self.num_samples 80 | 81 | 82 | class DistributedGroupSampler(Sampler): 83 | """Sampler that restricts data loading to a subset of the dataset. 84 | It is especially useful in conjunction with 85 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 86 | process can pass a DistributedSampler instance as a DataLoader sampler, 87 | and load a subset of the original dataset that is exclusive to it. 88 | .. note:: 89 | Dataset is assumed to be of constant size. 90 | Arguments: 91 | dataset: Dataset used for sampling. 92 | num_replicas (optional): Number of processes participating in 93 | distributed training. 94 | rank (optional): Rank of the current process within num_replicas. 
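Example (an illustrative sketch with placeholder names; see build_loader.py for the full wiring):
    sampler = DistributedGroupSampler(dataset, samples_per_gpu=2, num_replicas=world_size, rank=rank)
    loader = DataLoader(dataset, batch_size=2, sampler=sampler, drop_last=True)
    for epoch in range(num_epochs):
        sampler.set_epoch(epoch)  # reshuffle deterministically at each epoch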
95 | """ 96 | 97 | def __init__(self, 98 | dataset, 99 | samples_per_gpu=1, 100 | num_replicas=None, 101 | rank=None): 102 | if num_replicas is None: 103 | num_replicas = get_world_size() 104 | if rank is None: 105 | rank = get_rank() 106 | self.dataset = dataset 107 | self.samples_per_gpu = samples_per_gpu 108 | self.num_replicas = num_replicas 109 | self.rank = rank 110 | self.epoch = 0 111 | 112 | assert hasattr(self.dataset, 'flag') 113 | self.flag = self.dataset.flag 114 | self.group_sizes = np.bincount(self.flag) 115 | 116 | self.num_samples = 0 117 | for i, j in enumerate(self.group_sizes): 118 | self.num_samples += int( 119 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 120 | self.num_replicas)) * self.samples_per_gpu 121 | self.total_size = self.num_samples * self.num_replicas 122 | 123 | def __iter__(self): 124 | # deterministically shuffle based on epoch 125 | g = torch.Generator() 126 | g.manual_seed(self.epoch) 127 | 128 | indices = [] 129 | for i, size in enumerate(self.group_sizes): 130 | if size > 0: 131 | indice = np.where(self.flag == i)[0] 132 | assert len(indice) == size 133 | indice = indice[list(torch.randperm(int(size), 134 | generator=g))].tolist() 135 | extra = int( 136 | math.ceil( 137 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 138 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 139 | indice += indice[:extra] 140 | indices += indice 141 | 142 | assert len(indices) == self.total_size 143 | 144 | indices = [ 145 | indices[j] for i in list( 146 | torch.randperm(len(indices) // self.samples_per_gpu, 147 | generator=g)) 148 | for j in range(i * self.samples_per_gpu, (i + 1) * 149 | self.samples_per_gpu) 150 | ] 151 | 152 | # subsample 153 | offset = self.num_samples * self.rank 154 | indices = indices[offset:offset + self.num_samples] 155 | assert len(indices) == self.num_samples 156 | 157 | return iter(indices) 158 | 159 | def __len__(self): 160 | return self.num_samples 161 | 162 | def set_epoch(self, epoch): 163 | self.epoch = epoch 164 | -------------------------------------------------------------------------------- /mono/datasets/splits/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) -------------------------------------------------------------------------------- /mono/datasets/splits/benchmark/eigen_to_benchmark_ids.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/datasets/splits/benchmark/eigen_to_benchmark_ids.npy -------------------------------------------------------------------------------- /mono/datasets/splits/cityscape/gen_cityscape_split.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | import os 3 | 4 | def main(): 5 | file = '/ssd/Cityscapes/leftImg8bit_sequence_trainvaltest.zip' 6 | archive = zipfile.ZipFile(file, 'r') 7 | namelist = sorted(archive.namelist()) 8 | 9 | if os.path.exists(os.path.join('..', 'splits', 'cityscape')): 10 | print('path exists') 11 | else: 12 | os.makedirs(os.path.join('..', 'splits', 'cityscape')) 13 | with open(os.path.join('..', 'splits', 'cityscape', 'train.txt'), 'w') as trainfile: 14 | with open(os.path.join('..', 'splits', 'cityscape', 'val.txt'), 'w') as valfile: 15 | with open(os.path.join('..', 'splits', 'cityscape', 
'test.txt'), 'w') as testfile: 16 | for i in range(len(namelist)): 17 | str = namelist[i] 18 | if 'png' in str: 19 | if 'train' in str: 20 | trainfile.write(str) 21 | trainfile.write('\n') 22 | elif 'val' in str: 23 | valfile.write(str) 24 | valfile.write('\n') 25 | elif 'test' in str: 26 | testfile.write(str) 27 | testfile.write('\n') 28 | 29 | 30 | 31 | if __name__ == '__main__': 32 | main() -------------------------------------------------------------------------------- /mono/datasets/splits/cityscape/val_files.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/datasets/splits/cityscape/val_files.txt -------------------------------------------------------------------------------- /mono/datasets/splits/exp/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) -------------------------------------------------------------------------------- /mono/datasets/splits/kitti_shot_sequence/gen_split.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | 3 | f = open('val_files.txt', 'w') 4 | for i in range(108): 5 | f.writelines(['2011_09_26/2011_09_26_drive_0001_sync ', str(i).zfill(10), ' l\n']) 6 | 7 | f.close() 8 | print('done') -------------------------------------------------------------------------------- /mono/datasets/splits/kitti_shot_sequence/val_files.txt: -------------------------------------------------------------------------------- 1 | 2011_09_26/2011_09_26_drive_0001_sync 0000000000 l 2 | 2011_09_26/2011_09_26_drive_0001_sync 0000000001 l 3 | 2011_09_26/2011_09_26_drive_0001_sync 0000000002 l 4 | 2011_09_26/2011_09_26_drive_0001_sync 0000000003 l 5 | 2011_09_26/2011_09_26_drive_0001_sync 0000000004 l 6 | 2011_09_26/2011_09_26_drive_0001_sync 0000000005 l 7 | 2011_09_26/2011_09_26_drive_0001_sync 0000000006 l 8 | 2011_09_26/2011_09_26_drive_0001_sync 0000000007 l 9 | 2011_09_26/2011_09_26_drive_0001_sync 0000000008 l 10 | 2011_09_26/2011_09_26_drive_0001_sync 0000000009 l 11 | 2011_09_26/2011_09_26_drive_0001_sync 0000000010 l 12 | 2011_09_26/2011_09_26_drive_0001_sync 0000000011 l 13 | 2011_09_26/2011_09_26_drive_0001_sync 0000000012 l 14 | 2011_09_26/2011_09_26_drive_0001_sync 0000000013 l 15 | 2011_09_26/2011_09_26_drive_0001_sync 0000000014 l 16 | 2011_09_26/2011_09_26_drive_0001_sync 0000000015 l 17 | 2011_09_26/2011_09_26_drive_0001_sync 0000000016 l 18 | 2011_09_26/2011_09_26_drive_0001_sync 0000000017 l 19 | 2011_09_26/2011_09_26_drive_0001_sync 0000000018 l 20 | 2011_09_26/2011_09_26_drive_0001_sync 0000000019 l 21 | 2011_09_26/2011_09_26_drive_0001_sync 0000000020 l 22 | 2011_09_26/2011_09_26_drive_0001_sync 0000000021 l 23 | 2011_09_26/2011_09_26_drive_0001_sync 0000000022 l 24 | 2011_09_26/2011_09_26_drive_0001_sync 0000000023 l 25 | 2011_09_26/2011_09_26_drive_0001_sync 0000000024 l 26 | 2011_09_26/2011_09_26_drive_0001_sync 0000000025 l 27 | 2011_09_26/2011_09_26_drive_0001_sync 0000000026 l 28 | 2011_09_26/2011_09_26_drive_0001_sync 0000000027 l 29 | 2011_09_26/2011_09_26_drive_0001_sync 0000000028 l 30 | 2011_09_26/2011_09_26_drive_0001_sync 0000000029 l 31 | 2011_09_26/2011_09_26_drive_0001_sync 0000000030 l 32 | 2011_09_26/2011_09_26_drive_0001_sync 0000000031 l 33 | 2011_09_26/2011_09_26_drive_0001_sync 0000000032 l 
34 | 2011_09_26/2011_09_26_drive_0001_sync 0000000033 l 35 | 2011_09_26/2011_09_26_drive_0001_sync 0000000034 l 36 | 2011_09_26/2011_09_26_drive_0001_sync 0000000035 l 37 | 2011_09_26/2011_09_26_drive_0001_sync 0000000036 l 38 | 2011_09_26/2011_09_26_drive_0001_sync 0000000037 l 39 | 2011_09_26/2011_09_26_drive_0001_sync 0000000038 l 40 | 2011_09_26/2011_09_26_drive_0001_sync 0000000039 l 41 | 2011_09_26/2011_09_26_drive_0001_sync 0000000040 l 42 | 2011_09_26/2011_09_26_drive_0001_sync 0000000041 l 43 | 2011_09_26/2011_09_26_drive_0001_sync 0000000042 l 44 | 2011_09_26/2011_09_26_drive_0001_sync 0000000043 l 45 | 2011_09_26/2011_09_26_drive_0001_sync 0000000044 l 46 | 2011_09_26/2011_09_26_drive_0001_sync 0000000045 l 47 | 2011_09_26/2011_09_26_drive_0001_sync 0000000046 l 48 | 2011_09_26/2011_09_26_drive_0001_sync 0000000047 l 49 | 2011_09_26/2011_09_26_drive_0001_sync 0000000048 l 50 | 2011_09_26/2011_09_26_drive_0001_sync 0000000049 l 51 | 2011_09_26/2011_09_26_drive_0001_sync 0000000050 l 52 | 2011_09_26/2011_09_26_drive_0001_sync 0000000051 l 53 | 2011_09_26/2011_09_26_drive_0001_sync 0000000052 l 54 | 2011_09_26/2011_09_26_drive_0001_sync 0000000053 l 55 | 2011_09_26/2011_09_26_drive_0001_sync 0000000054 l 56 | 2011_09_26/2011_09_26_drive_0001_sync 0000000055 l 57 | 2011_09_26/2011_09_26_drive_0001_sync 0000000056 l 58 | 2011_09_26/2011_09_26_drive_0001_sync 0000000057 l 59 | 2011_09_26/2011_09_26_drive_0001_sync 0000000058 l 60 | 2011_09_26/2011_09_26_drive_0001_sync 0000000059 l 61 | 2011_09_26/2011_09_26_drive_0001_sync 0000000060 l 62 | 2011_09_26/2011_09_26_drive_0001_sync 0000000061 l 63 | 2011_09_26/2011_09_26_drive_0001_sync 0000000062 l 64 | 2011_09_26/2011_09_26_drive_0001_sync 0000000063 l 65 | 2011_09_26/2011_09_26_drive_0001_sync 0000000064 l 66 | 2011_09_26/2011_09_26_drive_0001_sync 0000000065 l 67 | 2011_09_26/2011_09_26_drive_0001_sync 0000000066 l 68 | 2011_09_26/2011_09_26_drive_0001_sync 0000000067 l 69 | 2011_09_26/2011_09_26_drive_0001_sync 0000000068 l 70 | 2011_09_26/2011_09_26_drive_0001_sync 0000000069 l 71 | 2011_09_26/2011_09_26_drive_0001_sync 0000000070 l 72 | 2011_09_26/2011_09_26_drive_0001_sync 0000000071 l 73 | 2011_09_26/2011_09_26_drive_0001_sync 0000000072 l 74 | 2011_09_26/2011_09_26_drive_0001_sync 0000000073 l 75 | 2011_09_26/2011_09_26_drive_0001_sync 0000000074 l 76 | 2011_09_26/2011_09_26_drive_0001_sync 0000000075 l 77 | 2011_09_26/2011_09_26_drive_0001_sync 0000000076 l 78 | 2011_09_26/2011_09_26_drive_0001_sync 0000000077 l 79 | 2011_09_26/2011_09_26_drive_0001_sync 0000000078 l 80 | 2011_09_26/2011_09_26_drive_0001_sync 0000000079 l 81 | 2011_09_26/2011_09_26_drive_0001_sync 0000000080 l 82 | 2011_09_26/2011_09_26_drive_0001_sync 0000000081 l 83 | 2011_09_26/2011_09_26_drive_0001_sync 0000000082 l 84 | 2011_09_26/2011_09_26_drive_0001_sync 0000000083 l 85 | 2011_09_26/2011_09_26_drive_0001_sync 0000000084 l 86 | 2011_09_26/2011_09_26_drive_0001_sync 0000000085 l 87 | 2011_09_26/2011_09_26_drive_0001_sync 0000000086 l 88 | 2011_09_26/2011_09_26_drive_0001_sync 0000000087 l 89 | 2011_09_26/2011_09_26_drive_0001_sync 0000000088 l 90 | 2011_09_26/2011_09_26_drive_0001_sync 0000000089 l 91 | 2011_09_26/2011_09_26_drive_0001_sync 0000000090 l 92 | 2011_09_26/2011_09_26_drive_0001_sync 0000000091 l 93 | 2011_09_26/2011_09_26_drive_0001_sync 0000000092 l 94 | 2011_09_26/2011_09_26_drive_0001_sync 0000000093 l 95 | 2011_09_26/2011_09_26_drive_0001_sync 0000000094 l 96 | 2011_09_26/2011_09_26_drive_0001_sync 0000000095 l 97 | 
2011_09_26/2011_09_26_drive_0001_sync 0000000096 l 98 | 2011_09_26/2011_09_26_drive_0001_sync 0000000097 l 99 | 2011_09_26/2011_09_26_drive_0001_sync 0000000098 l 100 | 2011_09_26/2011_09_26_drive_0001_sync 0000000099 l 101 | 2011_09_26/2011_09_26_drive_0001_sync 0000000100 l 102 | 2011_09_26/2011_09_26_drive_0001_sync 0000000101 l 103 | 2011_09_26/2011_09_26_drive_0001_sync 0000000102 l 104 | 2011_09_26/2011_09_26_drive_0001_sync 0000000103 l 105 | 2011_09_26/2011_09_26_drive_0001_sync 0000000104 l 106 | 2011_09_26/2011_09_26_drive_0001_sync 0000000105 l 107 | 2011_09_26/2011_09_26_drive_0001_sync 0000000106 l 108 | 2011_09_26/2011_09_26_drive_0001_sync 0000000107 l 109 | -------------------------------------------------------------------------------- /mono/datasets/splits/short/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) -------------------------------------------------------------------------------- /mono/datasets/splits/short/train_files.txt: -------------------------------------------------------------------------------- 1 | 2011_09_26/2011_09_26_drive_0022_sync 473 r 2 | 2011_09_29/2011_09_29_drive_0026_sync 1 l 3 | 2011_09_26/2011_09_26_drive_0087_sync 185 r 4 | 2011_09_30/2011_09_30_drive_0028_sync 497 l 5 | 2011_10_03/2011_10_03_drive_0034_sync 215 l 6 | 2011_10_03/2011_10_03_drive_0042_sync 514 r 7 | 2011_09_30/2011_09_30_drive_0028_sync 2975 l 8 | 2011_10_03/2011_10_03_drive_0034_sync 1214 r 9 | 2011_09_26/2011_09_26_drive_0061_sync 601 l 10 | 2011_09_30/2011_09_30_drive_0028_sync 1924 l 11 | 2011_09_26/2011_09_26_drive_0091_sync 270 r 12 | 2011_09_30/2011_09_30_drive_0033_sync 979 r 13 | 2011_09_29/2011_09_29_drive_0004_sync 288 l 14 | 2011_09_30/2011_09_30_drive_0033_sync 1029 r 15 | 2011_09_30/2011_09_30_drive_0028_sync 5004 r 16 | 2011_09_26/2011_09_26_drive_0051_sync 138 r 17 | 2011_10_03/2011_10_03_drive_0034_sync 3247 l 18 | 2011_09_26/2011_09_26_drive_0014_sync 285 l 19 | 2011_09_30/2011_09_30_drive_0028_sync 573 l 20 | 2011_09_26/2011_09_26_drive_0051_sync 425 r 21 | 2011_09_30/2011_09_30_drive_0028_sync 2380 l 22 | 2011_09_30/2011_09_30_drive_0028_sync 1323 l 23 | 2011_09_30/2011_09_30_drive_0028_sync 733 r 24 | 2011_09_26/2011_09_26_drive_0087_sync 331 r 25 | 2011_09_30/2011_09_30_drive_0028_sync 536 l 26 | 2011_09_30/2011_09_30_drive_0028_sync 2935 r 27 | 2011_10_03/2011_10_03_drive_0034_sync 562 l 28 | 2011_09_26/2011_09_26_drive_0032_sync 226 r 29 | 2011_09_30/2011_09_30_drive_0028_sync 5148 r 30 | 2011_10_03/2011_10_03_drive_0034_sync 1355 r 31 | 2011_10_03/2011_10_03_drive_0034_sync 2695 l 32 | 2011_09_30/2011_09_30_drive_0028_sync 3546 r 33 | 2011_10_03/2011_10_03_drive_0034_sync 4023 r 34 | 2011_09_26/2011_09_26_drive_0051_sync 301 r 35 | 2011_09_30/2011_09_30_drive_0028_sync 402 r 36 | 2011_09_30/2011_09_30_drive_0033_sync 294 r 37 | 2011_09_30/2011_09_30_drive_0033_sync 1106 r 38 | 2011_09_30/2011_09_30_drive_0028_sync 4906 r 39 | 2011_10_03/2011_10_03_drive_0034_sync 1504 l 40 | 2011_10_03/2011_10_03_drive_0042_sync 478 r 41 | 2011_09_30/2011_09_30_drive_0033_sync 980 l 42 | 2011_09_30/2011_09_30_drive_0028_sync 684 l 43 | 2011_09_30/2011_09_30_drive_0028_sync 3418 r 44 | 2011_09_26/2011_09_26_drive_0028_sync 68 r 45 | 2011_09_26/2011_09_26_drive_0039_sync 245 l 46 | 2011_09_26/2011_09_26_drive_0087_sync 363 l 47 | 2011_10_03/2011_10_03_drive_0034_sync 655 r 48 | 2011_10_03/2011_10_03_drive_0034_sync 
3379 l 49 | 2011_10_03/2011_10_03_drive_0034_sync 684 r 50 | 2011_09_26/2011_09_26_drive_0018_sync 100 l 51 | 2011_09_26/2011_09_26_drive_0104_sync 163 l 52 | 2011_10_03/2011_10_03_drive_0034_sync 2587 l 53 | 2011_09_30/2011_09_30_drive_0028_sync 663 l 54 | 2011_09_30/2011_09_30_drive_0033_sync 273 r 55 | 2011_10_03/2011_10_03_drive_0042_sync 768 r 56 | 2011_09_30/2011_09_30_drive_0033_sync 1543 l 57 | 2011_10_03/2011_10_03_drive_0034_sync 4614 r 58 | 2011_10_03/2011_10_03_drive_0034_sync 475 l 59 | 2011_09_30/2011_09_30_drive_0028_sync 3297 l 60 | 2011_09_26/2011_09_26_drive_0039_sync 165 l 61 | 2011_09_30/2011_09_30_drive_0028_sync 1031 l 62 | 2011_10_03/2011_10_03_drive_0034_sync 2656 l 63 | 2011_10_03/2011_10_03_drive_0042_sync 66 r 64 | 2011_10_03/2011_10_03_drive_0042_sync 297 r 65 | 2011_09_30/2011_09_30_drive_0028_sync 2604 l 66 | 2011_09_26/2011_09_26_drive_0104_sync 97 r 67 | 2011_10_03/2011_10_03_drive_0034_sync 3787 l 68 | 2011_09_30/2011_09_30_drive_0028_sync 2946 l 69 | 2011_10_03/2011_10_03_drive_0034_sync 1184 l 70 | 2011_10_03/2011_10_03_drive_0042_sync 458 l 71 | 2011_09_30/2011_09_30_drive_0028_sync 4166 r 72 | 2011_09_30/2011_09_30_drive_0028_sync 4922 l 73 | 2011_09_30/2011_09_30_drive_0033_sync 1323 l 74 | 2011_10_03/2011_10_03_drive_0034_sync 1819 l 75 | 2011_10_03/2011_10_03_drive_0042_sync 569 l 76 | 2011_09_26/2011_09_26_drive_0070_sync 205 l 77 | 2011_10_03/2011_10_03_drive_0042_sync 249 l 78 | 2011_09_30/2011_09_30_drive_0034_sync 371 r 79 | 2011_09_26/2011_09_26_drive_0039_sync 104 l 80 | 2011_09_30/2011_09_30_drive_0028_sync 382 r 81 | 2011_09_26/2011_09_26_drive_0087_sync 295 l 82 | 2011_09_30/2011_09_30_drive_0028_sync 3023 l 83 | 2011_10_03/2011_10_03_drive_0042_sync 591 l 84 | 2011_10_03/2011_10_03_drive_0034_sync 1472 l 85 | 2011_09_26/2011_09_26_drive_0001_sync 77 r 86 | 2011_10_03/2011_10_03_drive_0034_sync 3269 l 87 | 2011_09_30/2011_09_30_drive_0020_sync 185 r 88 | 2011_10_03/2011_10_03_drive_0034_sync 2437 r 89 | 2011_10_03/2011_10_03_drive_0034_sync 4050 l 90 | 2011_09_26/2011_09_26_drive_0039_sync 147 r 91 | 2011_09_30/2011_09_30_drive_0028_sync 4741 l 92 | 2011_09_30/2011_09_30_drive_0028_sync 3557 r 93 | 2011_10_03/2011_10_03_drive_0034_sync 394 l 94 | 2011_09_30/2011_09_30_drive_0028_sync 158 r 95 | 2011_10_03/2011_10_03_drive_0034_sync 1804 l 96 | 2011_09_29/2011_09_29_drive_0004_sync 62 r 97 | 2011_09_30/2011_09_30_drive_0028_sync 220 l 98 | 2011_10_03/2011_10_03_drive_0034_sync 1420 r 99 | 2011_10_03/2011_10_03_drive_0034_sync 2310 l 100 | 2011_09_30/2011_09_30_drive_0034_sync 839 r -------------------------------------------------------------------------------- /mono/datasets/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from __future__ import absolute_import, division, print_function 6 | import torch 7 | import numpy as np 8 | import cv2 9 | 10 | 11 | def readlines(filename): 12 | """Read all the lines in a text file and return as a list 13 | """ 14 | with open(filename, 'r') as f: 15 | lines = f.read().splitlines() 16 | return lines 17 | 18 | 19 | def normalize_image(x): 20 | """Rescale image pixels to span range [0, 1] 21 | """ 22 | ma = float(x.max().cpu().data) 23 | mi = float(x.min().cpu().data) 24 | d = ma - mi if ma != mi else 1e5 25 | return (x - mi) / d 26 | 27 | 28 | def sec_to_hm(t): 29 | """Convert time in seconds to time in hours, minutes and seconds 30 | e.g. 
10239 -> (2, 50, 39) 31 | """ 32 | t = int(t) 33 | s = t % 60 34 | t //= 60 35 | m = t % 60 36 | t //= 60 37 | return t, m, s 38 | 39 | 40 | def sec_to_hm_str(t): 41 | """Convert time in seconds to a nice string 42 | e.g. 10239 -> '02h50m39s' 43 | """ 44 | h, m, s = sec_to_hm(t) 45 | return "{:02d}h{:02d}m{:02d}s".format(h, m, s) 46 | 47 | 48 | def transformation_from_parameters(axisangle, translation, invert=False): 49 | R = rot_from_axisangle(axisangle) 50 | t = translation.clone() 51 | if invert: 52 | R = R.transpose(1, 2) 53 | t *= -1 54 | T = get_translation_matrix(t) 55 | if invert: 56 | M = torch.matmul(R, T) 57 | else: 58 | M = torch.matmul(T, R) 59 | return M 60 | 61 | 62 | def get_translation_matrix(translation_vector): 63 | T = torch.zeros(translation_vector.shape[0], 4, 4).cuda() 64 | t = translation_vector.contiguous().view(-1, 3, 1) 65 | T[:, 0, 0] = 1 66 | T[:, 1, 1] = 1 67 | T[:, 2, 2] = 1 68 | T[:, 3, 3] = 1 69 | T[:, :3, 3, None] = t 70 | return T 71 | 72 | 73 | def rot_from_axisangle(vec): 74 | angle = torch.norm(vec, 2, 2, True) 75 | axis = vec / (angle + 1e-7) 76 | ca = torch.cos(angle) 77 | sa = torch.sin(angle) 78 | C = 1 - ca 79 | x = axis[..., 0].unsqueeze(1) 80 | y = axis[..., 1].unsqueeze(1) 81 | z = axis[..., 2].unsqueeze(1) 82 | xs = x * sa 83 | ys = y * sa 84 | zs = z * sa 85 | xC = x * C 86 | yC = y * C 87 | zC = z * C 88 | xyC = x * yC 89 | yzC = y * zC 90 | zxC = z * xC 91 | rot = torch.zeros((vec.shape[0], 4, 4)).cuda() 92 | rot[:, 0, 0] = torch.squeeze(x * xC + ca) 93 | rot[:, 0, 1] = torch.squeeze(xyC - zs) 94 | rot[:, 0, 2] = torch.squeeze(zxC + ys) 95 | rot[:, 1, 0] = torch.squeeze(xyC + zs) 96 | rot[:, 1, 1] = torch.squeeze(y * yC + ca) 97 | rot[:, 1, 2] = torch.squeeze(yzC - xs) 98 | rot[:, 2, 0] = torch.squeeze(zxC - ys) 99 | rot[:, 2, 1] = torch.squeeze(yzC + xs) 100 | rot[:, 2, 2] = torch.squeeze(z * zC + ca) 101 | rot[:, 3, 3] = 1 102 | return rot 103 | 104 | 105 | def dump_xyz(source_to_target_transformations): 106 | xyzs = [] 107 | cam_to_world = np.eye(4) 108 | xyzs.append(cam_to_world[:3, 3]) 109 | for source_to_target_transformation in source_to_target_transformations: 110 | cam_to_world = np.dot(cam_to_world, source_to_target_transformation) 111 | xyzs.append(cam_to_world[:3, 3]) 112 | return xyzs 113 | 114 | 115 | def compute_ate(gtruth_xyz, pred_xyz_o): 116 | offset = gtruth_xyz[0] - pred_xyz_o[0] 117 | pred_xyz = pred_xyz_o + offset[None, :] 118 | 119 | scale = np.sum(gtruth_xyz * pred_xyz) / np.sum(pred_xyz ** 2) 120 | alignment_error = pred_xyz * scale - gtruth_xyz 121 | rmse = np.sqrt(np.sum(alignment_error ** 2)) / gtruth_xyz.shape[0] 122 | return rmse 123 | 124 | 125 | def extract_match(queryImage, trainImage, num): 126 | orb = cv2.ORB_create() 127 | kp_query, des_query = orb.detectAndCompute(queryImage, None) 128 | kp_train, des_train = orb.detectAndCompute(trainImage, None) 129 | bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) 130 | matches = bf.match(des_query, des_train) 131 | matches = sorted(matches, key=lambda x: x.distance) 132 | query_position = [] 133 | train_position = [] 134 | for i in range(num): 135 | match = matches[i] 136 | queryIdx = match.queryIdx 137 | trainIdx = match.trainIdx 138 | query_position.append(kp_query[queryIdx].pt) 139 | train_position.append(kp_train[trainIdx].pt) 140 | return query_position, train_position 141 | 142 | 143 | def compute_errors(gt, pred): 144 | """Computation of error metrics between predicted and ground truth depths 145 | """ 146 | thresh = np.maximum((gt / pred), (pred / gt)) 
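# The three scores computed next are the standard "delta" accuracy metrics for depth
# evaluation: the fraction of pixels whose ratio max(gt/pred, pred/gt) falls below
# 1.25, 1.25**2 and 1.25**3 (higher is better). For instance, gt = 2.0 m against
# pred = 1.7 m gives a ratio of about 1.18, so that pixel counts towards a1, a2 and a3,
# while pred = 1.5 m gives roughly 1.33 and only counts towards a2 and a3.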
147 | a1 = (thresh < 1.25 ).mean() 148 | a2 = (thresh < 1.25 ** 2).mean() 149 | a3 = (thresh < 1.25 ** 3).mean() 150 | 151 | rmse = (gt - pred) ** 2 152 | rmse = np.sqrt(rmse.mean()) 153 | 154 | rmse_log = (np.log(gt) - np.log(pred)) ** 2 155 | rmse_log = np.sqrt(rmse_log.mean()) 156 | 157 | abs_rel = np.mean(np.abs(gt - pred) / gt) 158 | 159 | sq_rel = np.mean(((gt - pred) ** 2) / gt) 160 | 161 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 162 | 163 | 164 | def batch_post_process_disparity(l_disp, r_disp): 165 | """Apply the disparity post-processing method as introduced in Monodepthv1 166 | """ 167 | _, h, w = l_disp.shape 168 | m_disp = 0.5 * (l_disp + r_disp) 169 | l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h)) 170 | l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...] 171 | r_mask = l_mask[:, :, ::-1] 172 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp -------------------------------------------------------------------------------- /mono/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .mono_baseline.net import Baseline 2 | from .mono_autoencoder.net import autoencoder 3 | from .mono_fm.net import mono_fm 4 | from .mono_fm_joint.net import mono_fm_joint -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_autoencoder/__init__.py -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/decoder.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | # import torch.nn as nn 3 | # import torch.nn.functional as F 4 | # from .layers import Conv1x1, Conv3x3, CRPBlock, upsample 5 | # 6 | # 7 | # class Decoder(nn.Module): 8 | # def __init__(self, num_ch_enc): 9 | # super(Decoder, self).__init__() 10 | # 11 | # bottleneck = 256 12 | # stage = 4 13 | # self.do = nn.Dropout(p=0.5) 14 | # 15 | # self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False) 16 | # self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False) 17 | # self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False) 18 | # self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False) 19 | # 20 | # self.iconv4 = Conv3x3(512, bottleneck) 21 | # self.iconv3 = Conv3x3(bottleneck, bottleneck) 22 | # self.iconv2 = Conv3x3(bottleneck, bottleneck) 23 | # self.iconv1 = Conv3x3(bottleneck, bottleneck) 24 | # 25 | # self.crp4 = self._make_crp(bottleneck, bottleneck, stage) 26 | # self.crp3 = self._make_crp(bottleneck, bottleneck, stage) 27 | # self.crp2 = self._make_crp(bottleneck, bottleneck, stage) 28 | # self.crp1 = self._make_crp(bottleneck, bottleneck, stage) 29 | # 30 | # self.merge4 = Conv3x3(bottleneck, bottleneck) 31 | # self.merge3 = Conv3x3(bottleneck, bottleneck) 32 | # self.merge2 = Conv3x3(bottleneck, bottleneck) 33 | # self.merge1 = Conv3x3(bottleneck, bottleneck) 34 | # 35 | # # disp 36 | # self.disp4 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid()) 37 | # self.disp3 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid()) 38 | # self.disp2 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid()) 39 | # self.disp1 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid()) 40 | # 41 | # def _make_crp(self, in_planes, out_planes, stages): 42 | # 
layers = [CRPBlock(in_planes, out_planes,stages)] 43 | # return nn.Sequential(*layers) 44 | # 45 | # def forward(self, input_features, frame_id): 46 | # self.outputs = {} 47 | # l0, l1, l2, l3, l4 = input_features 48 | # 49 | # x4 = self.reduce4(l4) 50 | # x4 = self.iconv4(x4) 51 | # x4 = F.leaky_relu(x4) 52 | # x4 = self.crp4(x4) 53 | # x4 = self.merge4(x4) 54 | # x4 = F.leaky_relu(x4) 55 | # x4 = upsample(x4) 56 | # disp4 = self.disp4(x4) 57 | # 58 | # 59 | # x3 = self.reduce3(x4) 60 | # x3 = self.iconv3(x3) 61 | # x3 = F.leaky_relu(x3) 62 | # x3 = self.crp3(x3) 63 | # x3 = self.merge3(x3) 64 | # x3 = F.leaky_relu(x3) 65 | # x3 = upsample(x3) 66 | # disp3 = self.disp3(x3) 67 | # 68 | # 69 | # x2 = self.reduce2(l2) 70 | # x2 = torch.cat((x2), 1) 71 | # x2 = self.iconv2(x2) 72 | # x2 = F.leaky_relu(x2) 73 | # x2 = self.crp2(x2) 74 | # x2 = self.merge2(x2) 75 | # x2 = F.leaky_relu(x2) 76 | # x2 = upsample(x2) 77 | # disp2 = self.disp2(x2) 78 | # 79 | # x1 = self.reduce1(l1) 80 | # x1 = torch.cat((x1), 1) 81 | # x1 = self.iconv1(x1) 82 | # x1 = F.leaky_relu(x1) 83 | # x1 = self.crp1(x1) 84 | # x1 = self.merge1(x1) 85 | # x1 = F.leaky_relu(x1) 86 | # x1 = upsample(x1) 87 | # disp1 = self.disp1(x1) 88 | # 89 | # self.outputs[("disp", frame_id, 3)] = disp4 90 | # self.outputs[("disp", frame_id, 2)] = disp3 91 | # self.outputs[("disp", frame_id, 1)] = disp2 92 | # self.outputs[("disp", frame_id, 0)] = disp1 93 | # 94 | # return self.outputs 95 | 96 | 97 | from __future__ import absolute_import, division, print_function 98 | import torch.nn as nn 99 | from .layers import ConvBlock, Conv3x3, upsample 100 | 101 | 102 | class Decoder(nn.Module): 103 | def __init__(self, num_ch_enc, num_output_channels=3): 104 | super(Decoder, self).__init__() 105 | 106 | num_ch_dec = [16, 32, 64, 128, 256] 107 | 108 | # upconv 109 | self.upconv5 = ConvBlock(num_ch_enc[4], num_ch_dec[4]) 110 | self.upconv4 = ConvBlock(num_ch_dec[4], num_ch_dec[3]) 111 | self.upconv3 = ConvBlock(num_ch_dec[3], num_ch_dec[2]) 112 | self.upconv2 = ConvBlock(num_ch_dec[2], num_ch_dec[1]) 113 | self.upconv1 = ConvBlock(num_ch_dec[1], num_ch_dec[0]) 114 | 115 | # iconv 116 | self.iconv5 = ConvBlock(num_ch_dec[4], num_ch_dec[4]) 117 | self.iconv4 = ConvBlock(num_ch_dec[3], num_ch_dec[3]) 118 | self.iconv3 = ConvBlock(num_ch_dec[2], num_ch_dec[2]) 119 | self.iconv2 = ConvBlock(num_ch_dec[1], num_ch_dec[1]) 120 | self.iconv1 = ConvBlock(num_ch_dec[0], num_ch_dec[0]) 121 | 122 | # disp 123 | self.disp4 = Conv3x3(num_ch_dec[3], num_output_channels) 124 | self.disp3 = Conv3x3(num_ch_dec[2], num_output_channels) 125 | self.disp2 = Conv3x3(num_ch_dec[1], num_output_channels) 126 | self.disp1 = Conv3x3(num_ch_dec[0], num_output_channels) 127 | 128 | self.sigmoid = nn.Sigmoid() 129 | 130 | 131 | def forward(self, input_features, frame_id=0): 132 | self.outputs = {} 133 | _, _, _, _, econv5 = input_features 134 | # (64,64,128,256,512)*4 135 | 136 | upconv5 = upsample(self.upconv5(econv5)) 137 | iconv5 = self.iconv5(upconv5) 138 | 139 | upconv4 = upsample(self.upconv4(iconv5)) 140 | iconv4 = self.iconv4(upconv4) 141 | 142 | upconv3 = upsample(self.upconv3(iconv4)) 143 | iconv3 = self.iconv3(upconv3) 144 | 145 | upconv2 = upsample(self.upconv2(iconv3)) 146 | iconv2 = self.iconv2(upconv2) 147 | 148 | upconv1 = upsample(self.upconv1(iconv2)) 149 | iconv1 = self.iconv1(upconv1) 150 | 151 | self.outputs[("disp", frame_id, 3)] = self.sigmoid(self.disp4(iconv4)) 152 | self.outputs[("disp", frame_id, 2)] = self.sigmoid(self.disp3(iconv3)) 153 | 
self.outputs[("disp", frame_id, 1)] = self.sigmoid(self.disp2(iconv2)) 154 | self.outputs[("disp", frame_id, 0)] = self.sigmoid(self.disp1(iconv1)) 155 | return self.outputs -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from .resnet import resnet18, resnet34, resnet50, resnet101 6 | 7 | 8 | class Encoder(nn.Module): 9 | def __init__(self, num_layers, pretrained_path=None): 10 | super(Encoder, self).__init__() 11 | 12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 13 | 14 | resnets = {18: resnet18, 15 | 34: resnet34, 16 | 50: resnet50, 17 | 101: resnet101,} 18 | 19 | if num_layers not in resnets: 20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 21 | 22 | 23 | self.encoder = resnets[num_layers]() 24 | if pretrained_path is not None: 25 | checkpoint = torch.load(pretrained_path) 26 | self.encoder.load_state_dict(checkpoint) 27 | 28 | if num_layers > 34: 29 | self.num_ch_enc[1:] *= 4 30 | 31 | # for name, param in self.encoder.named_parameters(): 32 | # if 'bn' in name: 33 | # param.requires_grad = False 34 | 35 | def forward(self, input_image): 36 | self.features = [] 37 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(input_image)))) 38 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 39 | self.features.append(self.encoder.layer2(self.features[-1])) 40 | self.features.append(self.encoder.layer3(self.features[-1])) 41 | self.features.append(self.encoder.layer4(self.features[-1])) 42 | 43 | return self.features 44 | -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/layers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class SSIM(nn.Module): 9 | def __init__(self): 10 | super(SSIM, self).__init__() 11 | self.mu_x_pool = nn.AvgPool2d(3, 1) 12 | self.mu_y_pool = nn.AvgPool2d(3, 1) 13 | self.sig_x_pool = nn.AvgPool2d(3, 1) 14 | self.sig_y_pool = nn.AvgPool2d(3, 1) 15 | self.sig_xy_pool = nn.AvgPool2d(3, 1) 16 | self.refl = nn.ReflectionPad2d(1) 17 | self.C1 = 0.01 ** 2 18 | self.C2 = 0.03 ** 2 19 | 20 | def forward(self, x, y): 21 | x = self.refl(x) 22 | y = self.refl(y) 23 | mu_x = self.mu_x_pool(x) 24 | mu_y = self.mu_y_pool(y) 25 | sigma_x = self.sig_x_pool(x ** 2) - mu_x ** 2 26 | sigma_y = self.sig_y_pool(y ** 2) - mu_y ** 2 27 | sigma_xy = self.sig_xy_pool(x * y) - mu_x * mu_y 28 | SSIM_n = (2 * mu_x * mu_y + self.C1) * (2 * sigma_xy + self.C2) 29 | SSIM_d = (mu_x ** 2 + mu_y ** 2 + self.C1) * (sigma_x + sigma_y + self.C2) 30 | return torch.clamp((1 - SSIM_n / SSIM_d) / 2, 0, 1) 31 | 32 | 33 | def upsample(x): 34 | return F.interpolate(x, scale_factor=2, mode="nearest") 35 | 36 | 37 | class ConvBlock(nn.Module): 38 | def __init__(self, in_channels, out_channels): 39 | super(ConvBlock, self).__init__() 40 | self.conv = Conv3x3(in_channels, out_channels) 41 | self.nonlin = nn.ELU(inplace=True) 42 | def forward(self, x): 43 | out = self.conv(x) 44 | out = self.nonlin(out) 45 | return out 46 | 47 | 48 | class Conv1x1(nn.Module): 49 | def 
__init__(self, in_channels, out_channels, bias=False): 50 | super(Conv1x1, self).__init__() 51 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), kernel_size=1, stride=1, bias=bias) 52 | def forward(self, x): 53 | out = self.conv(x) 54 | return out 55 | 56 | 57 | class Conv3x3(nn.Module): 58 | def __init__(self, in_channels, out_channels, use_refl=True): 59 | super(Conv3x3, self).__init__() 60 | if use_refl: 61 | self.pad = nn.ReflectionPad2d(1) 62 | else: 63 | self.pad = nn.ZeroPad2d(1) 64 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3) 65 | def forward(self, x): 66 | out = self.pad(x) 67 | out = self.conv(out) 68 | return out 69 | 70 | 71 | class Conv5x5(nn.Module): 72 | def __init__(self, in_channels, out_channels, use_refl=True): 73 | super(Conv5x5, self).__init__() 74 | if use_refl: 75 | self.pad = nn.ReflectionPad2d(2) 76 | else: 77 | self.pad = nn.ZeroPad2d(2) 78 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 5) 79 | def forward(self, x): 80 | out = self.pad(x) 81 | out = self.conv(out) 82 | return out 83 | 84 | 85 | class CRPBlock(nn.Module): 86 | def __init__(self, in_planes, out_planes, n_stages): 87 | super(CRPBlock, self).__init__() 88 | for i in range(n_stages): 89 | setattr(self, '{}_{}'.format(i + 1, 'pointwise'), Conv1x1(in_planes if (i == 0) else out_planes, out_planes, False)) 90 | self.stride = 1 91 | self.n_stages = n_stages 92 | self.maxpool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) 93 | 94 | def forward(self, x): 95 | top = x 96 | for i in range(self.n_stages): 97 | top = self.maxpool(top) 98 | top = getattr(self, '{}_{}'.format(i + 1, 'pointwise'))(top) 99 | x = top + x 100 | return x 101 | 102 | 103 | def compute_depth_errors(gt, pred): 104 | thresh = torch.max((gt / pred), (pred / gt)) 105 | a1 = (thresh < 1.25 ).float().mean() 106 | a2 = (thresh < 1.25 ** 2).float().mean() 107 | a3 = (thresh < 1.25 ** 3).float().mean() 108 | rmse = (gt - pred) ** 2 109 | rmse = torch.sqrt(rmse.mean()) 110 | rmse_log = (torch.log(gt) - torch.log(pred)) ** 2 111 | rmse_log = torch.sqrt(rmse_log.mean()) 112 | abs_rel = torch.mean(torch.abs(gt - pred) / gt) 113 | sq_rel = torch.mean((gt - pred) ** 2 / gt) 114 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/net.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch 3 | import torch.nn.functional as F 4 | import torch.nn as nn 5 | 6 | import os 7 | import matplotlib.pyplot as plt 8 | 9 | from .layers import SSIM 10 | from .encoder import Encoder 11 | from .decoder import Decoder 12 | from ..registry import MONO 13 | 14 | 15 | @MONO.register_module 16 | class autoencoder(nn.Module): 17 | def __init__(self, options): 18 | super(autoencoder, self).__init__() 19 | self.opt = options 20 | 21 | self.Encoder = Encoder(self.opt.depth_num_layers, self.opt.depth_pretrained_path) 22 | self.Decoder = Decoder(self.Encoder.num_ch_enc) 23 | 24 | self.ssim = SSIM() 25 | self.count = 0 26 | 27 | def forward(self, inputs): 28 | features = self.Encoder(inputs[("color", 0, 0)]) 29 | outputs = self.Decoder(features, 0) 30 | if self.training: 31 | loss_dict = self.compute_losses(inputs, outputs, features) 32 | return outputs, loss_dict 33 | return outputs 34 | 35 | def robust_l1(self, pred, target): 36 | eps = 1e-3 37 | return torch.sqrt(torch.pow(target - pred, 2) + eps ** 2) 38 | 
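    # robust_l1 above is a Charbonnier-style penalty, sqrt((target - pred)^2 + eps^2),
    # which stays differentiable at zero. compute_reprojection_loss below blends it
    # with SSIM using the 0.85 / 0.15 weighting common in Monodepth2-style
    # self-supervised training.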
39 | def compute_reprojection_loss(self, pred, target): 40 | photometric_loss = self.robust_l1(pred, target).mean(1, True) 41 | ssim_loss = self.ssim(pred, target).mean(1, True) 42 | reprojection_loss = (0.85 * ssim_loss + 0.15 * photometric_loss) 43 | return reprojection_loss 44 | 45 | def compute_losses(self, inputs, outputs, features): 46 | loss_dict = {} 47 | interval = 1000 48 | target = inputs[("color", 0, 0)] 49 | for i in range(5): 50 | f=features[i] 51 | smooth_loss = self.get_smooth_loss(f, target) 52 | loss_dict[('smooth_loss', i)] = smooth_loss/ (2 ** i)/5 53 | 54 | for scale in self.opt.scales: 55 | """ 56 | initialization 57 | """ 58 | pred = outputs[("disp", 0, scale)] 59 | 60 | _,_,h,w = pred.size() 61 | target = F.interpolate(target, [h, w], mode="bilinear", align_corners=False) 62 | min_reconstruct_loss = self.compute_reprojection_loss(pred, target) 63 | loss_dict[('min_reconstruct_loss', scale)] = min_reconstruct_loss.mean()/len(self.opt.scales) 64 | 65 | if self.count % interval == 0: 66 | img_path = os.path.join('/node01_data5/monodepth2-test/odo', 'auto_{:0>4d}_{}.png'.format(self.count // interval, scale)) 67 | plt.imsave(img_path, pred[0].transpose(0,1).transpose(1,2).data.cpu().numpy()) 68 | img_path = os.path.join('/node01_data5/monodepth2-test/odo', 'img_{:0>4d}_{}.png'.format(self.count // interval, scale)) 69 | plt.imsave(img_path, target[0].transpose(0, 1).transpose(1, 2).data.cpu().numpy()) 70 | 71 | self.count += 1 72 | return loss_dict 73 | 74 | def get_smooth_loss(self, disp, img): 75 | b, _, h, w = disp.size() 76 | img = F.interpolate(img, (h, w), mode='area') 77 | 78 | disp_dx, disp_dy = self.gradient(disp) 79 | img_dx, img_dy = self.gradient(img) 80 | 81 | disp_dxx, disp_dxy = self.gradient(disp_dx) 82 | disp_dyx, disp_dyy = self.gradient(disp_dy) 83 | 84 | img_dxx, img_dxy = self.gradient(img_dx) 85 | img_dyx, img_dyy = self.gradient(img_dy) 86 | 87 | smooth1 = torch.mean(disp_dx.abs() * torch.exp(-img_dx.abs().mean(1, True))) + \ 88 | torch.mean(disp_dy.abs() * torch.exp(-img_dy.abs().mean(1, True))) 89 | 90 | smooth2 = torch.mean(disp_dxx.abs() * torch.exp(-img_dxx.abs().mean(1, True))) + \ 91 | torch.mean(disp_dxy.abs() * torch.exp(-img_dxy.abs().mean(1, True))) + \ 92 | torch.mean(disp_dyx.abs() * torch.exp(-img_dyx.abs().mean(1, True))) + \ 93 | torch.mean(disp_dyy.abs() * torch.exp(-img_dyy.abs().mean(1, True))) 94 | 95 | return -self.opt.dis * smooth1+ self.opt.cvt * smooth2 96 | 97 | def gradient(self, D): 98 | dy = D[:, :, 1:] - D[:, :, :-1] 99 | dx = D[:, :, :, 1:] - D[:, :, :, :-1] 100 | return dx, dy 101 | 102 | -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/resnet.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import BatchNorm2d as bn 5 | 6 | def conv3x3(in_planes, out_planes, stride=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 9 | 10 | 11 | def conv1x1(in_planes, out_planes, stride=1): 12 | """1x1 convolution""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = conv3x3(inplanes, planes, stride) 22 | self.bn1 = bn(planes) 23 | 
self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = bn(planes) 26 | self.downsample = downsample 27 | self.stride = stride 28 | 29 | def forward(self, x): 30 | residual = x 31 | 32 | out = self.conv1(x) 33 | out = self.bn1(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv2(out) 37 | out = self.bn2(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, inplanes, planes, stride=1, downsample=None): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = conv1x1(inplanes, planes) 54 | self.bn1 = bn(planes) 55 | self.conv2 = conv3x3(planes, planes, stride) 56 | self.bn2 = bn(planes) 57 | self.conv3 = conv1x1(planes, planes * self.expansion) 58 | self.bn3 = bn(planes * self.expansion) 59 | self.relu = nn.ReLU(inplace=True) 60 | self.downsample = downsample 61 | self.stride = stride 62 | 63 | def forward(self, x): 64 | residual = x 65 | 66 | out = self.conv1(x) 67 | out = self.bn1(out) 68 | out = self.relu(out) 69 | 70 | out = self.conv2(out) 71 | out = self.bn2(out) 72 | out = self.relu(out) 73 | 74 | out = self.conv3(out) 75 | out = self.bn3(out) 76 | 77 | if self.downsample is not None: 78 | residual = self.downsample(x) 79 | 80 | out += residual 81 | out = self.relu(out) 82 | 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | 88 | def __init__(self, block, layers, num_classes=1000): 89 | super(ResNet, self).__init__() 90 | self.inplanes = 64 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 92 | self.bn1 = bn(64) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0]) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 100 | self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 105 | elif isinstance(m, bn): 106 | nn.init.constant_(m.weight, 1) 107 | nn.init.constant_(m.bias, 0) 108 | 109 | def _make_layer(self, block, planes, blocks, stride=1): 110 | downsample = None 111 | if stride != 1 or self.inplanes != planes * block.expansion: 112 | downsample = nn.Sequential( 113 | conv1x1(self.inplanes, planes * block.expansion, stride), 114 | bn(planes * block.expansion), 115 | ) 116 | 117 | layers = [] 118 | layers.append(block(self.inplanes, planes, stride, downsample)) 119 | self.inplanes = planes * block.expansion 120 | for _ in range(1, blocks): 121 | layers.append(block(self.inplanes, planes)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.conv1(x) 127 | x = self.bn1(x) 128 | x = self.relu(x) 129 | x = self.maxpool(x) 130 | 131 | x = self.layer1(x) 132 | x = self.layer2(x) 133 | x = self.layer3(x) 134 | x = self.layer4(x) 135 | 136 | return x 137 | 138 | 139 | def resnet18(pretrained_path=None): 140 | """Constructs a ResNet-18 model. 
141 | Args: 142 | pretrained (bool): If True, returns a model pre-trained on ImageNet 143 | """ 144 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 145 | if pretrained_path is not None: 146 | model.load_state_dict(torch.load(pretrained_path)) 147 | print('Loaded pre-trained weights') 148 | return model 149 | 150 | 151 | def resnet34(pretrained_path=None, **kwargs): 152 | """Constructs a ResNet-34 model. 153 | Args: 154 | pretrained (bool): If True, returns a model pre-trained on ImageNet 155 | """ 156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 157 | if pretrained_path is not None: 158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth'))) 159 | print('Loaded pre-trained weights') 160 | return model 161 | 162 | 163 | def resnet50(pretrained_path=None, **kwargs): 164 | """Constructs a ResNet-50 model. 165 | Args: 166 | pretrained (bool): If True, returns a model pre-trained on ImageNet 167 | """ 168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 169 | if pretrained_path is not None: 170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth'))) 171 | print('Loaded pre-trained weights') 172 | return model 173 | 174 | 175 | def resnet101(pretrained_path=None, **kwargs): 176 | """Constructs a ResNet-101 model. 177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 181 | if pretrained_path is not None: 182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth'))) 183 | print('Loaded pre-trained weights') 184 | return model 185 | -------------------------------------------------------------------------------- /mono/model/mono_baseline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_baseline/__init__.py -------------------------------------------------------------------------------- /mono/model/mono_baseline/depth_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .layers import Conv1x1, Conv3x3, CRPBlock, upsample 5 | 6 | 7 | class DepthDecoder(nn.Module): 8 | def __init__(self, num_ch_enc): 9 | super(DepthDecoder, self).__init__() 10 | 11 | bottleneck = 256 12 | stage = 4 13 | self.do = nn.Dropout(p=0.5) 14 | 15 | self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False) 16 | self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False) 17 | self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False) 18 | self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False) 19 | 20 | self.iconv4 = Conv3x3(512, bottleneck) 21 | self.iconv3 = Conv3x3(bottleneck*2+1, bottleneck) 22 | self.iconv2 = Conv3x3(bottleneck*2+1, bottleneck) 23 | self.iconv1 = Conv3x3(bottleneck*2+1, bottleneck) 24 | 25 | self.crp4 = self._make_crp(bottleneck, bottleneck, stage) 26 | self.crp3 = self._make_crp(bottleneck, bottleneck, stage) 27 | self.crp2 = self._make_crp(bottleneck, bottleneck, stage) 28 | self.crp1 = self._make_crp(bottleneck, bottleneck, stage) 29 | 30 | self.merge4 = Conv3x3(bottleneck, bottleneck) 31 | self.merge3 = Conv3x3(bottleneck, bottleneck) 32 | self.merge2 = Conv3x3(bottleneck, bottleneck) 33 | self.merge1 = Conv3x3(bottleneck, bottleneck) 34 | 35 | # disp 36 | self.disp4 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 37 | self.disp3 = 
nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 38 | self.disp2 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 39 | self.disp1 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 40 | 41 | def _make_crp(self, in_planes, out_planes, stages): 42 | layers = [CRPBlock(in_planes, out_planes,stages)] 43 | return nn.Sequential(*layers) 44 | 45 | def forward(self, input_features, frame_id=0): 46 | self.outputs = {} 47 | l0, l1, l2, l3, l4 = input_features 48 | 49 | l4 = self.do(l4) 50 | l3 = self.do(l3) 51 | 52 | x4 = self.reduce4(l4) 53 | x4 = self.iconv4(x4) 54 | x4 = F.leaky_relu(x4) 55 | x4 = self.crp4(x4) 56 | x4 = self.merge4(x4) 57 | x4 = F.leaky_relu(x4) 58 | x4 = upsample(x4) 59 | disp4 = self.disp4(x4) 60 | 61 | 62 | x3 = self.reduce3(l3) 63 | x3 = torch.cat((x3, x4, disp4), 1) 64 | x3 = self.iconv3(x3) 65 | x3 = F.leaky_relu(x3) 66 | x3 = self.crp3(x3) 67 | x3 = self.merge3(x3) 68 | x3 = F.leaky_relu(x3) 69 | x3 = upsample(x3) 70 | disp3 = self.disp3(x3) 71 | 72 | 73 | x2 = self.reduce2(l2) 74 | x2 = torch.cat((x2, x3 , disp3), 1) 75 | x2 = self.iconv2(x2) 76 | x2 = F.leaky_relu(x2) 77 | x2 = self.crp2(x2) 78 | x2 = self.merge2(x2) 79 | x2 = F.leaky_relu(x2) 80 | x2 = upsample(x2) 81 | disp2 = self.disp2(x2) 82 | 83 | x1 = self.reduce1(l1) 84 | x1 = torch.cat((x1, x2, disp2), 1) 85 | x1 = self.iconv1(x1) 86 | x1 = F.leaky_relu(x1) 87 | x1 = self.crp1(x1) 88 | x1 = self.merge1(x1) 89 | x1 = F.leaky_relu(x1) 90 | x1 = upsample(x1) 91 | disp1 = self.disp1(x1) 92 | 93 | self.outputs[("disp", frame_id, 3)] = disp4 94 | self.outputs[("disp", frame_id, 2)] = disp3 95 | self.outputs[("disp", frame_id, 1)] = disp2 96 | self.outputs[("disp", frame_id, 0)] = disp1 97 | 98 | return self.outputs 99 | -------------------------------------------------------------------------------- /mono/model/mono_baseline/depth_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from .resnet import resnet18, resnet34, resnet50, resnet101 6 | 7 | 8 | class DepthEncoder(nn.Module): 9 | def __init__(self, num_layers, pretrained_path=None): 10 | super(DepthEncoder, self).__init__() 11 | 12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 13 | 14 | resnets = {18: resnet18, 15 | 34: resnet34, 16 | 50: resnet50, 17 | 101: resnet101,} 18 | 19 | if num_layers not in resnets: 20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 21 | 22 | 23 | self.encoder = resnets[num_layers]() 24 | if pretrained_path is not None: 25 | checkpoint = torch.load(pretrained_path) 26 | self.encoder.load_state_dict(checkpoint) 27 | 28 | if num_layers > 34: 29 | self.num_ch_enc[1:] *= 4 30 | 31 | # for name, param in self.encoder.named_parameters(): 32 | # if 'bn' in name: 33 | # param.requires_grad = False 34 | 35 | def forward(self, input_image): 36 | self.features = [] 37 | x = (input_image - 0.45) / 0.225 38 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(x)))) 39 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 40 | self.features.append(self.encoder.layer2(self.features[-1])) 41 | self.features.append(self.encoder.layer3(self.features[-1])) 42 | self.features.append(self.encoder.layer4(self.features[-1])) 43 | 44 | return self.features 45 | -------------------------------------------------------------------------------- 
/mono/model/mono_baseline/pose_decoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch.nn as nn 3 | 4 | 5 | class PoseDecoder(nn.Module): 6 | def __init__(self, num_ch_enc, stride=1): 7 | super(PoseDecoder, self).__init__() 8 | 9 | self.reduce = nn.Conv2d(num_ch_enc[-1], 256, 1) 10 | self.conv1 = nn.Conv2d(256, 256, 3, stride, 1) 11 | self.conv2 = nn.Conv2d(256, 256, 3, stride, 1) 12 | self.conv3 = nn.Conv2d(256, 6, 1) 13 | 14 | self.relu = nn.ReLU() 15 | 16 | def forward(self, input_features): 17 | f = input_features[-1] 18 | out = self.relu(self.reduce(f)) 19 | out = self.relu(self.conv1(out)) 20 | out = self.relu(self.conv2(out)) 21 | out = self.conv3(out) 22 | out = out.mean(3).mean(2) 23 | out = 0.01 * out.view(-1, 1, 1, 6) 24 | axisangle = out[..., :3] 25 | translation = out[..., 3:] 26 | return axisangle, translation 27 | -------------------------------------------------------------------------------- /mono/model/mono_baseline/pose_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | from .resnet import ResNet, BasicBlock, resnet18, resnet34, resnet50, resnet101, Bottleneck 8 | from torch.nn import BatchNorm2d as bn 9 | 10 | 11 | class ResNetMultiImageInput(ResNet): 12 | def __init__(self, block, layers, num_classes=1000, num_input_images=2): 13 | super(ResNetMultiImageInput, self).__init__(block, layers) 14 | self.inplanes = 64 15 | self.conv1 = nn.Conv2d(num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 16 | self.bn1 = bn(64) 17 | self.relu = nn.ReLU(inplace=True) 18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 19 | self.layer1 = self._make_layer(block, 64, layers[0]) 20 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 21 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 22 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 23 | 24 | for m in self.modules(): 25 | if isinstance(m, nn.Conv2d): 26 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 27 | elif isinstance(m, nn.BatchNorm2d): 28 | nn.init.constant_(m.weight, 1) 29 | nn.init.constant_(m.bias, 0) 30 | 31 | 32 | def resnet_multiimage_input(num_layers, num_input_images=2, pretrained_path=None): 33 | assert num_layers in [18, 34, 50, 101], "Can only run with 18, 34, 50, 101 layers resnet" 34 | blocks = {18 : [2, 2, 2, 2], 35 | 34 : [3, 4, 6, 3], 36 | 50 : [3, 4, 6, 3], 37 | 101: [3, 4, 23, 3], 38 | }[num_layers] 39 | 40 | if num_layers < 40: 41 | model = ResNetMultiImageInput(BasicBlock, blocks, num_input_images=num_input_images) 42 | elif num_layers > 40: 43 | model = ResNetMultiImageInput(Bottleneck, blocks, num_input_images=num_input_images) 44 | 45 | if pretrained_path is not None: 46 | loaded = torch.load(pretrained_path) 47 | loaded['conv1.weight'] = torch.cat([loaded['conv1.weight']] * num_input_images, 1) / num_input_images 48 | model.load_state_dict(loaded) 49 | return model 50 | 51 | 52 | class PoseEncoder(nn.Module): 53 | def __init__(self, num_layers, pretrained_path=None, num_input_images=2): 54 | super(PoseEncoder, self).__init__() 55 | 56 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 57 | 58 | resnets = {18: resnet18, 59 | 34: resnet34, 60 | 50: resnet50, 61 | 101: resnet101,} 62 | 63 | 
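        # Only these ResNet depths are supported. For pose estimation the encoder
        # normally sees a pair of frames (num_input_images=2), so
        # resnet_multiimage_input above widens conv1 to num_input_images * 3 input
        # channels and averages any pretrained conv1 weights across the copies.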
if num_layers not in resnets: 64 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 65 | 66 | if num_input_images > 1: 67 | self.encoder = resnet_multiimage_input(num_layers, num_input_images, pretrained_path) 68 | else: 69 | self.encoder = resnets[num_layers]() 70 | if pretrained_path is not None: 71 | checkpoint = torch.load(pretrained_path) 72 | self.encoder.load_state_dict(checkpoint) 73 | 74 | if num_layers > 34: 75 | self.num_ch_enc[1:] *= 4 76 | 77 | # for name, param in self.encoder.named_parameters(): 78 | # if 'bn' in name: 79 | # param.requires_grad = False 80 | 81 | def forward(self, input_image): 82 | self.features = [] 83 | x = (input_image - 0.45) / 0.225 84 | x = self.encoder.conv1(x) 85 | x = self.encoder.bn1(x) 86 | self.features.append(self.encoder.relu(x)) 87 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 88 | self.features.append(self.encoder.layer2(self.features[-1])) 89 | self.features.append(self.encoder.layer3(self.features[-1])) 90 | self.features.append(self.encoder.layer4(self.features[-1])) 91 | 92 | return self.features 93 | -------------------------------------------------------------------------------- /mono/model/mono_baseline/resnet.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import BatchNorm2d as bn 5 | 6 | def conv3x3(in_planes, out_planes, stride=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 9 | 10 | 11 | def conv1x1(in_planes, out_planes, stride=1): 12 | """1x1 convolution""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = conv3x3(inplanes, planes, stride) 22 | self.bn1 = bn(planes) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = bn(planes) 26 | self.downsample = downsample 27 | self.stride = stride 28 | 29 | def forward(self, x): 30 | residual = x 31 | 32 | out = self.conv1(x) 33 | out = self.bn1(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv2(out) 37 | out = self.bn2(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, inplanes, planes, stride=1, downsample=None): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = conv1x1(inplanes, planes) 54 | self.bn1 = bn(planes) 55 | self.conv2 = conv3x3(planes, planes, stride) 56 | self.bn2 = bn(planes) 57 | self.conv3 = conv1x1(planes, planes * self.expansion) 58 | self.bn3 = bn(planes * self.expansion) 59 | self.relu = nn.ReLU(inplace=True) 60 | self.downsample = downsample 61 | self.stride = stride 62 | 63 | def forward(self, x): 64 | residual = x 65 | 66 | out = self.conv1(x) 67 | out = self.bn1(out) 68 | out = self.relu(out) 69 | 70 | out = self.conv2(out) 71 | out = self.bn2(out) 72 | out = self.relu(out) 73 | 74 | out = self.conv3(out) 75 | out = self.bn3(out) 76 | 77 | if self.downsample is not None: 78 | residual = self.downsample(x) 79 | 80 | out += residual 81 | out = self.relu(out) 82 | 83 | return out 84 | 85 | 86 | class 
ResNet(nn.Module): 87 | 88 | def __init__(self, block, layers, num_classes=1000): 89 | super(ResNet, self).__init__() 90 | self.inplanes = 64 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 92 | self.bn1 = bn(64) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0]) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 100 | self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 105 | elif isinstance(m, bn): 106 | nn.init.constant_(m.weight, 1) 107 | nn.init.constant_(m.bias, 0) 108 | 109 | def _make_layer(self, block, planes, blocks, stride=1): 110 | downsample = None 111 | if stride != 1 or self.inplanes != planes * block.expansion: 112 | downsample = nn.Sequential( 113 | conv1x1(self.inplanes, planes * block.expansion, stride), 114 | bn(planes * block.expansion), 115 | ) 116 | 117 | layers = [] 118 | layers.append(block(self.inplanes, planes, stride, downsample)) 119 | self.inplanes = planes * block.expansion 120 | for _ in range(1, blocks): 121 | layers.append(block(self.inplanes, planes)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.conv1(x) 127 | x = self.bn1(x) 128 | x = self.relu(x) 129 | x = self.maxpool(x) 130 | 131 | x = self.layer1(x) 132 | x = self.layer2(x) 133 | x = self.layer3(x) 134 | x = self.layer4(x) 135 | 136 | return x 137 | 138 | 139 | def resnet18(pretrained_path=None): 140 | """Constructs a ResNet-18 model. 141 | Args: 142 | pretrained (bool): If True, returns a model pre-trained on ImageNet 143 | """ 144 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 145 | if pretrained_path is not None: 146 | model.load_state_dict(torch.load(pretrained_path)) 147 | print('Loaded pre-trained weights') 148 | return model 149 | 150 | 151 | def resnet34(pretrained_path=None, **kwargs): 152 | """Constructs a ResNet-34 model. 153 | Args: 154 | pretrained (bool): If True, returns a model pre-trained on ImageNet 155 | """ 156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 157 | if pretrained_path is not None: 158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth'))) 159 | print('Loaded pre-trained weights') 160 | return model 161 | 162 | 163 | def resnet50(pretrained_path=None, **kwargs): 164 | """Constructs a ResNet-50 model. 165 | Args: 166 | pretrained (bool): If True, returns a model pre-trained on ImageNet 167 | """ 168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 169 | if pretrained_path is not None: 170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth'))) 171 | print('Loaded pre-trained weights') 172 | return model 173 | 174 | 175 | def resnet101(pretrained_path=None, **kwargs): 176 | """Constructs a ResNet-101 model. 
177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 181 | if pretrained_path is not None: 182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth'))) 183 | print('Loaded pre-trained weights') 184 | return model 185 | -------------------------------------------------------------------------------- /mono/model/mono_fm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_fm/__init__.py -------------------------------------------------------------------------------- /mono/model/mono_fm/depth_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .layers import Conv1x1, Conv3x3, CRPBlock, upsample 5 | 6 | 7 | class DepthDecoder(nn.Module): 8 | def __init__(self, num_ch_enc): 9 | super(DepthDecoder, self).__init__() 10 | 11 | bottleneck = 256 12 | stage = 4 13 | self.do = nn.Dropout(p=0.5) 14 | 15 | self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False) 16 | self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False) 17 | self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False) 18 | self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False) 19 | 20 | self.iconv4 = Conv3x3(512, bottleneck) 21 | self.iconv3 = Conv3x3(bottleneck*2+1, bottleneck) 22 | self.iconv2 = Conv3x3(bottleneck*2+1, bottleneck) 23 | self.iconv1 = Conv3x3(bottleneck*2+1, bottleneck) 24 | 25 | self.crp4 = self._make_crp(bottleneck, bottleneck, stage) 26 | self.crp3 = self._make_crp(bottleneck, bottleneck, stage) 27 | self.crp2 = self._make_crp(bottleneck, bottleneck, stage) 28 | self.crp1 = self._make_crp(bottleneck, bottleneck, stage) 29 | 30 | self.merge4 = Conv3x3(bottleneck, bottleneck) 31 | self.merge3 = Conv3x3(bottleneck, bottleneck) 32 | self.merge2 = Conv3x3(bottleneck, bottleneck) 33 | self.merge1 = Conv3x3(bottleneck, bottleneck) 34 | 35 | # disp 36 | self.disp4 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 37 | self.disp3 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 38 | self.disp2 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 39 | self.disp1 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 40 | 41 | def _make_crp(self, in_planes, out_planes, stages): 42 | layers = [CRPBlock(in_planes, out_planes,stages)] 43 | return nn.Sequential(*layers) 44 | 45 | def forward(self, input_features, frame_id=0): 46 | self.outputs = {} 47 | l0, l1, l2, l3, l4 = input_features 48 | 49 | l4 = self.do(l4) 50 | l3 = self.do(l3) 51 | 52 | x4 = self.reduce4(l4) 53 | x4 = self.iconv4(x4) 54 | x4 = F.leaky_relu(x4) 55 | x4 = self.crp4(x4) 56 | x4 = self.merge4(x4) 57 | x4 = F.leaky_relu(x4) 58 | x4 = upsample(x4) 59 | disp4 = self.disp4(x4) 60 | 61 | 62 | x3 = self.reduce3(l3) 63 | x3 = torch.cat((x3, x4, disp4), 1) 64 | x3 = self.iconv3(x3) 65 | x3 = F.leaky_relu(x3) 66 | x3 = self.crp3(x3) 67 | x3 = self.merge3(x3) 68 | x3 = F.leaky_relu(x3) 69 | x3 = upsample(x3) 70 | disp3 = self.disp3(x3) 71 | 72 | 73 | x2 = self.reduce2(l2) 74 | x2 = torch.cat((x2, x3 , disp3), 1) 75 | x2 = self.iconv2(x2) 76 | x2 = F.leaky_relu(x2) 77 | x2 = self.crp2(x2) 78 | x2 = self.merge2(x2) 79 | x2 = F.leaky_relu(x2) 80 | x2 = upsample(x2) 81 | disp2 = self.disp2(x2) 82 | 83 | x1 = self.reduce1(l1) 84 | x1 = 
torch.cat((x1, x2, disp2), 1) 85 | x1 = self.iconv1(x1) 86 | x1 = F.leaky_relu(x1) 87 | x1 = self.crp1(x1) 88 | x1 = self.merge1(x1) 89 | x1 = F.leaky_relu(x1) 90 | x1 = upsample(x1) 91 | disp1 = self.disp1(x1) 92 | 93 | self.outputs[("disp", frame_id, 3)] = disp4 94 | self.outputs[("disp", frame_id, 2)] = disp3 95 | self.outputs[("disp", frame_id, 1)] = disp2 96 | self.outputs[("disp", frame_id, 0)] = disp1 97 | 98 | return self.outputs 99 | -------------------------------------------------------------------------------- /mono/model/mono_fm/depth_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from .resnet import resnet18, resnet34, resnet50, resnet101 6 | 7 | 8 | class DepthEncoder(nn.Module): 9 | def __init__(self, num_layers, pretrained_path=None): 10 | super(DepthEncoder, self).__init__() 11 | 12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 13 | 14 | resnets = {18: resnet18, 15 | 34: resnet34, 16 | 50: resnet50, 17 | 101: resnet101,} 18 | 19 | if num_layers not in resnets: 20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 21 | 22 | 23 | self.encoder = resnets[num_layers]() 24 | if pretrained_path is not None: 25 | checkpoint = torch.load(pretrained_path) 26 | self.encoder.load_state_dict(checkpoint) 27 | 28 | if num_layers > 34: 29 | self.num_ch_enc[1:] *= 4 30 | 31 | # for name, param in self.encoder.named_parameters(): 32 | # if 'bn' in name: 33 | # param.requires_grad = False 34 | 35 | def forward(self, input_image): 36 | self.features = [] 37 | x = (input_image - 0.45) / 0.225 38 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(x)))) 39 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 40 | self.features.append(self.encoder.layer2(self.features[-1])) 41 | self.features.append(self.encoder.layer3(self.features[-1])) 42 | self.features.append(self.encoder.layer4(self.features[-1])) 43 | 44 | return self.features 45 | -------------------------------------------------------------------------------- /mono/model/mono_fm/pose_decoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch.nn as nn 3 | 4 | 5 | class PoseDecoder(nn.Module): 6 | def __init__(self, num_ch_enc, stride=1): 7 | super(PoseDecoder, self).__init__() 8 | 9 | self.reduce = nn.Conv2d(num_ch_enc[-1], 256, 1) 10 | self.conv1 = nn.Conv2d(256, 256, 3, stride, 1) 11 | self.conv2 = nn.Conv2d(256, 256, 3, stride, 1) 12 | self.conv3 = nn.Conv2d(256, 6, 1) 13 | 14 | self.relu = nn.ReLU() 15 | 16 | def forward(self, input_features): 17 | f = input_features[-1] 18 | out = self.relu(self.reduce(f)) 19 | out = self.relu(self.conv1(out)) 20 | out = self.relu(self.conv2(out)) 21 | out = self.conv3(out) 22 | out = out.mean(3).mean(2) 23 | out = 0.01 * out.view(-1, 1, 1, 6) 24 | axisangle = out[..., :3] 25 | translation = out[..., 3:] 26 | return axisangle, translation 27 | -------------------------------------------------------------------------------- /mono/model/mono_fm/pose_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | from .resnet import 
ResNet, BasicBlock, resnet18, resnet34, resnet50, resnet101, Bottleneck 8 | from torch.nn import BatchNorm2d as bn 9 | 10 | 11 | class ResNetMultiImageInput(ResNet): 12 | def __init__(self, block, layers, num_classes=1000, num_input_images=2): 13 | super(ResNetMultiImageInput, self).__init__(block, layers) 14 | self.inplanes = 64 15 | self.conv1 = nn.Conv2d(num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 16 | self.bn1 = bn(64) 17 | self.relu = nn.ReLU(inplace=True) 18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 19 | self.layer1 = self._make_layer(block, 64, layers[0]) 20 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 21 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 22 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 23 | 24 | for m in self.modules(): 25 | if isinstance(m, nn.Conv2d): 26 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 27 | elif isinstance(m, nn.BatchNorm2d): 28 | nn.init.constant_(m.weight, 1) 29 | nn.init.constant_(m.bias, 0) 30 | 31 | 32 | def resnet_multiimage_input(num_layers, num_input_images=2, pretrained_path=None): 33 | assert num_layers in [18, 34, 50, 101], "Can only run with 18, 34, 50, 101 layers resnet" 34 | blocks = {18 : [2, 2, 2, 2], 35 | 34 : [3, 4, 6, 3], 36 | 50 : [3, 4, 6, 3], 37 | 101: [3, 4, 23, 3], 38 | }[num_layers] 39 | 40 | if num_layers < 40: 41 | model = ResNetMultiImageInput(BasicBlock, blocks, num_input_images=num_input_images) 42 | elif num_layers > 40: 43 | model = ResNetMultiImageInput(Bottleneck, blocks, num_input_images=num_input_images) 44 | 45 | if pretrained_path is not None: 46 | loaded = torch.load(pretrained_path) 47 | loaded['conv1.weight'] = torch.cat([loaded['conv1.weight']] * num_input_images, 1) / num_input_images 48 | model.load_state_dict(loaded) 49 | return model 50 | 51 | 52 | class PoseEncoder(nn.Module): 53 | def __init__(self, num_layers, pretrained_path=None, num_input_images=2): 54 | super(PoseEncoder, self).__init__() 55 | 56 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 57 | 58 | resnets = {18: resnet18, 59 | 34: resnet34, 60 | 50: resnet50, 61 | 101: resnet101,} 62 | 63 | if num_layers not in resnets: 64 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 65 | 66 | if num_input_images > 1: 67 | self.encoder = resnet_multiimage_input(num_layers, num_input_images, pretrained_path) 68 | else: 69 | self.encoder = resnets[num_layers]() 70 | if pretrained_path is not None: 71 | checkpoint = torch.load(pretrained_path) 72 | self.encoder.load_state_dict(checkpoint) 73 | 74 | if num_layers > 34: 75 | self.num_ch_enc[1:] *= 4 76 | 77 | # for name, param in self.encoder.named_parameters(): 78 | # if 'bn' in name: 79 | # param.requires_grad = False 80 | 81 | def forward(self, input_image): 82 | self.features = [] 83 | x = (input_image - 0.45) / 0.225 84 | x = self.encoder.conv1(x) 85 | x = self.encoder.bn1(x) 86 | self.features.append(self.encoder.relu(x)) 87 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 88 | self.features.append(self.encoder.layer2(self.features[-1])) 89 | self.features.append(self.encoder.layer3(self.features[-1])) 90 | self.features.append(self.encoder.layer4(self.features[-1])) 91 | 92 | return self.features 93 | -------------------------------------------------------------------------------- /mono/model/mono_fm/resnet.py: -------------------------------------------------------------------------------- 1 
| import os.path as osp 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import BatchNorm2d as bn 5 | 6 | def conv3x3(in_planes, out_planes, stride=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 9 | 10 | 11 | def conv1x1(in_planes, out_planes, stride=1): 12 | """1x1 convolution""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = conv3x3(inplanes, planes, stride) 22 | self.bn1 = bn(planes) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = bn(planes) 26 | self.downsample = downsample 27 | self.stride = stride 28 | 29 | def forward(self, x): 30 | residual = x 31 | 32 | out = self.conv1(x) 33 | out = self.bn1(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv2(out) 37 | out = self.bn2(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, inplanes, planes, stride=1, downsample=None): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = conv1x1(inplanes, planes) 54 | self.bn1 = bn(planes) 55 | self.conv2 = conv3x3(planes, planes, stride) 56 | self.bn2 = bn(planes) 57 | self.conv3 = conv1x1(planes, planes * self.expansion) 58 | self.bn3 = bn(planes * self.expansion) 59 | self.relu = nn.ReLU(inplace=True) 60 | self.downsample = downsample 61 | self.stride = stride 62 | 63 | def forward(self, x): 64 | residual = x 65 | 66 | out = self.conv1(x) 67 | out = self.bn1(out) 68 | out = self.relu(out) 69 | 70 | out = self.conv2(out) 71 | out = self.bn2(out) 72 | out = self.relu(out) 73 | 74 | out = self.conv3(out) 75 | out = self.bn3(out) 76 | 77 | if self.downsample is not None: 78 | residual = self.downsample(x) 79 | 80 | out += residual 81 | out = self.relu(out) 82 | 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | 88 | def __init__(self, block, layers, num_classes=1000): 89 | super(ResNet, self).__init__() 90 | self.inplanes = 64 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 92 | self.bn1 = bn(64) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0]) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 100 | self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 105 | elif isinstance(m, bn): 106 | nn.init.constant_(m.weight, 1) 107 | nn.init.constant_(m.bias, 0) 108 | 109 | def _make_layer(self, block, planes, blocks, stride=1): 110 | downsample = None 111 | if stride != 1 or self.inplanes != planes * block.expansion: 112 | downsample = nn.Sequential( 113 | conv1x1(self.inplanes, planes * block.expansion, stride), 114 | bn(planes * block.expansion), 115 | ) 116 | 117 | layers = [] 118 | 
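        # The first block carries the stride and, when the resolution or channel
        # count changes, a 1x1 conv + BatchNorm shortcut; the remaining blocks keep
        # the same shape.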
layers.append(block(self.inplanes, planes, stride, downsample)) 119 | self.inplanes = planes * block.expansion 120 | for _ in range(1, blocks): 121 | layers.append(block(self.inplanes, planes)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.conv1(x) 127 | x = self.bn1(x) 128 | x = self.relu(x) 129 | x = self.maxpool(x) 130 | 131 | x = self.layer1(x) 132 | x = self.layer2(x) 133 | x = self.layer3(x) 134 | x = self.layer4(x) 135 | 136 | return x 137 | 138 | 139 | def resnet18(pretrained_path=None): 140 | """Constructs a ResNet-18 model. 141 | Args: 142 | pretrained (bool): If True, returns a model pre-trained on ImageNet 143 | """ 144 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 145 | if pretrained_path is not None: 146 | model.load_state_dict(torch.load(pretrained_path)) 147 | print('Loaded pre-trained weights') 148 | return model 149 | 150 | 151 | def resnet34(pretrained_path=None, **kwargs): 152 | """Constructs a ResNet-34 model. 153 | Args: 154 | pretrained (bool): If True, returns a model pre-trained on ImageNet 155 | """ 156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 157 | if pretrained_path is not None: 158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth'))) 159 | print('Loaded pre-trained weights') 160 | return model 161 | 162 | 163 | def resnet50(pretrained_path=None, **kwargs): 164 | """Constructs a ResNet-50 model. 165 | Args: 166 | pretrained (bool): If True, returns a model pre-trained on ImageNet 167 | """ 168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 169 | if pretrained_path is not None: 170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth'))) 171 | print('Loaded pre-trained weights') 172 | return model 173 | 174 | 175 | def resnet101(pretrained_path=None, **kwargs): 176 | """Constructs a ResNet-101 model. 
177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 181 | if pretrained_path is not None: 182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth'))) 183 | print('Loaded pre-trained weights') 184 | return model 185 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_fm_joint/__init__.py -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/decoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch.nn as nn 3 | from .layers import ConvBlock, Conv3x3, upsample 4 | 5 | 6 | class Decoder(nn.Module): 7 | def __init__(self, num_ch_enc, num_output_channels=3): 8 | super(Decoder, self).__init__() 9 | 10 | num_ch_dec = [16, 32, 64, 128, 256] 11 | 12 | # upconv 13 | self.upconv5 = ConvBlock(num_ch_enc[4], num_ch_dec[4]) 14 | self.upconv4 = ConvBlock(num_ch_dec[4], num_ch_dec[3]) 15 | self.upconv3 = ConvBlock(num_ch_dec[3], num_ch_dec[2]) 16 | self.upconv2 = ConvBlock(num_ch_dec[2], num_ch_dec[1]) 17 | self.upconv1 = ConvBlock(num_ch_dec[1], num_ch_dec[0]) 18 | 19 | # iconv 20 | self.iconv5 = ConvBlock(num_ch_dec[4], num_ch_dec[4]) 21 | self.iconv4 = ConvBlock(num_ch_dec[3], num_ch_dec[3]) 22 | self.iconv3 = ConvBlock(num_ch_dec[2], num_ch_dec[2]) 23 | self.iconv2 = ConvBlock(num_ch_dec[1], num_ch_dec[1]) 24 | self.iconv1 = ConvBlock(num_ch_dec[0], num_ch_dec[0]) 25 | 26 | # disp 27 | self.disp4 = Conv3x3(num_ch_dec[3], num_output_channels) 28 | self.disp3 = Conv3x3(num_ch_dec[2], num_output_channels) 29 | self.disp2 = Conv3x3(num_ch_dec[1], num_output_channels) 30 | self.disp1 = Conv3x3(num_ch_dec[0], num_output_channels) 31 | 32 | self.sigmoid = nn.Sigmoid() 33 | 34 | 35 | def forward(self, input_features, frame_id=0): 36 | self.outputs = {} 37 | _, _, _, _, econv5 = input_features 38 | # (64,64,128,256,512)*4 39 | 40 | upconv5 = upsample(self.upconv5(econv5)) 41 | iconv5 = self.iconv5(upconv5) 42 | 43 | upconv4 = upsample(self.upconv4(iconv5)) 44 | iconv4 = self.iconv4(upconv4) 45 | 46 | upconv3 = upsample(self.upconv3(iconv4)) 47 | iconv3 = self.iconv3(upconv3) 48 | 49 | upconv2 = upsample(self.upconv2(iconv3)) 50 | iconv2 = self.iconv2(upconv2) 51 | 52 | upconv1 = upsample(self.upconv1(iconv2)) 53 | iconv1 = self.iconv1(upconv1) 54 | 55 | self.outputs[("res_img", frame_id, 3)] = self.sigmoid(self.disp4(iconv4)) 56 | self.outputs[("res_img", frame_id, 2)] = self.sigmoid(self.disp3(iconv3)) 57 | self.outputs[("res_img", frame_id, 1)] = self.sigmoid(self.disp2(iconv2)) 58 | self.outputs[("res_img", frame_id, 0)] = self.sigmoid(self.disp1(iconv1)) 59 | return self.outputs -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/depth_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .layers import Conv1x1, Conv3x3, CRPBlock, upsample 5 | 6 | 7 | class DepthDecoder(nn.Module): 8 | def __init__(self, num_ch_enc): 9 | super(DepthDecoder, self).__init__() 10 | 11 | 
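        # RefineNet-style decoder: every encoder scale is projected to a common
        # bottleneck width, refined with chained residual pooling (CRP) blocks, and
        # decoded by a 3x3 conv + sigmoid disparity head. iconv3-iconv1 take
        # bottleneck * 2 + 1 input channels because the upsampled coarser features
        # and their 1-channel disparity are concatenated with the skip connection.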
bottleneck = 256 12 | stage = 4 13 | self.do = nn.Dropout(p=0.5) 14 | 15 | self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False) 16 | self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False) 17 | self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False) 18 | self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False) 19 | 20 | self.iconv4 = Conv3x3(512, bottleneck) 21 | self.iconv3 = Conv3x3(bottleneck*2+1, bottleneck) 22 | self.iconv2 = Conv3x3(bottleneck*2+1, bottleneck) 23 | self.iconv1 = Conv3x3(bottleneck*2+1, bottleneck) 24 | 25 | self.crp4 = self._make_crp(bottleneck, bottleneck, stage) 26 | self.crp3 = self._make_crp(bottleneck, bottleneck, stage) 27 | self.crp2 = self._make_crp(bottleneck, bottleneck, stage) 28 | self.crp1 = self._make_crp(bottleneck, bottleneck, stage) 29 | 30 | self.merge4 = Conv3x3(bottleneck, bottleneck) 31 | self.merge3 = Conv3x3(bottleneck, bottleneck) 32 | self.merge2 = Conv3x3(bottleneck, bottleneck) 33 | self.merge1 = Conv3x3(bottleneck, bottleneck) 34 | 35 | # disp 36 | self.disp4 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 37 | self.disp3 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 38 | self.disp2 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 39 | self.disp1 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 40 | 41 | def _make_crp(self, in_planes, out_planes, stages): 42 | layers = [CRPBlock(in_planes, out_planes,stages)] 43 | return nn.Sequential(*layers) 44 | 45 | def forward(self, input_features, frame_id=0): 46 | self.outputs = {} 47 | l0, l1, l2, l3, l4 = input_features 48 | 49 | l4 = self.do(l4) 50 | l3 = self.do(l3) 51 | 52 | x4 = self.reduce4(l4) 53 | x4 = self.iconv4(x4) 54 | x4 = F.leaky_relu(x4) 55 | x4 = self.crp4(x4) 56 | x4 = self.merge4(x4) 57 | x4 = F.leaky_relu(x4) 58 | x4 = upsample(x4) 59 | disp4 = self.disp4(x4) 60 | 61 | 62 | x3 = self.reduce3(l3) 63 | x3 = torch.cat((x3, x4, disp4), 1) 64 | x3 = self.iconv3(x3) 65 | x3 = F.leaky_relu(x3) 66 | x3 = self.crp3(x3) 67 | x3 = self.merge3(x3) 68 | x3 = F.leaky_relu(x3) 69 | x3 = upsample(x3) 70 | disp3 = self.disp3(x3) 71 | 72 | 73 | x2 = self.reduce2(l2) 74 | x2 = torch.cat((x2, x3 , disp3), 1) 75 | x2 = self.iconv2(x2) 76 | x2 = F.leaky_relu(x2) 77 | x2 = self.crp2(x2) 78 | x2 = self.merge2(x2) 79 | x2 = F.leaky_relu(x2) 80 | x2 = upsample(x2) 81 | disp2 = self.disp2(x2) 82 | 83 | x1 = self.reduce1(l1) 84 | x1 = torch.cat((x1, x2, disp2), 1) 85 | x1 = self.iconv1(x1) 86 | x1 = F.leaky_relu(x1) 87 | x1 = self.crp1(x1) 88 | x1 = self.merge1(x1) 89 | x1 = F.leaky_relu(x1) 90 | x1 = upsample(x1) 91 | disp1 = self.disp1(x1) 92 | 93 | self.outputs[("disp", frame_id, 3)] = disp4 94 | self.outputs[("disp", frame_id, 2)] = disp3 95 | self.outputs[("disp", frame_id, 1)] = disp2 96 | self.outputs[("disp", frame_id, 0)] = disp1 97 | 98 | return self.outputs 99 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/depth_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from .resnet import resnet18, resnet34, resnet50, resnet101 6 | 7 | 8 | class DepthEncoder(nn.Module): 9 | def __init__(self, num_layers, pretrained_path=None): 10 | super(DepthEncoder, self).__init__() 11 | 12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 13 | 14 | resnets = {18: resnet18, 15 | 34: resnet34, 16 | 50: resnet50, 17 | 101: resnet101,} 
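        # ResNet-50/101 use Bottleneck blocks whose outputs are 4x wider, which is
        # why num_ch_enc[1:] is scaled by 4 below.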
18 | 19 | if num_layers not in resnets: 20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 21 | 22 | 23 | self.encoder = resnets[num_layers]() 24 | if pretrained_path is not None: 25 | checkpoint = torch.load(pretrained_path) 26 | self.encoder.load_state_dict(checkpoint) 27 | 28 | if num_layers > 34: 29 | self.num_ch_enc[1:] *= 4 30 | 31 | # for name, param in self.encoder.named_parameters(): 32 | # if 'bn' in name: 33 | # param.requires_grad = False 34 | 35 | def forward(self, input_image): 36 | self.features = [] 37 | x = (input_image - 0.45) / 0.225 38 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(x)))) 39 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 40 | self.features.append(self.encoder.layer2(self.features[-1])) 41 | self.features.append(self.encoder.layer3(self.features[-1])) 42 | self.features.append(self.encoder.layer4(self.features[-1])) 43 | 44 | return self.features 45 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from .resnet import resnet18, resnet34, resnet50, resnet101 6 | 7 | 8 | class Encoder(nn.Module): 9 | def __init__(self, num_layers, pretrained_path=None): 10 | super(Encoder, self).__init__() 11 | 12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 13 | 14 | resnets = {18: resnet18, 15 | 34: resnet34, 16 | 50: resnet50, 17 | 101: resnet101,} 18 | 19 | if num_layers not in resnets: 20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 21 | 22 | 23 | self.encoder = resnets[num_layers]() 24 | if pretrained_path is not None: 25 | checkpoint = torch.load(pretrained_path) 26 | self.encoder.load_state_dict(checkpoint) 27 | 28 | if num_layers > 34: 29 | self.num_ch_enc[1:] *= 4 30 | 31 | # for name, param in self.encoder.named_parameters(): 32 | # if 'bn' in name: 33 | # param.requires_grad = False 34 | 35 | def forward(self, input_image): 36 | self.features = [] 37 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(input_image)))) 38 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 39 | self.features.append(self.encoder.layer2(self.features[-1])) 40 | self.features.append(self.encoder.layer3(self.features[-1])) 41 | self.features.append(self.encoder.layer4(self.features[-1])) 42 | 43 | return self.features 44 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/pose_decoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch.nn as nn 3 | 4 | 5 | class PoseDecoder(nn.Module): 6 | def __init__(self, num_ch_enc, stride=1): 7 | super(PoseDecoder, self).__init__() 8 | 9 | self.reduce = nn.Conv2d(num_ch_enc[-1], 256, 1) 10 | self.conv1 = nn.Conv2d(256, 256, 3, stride, 1) 11 | self.conv2 = nn.Conv2d(256, 256, 3, stride, 1) 12 | self.conv3 = nn.Conv2d(256, 6, 1) 13 | 14 | self.relu = nn.ReLU() 15 | 16 | def forward(self, input_features): 17 | f = input_features[-1] 18 | out = self.relu(self.reduce(f)) 19 | out = self.relu(self.conv1(out)) 20 | out = self.relu(self.conv2(out)) 21 | out = self.conv3(out) 22 | out = 
out.mean(3).mean(2) 23 | out = 0.01 * out.view(-1, 1, 1, 6) 24 | axisangle = out[..., :3] 25 | translation = out[..., 3:] 26 | return axisangle, translation 27 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/pose_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | from .resnet import ResNet, BasicBlock, resnet18, resnet34, resnet50, resnet101, Bottleneck 8 | from torch.nn import BatchNorm2d as bn 9 | 10 | 11 | class ResNetMultiImageInput(ResNet): 12 | def __init__(self, block, layers, num_classes=1000, num_input_images=2): 13 | super(ResNetMultiImageInput, self).__init__(block, layers) 14 | self.inplanes = 64 15 | self.conv1 = nn.Conv2d(num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 16 | self.bn1 = bn(64) 17 | self.relu = nn.ReLU(inplace=True) 18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 19 | self.layer1 = self._make_layer(block, 64, layers[0]) 20 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 21 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 22 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 23 | 24 | for m in self.modules(): 25 | if isinstance(m, nn.Conv2d): 26 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 27 | elif isinstance(m, nn.BatchNorm2d): 28 | nn.init.constant_(m.weight, 1) 29 | nn.init.constant_(m.bias, 0) 30 | 31 | 32 | def resnet_multiimage_input(num_layers, num_input_images=2, pretrained_path=None): 33 | assert num_layers in [18, 34, 50, 101], "Can only run with 18, 34, 50, 101 layers resnet" 34 | blocks = {18 : [2, 2, 2, 2], 35 | 34 : [3, 4, 6, 3], 36 | 50 : [3, 4, 6, 3], 37 | 101: [3, 4, 23, 3], 38 | }[num_layers] 39 | 40 | if num_layers < 40: 41 | model = ResNetMultiImageInput(BasicBlock, blocks, num_input_images=num_input_images) 42 | elif num_layers > 40: 43 | model = ResNetMultiImageInput(Bottleneck, blocks, num_input_images=num_input_images) 44 | 45 | if pretrained_path is not None: 46 | loaded = torch.load(pretrained_path) 47 | loaded['conv1.weight'] = torch.cat([loaded['conv1.weight']] * num_input_images, 1) / num_input_images 48 | model.load_state_dict(loaded) 49 | return model 50 | 51 | 52 | class PoseEncoder(nn.Module): 53 | def __init__(self, num_layers, pretrained_path=None, num_input_images=2): 54 | super(PoseEncoder, self).__init__() 55 | 56 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 57 | 58 | resnets = {18: resnet18, 59 | 34: resnet34, 60 | 50: resnet50, 61 | 101: resnet101,} 62 | 63 | if num_layers not in resnets: 64 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 65 | 66 | if num_input_images > 1: 67 | self.encoder = resnet_multiimage_input(num_layers, num_input_images, pretrained_path) 68 | else: 69 | self.encoder = resnets[num_layers]() 70 | if pretrained_path is not None: 71 | checkpoint = torch.load(pretrained_path) 72 | self.encoder.load_state_dict(checkpoint) 73 | 74 | if num_layers > 34: 75 | self.num_ch_enc[1:] *= 4 76 | 77 | # for name, param in self.encoder.named_parameters(): 78 | # if 'bn' in name: 79 | # param.requires_grad = False 80 | 81 | def forward(self, input_image): 82 | self.features = [] 83 | x = (input_image - 0.45) / 0.225 84 | x = self.encoder.conv1(x) 85 | x = self.encoder.bn1(x) 86 | 
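        # Collect a five-level feature pyramid (strides 2, 4, 8, 16, 32);
        # PoseDecoder only consumes the last, coarsest level.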
self.features.append(self.encoder.relu(x)) 87 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 88 | self.features.append(self.encoder.layer2(self.features[-1])) 89 | self.features.append(self.encoder.layer3(self.features[-1])) 90 | self.features.append(self.encoder.layer4(self.features[-1])) 91 | 92 | return self.features 93 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/resnet.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import BatchNorm2d as bn 5 | 6 | def conv3x3(in_planes, out_planes, stride=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 9 | 10 | 11 | def conv1x1(in_planes, out_planes, stride=1): 12 | """1x1 convolution""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = conv3x3(inplanes, planes, stride) 22 | self.bn1 = bn(planes) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = bn(planes) 26 | self.downsample = downsample 27 | self.stride = stride 28 | 29 | def forward(self, x): 30 | residual = x 31 | 32 | out = self.conv1(x) 33 | out = self.bn1(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv2(out) 37 | out = self.bn2(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, inplanes, planes, stride=1, downsample=None): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = conv1x1(inplanes, planes) 54 | self.bn1 = bn(planes) 55 | self.conv2 = conv3x3(planes, planes, stride) 56 | self.bn2 = bn(planes) 57 | self.conv3 = conv1x1(planes, planes * self.expansion) 58 | self.bn3 = bn(planes * self.expansion) 59 | self.relu = nn.ReLU(inplace=True) 60 | self.downsample = downsample 61 | self.stride = stride 62 | 63 | def forward(self, x): 64 | residual = x 65 | 66 | out = self.conv1(x) 67 | out = self.bn1(out) 68 | out = self.relu(out) 69 | 70 | out = self.conv2(out) 71 | out = self.bn2(out) 72 | out = self.relu(out) 73 | 74 | out = self.conv3(out) 75 | out = self.bn3(out) 76 | 77 | if self.downsample is not None: 78 | residual = self.downsample(x) 79 | 80 | out += residual 81 | out = self.relu(out) 82 | 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | 88 | def __init__(self, block, layers, num_classes=1000): 89 | super(ResNet, self).__init__() 90 | self.inplanes = 64 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 92 | self.bn1 = bn(64) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0]) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 100 | self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | 102 | for m in self.modules(): 103 | if 
isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 105 | elif isinstance(m, bn): 106 | nn.init.constant_(m.weight, 1) 107 | nn.init.constant_(m.bias, 0) 108 | 109 | def _make_layer(self, block, planes, blocks, stride=1): 110 | downsample = None 111 | if stride != 1 or self.inplanes != planes * block.expansion: 112 | downsample = nn.Sequential( 113 | conv1x1(self.inplanes, planes * block.expansion, stride), 114 | bn(planes * block.expansion), 115 | ) 116 | 117 | layers = [] 118 | layers.append(block(self.inplanes, planes, stride, downsample)) 119 | self.inplanes = planes * block.expansion 120 | for _ in range(1, blocks): 121 | layers.append(block(self.inplanes, planes)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.conv1(x) 127 | x = self.bn1(x) 128 | x = self.relu(x) 129 | x = self.maxpool(x) 130 | 131 | x = self.layer1(x) 132 | x = self.layer2(x) 133 | x = self.layer3(x) 134 | x = self.layer4(x) 135 | 136 | return x 137 | 138 | 139 | def resnet18(pretrained_path=None): 140 | """Constructs a ResNet-18 model. 141 | Args: 142 | pretrained (bool): If True, returns a model pre-trained on ImageNet 143 | """ 144 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 145 | if pretrained_path is not None: 146 | model.load_state_dict(torch.load(pretrained_path)) 147 | print('Loaded pre-trained weights') 148 | return model 149 | 150 | 151 | def resnet34(pretrained_path=None, **kwargs): 152 | """Constructs a ResNet-34 model. 153 | Args: 154 | pretrained (bool): If True, returns a model pre-trained on ImageNet 155 | """ 156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 157 | if pretrained_path is not None: 158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth'))) 159 | print('Loaded pre-trained weights') 160 | return model 161 | 162 | 163 | def resnet50(pretrained_path=None, **kwargs): 164 | """Constructs a ResNet-50 model. 165 | Args: 166 | pretrained (bool): If True, returns a model pre-trained on ImageNet 167 | """ 168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 169 | if pretrained_path is not None: 170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth'))) 171 | print('Loaded pre-trained weights') 172 | return model 173 | 174 | 175 | def resnet101(pretrained_path=None, **kwargs): 176 | """Constructs a ResNet-101 model. 177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 181 | if pretrained_path is not None: 182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth'))) 183 | print('Loaded pre-trained weights') 184 | return model 185 | -------------------------------------------------------------------------------- /mono/model/registry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | class Registry(object): 9 | def __init__(self, name): 10 | self._name = name 11 | self._module_dict = dict() 12 | 13 | @property 14 | def name(self): 15 | return self._name 16 | 17 | @property 18 | def module_dict(self): 19 | return self._module_dict 20 | 21 | def _register_module(self, module_class): 22 | """Register a module. 23 | 24 | Args: 25 | module (:obj:`nn.Module`): Module to be registered. 
26 | """ 27 | if not issubclass(module_class, nn.Module): 28 | raise TypeError( 29 | 'module must be a child of nn.Module, but got {}'.format( 30 | module_class)) 31 | module_name = module_class.__name__ 32 | if module_name in self._module_dict: 33 | raise KeyError('{} is already registered in {}'.format( 34 | module_name, self.name)) 35 | self._module_dict[module_name] = module_class 36 | 37 | def register_module(self, cls): 38 | self._register_module(cls) 39 | return cls 40 | 41 | MONO = Registry('mono') 42 | -------------------------------------------------------------------------------- /mono/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/tools/__init__.py -------------------------------------------------------------------------------- /mono/tools/geometry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provides generic geometry algorithms. 3 | author: Michael Grupp 4 | This file is part of evo (github.com/MichaelGrupp/evo). 5 | evo is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | evo is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | You should have received a copy of the GNU General Public License 14 | along with evo. If not, see . 15 | """ 16 | 17 | import numpy as np 18 | 19 | 20 | def umeyama_alignment(x, y, with_scale=False): 21 | """ 22 | Computes the least squares solution parameters of an Sim(m) matrix 23 | that minimizes the distance between a set of registered points. 24 | Umeyama, Shinji: Least-squares estimation of transformation parameters 25 | between two point patterns. IEEE PAMI, 1991 26 | :param x: mxn matrix of points, m = dimension, n = nr. of data points 27 | :param y: mxn matrix of points, m = dimension, n = nr. of data points 28 | :param with_scale: set to True to align also the scale (default: 1.0 scale) 29 | :return: r, t, c - rotation matrix, translation vector and scale factor 30 | """ 31 | if x.shape != y.shape: 32 | print("data matrices must have the same shape") 33 | 34 | # m = dimension, n = nr. of data points 35 | m, n = x.shape 36 | 37 | # means, eq. 34 and 35 38 | mean_x = x.mean(axis=1) 39 | mean_y = y.mean(axis=1) 40 | 41 | # variance, eq. 36 42 | # "transpose" for column subtraction 43 | sigma_x = 1.0 / n * (np.linalg.norm(x - mean_x[:, np.newaxis])**2) 44 | 45 | # covariance matrix, eq. 38 46 | outer_sum = np.zeros((m, m)) 47 | for i in range(n): 48 | outer_sum += np.outer((y[:, i] - mean_y), (x[:, i] - mean_x)) 49 | cov_xy = np.multiply(1.0 / n, outer_sum) 50 | 51 | # SVD (text betw. eq. 38 and 39) 52 | u, d, v = np.linalg.svd(cov_xy) 53 | 54 | # S matrix, eq. 43 55 | s = np.eye(m) 56 | if np.linalg.det(u) * np.linalg.det(v) < 0.0: 57 | # Ensure a RHS coordinate system (Kabsch algorithm). 58 | s[m - 1, m - 1] = -1 59 | 60 | # rotation, eq. 40 61 | r = u.dot(s).dot(v) 62 | 63 | # scale & translation, eq. 
42 and 41 64 | c = 1 / sigma_x * np.trace(np.diag(d).dot(s)) if with_scale else 1.0 65 | t = mean_y - np.multiply(c, r.dot(mean_x)) 66 | 67 | return r, t, c 68 | 69 | 70 | def arc_len(x): 71 | """ 72 | :param x: nxm array of points, m=dimension 73 | :return: the (discrete approximated) arc-length of the point sequence 74 | """ 75 | return np.sum(np.linalg.norm(x[:-1] - x[1:], axis=1)) 76 | 77 | 78 | def accumulated_distances(x): 79 | """ 80 | :param x: nxm array of points, m=dimension 81 | :return: the accumulated distances along the point sequence 82 | """ 83 | return np.concatenate((np.array([0]), 84 | np.cumsum(np.linalg.norm(x[:-1] - x[1:], axis=1)))) -------------------------------------------------------------------------------- /mono/tools/lie_algebra.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF8 -*- 2 | """ 3 | Provides functions for Lie group calculations. 4 | author: Michael Grupp 5 | This file is part of evo (github.com/MichaelGrupp/evo). 6 | evo is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | evo is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | You should have received a copy of the GNU General Public License 15 | along with evo. If not, see . 16 | """ 17 | 18 | import numpy as np 19 | import scipy.linalg as sl 20 | 21 | import mono.tools.transformations as tr 22 | 23 | 24 | def hat(v): 25 | """ 26 | :param v: 3x1 vector 27 | :return: 3x3 skew symmetric matrix 28 | """ 29 | # yapf: disable 30 | return np.array([[0.0, -v[2], v[1]], 31 | [v[2], 0.0, -v[0]], 32 | [-v[1], v[0], 0.0]]) 33 | # yapf: enable 34 | 35 | 36 | def vee(m): 37 | """ 38 | :param m: 3x3 skew symmetric matrix 39 | :return: 3x1 vector 40 | """ 41 | return np.array([-m[1, 2], m[0, 2], -m[0, 1]]) 42 | 43 | 44 | def so3_exp(axis, angle): 45 | """ 46 | Computes an SO(3) matrix from an axis/angle representation. 47 | Code source: http://stackoverflow.com/a/25709323 48 | :param axis: 3x1 rotation axis (unit vector!) 49 | :param angle: radians 50 | :return: SO(3) rotation matrix (matrix exponential of so(3)) 51 | """ 52 | return sl.expm(np.cross(np.eye(3), axis / np.linalg.norm(axis) * angle)) 53 | 54 | 55 | def so3_log(r, return_angle_only=True, return_skew=False): 56 | """ 57 | :param r: SO(3) rotation matrix 58 | :param return_angle_only: return only the angle (default) 59 | :param return_skew: return skew symmetric Lie algebra element 60 | :return: axis/angle 61 | or if skew: 62 | 3x3 skew symmetric logarithmic map in so(3) (Ma, Soatto eq. 
2.8) 63 | """ 64 | if not is_so3(r): 65 | print("matrix is not a valid SO(3) group element") 66 | if return_angle_only and not return_skew: 67 | return np.arccos(min(1, max(-1, (np.trace(r) - 1) / 2))) 68 | angle, axis, _ = tr.rotation_from_matrix(se3(r, [0, 0, 0])) 69 | if return_skew: 70 | return hat(axis * angle) 71 | else: 72 | return axis, angle 73 | 74 | 75 | def se3(r=np.eye(3), t=np.array([0, 0, 0])): 76 | """ 77 | :param r: SO(3) rotation matrix 78 | :param t: 3x1 translation vector 79 | :return: SE(3) transformation matrix 80 | """ 81 | se3 = np.eye(4) 82 | se3[:3, :3] = r 83 | se3[:3, 3] = t 84 | return se3 85 | 86 | 87 | def sim3(r, t, s): 88 | """ 89 | :param r: SO(3) rotation matrix 90 | :param t: 3x1 translation vector 91 | :param s: positive, non-zero scale factor 92 | :return: Sim(3) similarity transformation matrix 93 | """ 94 | sim3 = np.eye(4) 95 | sim3[:3, :3] = s * r 96 | sim3[:3, 3] = t 97 | return sim3 98 | 99 | 100 | def so3_from_se3(p): 101 | """ 102 | :param p: absolute SE(3) pose 103 | :return: the SO(3) rotation matrix in p 104 | """ 105 | return p[:3, :3] 106 | 107 | 108 | def se3_inverse(p): 109 | """ 110 | :param p: absolute SE(3) pose 111 | :return: the inverted pose 112 | """ 113 | r_inv = p[:3, :3].transpose() 114 | t_inv = -r_inv.dot(p[:3, 3]) 115 | return se3(r_inv, t_inv) 116 | 117 | 118 | def is_so3(r): 119 | """ 120 | :param r: a 3x3 matrix 121 | :return: True if r is in the SO(3) group 122 | """ 123 | # Check the determinant. 124 | det_valid = np.isclose(np.linalg.det(r), [1.0], atol=1e-6) 125 | # Check if the transpose is the inverse. 126 | inv_valid = np.allclose(r.transpose().dot(r), np.eye(3), atol=1e-6) 127 | return det_valid and inv_valid 128 | 129 | 130 | def is_se3(p): 131 | """ 132 | :param p: a 4x4 matrix 133 | :return: True if p is in the SE(3) group 134 | """ 135 | rot_valid = is_so3(p[:3, :3]) 136 | lower_valid = np.equal(p[3, :], np.array([0.0, 0.0, 0.0, 1.0])).all() 137 | return rot_valid and lower_valid 138 | 139 | 140 | def is_sim3(p, s): 141 | """ 142 | :param p: a 4x4 matrix 143 | :param s: expected scale factor 144 | :return: True if p is in the Sim(3) group with scale s 145 | """ 146 | rot = p[:3, :3] 147 | rot_unscaled = np.multiply(rot, 1.0 / s) 148 | rot_valid = is_so3(rot_unscaled) 149 | lower_valid = np.equal(p[3, :], np.array([0.0, 0.0, 0.0, 1.0])).all() 150 | return rot_valid and lower_valid 151 | 152 | 153 | def relative_so3(r1, r2): 154 | """ 155 | :param r1, r2: SO(3) matrices 156 | :return: the relative rotation r1^{⁻1} * r2 157 | """ 158 | return np.dot(r1.transpose(), r2) 159 | 160 | 161 | def relative_se3(p1, p2): 162 | """ 163 | :param p1, p2: SE(3) matrices 164 | :return: the relative transformation p1^{⁻1} * p2 165 | """ 166 | return np.dot(se3_inverse(p1), p2) 167 | 168 | 169 | def random_so3(): 170 | """ 171 | :return: a random SO(3) matrix (for debugging) 172 | """ 173 | return tr.random_rotation_matrix()[:3, :3] 174 | 175 | 176 | def random_se3(): 177 | """ 178 | :return: a random SE(3) matrix (for debugging) 179 | """ 180 | r = random_so3() 181 | t = tr.random_vector(3) 182 | return se3(r, t) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | matplotlib 4 | scikit-image 5 | scipy 6 | imageio 7 | tqdm 8 | cython 9 | mmcv==0.4.4 10 | torch>=1.1 11 | torchvision>=0.4.0 12 | pypng 
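Note (added for illustration, not part of the repository): the trajectory-alignment utilities above in mono/tools/geometry.py and mono/tools/lie_algebra.py are what the pose-evaluation tooling builds on. Below is a minimal usage sketch of umeyama_alignment; the random arrays are stand-ins for real predicted and ground-truth camera positions, and the 3xN layout (dimension x number of points) follows the function's docstring.

import numpy as np
from mono.tools.geometry import umeyama_alignment

# Stand-in trajectories: 3xN arrays of camera positions (dimension x points).
pred_xyz = np.random.rand(3, 100)
gt_xyz = np.random.rand(3, 100)

# with_scale=True also recovers the Sim(3) scale factor, which monocular
# methods need because their translation is only defined up to scale.
r, t, c = umeyama_alignment(pred_xyz, gt_xyz, with_scale=True)

# Apply the recovered similarity transform and measure the remaining error.
aligned = c * r.dot(pred_xyz) + t[:, np.newaxis]
ate_rmse = np.sqrt(np.mean(np.sum((aligned - gt_xyz) ** 2, axis=0)))
print('ATE RMSE after Sim(3) alignment: {:.4f}'.format(ate_rmse))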
-------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | if __name__ == '__main__': 4 | # os.system('/home/user/software/anaconda/envs/py37t11/bin/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=1 train.py') 5 | # os.system('/home/hadoop-wallemnl/cephfs/data/shuchang/envs/py37t11/bin/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=8 train.py') 6 | os.system('/home/sconly/Documents/code/py37t11/bin/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=1 train.py --config ./config/cfg_kitti_fm.py --work_dir /media/sconly/harddisk/weight/fmdepth') 7 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/draw_odometry.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import os 3 | import sys 4 | import argparse 5 | import numpy as np 6 | 7 | import torch 8 | from torch.utils.data import DataLoader 9 | 10 | sys.path.append('.') 11 | sys.path.append('..') 12 | from mono.datasets.euroc_dataset import FolderDataset 13 | from mono.datasets.kitti_dataset import KITTIOdomDataset 14 | from mono.datasets.utils import readlines,transformation_from_parameters 15 | from mono.model.mono_baseline.pose_encoder import PoseEncoder 16 | from mono.model.mono_baseline.pose_decoder import PoseDecoder 17 | from mono.tools.kitti_evaluation_toolkit import kittiOdomEval 18 | 19 | 20 | def odo(opt): 21 | if opt.kitti: 22 | filenames = readlines("../mono/datasets/splits/odom/test_files_{:02d}.txt".format(opt.sequence_id)) 23 | 24 | dataset = KITTIOdomDataset(opt.data_path, 25 | filenames, 26 | opt.height, 27 | opt.width, 28 | [0, 1], 29 | is_train=False, 30 | img_ext='.png', 31 | gt_depth_path=None) 32 | else: 33 | dataset = FolderDataset(opt.data_path, 34 | None, 35 | opt.height, 36 | opt.width, 37 | [0, 1], 38 | is_train=False, 39 | img_ext='.png', 40 | gt_depth_path=None) 41 | 42 | dataloader = DataLoader(dataset, 43 | 1, 44 | shuffle=False, 45 | num_workers=4, 46 | pin_memory=True, 47 | drop_last=False) 48 | 49 | pose_encoder = PoseEncoder(18, None, 2) 50 | pose_decoder = PoseDecoder(pose_encoder.num_ch_enc) 51 | 52 | checkpoint = torch.load(opt.model_path) 53 | for name, param in pose_encoder.state_dict().items(): 54 | pose_encoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseEncoder.' + name]) 55 | for name, param in pose_decoder.state_dict().items(): 56 | pose_decoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseDecoder.' 
+ name]) 57 | pose_encoder.cuda() 58 | pose_encoder.eval() 59 | pose_decoder.cuda() 60 | pose_decoder.eval() 61 | 62 | global_pose = np.identity(4) 63 | poses = [global_pose[0:3, :].reshape(1, 12)] 64 | 65 | with torch.no_grad(): 66 | for batch_idx, inputs in enumerate(dataloader): 67 | for key, ipt in inputs.items(): 68 | inputs[key] = ipt.cuda() 69 | all_color_aug = torch.cat([inputs[("color_aug", i, 0)] for i in [0,1]], 1) 70 | axisangle, translation = pose_decoder(pose_encoder(all_color_aug)) 71 | g = transformation_from_parameters(axisangle[:, 0], translation[:, 0]) 72 | backward_transform = g.squeeze().cpu().numpy()#the transformation from frame +1 to frame 0 73 | global_pose = global_pose @ np.linalg.inv(backward_transform) 74 | poses.append(global_pose[0:3, :].reshape(1, 12)) 75 | poses = np.concatenate(poses, axis=0) 76 | 77 | if opt.kitti: 78 | filename = os.path.join(opt.result_dir, "{:02d}_pred.txt".format(opt.sequence_id)) 79 | else: 80 | filename = os.path.join(opt.result_dir, "fm_ms_euroc_mh04_diff_3.txt") 81 | 82 | np.savetxt(filename, poses, delimiter=' ', fmt='%1.8e') 83 | if opt.kitti: 84 | opt.eva_seqs = '{:02d}_pred'.format(opt.sequence_id) 85 | pose_eval = kittiOdomEval(opt) 86 | pose_eval.eval(toCameraCoord=False) # set the value according to the predicted results 87 | print('saving into ', opt.result_dir) 88 | 89 | 90 | if __name__ == "__main__": 91 | parser = argparse.ArgumentParser(description='Train a detector') 92 | parser.add_argument('--model_path', default='/media/sconly/24eda5d5-e79b-423b-8dcc-8339a15f3219/weight/fm_depth_odom.pth', help='model save path') 93 | parser.add_argument('--data_path', default='/media/sconly/24eda5d5-e79b-423b-8dcc-8339a15f3219/data/kitti/Odometry', help='kitti odometry dataset path') 94 | parser.add_argument('--gt_dir', default='../mono/datasets/gt_pose',help='kitti odometry gt path') 95 | parser.add_argument('--result_dir', default='/media/sconly/24eda5d5-e79b-423b-8dcc-8339a15f3219/odom/') 96 | parser.add_argument('--height', default=192) 97 | parser.add_argument('--width', default=640) 98 | parser.add_argument('--kitti', default=True, help='whether test on the kitti odometry dataset') 99 | parser.add_argument('--sequence_id', default=9, help='which kitti odometry sequence for testing') 100 | opts = parser.parse_args() 101 | odo(opts) 102 | print("you can also run 'evo_traj kitti -s *.txt *.txt --ref=*.txt -p --plot_mode=xz' in terminal for visualization") -------------------------------------------------------------------------------- /scripts/eval_depth.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import cv2 3 | import sys 4 | import numpy as np 5 | from mmcv import Config 6 | 7 | import torch 8 | from torch.utils.data import DataLoader 9 | 10 | sys.path.append('.') 11 | from mono.model.registry import MONO 12 | from mono.model.mono_baseline.layers import disp_to_depth 13 | from mono.datasets.utils import readlines, compute_errors 14 | from mono.datasets.kitti_dataset import KITTIRAWDataset 15 | 16 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1) 17 | STEREO_SCALE_FACTOR = 36 18 | MIN_DEPTH=1e-3 19 | MAX_DEPTH=80 20 | 21 | 22 | def evaluate(MODEL_PATH, CFG_PATH, GT_PATH): 23 | filenames = readlines("../mono/datasets/splits/exp/val_files.txt") 24 | cfg = Config.fromfile(CFG_PATH) 25 | 26 | dataset = KITTIRAWDataset(cfg.data['in_path'], 27 | filenames, 28 | cfg.data['height'], 
29 | cfg.data['width'], 30 | [0], 31 | is_train=False, 32 | gt_depth_path=GT_PATH) 33 | 34 | dataloader = DataLoader(dataset, 35 | 1, 36 | shuffle=False, 37 | num_workers=4, 38 | pin_memory=True, 39 | drop_last=False) 40 | 41 | cfg.model['imgs_per_gpu'] = 1 42 | model = MONO.module_dict[cfg.model['name']](cfg.model) 43 | checkpoint = torch.load(MODEL_PATH) 44 | model.load_state_dict(checkpoint['state_dict'], strict=True) 45 | model.cuda() 46 | model.eval() 47 | 48 | pred_disps = [] 49 | with torch.no_grad(): 50 | for batch_idx, inputs in enumerate(dataloader): 51 | for key, ipt in inputs.items(): 52 | inputs[key] = ipt.cuda() 53 | outputs = model(inputs) 54 | 55 | disp = outputs[("disp", 0, 0)] 56 | 57 | pred_disp, _ = disp_to_depth(disp, 0.1, 100) 58 | pred_disp = pred_disp.cpu()[:, 0].numpy() 59 | pred_disps.append(pred_disp) 60 | pred_disps = np.concatenate(pred_disps) 61 | 62 | gt_depths = np.load(GT_PATH, allow_pickle=True, fix_imports=True, encoding='latin1')["data"] 63 | 64 | print("-> Evaluating") 65 | if cfg.data['stereo_scale']: 66 | print('using baseline') 67 | else: 68 | print('using mean scaling') 69 | 70 | errors = [] 71 | ratios = [] 72 | for i in range(pred_disps.shape[0]): 73 | gt_depth = gt_depths[i] 74 | gt_height, gt_width = gt_depth.shape[:2] 75 | 76 | pred_disp = pred_disps[i] 77 | pred_disp = cv2.resize(pred_disp, (gt_width, gt_height)) 78 | 79 | pred_depth = 1 / pred_disp 80 | 81 | mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH) 82 | crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height, 83 | 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32) 84 | crop_mask = np.zeros(mask.shape) 85 | crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1 86 | mask = np.logical_and(mask, crop_mask) 87 | 88 | pred_depth = pred_depth[mask] 89 | gt_depth = gt_depth[mask] 90 | 91 | ratio = np.median(gt_depth) / np.median(pred_depth) 92 | ratios.append(ratio) 93 | 94 | if cfg.data['stereo_scale']: 95 | ratio = STEREO_SCALE_FACTOR 96 | 97 | pred_depth *= ratio 98 | pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH 99 | pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH 100 | errors.append(compute_errors(gt_depth, pred_depth)) 101 | 102 | ratios = np.array(ratios) 103 | med = np.median(ratios) 104 | mean_errors = np.array(errors).mean(0) 105 | print("Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med))) 106 | print("\n" + ("{:>}| " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3")) 107 | print(("&{:.3f} " * 7).format(*mean_errors.tolist()) + "\\\\") 108 | print("\n-> Done!") 109 | 110 | 111 | if __name__ == "__main__": 112 | CFG_PATH = '../config/cfg_kitti_fm.py'#path to cfg file 113 | GT_PATH = '/media/user/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz'#path to kitti gt depth 114 | MODEL_PATH = '/media/user/harddisk/weight/fm_depth.pth'#path to model weights 115 | evaluate(MODEL_PATH, CFG_PATH, GT_PATH) -------------------------------------------------------------------------------- /scripts/eval_depth_pp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import cv2 3 | import sys 4 | import numpy as np 5 | from mmcv import Config 6 | 7 | import torch 8 | from torch.utils.data import DataLoader 9 | 10 | sys.path.append('.') 11 | sys.path.append('..') 12 | from mono.model.registry import MONO 13 | from mono.model.mono_baseline.layers import disp_to_depth 14 | from mono.datasets.utils import readlines, 
compute_errors 15 | from mono.datasets.kitti_dataset import KITTIRAWDataset 16 | 17 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1) 18 | STEREO_SCALE_FACTOR = 36 19 | MIN_DEPTH=1e-3 20 | MAX_DEPTH=80 21 | 22 | def batch_post_process_disparity(l_disp, r_disp): 23 | _, h, w = l_disp.shape 24 | m_disp = 0.5 * (l_disp + r_disp) 25 | l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h)) 26 | l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...] 27 | r_mask = l_mask[:, :, ::-1] 28 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp 29 | 30 | def evaluate(MODEL_PATH, CFG_PATH, GT_PATH): 31 | filenames = readlines("../mono/datasets/splits/exp/val_files.txt") 32 | cfg = Config.fromfile(CFG_PATH) 33 | 34 | dataset = KITTIRAWDataset(cfg.data['in_path'], 35 | filenames, 36 | cfg.data['height'], 37 | cfg.data['width'], 38 | [0], 39 | is_train=False, 40 | gt_depth_path=None) 41 | 42 | dataloader = DataLoader(dataset, 43 | 2, 44 | shuffle=False, 45 | num_workers=1, 46 | pin_memory=True, 47 | drop_last=True) 48 | 49 | cfg.model['imgs_per_gpu'] = 2 50 | model = MONO.module_dict[cfg.model['name']](cfg.model) 51 | checkpoint = torch.load(MODEL_PATH) 52 | model.load_state_dict(checkpoint['state_dict'], strict=True) 53 | model.cuda() 54 | model.eval() 55 | 56 | pred_disps = [] 57 | with torch.no_grad(): 58 | for batch_idx, inputs in enumerate(dataloader): 59 | print(batch_idx) 60 | for key, ipt in inputs.items(): 61 | inputs[key] = ipt.cuda() 62 | 63 | outputs = model(inputs) 64 | 65 | disp = outputs[("disp", 0, 0)] 66 | # N = pred_disp.shape[0] // 2 67 | # pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1]) 68 | pred_disp, _ = disp_to_depth(disp, 0.1, 100) 69 | pred_disp = pred_disp.cpu()[:, 0].numpy() 70 | pred_disps.append(pred_disp) 71 | pred_disps = np.concatenate(pred_disps) 72 | 73 | gt_depths = np.load(GT_PATH, allow_pickle=True, fix_imports=True, encoding='latin1')["data"] 74 | 75 | print("-> Evaluating") 76 | if cfg.data['stereo_scale']: 77 | print('using baseline') 78 | else: 79 | print('using mean scaling') 80 | 81 | errors = [] 82 | ratios = [] 83 | for i in range(pred_disps.shape[0]): 84 | gt_depth = gt_depths[i] 85 | gt_height, gt_width = gt_depth.shape[:2] 86 | 87 | pred_disp = pred_disps[i] 88 | pred_disp = cv2.resize(pred_disp, (gt_width, gt_height)) 89 | 90 | pred_depth = 1 / pred_disp 91 | 92 | mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH) 93 | crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height, 94 | 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32) 95 | crop_mask = np.zeros(mask.shape) 96 | crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1 97 | mask = np.logical_and(mask, crop_mask) 98 | 99 | pred_depth = pred_depth[mask] 100 | gt_depth = gt_depth[mask] 101 | 102 | ratio = np.median(gt_depth) / np.median(pred_depth) 103 | ratios.append(ratio) 104 | 105 | if cfg.data['stereo_scale']: 106 | ratio = STEREO_SCALE_FACTOR 107 | 108 | pred_depth *= ratio 109 | pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH 110 | pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH 111 | errors.append(compute_errors(gt_depth, pred_depth)) 112 | 113 | ratios = np.array(ratios) 114 | med = np.median(ratios) 115 | mean_errors = np.array(errors).mean(0) 116 | print("Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med))) 117 | print("\n" + ("{:>}| " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3")) 118 | print(("&{:.3f} " * 
7).format(*mean_errors.tolist()) + "\\\\") 119 | print("\n-> Done!") 120 | 121 | 122 | if __name__ == "__main__": 123 | CFG_PATH = '../config/cfg_kitti_fm.py'#path to cfg file 124 | GT_PATH = '/media/sconly/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz'#path to kitti gt depth 125 | MODEL_PATH = '/media/sconly/harddisk/weight/fm_depth.pth'#path to model weights 126 | evaluate(MODEL_PATH, CFG_PATH, GT_PATH) -------------------------------------------------------------------------------- /scripts/eval_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import os 3 | import sys 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import DataLoader 8 | 9 | sys.path.append('.') 10 | from mono.datasets.utils import readlines, dump_xyz, compute_ate, transformation_from_parameters 11 | from mono.datasets.kitti_dataset import KITTIOdomDataset 12 | from mono.model.mono_fm.pose_encoder import PoseEncoder 13 | from mono.model.mono_fm.pose_decoder import PoseDecoder 14 | 15 | 16 | 17 | 18 | 19 | def evaluate(data_path,model_path,sequence_id,height,width): 20 | filenames = readlines("../mono/datasets/splits/odom/test_files_{:02d}.txt".format(sequence_id)) 21 | 22 | dataset = KITTIOdomDataset(data_path, 23 | filenames, 24 | height, 25 | width, 26 | [0, 1], 27 | is_train=False, 28 | img_ext='.png', 29 | gt_depth_path=None) 30 | 31 | dataloader = DataLoader(dataset, 32 | 1, 33 | shuffle=False, 34 | num_workers=4, 35 | pin_memory=True, 36 | drop_last=False) 37 | 38 | 39 | pose_encoder = PoseEncoder(18, None, 2) 40 | pose_decoder = PoseDecoder(pose_encoder.num_ch_enc) 41 | 42 | checkpoint = torch.load(model_path) 43 | for name, param in pose_encoder.state_dict().items(): 44 | pose_encoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseEncoder.' + name]) 45 | for name, param in pose_decoder.state_dict().items(): 46 | pose_decoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseDecoder.' 
+ name]) 47 | pose_encoder.cuda() 48 | pose_encoder.eval() 49 | pose_decoder.cuda() 50 | pose_decoder.eval() 51 | 52 | pred_poses = [] 53 | 54 | print("-> Computing pose predictions") 55 | with torch.no_grad(): 56 | for inputs in dataloader: 57 | for key, ipt in inputs.items(): 58 | inputs[key] = ipt.cuda() 59 | all_color_aug = torch.cat([inputs[("color_aug", i, 0)] for i in [0, 1]], 1) 60 | features = pose_encoder(all_color_aug) 61 | axisangle, translation = pose_decoder(features) 62 | pred_poses.append(transformation_from_parameters(axisangle[:, 0], translation[:, 0]).cpu().numpy()) 63 | pred_poses = np.concatenate(pred_poses) 64 | 65 | gt_poses_path = os.path.join(data_path, "poses", "{:02d}.txt".format(sequence_id)) 66 | gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4) 67 | gt_global_poses = np.concatenate((gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1) 68 | gt_global_poses[:, 3, 3] = 1 69 | gt_xyzs = gt_global_poses[:, :3, 3] 70 | gt_local_poses = [] 71 | for i in range(1, len(gt_global_poses)): 72 | gt_local_poses.append(np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]), gt_global_poses[i]))) 73 | 74 | ates = [] 75 | num_frames = gt_xyzs.shape[0] 76 | track_length = 5 77 | for i in range(0, num_frames - 1): 78 | local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1])) 79 | gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1])) 80 | ates.append(compute_ate(gt_local_xyzs, local_xyzs)) 81 | 82 | print("\n odom_{} Trajectory error: {:0.3f}, std: {:0.3f}\n".format(sequence_id, np.mean(ates), np.std(ates))) 83 | 84 | # save_path = os.path.join(load_weights_folder, "poses.npy") 85 | # np.save(save_path, pred_poses) 86 | # print("-> Predictions saved to", save_path) 87 | 88 | 89 | if __name__ == "__main__": 90 | data_path='/media/user/harddisk/data/kitti/Odometry/dataset'#path to kitti odometry 91 | model_path = '/media/user/harddisk/weight/fm_depth.pth' 92 | height=320 93 | width=1024 94 | sequence_id =9 95 | evaluate(data_path,model_path,sequence_id,height,width) 96 | sequence_id = 10 97 | evaluate(data_path,model_path,sequence_id,height,width) 98 | -------------------------------------------------------------------------------- /scripts/infer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import os 3 | import cv2 4 | import sys 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | from mmcv import Config 8 | 9 | import torch 10 | from torch.utils.data import DataLoader 11 | 12 | sys.path.append('.') 13 | sys.path.append('..') 14 | from mono.model.registry import MONO 15 | from mono.model.mono_baseline.layers import disp_to_depth 16 | from mono.datasets.utils import readlines 17 | from mono.datasets.kitti_dataset import KITTIRAWDataset 18 | 19 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1) 20 | 21 | MIN_DEPTH=1e-3 22 | MAX_DEPTH=80 23 | SCALE = 36#we set baseline=0.0015m which is 36 times smaller than the actual value (0.54m) 24 | 25 | def transform(cv2_img, height=320, width=1024): 26 | im_tensor = torch.from_numpy(cv2_img.astype(np.float32)).cuda().unsqueeze(0) 27 | im_tensor = im_tensor.permute(0, 3, 1, 2).contiguous() 28 | im_tensor = torch.nn.functional.interpolate(im_tensor, [height, width],mode='bilinear', align_corners=False) 29 | im_tensor /= 255 30 | return im_tensor 31 | 32 | def predict(cv2_img, model): 33 | original_height, original_width = 
cv2_img.shape[:2] 34 | im_tensor = transform(cv2_img) 35 | 36 | with torch.no_grad(): 37 | input = {} 38 | input['color_aug', 0, 0] = im_tensor 39 | outputs = model(input) 40 | 41 | disp = outputs[("disp", 0, 0)] 42 | disp_resized = torch.nn.functional.interpolate(disp, (original_height, original_width), mode="bilinear", align_corners=False) 43 | min_disp = 1/MAX_DEPTH 44 | max_disp = 1/MIN_DEPTH 45 | depth = 1/(disp_resized.squeeze().cpu().numpy()*max_disp + min_disp) * SCALE 46 | return depth, disp_resized.squeeze().cpu().numpy() 47 | 48 | def evaluate(cfg_path, model_path, img_path, output_path): 49 | cfg = Config.fromfile(cfg_path) 50 | cfg['model']['depth_pretrained_path'] = None 51 | cfg['model']['pose_pretrained_path'] = None 52 | cfg['model']['extractor_pretrained_path'] = None 53 | model = MONO.module_dict[cfg.model['name']](cfg.model) 54 | checkpoint = torch.load(model_path) 55 | model.load_state_dict(checkpoint['state_dict'], strict=True) 56 | model.cuda() 57 | model.eval() 58 | 59 | with torch.no_grad(): 60 | cv2_img = cv2.imread(img_path) 61 | cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB) 62 | 63 | depth, disp_resized = predict(cv2_img, model) 64 | 65 | vmax = np.percentile(disp_resized, 95) 66 | plt.imsave(output_path, disp_resized, cmap='magma', vmax=vmax) 67 | 68 | print("\n-> Done!") 69 | 70 | 71 | if __name__ == "__main__": 72 | cfg_path = '../config/cfg_kitti_fm.py'# path to cfg file 73 | model_path = '/media/sconly/harddisk/weight/fm_depth.pth'# path to model weight 74 | img_path = '../assets/test.png' 75 | output_path = '../assets/test_disp.png' # dir for saving depth maps 76 | evaluate(cfg_path, model_path, img_path, output_path) -------------------------------------------------------------------------------- /scripts/infer_singleimage.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import os 3 | import cv2 4 | import sys 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | from mmcv import Config 8 | 9 | import torch 10 | from torch.utils.data import DataLoader 11 | 12 | sys.path.append('.') 13 | from mono.model.registry import MONO 14 | from mono.model.mono_baseline.layers import disp_to_depth 15 | from mono.datasets.utils import readlines 16 | from mono.datasets.kitti_dataset import KITTIRAWDataset 17 | 18 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1) 19 | 20 | 21 | 22 | def evaluate(cfg_path,model_path,gt_path, output_path): 23 | filenames = readlines("../mono/datasets/splits/exp/val_files.txt") 24 | cfg = Config.fromfile(cfg_path) 25 | 26 | dataset = KITTIRAWDataset(cfg.data['in_path'], 27 | filenames, 28 | cfg.data['height'], 29 | cfg.data['width'], 30 | [0], 31 | is_train=False, 32 | gt_depth_path=gt_path) 33 | 34 | dataloader = DataLoader(dataset, 35 | 1, 36 | shuffle=False, 37 | num_workers=4, 38 | pin_memory=True, 39 | drop_last=False) 40 | 41 | cfg.model['imgs_per_gpu'] = 1 42 | model = MONO.module_dict[cfg.model['name']](cfg.model) 43 | checkpoint = torch.load(model_path) 44 | model.load_state_dict(checkpoint['state_dict'], strict=False) 45 | model.cuda() 46 | model.eval() 47 | 48 | with torch.no_grad(): 49 | for batch_idx, inputs in enumerate(dataloader): 50 | for key, ipt in inputs.items(): 51 | inputs[key] = ipt.cuda() 52 | outputs = model(inputs) 53 | 54 | img_path = os.path.join(output_path, 'img_{:0>4d}.jpg'.format(batch_idx)) 55 | plt.imsave(img_path, inputs[("color", 0, 
0)][0].squeeze().transpose(0,1).transpose(1,2).cpu().numpy()) 56 | 57 | disp = outputs[("disp", 0, 0)] 58 | pred_disp, _ = disp_to_depth(disp, 0.1, 100) 59 | pred_disp = pred_disp[0, 0].cpu().numpy() 60 | pred_disp = cv2.resize(pred_disp, (cfg.data['width'], cfg.data['height'])) 61 | 62 | img_path = os.path.join(output_path, 'disp_{:0>4d}.jpg'.format(batch_idx)) 63 | vmax = np.percentile(pred_disp, 95) 64 | plt.imsave(img_path, pred_disp, cmap='magma', vmax=vmax) 65 | 66 | print("\n-> Done!") 67 | 68 | 69 | if __name__ == "__main__": 70 | cfg_path = '../config/cfg_kitti_fm.py'# path to cfg file 71 | model_path = '/media/user/harddisk/weight/fm_depth.pth'# path to model weight 72 | gt_path = '/media/user/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz' # path to kitti gt depth 73 | output_path = '/media/user/harddisk/results' # dir for saving depth maps 74 | if not os.path.exists(output_path): 75 | os.mkdir(output_path) 76 | evaluate(cfg_path,model_path,gt_path,output_path) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | from mmcv import Config 5 | from mmcv.runner import load_checkpoint 6 | 7 | from mono.datasets.get_dataset import get_dataset 8 | from mono.apis import (train_mono, 9 | init_dist, 10 | get_root_logger, 11 | set_random_seed) 12 | from mono.model.registry import MONO 13 | import torch 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description='Train a detector') 18 | parser.add_argument('--config', 19 | default='/home/user/Documents/code/fm_depth/config/cfg_kitti_fm_joint.py', 20 | help='train config file path') 21 | parser.add_argument('--work_dir', 22 | default='/media/user/harddisk/weight/fmdepth', 23 | help='the dir to save logs and models') 24 | parser.add_argument('--resume_from', 25 | help='the checkpoint file to resume from') 26 | parser.add_argument('--gpus', 27 | default='0', 28 | type=str, 29 | help='number of gpus to use ' 30 | '(only applicable to non-distributed training)') 31 | parser.add_argument('--seed', 32 | type=int, 33 | default=1024, 34 | help='random seed') 35 | parser.add_argument('--launcher', 36 | choices=['none', 'pytorch', 'slurm', 'mpi'], 37 | default='pytorch', 38 | help='job launcher') 39 | parser.add_argument('--local_rank', 40 | type=int, 41 | default=0) 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | def main(): 47 | args = parse_args() 48 | print(args.config) 49 | cfg = Config.fromfile(args.config) 50 | cfg.work_dir = args.work_dir 51 | 52 | # set cudnn_benchmark 53 | if cfg.get('cudnn_benchmark', False): 54 | torch.backends.cudnn.benchmark = True 55 | 56 | if args.resume_from is not None: 57 | cfg.resume_from = args.resume_from 58 | cfg.gpus = [int(_) for _ in args.gpus.split(',')] 59 | 60 | # init distributed env first, since logger depends on the dist info. 
61 | if args.launcher == 'none': 62 | distributed = False 63 | else: 64 | distributed = True 65 | init_dist(args.launcher, **cfg.dist_params) 66 | 67 | print('cfg is ', cfg) 68 | # init logger before other steps 69 | logger = get_root_logger(cfg.log_level) 70 | logger.info('Distributed training: {}'.format(distributed)) 71 | 72 | # set random seeds 73 | if args.seed is not None: 74 | logger.info('Set random seed to {}'.format(args.seed)) 75 | set_random_seed(args.seed) 76 | 77 | model_name = cfg.model['name'] 78 | model = MONO.module_dict[model_name](cfg.model) 79 | 80 | if cfg.resume_from is not None: 81 | load_checkpoint(model, cfg.resume_from, map_location='cpu') 82 | elif cfg.finetune is not None: 83 | print('loading from', cfg.finetune) 84 | checkpoint = torch.load(cfg.finetune, map_location='cpu') 85 | model.load_state_dict(checkpoint['state_dict'], strict=False) 86 | 87 | train_dataset = get_dataset(cfg.data, training=True) 88 | if cfg.validate: 89 | val_dataset = get_dataset(cfg.data, training=False) 90 | else: 91 | val_dataset = None 92 | 93 | train_mono(model, 94 | train_dataset, 95 | val_dataset, 96 | cfg, 97 | distributed=distributed, 98 | validate=cfg.validate, 99 | logger=logger) 100 | 101 | 102 | if __name__ == '__main__': 103 | main() --------------------------------------------------------------------------------
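Note (illustrative sketch, not repository code): the MONO registry in mono/model/registry.py is what ties a config file to a network class. A model registers itself under its class name via the register_module decorator, and train.py (as well as the evaluation scripts above) instantiates it with MONO.module_dict[cfg.model['name']](cfg.model). The class and config dict below are hypothetical placeholders.

import torch.nn as nn
from mono.model.registry import MONO

@MONO.register_module
class ToyNet(nn.Module):            # hypothetical network, for illustration only
    def __init__(self, options):
        super(ToyNet, self).__init__()
        self.options = options      # would normally be cfg.model from a config file
        self.conv = nn.Conv2d(3, 1, 3, padding=1)

    def forward(self, inputs):
        return self.conv(inputs)

# Mirrors the lookup performed in train.py and in scripts/eval_depth.py:
model = MONO.module_dict['ToyNet']({'name': 'ToyNet'})

Because register_module returns the class unchanged, decorating a network has no side effect other than adding it to module_dict, so new models can be dropped into mono/model/ and selected purely through the config's model['name'] entry.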