├── .gitignore ├── LICENSE ├── README.md ├── assets ├── p.png ├── test.png └── test_disp.png ├── config ├── __init__.py ├── cfg_cityscape.py ├── cfg_eth3d_autoencoder.py ├── cfg_eth3d_fm.py ├── cfg_euroc_autoencoder.py ├── cfg_euroc_fm.py ├── cfg_folder.py ├── cfg_kitti_autoencoder.py ├── cfg_kitti_fm.py ├── cfg_kitti_fm_joint.py ├── cfg_kitti_fm_refine.py ├── cfg_make3d_fm.py └── cfg_odom_fm.py ├── mono ├── __init__.py ├── apis │ ├── __init__.py │ ├── env.py │ └── trainer.py ├── core │ ├── __init__.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── eval_hooks.py │ │ └── pixel_error.py │ └── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py ├── datasets │ ├── __init__.py │ ├── cityscape_dataset.py │ ├── eth3d_dataset.py │ ├── euroc_dataset.py │ ├── folder_dataset.py │ ├── get_dataset.py │ ├── gt_pose │ │ ├── 00.txt │ │ ├── 01.txt │ │ ├── 02.txt │ │ ├── 03.txt │ │ ├── 04.txt │ │ ├── 05.txt │ │ ├── 06.txt │ │ ├── 07.txt │ │ ├── 08.txt │ │ ├── 09.txt │ │ ├── 10.txt │ │ └── 12.txt │ ├── kitti_dataset.py │ ├── kitti_utils.py │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── sampler.py │ ├── mono_dataset.py │ ├── splits │ │ ├── __init__.py │ │ ├── benchmark │ │ │ ├── eigen_to_benchmark_ids.npy │ │ │ ├── test_files.txt │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ │ ├── cityscape │ │ │ ├── gen_cityscape_split.py │ │ │ ├── test.txt │ │ │ ├── train.txt │ │ │ ├── train_files.txt │ │ │ ├── val.txt │ │ │ └── val_files.txt │ │ ├── eigen_benchmark │ │ │ └── test_files.txt │ │ ├── eigen_full │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ │ ├── exp │ │ │ ├── __init__.py │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ │ ├── kitti_archives_to_download.txt │ │ ├── kitti_shot_sequence │ │ │ ├── gen_split.py │ │ │ └── val_files.txt │ │ ├── odom │ │ │ ├── test_files_09.txt │ │ │ ├── test_files_10.txt │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ │ ├── short │ │ │ ├── __init__.py │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ │ └── test │ │ │ ├── train_files.txt │ │ │ └── val_files.txt │ └── utils.py ├── model │ ├── __init__.py │ ├── mono_autoencoder │ │ ├── __init__.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── layers.py │ │ ├── net.py │ │ └── resnet.py │ ├── mono_baseline │ │ ├── __init__.py │ │ ├── depth_decoder.py │ │ ├── depth_encoder.py │ │ ├── layers.py │ │ ├── net.py │ │ ├── pose_decoder.py │ │ ├── pose_encoder.py │ │ └── resnet.py │ ├── mono_fm │ │ ├── __init__.py │ │ ├── depth_decoder.py │ │ ├── depth_encoder.py │ │ ├── layers.py │ │ ├── net.py │ │ ├── pose_decoder.py │ │ ├── pose_encoder.py │ │ └── resnet.py │ ├── mono_fm_joint │ │ ├── __init__.py │ │ ├── decoder.py │ │ ├── depth_decoder.py │ │ ├── depth_encoder.py │ │ ├── encoder.py │ │ ├── layers.py │ │ ├── net.py │ │ ├── pose_decoder.py │ │ ├── pose_encoder.py │ │ └── resnet.py │ └── registry.py └── tools │ ├── __init__.py │ ├── file_interface.py │ ├── geometry.py │ ├── kitti_evaluation_toolkit.py │ ├── lie_algebra.py │ ├── pose_evaluation_utils.py │ ├── trajectory.py │ └── transformations.py ├── requirements.txt ├── run.py ├── scripts ├── __init__.py ├── draw_odometry.py ├── eval_depth.py ├── eval_depth_pp.py ├── eval_pose.py ├── infer.py └── infer_singleimage.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | .idea 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 
| 3 | Copyright (c) 2020 sconly 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # feature_metric_depth 2 | This is the official code for the method described in 3 | > **Feature-metric Loss for Self-supervised Learning of Depth and Egomotion** 4 | > 5 | > [ECCV 2020](https://arxiv.org/pdf/2007.10603.pdf) 6 | 7 |

8 | [performance figure] 9 |

10 | 11 | If you find our work useful in your research, please consider citing our paper: 12 | 13 | ``` 14 | @inproceedings{shu2020featdepth, 15 | title={Feature-metric Loss for Self-supervised Learning of Depth and Egomotion}, 16 | author={Shu, Chang and Yu, Kun and Duan, Zhixiang and Yang, Kuiyuan}, 17 | booktitle={ECCV}, 18 | year={2020} 19 | } 20 | ``` 21 | 22 | ## Setup 23 | 24 | ### Requirements: 25 | - PyTorch 1.1+, Python 3.5+, CUDA 10.0+ 26 | - mmcv==0.4.4 27 | 28 | Our code is based on mmcv for distributed training. 29 | To make it convenient for you to train and test our code, we provide our [anaconda environment](https://drive.google.com/file/d/1NSoGxhP8UpyW-whzpqP3WIB6u2mgGP49/view?usp=sharing); 30 | you only need to download it, extract it to the folder of your anaconda environments, and use the python in it to run our code. 31 | 32 | If you would like to set up your anaconda environment yourself, you can do so as follows: 33 | ```bash 34 | # first, make sure that your conda is set up properly with the right environment 35 | # for that, check that `which conda`, `which pip` and `which python` point to the 36 | # right paths. From a clean conda env, this is what you need to do 37 | 38 | conda create --name featdepth python=3.7 39 | conda activate featdepth 40 | 41 | # this installs the right pip and dependencies for the fresh python 42 | conda install ipython 43 | conda install pip 44 | 45 | # install required packages from requirements.txt 46 | pip install -r requirements.txt 47 | ``` 48 | 49 | ## KITTI training data 50 | 51 | Our training data is the same as that used by other self-supervised monocular depth estimation methods; please refer to [monodepth2](https://github.com/nianticlabs/monodepth2) to prepare the training data. 52 | 53 | ## Pretrained weights 54 | 55 | We provide weights for: 56 | (1) [AutoEncoder trained on the kitti raw data](https://drive.google.com/file/d/1ncAWUMvLq2ETMpG-7eI9qfILce_cPPfy/view?usp=sharing); 57 | (2) [FeatDepth trained on the kitti raw data](https://drive.google.com/file/d/1HlAubfuja5nBKpfNU3fQs-3m3Zaiu9RI/view?usp=sharing); 58 | (3) [FeatDepth finetuned on the test split of kitti raw data by using online refinement](https://drive.google.com/file/d/1CfCtz55s4QHya3y3UslxsuD_0cxNlA-D/view?usp=sharing); 59 | (4) [FeatDepth trained on kitti odometry](https://drive.google.com/file/d/1vQJbiyPXv_XNQYpyVocDB3-LKwx2LVka/view?usp=sharing); 60 | (5) [FeatDepth trained on Euroc](https://drive.google.com/file/d/1IMIAKpHXmqyUxiUIiqqp5qI-nJXDUSmj/view?usp=sharing); 61 | (6) [FeatDepth trained on NYU](https://drive.google.com/file/d/1Mo050P-DgG-jrNXWww07GXXyst5h5Q74/view?usp=sharing). 62 | 63 | ## API 64 | We provide an API interface for you to predict depth and pose from an image sequence and visualize some results. 65 | The scripts are stored in the 'scripts' folder. 66 | ``` 67 | draw_odometry.py is used to provide several analytical curves and obtain standard kitti odometry evaluation results. 68 | ``` 69 | 70 | ``` 71 | eval_pose.py is used to obtain kitti odometry evaluation results. 72 | ``` 73 | 74 | ``` 75 | eval_depth.py is used to obtain kitti depth evaluation results. 76 | ``` 77 | 78 | ``` 79 | infer.py is used to generate depth maps from given models. 80 | ``` 81 | 82 | ``` 83 | infer_singleimage.py is used to test a single image for viewing.
84 | ``` 85 | ## Training 86 | You can use the following command to launch distributed training of our model: 87 | ```shell 88 | /path/to/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=1 train.py --config /path/to/cfg_kitti_fm.py --work_dir /dir/for/saving/weights/and/logs 89 | ``` 90 | Here nproc_per_node refers to the number of GPUs you want to use. 91 | 92 | ## Configurations 93 | We provide a variety of config files for training on different datasets. 94 | They are stored in the config folder. 95 | 96 | For example: 97 | (1) 'cfg_kitti_fm.py' is used to train our model on the kitti dataset, where the autoencoder weights are loaded from the pretrained weights we provide and kept fixed during training. 98 | This mode is preferred when your GPU memory is less than 16 GB; 99 | (2) 'cfg_kitti_fm_joint.py' is used to train our model on the kitti dataset, where the autoencoder is jointly trained with depthnet and posenet. 100 | We rescale the input resolution of our model so that training fits in 12 GB of GPU memory, which slightly reduces the performance. 101 | You can modify the input resolution according to your computational resources. 102 | 103 | For guidance on modifying config files, please refer to the comments in cfg_kitti_fm.py. 104 | 105 | ## Online refinement 106 | We provide a config file for online refinement: you can use cfg_kitti_fm_refine.py to refine a model trained on kitti raw data by continuing training on the test data. 107 | For the settings of online refinement, please refer to the details in cfg_kitti_fm_refine.py in the config folder. 108 | 109 | ## Finetuning 110 | If you want to finetune from given weights, you can modify the 'finetune' term in the config files from 'None' to an existing path to a pre-trained weight. 111 | 112 | ## Resuming 113 | If you want to reproduce the training state of a certain pretrained weight, you can modify the 'resume_from' term in the config files from 'None' to an existing path to a pre-trained weight. 114 | The program will continue training from where the pretrained weight ends. 115 | Note that you have to increase the 'total_epochs' value to make sure that the training has enough epochs left to continue. 116 | 117 | ## Notes 118 | Our model predicts inverse depths. 119 | If you want to get real depth when training a stereo model, you have to convert the inverse depth to depth and then multiply it by 36.
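The snippet below is a minimal sketch of that conversion, assuming `disp` is a single disparity map predicted by the network with values in [0, 1]; the helper mirrors `disp_to_depth` in mono/core/evaluation/pixel_error.py, and the random input array is only a placeholder for an actual prediction.

```python
import numpy as np

def disp_to_depth(disp, min_depth=0.1, max_depth=100.0):
    # Map the network output in [0, 1] to an inverse depth in
    # [1/max_depth, 1/min_depth], then invert it to obtain depth.
    min_disp = 1.0 / max_depth
    max_disp = 1.0 / min_depth
    scaled_disp = min_disp + (max_disp - min_disp) * disp
    depth = 1.0 / scaled_disp
    return scaled_disp, depth

# placeholder prediction; in practice this is the sigmoid output of the depth decoder
disp = np.random.rand(320, 1024).astype(np.float32)
_, depth = disp_to_depth(disp, min_depth=0.1, max_depth=100.0)
metric_depth = depth * 36.0  # stereo-trained models: scale to metric depth as noted above
```

For monocular-only training the predicted depth is defined only up to scale, so the fixed factor of 36 applies to the stereo-trained setting described above.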
120 | -------------------------------------------------------------------------------- /assets/p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/assets/p.png -------------------------------------------------------------------------------- /assets/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/assets/test.png -------------------------------------------------------------------------------- /assets/test_disp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/assets/test_disp.png -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/config/__init__.py -------------------------------------------------------------------------------- /config/cfg_cityscape.py: -------------------------------------------------------------------------------- 1 | split = 'cityscape' 2 | dataset = 'cityscape' 3 | 4 | height = 384 5 | width = 768 6 | disparity_smoothness = 1e-3 7 | scales = [0, 1, 2, 3, 4] 8 | min_depth = 0.1 9 | max_depth = 100.0 10 | frame_ids = [0, -1, 1] 11 | learning_rate = 1e-4 12 | 13 | depth_num_layers = 50 14 | pose_num_layers = 50 15 | total_epochs = 45 16 | device_ids = range(8) 17 | 18 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(depth_num_layers) 19 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(pose_num_layers) 20 | 21 | in_path = '/ssd/Cityscapes' 22 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz' 23 | checkpoint_path = '/node01_data5/monodepth2-test/model/refine/smallfigure.pth' 24 | 25 | imgs_per_gpu = 2 26 | workers_per_gpu = 2 27 | 28 | validate = False 29 | 30 | png = True 31 | scale_invariant = False 32 | plane_fitting = False 33 | finetune = False 34 | perception = False 35 | focus_loss = False 36 | 37 | scale_invariant_weight = 0.01 38 | plane_fitting_weight = 0.0001 39 | perceptional_weight = 0.001 40 | 41 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 42 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 43 | # learning policy 44 | lr_config = dict( 45 | policy='step', 46 | warmup='linear', 47 | warmup_iters=500, 48 | warmup_ratio=1.0 / 3, 49 | step=[15,25,35], 50 | gamma=0.5, 51 | ) 52 | 53 | checkpoint_config = dict(interval=1) 54 | # yapf:disable 55 | log_config = dict(interval=50, 56 | hooks=[dict(type='TextLoggerHook'),]) 57 | # yapf:enable 58 | # runtime settings 59 | dist_params = dict(backend='nccl') 60 | log_level = 'INFO' 61 | load_from = None 62 | resume_from = None 63 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_eth3d_autoencoder.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0] 4 | IMGS_PER_GPU = 3 5 | HEIGHT = 448 6 | WIDTH = 736 7 | 8 | data = dict( 9 | name = 'eth3d', 10 | split = 'exp', 11 | height = 
HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/ssd/ETH3D/slam/cables_4', 15 | gt_depth_path = None, 16 | png = True, 17 | stereo_scale = False, 18 | ) 19 | 20 | model = dict( 21 | name = 'autoencoder', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | automask = True, 34 | disp_norm = True, 35 | use_min_construct = True, 36 | dis=0.001, 37 | cvt=0.001, 38 | ) 39 | 40 | 41 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 42 | resume_from = None 43 | finetune = None 44 | total_epochs = 30 45 | imgs_per_gpu = IMGS_PER_GPU 46 | learning_rate = 1e-4 47 | workers_per_gpu = 4 48 | validate = False 49 | 50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 52 | lr_config = dict( 53 | policy='step', 54 | warmup='linear', 55 | warmup_iters=500, 56 | warmup_ratio=1.0 / 3, 57 | step=[10,20], 58 | gamma=0.5, 59 | ) 60 | 61 | checkpoint_config = dict(interval=1) 62 | log_config = dict(interval=50, 63 | hooks=[dict(type='TextLoggerHook'),]) 64 | dist_params = dict(backend='nccl') 65 | log_level = 'INFO' 66 | load_from = None 67 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_eth3d_fm.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, -1, 1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 448 6 | WIDTH = 736 7 | 8 | data = dict( 9 | name = 'eth3d', 10 | split = 'exp', 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/ssd/ETH3D/slam/cables_4', 15 | gt_depth_path = None, 16 | png = True, 17 | stereo_scale = True if 's' in FRAME_IDS else False, 18 | ) 19 | 20 | model = dict( 21 | name = 'mono_fm', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder_eth_1/epoch_30.pth', 34 | automask = False if 's' in FRAME_IDS else True, 35 | disp_norm = False if 's' in FRAME_IDS else True, 36 | perception_weight = 0, 37 | smoothness_weight = 1e-3, 38 | ) 39 | 40 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 41 | resume_from = None 42 | finetune = None 43 | total_epochs = 40 44 | imgs_per_gpu = IMGS_PER_GPU 45 | learning_rate = 1e-4 46 | workers_per_gpu = 4 47 | validate = False 48 | 49 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 51 | lr_config = dict( 52 | policy='step', 53 | warmup='linear', 54 | warmup_iters=500, 55 | warmup_ratio=1.0 / 3, 56 | step=[20,30], 57 | gamma=0.5, 58 | ) 
59 | 60 | checkpoint_config = dict(interval=1) 61 | log_config = dict(interval=5, 62 | hooks=[dict(type='TextLoggerHook'),]) 63 | dist_params = dict(backend='nccl') 64 | log_level = 'INFO' 65 | load_from = None 66 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_euroc_autoencoder.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0] 4 | IMGS_PER_GPU = 3 5 | HEIGHT = 480 6 | WIDTH = 768 7 | 8 | data = dict( 9 | name = 'euroc', 10 | split = 'exp', 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/ssd/EuRoc/MH_04_difficult', 15 | gt_depth_path = None, 16 | png = True, 17 | stereo_scale = False, 18 | ) 19 | 20 | model = dict( 21 | name = 'autoencoder', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | automask = True, 34 | disp_norm = True, 35 | use_min_construct = True, 36 | dis=0.001, 37 | cvt=0.001, 38 | ) 39 | 40 | 41 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 42 | resume_from = None 43 | finetune = None 44 | total_epochs = 30 45 | imgs_per_gpu = IMGS_PER_GPU 46 | learning_rate = 1e-4 47 | workers_per_gpu = 4 48 | validate = False 49 | 50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 52 | lr_config = dict( 53 | policy='step', 54 | warmup='linear', 55 | warmup_iters=500, 56 | warmup_ratio=1.0 / 3, 57 | step=[10,20], 58 | gamma=0.5, 59 | ) 60 | 61 | checkpoint_config = dict(interval=1) 62 | log_config = dict(interval=50, 63 | hooks=[dict(type='TextLoggerHook'),]) 64 | dist_params = dict(backend='nccl') 65 | log_level = 'INFO' 66 | load_from = None 67 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_euroc_fm.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, -1, 1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 480 6 | WIDTH = 768 7 | 8 | data = dict( 9 | name = 'euroc', 10 | split = 'exp', 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/ssd/EuRoc/MH_02_easy',#'/ssd/EuRoc/MH_02_easy','/ssd/EuRoc/MH_04_difficult' 15 | gt_depth_path = None, 16 | png = True, 17 | stereo_scale = True if 's' in FRAME_IDS else False, 18 | ) 19 | 20 | model = dict( 21 | name = 'mono_fm', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 50.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder_euroc/epoch_30.pth', 34 | automask = False, 35 | disp_norm = 
False, 36 | perception_weight = 1e-3, 37 | smoothness_weight = 1e-3, 38 | ) 39 | 40 | # resume_from = '/node01/jobs/io/out/changshu/fm_euroc/epoch_40.pth' 41 | resume_from = None 42 | finetune = None 43 | total_epochs = 80 44 | imgs_per_gpu = IMGS_PER_GPU 45 | learning_rate = 1e-4 46 | workers_per_gpu = 4 47 | validate = False 48 | 49 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 51 | lr_config = dict( 52 | policy='step', 53 | warmup='linear', 54 | warmup_iters=500, 55 | warmup_ratio=1.0 / 3, 56 | step=[20,30], 57 | gamma=0.5, 58 | ) 59 | 60 | checkpoint_config = dict(interval=1) 61 | log_config = dict(interval=5, 62 | hooks=[dict(type='TextLoggerHook'),]) 63 | dist_params = dict(backend='nccl') 64 | log_level = 'INFO' 65 | load_from = None 66 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_folder.py: -------------------------------------------------------------------------------- 1 | split = 'exp' 2 | dataset = 'folder' 3 | height = 320 4 | width = 640 5 | disparity_smoothness = 1e-3 6 | scales = [0, 1, 2, 3, 4] 7 | min_depth = 0.1 8 | max_depth = 100.0 9 | frame_ids = [0, -1, 1] 10 | learning_rate = 1e-4 11 | depth_num_layers = 50 12 | pose_num_layers = 50 13 | total_epochs = 45 14 | device_ids = range(8) 15 | 16 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(depth_num_layers) 17 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(pose_num_layers) 18 | 19 | in_path = '/ssd/avp/soho_garage3/keyframe_underground' 20 | gt_depth_path = '' 21 | checkpoint_path = '/node01_data5/monodepth2-test/model/refine/smallfigure.pth' 22 | 23 | imgs_per_gpu = 2 24 | workers_per_gpu = 4 25 | 26 | validate = False 27 | 28 | png = False 29 | scale_invariant = False 30 | plane_fitting = False 31 | finetune = False 32 | perception = False 33 | focus_loss = False 34 | 35 | scale_invariant_weight = 0.01 36 | plane_fitting_weight = 0.0001 37 | perceptional_weight = 0.001 38 | 39 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 40 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 41 | # learning policy 42 | lr_config = dict( 43 | policy='step', 44 | warmup='linear', 45 | warmup_iters=500, 46 | warmup_ratio=1.0 / 3, 47 | step=[15,25,35], 48 | gamma=0.5, 49 | ) 50 | 51 | checkpoint_config = dict(interval=1) 52 | # yapf:disable 53 | log_config = dict(interval=50, 54 | hooks=[dict(type='TextLoggerHook'),]) 55 | # yapf:enable 56 | # runtime settings 57 | dist_params = dict(backend='nccl') 58 | log_level = 'INFO' 59 | load_from = None 60 | resume_from = None 61 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_kitti_autoencoder.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0] 4 | IMGS_PER_GPU = 5 5 | HEIGHT = 256 6 | WIDTH = 800 7 | 8 | data = dict( 9 | name = 'kitti', 10 | split = 'exp', 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/node01_data5/kitti_raw', 15 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz', 16 | png = False, 17 | stereo_scale = False, 18 | ) 19 | 20 | model = dict( 21 | name = 'autoencoder', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = 
FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | automask = True, 34 | disp_norm = True, 35 | use_min_construct = True, 36 | dis=0.001, 37 | cvt=0.001, 38 | ) 39 | 40 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 41 | resume_from = None 42 | finetune = None 43 | total_epochs = 30 44 | imgs_per_gpu = IMGS_PER_GPU 45 | learning_rate = 1e-4 46 | workers_per_gpu = 4 47 | validate = False 48 | 49 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 51 | lr_config = dict( 52 | policy='step', 53 | warmup='linear', 54 | warmup_iters=500, 55 | warmup_ratio=1.0 / 3, 56 | step=[10,20], 57 | gamma=0.5, 58 | ) 59 | 60 | checkpoint_config = dict(interval=1) 61 | log_config = dict(interval=50, 62 | hooks=[dict(type='TextLoggerHook'),]) 63 | dist_params = dict(backend='nccl') 64 | log_level = 'INFO' 65 | load_from = None 66 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_kitti_fm.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50#resnet50 2 | POSE_LAYERS = 18#resnet18 3 | FRAME_IDS = [0, -1, 1, 's']#0 refers to current frame, -1 and 1 refer to temperally adjacent frames, 's' refers to stereo adjacent frame. 4 | IMGS_PER_GPU = 2 #the number of images fed to each GPU 5 | HEIGHT = 320#input image height 6 | WIDTH = 1024#input image width 7 | 8 | data = dict( 9 | name = 'kitti',#dataset name 10 | split = 'exp',#training split name 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/media/sconly/harddisk/data/kitti/kitti_raw/rawdata',#path to raw data 15 | gt_depth_path = '/media/sconly/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz',#path to gt data 16 | png = False,#image format 17 | stereo_scale = True if 's' in FRAME_IDS else False, 18 | ) 19 | 20 | model = dict( 21 | name = 'mono_fm',# select a model by name 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3],# output different scales of depth maps 29 | min_depth = 0.1, # minimum of predicted depth value 30 | max_depth = 100.0, # maximum of predicted depth value 31 | depth_pretrained_path = '/media/sconly/harddisk/weight/resnet/resnet{}.pth'.format(DEPTH_LAYERS),# pretrained weights for resnet 32 | pose_pretrained_path = '/media/sconly/harddisk/weight/resnet/resnet{}.pth'.format(POSE_LAYERS),# pretrained weights for resnet 33 | extractor_pretrained_path = '/media/sconly/harddisk/weight/autoencoder.pth',# pretrained weights for autoencoder 34 | automask = False if 's' in FRAME_IDS else True, 35 | disp_norm = False if 's' in FRAME_IDS else True, 36 | perception_weight = 1e-3, 37 | smoothness_weight = 1e-3, 38 | ) 39 | 40 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth'#directly start training from provide weights 41 | resume_from = None 42 | finetune = None 43 | total_epochs = 40 44 | imgs_per_gpu = IMGS_PER_GPU 45 | learning_rate = 1e-4 46 | workers_per_gpu = 4 47 | validate = True 48 | 49 
| optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 50 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 51 | lr_config = dict( 52 | policy='step', 53 | warmup='linear', 54 | warmup_iters=500, 55 | warmup_ratio=1.0 / 3, 56 | step=[20,30], 57 | gamma=0.5, 58 | ) 59 | 60 | checkpoint_config = dict(interval=1) 61 | log_config = dict(interval=50, 62 | hooks=[dict(type='TextLoggerHook'),]) 63 | dist_params = dict(backend='nccl') 64 | log_level = 'INFO' 65 | load_from = None 66 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_kitti_fm_joint.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, -1, 1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 192#320 6 | WIDTH = 640#1024 7 | 8 | data = dict( 9 | name = 'kitti', 10 | split = 'exp', 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/media/user/harddisk/data/kitti/kitti_raw/rawdata', 15 | gt_depth_path = '/media/user/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz', 16 | png = False, 17 | stereo_scale = True if 's' in FRAME_IDS else False, 18 | ) 19 | 20 | model = dict( 21 | name = 'mono_fm_joint', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/media/user/harddisk/weight/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 32 | pose_pretrained_path = '/media/user/harddisk/weight/resnet/resnet{}.pth'.format(POSE_LAYERS), 33 | extractor_pretrained_path = '/media/user/harddisk/weight/autoencoder.pth', 34 | automask = False if 's' in FRAME_IDS else True, 35 | disp_norm = False if 's' in FRAME_IDS else True, 36 | dis=1e-3, 37 | cvt=1e-3, 38 | perception_weight = 1e-3, 39 | smoothness_weight = 1e-3, 40 | ) 41 | 42 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 43 | resume_from = None 44 | finetune = None 45 | total_epochs = 40 46 | imgs_per_gpu = IMGS_PER_GPU 47 | learning_rate = 1e-4 48 | workers_per_gpu = 4 49 | validate = True 50 | 51 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 52 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 53 | lr_config = dict( 54 | policy='step', 55 | warmup='linear', 56 | warmup_iters=500, 57 | warmup_ratio=1.0 / 3, 58 | step=[20,30], 59 | gamma=0.5, 60 | ) 61 | 62 | checkpoint_config = dict(interval=1) 63 | log_config = dict(interval=50, 64 | hooks=[dict(type='TextLoggerHook'),]) 65 | dist_params = dict(backend='nccl') 66 | log_level = 'INFO' 67 | load_from = None 68 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_kitti_fm_refine.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, -1, 1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 320 6 | WIDTH = 1024 7 | 8 | data = dict( 9 | name = 'kitti', 10 | split = 'test',#the split contains the list of testing data 11 | height = HEIGHT, 12 | width = WIDTH, 13 | frame_ids = FRAME_IDS, 14 | in_path = '/node01_data5/kitti_raw',#path to kitti raw data 15 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz',#path to kitti depth ground truth 16 | png = False, 17 | stereo_scale=True if 's' in 
FRAME_IDS else False, 18 | ) 19 | 20 | model = dict( 21 | name = 'mono_fm', 22 | depth_num_layers = DEPTH_LAYERS, 23 | pose_num_layers = POSE_LAYERS, 24 | frame_ids = FRAME_IDS, 25 | imgs_per_gpu = IMGS_PER_GPU, 26 | height = HEIGHT, 27 | width = WIDTH, 28 | scales = [0, 1, 2, 3], 29 | min_depth = 0.1, 30 | max_depth = 100.0, 31 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS),#path to pre-trained resnet weights 32 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS),#path to pre-trained resnet weights 33 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder3/epoch_30.pth', 34 | automask=False if 's' in FRAME_IDS else True, 35 | disp_norm=False if 's' in FRAME_IDS else True, 36 | perception_weight=1e-3, 37 | smoothness_weight=1e-3, 38 | ) 39 | 40 | #path to the weights trained on the kitti raw data training split 41 | resume_from = '/node01_data5/monodepth2-test/model/wow_320_1024/epoch_40.pth'#we will resume from current epoch for further online refinement 42 | total_epochs = 60# this value must be bigger than the epochs of the weight you resume from 43 | #for example, you have trained 40 epoches on kitti raw data, and use this weight for resuming. 44 | #When resuming, the program will start from epoch 41 and finish the rest of epoches (total_epochs - 40) 45 | imgs_per_gpu = IMGS_PER_GPU 46 | learning_rate = 1e-4 47 | workers_per_gpu = 4 48 | validate = True 49 | 50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 52 | lr_config = dict( 53 | policy='step', 54 | warmup='linear', 55 | warmup_iters=500, 56 | warmup_ratio=1.0 / 3, 57 | step=[50], 58 | gamma=0.5, 59 | ) 60 | 61 | checkpoint_config = dict(interval=1) 62 | log_config = dict(interval=5, 63 | hooks=[dict(type='TextLoggerHook'),]) 64 | dist_params = dict(backend='nccl') 65 | log_level = 'INFO' 66 | load_from = None 67 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_make3d_fm.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, -1, 1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 320 6 | WIDTH = 1024 7 | data = dict( 8 | name = 'folder', 9 | split = 'exp', 10 | height = HEIGHT, 11 | width = WIDTH, 12 | frame_ids = FRAME_IDS, 13 | in_path = '/node01_data5/monodepth2-test/make3d', 14 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz', 15 | png = False, 16 | stereo_scale = True if 's' in FRAME_IDS else False, 17 | ) 18 | 19 | model = dict( 20 | name = 'mono_fm', 21 | depth_num_layers = DEPTH_LAYERS, 22 | pose_num_layers = POSE_LAYERS, 23 | frame_ids = FRAME_IDS, 24 | imgs_per_gpu = IMGS_PER_GPU, 25 | height = HEIGHT, 26 | width = WIDTH, 27 | scales = [0, 1, 2, 3], 28 | min_depth = 0.1, 29 | max_depth = 100.0, 30 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 31 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 32 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder3/epoch_30.pth', 33 | automask = False if 's' in FRAME_IDS else True, 34 | disp_norm = False if 's' in FRAME_IDS else True, 35 | perception_weight = 1e-3, 36 | smoothness_weight = 1e-3, 37 | ) 38 | 39 | # resume_from = 
'/node01_data5/monodepth2-test/model/ms/ms.pth' 40 | resume_from = None 41 | finetune = None 42 | total_epochs = 40 43 | imgs_per_gpu = IMGS_PER_GPU 44 | learning_rate = 1e-4 45 | workers_per_gpu = 4 46 | validate = True 47 | 48 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 49 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 50 | lr_config = dict( 51 | policy='step', 52 | warmup='linear', 53 | warmup_iters=500, 54 | warmup_ratio=1.0 / 3, 55 | step=[20,30], 56 | gamma=0.5, 57 | ) 58 | 59 | checkpoint_config = dict(interval=1) 60 | log_config = dict(interval=50, 61 | hooks=[dict(type='TextLoggerHook'),]) 62 | dist_params = dict(backend='nccl') 63 | log_level = 'INFO' 64 | load_from = None 65 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /config/cfg_odom_fm.py: -------------------------------------------------------------------------------- 1 | DEPTH_LAYERS = 50 2 | POSE_LAYERS = 18 3 | FRAME_IDS = [0, 1, -1, 's'] 4 | IMGS_PER_GPU = 2 5 | HEIGHT = 320 6 | WIDTH = 1024 7 | 8 | 9 | data = dict( 10 | name = 'kitti_odom', 11 | split = 'odom', 12 | height = HEIGHT, 13 | width = WIDTH, 14 | frame_ids = FRAME_IDS, 15 | in_path = '/node01/odo/dataset', 16 | gt_depth_path = '/node01_data5/monodepth2-test/monodepth2/gt_depths.npz', 17 | png = True, 18 | stereo_scale = True if 's' in FRAME_IDS else False, 19 | ) 20 | 21 | model = dict( 22 | name = 'mono_fm', 23 | depth_num_layers = DEPTH_LAYERS, 24 | pose_num_layers = POSE_LAYERS, 25 | frame_ids = FRAME_IDS, 26 | imgs_per_gpu = IMGS_PER_GPU, 27 | height = HEIGHT, 28 | width = WIDTH, 29 | scales = [0, 1, 2, 3], 30 | min_depth = 0.1, 31 | max_depth = 100.0, 32 | depth_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(DEPTH_LAYERS), 33 | pose_pretrained_path = '/node01/jobs/io/pretrained/checkpoints/resnet/resnet{}.pth'.format(POSE_LAYERS), 34 | extractor_pretrained_path = '/node01/jobs/io/out/changshu/autoencoder3/epoch_30.pth', 35 | automask = False if 's' in FRAME_IDS else True, 36 | disp_norm = False if 's' in FRAME_IDS else True, 37 | perception_weight=1e-3, 38 | smoothness_weight=1e-3, 39 | ) 40 | 41 | # resume_from = '/node01_data5/monodepth2-test/model/ms/ms.pth' 42 | resume_from = None 43 | finetune = None 44 | total_epochs = 40 45 | imgs_per_gpu = IMGS_PER_GPU 46 | learning_rate = 1e-4 47 | workers_per_gpu = 4 48 | validate = False 49 | 50 | optimizer = dict(type='Adam', lr=learning_rate, weight_decay=0) 51 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 52 | lr_config = dict( 53 | policy='step', 54 | warmup='linear', 55 | warmup_iters=500, 56 | warmup_ratio=1.0 / 3, 57 | step=[25, 30], 58 | gamma=0.5, 59 | ) 60 | 61 | checkpoint_config = dict(interval=1) 62 | log_config = dict(interval=50, 63 | hooks=[dict(type='TextLoggerHook'),]) 64 | dist_params = dict(backend='nccl') 65 | log_level = 'INFO' 66 | load_from = None 67 | workflow = [('train', 1)] -------------------------------------------------------------------------------- /mono/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/__init__.py -------------------------------------------------------------------------------- /mono/apis/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: 
Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from .trainer import train_mono 6 | from .env import init_dist, get_root_logger, set_random_seed -------------------------------------------------------------------------------- /mono/apis/env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | import logging 6 | import os 7 | import random 8 | import subprocess 9 | 10 | import numpy as np 11 | import torch 12 | import torch.distributed as dist 13 | import torch.multiprocessing as mp 14 | from mmcv.runner import get_dist_info 15 | 16 | 17 | def init_dist(launcher, backend='nccl', **kwargs): 18 | if mp.get_start_method(allow_none=True) is None: 19 | mp.set_start_method('spawn') 20 | if launcher == 'pytorch': 21 | _init_dist_pytorch(backend, **kwargs) 22 | elif launcher == 'mpi': 23 | _init_dist_mpi(backend, **kwargs) 24 | elif launcher == 'slurm': 25 | _init_dist_slurm(backend, **kwargs) 26 | else: 27 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 28 | 29 | 30 | def _init_dist_pytorch(backend, **kwargs): 31 | # TODO: use local_rank instead of rank % num_gpus 32 | rank = int(os.environ['RANK']) 33 | num_gpus = torch.cuda.device_count() 34 | torch.cuda.set_device(rank % num_gpus) 35 | dist.init_process_group(backend=backend, **kwargs) 36 | 37 | 38 | def _init_dist_mpi(backend, **kwargs): 39 | raise NotImplementedError 40 | 41 | 42 | def _init_dist_slurm(backend, port=29500, **kwargs): 43 | proc_id = int(os.environ['SLURM_PROCID']) 44 | ntasks = int(os.environ['SLURM_NTASKS']) 45 | node_list = os.environ['SLURM_NODELIST'] 46 | num_gpus = torch.cuda.device_count() 47 | torch.cuda.set_device(proc_id % num_gpus) 48 | addr = subprocess.getoutput( 49 | 'scontrol show hostname {} | head -n1'.format(node_list)) 50 | os.environ['MASTER_PORT'] = str(port) 51 | os.environ['MASTER_ADDR'] = addr 52 | os.environ['WORLD_SIZE'] = str(ntasks) 53 | os.environ['RANK'] = str(proc_id) 54 | dist.init_process_group(backend=backend) 55 | 56 | 57 | def set_random_seed(seed): 58 | random.seed(seed) 59 | np.random.seed(seed) 60 | torch.manual_seed(seed) 61 | torch.cuda.manual_seed_all(seed) 62 | 63 | 64 | def get_root_logger(log_level=logging.INFO): 65 | logger = logging.getLogger() 66 | if not logger.hasHandlers(): 67 | logging.basicConfig( 68 | format='%(asctime)s - %(levelname)s - %(message)s', 69 | level=log_level) 70 | rank, _ = get_dist_info() 71 | if rank != 0: 72 | logger.setLevel('ERROR') 73 | return logger 74 | -------------------------------------------------------------------------------- /mono/core/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from .evaluation import NonDistEvalHook, DistEvalMonoHook 6 | from .utils import DistOptimizerHook -------------------------------------------------------------------------------- /mono/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from .eval_hooks import NonDistEvalHook, DistEvalMonoHook -------------------------------------------------------------------------------- /mono/core/evaluation/pixel_error.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | import numpy as np 6 | 7 | class AverageMeter(object): 8 | """ 9 | Computes and stores the average and current value 10 | """ 11 | def __init__(self): 12 | self.reset() 13 | 14 | def reset(self): 15 | self.val=0 16 | self.avg=0 17 | self.sum=0 18 | self.count=0 19 | 20 | def update(self, val, n=1): 21 | self.val=val 22 | self.sum+=val*n 23 | self.count+=n 24 | self.avg=self.sum/self.count 25 | 26 | 27 | def compute_errors(gt, pred): 28 | """Computation of error metrics between predicted and ground truth depths 29 | """ 30 | thresh = np.maximum((gt / pred), (pred / gt)) 31 | a1 = (thresh < 1.25 ).mean() 32 | a2 = (thresh < 1.25 ** 2).mean() 33 | a3 = (thresh < 1.25 ** 3).mean() 34 | rmse = (gt - pred) ** 2 35 | rmse = np.sqrt(rmse.mean()) 36 | rmse_log = (np.log(gt) - np.log(pred)) ** 2 37 | rmse_log = np.sqrt(rmse_log.mean()) 38 | abs_rel = np.mean(np.abs(gt - pred) / gt) 39 | sq_rel = np.mean(((gt - pred) ** 2) / gt) 40 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 41 | 42 | 43 | def disp_to_depth(disp, min_depth = 0.1, max_depth=100): 44 | min_disp = 1 / max_depth #0.01 45 | max_disp = 1 / min_depth #10 46 | scaled_disp = min_disp + (max_disp - min_disp) * disp #(10-0.01)*disp+0.01 47 | depth = 1 / scaled_disp 48 | return scaled_disp, depth 49 | 50 | -------------------------------------------------------------------------------- /mono/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from .dist_utils import allreduce_grads, DistOptimizerHook 6 | from .misc import tensor2imgs, unmap, multi_apply 7 | 8 | __all__ = [ 9 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 10 | 'multi_apply' 11 | ] 12 | -------------------------------------------------------------------------------- /mono/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | from collections import OrderedDict 5 | 6 | import torch.distributed as dist 7 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, 8 | _take_tensors) 9 | from mmcv.runner import OptimizerHook 10 | 11 | 12 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 13 | if bucket_size_mb > 0: 14 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 15 | buckets = _take_tensors(tensors, bucket_size_bytes) 16 | else: 17 | buckets = OrderedDict() 18 | for tensor in tensors: 19 | tp = tensor.type() 20 | if tp not in buckets: 21 | buckets[tp] = [] 22 | buckets[tp].append(tensor) 23 | buckets = buckets.values() 24 | 25 | for bucket in buckets: 26 | flat_tensors = _flatten_dense_tensors(bucket) 27 | dist.all_reduce(flat_tensors) 28 | flat_tensors.div_(world_size) 29 | for tensor, synced in zip( 30 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 31 | tensor.copy_(synced) 32 | 33 | 34 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): 35 | grads = [ 36 | param.grad.data for param in model.parameters() 37 | if param.requires_grad and param.grad is not None 38 | ] 39 | world_size = dist.get_world_size() 40 | if coalesce: 41 | _allreduce_coalesced(grads, world_size, 
bucket_size_mb) 42 | else: 43 | for tensor in grads: 44 | dist.all_reduce(tensor.div_(world_size)) 45 | 46 | 47 | class DistOptimizerHook(OptimizerHook): 48 | 49 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 50 | self.grad_clip = grad_clip 51 | self.coalesce = coalesce 52 | self.bucket_size_mb = bucket_size_mb 53 | 54 | def after_train_iter(self, runner): 55 | runner.optimizer.zero_grad() 56 | runner.outputs['loss'].backward() 57 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 58 | if self.grad_clip is not None: 59 | self.clip_grads(runner.model.parameters()) 60 | runner.optimizer.step() 61 | -------------------------------------------------------------------------------- /mono/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from functools import partial 6 | 7 | import mmcv 8 | import numpy as np 9 | from six.moves import map, zip 10 | 11 | 12 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 13 | num_imgs = tensor.size(0) 14 | mean = np.array(mean, dtype=np.float32) 15 | std = np.array(std, dtype=np.float32) 16 | imgs = [] 17 | for img_id in range(num_imgs): 18 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 19 | img = mmcv.imdenormalize( 20 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 21 | imgs.append(np.ascontiguousarray(img)) 22 | return imgs 23 | 24 | 25 | def multi_apply(func, *args, **kwargs): 26 | pfunc = partial(func, **kwargs) if kwargs else func 27 | map_results = map(pfunc, *args) 28 | return tuple(map(list, zip(*map_results))) 29 | 30 | 31 | def unmap(data, count, inds, fill=0): 32 | """ Unmap a subset of item (data) back to the original set of items (of 33 | size count) """ 34 | if data.dim() == 1: 35 | ret = data.new_full((count, ), fill) 36 | ret[inds] = data 37 | else: 38 | new_size = (count, ) + data.size()[1:] 39 | ret = data.new_full(new_size, fill) 40 | ret[inds, :] = data 41 | return ret 42 | -------------------------------------------------------------------------------- /mono/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .loader import build_dataloader -------------------------------------------------------------------------------- /mono/datasets/eth3d_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import random 3 | import numpy as np 4 | from PIL import Image # using pillow-simd for increased speed 5 | import os 6 | 7 | import torch 8 | import torch.utils.data as data 9 | from torchvision import transforms 10 | 11 | 12 | def pil_loader(filename): 13 | # open path as file to avoid ResourceWarning 14 | # (https://github.com/python-pillow/Pillow/issues/835) 15 | with open(filename, 'rb') as f: 16 | with Image.open(f) as img: 17 | return img.convert('RGB') 18 | 19 | 20 | class FolderDataset(data.Dataset): 21 | """Superclass for monocular dataloaders 22 | 23 | Args: 24 | data_path 25 | filenames 26 | height 27 | width 28 | frame_idxs 29 | num_scales 30 | is_train 31 | img_ext 32 | """ 33 | def __init__(self, 34 | data_path, 35 | filenames, 36 | height, 37 | width, 38 | frame_idxs, 39 | is_train=False, 40 | img_ext='.jpg', 41 | gt_depth_path = None): 42 | super(FolderDataset, self).__init__() 43 | 44 | self.data_path = data_path 45 | 
self.filenames = sorted(os.listdir(os.path.join(data_path, 'rgb')))[1:-2] 46 | self.height = height 47 | self.width = width 48 | self.interp = Image.ANTIALIAS 49 | self.is_train = is_train 50 | self.frame_idxs = frame_idxs 51 | self.loader = pil_loader 52 | self.to_tensor = transforms.ToTensor() 53 | #726.28741455078 726.28741455078 354.6496887207 186.46566772461 54 | #w=739,h=458 55 | self.K = np.array([[0.9832, 0, 0.5, 0], 56 | [0, 1.58578, 0.5, 0], 57 | [0, 0, 1, 0], 58 | [0, 0, 0, 1]], dtype=np.float32) 59 | 60 | # Need to specify augmentations differently in pytorch 1.0 compared with 0.4 61 | if int(torch.__version__.split('.')[0]) > 0: 62 | self.brightness = (0.8, 1.2) 63 | self.contrast = (0.8, 1.2) 64 | self.saturation = (0.8, 1.2) 65 | self.hue = (-0.1, 0.1) 66 | else: 67 | self.brightness = 0.2 68 | self.contrast = 0.2 69 | self.saturation = 0.2 70 | self.hue = 0.1 71 | 72 | self.resize = transforms.Resize((self.height, self.width), interpolation=self.interp) 73 | 74 | self.flag = np.zeros(self.__len__(), dtype=np.int64) 75 | 76 | def preprocess(self, inputs, color_aug): 77 | """Resize colour images to the required scales and augment if required 78 | 79 | We create the color_aug object in advance and apply the same augmentation to all 80 | images in this item. This ensures that all images input to the pose network receive the 81 | same augmentation. 82 | """ 83 | for k in list(inputs): 84 | if "color" in k: 85 | n, im, i = k 86 | inputs[(n, im, 0)] = self.resize(inputs[(n, im, - 1)]) 87 | 88 | for k in list(inputs): 89 | if "color" in k: 90 | f = inputs[k] 91 | n, im, i = k 92 | inputs[(n, im, i)] = self.to_tensor(f) 93 | if i == 0: 94 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f)) 95 | 96 | def __len__(self): 97 | return len(self.filenames)-2 98 | 99 | def __getitem__(self, index): 100 | """Returns a single training item from the dataset as a dictionary. 101 | 102 | Values correspond to torch tensors. 103 | Keys in the dictionary are either strings or tuples: 104 | 105 | ("color", , ) for raw colour images, 106 | ("color_aug", , ) for augmented colour images, 107 | ("K", scale) or ("inv_K", scale) for camera intrinsics, 108 | "stereo_T" for camera extrinsics, and 109 | "depth_gt" for ground truth depth maps. 110 | 111 | is either: 112 | an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index', 113 | or 114 | "s" for the opposite image in the stereo pair. 
115 | 116 | is an integer representing the scale of the image relative to the fullsize image: 117 | -1 images at native resolution as loaded from disk 118 | 0 images resized to (self.width, self.height ) 119 | 1 images resized to (self.width // 2, self.height // 2) 120 | 2 images resized to (self.width // 4, self.height // 4) 121 | 3 images resized to (self.width // 8, self.height // 8) 122 | """ 123 | inputs = {} 124 | 125 | index = index+1 126 | 127 | do_color_aug = self.is_train and random.random() > 0.5 128 | do_flip = self.is_train and random.random() > 0.5 129 | 130 | for i in self.frame_idxs: 131 | if i=='s': 132 | filename = os.path.join('rgb2', self.filenames[index]) 133 | else: 134 | filename = os.path.join('rgb', self.filenames[index+i]) 135 | 136 | inputs[("color", i, -1)] = self.get_color(filename, do_flip) 137 | 138 | # adjusting intrinsics to match each scale in the pyramid 139 | K = self.K.copy() 140 | K[0, :] *= self.width 141 | K[1, :] *= self.height 142 | inv_K = np.linalg.pinv(K) 143 | 144 | inputs[("K")] = torch.from_numpy(K) 145 | inputs[("inv_K")] = torch.from_numpy(inv_K) 146 | 147 | if do_color_aug: 148 | color_aug = transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue) 149 | else: 150 | color_aug = (lambda x: x) 151 | 152 | self.preprocess(inputs, color_aug) 153 | 154 | for i in self.frame_idxs: 155 | del inputs[("color", i, -1)] 156 | 157 | if "s" in self.frame_idxs: 158 | stereo_T = np.eye(4, dtype=np.float32) 159 | baseline_sign = -1 if do_flip else 1 160 | side_sign = -1 161 | stereo_T[0, 3] = side_sign * baseline_sign * 0.1 162 | inputs["stereo_T"] = torch.from_numpy(stereo_T) 163 | 164 | return inputs 165 | 166 | def get_color(self, filename, do_flip): 167 | color = self.loader(os.path.join(self.data_path, filename)) 168 | 169 | if do_flip: 170 | color = color.transpose(Image.FLIP_LEFT_RIGHT) 171 | 172 | return color -------------------------------------------------------------------------------- /mono/datasets/euroc_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import random 3 | import numpy as np 4 | from PIL import Image # using pillow-simd for increased speed 5 | import os 6 | 7 | import torch 8 | import torch.utils.data as data 9 | from torchvision import transforms 10 | 11 | 12 | def pil_loader(filename): 13 | # open path as file to avoid ResourceWarning 14 | # (https://github.com/python-pillow/Pillow/issues/835) 15 | with open(filename, 'rb') as f: 16 | with Image.open(f) as img: 17 | return img.convert('RGB') 18 | 19 | 20 | class FolderDataset(data.Dataset): 21 | """Superclass for monocular dataloaders 22 | 23 | Args: 24 | data_path 25 | filenames 26 | height 27 | width 28 | frame_idxs 29 | num_scales 30 | is_train 31 | img_ext 32 | """ 33 | def __init__(self, 34 | data_path, 35 | filenames, 36 | height, 37 | width, 38 | frame_idxs, 39 | is_train=False, 40 | img_ext='.jpg', 41 | gt_depth_path = None): 42 | super(FolderDataset, self).__init__() 43 | 44 | self.data_path = data_path 45 | # self.filenames = sorted(os.listdir(os.path.join(data_path, 'cam0', 'data')))[1:-2]#420-1940 46 | self.filenames = sorted(os.listdir(os.path.join(data_path, 'cam0', 'data'))) # 420-1940 47 | self.height = height 48 | self.width = width 49 | self.interp = Image.ANTIALIAS 50 | self.is_train = is_train 51 | self.frame_idxs = frame_idxs 52 | self.loader = pil_loader 53 | self.to_tensor = transforms.ToTensor() 54 | 55 
| fx = 435.2047 56 | fy = 435.2047 57 | w = 752 58 | h = 480 59 | self.K = np.array([[fx/w, 0, 0.5, 0], 60 | [0, fy/h, 0.5, 0], 61 | [0, 0, 1, 0], 62 | [0, 0, 0, 1]], dtype=np.float32) 63 | 64 | # Need to specify augmentations differently in pytorch 1.0 compared with 0.4 65 | if int(torch.__version__.split('.')[0]) > 0: 66 | self.brightness = (0.8, 1.2) 67 | self.contrast = (0.8, 1.2) 68 | self.saturation = (0.8, 1.2) 69 | self.hue = (-0.1, 0.1) 70 | else: 71 | self.brightness = 0.2 72 | self.contrast = 0.2 73 | self.saturation = 0.2 74 | self.hue = 0.1 75 | 76 | self.resize = transforms.Resize((self.height, self.width), interpolation=self.interp) 77 | 78 | self.flag = np.zeros(self.__len__(), dtype=np.int64) 79 | 80 | def preprocess(self, inputs, color_aug): 81 | """Resize colour images to the required scales and augment if required 82 | 83 | We create the color_aug object in advance and apply the same augmentation to all 84 | images in this item. This ensures that all images input to the pose network receive the 85 | same augmentation. 86 | """ 87 | for k in list(inputs): 88 | if "color" in k: 89 | n, im, i = k 90 | inputs[(n, im, 0)] = self.resize(inputs[(n, im, - 1)]) 91 | 92 | for k in list(inputs): 93 | if "color" in k: 94 | f = inputs[k] 95 | n, im, i = k 96 | inputs[(n, im, i)] = self.to_tensor(f) 97 | if i == 0: 98 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f)) 99 | 100 | def __len__(self): 101 | return len(self.filenames)-1 102 | 103 | def __getitem__(self, index): 104 | """Returns a single training item from the dataset as a dictionary. 105 | 106 | Values correspond to torch tensors. 107 | Keys in the dictionary are either strings or tuples: 108 | 109 | ("color", , ) for raw colour images, 110 | ("color_aug", , ) for augmented colour images, 111 | ("K", scale) or ("inv_K", scale) for camera intrinsics, 112 | "stereo_T" for camera extrinsics, and 113 | "depth_gt" for ground truth depth maps. 114 | 115 | is either: 116 | an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index', 117 | or 118 | "s" for the opposite image in the stereo pair. 
119 | 120 | is an integer representing the scale of the image relative to the fullsize image: 121 | -1 images at native resolution as loaded from disk 122 | 0 images resized to (self.width, self.height ) 123 | 1 images resized to (self.width // 2, self.height // 2) 124 | 2 images resized to (self.width // 4, self.height // 4) 125 | 3 images resized to (self.width // 8, self.height // 8) 126 | """ 127 | inputs = {} 128 | 129 | do_color_aug = self.is_train and random.random() > 0.5 130 | do_flip = self.is_train and random.random() > 0.5 131 | 132 | for i in self.frame_idxs: 133 | if i=='s': 134 | filename = os.path.join('cam1', 'data', self.filenames[index]) 135 | else: 136 | filename = os.path.join('cam0', 'data', self.filenames[index+i]) 137 | 138 | inputs[("color", i, -1)] = self.get_color(filename, do_flip) 139 | 140 | # adjusting intrinsics to match each scale in the pyramid 141 | K = self.K.copy() 142 | K[0, :] *= self.width 143 | K[1, :] *= self.height 144 | inv_K = np.linalg.pinv(K) 145 | 146 | inputs[("K")] = torch.from_numpy(K) 147 | inputs[("inv_K")] = torch.from_numpy(inv_K) 148 | 149 | if do_color_aug: 150 | color_aug = transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue) 151 | else: 152 | color_aug = (lambda x: x) 153 | 154 | self.preprocess(inputs, color_aug) 155 | 156 | for i in self.frame_idxs: 157 | del inputs[("color", i, -1)] 158 | 159 | if "s" in self.frame_idxs: 160 | stereo_T = np.eye(4, dtype=np.float32) 161 | baseline_sign = -1 if do_flip else 1 162 | side_sign = -1 163 | stereo_T[0, 3] = side_sign * baseline_sign * 0.1 164 | inputs["stereo_T"] = torch.from_numpy(stereo_T) 165 | 166 | return inputs 167 | 168 | def get_color(self, filename, do_flip): 169 | color = self.loader(os.path.join(self.data_path, filename)) 170 | 171 | if do_flip: 172 | color = color.transpose(Image.FLIP_LEFT_RIGHT) 173 | 174 | return color -------------------------------------------------------------------------------- /mono/datasets/folder_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import random 3 | import numpy as np 4 | from PIL import Image # using pillow-simd for increased speed 5 | import os 6 | 7 | import torch 8 | import torch.utils.data as data 9 | from torchvision import transforms 10 | 11 | 12 | def pil_loader(filename): 13 | # open path as file to avoid ResourceWarning 14 | # (https://github.com/python-pillow/Pillow/issues/835) 15 | with open(filename, 'rb') as f: 16 | with Image.open(f) as img: 17 | return img.convert('RGB') 18 | 19 | 20 | class FolderDataset(data.Dataset): 21 | """Superclass for monocular dataloaders 22 | 23 | Args: 24 | data_path 25 | filenames 26 | height 27 | width 28 | frame_idxs 29 | num_scales 30 | is_train 31 | img_ext 32 | """ 33 | def __init__(self, 34 | data_path, 35 | filenames, 36 | height, 37 | width, 38 | frame_idxs, 39 | is_train=False, 40 | img_ext='.jpg', 41 | gt_depth_path = None): 42 | super(FolderDataset, self).__init__() 43 | 44 | self.data_path = data_path 45 | self.filenames = sorted(os.listdir(data_path)) 46 | self.height = height 47 | self.width = width 48 | self.interp = Image.ANTIALIAS 49 | self.is_train = is_train 50 | self.frame_idxs = frame_idxs 51 | self.loader = pil_loader 52 | self.to_tensor = transforms.ToTensor() 53 | self.K = np.array([[0.9765, 0, 0.5, 0], 54 | [0, 1.736, 0.5, 0], 55 | [0, 0, 1, 0], 56 | [0, 0, 0, 1]], dtype=np.float32) 57 | 58 | # Need to specify 
augmentations differently in pytorch 1.0 compared with 0.4 59 | if int(torch.__version__.split('.')[0]) > 0: 60 | self.brightness = (0.8, 1.2) 61 | self.contrast = (0.8, 1.2) 62 | self.saturation = (0.8, 1.2) 63 | self.hue = (-0.1, 0.1) 64 | else: 65 | self.brightness = 0.2 66 | self.contrast = 0.2 67 | self.saturation = 0.2 68 | self.hue = 0.1 69 | 70 | self.resize = transforms.Resize((self.height, self.width), interpolation=self.interp) 71 | 72 | self.flag = np.zeros(self.__len__(), dtype=np.int64) 73 | 74 | def preprocess(self, inputs, color_aug): 75 | """Resize colour images to the required scales and augment if required 76 | 77 | We create the color_aug object in advance and apply the same augmentation to all 78 | images in this item. This ensures that all images input to the pose network receive the 79 | same augmentation. 80 | """ 81 | for k in list(inputs): 82 | if "color" in k: 83 | n, im, i = k 84 | inputs[(n, im, 0)] = self.resize(inputs[(n, im, - 1)]) 85 | 86 | for k in list(inputs): 87 | if "color" in k: 88 | f = inputs[k] 89 | n, im, i = k 90 | inputs[(n, im, i)] = self.to_tensor(f) 91 | if i == 0: 92 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f)) 93 | 94 | def __len__(self): 95 | return len(self.filenames) 96 | 97 | def __getitem__(self, index): 98 | """Returns a single training item from the dataset as a dictionary. 99 | 100 | Values correspond to torch tensors. 101 | Keys in the dictionary are either strings or tuples: 102 | 103 | ("color", <frame_id>, <scale>) for raw colour images, 104 | ("color_aug", <frame_id>, <scale>) for augmented colour images, 105 | ("K", scale) or ("inv_K", scale) for camera intrinsics, 106 | "stereo_T" for camera extrinsics, and 107 | "depth_gt" for ground truth depth maps. 108 | 109 | <frame_id> is either: 110 | an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index', 111 | or 112 | "s" for the opposite image in the stereo pair.
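For example, ("K", 0) holds the intrinsics already rescaled to pixels for the (self.width, self.height) image: the normalised matrix self.K from __init__ (fx = 0.9765, fy = 1.736, in units of image width and height) has its first row multiplied by self.width and its second row by self.height, and ("inv_K", 0) stores the pseudo-inverse of that result.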
113 | 114 | is an integer representing the scale of the image relative to the fullsize image: 115 | -1 images at native resolution as loaded from disk 116 | 0 images resized to (self.width, self.height ) 117 | 1 images resized to (self.width // 2, self.height // 2) 118 | 2 images resized to (self.width // 4, self.height // 4) 119 | 3 images resized to (self.width // 8, self.height // 8) 120 | """ 121 | inputs = {} 122 | 123 | do_color_aug = self.is_train and random.random() > 0.5 124 | do_flip = self.is_train and random.random() > 0.5 125 | 126 | for i in self.frame_idxs: 127 | try: 128 | filename = self.filenames[index+i] 129 | except: 130 | filename = self.filenames[index] 131 | 132 | inputs[("color", i, -1)] = self.get_color(filename, do_flip) 133 | 134 | # adjusting intrinsics to match each scale in the pyramid 135 | K = self.K.copy() 136 | K[0, :] *= self.width 137 | K[1, :] *= self.height 138 | inv_K = np.linalg.pinv(K) 139 | 140 | inputs[("K", 0)] = torch.from_numpy(K) 141 | inputs[("inv_K", 0)] = torch.from_numpy(inv_K) 142 | 143 | if do_color_aug: 144 | color_aug = transforms.ColorJitter.get_params(self.brightness, self.contrast, self.saturation, self.hue) 145 | else: 146 | color_aug = (lambda x: x) 147 | 148 | self.preprocess(inputs, color_aug) 149 | 150 | for i in self.frame_idxs: 151 | del inputs[("color", i, -1)] 152 | 153 | return inputs 154 | 155 | def get_color(self, filename, do_flip): 156 | color = self.loader(os.path.join(self.data_path, filename)) 157 | 158 | if do_flip: 159 | color = color.transpose(Image.FLIP_LEFT_RIGHT) 160 | 161 | return color -------------------------------------------------------------------------------- /mono/datasets/get_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | import os 6 | from .utils import readlines, sec_to_hm_str 7 | 8 | 9 | def get_dataset(cfg, training=True): 10 | dataset_name = cfg['name'] 11 | if dataset_name == 'kitti': 12 | from .kitti_dataset import KITTIRAWDataset as dataset 13 | elif dataset_name == 'kitti_odom': 14 | from .kitti_dataset import KITTIOdomDataset as dataset 15 | elif dataset_name == 'cityscape': 16 | from .cityscape_dataset import CityscapeDataset as dataset 17 | elif dataset_name == 'folder': 18 | from .folder_dataset import FolderDataset as dataset 19 | elif dataset_name == 'eth3d': 20 | from .eth3d_dataset import FolderDataset as dataset 21 | elif dataset_name == 'euroc': 22 | from .euroc_dataset import FolderDataset as dataset 23 | 24 | fpath = os.path.join(os.path.dirname(__file__), "splits", cfg.split, "{}_files.txt") 25 | filenames = readlines(fpath.format("train")) if training else readlines(fpath.format('val')) 26 | img_ext = '.png' if cfg.png == True else '.jpg' 27 | 28 | dataset = dataset(cfg.in_path, 29 | filenames, 30 | cfg.height, 31 | cfg.width, 32 | cfg.frame_ids if training else [0], 33 | is_train=training, 34 | img_ext=img_ext, 35 | gt_depth_path=cfg.gt_depth_path) 36 | return dataset -------------------------------------------------------------------------------- /mono/datasets/kitti_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import scipy.misc 5 | import numpy as np 6 | import PIL.Image as pil 7 | import datetime 8 | 9 | from .kitti_utils import generate_depth_map, read_calib_file, 
transform_from_rot_trans, pose_from_oxts_packet 10 | from .mono_dataset import MonoDataset 11 | 12 | 13 | class KITTIDataset(MonoDataset): 14 | """Superclass for different types of KITTI dataset loaders 15 | """ 16 | def __init__(self, *args, **kwargs): 17 | super(KITTIDataset, self).__init__(*args, **kwargs) 18 | 19 | self.K = np.array([[0.58, 0, 0.5, 0], 20 | [0, 1.92, 0.5, 0], 21 | [0, 0, 1, 0], 22 | [0, 0, 0, 1]], dtype=np.float32) 23 | 24 | self.full_res_shape = (1242, 375) 25 | self.side_map = {"2": 2, "3": 3, "l": 2, "r": 3} 26 | 27 | def check_depth(self): 28 | line = self.filenames[0].split() 29 | scene_name = line[0] 30 | frame_index = int(line[1]) 31 | 32 | velo_filename = os.path.join( 33 | self.data_path, 34 | scene_name, 35 | "velodyne_points/data/{:010d}.bin".format(int(frame_index))) 36 | 37 | return os.path.isfile(velo_filename) 38 | 39 | def get_color(self, folder, frame_index, side, do_flip): 40 | color = self.loader(self.get_image_path(folder, frame_index, side)) 41 | 42 | if do_flip: 43 | color = color.transpose(pil.FLIP_LEFT_RIGHT) 44 | 45 | return color 46 | 47 | 48 | class KITTIRAWDataset(KITTIDataset): 49 | """KITTI dataset which loads the original velodyne depth maps for ground truth 50 | """ 51 | def __init__(self, *args, **kwargs): 52 | super(KITTIRAWDataset, self).__init__(*args, **kwargs) 53 | 54 | def get_image_path(self, folder, frame_index, side): 55 | f_str = "{:010d}{}".format(frame_index, self.img_ext) 56 | image_path = os.path.join( 57 | self.data_path, folder, "image_0{}/data".format(self.side_map[side]), f_str) 58 | return image_path 59 | 60 | def get_depth(self, folder, frame_index, side, do_flip): 61 | calib_path = os.path.join(self.data_path, folder.split("/")[0]) 62 | 63 | velo_filename = os.path.join( 64 | self.data_path, 65 | folder, 66 | "velodyne_points/data/{:010d}.bin".format(int(frame_index))) 67 | 68 | depth_gt = generate_depth_map(calib_path, velo_filename, self.side_map[side]) 69 | depth_gt = scipy.misc.imresize(depth_gt, self.full_res_shape[::-1], "nearest") 70 | 71 | if do_flip: 72 | depth_gt = np.fliplr(depth_gt) 73 | 74 | return depth_gt 75 | 76 | def get_pose(self, folder, frame_index, offset): 77 | oxts_root = os.path.join(self.data_path, folder, 'oxts') 78 | with open(os.path.join(oxts_root, 'timestamps.txt')) as f: 79 | timestamps = np.array([datetime.datetime.strptime(ts[:-3], "%Y-%m-%d %H:%M:%S.%f").timestamp() 80 | for ts in f.read().splitlines()]) 81 | 82 | speed0 = np.genfromtxt(os.path.join(oxts_root, 'data', '{:010d}.txt'.format(frame_index)))[[8, 9, 10]] 83 | # speed1 = np.genfromtxt(os.path.join(oxts_root, 'data', '{:010d}.txt'.format(frame_index+offset)))[[8, 9, 10]] 84 | 85 | timestamp0 = timestamps[frame_index] 86 | timestamp1 = timestamps[frame_index+offset] 87 | # displacement = 0.5 * (speed0 + speed1) * (timestamp1 - timestamp0) 88 | displacement = speed0 * (timestamp1 - timestamp0) 89 | 90 | imu2velo = read_calib_file(os.path.join(self.data_path, os.path.dirname(folder), 'calib_imu_to_velo.txt')) 91 | velo2cam = read_calib_file(os.path.join(self.data_path, os.path.dirname(folder), 'calib_velo_to_cam.txt')) 92 | cam2cam = read_calib_file(os.path.join(self.data_path, os.path.dirname(folder), 'calib_cam_to_cam.txt')) 93 | 94 | velo2cam_mat = transform_from_rot_trans(velo2cam['R'], velo2cam['T']) 95 | imu2velo_mat = transform_from_rot_trans(imu2velo['R'], imu2velo['T']) 96 | cam_2rect_mat = transform_from_rot_trans(cam2cam['R_rect_00'], np.zeros(3)) 97 | 98 | imu2cam = cam_2rect_mat @ velo2cam_mat @ imu2velo_mat 99 | 
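# The step below rotates the IMU-frame displacement into the rectified camera frame and
# adds the IMU-to-camera offset, i.e. odo_pose = R_imu2cam @ displacement + t_imu2cam,
# giving a translation-only odometry target between the two frames. A minimal standalone
# sketch of the same operation, with made-up numbers rather than real calibration data:
#     import numpy as np
#     imu2cam = np.eye(4)                        # hypothetical 4x4 IMU-to-camera transform
#     displacement = np.array([1.2, 0.0, 0.0])   # metres travelled in the IMU frame
#     odo_pose = imu2cam[:3, :3] @ displacement + imu2cam[:3, 3]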
100 | odo_pose = imu2cam[:3,:3] @ displacement + imu2cam[:3,3] 101 | 102 | return odo_pose 103 | 104 | 105 | class KITTIOdomDataset(KITTIDataset): 106 | """KITTI dataset for odometry training and testing 107 | """ 108 | def __init__(self, *args, **kwargs): 109 | super(KITTIOdomDataset, self).__init__(*args, **kwargs) 110 | 111 | def get_image_path(self, folder, frame_index, side): 112 | f_str = "{:06d}{}".format(frame_index, self.img_ext) 113 | side_map = {"l": 0, "r": 1} 114 | image_path = os.path.join( 115 | self.data_path, 116 | "sequences/{:02d}".format(int(folder)), 117 | "image_{}".format(side_map[side]), 118 | f_str) 119 | return image_path 120 | 121 | 122 | class KITTIDepthDataset(KITTIDataset): 123 | """KITTI dataset which uses the updated ground truth depth maps 124 | """ 125 | def __init__(self, *args, **kwargs): 126 | super(KITTIDepthDataset, self).__init__(*args, **kwargs) 127 | 128 | def get_image_path(self, folder, frame_index, side): 129 | f_str = "{:010d}{}".format(frame_index, self.img_ext) 130 | image_path = os.path.join( 131 | self.data_path, 132 | folder, 133 | "image_0{}/data".format(self.side_map[side]), 134 | f_str) 135 | return image_path 136 | 137 | def get_depth(self, folder, frame_index, side, do_flip): 138 | f_str = "{:010d}.png".format(frame_index) 139 | depth_path = os.path.join( 140 | self.data_path, 141 | folder, 142 | "proj_depth/groundtruth/image_0{}".format(self.side_map[side]), 143 | f_str) 144 | 145 | depth_gt = pil.open(depth_path) 146 | depth_gt = depth_gt.resize(self.full_res_shape, pil.NEAREST) 147 | depth_gt = np.array(depth_gt).astype(np.float32) / 256 148 | 149 | if do_flip: 150 | depth_gt = np.fliplr(depth_gt) 151 | 152 | return depth_gt 153 | -------------------------------------------------------------------------------- /mono/datasets/kitti_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from __future__ import absolute_import, division, print_function 6 | 7 | import os 8 | import numpy as np 9 | from collections import Counter 10 | 11 | 12 | def load_velodyne_points(filename): 13 | """Load 3D point cloud from KITTI file format 14 | (adapted from https://github.com/hunse/kitti) 15 | """ 16 | points = np.fromfile(filename, dtype=np.float32).reshape(-1, 4) 17 | points[:, 3] = 1.0 # homogeneous 18 | return points 19 | 20 | 21 | def read_calib_file(path): 22 | """Read KITTI calibration file 23 | (from https://github.com/hunse/kitti) 24 | """ 25 | float_chars = set("0123456789.e+- ") 26 | data = {} 27 | with open(path, 'r') as f: 28 | for line in f.readlines(): 29 | key, value = line.split(':', 1) 30 | value = value.strip() 31 | data[key] = value 32 | if float_chars.issuperset(value): 33 | # try to cast to float array 34 | try: 35 | data[key] = np.array(list(map(float, value.split(' ')))) 36 | except ValueError: 37 | # casting error: data[key] already eq. 
value, so pass 38 | pass 39 | 40 | return data 41 | 42 | 43 | def sub2ind(matrixSize, rowSub, colSub): 44 | """Convert row, col matrix subscripts to linear indices 45 | """ 46 | m, n = matrixSize 47 | return rowSub * (n-1) + colSub - 1 48 | 49 | 50 | def generate_depth_map(calib_dir, velo_filename, cam=2, vel_depth=False): 51 | """Generate a depth map from velodyne data 52 | """ 53 | # load calibration files 54 | cam2cam = read_calib_file(os.path.join(calib_dir, 'calib_cam_to_cam.txt')) 55 | velo2cam = read_calib_file(os.path.join(calib_dir, 'calib_velo_to_cam.txt')) 56 | velo2cam = np.hstack((velo2cam['R'].reshape(3, 3), velo2cam['T'][..., np.newaxis])) 57 | velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0]))) 58 | 59 | # get image shape 60 | im_shape = cam2cam["S_rect_02"][::-1].astype(np.int32) 61 | 62 | # compute projection matrix velodyne->image plane 63 | R_cam2rect = np.eye(4) 64 | R_cam2rect[:3, :3] = cam2cam['R_rect_00'].reshape(3, 3) 65 | P_rect = cam2cam['P_rect_0'+str(cam)].reshape(3, 4) 66 | P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam) 67 | 68 | # load velodyne points and remove all behind image plane (approximation) 69 | # each row of the velodyne data is forward, left, up, reflectance 70 | velo = load_velodyne_points(velo_filename) 71 | velo = velo[velo[:, 0] >= 0, :] 72 | 73 | # project the points to the camera 74 | velo_pts_im = np.dot(P_velo2im, velo.T).T 75 | velo_pts_im[:, :2] = velo_pts_im[:, :2] / velo_pts_im[:, 2][..., np.newaxis] 76 | 77 | if vel_depth: 78 | velo_pts_im[:, 2] = velo[:, 0] 79 | 80 | # check if in bounds 81 | # use minus 1 to get the exact same value as KITTI matlab code 82 | velo_pts_im[:, 0] = np.round(velo_pts_im[:, 0]) - 1 83 | velo_pts_im[:, 1] = np.round(velo_pts_im[:, 1]) - 1 84 | val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0) 85 | val_inds = val_inds & (velo_pts_im[:, 0] < im_shape[1]) & (velo_pts_im[:, 1] < im_shape[0]) 86 | velo_pts_im = velo_pts_im[val_inds, :] 87 | 88 | # project to image 89 | depth = np.zeros((im_shape[:2])) 90 | depth[velo_pts_im[:, 1].astype(np.int), velo_pts_im[:, 0].astype(np.int)] = velo_pts_im[:, 2] 91 | 92 | # find the duplicate points and choose the closest depth 93 | inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0]) 94 | dupe_inds = [item for item, count in Counter(inds).items() if count > 1] 95 | for dd in dupe_inds: 96 | pts = np.where(inds == dd)[0] 97 | x_loc = int(velo_pts_im[pts[0], 0]) 98 | y_loc = int(velo_pts_im[pts[0], 1]) 99 | depth[y_loc, x_loc] = velo_pts_im[pts, 2].min() 100 | depth[depth < 0] = 0 101 | 102 | return depth 103 | 104 | 105 | def rotx(t): 106 | """Rotation about the x-axis.""" 107 | c = np.cos(t) 108 | s = np.sin(t) 109 | return np.array([[1, 0, 0], 110 | [0, c, -s], 111 | [0, s, c]]) 112 | 113 | 114 | def roty(t): 115 | """Rotation about the y-axis.""" 116 | c = np.cos(t) 117 | s = np.sin(t) 118 | return np.array([[c, 0, s], 119 | [0, 1, 0], 120 | [-s, 0, c]]) 121 | 122 | 123 | def rotz(t): 124 | """Rotation about the z-axis.""" 125 | c = np.cos(t) 126 | s = np.sin(t) 127 | return np.array([[c, -s, 0], 128 | [s, c, 0], 129 | [0, 0, 1]]) 130 | 131 | 132 | def pose_from_oxts_packet(metadata, scale): 133 | 134 | lat, lon, alt, roll, pitch, yaw = metadata 135 | """Helper method to compute a SE(3) pose matrix from an OXTS packet. 136 | Taken from https://github.com/utiasSTARS/pykitti 137 | """ 138 | 139 | er = 6378137. # earth radius (approx.) 
in meters 140 | # Use a Mercator projection to get the translation vector 141 | 142 | tx = scale * lon * np.pi * er / 180. 143 | ty = scale * er * \ 144 | np.log(np.tan((90. + lat) * np.pi / 360.)) 145 | tz = alt 146 | t = np.array([tx, ty, tz]).reshape(-1,1) 147 | 148 | # Use the Euler angles to get the rotation matrix 149 | Rx = rotx(roll) 150 | Ry = roty(pitch) 151 | Rz = rotz(yaw) 152 | R = Rz.dot(Ry.dot(Rx)) 153 | return transform_from_rot_trans(R, t) 154 | 155 | 156 | def transform_from_rot_trans(R, t): 157 | """Transforation matrix from rotation matrix and translation vector.""" 158 | R = R.reshape(3, 3) 159 | t = t.reshape(3, 1) 160 | return np.vstack((np.hstack([R, t]), [0, 0, 0, 1])) 161 | 162 | 163 | -------------------------------------------------------------------------------- /mono/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from .build_loader import build_dataloader -------------------------------------------------------------------------------- /mono/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from functools import partial 6 | 7 | from mmcv.runner import get_dist_info 8 | from mmcv.parallel import collate 9 | from torch.utils.data import DataLoader 10 | from .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler 11 | 12 | # https://github.com/pytorch/pytorch/issues/973 13 | import resource 14 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 15 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 16 | 17 | 18 | def build_dataloader(dataset, 19 | imgs_per_gpu, 20 | workers_per_gpu, 21 | num_gpus=1, 22 | dist=True, 23 | **kwargs): 24 | shuffle = kwargs.get('shuffle', True) 25 | if dist: 26 | rank, world_size = get_dist_info() 27 | if shuffle: 28 | sampler = DistributedGroupSampler(dataset, 29 | imgs_per_gpu, 30 | world_size, 31 | rank) 32 | else: 33 | sampler = DistributedSampler(dataset, 34 | world_size, 35 | rank, 36 | shuffle=False) 37 | batch_size = imgs_per_gpu 38 | num_workers = workers_per_gpu 39 | else: 40 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 41 | batch_size = num_gpus * imgs_per_gpu 42 | num_workers = num_gpus * workers_per_gpu 43 | 44 | data_loader = DataLoader(dataset, 45 | batch_size=batch_size, 46 | sampler=sampler, 47 | num_workers=num_workers, 48 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 49 | pin_memory=False, 50 | **kwargs, 51 | drop_last=True 52 | ) 53 | 54 | return data_loader 55 | -------------------------------------------------------------------------------- /mono/datasets/loader/sampler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from __future__ import division 6 | 7 | import math 8 | import torch 9 | import numpy as np 10 | 11 | from torch.distributed import get_world_size, get_rank 12 | from torch.utils.data import Sampler 13 | from torch.utils.data import DistributedSampler as _DistributedSampler 14 | 15 | 16 | class DistributedSampler(_DistributedSampler): 17 | 18 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 19 | 
super().__init__(dataset, num_replicas=num_replicas, rank=rank) 20 | self.shuffle = shuffle 21 | 22 | def __iter__(self): 23 | # deterministically shuffle based on epoch 24 | if self.shuffle: 25 | g = torch.Generator() 26 | g.manual_seed(self.epoch) 27 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 28 | else: 29 | indices = torch.arange(len(self.dataset)).tolist() 30 | 31 | # add extra samples to make it evenly divisible 32 | indices += indices[:(self.total_size - len(indices))] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | indices = indices[self.rank:self.total_size:self.num_replicas] 37 | assert len(indices) == self.num_samples 38 | 39 | return iter(indices) 40 | 41 | 42 | class GroupSampler(Sampler): 43 | 44 | def __init__(self, dataset, samples_per_gpu=1): 45 | assert hasattr(dataset, 'flag') 46 | self.dataset = dataset 47 | self.samples_per_gpu = samples_per_gpu 48 | self.flag = dataset.flag.astype(np.int64) 49 | self.group_sizes = np.bincount(self.flag) 50 | self.num_samples = 0 51 | for i, size in enumerate(self.group_sizes): 52 | self.num_samples += int(np.ceil( 53 | size / self.samples_per_gpu)) * self.samples_per_gpu 54 | 55 | def __iter__(self): 56 | indices = [] 57 | for i, size in enumerate(self.group_sizes): 58 | if size == 0: 59 | continue 60 | indice = np.where(self.flag == i)[0] 61 | assert len(indice) == size 62 | np.random.shuffle(indice) 63 | num_extra = int(np.ceil(size / self.samples_per_gpu) 64 | ) * self.samples_per_gpu - len(indice) 65 | indice = np.concatenate([indice, indice[:num_extra]]) 66 | indices.append(indice) 67 | indices = np.concatenate(indices) 68 | indices = [ 69 | indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu] 70 | for i in np.random.permutation( 71 | range(len(indices) // self.samples_per_gpu)) 72 | ] 73 | indices = np.concatenate(indices) 74 | indices = torch.from_numpy(indices).long() 75 | assert len(indices) == self.num_samples 76 | return iter(indices) 77 | 78 | def __len__(self): 79 | return self.num_samples 80 | 81 | 82 | class DistributedGroupSampler(Sampler): 83 | """Sampler that restricts data loading to a subset of the dataset. 84 | It is especially useful in conjunction with 85 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 86 | process can pass a DistributedSampler instance as a DataLoader sampler, 87 | and load a subset of the original dataset that is exclusive to it. 88 | .. note:: 89 | Dataset is assumed to be of constant size. 90 | Arguments: 91 | dataset: Dataset used for sampling. 92 | num_replicas (optional): Number of processes participating in 93 | distributed training. 94 | rank (optional): Rank of the current process within num_replicas. 
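Example (an illustrative sketch with placeholder names; see build_loader.py for the full wiring):
    sampler = DistributedGroupSampler(dataset, samples_per_gpu=2, num_replicas=world_size, rank=rank)
    loader = DataLoader(dataset, batch_size=2, sampler=sampler, drop_last=True)
    for epoch in range(num_epochs):
        sampler.set_epoch(epoch)  # reshuffle deterministically at each epoch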
95 | """ 96 | 97 | def __init__(self, 98 | dataset, 99 | samples_per_gpu=1, 100 | num_replicas=None, 101 | rank=None): 102 | if num_replicas is None: 103 | num_replicas = get_world_size() 104 | if rank is None: 105 | rank = get_rank() 106 | self.dataset = dataset 107 | self.samples_per_gpu = samples_per_gpu 108 | self.num_replicas = num_replicas 109 | self.rank = rank 110 | self.epoch = 0 111 | 112 | assert hasattr(self.dataset, 'flag') 113 | self.flag = self.dataset.flag 114 | self.group_sizes = np.bincount(self.flag) 115 | 116 | self.num_samples = 0 117 | for i, j in enumerate(self.group_sizes): 118 | self.num_samples += int( 119 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 120 | self.num_replicas)) * self.samples_per_gpu 121 | self.total_size = self.num_samples * self.num_replicas 122 | 123 | def __iter__(self): 124 | # deterministically shuffle based on epoch 125 | g = torch.Generator() 126 | g.manual_seed(self.epoch) 127 | 128 | indices = [] 129 | for i, size in enumerate(self.group_sizes): 130 | if size > 0: 131 | indice = np.where(self.flag == i)[0] 132 | assert len(indice) == size 133 | indice = indice[list(torch.randperm(int(size), 134 | generator=g))].tolist() 135 | extra = int( 136 | math.ceil( 137 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 138 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 139 | indice += indice[:extra] 140 | indices += indice 141 | 142 | assert len(indices) == self.total_size 143 | 144 | indices = [ 145 | indices[j] for i in list( 146 | torch.randperm(len(indices) // self.samples_per_gpu, 147 | generator=g)) 148 | for j in range(i * self.samples_per_gpu, (i + 1) * 149 | self.samples_per_gpu) 150 | ] 151 | 152 | # subsample 153 | offset = self.num_samples * self.rank 154 | indices = indices[offset:offset + self.num_samples] 155 | assert len(indices) == self.num_samples 156 | 157 | return iter(indices) 158 | 159 | def __len__(self): 160 | return self.num_samples 161 | 162 | def set_epoch(self, epoch): 163 | self.epoch = epoch 164 | -------------------------------------------------------------------------------- /mono/datasets/splits/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) -------------------------------------------------------------------------------- /mono/datasets/splits/benchmark/eigen_to_benchmark_ids.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/datasets/splits/benchmark/eigen_to_benchmark_ids.npy -------------------------------------------------------------------------------- /mono/datasets/splits/cityscape/gen_cityscape_split.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | import os 3 | 4 | def main(): 5 | file = '/ssd/Cityscapes/leftImg8bit_sequence_trainvaltest.zip' 6 | archive = zipfile.ZipFile(file, 'r') 7 | namelist = sorted(archive.namelist()) 8 | 9 | if os.path.exists(os.path.join('..', 'splits', 'cityscape')): 10 | print('path exists') 11 | else: 12 | os.makedirs(os.path.join('..', 'splits', 'cityscape')) 13 | with open(os.path.join('..', 'splits', 'cityscape', 'train.txt'), 'w') as trainfile: 14 | with open(os.path.join('..', 'splits', 'cityscape', 'val.txt'), 'w') as valfile: 15 | with open(os.path.join('..', 'splits', 'cityscape', 
'test.txt'), 'w') as testfile: 16 | for i in range(len(namelist)): 17 | str = namelist[i] 18 | if 'png' in str: 19 | if 'train' in str: 20 | trainfile.write(str) 21 | trainfile.write('\n') 22 | elif 'val' in str: 23 | valfile.write(str) 24 | valfile.write('\n') 25 | elif 'test' in str: 26 | testfile.write(str) 27 | testfile.write('\n') 28 | 29 | 30 | 31 | if __name__ == '__main__': 32 | main() -------------------------------------------------------------------------------- /mono/datasets/splits/cityscape/val_files.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/datasets/splits/cityscape/val_files.txt -------------------------------------------------------------------------------- /mono/datasets/splits/exp/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) -------------------------------------------------------------------------------- /mono/datasets/splits/kitti_shot_sequence/gen_split.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | 3 | f = open('val_files.txt', 'w') 4 | for i in range(108): 5 | f.writelines(['2011_09_26/2011_09_26_drive_0001_sync ', str(i).zfill(10), ' l\n']) 6 | 7 | f.close() 8 | print('done') -------------------------------------------------------------------------------- /mono/datasets/splits/kitti_shot_sequence/val_files.txt: -------------------------------------------------------------------------------- 1 | 2011_09_26/2011_09_26_drive_0001_sync 0000000000 l 2 | 2011_09_26/2011_09_26_drive_0001_sync 0000000001 l 3 | 2011_09_26/2011_09_26_drive_0001_sync 0000000002 l 4 | 2011_09_26/2011_09_26_drive_0001_sync 0000000003 l 5 | 2011_09_26/2011_09_26_drive_0001_sync 0000000004 l 6 | 2011_09_26/2011_09_26_drive_0001_sync 0000000005 l 7 | 2011_09_26/2011_09_26_drive_0001_sync 0000000006 l 8 | 2011_09_26/2011_09_26_drive_0001_sync 0000000007 l 9 | 2011_09_26/2011_09_26_drive_0001_sync 0000000008 l 10 | 2011_09_26/2011_09_26_drive_0001_sync 0000000009 l 11 | 2011_09_26/2011_09_26_drive_0001_sync 0000000010 l 12 | 2011_09_26/2011_09_26_drive_0001_sync 0000000011 l 13 | 2011_09_26/2011_09_26_drive_0001_sync 0000000012 l 14 | 2011_09_26/2011_09_26_drive_0001_sync 0000000013 l 15 | 2011_09_26/2011_09_26_drive_0001_sync 0000000014 l 16 | 2011_09_26/2011_09_26_drive_0001_sync 0000000015 l 17 | 2011_09_26/2011_09_26_drive_0001_sync 0000000016 l 18 | 2011_09_26/2011_09_26_drive_0001_sync 0000000017 l 19 | 2011_09_26/2011_09_26_drive_0001_sync 0000000018 l 20 | 2011_09_26/2011_09_26_drive_0001_sync 0000000019 l 21 | 2011_09_26/2011_09_26_drive_0001_sync 0000000020 l 22 | 2011_09_26/2011_09_26_drive_0001_sync 0000000021 l 23 | 2011_09_26/2011_09_26_drive_0001_sync 0000000022 l 24 | 2011_09_26/2011_09_26_drive_0001_sync 0000000023 l 25 | 2011_09_26/2011_09_26_drive_0001_sync 0000000024 l 26 | 2011_09_26/2011_09_26_drive_0001_sync 0000000025 l 27 | 2011_09_26/2011_09_26_drive_0001_sync 0000000026 l 28 | 2011_09_26/2011_09_26_drive_0001_sync 0000000027 l 29 | 2011_09_26/2011_09_26_drive_0001_sync 0000000028 l 30 | 2011_09_26/2011_09_26_drive_0001_sync 0000000029 l 31 | 2011_09_26/2011_09_26_drive_0001_sync 0000000030 l 32 | 2011_09_26/2011_09_26_drive_0001_sync 0000000031 l 33 | 2011_09_26/2011_09_26_drive_0001_sync 0000000032 l 
34 | 2011_09_26/2011_09_26_drive_0001_sync 0000000033 l 35 | 2011_09_26/2011_09_26_drive_0001_sync 0000000034 l 36 | 2011_09_26/2011_09_26_drive_0001_sync 0000000035 l 37 | 2011_09_26/2011_09_26_drive_0001_sync 0000000036 l 38 | 2011_09_26/2011_09_26_drive_0001_sync 0000000037 l 39 | 2011_09_26/2011_09_26_drive_0001_sync 0000000038 l 40 | 2011_09_26/2011_09_26_drive_0001_sync 0000000039 l 41 | 2011_09_26/2011_09_26_drive_0001_sync 0000000040 l 42 | 2011_09_26/2011_09_26_drive_0001_sync 0000000041 l 43 | 2011_09_26/2011_09_26_drive_0001_sync 0000000042 l 44 | 2011_09_26/2011_09_26_drive_0001_sync 0000000043 l 45 | 2011_09_26/2011_09_26_drive_0001_sync 0000000044 l 46 | 2011_09_26/2011_09_26_drive_0001_sync 0000000045 l 47 | 2011_09_26/2011_09_26_drive_0001_sync 0000000046 l 48 | 2011_09_26/2011_09_26_drive_0001_sync 0000000047 l 49 | 2011_09_26/2011_09_26_drive_0001_sync 0000000048 l 50 | 2011_09_26/2011_09_26_drive_0001_sync 0000000049 l 51 | 2011_09_26/2011_09_26_drive_0001_sync 0000000050 l 52 | 2011_09_26/2011_09_26_drive_0001_sync 0000000051 l 53 | 2011_09_26/2011_09_26_drive_0001_sync 0000000052 l 54 | 2011_09_26/2011_09_26_drive_0001_sync 0000000053 l 55 | 2011_09_26/2011_09_26_drive_0001_sync 0000000054 l 56 | 2011_09_26/2011_09_26_drive_0001_sync 0000000055 l 57 | 2011_09_26/2011_09_26_drive_0001_sync 0000000056 l 58 | 2011_09_26/2011_09_26_drive_0001_sync 0000000057 l 59 | 2011_09_26/2011_09_26_drive_0001_sync 0000000058 l 60 | 2011_09_26/2011_09_26_drive_0001_sync 0000000059 l 61 | 2011_09_26/2011_09_26_drive_0001_sync 0000000060 l 62 | 2011_09_26/2011_09_26_drive_0001_sync 0000000061 l 63 | 2011_09_26/2011_09_26_drive_0001_sync 0000000062 l 64 | 2011_09_26/2011_09_26_drive_0001_sync 0000000063 l 65 | 2011_09_26/2011_09_26_drive_0001_sync 0000000064 l 66 | 2011_09_26/2011_09_26_drive_0001_sync 0000000065 l 67 | 2011_09_26/2011_09_26_drive_0001_sync 0000000066 l 68 | 2011_09_26/2011_09_26_drive_0001_sync 0000000067 l 69 | 2011_09_26/2011_09_26_drive_0001_sync 0000000068 l 70 | 2011_09_26/2011_09_26_drive_0001_sync 0000000069 l 71 | 2011_09_26/2011_09_26_drive_0001_sync 0000000070 l 72 | 2011_09_26/2011_09_26_drive_0001_sync 0000000071 l 73 | 2011_09_26/2011_09_26_drive_0001_sync 0000000072 l 74 | 2011_09_26/2011_09_26_drive_0001_sync 0000000073 l 75 | 2011_09_26/2011_09_26_drive_0001_sync 0000000074 l 76 | 2011_09_26/2011_09_26_drive_0001_sync 0000000075 l 77 | 2011_09_26/2011_09_26_drive_0001_sync 0000000076 l 78 | 2011_09_26/2011_09_26_drive_0001_sync 0000000077 l 79 | 2011_09_26/2011_09_26_drive_0001_sync 0000000078 l 80 | 2011_09_26/2011_09_26_drive_0001_sync 0000000079 l 81 | 2011_09_26/2011_09_26_drive_0001_sync 0000000080 l 82 | 2011_09_26/2011_09_26_drive_0001_sync 0000000081 l 83 | 2011_09_26/2011_09_26_drive_0001_sync 0000000082 l 84 | 2011_09_26/2011_09_26_drive_0001_sync 0000000083 l 85 | 2011_09_26/2011_09_26_drive_0001_sync 0000000084 l 86 | 2011_09_26/2011_09_26_drive_0001_sync 0000000085 l 87 | 2011_09_26/2011_09_26_drive_0001_sync 0000000086 l 88 | 2011_09_26/2011_09_26_drive_0001_sync 0000000087 l 89 | 2011_09_26/2011_09_26_drive_0001_sync 0000000088 l 90 | 2011_09_26/2011_09_26_drive_0001_sync 0000000089 l 91 | 2011_09_26/2011_09_26_drive_0001_sync 0000000090 l 92 | 2011_09_26/2011_09_26_drive_0001_sync 0000000091 l 93 | 2011_09_26/2011_09_26_drive_0001_sync 0000000092 l 94 | 2011_09_26/2011_09_26_drive_0001_sync 0000000093 l 95 | 2011_09_26/2011_09_26_drive_0001_sync 0000000094 l 96 | 2011_09_26/2011_09_26_drive_0001_sync 0000000095 l 97 | 
2011_09_26/2011_09_26_drive_0001_sync 0000000096 l 98 | 2011_09_26/2011_09_26_drive_0001_sync 0000000097 l 99 | 2011_09_26/2011_09_26_drive_0001_sync 0000000098 l 100 | 2011_09_26/2011_09_26_drive_0001_sync 0000000099 l 101 | 2011_09_26/2011_09_26_drive_0001_sync 0000000100 l 102 | 2011_09_26/2011_09_26_drive_0001_sync 0000000101 l 103 | 2011_09_26/2011_09_26_drive_0001_sync 0000000102 l 104 | 2011_09_26/2011_09_26_drive_0001_sync 0000000103 l 105 | 2011_09_26/2011_09_26_drive_0001_sync 0000000104 l 106 | 2011_09_26/2011_09_26_drive_0001_sync 0000000105 l 107 | 2011_09_26/2011_09_26_drive_0001_sync 0000000106 l 108 | 2011_09_26/2011_09_26_drive_0001_sync 0000000107 l 109 | -------------------------------------------------------------------------------- /mono/datasets/splits/short/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) -------------------------------------------------------------------------------- /mono/datasets/splits/short/train_files.txt: -------------------------------------------------------------------------------- 1 | 2011_09_26/2011_09_26_drive_0022_sync 473 r 2 | 2011_09_29/2011_09_29_drive_0026_sync 1 l 3 | 2011_09_26/2011_09_26_drive_0087_sync 185 r 4 | 2011_09_30/2011_09_30_drive_0028_sync 497 l 5 | 2011_10_03/2011_10_03_drive_0034_sync 215 l 6 | 2011_10_03/2011_10_03_drive_0042_sync 514 r 7 | 2011_09_30/2011_09_30_drive_0028_sync 2975 l 8 | 2011_10_03/2011_10_03_drive_0034_sync 1214 r 9 | 2011_09_26/2011_09_26_drive_0061_sync 601 l 10 | 2011_09_30/2011_09_30_drive_0028_sync 1924 l 11 | 2011_09_26/2011_09_26_drive_0091_sync 270 r 12 | 2011_09_30/2011_09_30_drive_0033_sync 979 r 13 | 2011_09_29/2011_09_29_drive_0004_sync 288 l 14 | 2011_09_30/2011_09_30_drive_0033_sync 1029 r 15 | 2011_09_30/2011_09_30_drive_0028_sync 5004 r 16 | 2011_09_26/2011_09_26_drive_0051_sync 138 r 17 | 2011_10_03/2011_10_03_drive_0034_sync 3247 l 18 | 2011_09_26/2011_09_26_drive_0014_sync 285 l 19 | 2011_09_30/2011_09_30_drive_0028_sync 573 l 20 | 2011_09_26/2011_09_26_drive_0051_sync 425 r 21 | 2011_09_30/2011_09_30_drive_0028_sync 2380 l 22 | 2011_09_30/2011_09_30_drive_0028_sync 1323 l 23 | 2011_09_30/2011_09_30_drive_0028_sync 733 r 24 | 2011_09_26/2011_09_26_drive_0087_sync 331 r 25 | 2011_09_30/2011_09_30_drive_0028_sync 536 l 26 | 2011_09_30/2011_09_30_drive_0028_sync 2935 r 27 | 2011_10_03/2011_10_03_drive_0034_sync 562 l 28 | 2011_09_26/2011_09_26_drive_0032_sync 226 r 29 | 2011_09_30/2011_09_30_drive_0028_sync 5148 r 30 | 2011_10_03/2011_10_03_drive_0034_sync 1355 r 31 | 2011_10_03/2011_10_03_drive_0034_sync 2695 l 32 | 2011_09_30/2011_09_30_drive_0028_sync 3546 r 33 | 2011_10_03/2011_10_03_drive_0034_sync 4023 r 34 | 2011_09_26/2011_09_26_drive_0051_sync 301 r 35 | 2011_09_30/2011_09_30_drive_0028_sync 402 r 36 | 2011_09_30/2011_09_30_drive_0033_sync 294 r 37 | 2011_09_30/2011_09_30_drive_0033_sync 1106 r 38 | 2011_09_30/2011_09_30_drive_0028_sync 4906 r 39 | 2011_10_03/2011_10_03_drive_0034_sync 1504 l 40 | 2011_10_03/2011_10_03_drive_0042_sync 478 r 41 | 2011_09_30/2011_09_30_drive_0033_sync 980 l 42 | 2011_09_30/2011_09_30_drive_0028_sync 684 l 43 | 2011_09_30/2011_09_30_drive_0028_sync 3418 r 44 | 2011_09_26/2011_09_26_drive_0028_sync 68 r 45 | 2011_09_26/2011_09_26_drive_0039_sync 245 l 46 | 2011_09_26/2011_09_26_drive_0087_sync 363 l 47 | 2011_10_03/2011_10_03_drive_0034_sync 655 r 48 | 2011_10_03/2011_10_03_drive_0034_sync 
3379 l 49 | 2011_10_03/2011_10_03_drive_0034_sync 684 r 50 | 2011_09_26/2011_09_26_drive_0018_sync 100 l 51 | 2011_09_26/2011_09_26_drive_0104_sync 163 l 52 | 2011_10_03/2011_10_03_drive_0034_sync 2587 l 53 | 2011_09_30/2011_09_30_drive_0028_sync 663 l 54 | 2011_09_30/2011_09_30_drive_0033_sync 273 r 55 | 2011_10_03/2011_10_03_drive_0042_sync 768 r 56 | 2011_09_30/2011_09_30_drive_0033_sync 1543 l 57 | 2011_10_03/2011_10_03_drive_0034_sync 4614 r 58 | 2011_10_03/2011_10_03_drive_0034_sync 475 l 59 | 2011_09_30/2011_09_30_drive_0028_sync 3297 l 60 | 2011_09_26/2011_09_26_drive_0039_sync 165 l 61 | 2011_09_30/2011_09_30_drive_0028_sync 1031 l 62 | 2011_10_03/2011_10_03_drive_0034_sync 2656 l 63 | 2011_10_03/2011_10_03_drive_0042_sync 66 r 64 | 2011_10_03/2011_10_03_drive_0042_sync 297 r 65 | 2011_09_30/2011_09_30_drive_0028_sync 2604 l 66 | 2011_09_26/2011_09_26_drive_0104_sync 97 r 67 | 2011_10_03/2011_10_03_drive_0034_sync 3787 l 68 | 2011_09_30/2011_09_30_drive_0028_sync 2946 l 69 | 2011_10_03/2011_10_03_drive_0034_sync 1184 l 70 | 2011_10_03/2011_10_03_drive_0042_sync 458 l 71 | 2011_09_30/2011_09_30_drive_0028_sync 4166 r 72 | 2011_09_30/2011_09_30_drive_0028_sync 4922 l 73 | 2011_09_30/2011_09_30_drive_0033_sync 1323 l 74 | 2011_10_03/2011_10_03_drive_0034_sync 1819 l 75 | 2011_10_03/2011_10_03_drive_0042_sync 569 l 76 | 2011_09_26/2011_09_26_drive_0070_sync 205 l 77 | 2011_10_03/2011_10_03_drive_0042_sync 249 l 78 | 2011_09_30/2011_09_30_drive_0034_sync 371 r 79 | 2011_09_26/2011_09_26_drive_0039_sync 104 l 80 | 2011_09_30/2011_09_30_drive_0028_sync 382 r 81 | 2011_09_26/2011_09_26_drive_0087_sync 295 l 82 | 2011_09_30/2011_09_30_drive_0028_sync 3023 l 83 | 2011_10_03/2011_10_03_drive_0042_sync 591 l 84 | 2011_10_03/2011_10_03_drive_0034_sync 1472 l 85 | 2011_09_26/2011_09_26_drive_0001_sync 77 r 86 | 2011_10_03/2011_10_03_drive_0034_sync 3269 l 87 | 2011_09_30/2011_09_30_drive_0020_sync 185 r 88 | 2011_10_03/2011_10_03_drive_0034_sync 2437 r 89 | 2011_10_03/2011_10_03_drive_0034_sync 4050 l 90 | 2011_09_26/2011_09_26_drive_0039_sync 147 r 91 | 2011_09_30/2011_09_30_drive_0028_sync 4741 l 92 | 2011_09_30/2011_09_30_drive_0028_sync 3557 r 93 | 2011_10_03/2011_10_03_drive_0034_sync 394 l 94 | 2011_09_30/2011_09_30_drive_0028_sync 158 r 95 | 2011_10_03/2011_10_03_drive_0034_sync 1804 l 96 | 2011_09_29/2011_09_29_drive_0004_sync 62 r 97 | 2011_09_30/2011_09_30_drive_0028_sync 220 l 98 | 2011_10_03/2011_10_03_drive_0034_sync 1420 r 99 | 2011_10_03/2011_10_03_drive_0034_sync 2310 l 100 | 2011_09_30/2011_09_30_drive_0034_sync 839 r -------------------------------------------------------------------------------- /mono/datasets/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | from __future__ import absolute_import, division, print_function 6 | import torch 7 | import numpy as np 8 | import cv2 9 | 10 | 11 | def readlines(filename): 12 | """Read all the lines in a text file and return as a list 13 | """ 14 | with open(filename, 'r') as f: 15 | lines = f.read().splitlines() 16 | return lines 17 | 18 | 19 | def normalize_image(x): 20 | """Rescale image pixels to span range [0, 1] 21 | """ 22 | ma = float(x.max().cpu().data) 23 | mi = float(x.min().cpu().data) 24 | d = ma - mi if ma != mi else 1e5 25 | return (x - mi) / d 26 | 27 | 28 | def sec_to_hm(t): 29 | """Convert time in seconds to time in hours, minutes and seconds 30 | e.g. 
10239 -> (2, 50, 39) 31 | """ 32 | t = int(t) 33 | s = t % 60 34 | t //= 60 35 | m = t % 60 36 | t //= 60 37 | return t, m, s 38 | 39 | 40 | def sec_to_hm_str(t): 41 | """Convert time in seconds to a nice string 42 | e.g. 10239 -> '02h50m39s' 43 | """ 44 | h, m, s = sec_to_hm(t) 45 | return "{:02d}h{:02d}m{:02d}s".format(h, m, s) 46 | 47 | 48 | def transformation_from_parameters(axisangle, translation, invert=False): 49 | R = rot_from_axisangle(axisangle) 50 | t = translation.clone() 51 | if invert: 52 | R = R.transpose(1, 2) 53 | t *= -1 54 | T = get_translation_matrix(t) 55 | if invert: 56 | M = torch.matmul(R, T) 57 | else: 58 | M = torch.matmul(T, R) 59 | return M 60 | 61 | 62 | def get_translation_matrix(translation_vector): 63 | T = torch.zeros(translation_vector.shape[0], 4, 4).cuda() 64 | t = translation_vector.contiguous().view(-1, 3, 1) 65 | T[:, 0, 0] = 1 66 | T[:, 1, 1] = 1 67 | T[:, 2, 2] = 1 68 | T[:, 3, 3] = 1 69 | T[:, :3, 3, None] = t 70 | return T 71 | 72 | 73 | def rot_from_axisangle(vec): 74 | angle = torch.norm(vec, 2, 2, True) 75 | axis = vec / (angle + 1e-7) 76 | ca = torch.cos(angle) 77 | sa = torch.sin(angle) 78 | C = 1 - ca 79 | x = axis[..., 0].unsqueeze(1) 80 | y = axis[..., 1].unsqueeze(1) 81 | z = axis[..., 2].unsqueeze(1) 82 | xs = x * sa 83 | ys = y * sa 84 | zs = z * sa 85 | xC = x * C 86 | yC = y * C 87 | zC = z * C 88 | xyC = x * yC 89 | yzC = y * zC 90 | zxC = z * xC 91 | rot = torch.zeros((vec.shape[0], 4, 4)).cuda() 92 | rot[:, 0, 0] = torch.squeeze(x * xC + ca) 93 | rot[:, 0, 1] = torch.squeeze(xyC - zs) 94 | rot[:, 0, 2] = torch.squeeze(zxC + ys) 95 | rot[:, 1, 0] = torch.squeeze(xyC + zs) 96 | rot[:, 1, 1] = torch.squeeze(y * yC + ca) 97 | rot[:, 1, 2] = torch.squeeze(yzC - xs) 98 | rot[:, 2, 0] = torch.squeeze(zxC - ys) 99 | rot[:, 2, 1] = torch.squeeze(yzC + xs) 100 | rot[:, 2, 2] = torch.squeeze(z * zC + ca) 101 | rot[:, 3, 3] = 1 102 | return rot 103 | 104 | 105 | def dump_xyz(source_to_target_transformations): 106 | xyzs = [] 107 | cam_to_world = np.eye(4) 108 | xyzs.append(cam_to_world[:3, 3]) 109 | for source_to_target_transformation in source_to_target_transformations: 110 | cam_to_world = np.dot(cam_to_world, source_to_target_transformation) 111 | xyzs.append(cam_to_world[:3, 3]) 112 | return xyzs 113 | 114 | 115 | def compute_ate(gtruth_xyz, pred_xyz_o): 116 | offset = gtruth_xyz[0] - pred_xyz_o[0] 117 | pred_xyz = pred_xyz_o + offset[None, :] 118 | 119 | scale = np.sum(gtruth_xyz * pred_xyz) / np.sum(pred_xyz ** 2) 120 | alignment_error = pred_xyz * scale - gtruth_xyz 121 | rmse = np.sqrt(np.sum(alignment_error ** 2)) / gtruth_xyz.shape[0] 122 | return rmse 123 | 124 | 125 | def extract_match(queryImage, trainImage, num): 126 | orb = cv2.ORB_create() 127 | kp_query, des_query = orb.detectAndCompute(queryImage, None) 128 | kp_train, des_train = orb.detectAndCompute(trainImage, None) 129 | bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) 130 | matches = bf.match(des_query, des_train) 131 | matches = sorted(matches, key=lambda x: x.distance) 132 | query_position = [] 133 | train_position = [] 134 | for i in range(num): 135 | match = matches[i] 136 | queryIdx = match.queryIdx 137 | trainIdx = match.trainIdx 138 | query_position.append(kp_query[queryIdx].pt) 139 | train_position.append(kp_train[trainIdx].pt) 140 | return query_position, train_position 141 | 142 | 143 | def compute_errors(gt, pred): 144 | """Computation of error metrics between predicted and ground truth depths 145 | """ 146 | thresh = np.maximum((gt / pred), (pred / gt)) 
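# The three scores computed next are the standard "delta" accuracy metrics for depth
# evaluation: the fraction of pixels whose ratio max(gt/pred, pred/gt) falls below
# 1.25, 1.25**2 and 1.25**3 (higher is better). For instance, gt = 2.0 m against
# pred = 1.7 m gives a ratio of about 1.18, so that pixel counts towards a1, a2 and a3,
# while pred = 1.5 m gives roughly 1.33 and only counts towards a2 and a3.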
147 | a1 = (thresh < 1.25 ).mean() 148 | a2 = (thresh < 1.25 ** 2).mean() 149 | a3 = (thresh < 1.25 ** 3).mean() 150 | 151 | rmse = (gt - pred) ** 2 152 | rmse = np.sqrt(rmse.mean()) 153 | 154 | rmse_log = (np.log(gt) - np.log(pred)) ** 2 155 | rmse_log = np.sqrt(rmse_log.mean()) 156 | 157 | abs_rel = np.mean(np.abs(gt - pred) / gt) 158 | 159 | sq_rel = np.mean(((gt - pred) ** 2) / gt) 160 | 161 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 162 | 163 | 164 | def batch_post_process_disparity(l_disp, r_disp): 165 | """Apply the disparity post-processing method as introduced in Monodepthv1 166 | """ 167 | _, h, w = l_disp.shape 168 | m_disp = 0.5 * (l_disp + r_disp) 169 | l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h)) 170 | l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...] 171 | r_mask = l_mask[:, :, ::-1] 172 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp -------------------------------------------------------------------------------- /mono/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .mono_baseline.net import Baseline 2 | from .mono_autoencoder.net import autoencoder 3 | from .mono_fm.net import mono_fm 4 | from .mono_fm_joint.net import mono_fm_joint -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_autoencoder/__init__.py -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/decoder.py: -------------------------------------------------------------------------------- 1 | # import torch 2 | # import torch.nn as nn 3 | # import torch.nn.functional as F 4 | # from .layers import Conv1x1, Conv3x3, CRPBlock, upsample 5 | # 6 | # 7 | # class Decoder(nn.Module): 8 | # def __init__(self, num_ch_enc): 9 | # super(Decoder, self).__init__() 10 | # 11 | # bottleneck = 256 12 | # stage = 4 13 | # self.do = nn.Dropout(p=0.5) 14 | # 15 | # self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False) 16 | # self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False) 17 | # self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False) 18 | # self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False) 19 | # 20 | # self.iconv4 = Conv3x3(512, bottleneck) 21 | # self.iconv3 = Conv3x3(bottleneck, bottleneck) 22 | # self.iconv2 = Conv3x3(bottleneck, bottleneck) 23 | # self.iconv1 = Conv3x3(bottleneck, bottleneck) 24 | # 25 | # self.crp4 = self._make_crp(bottleneck, bottleneck, stage) 26 | # self.crp3 = self._make_crp(bottleneck, bottleneck, stage) 27 | # self.crp2 = self._make_crp(bottleneck, bottleneck, stage) 28 | # self.crp1 = self._make_crp(bottleneck, bottleneck, stage) 29 | # 30 | # self.merge4 = Conv3x3(bottleneck, bottleneck) 31 | # self.merge3 = Conv3x3(bottleneck, bottleneck) 32 | # self.merge2 = Conv3x3(bottleneck, bottleneck) 33 | # self.merge1 = Conv3x3(bottleneck, bottleneck) 34 | # 35 | # # disp 36 | # self.disp4 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid()) 37 | # self.disp3 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid()) 38 | # self.disp2 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid()) 39 | # self.disp1 = nn.Sequential(Conv3x3(bottleneck, 3), nn.Sigmoid()) 40 | # 41 | # def _make_crp(self, in_planes, out_planes, stages): 42 | # 
layers = [CRPBlock(in_planes, out_planes,stages)] 43 | # return nn.Sequential(*layers) 44 | # 45 | # def forward(self, input_features, frame_id): 46 | # self.outputs = {} 47 | # l0, l1, l2, l3, l4 = input_features 48 | # 49 | # x4 = self.reduce4(l4) 50 | # x4 = self.iconv4(x4) 51 | # x4 = F.leaky_relu(x4) 52 | # x4 = self.crp4(x4) 53 | # x4 = self.merge4(x4) 54 | # x4 = F.leaky_relu(x4) 55 | # x4 = upsample(x4) 56 | # disp4 = self.disp4(x4) 57 | # 58 | # 59 | # x3 = self.reduce3(x4) 60 | # x3 = self.iconv3(x3) 61 | # x3 = F.leaky_relu(x3) 62 | # x3 = self.crp3(x3) 63 | # x3 = self.merge3(x3) 64 | # x3 = F.leaky_relu(x3) 65 | # x3 = upsample(x3) 66 | # disp3 = self.disp3(x3) 67 | # 68 | # 69 | # x2 = self.reduce2(l2) 70 | # x2 = torch.cat((x2), 1) 71 | # x2 = self.iconv2(x2) 72 | # x2 = F.leaky_relu(x2) 73 | # x2 = self.crp2(x2) 74 | # x2 = self.merge2(x2) 75 | # x2 = F.leaky_relu(x2) 76 | # x2 = upsample(x2) 77 | # disp2 = self.disp2(x2) 78 | # 79 | # x1 = self.reduce1(l1) 80 | # x1 = torch.cat((x1), 1) 81 | # x1 = self.iconv1(x1) 82 | # x1 = F.leaky_relu(x1) 83 | # x1 = self.crp1(x1) 84 | # x1 = self.merge1(x1) 85 | # x1 = F.leaky_relu(x1) 86 | # x1 = upsample(x1) 87 | # disp1 = self.disp1(x1) 88 | # 89 | # self.outputs[("disp", frame_id, 3)] = disp4 90 | # self.outputs[("disp", frame_id, 2)] = disp3 91 | # self.outputs[("disp", frame_id, 1)] = disp2 92 | # self.outputs[("disp", frame_id, 0)] = disp1 93 | # 94 | # return self.outputs 95 | 96 | 97 | from __future__ import absolute_import, division, print_function 98 | import torch.nn as nn 99 | from .layers import ConvBlock, Conv3x3, upsample 100 | 101 | 102 | class Decoder(nn.Module): 103 | def __init__(self, num_ch_enc, num_output_channels=3): 104 | super(Decoder, self).__init__() 105 | 106 | num_ch_dec = [16, 32, 64, 128, 256] 107 | 108 | # upconv 109 | self.upconv5 = ConvBlock(num_ch_enc[4], num_ch_dec[4]) 110 | self.upconv4 = ConvBlock(num_ch_dec[4], num_ch_dec[3]) 111 | self.upconv3 = ConvBlock(num_ch_dec[3], num_ch_dec[2]) 112 | self.upconv2 = ConvBlock(num_ch_dec[2], num_ch_dec[1]) 113 | self.upconv1 = ConvBlock(num_ch_dec[1], num_ch_dec[0]) 114 | 115 | # iconv 116 | self.iconv5 = ConvBlock(num_ch_dec[4], num_ch_dec[4]) 117 | self.iconv4 = ConvBlock(num_ch_dec[3], num_ch_dec[3]) 118 | self.iconv3 = ConvBlock(num_ch_dec[2], num_ch_dec[2]) 119 | self.iconv2 = ConvBlock(num_ch_dec[1], num_ch_dec[1]) 120 | self.iconv1 = ConvBlock(num_ch_dec[0], num_ch_dec[0]) 121 | 122 | # disp 123 | self.disp4 = Conv3x3(num_ch_dec[3], num_output_channels) 124 | self.disp3 = Conv3x3(num_ch_dec[2], num_output_channels) 125 | self.disp2 = Conv3x3(num_ch_dec[1], num_output_channels) 126 | self.disp1 = Conv3x3(num_ch_dec[0], num_output_channels) 127 | 128 | self.sigmoid = nn.Sigmoid() 129 | 130 | 131 | def forward(self, input_features, frame_id=0): 132 | self.outputs = {} 133 | _, _, _, _, econv5 = input_features 134 | # (64,64,128,256,512)*4 135 | 136 | upconv5 = upsample(self.upconv5(econv5)) 137 | iconv5 = self.iconv5(upconv5) 138 | 139 | upconv4 = upsample(self.upconv4(iconv5)) 140 | iconv4 = self.iconv4(upconv4) 141 | 142 | upconv3 = upsample(self.upconv3(iconv4)) 143 | iconv3 = self.iconv3(upconv3) 144 | 145 | upconv2 = upsample(self.upconv2(iconv3)) 146 | iconv2 = self.iconv2(upconv2) 147 | 148 | upconv1 = upsample(self.upconv1(iconv2)) 149 | iconv1 = self.iconv1(upconv1) 150 | 151 | self.outputs[("disp", frame_id, 3)] = self.sigmoid(self.disp4(iconv4)) 152 | self.outputs[("disp", frame_id, 2)] = self.sigmoid(self.disp3(iconv3)) 153 | 
self.outputs[("disp", frame_id, 1)] = self.sigmoid(self.disp2(iconv2)) 154 | self.outputs[("disp", frame_id, 0)] = self.sigmoid(self.disp1(iconv1)) 155 | return self.outputs -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from .resnet import resnet18, resnet34, resnet50, resnet101 6 | 7 | 8 | class Encoder(nn.Module): 9 | def __init__(self, num_layers, pretrained_path=None): 10 | super(Encoder, self).__init__() 11 | 12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 13 | 14 | resnets = {18: resnet18, 15 | 34: resnet34, 16 | 50: resnet50, 17 | 101: resnet101,} 18 | 19 | if num_layers not in resnets: 20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 21 | 22 | 23 | self.encoder = resnets[num_layers]() 24 | if pretrained_path is not None: 25 | checkpoint = torch.load(pretrained_path) 26 | self.encoder.load_state_dict(checkpoint) 27 | 28 | if num_layers > 34: 29 | self.num_ch_enc[1:] *= 4 30 | 31 | # for name, param in self.encoder.named_parameters(): 32 | # if 'bn' in name: 33 | # param.requires_grad = False 34 | 35 | def forward(self, input_image): 36 | self.features = [] 37 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(input_image)))) 38 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 39 | self.features.append(self.encoder.layer2(self.features[-1])) 40 | self.features.append(self.encoder.layer3(self.features[-1])) 41 | self.features.append(self.encoder.layer4(self.features[-1])) 42 | 43 | return self.features 44 | -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/layers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class SSIM(nn.Module): 9 | def __init__(self): 10 | super(SSIM, self).__init__() 11 | self.mu_x_pool = nn.AvgPool2d(3, 1) 12 | self.mu_y_pool = nn.AvgPool2d(3, 1) 13 | self.sig_x_pool = nn.AvgPool2d(3, 1) 14 | self.sig_y_pool = nn.AvgPool2d(3, 1) 15 | self.sig_xy_pool = nn.AvgPool2d(3, 1) 16 | self.refl = nn.ReflectionPad2d(1) 17 | self.C1 = 0.01 ** 2 18 | self.C2 = 0.03 ** 2 19 | 20 | def forward(self, x, y): 21 | x = self.refl(x) 22 | y = self.refl(y) 23 | mu_x = self.mu_x_pool(x) 24 | mu_y = self.mu_y_pool(y) 25 | sigma_x = self.sig_x_pool(x ** 2) - mu_x ** 2 26 | sigma_y = self.sig_y_pool(y ** 2) - mu_y ** 2 27 | sigma_xy = self.sig_xy_pool(x * y) - mu_x * mu_y 28 | SSIM_n = (2 * mu_x * mu_y + self.C1) * (2 * sigma_xy + self.C2) 29 | SSIM_d = (mu_x ** 2 + mu_y ** 2 + self.C1) * (sigma_x + sigma_y + self.C2) 30 | return torch.clamp((1 - SSIM_n / SSIM_d) / 2, 0, 1) 31 | 32 | 33 | def upsample(x): 34 | return F.interpolate(x, scale_factor=2, mode="nearest") 35 | 36 | 37 | class ConvBlock(nn.Module): 38 | def __init__(self, in_channels, out_channels): 39 | super(ConvBlock, self).__init__() 40 | self.conv = Conv3x3(in_channels, out_channels) 41 | self.nonlin = nn.ELU(inplace=True) 42 | def forward(self, x): 43 | out = self.conv(x) 44 | out = self.nonlin(out) 45 | return out 46 | 47 | 48 | class Conv1x1(nn.Module): 49 | def 
__init__(self, in_channels, out_channels, bias=False): 50 | super(Conv1x1, self).__init__() 51 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), kernel_size=1, stride=1, bias=bias) 52 | def forward(self, x): 53 | out = self.conv(x) 54 | return out 55 | 56 | 57 | class Conv3x3(nn.Module): 58 | def __init__(self, in_channels, out_channels, use_refl=True): 59 | super(Conv3x3, self).__init__() 60 | if use_refl: 61 | self.pad = nn.ReflectionPad2d(1) 62 | else: 63 | self.pad = nn.ZeroPad2d(1) 64 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3) 65 | def forward(self, x): 66 | out = self.pad(x) 67 | out = self.conv(out) 68 | return out 69 | 70 | 71 | class Conv5x5(nn.Module): 72 | def __init__(self, in_channels, out_channels, use_refl=True): 73 | super(Conv5x5, self).__init__() 74 | if use_refl: 75 | self.pad = nn.ReflectionPad2d(2) 76 | else: 77 | self.pad = nn.ZeroPad2d(2) 78 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 5) 79 | def forward(self, x): 80 | out = self.pad(x) 81 | out = self.conv(out) 82 | return out 83 | 84 | 85 | class CRPBlock(nn.Module): 86 | def __init__(self, in_planes, out_planes, n_stages): 87 | super(CRPBlock, self).__init__() 88 | for i in range(n_stages): 89 | setattr(self, '{}_{}'.format(i + 1, 'pointwise'), Conv1x1(in_planes if (i == 0) else out_planes, out_planes, False)) 90 | self.stride = 1 91 | self.n_stages = n_stages 92 | self.maxpool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2) 93 | 94 | def forward(self, x): 95 | top = x 96 | for i in range(self.n_stages): 97 | top = self.maxpool(top) 98 | top = getattr(self, '{}_{}'.format(i + 1, 'pointwise'))(top) 99 | x = top + x 100 | return x 101 | 102 | 103 | def compute_depth_errors(gt, pred): 104 | thresh = torch.max((gt / pred), (pred / gt)) 105 | a1 = (thresh < 1.25 ).float().mean() 106 | a2 = (thresh < 1.25 ** 2).float().mean() 107 | a3 = (thresh < 1.25 ** 3).float().mean() 108 | rmse = (gt - pred) ** 2 109 | rmse = torch.sqrt(rmse.mean()) 110 | rmse_log = (torch.log(gt) - torch.log(pred)) ** 2 111 | rmse_log = torch.sqrt(rmse_log.mean()) 112 | abs_rel = torch.mean(torch.abs(gt - pred) / gt) 113 | sq_rel = torch.mean((gt - pred) ** 2 / gt) 114 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/net.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch 3 | import torch.nn.functional as F 4 | import torch.nn as nn 5 | 6 | import os 7 | import matplotlib.pyplot as plt 8 | 9 | from .layers import SSIM 10 | from .encoder import Encoder 11 | from .decoder import Decoder 12 | from ..registry import MONO 13 | 14 | 15 | @MONO.register_module 16 | class autoencoder(nn.Module): 17 | def __init__(self, options): 18 | super(autoencoder, self).__init__() 19 | self.opt = options 20 | 21 | self.Encoder = Encoder(self.opt.depth_num_layers, self.opt.depth_pretrained_path) 22 | self.Decoder = Decoder(self.Encoder.num_ch_enc) 23 | 24 | self.ssim = SSIM() 25 | self.count = 0 26 | 27 | def forward(self, inputs): 28 | features = self.Encoder(inputs[("color", 0, 0)]) 29 | outputs = self.Decoder(features, 0) 30 | if self.training: 31 | loss_dict = self.compute_losses(inputs, outputs, features) 32 | return outputs, loss_dict 33 | return outputs 34 | 35 | def robust_l1(self, pred, target): 36 | eps = 1e-3 37 | return torch.sqrt(torch.pow(target - pred, 2) + eps ** 2) 38 | 
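    # robust_l1 above is a Charbonnier-style penalty, sqrt((target - pred)^2 + eps^2),
    # which stays differentiable at zero. compute_reprojection_loss below blends it
    # with SSIM using the 0.85 / 0.15 weighting common in Monodepth2-style
    # self-supervised training.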
39 | def compute_reprojection_loss(self, pred, target): 40 | photometric_loss = self.robust_l1(pred, target).mean(1, True) 41 | ssim_loss = self.ssim(pred, target).mean(1, True) 42 | reprojection_loss = (0.85 * ssim_loss + 0.15 * photometric_loss) 43 | return reprojection_loss 44 | 45 | def compute_losses(self, inputs, outputs, features): 46 | loss_dict = {} 47 | interval = 1000 48 | target = inputs[("color", 0, 0)] 49 | for i in range(5): 50 | f=features[i] 51 | smooth_loss = self.get_smooth_loss(f, target) 52 | loss_dict[('smooth_loss', i)] = smooth_loss/ (2 ** i)/5 53 | 54 | for scale in self.opt.scales: 55 | """ 56 | initialization 57 | """ 58 | pred = outputs[("disp", 0, scale)] 59 | 60 | _,_,h,w = pred.size() 61 | target = F.interpolate(target, [h, w], mode="bilinear", align_corners=False) 62 | min_reconstruct_loss = self.compute_reprojection_loss(pred, target) 63 | loss_dict[('min_reconstruct_loss', scale)] = min_reconstruct_loss.mean()/len(self.opt.scales) 64 | 65 | if self.count % interval == 0: 66 | img_path = os.path.join('/node01_data5/monodepth2-test/odo', 'auto_{:0>4d}_{}.png'.format(self.count // interval, scale)) 67 | plt.imsave(img_path, pred[0].transpose(0,1).transpose(1,2).data.cpu().numpy()) 68 | img_path = os.path.join('/node01_data5/monodepth2-test/odo', 'img_{:0>4d}_{}.png'.format(self.count // interval, scale)) 69 | plt.imsave(img_path, target[0].transpose(0, 1).transpose(1, 2).data.cpu().numpy()) 70 | 71 | self.count += 1 72 | return loss_dict 73 | 74 | def get_smooth_loss(self, disp, img): 75 | b, _, h, w = disp.size() 76 | img = F.interpolate(img, (h, w), mode='area') 77 | 78 | disp_dx, disp_dy = self.gradient(disp) 79 | img_dx, img_dy = self.gradient(img) 80 | 81 | disp_dxx, disp_dxy = self.gradient(disp_dx) 82 | disp_dyx, disp_dyy = self.gradient(disp_dy) 83 | 84 | img_dxx, img_dxy = self.gradient(img_dx) 85 | img_dyx, img_dyy = self.gradient(img_dy) 86 | 87 | smooth1 = torch.mean(disp_dx.abs() * torch.exp(-img_dx.abs().mean(1, True))) + \ 88 | torch.mean(disp_dy.abs() * torch.exp(-img_dy.abs().mean(1, True))) 89 | 90 | smooth2 = torch.mean(disp_dxx.abs() * torch.exp(-img_dxx.abs().mean(1, True))) + \ 91 | torch.mean(disp_dxy.abs() * torch.exp(-img_dxy.abs().mean(1, True))) + \ 92 | torch.mean(disp_dyx.abs() * torch.exp(-img_dyx.abs().mean(1, True))) + \ 93 | torch.mean(disp_dyy.abs() * torch.exp(-img_dyy.abs().mean(1, True))) 94 | 95 | return -self.opt.dis * smooth1+ self.opt.cvt * smooth2 96 | 97 | def gradient(self, D): 98 | dy = D[:, :, 1:] - D[:, :, :-1] 99 | dx = D[:, :, :, 1:] - D[:, :, :, :-1] 100 | return dx, dy 101 | 102 | -------------------------------------------------------------------------------- /mono/model/mono_autoencoder/resnet.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import BatchNorm2d as bn 5 | 6 | def conv3x3(in_planes, out_planes, stride=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 9 | 10 | 11 | def conv1x1(in_planes, out_planes, stride=1): 12 | """1x1 convolution""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = conv3x3(inplanes, planes, stride) 22 | self.bn1 = bn(planes) 23 | 
self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = bn(planes) 26 | self.downsample = downsample 27 | self.stride = stride 28 | 29 | def forward(self, x): 30 | residual = x 31 | 32 | out = self.conv1(x) 33 | out = self.bn1(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv2(out) 37 | out = self.bn2(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, inplanes, planes, stride=1, downsample=None): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = conv1x1(inplanes, planes) 54 | self.bn1 = bn(planes) 55 | self.conv2 = conv3x3(planes, planes, stride) 56 | self.bn2 = bn(planes) 57 | self.conv3 = conv1x1(planes, planes * self.expansion) 58 | self.bn3 = bn(planes * self.expansion) 59 | self.relu = nn.ReLU(inplace=True) 60 | self.downsample = downsample 61 | self.stride = stride 62 | 63 | def forward(self, x): 64 | residual = x 65 | 66 | out = self.conv1(x) 67 | out = self.bn1(out) 68 | out = self.relu(out) 69 | 70 | out = self.conv2(out) 71 | out = self.bn2(out) 72 | out = self.relu(out) 73 | 74 | out = self.conv3(out) 75 | out = self.bn3(out) 76 | 77 | if self.downsample is not None: 78 | residual = self.downsample(x) 79 | 80 | out += residual 81 | out = self.relu(out) 82 | 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | 88 | def __init__(self, block, layers, num_classes=1000): 89 | super(ResNet, self).__init__() 90 | self.inplanes = 64 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 92 | self.bn1 = bn(64) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0]) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 100 | self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 105 | elif isinstance(m, bn): 106 | nn.init.constant_(m.weight, 1) 107 | nn.init.constant_(m.bias, 0) 108 | 109 | def _make_layer(self, block, planes, blocks, stride=1): 110 | downsample = None 111 | if stride != 1 or self.inplanes != planes * block.expansion: 112 | downsample = nn.Sequential( 113 | conv1x1(self.inplanes, planes * block.expansion, stride), 114 | bn(planes * block.expansion), 115 | ) 116 | 117 | layers = [] 118 | layers.append(block(self.inplanes, planes, stride, downsample)) 119 | self.inplanes = planes * block.expansion 120 | for _ in range(1, blocks): 121 | layers.append(block(self.inplanes, planes)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.conv1(x) 127 | x = self.bn1(x) 128 | x = self.relu(x) 129 | x = self.maxpool(x) 130 | 131 | x = self.layer1(x) 132 | x = self.layer2(x) 133 | x = self.layer3(x) 134 | x = self.layer4(x) 135 | 136 | return x 137 | 138 | 139 | def resnet18(pretrained_path=None): 140 | """Constructs a ResNet-18 model. 
141 | Args: 142 | pretrained (bool): If True, returns a model pre-trained on ImageNet 143 | """ 144 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 145 | if pretrained_path is not None: 146 | model.load_state_dict(torch.load(pretrained_path)) 147 | print('Loaded pre-trained weights') 148 | return model 149 | 150 | 151 | def resnet34(pretrained_path=None, **kwargs): 152 | """Constructs a ResNet-34 model. 153 | Args: 154 | pretrained (bool): If True, returns a model pre-trained on ImageNet 155 | """ 156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 157 | if pretrained_path is not None: 158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth'))) 159 | print('Loaded pre-trained weights') 160 | return model 161 | 162 | 163 | def resnet50(pretrained_path=None, **kwargs): 164 | """Constructs a ResNet-50 model. 165 | Args: 166 | pretrained (bool): If True, returns a model pre-trained on ImageNet 167 | """ 168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 169 | if pretrained_path is not None: 170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth'))) 171 | print('Loaded pre-trained weights') 172 | return model 173 | 174 | 175 | def resnet101(pretrained_path=None, **kwargs): 176 | """Constructs a ResNet-101 model. 177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 181 | if pretrained_path is not None: 182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth'))) 183 | print('Loaded pre-trained weights') 184 | return model 185 | -------------------------------------------------------------------------------- /mono/model/mono_baseline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_baseline/__init__.py -------------------------------------------------------------------------------- /mono/model/mono_baseline/depth_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .layers import Conv1x1, Conv3x3, CRPBlock, upsample 5 | 6 | 7 | class DepthDecoder(nn.Module): 8 | def __init__(self, num_ch_enc): 9 | super(DepthDecoder, self).__init__() 10 | 11 | bottleneck = 256 12 | stage = 4 13 | self.do = nn.Dropout(p=0.5) 14 | 15 | self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False) 16 | self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False) 17 | self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False) 18 | self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False) 19 | 20 | self.iconv4 = Conv3x3(512, bottleneck) 21 | self.iconv3 = Conv3x3(bottleneck*2+1, bottleneck) 22 | self.iconv2 = Conv3x3(bottleneck*2+1, bottleneck) 23 | self.iconv1 = Conv3x3(bottleneck*2+1, bottleneck) 24 | 25 | self.crp4 = self._make_crp(bottleneck, bottleneck, stage) 26 | self.crp3 = self._make_crp(bottleneck, bottleneck, stage) 27 | self.crp2 = self._make_crp(bottleneck, bottleneck, stage) 28 | self.crp1 = self._make_crp(bottleneck, bottleneck, stage) 29 | 30 | self.merge4 = Conv3x3(bottleneck, bottleneck) 31 | self.merge3 = Conv3x3(bottleneck, bottleneck) 32 | self.merge2 = Conv3x3(bottleneck, bottleneck) 33 | self.merge1 = Conv3x3(bottleneck, bottleneck) 34 | 35 | # disp 36 | self.disp4 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 37 | self.disp3 = 
nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 38 | self.disp2 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 39 | self.disp1 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 40 | 41 | def _make_crp(self, in_planes, out_planes, stages): 42 | layers = [CRPBlock(in_planes, out_planes,stages)] 43 | return nn.Sequential(*layers) 44 | 45 | def forward(self, input_features, frame_id=0): 46 | self.outputs = {} 47 | l0, l1, l2, l3, l4 = input_features 48 | 49 | l4 = self.do(l4) 50 | l3 = self.do(l3) 51 | 52 | x4 = self.reduce4(l4) 53 | x4 = self.iconv4(x4) 54 | x4 = F.leaky_relu(x4) 55 | x4 = self.crp4(x4) 56 | x4 = self.merge4(x4) 57 | x4 = F.leaky_relu(x4) 58 | x4 = upsample(x4) 59 | disp4 = self.disp4(x4) 60 | 61 | 62 | x3 = self.reduce3(l3) 63 | x3 = torch.cat((x3, x4, disp4), 1) 64 | x3 = self.iconv3(x3) 65 | x3 = F.leaky_relu(x3) 66 | x3 = self.crp3(x3) 67 | x3 = self.merge3(x3) 68 | x3 = F.leaky_relu(x3) 69 | x3 = upsample(x3) 70 | disp3 = self.disp3(x3) 71 | 72 | 73 | x2 = self.reduce2(l2) 74 | x2 = torch.cat((x2, x3 , disp3), 1) 75 | x2 = self.iconv2(x2) 76 | x2 = F.leaky_relu(x2) 77 | x2 = self.crp2(x2) 78 | x2 = self.merge2(x2) 79 | x2 = F.leaky_relu(x2) 80 | x2 = upsample(x2) 81 | disp2 = self.disp2(x2) 82 | 83 | x1 = self.reduce1(l1) 84 | x1 = torch.cat((x1, x2, disp2), 1) 85 | x1 = self.iconv1(x1) 86 | x1 = F.leaky_relu(x1) 87 | x1 = self.crp1(x1) 88 | x1 = self.merge1(x1) 89 | x1 = F.leaky_relu(x1) 90 | x1 = upsample(x1) 91 | disp1 = self.disp1(x1) 92 | 93 | self.outputs[("disp", frame_id, 3)] = disp4 94 | self.outputs[("disp", frame_id, 2)] = disp3 95 | self.outputs[("disp", frame_id, 1)] = disp2 96 | self.outputs[("disp", frame_id, 0)] = disp1 97 | 98 | return self.outputs 99 | -------------------------------------------------------------------------------- /mono/model/mono_baseline/depth_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from .resnet import resnet18, resnet34, resnet50, resnet101 6 | 7 | 8 | class DepthEncoder(nn.Module): 9 | def __init__(self, num_layers, pretrained_path=None): 10 | super(DepthEncoder, self).__init__() 11 | 12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 13 | 14 | resnets = {18: resnet18, 15 | 34: resnet34, 16 | 50: resnet50, 17 | 101: resnet101,} 18 | 19 | if num_layers not in resnets: 20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 21 | 22 | 23 | self.encoder = resnets[num_layers]() 24 | if pretrained_path is not None: 25 | checkpoint = torch.load(pretrained_path) 26 | self.encoder.load_state_dict(checkpoint) 27 | 28 | if num_layers > 34: 29 | self.num_ch_enc[1:] *= 4 30 | 31 | # for name, param in self.encoder.named_parameters(): 32 | # if 'bn' in name: 33 | # param.requires_grad = False 34 | 35 | def forward(self, input_image): 36 | self.features = [] 37 | x = (input_image - 0.45) / 0.225 38 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(x)))) 39 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 40 | self.features.append(self.encoder.layer2(self.features[-1])) 41 | self.features.append(self.encoder.layer3(self.features[-1])) 42 | self.features.append(self.encoder.layer4(self.features[-1])) 43 | 44 | return self.features 45 | -------------------------------------------------------------------------------- 
/mono/model/mono_baseline/pose_decoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch.nn as nn 3 | 4 | 5 | class PoseDecoder(nn.Module): 6 | def __init__(self, num_ch_enc, stride=1): 7 | super(PoseDecoder, self).__init__() 8 | 9 | self.reduce = nn.Conv2d(num_ch_enc[-1], 256, 1) 10 | self.conv1 = nn.Conv2d(256, 256, 3, stride, 1) 11 | self.conv2 = nn.Conv2d(256, 256, 3, stride, 1) 12 | self.conv3 = nn.Conv2d(256, 6, 1) 13 | 14 | self.relu = nn.ReLU() 15 | 16 | def forward(self, input_features): 17 | f = input_features[-1] 18 | out = self.relu(self.reduce(f)) 19 | out = self.relu(self.conv1(out)) 20 | out = self.relu(self.conv2(out)) 21 | out = self.conv3(out) 22 | out = out.mean(3).mean(2) 23 | out = 0.01 * out.view(-1, 1, 1, 6) 24 | axisangle = out[..., :3] 25 | translation = out[..., 3:] 26 | return axisangle, translation 27 | -------------------------------------------------------------------------------- /mono/model/mono_baseline/pose_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | from .resnet import ResNet, BasicBlock, resnet18, resnet34, resnet50, resnet101, Bottleneck 8 | from torch.nn import BatchNorm2d as bn 9 | 10 | 11 | class ResNetMultiImageInput(ResNet): 12 | def __init__(self, block, layers, num_classes=1000, num_input_images=2): 13 | super(ResNetMultiImageInput, self).__init__(block, layers) 14 | self.inplanes = 64 15 | self.conv1 = nn.Conv2d(num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 16 | self.bn1 = bn(64) 17 | self.relu = nn.ReLU(inplace=True) 18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 19 | self.layer1 = self._make_layer(block, 64, layers[0]) 20 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 21 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 22 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 23 | 24 | for m in self.modules(): 25 | if isinstance(m, nn.Conv2d): 26 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 27 | elif isinstance(m, nn.BatchNorm2d): 28 | nn.init.constant_(m.weight, 1) 29 | nn.init.constant_(m.bias, 0) 30 | 31 | 32 | def resnet_multiimage_input(num_layers, num_input_images=2, pretrained_path=None): 33 | assert num_layers in [18, 34, 50, 101], "Can only run with 18, 34, 50, 101 layers resnet" 34 | blocks = {18 : [2, 2, 2, 2], 35 | 34 : [3, 4, 6, 3], 36 | 50 : [3, 4, 6, 3], 37 | 101: [3, 4, 23, 3], 38 | }[num_layers] 39 | 40 | if num_layers < 40: 41 | model = ResNetMultiImageInput(BasicBlock, blocks, num_input_images=num_input_images) 42 | elif num_layers > 40: 43 | model = ResNetMultiImageInput(Bottleneck, blocks, num_input_images=num_input_images) 44 | 45 | if pretrained_path is not None: 46 | loaded = torch.load(pretrained_path) 47 | loaded['conv1.weight'] = torch.cat([loaded['conv1.weight']] * num_input_images, 1) / num_input_images 48 | model.load_state_dict(loaded) 49 | return model 50 | 51 | 52 | class PoseEncoder(nn.Module): 53 | def __init__(self, num_layers, pretrained_path=None, num_input_images=2): 54 | super(PoseEncoder, self).__init__() 55 | 56 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 57 | 58 | resnets = {18: resnet18, 59 | 34: resnet34, 60 | 50: resnet50, 61 | 101: resnet101,} 62 | 63 | 
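        # Only these ResNet depths are supported. For pose estimation the encoder
        # normally sees a pair of frames (num_input_images=2), so
        # resnet_multiimage_input above widens conv1 to num_input_images * 3 input
        # channels and averages any pretrained conv1 weights across the copies.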
if num_layers not in resnets: 64 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 65 | 66 | if num_input_images > 1: 67 | self.encoder = resnet_multiimage_input(num_layers, num_input_images, pretrained_path) 68 | else: 69 | self.encoder = resnets[num_layers]() 70 | if pretrained_path is not None: 71 | checkpoint = torch.load(pretrained_path) 72 | self.encoder.load_state_dict(checkpoint) 73 | 74 | if num_layers > 34: 75 | self.num_ch_enc[1:] *= 4 76 | 77 | # for name, param in self.encoder.named_parameters(): 78 | # if 'bn' in name: 79 | # param.requires_grad = False 80 | 81 | def forward(self, input_image): 82 | self.features = [] 83 | x = (input_image - 0.45) / 0.225 84 | x = self.encoder.conv1(x) 85 | x = self.encoder.bn1(x) 86 | self.features.append(self.encoder.relu(x)) 87 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 88 | self.features.append(self.encoder.layer2(self.features[-1])) 89 | self.features.append(self.encoder.layer3(self.features[-1])) 90 | self.features.append(self.encoder.layer4(self.features[-1])) 91 | 92 | return self.features 93 | -------------------------------------------------------------------------------- /mono/model/mono_baseline/resnet.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import BatchNorm2d as bn 5 | 6 | def conv3x3(in_planes, out_planes, stride=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 9 | 10 | 11 | def conv1x1(in_planes, out_planes, stride=1): 12 | """1x1 convolution""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = conv3x3(inplanes, planes, stride) 22 | self.bn1 = bn(planes) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = bn(planes) 26 | self.downsample = downsample 27 | self.stride = stride 28 | 29 | def forward(self, x): 30 | residual = x 31 | 32 | out = self.conv1(x) 33 | out = self.bn1(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv2(out) 37 | out = self.bn2(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, inplanes, planes, stride=1, downsample=None): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = conv1x1(inplanes, planes) 54 | self.bn1 = bn(planes) 55 | self.conv2 = conv3x3(planes, planes, stride) 56 | self.bn2 = bn(planes) 57 | self.conv3 = conv1x1(planes, planes * self.expansion) 58 | self.bn3 = bn(planes * self.expansion) 59 | self.relu = nn.ReLU(inplace=True) 60 | self.downsample = downsample 61 | self.stride = stride 62 | 63 | def forward(self, x): 64 | residual = x 65 | 66 | out = self.conv1(x) 67 | out = self.bn1(out) 68 | out = self.relu(out) 69 | 70 | out = self.conv2(out) 71 | out = self.bn2(out) 72 | out = self.relu(out) 73 | 74 | out = self.conv3(out) 75 | out = self.bn3(out) 76 | 77 | if self.downsample is not None: 78 | residual = self.downsample(x) 79 | 80 | out += residual 81 | out = self.relu(out) 82 | 83 | return out 84 | 85 | 86 | class 
ResNet(nn.Module): 87 | 88 | def __init__(self, block, layers, num_classes=1000): 89 | super(ResNet, self).__init__() 90 | self.inplanes = 64 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 92 | self.bn1 = bn(64) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0]) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 100 | self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 105 | elif isinstance(m, bn): 106 | nn.init.constant_(m.weight, 1) 107 | nn.init.constant_(m.bias, 0) 108 | 109 | def _make_layer(self, block, planes, blocks, stride=1): 110 | downsample = None 111 | if stride != 1 or self.inplanes != planes * block.expansion: 112 | downsample = nn.Sequential( 113 | conv1x1(self.inplanes, planes * block.expansion, stride), 114 | bn(planes * block.expansion), 115 | ) 116 | 117 | layers = [] 118 | layers.append(block(self.inplanes, planes, stride, downsample)) 119 | self.inplanes = planes * block.expansion 120 | for _ in range(1, blocks): 121 | layers.append(block(self.inplanes, planes)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.conv1(x) 127 | x = self.bn1(x) 128 | x = self.relu(x) 129 | x = self.maxpool(x) 130 | 131 | x = self.layer1(x) 132 | x = self.layer2(x) 133 | x = self.layer3(x) 134 | x = self.layer4(x) 135 | 136 | return x 137 | 138 | 139 | def resnet18(pretrained_path=None): 140 | """Constructs a ResNet-18 model. 141 | Args: 142 | pretrained (bool): If True, returns a model pre-trained on ImageNet 143 | """ 144 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 145 | if pretrained_path is not None: 146 | model.load_state_dict(torch.load(pretrained_path)) 147 | print('Loaded pre-trained weights') 148 | return model 149 | 150 | 151 | def resnet34(pretrained_path=None, **kwargs): 152 | """Constructs a ResNet-34 model. 153 | Args: 154 | pretrained (bool): If True, returns a model pre-trained on ImageNet 155 | """ 156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 157 | if pretrained_path is not None: 158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth'))) 159 | print('Loaded pre-trained weights') 160 | return model 161 | 162 | 163 | def resnet50(pretrained_path=None, **kwargs): 164 | """Constructs a ResNet-50 model. 165 | Args: 166 | pretrained (bool): If True, returns a model pre-trained on ImageNet 167 | """ 168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 169 | if pretrained_path is not None: 170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth'))) 171 | print('Loaded pre-trained weights') 172 | return model 173 | 174 | 175 | def resnet101(pretrained_path=None, **kwargs): 176 | """Constructs a ResNet-101 model. 
177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 181 | if pretrained_path is not None: 182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth'))) 183 | print('Loaded pre-trained weights') 184 | return model 185 | -------------------------------------------------------------------------------- /mono/model/mono_fm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_fm/__init__.py -------------------------------------------------------------------------------- /mono/model/mono_fm/depth_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .layers import Conv1x1, Conv3x3, CRPBlock, upsample 5 | 6 | 7 | class DepthDecoder(nn.Module): 8 | def __init__(self, num_ch_enc): 9 | super(DepthDecoder, self).__init__() 10 | 11 | bottleneck = 256 12 | stage = 4 13 | self.do = nn.Dropout(p=0.5) 14 | 15 | self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False) 16 | self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False) 17 | self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False) 18 | self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False) 19 | 20 | self.iconv4 = Conv3x3(512, bottleneck) 21 | self.iconv3 = Conv3x3(bottleneck*2+1, bottleneck) 22 | self.iconv2 = Conv3x3(bottleneck*2+1, bottleneck) 23 | self.iconv1 = Conv3x3(bottleneck*2+1, bottleneck) 24 | 25 | self.crp4 = self._make_crp(bottleneck, bottleneck, stage) 26 | self.crp3 = self._make_crp(bottleneck, bottleneck, stage) 27 | self.crp2 = self._make_crp(bottleneck, bottleneck, stage) 28 | self.crp1 = self._make_crp(bottleneck, bottleneck, stage) 29 | 30 | self.merge4 = Conv3x3(bottleneck, bottleneck) 31 | self.merge3 = Conv3x3(bottleneck, bottleneck) 32 | self.merge2 = Conv3x3(bottleneck, bottleneck) 33 | self.merge1 = Conv3x3(bottleneck, bottleneck) 34 | 35 | # disp 36 | self.disp4 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 37 | self.disp3 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 38 | self.disp2 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 39 | self.disp1 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 40 | 41 | def _make_crp(self, in_planes, out_planes, stages): 42 | layers = [CRPBlock(in_planes, out_planes,stages)] 43 | return nn.Sequential(*layers) 44 | 45 | def forward(self, input_features, frame_id=0): 46 | self.outputs = {} 47 | l0, l1, l2, l3, l4 = input_features 48 | 49 | l4 = self.do(l4) 50 | l3 = self.do(l3) 51 | 52 | x4 = self.reduce4(l4) 53 | x4 = self.iconv4(x4) 54 | x4 = F.leaky_relu(x4) 55 | x4 = self.crp4(x4) 56 | x4 = self.merge4(x4) 57 | x4 = F.leaky_relu(x4) 58 | x4 = upsample(x4) 59 | disp4 = self.disp4(x4) 60 | 61 | 62 | x3 = self.reduce3(l3) 63 | x3 = torch.cat((x3, x4, disp4), 1) 64 | x3 = self.iconv3(x3) 65 | x3 = F.leaky_relu(x3) 66 | x3 = self.crp3(x3) 67 | x3 = self.merge3(x3) 68 | x3 = F.leaky_relu(x3) 69 | x3 = upsample(x3) 70 | disp3 = self.disp3(x3) 71 | 72 | 73 | x2 = self.reduce2(l2) 74 | x2 = torch.cat((x2, x3 , disp3), 1) 75 | x2 = self.iconv2(x2) 76 | x2 = F.leaky_relu(x2) 77 | x2 = self.crp2(x2) 78 | x2 = self.merge2(x2) 79 | x2 = F.leaky_relu(x2) 80 | x2 = upsample(x2) 81 | disp2 = self.disp2(x2) 82 | 83 | x1 = self.reduce1(l1) 84 | x1 = 
torch.cat((x1, x2, disp2), 1) 85 | x1 = self.iconv1(x1) 86 | x1 = F.leaky_relu(x1) 87 | x1 = self.crp1(x1) 88 | x1 = self.merge1(x1) 89 | x1 = F.leaky_relu(x1) 90 | x1 = upsample(x1) 91 | disp1 = self.disp1(x1) 92 | 93 | self.outputs[("disp", frame_id, 3)] = disp4 94 | self.outputs[("disp", frame_id, 2)] = disp3 95 | self.outputs[("disp", frame_id, 1)] = disp2 96 | self.outputs[("disp", frame_id, 0)] = disp1 97 | 98 | return self.outputs 99 | -------------------------------------------------------------------------------- /mono/model/mono_fm/depth_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from .resnet import resnet18, resnet34, resnet50, resnet101 6 | 7 | 8 | class DepthEncoder(nn.Module): 9 | def __init__(self, num_layers, pretrained_path=None): 10 | super(DepthEncoder, self).__init__() 11 | 12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 13 | 14 | resnets = {18: resnet18, 15 | 34: resnet34, 16 | 50: resnet50, 17 | 101: resnet101,} 18 | 19 | if num_layers not in resnets: 20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 21 | 22 | 23 | self.encoder = resnets[num_layers]() 24 | if pretrained_path is not None: 25 | checkpoint = torch.load(pretrained_path) 26 | self.encoder.load_state_dict(checkpoint) 27 | 28 | if num_layers > 34: 29 | self.num_ch_enc[1:] *= 4 30 | 31 | # for name, param in self.encoder.named_parameters(): 32 | # if 'bn' in name: 33 | # param.requires_grad = False 34 | 35 | def forward(self, input_image): 36 | self.features = [] 37 | x = (input_image - 0.45) / 0.225 38 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(x)))) 39 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 40 | self.features.append(self.encoder.layer2(self.features[-1])) 41 | self.features.append(self.encoder.layer3(self.features[-1])) 42 | self.features.append(self.encoder.layer4(self.features[-1])) 43 | 44 | return self.features 45 | -------------------------------------------------------------------------------- /mono/model/mono_fm/pose_decoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch.nn as nn 3 | 4 | 5 | class PoseDecoder(nn.Module): 6 | def __init__(self, num_ch_enc, stride=1): 7 | super(PoseDecoder, self).__init__() 8 | 9 | self.reduce = nn.Conv2d(num_ch_enc[-1], 256, 1) 10 | self.conv1 = nn.Conv2d(256, 256, 3, stride, 1) 11 | self.conv2 = nn.Conv2d(256, 256, 3, stride, 1) 12 | self.conv3 = nn.Conv2d(256, 6, 1) 13 | 14 | self.relu = nn.ReLU() 15 | 16 | def forward(self, input_features): 17 | f = input_features[-1] 18 | out = self.relu(self.reduce(f)) 19 | out = self.relu(self.conv1(out)) 20 | out = self.relu(self.conv2(out)) 21 | out = self.conv3(out) 22 | out = out.mean(3).mean(2) 23 | out = 0.01 * out.view(-1, 1, 1, 6) 24 | axisangle = out[..., :3] 25 | translation = out[..., 3:] 26 | return axisangle, translation 27 | -------------------------------------------------------------------------------- /mono/model/mono_fm/pose_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | from .resnet import 
ResNet, BasicBlock, resnet18, resnet34, resnet50, resnet101, Bottleneck 8 | from torch.nn import BatchNorm2d as bn 9 | 10 | 11 | class ResNetMultiImageInput(ResNet): 12 | def __init__(self, block, layers, num_classes=1000, num_input_images=2): 13 | super(ResNetMultiImageInput, self).__init__(block, layers) 14 | self.inplanes = 64 15 | self.conv1 = nn.Conv2d(num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 16 | self.bn1 = bn(64) 17 | self.relu = nn.ReLU(inplace=True) 18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 19 | self.layer1 = self._make_layer(block, 64, layers[0]) 20 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 21 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 22 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 23 | 24 | for m in self.modules(): 25 | if isinstance(m, nn.Conv2d): 26 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 27 | elif isinstance(m, nn.BatchNorm2d): 28 | nn.init.constant_(m.weight, 1) 29 | nn.init.constant_(m.bias, 0) 30 | 31 | 32 | def resnet_multiimage_input(num_layers, num_input_images=2, pretrained_path=None): 33 | assert num_layers in [18, 34, 50, 101], "Can only run with 18, 34, 50, 101 layers resnet" 34 | blocks = {18 : [2, 2, 2, 2], 35 | 34 : [3, 4, 6, 3], 36 | 50 : [3, 4, 6, 3], 37 | 101: [3, 4, 23, 3], 38 | }[num_layers] 39 | 40 | if num_layers < 40: 41 | model = ResNetMultiImageInput(BasicBlock, blocks, num_input_images=num_input_images) 42 | elif num_layers > 40: 43 | model = ResNetMultiImageInput(Bottleneck, blocks, num_input_images=num_input_images) 44 | 45 | if pretrained_path is not None: 46 | loaded = torch.load(pretrained_path) 47 | loaded['conv1.weight'] = torch.cat([loaded['conv1.weight']] * num_input_images, 1) / num_input_images 48 | model.load_state_dict(loaded) 49 | return model 50 | 51 | 52 | class PoseEncoder(nn.Module): 53 | def __init__(self, num_layers, pretrained_path=None, num_input_images=2): 54 | super(PoseEncoder, self).__init__() 55 | 56 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 57 | 58 | resnets = {18: resnet18, 59 | 34: resnet34, 60 | 50: resnet50, 61 | 101: resnet101,} 62 | 63 | if num_layers not in resnets: 64 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 65 | 66 | if num_input_images > 1: 67 | self.encoder = resnet_multiimage_input(num_layers, num_input_images, pretrained_path) 68 | else: 69 | self.encoder = resnets[num_layers]() 70 | if pretrained_path is not None: 71 | checkpoint = torch.load(pretrained_path) 72 | self.encoder.load_state_dict(checkpoint) 73 | 74 | if num_layers > 34: 75 | self.num_ch_enc[1:] *= 4 76 | 77 | # for name, param in self.encoder.named_parameters(): 78 | # if 'bn' in name: 79 | # param.requires_grad = False 80 | 81 | def forward(self, input_image): 82 | self.features = [] 83 | x = (input_image - 0.45) / 0.225 84 | x = self.encoder.conv1(x) 85 | x = self.encoder.bn1(x) 86 | self.features.append(self.encoder.relu(x)) 87 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 88 | self.features.append(self.encoder.layer2(self.features[-1])) 89 | self.features.append(self.encoder.layer3(self.features[-1])) 90 | self.features.append(self.encoder.layer4(self.features[-1])) 91 | 92 | return self.features 93 | -------------------------------------------------------------------------------- /mono/model/mono_fm/resnet.py: -------------------------------------------------------------------------------- 1 
| import os.path as osp 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import BatchNorm2d as bn 5 | 6 | def conv3x3(in_planes, out_planes, stride=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 9 | 10 | 11 | def conv1x1(in_planes, out_planes, stride=1): 12 | """1x1 convolution""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = conv3x3(inplanes, planes, stride) 22 | self.bn1 = bn(planes) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = bn(planes) 26 | self.downsample = downsample 27 | self.stride = stride 28 | 29 | def forward(self, x): 30 | residual = x 31 | 32 | out = self.conv1(x) 33 | out = self.bn1(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv2(out) 37 | out = self.bn2(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, inplanes, planes, stride=1, downsample=None): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = conv1x1(inplanes, planes) 54 | self.bn1 = bn(planes) 55 | self.conv2 = conv3x3(planes, planes, stride) 56 | self.bn2 = bn(planes) 57 | self.conv3 = conv1x1(planes, planes * self.expansion) 58 | self.bn3 = bn(planes * self.expansion) 59 | self.relu = nn.ReLU(inplace=True) 60 | self.downsample = downsample 61 | self.stride = stride 62 | 63 | def forward(self, x): 64 | residual = x 65 | 66 | out = self.conv1(x) 67 | out = self.bn1(out) 68 | out = self.relu(out) 69 | 70 | out = self.conv2(out) 71 | out = self.bn2(out) 72 | out = self.relu(out) 73 | 74 | out = self.conv3(out) 75 | out = self.bn3(out) 76 | 77 | if self.downsample is not None: 78 | residual = self.downsample(x) 79 | 80 | out += residual 81 | out = self.relu(out) 82 | 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | 88 | def __init__(self, block, layers, num_classes=1000): 89 | super(ResNet, self).__init__() 90 | self.inplanes = 64 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 92 | self.bn1 = bn(64) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0]) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 100 | self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 105 | elif isinstance(m, bn): 106 | nn.init.constant_(m.weight, 1) 107 | nn.init.constant_(m.bias, 0) 108 | 109 | def _make_layer(self, block, planes, blocks, stride=1): 110 | downsample = None 111 | if stride != 1 or self.inplanes != planes * block.expansion: 112 | downsample = nn.Sequential( 113 | conv1x1(self.inplanes, planes * block.expansion, stride), 114 | bn(planes * block.expansion), 115 | ) 116 | 117 | layers = [] 118 | 
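        # The first block carries the stride and, when the resolution or channel
        # count changes, a 1x1 conv + BatchNorm shortcut; the remaining blocks keep
        # the same shape.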
layers.append(block(self.inplanes, planes, stride, downsample)) 119 | self.inplanes = planes * block.expansion 120 | for _ in range(1, blocks): 121 | layers.append(block(self.inplanes, planes)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.conv1(x) 127 | x = self.bn1(x) 128 | x = self.relu(x) 129 | x = self.maxpool(x) 130 | 131 | x = self.layer1(x) 132 | x = self.layer2(x) 133 | x = self.layer3(x) 134 | x = self.layer4(x) 135 | 136 | return x 137 | 138 | 139 | def resnet18(pretrained_path=None): 140 | """Constructs a ResNet-18 model. 141 | Args: 142 | pretrained (bool): If True, returns a model pre-trained on ImageNet 143 | """ 144 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 145 | if pretrained_path is not None: 146 | model.load_state_dict(torch.load(pretrained_path)) 147 | print('Loaded pre-trained weights') 148 | return model 149 | 150 | 151 | def resnet34(pretrained_path=None, **kwargs): 152 | """Constructs a ResNet-34 model. 153 | Args: 154 | pretrained (bool): If True, returns a model pre-trained on ImageNet 155 | """ 156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 157 | if pretrained_path is not None: 158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth'))) 159 | print('Loaded pre-trained weights') 160 | return model 161 | 162 | 163 | def resnet50(pretrained_path=None, **kwargs): 164 | """Constructs a ResNet-50 model. 165 | Args: 166 | pretrained (bool): If True, returns a model pre-trained on ImageNet 167 | """ 168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 169 | if pretrained_path is not None: 170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth'))) 171 | print('Loaded pre-trained weights') 172 | return model 173 | 174 | 175 | def resnet101(pretrained_path=None, **kwargs): 176 | """Constructs a ResNet-101 model. 
177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 181 | if pretrained_path is not None: 182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth'))) 183 | print('Loaded pre-trained weights') 184 | return model 185 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/model/mono_fm_joint/__init__.py -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/decoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch.nn as nn 3 | from .layers import ConvBlock, Conv3x3, upsample 4 | 5 | 6 | class Decoder(nn.Module): 7 | def __init__(self, num_ch_enc, num_output_channels=3): 8 | super(Decoder, self).__init__() 9 | 10 | num_ch_dec = [16, 32, 64, 128, 256] 11 | 12 | # upconv 13 | self.upconv5 = ConvBlock(num_ch_enc[4], num_ch_dec[4]) 14 | self.upconv4 = ConvBlock(num_ch_dec[4], num_ch_dec[3]) 15 | self.upconv3 = ConvBlock(num_ch_dec[3], num_ch_dec[2]) 16 | self.upconv2 = ConvBlock(num_ch_dec[2], num_ch_dec[1]) 17 | self.upconv1 = ConvBlock(num_ch_dec[1], num_ch_dec[0]) 18 | 19 | # iconv 20 | self.iconv5 = ConvBlock(num_ch_dec[4], num_ch_dec[4]) 21 | self.iconv4 = ConvBlock(num_ch_dec[3], num_ch_dec[3]) 22 | self.iconv3 = ConvBlock(num_ch_dec[2], num_ch_dec[2]) 23 | self.iconv2 = ConvBlock(num_ch_dec[1], num_ch_dec[1]) 24 | self.iconv1 = ConvBlock(num_ch_dec[0], num_ch_dec[0]) 25 | 26 | # disp 27 | self.disp4 = Conv3x3(num_ch_dec[3], num_output_channels) 28 | self.disp3 = Conv3x3(num_ch_dec[2], num_output_channels) 29 | self.disp2 = Conv3x3(num_ch_dec[1], num_output_channels) 30 | self.disp1 = Conv3x3(num_ch_dec[0], num_output_channels) 31 | 32 | self.sigmoid = nn.Sigmoid() 33 | 34 | 35 | def forward(self, input_features, frame_id=0): 36 | self.outputs = {} 37 | _, _, _, _, econv5 = input_features 38 | # (64,64,128,256,512)*4 39 | 40 | upconv5 = upsample(self.upconv5(econv5)) 41 | iconv5 = self.iconv5(upconv5) 42 | 43 | upconv4 = upsample(self.upconv4(iconv5)) 44 | iconv4 = self.iconv4(upconv4) 45 | 46 | upconv3 = upsample(self.upconv3(iconv4)) 47 | iconv3 = self.iconv3(upconv3) 48 | 49 | upconv2 = upsample(self.upconv2(iconv3)) 50 | iconv2 = self.iconv2(upconv2) 51 | 52 | upconv1 = upsample(self.upconv1(iconv2)) 53 | iconv1 = self.iconv1(upconv1) 54 | 55 | self.outputs[("res_img", frame_id, 3)] = self.sigmoid(self.disp4(iconv4)) 56 | self.outputs[("res_img", frame_id, 2)] = self.sigmoid(self.disp3(iconv3)) 57 | self.outputs[("res_img", frame_id, 1)] = self.sigmoid(self.disp2(iconv2)) 58 | self.outputs[("res_img", frame_id, 0)] = self.sigmoid(self.disp1(iconv1)) 59 | return self.outputs -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/depth_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .layers import Conv1x1, Conv3x3, CRPBlock, upsample 5 | 6 | 7 | class DepthDecoder(nn.Module): 8 | def __init__(self, num_ch_enc): 9 | super(DepthDecoder, self).__init__() 10 | 11 | 
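        # RefineNet-style decoder: every encoder scale is projected to a common
        # bottleneck width, refined with chained residual pooling (CRP) blocks, and
        # decoded by a 3x3 conv + sigmoid disparity head. iconv3-iconv1 take
        # bottleneck * 2 + 1 input channels because the upsampled coarser features
        # and their 1-channel disparity are concatenated with the skip connection.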
bottleneck = 256 12 | stage = 4 13 | self.do = nn.Dropout(p=0.5) 14 | 15 | self.reduce4 = Conv1x1(num_ch_enc[4], 512, bias=False) 16 | self.reduce3 = Conv1x1(num_ch_enc[3], bottleneck, bias=False) 17 | self.reduce2 = Conv1x1(num_ch_enc[2], bottleneck, bias=False) 18 | self.reduce1 = Conv1x1(num_ch_enc[1], bottleneck, bias=False) 19 | 20 | self.iconv4 = Conv3x3(512, bottleneck) 21 | self.iconv3 = Conv3x3(bottleneck*2+1, bottleneck) 22 | self.iconv2 = Conv3x3(bottleneck*2+1, bottleneck) 23 | self.iconv1 = Conv3x3(bottleneck*2+1, bottleneck) 24 | 25 | self.crp4 = self._make_crp(bottleneck, bottleneck, stage) 26 | self.crp3 = self._make_crp(bottleneck, bottleneck, stage) 27 | self.crp2 = self._make_crp(bottleneck, bottleneck, stage) 28 | self.crp1 = self._make_crp(bottleneck, bottleneck, stage) 29 | 30 | self.merge4 = Conv3x3(bottleneck, bottleneck) 31 | self.merge3 = Conv3x3(bottleneck, bottleneck) 32 | self.merge2 = Conv3x3(bottleneck, bottleneck) 33 | self.merge1 = Conv3x3(bottleneck, bottleneck) 34 | 35 | # disp 36 | self.disp4 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 37 | self.disp3 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 38 | self.disp2 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 39 | self.disp1 = nn.Sequential(Conv3x3(bottleneck, 1), nn.Sigmoid()) 40 | 41 | def _make_crp(self, in_planes, out_planes, stages): 42 | layers = [CRPBlock(in_planes, out_planes,stages)] 43 | return nn.Sequential(*layers) 44 | 45 | def forward(self, input_features, frame_id=0): 46 | self.outputs = {} 47 | l0, l1, l2, l3, l4 = input_features 48 | 49 | l4 = self.do(l4) 50 | l3 = self.do(l3) 51 | 52 | x4 = self.reduce4(l4) 53 | x4 = self.iconv4(x4) 54 | x4 = F.leaky_relu(x4) 55 | x4 = self.crp4(x4) 56 | x4 = self.merge4(x4) 57 | x4 = F.leaky_relu(x4) 58 | x4 = upsample(x4) 59 | disp4 = self.disp4(x4) 60 | 61 | 62 | x3 = self.reduce3(l3) 63 | x3 = torch.cat((x3, x4, disp4), 1) 64 | x3 = self.iconv3(x3) 65 | x3 = F.leaky_relu(x3) 66 | x3 = self.crp3(x3) 67 | x3 = self.merge3(x3) 68 | x3 = F.leaky_relu(x3) 69 | x3 = upsample(x3) 70 | disp3 = self.disp3(x3) 71 | 72 | 73 | x2 = self.reduce2(l2) 74 | x2 = torch.cat((x2, x3 , disp3), 1) 75 | x2 = self.iconv2(x2) 76 | x2 = F.leaky_relu(x2) 77 | x2 = self.crp2(x2) 78 | x2 = self.merge2(x2) 79 | x2 = F.leaky_relu(x2) 80 | x2 = upsample(x2) 81 | disp2 = self.disp2(x2) 82 | 83 | x1 = self.reduce1(l1) 84 | x1 = torch.cat((x1, x2, disp2), 1) 85 | x1 = self.iconv1(x1) 86 | x1 = F.leaky_relu(x1) 87 | x1 = self.crp1(x1) 88 | x1 = self.merge1(x1) 89 | x1 = F.leaky_relu(x1) 90 | x1 = upsample(x1) 91 | disp1 = self.disp1(x1) 92 | 93 | self.outputs[("disp", frame_id, 3)] = disp4 94 | self.outputs[("disp", frame_id, 2)] = disp3 95 | self.outputs[("disp", frame_id, 1)] = disp2 96 | self.outputs[("disp", frame_id, 0)] = disp1 97 | 98 | return self.outputs 99 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/depth_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from .resnet import resnet18, resnet34, resnet50, resnet101 6 | 7 | 8 | class DepthEncoder(nn.Module): 9 | def __init__(self, num_layers, pretrained_path=None): 10 | super(DepthEncoder, self).__init__() 11 | 12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 13 | 14 | resnets = {18: resnet18, 15 | 34: resnet34, 16 | 50: resnet50, 17 | 101: resnet101,} 
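        # ResNet-50/101 use Bottleneck blocks whose outputs are 4x wider, which is
        # why num_ch_enc[1:] is scaled by 4 below.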
18 | 19 | if num_layers not in resnets: 20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 21 | 22 | 23 | self.encoder = resnets[num_layers]() 24 | if pretrained_path is not None: 25 | checkpoint = torch.load(pretrained_path) 26 | self.encoder.load_state_dict(checkpoint) 27 | 28 | if num_layers > 34: 29 | self.num_ch_enc[1:] *= 4 30 | 31 | # for name, param in self.encoder.named_parameters(): 32 | # if 'bn' in name: 33 | # param.requires_grad = False 34 | 35 | def forward(self, input_image): 36 | self.features = [] 37 | x = (input_image - 0.45) / 0.225 38 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(x)))) 39 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 40 | self.features.append(self.encoder.layer2(self.features[-1])) 41 | self.features.append(self.encoder.layer3(self.features[-1])) 42 | self.features.append(self.encoder.layer4(self.features[-1])) 43 | 44 | return self.features 45 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from .resnet import resnet18, resnet34, resnet50, resnet101 6 | 7 | 8 | class Encoder(nn.Module): 9 | def __init__(self, num_layers, pretrained_path=None): 10 | super(Encoder, self).__init__() 11 | 12 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 13 | 14 | resnets = {18: resnet18, 15 | 34: resnet34, 16 | 50: resnet50, 17 | 101: resnet101,} 18 | 19 | if num_layers not in resnets: 20 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 21 | 22 | 23 | self.encoder = resnets[num_layers]() 24 | if pretrained_path is not None: 25 | checkpoint = torch.load(pretrained_path) 26 | self.encoder.load_state_dict(checkpoint) 27 | 28 | if num_layers > 34: 29 | self.num_ch_enc[1:] *= 4 30 | 31 | # for name, param in self.encoder.named_parameters(): 32 | # if 'bn' in name: 33 | # param.requires_grad = False 34 | 35 | def forward(self, input_image): 36 | self.features = [] 37 | self.features.append(self.encoder.relu(self.encoder.bn1(self.encoder.conv1(input_image)))) 38 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 39 | self.features.append(self.encoder.layer2(self.features[-1])) 40 | self.features.append(self.encoder.layer3(self.features[-1])) 41 | self.features.append(self.encoder.layer4(self.features[-1])) 42 | 43 | return self.features 44 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/pose_decoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import torch.nn as nn 3 | 4 | 5 | class PoseDecoder(nn.Module): 6 | def __init__(self, num_ch_enc, stride=1): 7 | super(PoseDecoder, self).__init__() 8 | 9 | self.reduce = nn.Conv2d(num_ch_enc[-1], 256, 1) 10 | self.conv1 = nn.Conv2d(256, 256, 3, stride, 1) 11 | self.conv2 = nn.Conv2d(256, 256, 3, stride, 1) 12 | self.conv3 = nn.Conv2d(256, 6, 1) 13 | 14 | self.relu = nn.ReLU() 15 | 16 | def forward(self, input_features): 17 | f = input_features[-1] 18 | out = self.relu(self.reduce(f)) 19 | out = self.relu(self.conv1(out)) 20 | out = self.relu(self.conv2(out)) 21 | out = self.conv3(out) 22 | out = 
out.mean(3).mean(2) 23 | out = 0.01 * out.view(-1, 1, 1, 6) 24 | axisangle = out[..., :3] 25 | translation = out[..., 3:] 26 | return axisangle, translation 27 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/pose_encoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | from .resnet import ResNet, BasicBlock, resnet18, resnet34, resnet50, resnet101, Bottleneck 8 | from torch.nn import BatchNorm2d as bn 9 | 10 | 11 | class ResNetMultiImageInput(ResNet): 12 | def __init__(self, block, layers, num_classes=1000, num_input_images=2): 13 | super(ResNetMultiImageInput, self).__init__(block, layers) 14 | self.inplanes = 64 15 | self.conv1 = nn.Conv2d(num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 16 | self.bn1 = bn(64) 17 | self.relu = nn.ReLU(inplace=True) 18 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 19 | self.layer1 = self._make_layer(block, 64, layers[0]) 20 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 21 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 22 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 23 | 24 | for m in self.modules(): 25 | if isinstance(m, nn.Conv2d): 26 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 27 | elif isinstance(m, nn.BatchNorm2d): 28 | nn.init.constant_(m.weight, 1) 29 | nn.init.constant_(m.bias, 0) 30 | 31 | 32 | def resnet_multiimage_input(num_layers, num_input_images=2, pretrained_path=None): 33 | assert num_layers in [18, 34, 50, 101], "Can only run with 18, 34, 50, 101 layers resnet" 34 | blocks = {18 : [2, 2, 2, 2], 35 | 34 : [3, 4, 6, 3], 36 | 50 : [3, 4, 6, 3], 37 | 101: [3, 4, 23, 3], 38 | }[num_layers] 39 | 40 | if num_layers < 40: 41 | model = ResNetMultiImageInput(BasicBlock, blocks, num_input_images=num_input_images) 42 | elif num_layers > 40: 43 | model = ResNetMultiImageInput(Bottleneck, blocks, num_input_images=num_input_images) 44 | 45 | if pretrained_path is not None: 46 | loaded = torch.load(pretrained_path) 47 | loaded['conv1.weight'] = torch.cat([loaded['conv1.weight']] * num_input_images, 1) / num_input_images 48 | model.load_state_dict(loaded) 49 | return model 50 | 51 | 52 | class PoseEncoder(nn.Module): 53 | def __init__(self, num_layers, pretrained_path=None, num_input_images=2): 54 | super(PoseEncoder, self).__init__() 55 | 56 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 57 | 58 | resnets = {18: resnet18, 59 | 34: resnet34, 60 | 50: resnet50, 61 | 101: resnet101,} 62 | 63 | if num_layers not in resnets: 64 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 65 | 66 | if num_input_images > 1: 67 | self.encoder = resnet_multiimage_input(num_layers, num_input_images, pretrained_path) 68 | else: 69 | self.encoder = resnets[num_layers]() 70 | if pretrained_path is not None: 71 | checkpoint = torch.load(pretrained_path) 72 | self.encoder.load_state_dict(checkpoint) 73 | 74 | if num_layers > 34: 75 | self.num_ch_enc[1:] *= 4 76 | 77 | # for name, param in self.encoder.named_parameters(): 78 | # if 'bn' in name: 79 | # param.requires_grad = False 80 | 81 | def forward(self, input_image): 82 | self.features = [] 83 | x = (input_image - 0.45) / 0.225 84 | x = self.encoder.conv1(x) 85 | x = self.encoder.bn1(x) 86 | 
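        # Collect a five-level feature pyramid (strides 2, 4, 8, 16, 32);
        # PoseDecoder only consumes the last, coarsest level.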
self.features.append(self.encoder.relu(x)) 87 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 88 | self.features.append(self.encoder.layer2(self.features[-1])) 89 | self.features.append(self.encoder.layer3(self.features[-1])) 90 | self.features.append(self.encoder.layer4(self.features[-1])) 91 | 92 | return self.features 93 | -------------------------------------------------------------------------------- /mono/model/mono_fm_joint/resnet.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import BatchNorm2d as bn 5 | 6 | def conv3x3(in_planes, out_planes, stride=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 9 | 10 | 11 | def conv1x1(in_planes, out_planes, stride=1): 12 | """1x1 convolution""" 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = conv3x3(inplanes, planes, stride) 22 | self.bn1 = bn(planes) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = bn(planes) 26 | self.downsample = downsample 27 | self.stride = stride 28 | 29 | def forward(self, x): 30 | residual = x 31 | 32 | out = self.conv1(x) 33 | out = self.bn1(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv2(out) 37 | out = self.bn2(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | out = self.relu(out) 44 | 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, inplanes, planes, stride=1, downsample=None): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = conv1x1(inplanes, planes) 54 | self.bn1 = bn(planes) 55 | self.conv2 = conv3x3(planes, planes, stride) 56 | self.bn2 = bn(planes) 57 | self.conv3 = conv1x1(planes, planes * self.expansion) 58 | self.bn3 = bn(planes * self.expansion) 59 | self.relu = nn.ReLU(inplace=True) 60 | self.downsample = downsample 61 | self.stride = stride 62 | 63 | def forward(self, x): 64 | residual = x 65 | 66 | out = self.conv1(x) 67 | out = self.bn1(out) 68 | out = self.relu(out) 69 | 70 | out = self.conv2(out) 71 | out = self.bn2(out) 72 | out = self.relu(out) 73 | 74 | out = self.conv3(out) 75 | out = self.bn3(out) 76 | 77 | if self.downsample is not None: 78 | residual = self.downsample(x) 79 | 80 | out += residual 81 | out = self.relu(out) 82 | 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | 88 | def __init__(self, block, layers, num_classes=1000): 89 | super(ResNet, self).__init__() 90 | self.inplanes = 64 91 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 92 | self.bn1 = bn(64) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0]) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 100 | self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | 102 | for m in self.modules(): 103 | if 
isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 105 | elif isinstance(m, bn): 106 | nn.init.constant_(m.weight, 1) 107 | nn.init.constant_(m.bias, 0) 108 | 109 | def _make_layer(self, block, planes, blocks, stride=1): 110 | downsample = None 111 | if stride != 1 or self.inplanes != planes * block.expansion: 112 | downsample = nn.Sequential( 113 | conv1x1(self.inplanes, planes * block.expansion, stride), 114 | bn(planes * block.expansion), 115 | ) 116 | 117 | layers = [] 118 | layers.append(block(self.inplanes, planes, stride, downsample)) 119 | self.inplanes = planes * block.expansion 120 | for _ in range(1, blocks): 121 | layers.append(block(self.inplanes, planes)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.conv1(x) 127 | x = self.bn1(x) 128 | x = self.relu(x) 129 | x = self.maxpool(x) 130 | 131 | x = self.layer1(x) 132 | x = self.layer2(x) 133 | x = self.layer3(x) 134 | x = self.layer4(x) 135 | 136 | return x 137 | 138 | 139 | def resnet18(pretrained_path=None): 140 | """Constructs a ResNet-18 model. 141 | Args: 142 | pretrained (bool): If True, returns a model pre-trained on ImageNet 143 | """ 144 | model = ResNet(BasicBlock, [2, 2, 2, 2]) 145 | if pretrained_path is not None: 146 | model.load_state_dict(torch.load(pretrained_path)) 147 | print('Loaded pre-trained weights') 148 | return model 149 | 150 | 151 | def resnet34(pretrained_path=None, **kwargs): 152 | """Constructs a ResNet-34 model. 153 | Args: 154 | pretrained (bool): If True, returns a model pre-trained on ImageNet 155 | """ 156 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 157 | if pretrained_path is not None: 158 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet34.pth'))) 159 | print('Loaded pre-trained weights') 160 | return model 161 | 162 | 163 | def resnet50(pretrained_path=None, **kwargs): 164 | """Constructs a ResNet-50 model. 165 | Args: 166 | pretrained (bool): If True, returns a model pre-trained on ImageNet 167 | """ 168 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 169 | if pretrained_path is not None: 170 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet50.pth'))) 171 | print('Loaded pre-trained weights') 172 | return model 173 | 174 | 175 | def resnet101(pretrained_path=None, **kwargs): 176 | """Constructs a ResNet-101 model. 177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 181 | if pretrained_path is not None: 182 | model.load_state_dict(torch.load(osp.join(pretrained_path, 'resnet101.pth'))) 183 | print('Loaded pre-trained weights') 184 | return model 185 | -------------------------------------------------------------------------------- /mono/model/registry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Duanzhixiang(zhixiangduan@deepmotion.ai) 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | class Registry(object): 9 | def __init__(self, name): 10 | self._name = name 11 | self._module_dict = dict() 12 | 13 | @property 14 | def name(self): 15 | return self._name 16 | 17 | @property 18 | def module_dict(self): 19 | return self._module_dict 20 | 21 | def _register_module(self, module_class): 22 | """Register a module. 23 | 24 | Args: 25 | module (:obj:`nn.Module`): Module to be registered. 
26 | """ 27 | if not issubclass(module_class, nn.Module): 28 | raise TypeError( 29 | 'module must be a child of nn.Module, but got {}'.format( 30 | module_class)) 31 | module_name = module_class.__name__ 32 | if module_name in self._module_dict: 33 | raise KeyError('{} is already registered in {}'.format( 34 | module_name, self.name)) 35 | self._module_dict[module_name] = module_class 36 | 37 | def register_module(self, cls): 38 | self._register_module(cls) 39 | return cls 40 | 41 | MONO = Registry('mono') 42 | -------------------------------------------------------------------------------- /mono/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/mono/tools/__init__.py -------------------------------------------------------------------------------- /mono/tools/geometry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provides generic geometry algorithms. 3 | author: Michael Grupp 4 | This file is part of evo (github.com/MichaelGrupp/evo). 5 | evo is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | evo is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | You should have received a copy of the GNU General Public License 14 | along with evo. If not, see . 15 | """ 16 | 17 | import numpy as np 18 | 19 | 20 | def umeyama_alignment(x, y, with_scale=False): 21 | """ 22 | Computes the least squares solution parameters of an Sim(m) matrix 23 | that minimizes the distance between a set of registered points. 24 | Umeyama, Shinji: Least-squares estimation of transformation parameters 25 | between two point patterns. IEEE PAMI, 1991 26 | :param x: mxn matrix of points, m = dimension, n = nr. of data points 27 | :param y: mxn matrix of points, m = dimension, n = nr. of data points 28 | :param with_scale: set to True to align also the scale (default: 1.0 scale) 29 | :return: r, t, c - rotation matrix, translation vector and scale factor 30 | """ 31 | if x.shape != y.shape: 32 | print("data matrices must have the same shape") 33 | 34 | # m = dimension, n = nr. of data points 35 | m, n = x.shape 36 | 37 | # means, eq. 34 and 35 38 | mean_x = x.mean(axis=1) 39 | mean_y = y.mean(axis=1) 40 | 41 | # variance, eq. 36 42 | # "transpose" for column subtraction 43 | sigma_x = 1.0 / n * (np.linalg.norm(x - mean_x[:, np.newaxis])**2) 44 | 45 | # covariance matrix, eq. 38 46 | outer_sum = np.zeros((m, m)) 47 | for i in range(n): 48 | outer_sum += np.outer((y[:, i] - mean_y), (x[:, i] - mean_x)) 49 | cov_xy = np.multiply(1.0 / n, outer_sum) 50 | 51 | # SVD (text betw. eq. 38 and 39) 52 | u, d, v = np.linalg.svd(cov_xy) 53 | 54 | # S matrix, eq. 43 55 | s = np.eye(m) 56 | if np.linalg.det(u) * np.linalg.det(v) < 0.0: 57 | # Ensure a RHS coordinate system (Kabsch algorithm). 58 | s[m - 1, m - 1] = -1 59 | 60 | # rotation, eq. 40 61 | r = u.dot(s).dot(v) 62 | 63 | # scale & translation, eq. 
42 and 41 64 | c = 1 / sigma_x * np.trace(np.diag(d).dot(s)) if with_scale else 1.0 65 | t = mean_y - np.multiply(c, r.dot(mean_x)) 66 | 67 | return r, t, c 68 | 69 | 70 | def arc_len(x): 71 | """ 72 | :param x: nxm array of points, m=dimension 73 | :return: the (discrete approximated) arc-length of the point sequence 74 | """ 75 | return np.sum(np.linalg.norm(x[:-1] - x[1:], axis=1)) 76 | 77 | 78 | def accumulated_distances(x): 79 | """ 80 | :param x: nxm array of points, m=dimension 81 | :return: the accumulated distances along the point sequence 82 | """ 83 | return np.concatenate((np.array([0]), 84 | np.cumsum(np.linalg.norm(x[:-1] - x[1:], axis=1)))) -------------------------------------------------------------------------------- /mono/tools/lie_algebra.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF8 -*- 2 | """ 3 | Provides functions for Lie group calculations. 4 | author: Michael Grupp 5 | This file is part of evo (github.com/MichaelGrupp/evo). 6 | evo is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | evo is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | You should have received a copy of the GNU General Public License 15 | along with evo. If not, see . 16 | """ 17 | 18 | import numpy as np 19 | import scipy.linalg as sl 20 | 21 | import mono.tools.transformations as tr 22 | 23 | 24 | def hat(v): 25 | """ 26 | :param v: 3x1 vector 27 | :return: 3x3 skew symmetric matrix 28 | """ 29 | # yapf: disable 30 | return np.array([[0.0, -v[2], v[1]], 31 | [v[2], 0.0, -v[0]], 32 | [-v[1], v[0], 0.0]]) 33 | # yapf: enable 34 | 35 | 36 | def vee(m): 37 | """ 38 | :param m: 3x3 skew symmetric matrix 39 | :return: 3x1 vector 40 | """ 41 | return np.array([-m[1, 2], m[0, 2], -m[0, 1]]) 42 | 43 | 44 | def so3_exp(axis, angle): 45 | """ 46 | Computes an SO(3) matrix from an axis/angle representation. 47 | Code source: http://stackoverflow.com/a/25709323 48 | :param axis: 3x1 rotation axis (unit vector!) 49 | :param angle: radians 50 | :return: SO(3) rotation matrix (matrix exponential of so(3)) 51 | """ 52 | return sl.expm(np.cross(np.eye(3), axis / np.linalg.norm(axis) * angle)) 53 | 54 | 55 | def so3_log(r, return_angle_only=True, return_skew=False): 56 | """ 57 | :param r: SO(3) rotation matrix 58 | :param return_angle_only: return only the angle (default) 59 | :param return_skew: return skew symmetric Lie algebra element 60 | :return: axis/angle 61 | or if skew: 62 | 3x3 skew symmetric logarithmic map in so(3) (Ma, Soatto eq. 
2.8) 63 | """ 64 | if not is_so3(r): 65 | print("matrix is not a valid SO(3) group element") 66 | if return_angle_only and not return_skew: 67 | return np.arccos(min(1, max(-1, (np.trace(r) - 1) / 2))) 68 | angle, axis, _ = tr.rotation_from_matrix(se3(r, [0, 0, 0])) 69 | if return_skew: 70 | return hat(axis * angle) 71 | else: 72 | return axis, angle 73 | 74 | 75 | def se3(r=np.eye(3), t=np.array([0, 0, 0])): 76 | """ 77 | :param r: SO(3) rotation matrix 78 | :param t: 3x1 translation vector 79 | :return: SE(3) transformation matrix 80 | """ 81 | se3 = np.eye(4) 82 | se3[:3, :3] = r 83 | se3[:3, 3] = t 84 | return se3 85 | 86 | 87 | def sim3(r, t, s): 88 | """ 89 | :param r: SO(3) rotation matrix 90 | :param t: 3x1 translation vector 91 | :param s: positive, non-zero scale factor 92 | :return: Sim(3) similarity transformation matrix 93 | """ 94 | sim3 = np.eye(4) 95 | sim3[:3, :3] = s * r 96 | sim3[:3, 3] = t 97 | return sim3 98 | 99 | 100 | def so3_from_se3(p): 101 | """ 102 | :param p: absolute SE(3) pose 103 | :return: the SO(3) rotation matrix in p 104 | """ 105 | return p[:3, :3] 106 | 107 | 108 | def se3_inverse(p): 109 | """ 110 | :param p: absolute SE(3) pose 111 | :return: the inverted pose 112 | """ 113 | r_inv = p[:3, :3].transpose() 114 | t_inv = -r_inv.dot(p[:3, 3]) 115 | return se3(r_inv, t_inv) 116 | 117 | 118 | def is_so3(r): 119 | """ 120 | :param r: a 3x3 matrix 121 | :return: True if r is in the SO(3) group 122 | """ 123 | # Check the determinant. 124 | det_valid = np.isclose(np.linalg.det(r), [1.0], atol=1e-6) 125 | # Check if the transpose is the inverse. 126 | inv_valid = np.allclose(r.transpose().dot(r), np.eye(3), atol=1e-6) 127 | return det_valid and inv_valid 128 | 129 | 130 | def is_se3(p): 131 | """ 132 | :param p: a 4x4 matrix 133 | :return: True if p is in the SE(3) group 134 | """ 135 | rot_valid = is_so3(p[:3, :3]) 136 | lower_valid = np.equal(p[3, :], np.array([0.0, 0.0, 0.0, 1.0])).all() 137 | return rot_valid and lower_valid 138 | 139 | 140 | def is_sim3(p, s): 141 | """ 142 | :param p: a 4x4 matrix 143 | :param s: expected scale factor 144 | :return: True if p is in the Sim(3) group with scale s 145 | """ 146 | rot = p[:3, :3] 147 | rot_unscaled = np.multiply(rot, 1.0 / s) 148 | rot_valid = is_so3(rot_unscaled) 149 | lower_valid = np.equal(p[3, :], np.array([0.0, 0.0, 0.0, 1.0])).all() 150 | return rot_valid and lower_valid 151 | 152 | 153 | def relative_so3(r1, r2): 154 | """ 155 | :param r1, r2: SO(3) matrices 156 | :return: the relative rotation r1^{⁻1} * r2 157 | """ 158 | return np.dot(r1.transpose(), r2) 159 | 160 | 161 | def relative_se3(p1, p2): 162 | """ 163 | :param p1, p2: SE(3) matrices 164 | :return: the relative transformation p1^{⁻1} * p2 165 | """ 166 | return np.dot(se3_inverse(p1), p2) 167 | 168 | 169 | def random_so3(): 170 | """ 171 | :return: a random SO(3) matrix (for debugging) 172 | """ 173 | return tr.random_rotation_matrix()[:3, :3] 174 | 175 | 176 | def random_se3(): 177 | """ 178 | :return: a random SE(3) matrix (for debugging) 179 | """ 180 | r = random_so3() 181 | t = tr.random_vector(3) 182 | return se3(r, t) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | matplotlib 4 | scikit-image 5 | scipy 6 | imageio 7 | tqdm 8 | cython 9 | mmcv==0.4.4 10 | torch>=1.1 11 | torchvision>=0.4.0 12 | pypng 
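Note (added for illustration, not part of the repository): the trajectory-alignment utilities above in mono/tools/geometry.py and mono/tools/lie_algebra.py are what the pose-evaluation tooling builds on. Below is a minimal usage sketch of umeyama_alignment; the random arrays are stand-ins for real predicted and ground-truth camera positions, and the 3xN layout (dimension x number of points) follows the function's docstring.

import numpy as np
from mono.tools.geometry import umeyama_alignment

# Stand-in trajectories: 3xN arrays of camera positions (dimension x points).
pred_xyz = np.random.rand(3, 100)
gt_xyz = np.random.rand(3, 100)

# with_scale=True also recovers the Sim(3) scale factor, which monocular
# methods need because their translation is only defined up to scale.
r, t, c = umeyama_alignment(pred_xyz, gt_xyz, with_scale=True)

# Apply the recovered similarity transform and measure the remaining error.
aligned = c * r.dot(pred_xyz) + t[:, np.newaxis]
ate_rmse = np.sqrt(np.mean(np.sum((aligned - gt_xyz) ** 2, axis=0)))
print('ATE RMSE after Sim(3) alignment: {:.4f}'.format(ate_rmse))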
-------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | if __name__ == '__main__': 4 | # os.system('/home/user/software/anaconda/envs/py37t11/bin/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=1 train.py') 5 | # os.system('/home/hadoop-wallemnl/cephfs/data/shuchang/envs/py37t11/bin/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=8 train.py') 6 | os.system('/home/sconly/Documents/code/py37t11/bin/python -m torch.distributed.launch --master_port=9900 --nproc_per_node=1 train.py --config ./config/cfg_kitti_fm.py --work_dir /media/sconly/harddisk/weight/fmdepth') 7 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sconlyshootery/FeatDepth/550420b3fb51a027549716b74c6fbce41651d3a5/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/draw_odometry.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import os 3 | import sys 4 | import argparse 5 | import numpy as np 6 | 7 | import torch 8 | from torch.utils.data import DataLoader 9 | 10 | sys.path.append('.') 11 | sys.path.append('..') 12 | from mono.datasets.euroc_dataset import FolderDataset 13 | from mono.datasets.kitti_dataset import KITTIOdomDataset 14 | from mono.datasets.utils import readlines,transformation_from_parameters 15 | from mono.model.mono_baseline.pose_encoder import PoseEncoder 16 | from mono.model.mono_baseline.pose_decoder import PoseDecoder 17 | from mono.tools.kitti_evaluation_toolkit import kittiOdomEval 18 | 19 | 20 | def odo(opt): 21 | if opt.kitti: 22 | filenames = readlines("../mono/datasets/splits/odom/test_files_{:02d}.txt".format(opt.sequence_id)) 23 | 24 | dataset = KITTIOdomDataset(opt.data_path, 25 | filenames, 26 | opt.height, 27 | opt.width, 28 | [0, 1], 29 | is_train=False, 30 | img_ext='.png', 31 | gt_depth_path=None) 32 | else: 33 | dataset = FolderDataset(opt.data_path, 34 | None, 35 | opt.height, 36 | opt.width, 37 | [0, 1], 38 | is_train=False, 39 | img_ext='.png', 40 | gt_depth_path=None) 41 | 42 | dataloader = DataLoader(dataset, 43 | 1, 44 | shuffle=False, 45 | num_workers=4, 46 | pin_memory=True, 47 | drop_last=False) 48 | 49 | pose_encoder = PoseEncoder(18, None, 2) 50 | pose_decoder = PoseDecoder(pose_encoder.num_ch_enc) 51 | 52 | checkpoint = torch.load(opt.model_path) 53 | for name, param in pose_encoder.state_dict().items(): 54 | pose_encoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseEncoder.' + name]) 55 | for name, param in pose_decoder.state_dict().items(): 56 | pose_decoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseDecoder.' 
+ name]) 57 | pose_encoder.cuda() 58 | pose_encoder.eval() 59 | pose_decoder.cuda() 60 | pose_decoder.eval() 61 | 62 | global_pose = np.identity(4) 63 | poses = [global_pose[0:3, :].reshape(1, 12)] 64 | 65 | with torch.no_grad(): 66 | for batch_idx, inputs in enumerate(dataloader): 67 | for key, ipt in inputs.items(): 68 | inputs[key] = ipt.cuda() 69 | all_color_aug = torch.cat([inputs[("color_aug", i, 0)] for i in [0,1]], 1) 70 | axisangle, translation = pose_decoder(pose_encoder(all_color_aug)) 71 | g = transformation_from_parameters(axisangle[:, 0], translation[:, 0]) 72 | backward_transform = g.squeeze().cpu().numpy()#the transformation from frame +1 to frame 0 73 | global_pose = global_pose @ np.linalg.inv(backward_transform) 74 | poses.append(global_pose[0:3, :].reshape(1, 12)) 75 | poses = np.concatenate(poses, axis=0) 76 | 77 | if opt.kitti: 78 | filename = os.path.join(opt.result_dir, "{:02d}_pred.txt".format(opt.sequence_id)) 79 | else: 80 | filename = os.path.join(opt.result_dir, "fm_ms_euroc_mh04_diff_3.txt") 81 | 82 | np.savetxt(filename, poses, delimiter=' ', fmt='%1.8e') 83 | if opt.kitti: 84 | opt.eva_seqs = '{:02d}_pred'.format(opt.sequence_id) 85 | pose_eval = kittiOdomEval(opt) 86 | pose_eval.eval(toCameraCoord=False) # set the value according to the predicted results 87 | print('saving into ', opt.result_dir) 88 | 89 | 90 | if __name__ == "__main__": 91 | parser = argparse.ArgumentParser(description='Train a detector') 92 | parser.add_argument('--model_path', default='/media/sconly/24eda5d5-e79b-423b-8dcc-8339a15f3219/weight/fm_depth_odom.pth', help='model save path') 93 | parser.add_argument('--data_path', default='/media/sconly/24eda5d5-e79b-423b-8dcc-8339a15f3219/data/kitti/Odometry', help='kitti odometry dataset path') 94 | parser.add_argument('--gt_dir', default='../mono/datasets/gt_pose',help='kitti odometry gt path') 95 | parser.add_argument('--result_dir', default='/media/sconly/24eda5d5-e79b-423b-8dcc-8339a15f3219/odom/') 96 | parser.add_argument('--height', default=192) 97 | parser.add_argument('--width', default=640) 98 | parser.add_argument('--kitti', default=True, help='whether test on the kitti odometry dataset') 99 | parser.add_argument('--sequence_id', default=9, help='which kitti odometry sequence for testing') 100 | opts = parser.parse_args() 101 | odo(opts) 102 | print("you can also run 'evo_traj kitti -s *.txt *.txt --ref=*.txt -p --plot_mode=xz' in terminal for visualization") -------------------------------------------------------------------------------- /scripts/eval_depth.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import cv2 3 | import sys 4 | import numpy as np 5 | from mmcv import Config 6 | 7 | import torch 8 | from torch.utils.data import DataLoader 9 | 10 | sys.path.append('.') 11 | from mono.model.registry import MONO 12 | from mono.model.mono_baseline.layers import disp_to_depth 13 | from mono.datasets.utils import readlines, compute_errors 14 | from mono.datasets.kitti_dataset import KITTIRAWDataset 15 | 16 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1) 17 | STEREO_SCALE_FACTOR = 36 18 | MIN_DEPTH=1e-3 19 | MAX_DEPTH=80 20 | 21 | 22 | def evaluate(MODEL_PATH, CFG_PATH, GT_PATH): 23 | filenames = readlines("../mono/datasets/splits/exp/val_files.txt") 24 | cfg = Config.fromfile(CFG_PATH) 25 | 26 | dataset = KITTIRAWDataset(cfg.data['in_path'], 27 | filenames, 28 | cfg.data['height'], 
29 | cfg.data['width'], 30 | [0], 31 | is_train=False, 32 | gt_depth_path=GT_PATH) 33 | 34 | dataloader = DataLoader(dataset, 35 | 1, 36 | shuffle=False, 37 | num_workers=4, 38 | pin_memory=True, 39 | drop_last=False) 40 | 41 | cfg.model['imgs_per_gpu'] = 1 42 | model = MONO.module_dict[cfg.model['name']](cfg.model) 43 | checkpoint = torch.load(MODEL_PATH) 44 | model.load_state_dict(checkpoint['state_dict'], strict=True) 45 | model.cuda() 46 | model.eval() 47 | 48 | pred_disps = [] 49 | with torch.no_grad(): 50 | for batch_idx, inputs in enumerate(dataloader): 51 | for key, ipt in inputs.items(): 52 | inputs[key] = ipt.cuda() 53 | outputs = model(inputs) 54 | 55 | disp = outputs[("disp", 0, 0)] 56 | 57 | pred_disp, _ = disp_to_depth(disp, 0.1, 100) 58 | pred_disp = pred_disp.cpu()[:, 0].numpy() 59 | pred_disps.append(pred_disp) 60 | pred_disps = np.concatenate(pred_disps) 61 | 62 | gt_depths = np.load(GT_PATH, allow_pickle=True, fix_imports=True, encoding='latin1')["data"] 63 | 64 | print("-> Evaluating") 65 | if cfg.data['stereo_scale']: 66 | print('using baseline') 67 | else: 68 | print('using mean scaling') 69 | 70 | errors = [] 71 | ratios = [] 72 | for i in range(pred_disps.shape[0]): 73 | gt_depth = gt_depths[i] 74 | gt_height, gt_width = gt_depth.shape[:2] 75 | 76 | pred_disp = pred_disps[i] 77 | pred_disp = cv2.resize(pred_disp, (gt_width, gt_height)) 78 | 79 | pred_depth = 1 / pred_disp 80 | 81 | mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH) 82 | crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height, 83 | 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32) 84 | crop_mask = np.zeros(mask.shape) 85 | crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1 86 | mask = np.logical_and(mask, crop_mask) 87 | 88 | pred_depth = pred_depth[mask] 89 | gt_depth = gt_depth[mask] 90 | 91 | ratio = np.median(gt_depth) / np.median(pred_depth) 92 | ratios.append(ratio) 93 | 94 | if cfg.data['stereo_scale']: 95 | ratio = STEREO_SCALE_FACTOR 96 | 97 | pred_depth *= ratio 98 | pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH 99 | pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH 100 | errors.append(compute_errors(gt_depth, pred_depth)) 101 | 102 | ratios = np.array(ratios) 103 | med = np.median(ratios) 104 | mean_errors = np.array(errors).mean(0) 105 | print("Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med))) 106 | print("\n" + ("{:>}| " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3")) 107 | print(("&{:.3f} " * 7).format(*mean_errors.tolist()) + "\\\\") 108 | print("\n-> Done!") 109 | 110 | 111 | if __name__ == "__main__": 112 | CFG_PATH = '../config/cfg_kitti_fm.py'#path to cfg file 113 | GT_PATH = '/media/user/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz'#path to kitti gt depth 114 | MODEL_PATH = '/media/user/harddisk/weight/fm_depth.pth'#path to model weights 115 | evaluate(MODEL_PATH, CFG_PATH, GT_PATH) -------------------------------------------------------------------------------- /scripts/eval_depth_pp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import cv2 3 | import sys 4 | import numpy as np 5 | from mmcv import Config 6 | 7 | import torch 8 | from torch.utils.data import DataLoader 9 | 10 | sys.path.append('.') 11 | sys.path.append('..') 12 | from mono.model.registry import MONO 13 | from mono.model.mono_baseline.layers import disp_to_depth 14 | from mono.datasets.utils import readlines, 
compute_errors 15 | from mono.datasets.kitti_dataset import KITTIRAWDataset 16 | 17 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1) 18 | STEREO_SCALE_FACTOR = 36 19 | MIN_DEPTH=1e-3 20 | MAX_DEPTH=80 21 | 22 | def batch_post_process_disparity(l_disp, r_disp): 23 | _, h, w = l_disp.shape 24 | m_disp = 0.5 * (l_disp + r_disp) 25 | l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h)) 26 | l_mask = (1.0 - np.clip(20 * (l - 0.05), 0, 1))[None, ...] 27 | r_mask = l_mask[:, :, ::-1] 28 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp 29 | 30 | def evaluate(MODEL_PATH, CFG_PATH, GT_PATH): 31 | filenames = readlines("../mono/datasets/splits/exp/val_files.txt") 32 | cfg = Config.fromfile(CFG_PATH) 33 | 34 | dataset = KITTIRAWDataset(cfg.data['in_path'], 35 | filenames, 36 | cfg.data['height'], 37 | cfg.data['width'], 38 | [0], 39 | is_train=False, 40 | gt_depth_path=None) 41 | 42 | dataloader = DataLoader(dataset, 43 | 2, 44 | shuffle=False, 45 | num_workers=1, 46 | pin_memory=True, 47 | drop_last=True) 48 | 49 | cfg.model['imgs_per_gpu'] = 2 50 | model = MONO.module_dict[cfg.model['name']](cfg.model) 51 | checkpoint = torch.load(MODEL_PATH) 52 | model.load_state_dict(checkpoint['state_dict'], strict=True) 53 | model.cuda() 54 | model.eval() 55 | 56 | pred_disps = [] 57 | with torch.no_grad(): 58 | for batch_idx, inputs in enumerate(dataloader): 59 | print(batch_idx) 60 | for key, ipt in inputs.items(): 61 | inputs[key] = ipt.cuda() 62 | 63 | outputs = model(inputs) 64 | 65 | disp = outputs[("disp", 0, 0)] 66 | # N = pred_disp.shape[0] // 2 67 | # pred_disp = batch_post_process_disparity(pred_disp[:N], pred_disp[N:, :, ::-1]) 68 | pred_disp, _ = disp_to_depth(disp, 0.1, 100) 69 | pred_disp = pred_disp.cpu()[:, 0].numpy() 70 | pred_disps.append(pred_disp) 71 | pred_disps = np.concatenate(pred_disps) 72 | 73 | gt_depths = np.load(GT_PATH, allow_pickle=True, fix_imports=True, encoding='latin1')["data"] 74 | 75 | print("-> Evaluating") 76 | if cfg.data['stereo_scale']: 77 | print('using baseline') 78 | else: 79 | print('using mean scaling') 80 | 81 | errors = [] 82 | ratios = [] 83 | for i in range(pred_disps.shape[0]): 84 | gt_depth = gt_depths[i] 85 | gt_height, gt_width = gt_depth.shape[:2] 86 | 87 | pred_disp = pred_disps[i] 88 | pred_disp = cv2.resize(pred_disp, (gt_width, gt_height)) 89 | 90 | pred_depth = 1 / pred_disp 91 | 92 | mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH) 93 | crop = np.array([0.40810811 * gt_height, 0.99189189 * gt_height, 94 | 0.03594771 * gt_width, 0.96405229 * gt_width]).astype(np.int32) 95 | crop_mask = np.zeros(mask.shape) 96 | crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1 97 | mask = np.logical_and(mask, crop_mask) 98 | 99 | pred_depth = pred_depth[mask] 100 | gt_depth = gt_depth[mask] 101 | 102 | ratio = np.median(gt_depth) / np.median(pred_depth) 103 | ratios.append(ratio) 104 | 105 | if cfg.data['stereo_scale']: 106 | ratio = STEREO_SCALE_FACTOR 107 | 108 | pred_depth *= ratio 109 | pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH 110 | pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH 111 | errors.append(compute_errors(gt_depth, pred_depth)) 112 | 113 | ratios = np.array(ratios) 114 | med = np.median(ratios) 115 | mean_errors = np.array(errors).mean(0) 116 | print("Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(med, np.std(ratios / med))) 117 | print("\n" + ("{:>}| " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3")) 118 | print(("&{:.3f} " * 
7).format(*mean_errors.tolist()) + "\\\\") 119 | print("\n-> Done!") 120 | 121 | 122 | if __name__ == "__main__": 123 | CFG_PATH = '../config/cfg_kitti_fm.py'#path to cfg file 124 | GT_PATH = '/media/sconly/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz'#path to kitti gt depth 125 | MODEL_PATH = '/media/sconly/harddisk/weight/fm_depth.pth'#path to model weights 126 | evaluate(MODEL_PATH, CFG_PATH, GT_PATH) -------------------------------------------------------------------------------- /scripts/eval_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import os 3 | import sys 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import DataLoader 8 | 9 | sys.path.append('.') 10 | from mono.datasets.utils import readlines, dump_xyz, compute_ate, transformation_from_parameters 11 | from mono.datasets.kitti_dataset import KITTIOdomDataset 12 | from mono.model.mono_fm.pose_encoder import PoseEncoder 13 | from mono.model.mono_fm.pose_decoder import PoseDecoder 14 | 15 | 16 | 17 | 18 | 19 | def evaluate(data_path,model_path,sequence_id,height,width): 20 | filenames = readlines("../mono/datasets/splits/odom/test_files_{:02d}.txt".format(sequence_id)) 21 | 22 | dataset = KITTIOdomDataset(data_path, 23 | filenames, 24 | height, 25 | width, 26 | [0, 1], 27 | is_train=False, 28 | img_ext='.png', 29 | gt_depth_path=None) 30 | 31 | dataloader = DataLoader(dataset, 32 | 1, 33 | shuffle=False, 34 | num_workers=4, 35 | pin_memory=True, 36 | drop_last=False) 37 | 38 | 39 | pose_encoder = PoseEncoder(18, None, 2) 40 | pose_decoder = PoseDecoder(pose_encoder.num_ch_enc) 41 | 42 | checkpoint = torch.load(model_path) 43 | for name, param in pose_encoder.state_dict().items(): 44 | pose_encoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseEncoder.' + name]) 45 | for name, param in pose_decoder.state_dict().items(): 46 | pose_decoder.state_dict()[name].copy_(checkpoint['state_dict']['PoseDecoder.' 
+ name]) 47 | pose_encoder.cuda() 48 | pose_encoder.eval() 49 | pose_decoder.cuda() 50 | pose_decoder.eval() 51 | 52 | pred_poses = [] 53 | 54 | print("-> Computing pose predictions") 55 | with torch.no_grad(): 56 | for inputs in dataloader: 57 | for key, ipt in inputs.items(): 58 | inputs[key] = ipt.cuda() 59 | all_color_aug = torch.cat([inputs[("color_aug", i, 0)] for i in [0, 1]], 1) 60 | features = pose_encoder(all_color_aug) 61 | axisangle, translation = pose_decoder(features) 62 | pred_poses.append(transformation_from_parameters(axisangle[:, 0], translation[:, 0]).cpu().numpy()) 63 | pred_poses = np.concatenate(pred_poses) 64 | 65 | gt_poses_path = os.path.join(data_path, "poses", "{:02d}.txt".format(sequence_id)) 66 | gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4) 67 | gt_global_poses = np.concatenate((gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1) 68 | gt_global_poses[:, 3, 3] = 1 69 | gt_xyzs = gt_global_poses[:, :3, 3] 70 | gt_local_poses = [] 71 | for i in range(1, len(gt_global_poses)): 72 | gt_local_poses.append(np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]), gt_global_poses[i]))) 73 | 74 | ates = [] 75 | num_frames = gt_xyzs.shape[0] 76 | track_length = 5 77 | for i in range(0, num_frames - 1): 78 | local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1])) 79 | gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1])) 80 | ates.append(compute_ate(gt_local_xyzs, local_xyzs)) 81 | 82 | print("\n odom_{} Trajectory error: {:0.3f}, std: {:0.3f}\n".format(sequence_id, np.mean(ates), np.std(ates))) 83 | 84 | # save_path = os.path.join(load_weights_folder, "poses.npy") 85 | # np.save(save_path, pred_poses) 86 | # print("-> Predictions saved to", save_path) 87 | 88 | 89 | if __name__ == "__main__": 90 | data_path='/media/user/harddisk/data/kitti/Odometry/dataset'#path to kitti odometry 91 | model_path = '/media/user/harddisk/weight/fm_depth.pth' 92 | height=320 93 | width=1024 94 | sequence_id =9 95 | evaluate(data_path,model_path,sequence_id,height,width) 96 | sequence_id = 10 97 | evaluate(data_path,model_path,sequence_id,height,width) 98 | -------------------------------------------------------------------------------- /scripts/infer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import os 3 | import cv2 4 | import sys 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | from mmcv import Config 8 | 9 | import torch 10 | from torch.utils.data import DataLoader 11 | 12 | sys.path.append('.') 13 | sys.path.append('..') 14 | from mono.model.registry import MONO 15 | from mono.model.mono_baseline.layers import disp_to_depth 16 | from mono.datasets.utils import readlines 17 | from mono.datasets.kitti_dataset import KITTIRAWDataset 18 | 19 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1) 20 | 21 | MIN_DEPTH=1e-3 22 | MAX_DEPTH=80 23 | SCALE = 36#we set baseline=0.0015m which is 36 times smaller than the actual value (0.54m) 24 | 25 | def transform(cv2_img, height=320, width=1024): 26 | im_tensor = torch.from_numpy(cv2_img.astype(np.float32)).cuda().unsqueeze(0) 27 | im_tensor = im_tensor.permute(0, 3, 1, 2).contiguous() 28 | im_tensor = torch.nn.functional.interpolate(im_tensor, [height, width],mode='bilinear', align_corners=False) 29 | im_tensor /= 255 30 | return im_tensor 31 | 32 | def predict(cv2_img, model): 33 | original_height, original_width = 
cv2_img.shape[:2] 34 | im_tensor = transform(cv2_img) 35 | 36 | with torch.no_grad(): 37 | input = {} 38 | input['color_aug', 0, 0] = im_tensor 39 | outputs = model(input) 40 | 41 | disp = outputs[("disp", 0, 0)] 42 | disp_resized = torch.nn.functional.interpolate(disp, (original_height, original_width), mode="bilinear", align_corners=False) 43 | min_disp = 1/MAX_DEPTH 44 | max_disp = 1/MIN_DEPTH 45 | depth = 1/(disp_resized.squeeze().cpu().numpy()*max_disp + min_disp) * SCALE 46 | return depth, disp_resized.squeeze().cpu().numpy() 47 | 48 | def evaluate(cfg_path, model_path, img_path, output_path): 49 | cfg = Config.fromfile(cfg_path) 50 | cfg['model']['depth_pretrained_path'] = None 51 | cfg['model']['pose_pretrained_path'] = None 52 | cfg['model']['extractor_pretrained_path'] = None 53 | model = MONO.module_dict[cfg.model['name']](cfg.model) 54 | checkpoint = torch.load(model_path) 55 | model.load_state_dict(checkpoint['state_dict'], strict=True) 56 | model.cuda() 57 | model.eval() 58 | 59 | with torch.no_grad(): 60 | cv2_img = cv2.imread(img_path) 61 | cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB) 62 | 63 | depth, disp_resized = predict(cv2_img, model) 64 | 65 | vmax = np.percentile(disp_resized, 95) 66 | plt.imsave(output_path, disp_resized, cmap='magma', vmax=vmax) 67 | 68 | print("\n-> Done!") 69 | 70 | 71 | if __name__ == "__main__": 72 | cfg_path = '../config/cfg_kitti_fm.py'# path to cfg file 73 | model_path = '/media/sconly/harddisk/weight/fm_depth.pth'# path to model weight 74 | img_path = '../assets/test.png' 75 | output_path = '../assets/test_disp.png' # dir for saving depth maps 76 | evaluate(cfg_path, model_path, img_path, output_path) -------------------------------------------------------------------------------- /scripts/infer_singleimage.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import os 3 | import cv2 4 | import sys 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | from mmcv import Config 8 | 9 | import torch 10 | from torch.utils.data import DataLoader 11 | 12 | sys.path.append('.') 13 | from mono.model.registry import MONO 14 | from mono.model.mono_baseline.layers import disp_to_depth 15 | from mono.datasets.utils import readlines 16 | from mono.datasets.kitti_dataset import KITTIRAWDataset 17 | 18 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1) 19 | 20 | 21 | 22 | def evaluate(cfg_path,model_path,gt_path, output_path): 23 | filenames = readlines("../mono/datasets/splits/exp/val_files.txt") 24 | cfg = Config.fromfile(cfg_path) 25 | 26 | dataset = KITTIRAWDataset(cfg.data['in_path'], 27 | filenames, 28 | cfg.data['height'], 29 | cfg.data['width'], 30 | [0], 31 | is_train=False, 32 | gt_depth_path=gt_path) 33 | 34 | dataloader = DataLoader(dataset, 35 | 1, 36 | shuffle=False, 37 | num_workers=4, 38 | pin_memory=True, 39 | drop_last=False) 40 | 41 | cfg.model['imgs_per_gpu'] = 1 42 | model = MONO.module_dict[cfg.model['name']](cfg.model) 43 | checkpoint = torch.load(model_path) 44 | model.load_state_dict(checkpoint['state_dict'], strict=False) 45 | model.cuda() 46 | model.eval() 47 | 48 | with torch.no_grad(): 49 | for batch_idx, inputs in enumerate(dataloader): 50 | for key, ipt in inputs.items(): 51 | inputs[key] = ipt.cuda() 52 | outputs = model(inputs) 53 | 54 | img_path = os.path.join(output_path, 'img_{:0>4d}.jpg'.format(batch_idx)) 55 | plt.imsave(img_path, inputs[("color", 0, 
0)][0].squeeze().transpose(0,1).transpose(1,2).cpu().numpy()) 56 | 57 | disp = outputs[("disp", 0, 0)] 58 | pred_disp, _ = disp_to_depth(disp, 0.1, 100) 59 | pred_disp = pred_disp[0, 0].cpu().numpy() 60 | pred_disp = cv2.resize(pred_disp, (cfg.data['width'], cfg.data['height'])) 61 | 62 | img_path = os.path.join(output_path, 'disp_{:0>4d}.jpg'.format(batch_idx)) 63 | vmax = np.percentile(pred_disp, 95) 64 | plt.imsave(img_path, pred_disp, cmap='magma', vmax=vmax) 65 | 66 | print("\n-> Done!") 67 | 68 | 69 | if __name__ == "__main__": 70 | cfg_path = '../config/cfg_kitti_fm.py'# path to cfg file 71 | model_path = '/media/user/harddisk/weight/fm_depth.pth'# path to model weight 72 | gt_path = '/media/user/harddisk/data/kitti/kitti_raw/rawdata/gt_depths.npz' # path to kitti gt depth 73 | output_path = '/media/user/harddisk/results' # dir for saving depth maps 74 | if not os.path.exists(output_path): 75 | os.mkdir(output_path) 76 | evaluate(cfg_path,model_path,gt_path,output_path) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | from mmcv import Config 5 | from mmcv.runner import load_checkpoint 6 | 7 | from mono.datasets.get_dataset import get_dataset 8 | from mono.apis import (train_mono, 9 | init_dist, 10 | get_root_logger, 11 | set_random_seed) 12 | from mono.model.registry import MONO 13 | import torch 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description='Train a detector') 18 | parser.add_argument('--config', 19 | default='/home/user/Documents/code/fm_depth/config/cfg_kitti_fm_joint.py', 20 | help='train config file path') 21 | parser.add_argument('--work_dir', 22 | default='/media/user/harddisk/weight/fmdepth', 23 | help='the dir to save logs and models') 24 | parser.add_argument('--resume_from', 25 | help='the checkpoint file to resume from') 26 | parser.add_argument('--gpus', 27 | default='0', 28 | type=str, 29 | help='number of gpus to use ' 30 | '(only applicable to non-distributed training)') 31 | parser.add_argument('--seed', 32 | type=int, 33 | default=1024, 34 | help='random seed') 35 | parser.add_argument('--launcher', 36 | choices=['none', 'pytorch', 'slurm', 'mpi'], 37 | default='pytorch', 38 | help='job launcher') 39 | parser.add_argument('--local_rank', 40 | type=int, 41 | default=0) 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | def main(): 47 | args = parse_args() 48 | print(args.config) 49 | cfg = Config.fromfile(args.config) 50 | cfg.work_dir = args.work_dir 51 | 52 | # set cudnn_benchmark 53 | if cfg.get('cudnn_benchmark', False): 54 | torch.backends.cudnn.benchmark = True 55 | 56 | if args.resume_from is not None: 57 | cfg.resume_from = args.resume_from 58 | cfg.gpus = [int(_) for _ in args.gpus.split(',')] 59 | 60 | # init distributed env first, since logger depends on the dist info. 
61 | if args.launcher == 'none': 62 | distributed = False 63 | else: 64 | distributed = True 65 | init_dist(args.launcher, **cfg.dist_params) 66 | 67 | print('cfg is ', cfg) 68 | # init logger before other steps 69 | logger = get_root_logger(cfg.log_level) 70 | logger.info('Distributed training: {}'.format(distributed)) 71 | 72 | # set random seeds 73 | if args.seed is not None: 74 | logger.info('Set random seed to {}'.format(args.seed)) 75 | set_random_seed(args.seed) 76 | 77 | model_name = cfg.model['name'] 78 | model = MONO.module_dict[model_name](cfg.model) 79 | 80 | if cfg.resume_from is not None: 81 | load_checkpoint(model, cfg.resume_from, map_location='cpu') 82 | elif cfg.finetune is not None: 83 | print('loading from', cfg.finetune) 84 | checkpoint = torch.load(cfg.finetune, map_location='cpu') 85 | model.load_state_dict(checkpoint['state_dict'], strict=False) 86 | 87 | train_dataset = get_dataset(cfg.data, training=True) 88 | if cfg.validate: 89 | val_dataset = get_dataset(cfg.data, training=False) 90 | else: 91 | val_dataset = None 92 | 93 | train_mono(model, 94 | train_dataset, 95 | val_dataset, 96 | cfg, 97 | distributed=distributed, 98 | validate=cfg.validate, 99 | logger=logger) 100 | 101 | 102 | if __name__ == '__main__': 103 | main() --------------------------------------------------------------------------------
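Note (illustrative sketch, not repository code): the MONO registry in mono/model/registry.py is what ties a config file to a network class. A model registers itself under its class name via the register_module decorator, and train.py (as well as the evaluation scripts above) instantiates it with MONO.module_dict[cfg.model['name']](cfg.model). The class and config dict below are hypothetical placeholders.

import torch.nn as nn
from mono.model.registry import MONO

@MONO.register_module
class ToyNet(nn.Module):            # hypothetical network, for illustration only
    def __init__(self, options):
        super(ToyNet, self).__init__()
        self.options = options      # would normally be cfg.model from a config file
        self.conv = nn.Conv2d(3, 1, 3, padding=1)

    def forward(self, inputs):
        return self.conv(inputs)

# Mirrors the lookup performed in train.py and in scripts/eval_depth.py:
model = MONO.module_dict['ToyNet']({'name': 'ToyNet'})

Because register_module returns the class unchanged, decorating a network has no side effect other than adding it to module_dict, so new models can be dropped into mono/model/ and selected purely through the config's model['name'] entry.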